Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
- import csv
- from pprint import pprint
def get_match_info(championships, matches_details, seen_matches):
    """Collect the matches listed in one championship card.

    Args:
        championships: One parsed championship card. Its second child node
            (``contents[1]``) is expected to hold the ``<h2>`` title and its
            fourth child node (``contents[3]``) the match ``<div>`` elements.
        matches_details: List that receives one dict per newly seen match,
            with the keys "Title", "Team A", "Team B", "Time" and "Result".
            Mutated in place.
        seen_matches: Set of ``(title, team A, team B, time, score)`` tuples
            for matches already recorded. Mutated in place.

    Returns:
        The same ``matches_details`` list that was passed in.
    """
    try:
        championship_title = championships.contents[1].find('h2').text.strip()
        all_matches = championships.contents[3].find_all('div')
    except (IndexError, AttributeError):
        # The card lacks the expected children or its <h2> title. Skip this
        # card instead of aborting the whole scrape, but say so explicitly.
        print("\nskipping championship card with unexpected layout")
        return matches_details

    match_number = 1
    for match in all_matches:
        # Team names; candidates without both team blocks are not matches.
        team_a_tag = match.find('div', {'class': 'teamA'})
        team_b_tag = match.find('div', {'class': 'teamB'})
        if not team_a_tag or not team_b_tag:
            continue

        # Result block: two score spans plus the kick-off time span.
        result_tag = match.find('div', {'class': 'MResult'})
        if not result_tag:
            continue
        score_spans = result_tag.find_all('span', {'class': 'score'})
        time_span = result_tag.find('span', {'class': 'time'})
        if len(score_spans) < 2 or not time_span:
            continue

        team_a = team_a_tag.text.strip()
        team_b = team_b_tag.text.strip()
        score = f"{score_spans[0].text.strip()}:{score_spans[1].text.strip()}"
        match_time = time_span.text.strip()

        # The candidate list can yield the same match more than once
        # (presumably because nested wrapper <div>s are returned too), so
        # every match is identified by a tuple and recorded only once.
        match_key = (championship_title, team_a, team_b, match_time, score)
        if match_key in seen_matches:
            continue
        seen_matches.add(match_key)

        details = {
            "Title": championship_title,
            "Team A": team_a,
            "Team B": team_b,
            "Time": match_time,
            "Result": score,
        }
        matches_details.append(details)
        print(f"\ngetting details of match # {match_number}")
        match_number += 1
        pprint(details)

    return matches_details
def main(date="11/22/2024"):
    """Scrape the Yallakora match center for one day and save it as CSV.

    Downloads the match-center page for ``date``, runs ``get_match_info`` on
    every ``div.matchCard`` element and writes the collected rows to
    ``matches_details.csv`` in the current working directory.

    Args:
        date: Day to fetch, inserted verbatim into the ``date`` query
            parameter. The default is the example date; it looks like
            MM/DD/YYYY -- confirm against the site before relying on it.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an error status.
    """
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status stops an error page from being parsed
    # as if it were the match list.
    page = requests.get(
        f"https://www.yallakora.com/match-center/?date={date}#days",
        timeout=30,
    )
    page.raise_for_status()
    page.encoding = 'utf-8'  # Make sure we decode page content as UTF-8
    soup = BeautifulSoup(page.text, "lxml")

    matches_details = []
    # Shared across all cards so a match is written only once.
    seen_matches = set()
    for championship in soup.find_all("div", {'class': 'matchCard'}):
        matches_details = get_match_info(championship, matches_details, seen_matches)

    # utf-8-sig writes a BOM at the start of the file; newline='' lets the
    # csv module control line endings itself.
    with open('matches_details.csv', 'w', encoding='utf-8-sig', newline='') as output_file:
        dict_writer = csv.DictWriter(
            output_file,
            fieldnames=["Title", "Team A", "Team B", "Time", "Result"],
        )
        # writeheader() emits the column names, so no placeholder header row
        # has to be kept in matches_details.
        dict_writer.writeheader()
        dict_writer.writerows(matches_details)
    print("File Created")
# Run the scraper only when executed as a script, so importing this module
# does not trigger a network request or write a file.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement