Advertisement
kingbode

Untitled

Dec 12th, 2024
30
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.00 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import csv
  4. from pprint import pprint
  5.  
  6. def get_match_info(championships, matches_details, seen_matches):
  7.     championship_title = championships.contents[1].find('h2').text.strip()
  8.     all_matches = championships.contents[3].find_all('div')
  9.  
  10.     i = 1
  11.     for match in all_matches:
  12.         # get teams names:
  13.         team_A_tag = match.find('div', {'class': 'teamA'})
  14.         team_B_tag = match.find('div', {'class': 'teamB'})
  15.         if not team_A_tag or not team_B_tag:
  16.             continue
  17.         team_A = team_A_tag.text.strip()
  18.         team_B = team_B_tag.text.strip()
  19.  
  20.         # get score:
  21.         MResult = match.find('div', {'class': 'MResult'})
  22.         if not MResult:
  23.             continue
  24.  
  25.         score_spans = MResult.find_all('span', {'class': 'score'})
  26.         time_span = MResult.find('span', {'class': 'time'})
  27.         if len(score_spans) < 2 or not time_span:
  28.             continue
  29.  
  30.         # score = f"{score_spans[0].text.strip()} - {score_spans[1].text.strip()}"
  31.         score = f"{score_spans[0].text.strip()}:{score_spans[1].text.strip()}"
  32.         # score = f"'{score_spans[0].text.strip()} - {score_spans[1].text.strip()}"
  33.  
  34.         match_time = time_span.text.strip()
  35.  
  36.         # Create a tuple that uniquely identifies the match
  37.         match_key = (championship_title, team_A, team_B, match_time, score)
  38.  
  39.         # Check if we have seen this match before
  40.         if match_key not in seen_matches:
  41.             seen_matches.add(match_key)
  42.             temp_Dict = {
  43.                 "Title": championship_title,
  44.                 "Team A": team_A,
  45.                 "Team B": team_B,
  46.                 "Time": match_time,
  47.                 "Result": score,
  48.             }
  49.  
  50.             matches_details.append(temp_Dict)
  51.             print(f"\ngetting details of maatch # {i}")
  52.             i += 1
  53.             pprint(temp_Dict)
  54.  
  55.     return matches_details
  56.  
  57.  
  58. def main():
  59.     date = "11/22/2024" # example date
  60.     page = requests.get(f"https://www.yallakora.com/match-center/?date={date}#days")
  61.     page.encoding = 'utf-8'  # Make sure we decode page content as UTF-8
  62.     src = page.text
  63.     soup = BeautifulSoup(src, "lxml")
  64.  
  65.     matches_details = []
  66.     # append the Keys Dict
  67.     matches_details.append({
  68.         "Title": "Title",
  69.         "Team A": "Team A",
  70.         "Team B": "Team B",
  71.         "Time": "Time",
  72.         "Result": "Score",
  73.     })
  74.  
  75.     championships = soup.find_all("div", {'class' :'matchCard'})
  76.  
  77.     # Set to keep track of seen matches
  78.     seen_matches = set()
  79.  
  80.     for championship in championships:
  81.         matches_details = get_match_info(championship, matches_details, seen_matches)
  82.  
  83.     with open('matches_details.csv', 'w', encoding='utf-8-sig', newline='') as output_file:
  84.         dict_writer = csv.DictWriter(output_file, fieldnames=["Title","Team A","Team B","Time","Result"])
  85.         dict_writer.writeheader()
  86.         dict_writer.writerows(matches_details[1:])
  87.         print("File Created")
  88.  
  89.  
  90. main()
  91.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement