Advertisement
Brovashift

Untitled

May 23rd, 2023 (edited)
222
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 23.26 KB | None | 0 0
  1. #!/usr/bin/env python
  2. #coding: utf-8
  3.  
  4. import os
  5. import requests
  6. import json
  7. import csv
  8. from datetime import datetime, timedelta
  9. import sys
  10. import pandas as pd
  11.  
  12. from collections import defaultdict
  13. from datetime import datetime, timedelta
  14. from lxml import etree, html
  15. from orjson import loads, dumps
  16. from re import search
  17.  
  18. from utils.going import get_surface
  19. from utils.header import RandomHeader
  20. from utils.lxml_funcs import find
  21. from utils.region import get_region
  22.  
# Module-level RandomHeader instance; the fetch functions in this file pass
# random_header.header() as the `headers` argument of every session.get call.
random_header = RandomHeader()
  24.  
  25. def clean_name(name):
  26.     if name:
  27.         return name.strip().replace("'", '').lower().title()
  28.     else:
  29.         return ''
  30.  
  31. def get_race_urls(session, racecard_url):
  32.     r = session.get(racecard_url, headers=random_header.header())
  33.     doc = html.fromstring(r.content)
  34.  
  35.     race_urls = []
  36.  
  37.     for meeting in doc.xpath("//section[@data-accordion-row]"):
  38.         course = meeting.xpath(".//span[contains(@class, 'RC-accordion__courseName')]")[0]
  39.         if valid_course(course.text_content().strip().lower()):
  40.             for race in meeting.xpath(".//a[@class='RC-meetingItem__link js-navigate-url']"):
  41.                 race_urls.append('https://www.racingpost.com' + race.attrib['href'])
  42.  
  43.     return sorted(list(set(race_urls)))
  44.  
  45. def valid_course(course):
  46.     invalid = ['free to air', 'worldwide stakes', '(arab)']
  47.     return all([x not in course for x in invalid])
  48.  
  49. def parse_races(session, race_urls, date):
  50.     races = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
  51.  
  52.     going_info = get_going_info(session, date)
  53.  
  54.     for url in race_urls:
  55.         r = session.get(url, headers=random_header.header(), allow_redirects=False)
  56.  
  57.         if r.status_code != 200:
  58.             print('Failed to get racecard.')
  59.             print(f'URL: {url}')
  60.             print(f'Response: {r.status_code}')
  61.             continue
  62.  
  63.         try:
  64.             doc = html.fromstring(r.content)
  65.         except etree.ParserError:
  66.             continue
  67.  
  68.         race = {}
  69.  
  70.         url_split = url.split('/')
  71.  
  72.         race['course'] = find(doc, 'h1', 'RC-courseHeader__name')
  73.  
  74.         if race['course'] == 'Belmont At The Big A':
  75.             race['course_id'] = 255
  76.             race['course'] = 'Aqueduct'
  77.         else:
  78.             race['course_id'] = int(url_split[4])
  79.             # Assign the course name directly to the 'course' field
  80.             race['course'] = find(doc, 'h1', 'RC-courseHeader__name')  
  81.  
  82.         race['race_id'] = int(url_split[7])
  83.         race['date'] = url_split[6]
  84.         race['off_time'] = find(doc, 'span', 'RC-courseHeader__time')
  85.         race['race_name'] = find(doc, 'span', 'RC-header__raceInstanceTitle')
  86.         race['distance_round'] = find(doc, 'strong', 'RC-header__raceDistanceRound')
  87.         race['distance'] = find(doc, 'span', 'RC-header__raceDistance')
  88.         race['distance'] = race['distance_round'] if not race['distance'] else race['distance'].strip('()')
  89.         race['distance_f'] = distance_to_furlongs(race['distance_round'])
  90.         race['region'] = get_region(str(race['course_id']))
  91.         race['pattern'] = get_pattern(race['race_name'].lower())
  92.         race['race_class'] = find(doc, 'span', 'RC-header__raceClass')
  93.         race['race_class'] = race['race_class'].strip('()') if race['race_class'] else ''
  94.         race['type'] = get_race_type(doc, race['race_name'].lower(), race['distance_f'])
  95.  
  96.         if not race['race_class']:
  97.             if race['pattern']:
  98.                 race['race_class'] = 'Class 1'
  99.  
  100.         try:
  101.             band = find(doc, 'span', 'RC-header__rpAges').strip('()').split()
  102.             if band:
  103.                 race['age_band'] = band[0]
  104.                 race['rating_band'] = band[1] if len(band) > 1 else None
  105.             else:
  106.                 race['age_band'] = None
  107.                 race['rating_band'] = None
  108.         except AttributeError:
  109.             race['age_band'] = None
  110.             race['rating_band'] = None
  111.  
  112.         prize = find(doc, 'div', 'RC-headerBox__winner').lower()
  113.         race['prize'] = prize.split('winner:')[1].strip() if 'winner:' in prize else None
  114.         field_size = find(doc, 'div', 'RC-headerBox__runners').lower()
  115.         if field_size:
  116.             race['field_size'] = int(field_size.split('runners:')[1].split('(')[0].strip())
  117.         else:
  118.             race['field_size'] = ''
  119.  
  120.         try:
  121.             race['going_detailed'] = going_info[race['course_id']]['going']
  122.             race['rail_movements'] = going_info[race['course_id']]['rail_movements']
  123.             race['stalls'] = going_info[race['course_id']]['stalls']
  124.             race['weather'] = going_info[race['course_id']]['weather']
  125.         except KeyError:
  126.             race['going'] = None
  127.             race['rail_movements'] = None
  128.             race['stalls'] = None
  129.             race['weather'] = None
  130.  
  131.         going = find(doc, 'div', 'RC-headerBox__going').lower()
  132.         race['going'] = going.split('going:')[1].strip().title() if 'going:' in going else ''
  133.  
  134.         race['surface'] = get_surface(race['going'])
  135.  
  136.         profile_hrefs = doc.xpath("//a[@data-test-selector='RC-cardPage-runnerName']/@href")
  137.         profile_urls = ['https://www.racingpost.com' + a.split('#')[0] + '/form' for a in profile_hrefs]
  138.  
  139.         runners = get_runners(session, profile_urls)
  140.  
  141.         for horse in doc.xpath("//div[contains(@class, ' js-PC-runnerRow')]"):
  142.             horse_id = int(find(horse, 'a', 'RC-cardPage-runnerName', attrib='href').split('/')[3])
  143.  
  144.             if 'broken_url' in runners[horse_id]:
  145.                 sire = find(horse, 'a', 'RC-pedigree__sire').split('(')
  146.                 dam = find(horse, 'a', 'RC-pedigree__dam').split('(')
  147.                 damsire = find(horse, 'a', 'RC-pedigree__damsire').lstrip('(').rstrip(')').split('(')
  148.  
  149.                 runners[horse_id]['sire'] = clean_name(sire[0])
  150.                 runners[horse_id]['dam'] = clean_name(dam[0])
  151.                 runners[horse_id]['damsire'] = clean_name(damsire[0])
  152.  
  153.                 runners[horse_id]['sire_region'] = sire[1].replace(')', '').strip()
  154.                 runners[horse_id]['dam_region'] = dam[1].replace(')', '').strip()
  155.                 runners[horse_id]['damsire_region'] = damsire[1].replace(')', '').strip()
  156.  
  157.                 runners[horse_id]['age'] = find(horse, 'span', 'RC-cardPage-runnerAge', attrib='data-order-age')
  158.  
  159.                 sex = find(horse, 'span', 'RC-pedigree__color-sex').split()
  160.  
  161.                 runners[horse_id]['colour'] = sex[0]
  162.                 runners[horse_id]['sex_code'] = sex[1].capitalize()
  163.  
  164.                 runners[horse_id]['trainer'] = find(horse, 'a', 'RC-cardPage-runnerTrainer-name', attrib='data-order-trainer')
  165.  
  166.             runners[horse_id]['number'] = int(find(horse, 'span', 'RC-cardPage-runnerNumber-no', attrib='data-order-no'))
  167.  
  168.             try:
  169.                 runners[horse_id]['draw'] = int(find(horse, 'span', 'RC-cardPage-runnerNumber-draw', attrib='data-order-draw'))
  170.             except ValueError:
  171.                 runners[horse_id]['draw'] = None
  172.  
  173.             runners[horse_id]['headgear'] = find(horse, 'span', 'RC-cardPage-runnerHeadGear')
  174.             runners[horse_id]['headgear_first'] = find(horse, 'span', 'RC-cardPage-runnerHeadGear-first')
  175.  
  176.             try:
  177.                 runners[horse_id]['lbs'] = int(find(horse, 'span', 'RC-cardPage-runnerWgt-carried', attrib='data-order-wgt'))
  178.             except ValueError:
  179.                 runners[horse_id]['lbs'] = None
  180.  
  181.             try:
  182.                 runners[horse_id]['ofr'] = int(find(horse, 'span', 'RC-cardPage-runnerOr', attrib='data-order-or'))
  183.             except ValueError:
  184.                 runners[horse_id]['ofr'] = None
  185.  
  186.             try:
  187.                 runners[horse_id]['rpr'] = int(find(horse, 'span', 'RC-cardPage-runnerRpr', attrib='data-order-rpr'))
  188.             except ValueError:
  189.                 runners[horse_id]['rpr'] = None
  190.  
  191.             try:
  192.                 runners[horse_id]['ts'] = int(find(horse, 'span', 'RC-cardPage-runnerTs', attrib='data-order-ts'))
  193.             except ValueError:
  194.                 runners[horse_id]['ts'] = None
  195.  
  196.             claim = find(horse, 'span', 'RC-cardPage-runnerJockey-allowance')
  197.             jockey = horse.find('.//a[@data-test-selector="RC-cardPage-runnerJockey-name"]')
  198.  
  199.             if jockey is not None:
  200.                 jock = jockey.attrib['data-order-jockey']
  201.                 runners[horse_id]['jockey'] = jock if not claim else jock + f'({claim})'
  202.                 runners[horse_id]['jockey_id'] = int(jockey.attrib['href'].split('/')[3])
  203.             else:
  204.                 runners[horse_id]['jockey'] = None
  205.                 runners[horse_id]['jockey_id'] = None
  206.  
  207.             try:
  208.                 runners[horse_id]['last_run'] = find(horse, 'div', 'RC-cardPage-runnerStats-lastRun')
  209.             except TypeError:
  210.                 runners[horse_id]['last_run'] = None
  211.  
  212.             runners[horse_id]['form'] = find(horse, 'span', 'RC-cardPage-runnerForm')
  213.  
  214.             try:
  215.                 runners[horse_id]['trainer_rtf'] = find(horse, 'span', 'RC-cardPage-runnerTrainer-rtf')
  216.             except TypeError:
  217.                 runners[horse_id]['trainer_rtf'] = None
  218.  
  219.         race['runners'] = [runner for runner in runners.values()]
  220.         races[race['region']][race['course']][race['off_time']] = race
  221.  
  222.     return races
  223.  
  224. def distance_to_furlongs(distance):
  225.     dist = distance.strip().replace('¼', '.25').replace('½', '.5').replace('¾', '.75')
  226.  
  227.     if 'm' in dist:
  228.         if len(dist) > 2:
  229.             dist = int(dist.split('m')[0]) * 8 + float(dist.split('m')[1].strip('f'))
  230.         else:
  231.             dist = int(dist.split('m')[0]) * 8
  232.     else:
  233.         dist = dist.strip('f')
  234.  
  235.     return float(dist)
  236.  
  237. def get_pattern(race_name):
  238.     regex_group = '(\(|\s)((G|g)rade|(G|g)roup) (\d|[A-Ca-c]|I*)(\)|\s)'
  239.     match = search(regex_group, race_name)
  240.  
  241.     if match:
  242.         pattern = f'{match.groups()[1]} {match.groups()[4]}'.title()
  243.         return pattern.title()
  244.  
  245.     if any(x in race_name.lower() for x in {'listed race', '(listed'}):
  246.         return 'Listed'
  247.  
  248.     return ''
  249.  
  250. def get_going_info(session, date):
  251.     r = session.get(f'https://www.racingpost.com/non-runners/{date}', headers=random_header.header())
  252.     doc = html.fromstring(r.content.decode())
  253.  
  254.     json_str = doc.xpath('//body/script')[0].text.replace('var __PRELOADED_STATE__ = ', '').strip().strip(';')
  255.  
  256.     going_info = defaultdict(dict)
  257.  
  258.     for course in loads(json_str):
  259.         going, rail_movements = parse_going(course['going'])
  260.  
  261.         course_id = 0
  262.         course_name = ''
  263.  
  264.         if course['courseName'] == 'Belmont At The Big A':
  265.             course_id = 255
  266.             course_name = 'Aqueduct'
  267.         else:
  268.             course_id = int(course['raceCardsCourseMeetingsUrl'].split('/')[2])
  269.             course_name = course['courseName']
  270.  
  271.         going_info[course_id]['course'] = course_name
  272.         going_info[course_id]['going'] = going
  273.         going_info[course_id]['stalls'] = course['stallsPosition']
  274.         going_info[course_id]['rail_movements'] = rail_movements
  275.         going_info[course_id]['weather'] = course['weather']
  276.  
  277.     return going_info
  278.  
  279. def parse_going(going_info):
  280.     going = going_info
  281.     rail_movements = ''
  282.  
  283.     if 'Rail movements' in going_info:
  284.         going_info = going_info.replace('movements:', 'movements')
  285.         rail_movements = [x.strip() for x in going_info.split('Rail movements')[1].strip().strip(')').split(',')]
  286.         going = going_info.split('(Rail movements')[0].strip()
  287.  
  288.     return going, rail_movements
  289.  
  290. def get_race_type(doc, race, distance):
  291.         race_type = ''
  292.         fences = find(doc, 'div', 'RC-headerBox__stalls')
  293.  
  294.         if 'hurdle' in fences.lower():
  295.             race_type = 'Hurdle'
  296.         elif 'fence' in fences.lower():
  297.             race_type = 'Chase'
  298.         else:
  299.             if distance >= 12:
  300.                 if any(x in race for x in {'national hunt flat', 'nh flat race', 'mares flat race'}):
  301.                     race_type = 'NH Flat'
  302.                 if any(x in race for x in {'inh bumper', ' sales bumper', 'kepak flat race', 'i.n.h. flat race'}):
  303.                     race_type = 'NH Flat'
  304.                 if any(x in race for x in {' hurdle', '(hurdle)'}):
  305.                     race_type = 'Hurdle'
  306.                 if any(x in race for x in {' chase', '(chase)', 'steeplechase', 'steeple-chase', 'steeplchase', 'steepl-chase'}):
  307.                     race_type = 'Chase'
  308.  
  309.         if race_type == '':
  310.             race_type = 'Flat'
  311.  
  312.         return race_type
  313.  
  314. def get_runners(session, profile_urls):
  315.     runners = {}
  316.  
  317.     for url in profile_urls:
  318.         r = session.get(url, headers=random_header.header())
  319.         doc = html.fromstring(r.content)
  320.  
  321.         runner = {}
  322.  
  323.         try:
  324.             json_str = doc.xpath('//body/script')[0].text.split('window.PRELOADED_STATE =')[1].split('\n')[0].strip().strip(';')
  325.             js = loads(json_str)
  326.         except IndexError:
  327.             split = url.split('/')
  328.             runner['horse_id'] = int(split[5])
  329.             runner['name'] = split[6].replace('-', ' ').title()
  330.             runner['broken_url'] = url
  331.             runners[runner['horse_id']] = runner
  332.             continue
  333.  
  334.         runner['horse_id'] = js['profile']['horseUid']
  335.         runner['name'] = clean_name(js['profile']['horseName'])
  336.         runner['dob'] = js['profile']['horseDateOfBirth'].split('T')[0]
  337.         runner['age'] = int(js['profile']['age'].split('-')[0])
  338.         runner['sex'] = js['profile']['horseSex']
  339.         runner['sex_code'] = js['profile']['horseSexCode']
  340.         runner['colour'] = js['profile']['horseColour']
  341.         runner['region'] = js['profile']['horseCountryOriginCode']
  342.  
  343.         runner['breeder'] = js['profile']['breederName']
  344.         runner['dam'] = clean_name(js['profile']['damHorseName'])
  345.         runner['dam_region'] = js['profile']['damCountryOriginCode']
  346.         runner['sire'] = clean_name(js['profile']['sireHorseName'])
  347.         runner['sire_region'] = js['profile']['sireCountryOriginCode']
  348.         runner['grandsire'] = clean_name(js['profile']['siresSireName'])
  349.         runner['damsire'] = clean_name(js['profile']['damSireHorseName'])
  350.         runner['damsire_region'] = js['profile']['damSireCountryOriginCode']
  351.  
  352.         runner['trainer'] = clean_name(js['profile']['trainerName'])
  353.         runner['trainer_id'] = js['profile']['trainerUid']
  354.         runner['trainer_location'] = js['profile']['trainerLocation']
  355.         runner['trainer_14_days'] = js['profile']['trainerLast14Days']
  356.  
  357.         runner['owner'] = clean_name(js['profile']['ownerName'])
  358.  
  359.         runner['prev_trainers'] = js['profile']['previousTrainers']
  360.  
  361.         if runner['prev_trainers']:
  362.             prev_trainers = []
  363.  
  364.             for trainer in runner['prev_trainers']:
  365.                 prev_trainer = {}
  366.                 prev_trainer['trainer'] = trainer['trainerStyleName']
  367.                 prev_trainer['trainer_id'] = trainer['trainerUid']
  368.                 prev_trainer['change_date'] = trainer['trainerChangeDate'].split('T')[0]
  369.                 prev_trainers.append(prev_trainer)
  370.  
  371.             runner['prev_trainers'] = prev_trainers
  372.  
  373.         runner['prev_owners'] = js['profile']['previousOwners']
  374.  
  375.         if runner['prev_owners']:
  376.             prev_owners = []
  377.  
  378.             for owner in runner['prev_owners']:
  379.                 prev_owner = {}
  380.                 prev_owner['owner'] = owner['ownerStyleName']
  381.                 prev_owner['owner_id'] = owner['ownerUid']
  382.                 prev_owner['change_date'] = owner['ownerChangeDate'].split('T')[0]
  383.                 prev_owners.append(prev_owner)
  384.  
  385.             runner['prev_owners'] = prev_owners
  386.  
  387.         if js['profile']['comments']:
  388.             runner['comment'] = js['profile']['comments'][0]['individualComment']
  389.             runner['spotlight'] = js['profile']['comments'][0]['individualSpotlight']
  390.         else:
  391.             runner['comment'] = None
  392.             runner['spotlight'] = None
  393.  
  394.         if js['profile']['medical']:
  395.             medicals = []
  396.  
  397.             for med in js['profile']['medical']:
  398.                 medical = {}
  399.                 medical['date'] = med['medicalDate'].split('T')[0]
  400.                 medical['type'] = med['medicalType']
  401.                 medicals.append(medical)
  402.  
  403.             runner['medical'] = medicals
  404.  
  405.         runner['quotes'] = None
  406.  
  407.         if js['quotes']:
  408.             quotes = []
  409.  
  410.             for q in js['quotes']:
  411.                 quote = {}
  412.                 quote['date'] = q['raceDate'].split('T')[0]
  413.                 quote['horse'] = q['horseStyleName']
  414.                 quote['horse_id'] = q['horseUid']
  415.                 quote['race'] = q['raceTitle']
  416.                 quote['race_id'] = q['raceId']
  417.                 quote['course'] = q['courseStyleName']
  418.                 quote['course_id'] = q['courseUid']
  419.                 quote['distance_f'] = q['distanceFurlong']
  420.                 quote['distance_y'] = q['distanceYard']
  421.                 quote['quote'] = q['notes']
  422.                 quotes.append(quote)
  423.  
  424.             runner['quotes'] = quotes
  425.  
  426.         runner['stable_tour'] = None
  427.  
  428.         if js['stableTourQuotes']:
  429.             quotes = []
  430.  
  431.             for q in js['stableTourQuotes']:
  432.                 quote = {}
  433.                 quote['horse'] = q['horseName']
  434.                 quote['horse_id'] = q['horseUid']
  435.                 quote['quote'] = q['notes']
  436.                 quotes.append(quote)
  437.  
  438.             runner['stable_tour'] = quotes
  439.  
  440.         runners[runner['horse_id']] = runner
  441.  
  442.     return runners
  443.  
  444. def save_runners_to_csv(races, filename):
  445.     with open(filename, 'w', newline='') as csvfile:
  446.         fieldnames = ['horse_id', 'name', 'dob', 'age', 'sex', 'sex_code', 'colour', 'region',
  447.                         'breeder', 'dam', 'dam_region', 'sire', 'sire_region', 'grandsire',
  448.                         'damsire', 'damsire_region', 'trainer', 'trainer_id', 'trainer_location',
  449.                         'trainer_14_days', 'owner', 'prev_trainers', 'prev_owners', 'comment',
  450.                         'spotlight', 'medical', 'quotes', 'stable_tour', 'course', 'course_id',
  451.                         'race_id', 'date', 'off_time', 'race_name', 'distance_round', 'distance',
  452.                         'distance_f', 'region', 'pattern', 'race_class', 'type', 'age_band',
  453.                         'rating_band', 'prize', 'field_size', 'going_detailed', 'rail_movements',
  454.                         'stalls', 'weather', 'going', 'surface', 'number', 'draw', 'headgear',
  455.                         'headgear_first', 'lbs', 'ofr', 'rpr', 'ts', 'jockey', 'jockey_id',
  456.                         'last_run', 'form', 'trainer_rtf']
  457.  
  458.    
  459.         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
  460.         writer.writeheader()
  461.  
  462.     for region in races.values():
  463.         for course in region.values():
  464.             for race in course.values():
  465.                 writer.writerow(race)
  466.  
  467. def convert_json_to_csv(json_file, csv_file):
  468.     # Function to convert the saved JSON file to a CSV file
  469.     with open(json_file, encoding='utf-8') as json_file:
  470.         data = json.load(json_file)
  471.  
  472.     # Process racecard
  473.     header = {'custom': ['date', 'off_time', 'course', 'race_name', 'distance_f', 'race_class', 'type', 'field_size', 'going', 'prize', 'age_band', 'name', 'sex', 'region',
  474.                          'dam', 'sire', 'trainer', 'owner', 'comment', 'spotlight', 'draw', 'headgear', 'lbs', 'ofr', 'rpr', 'ts', 'jockey', 'form', 'distance', 'colour']}
  475.  
  476.     fixture_list = []
  477.     runner_list = []
  478.     country_list = (country for country in data if country in ['GB', 'IRE'])
  479.  
  480.     for country in country_list:
  481.         for fixture in data[country]:
  482.             events = [event for event in data[country][fixture]]
  483.             for event in events:
  484.                 races = [race for race in data[country][fixture][event]]
  485.                 this_fixture = []
  486.                 for detail in races:
  487.                     if detail == 'runners':
  488.                         runners = [runner for runner in data[country][fixture][event][detail]]
  489.                         for runner in runners:
  490.                             this_runner = []
  491.                             this_runner.append(data[country][fixture][event]['date'])
  492.                             this_runner.append(data[country][fixture][event]['off_time'])
  493.                             this_runner.append(data[country][fixture][event]['course'])
  494.                             this_runner.append(data[country][fixture][event]['race_name'])
  495.                             this_runner.append(data[country][fixture][event]['distance_f'])
  496.                             this_runner.append(data[country][fixture][event]['race_class'])
  497.                             this_runner.append(data[country][fixture][event]['type'])
  498.                             this_runner.append(data[country][fixture][event]['field_size'])
  499.                             this_runner.append(data[country][fixture][event]['going'])
  500.                             this_runner.append(data[country][fixture][event]['prize'])
  501.                             this_runner.append(data[country][fixture][event]['age_band'])
  502.  
  503.                             for k, v in runner.items():
  504.                                 if k in ['name','sex','region','dam','sire','comment','spotlight','trainer','owner','draw','headgear','lbs','ofr','rpr','ts','jockey',
  505.                                          'form','tips','odds']:
  506.                                     this_runner.append(v)
  507.  
  508.                             this_runner.append(runner.get('distance', ''))
  509.                             this_runner.append(runner.get('colour', ''))
  510.                                    
  511.                             runner_list.append(this_runner)
  512.                             this_fixture = []
  513.                             if detail in ['date','off_time','course']:
  514.                                 v = data[country][fixture][event][detail]
  515.                                 this_fixture.append(v)
  516.                                 fixture_list.append(this_fixture)
  517.  
  518.     # Filter racecard
  519.     df = pd.DataFrame(runner_list, columns=header['custom'])
  520.     df = df.astype('str')
  521.     df = df.drop_duplicates(subset=None, keep="first", inplace=False)
  522.  
  523.     field_filter = df
  524.  
  525.     # Export race data
  526.     print('No. of courses: %i' % field_filter['course'].nunique())
  527.     print('No. of races: %i' % field_filter['race_name'].nunique())
  528.     print('No. of horses: %i' % field_filter['name'].nunique())
  529.     field_filter.to_csv(csv_file, index=False)
  530.  
  531. def main():
  532.     if len(sys.argv) != 2 or sys.argv[1].lower() not in {'today', 'tomorrow'}:
  533.         return print('Usage: ./racecards.py [today|tomorrow]')
  534.  
  535.     racecard_url = 'https://www.racingpost.com/racecards'
  536.  
  537.     date = datetime.today().strftime('%Y-%m-%d')
  538.  
  539.     if sys.argv[1].lower() == 'tomorrow':
  540.         racecard_url += '/tomorrow'
  541.         date = (datetime.today() + timedelta(days=1)).strftime('%Y-%m-%d')
  542.  
  543.     session = requests.Session()
  544.  
  545.     race_urls = get_race_urls(session, racecard_url)
  546.     races = parse_races(session, race_urls, date)
  547.  
  548.     if not os.path.exists('../racecards'):
  549.         os.makedirs(f'../racecards')
  550.  
  551.      # Save JSON data
  552.     json_file = f'../racecards/{date}.json'
  553.     with open(json_file, 'w') as f:
  554.         json.dump(races, f)
  555.  
  556.     # Convert JSON to CSV
  557.     csv_file = f'../racecards/{date}.csv'
  558.     convert_json_to_csv(json_file, csv_file)
  559.  
  560.  
  561. if __name__ == '__main__':
  562.     main()
  563.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement