Advertisement
YaBoiSwayZ

Bet365 Web-scraper

May 26th, 2024
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.53 KB | Source Code | 0 0
  1. import aiohttp
  2. import asyncio
  3. import aiofiles
  4. import requests_cache
  5. import pandas as pd
  6. import logging
  7. from bs4 import BeautifulSoup
  8. from selenium import webdriver
  9. from selenium.webdriver.chrome.service import Service
  10. from selenium.webdriver.chrome.options import Options
  11. from webdriver_manager.chrome import ChromeDriverManager
  12.  
# Root logger: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Install a requests-cache backend named 'bet365_cache' whose entries expire
# after 3600 seconds.
# NOTE(review): requests_cache patches the `requests` library; the fetches in
# this file go through aiohttp and Selenium, so this cache presumably never
# sees them -- confirm whether it is still needed.
requests_cache.install_cache('bet365_cache', expire_after=3600)

# Shared limiter: fetch_odds_data acquires it, so at most 5 requests are in
# flight at once.
# NOTE(review): this is constructed at import time, before asyncio.run()
# creates the event loop. On Python < 3.10 asyncio primitives bind to the loop
# that is current at construction -- confirm the minimum supported version.
semaphore = asyncio.Semaphore(5)
  18.  
  19. class WebDriver:
  20.     def __init__(self):
  21.         options = Options()
  22.         options.headless = True
  23.         self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
  24.  
  25.     def __enter__(self):
  26.         return self.driver
  27.  
  28.     def __exit__(self, exc_type, exc_val, exc_tb):
  29.         self.driver.quit()
  30.  
  31. async def fetch_odds_data(session, url):
  32.     async with semaphore:
  33.         try:
  34.             async with session.get(url) as response:
  35.                 response.raise_for_status()
  36.                 return await response.text()
  37.         except aiohttp.ClientError as e:
  38.             logging.error(f"Request failed for {url}: {e}")
  39.             return None
  40.  
  41. async def fetch_odds_data_selenium(url):
  42.     def get_html_content():
  43.         with WebDriver() as driver:
  44.             driver.get(url)
  45.             return driver.page_source
  46.  
  47.     loop = asyncio.get_running_loop()
  48.  
  49.     html_content = await loop.run_in_executor(None, get_html_content)
  50.     return html_content
  51.  
  52. def parse_odds_data(html_content):
  53.     pass
  54.  
  55. async def save_to_excel(data, filename):
  56.     if data:
  57.         df = pd.DataFrame(data, columns=['Match', 'Odds1', 'OddsX', 'Odds2'])
  58.         csv_content = df.to_csv(index=False)
  59.         async with aiofiles.open(filename, 'w') as file:
  60.             await file.write(csv_content)
  61.         logging.info(f"Data saved to {filename}")
  62.     else:
  63.         logging.warning("No data to save.")
  64.  
  65. async def main(urls):
  66.     async with aiohttp.ClientSession() as session:
  67.         tasks = [fetch_odds_data(session, url) for url in urls]
  68.         html_contents = await asyncio.gather(*tasks)
  69.        
  70.         for url, html_content in zip(urls, html_contents):
  71.             if html_content:
  72.                 odds_data = parse_odds_data(html_content)
  73.                 await save_to_excel(odds_data, f'odds_data_{urls.index(url)}.csv')
  74.  
# Script entry point: scrape the pages listed below when run directly.
if __name__ == '__main__':
    # NOTE(review): everything after '#' is a URL fragment, which HTTP clients
    # do not send to the server. The plain aiohttp fetch in main() therefore
    # presumably receives only the site's landing page, with the odds rendered
    # client-side -- confirm, and consider fetch_odds_data_selenium instead.
    urls = [
        'https://www.bet365.com/#/AC/B1/C1/D13/E2/F163/',
    ]

    # Create an event loop, run main() to completion, then close the loop.
    asyncio.run(main(urls))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement