Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import aiohttp
- import asyncio
- import aiofiles
- import requests_cache
- import pandas as pd
- import logging
- from bs4 import BeautifulSoup
- from selenium import webdriver
- from selenium.webdriver.chrome.service import Service
- from selenium.webdriver.chrome.options import Options
- from webdriver_manager.chrome import ChromeDriverManager
# Root logger: timestamped INFO-level output for every logging call below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# SQLite-backed transparent HTTP cache for the `requests` library; entries
# expire after one hour (3600 s). NOTE(review): the aiohttp requests made in
# fetch_odds_data do NOT go through this cache — requests_cache only patches
# the synchronous `requests` package; confirm this cache is actually used.
requests_cache.install_cache('bet365_cache', expire_after=3600)
# Global concurrency cap: at most 5 simultaneous fetches against the site.
semaphore = asyncio.Semaphore(5)
class WebDriver:
    """Context manager owning a headless Chrome WebDriver instance.

    Ensures the browser process is shut down via ``quit()`` even when the
    body of the ``with`` block raises.
    """

    def __init__(self):
        options = Options()
        # Bug fix: `Options.headless = True` was deprecated in Selenium 4.8
        # and removed in 4.13 — it is silently ignored or raises on current
        # versions. "--headless=new" is the supported replacement.
        options.add_argument("--headless=new")
        self.driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=options,
        )

    def __enter__(self):
        return self.driver

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always release the browser process; exceptions are not suppressed
        # (implicit None return).
        self.driver.quit()
async def fetch_odds_data(session, url):
    """Fetch *url* with the shared aiohttp *session*.

    Concurrency is bounded by the module-level ``semaphore`` (max 5
    in-flight requests).

    Returns:
        The response body as text, or None when the request fails
        (connection error, non-2xx status, or timeout). Failures are
        logged rather than raised so one bad URL does not abort the
        whole ``asyncio.gather`` in main().
    """
    async with semaphore:
        try:
            async with session.get(url) as response:
                # Turn 4xx/5xx statuses into ClientResponseError so they
                # are handled below instead of returning an error page.
                response.raise_for_status()
                return await response.text()
        # Bug fix: aiohttp total-timeout expiry raises asyncio.TimeoutError,
        # which is NOT a ClientError subclass — the original let timeouts
        # propagate and cancel every sibling task in gather().
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            logging.error(f"Request failed for {url}: {e}")
            return None
async def fetch_odds_data_selenium(url):
    """Render *url* in headless Chrome and return the page source.

    Selenium's API is blocking, so the browser work runs on the default
    thread-pool executor to keep the event loop responsive.
    """
    def _render():
        with WebDriver() as browser:
            browser.get(url)
            return browser.page_source

    return await asyncio.get_running_loop().run_in_executor(None, _render)
def parse_odds_data(html_content):
    """Extract match/odds rows from the rendered HTML.

    Currently a stub: always returns None, which causes save_to_excel()
    to log "No data to save." and write nothing.

    TODO: implement with BeautifulSoup (imported above) and return a list
    of (match, odds1, oddsX, odds2) tuples matching save_to_excel's columns.
    """
    pass
async def save_to_excel(data, filename):
    """Write *data* to *filename* as CSV, asynchronously via aiofiles.

    NOTE(review): despite the name, the output format is CSV (callers
    pass .csv filenames), so that behavior is kept unchanged.

    Args:
        data: iterable of (match, odds1, oddsX, odds2) rows, or falsy.
        filename: destination path for the CSV file.

    Logs a warning and writes nothing when *data* is falsy (None/empty).
    """
    # Guard clause: parse_odds_data may return None; skip the write cleanly.
    if not data:
        logging.warning("No data to save.")
        return
    df = pd.DataFrame(data, columns=['Match', 'Odds1', 'OddsX', 'Odds2'])
    csv_content = df.to_csv(index=False)
    async with aiofiles.open(filename, 'w') as file:
        await file.write(csv_content)
    # Bug fix: the original logged the literal text "(unknown)" instead of
    # interpolating the actual destination filename.
    logging.info(f"Data saved to {filename}")
async def main(urls):
    """Fetch every URL concurrently, then parse and persist each page.

    Each successfully fetched page is parsed with parse_odds_data() and
    written to its own numbered file, odds_data_<i>.csv.
    """
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_odds_data(session, url) for url in urls]
        # Failed fetches come back as None (see fetch_odds_data) rather
        # than raising, so gather() always completes.
        html_contents = await asyncio.gather(*tasks)
        # Bug fix: the original used urls.index(url) — an O(n) scan per
        # iteration that also returns the FIRST position when the same URL
        # appears twice, overwriting one file. enumerate() gives the true
        # positional index for free.
        for i, html_content in enumerate(html_contents):
            if html_content:
                odds_data = parse_odds_data(html_content)
                await save_to_excel(odds_data, f'odds_data_{i}.csv')
if __name__ == '__main__':
    # Target pages to scrape; extend this list to cover more competitions.
    # NOTE(review): bet365 pages are heavily JS-rendered — plain aiohttp GETs
    # may return a shell page; fetch_odds_data_selenium exists for that case
    # but is not wired into main(). Confirm which path is intended.
    urls = [
        'https://www.bet365.com/#/AC/B1/C1/D13/E2/F163/',
    ]
    asyncio.run(main(urls))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement