Advertisement
GeorgiLukanov87

task-2

Jan 22nd, 2024
773
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.98 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import csv
  4. import hashlib
  5.  
  6.  
  7. # Function to download the webpage and extract data
  8. def extract_data():
  9.     url = 'https://bnb.bg/Statistics/StInterbankForexMarket/index.htm'
  10.     response = requests.get(url)
  11.     soup = BeautifulSoup(response.text, 'html.parser')
  12.  
  13.     # Convert the soup to a string
  14.     html_string = str(soup)
  15.  
  16.     # Search for the specified string
  17.     target_string = 'Спот търговия на банките с чуждестранна валута  срещу левове*'
  18.     start_index = html_string.find(target_string)
  19.  
  20.     if start_index == -1:
  21.         print("String not found. Please inspect the HTML structure and update the script accordingly.")
  22.         return []
  23.  
  24.     # Extract data_rows from the content after the found string
  25.     data_start_index = start_index + len(target_string)
  26.     data_rows = []
  27.  
  28.     # Check if there is a table after the target string
  29.     if '<table' in html_string[data_start_index:]:
  30.         # Extract data from the table
  31.         table_start_index = html_string[data_start_index:].find('<table')
  32.         table_end_index = html_string[data_start_index + table_start_index:].find('</table>') + len('</table>')
  33.         table_html = html_string[data_start_index + table_start_index:data_start_index + table_end_index]
  34.         table_soup = BeautifulSoup(table_html, 'html.parser')
  35.  
  36.         for row in table_soup.find_all('tr')[1:]:
  37.             columns = row.find_all('td')
  38.             data_row = [col.text.strip() for col in columns]
  39.             data_rows.append(data_row)
  40.  
  41.     return data_rows
  42.  
  43.  
  44. # Function to save data to CSV file
  45. def save_to_csv(data_rows):
  46.     # Sort the data before saving to CSV
  47.     print(data_rows)
  48.     print(data_rows[1:-2])
  49.     sorted_data = sorted(data_rows[1:-2], key=lambda x: float(x[7].replace(' ', '')), reverse=True)
  50.  
  51.     with open('forex_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
  52.         csv_writer = csv.writer(csvfile)
  53.         csv_writer.writerows(sorted_data)
  54.  
  55.  
  56. # Function to compare current data with the existing CSV file
  57. def is_data_changed(data_rows):
  58.     current_hash = hashlib.md5(str(data_rows).encode()).hexdigest()
  59.  
  60.     try:
  61.         with open('forex_data_hash.txt', 'r') as hash_file:
  62.             previous_hash = hash_file.read()
  63.             return current_hash != previous_hash
  64.     except FileNotFoundError:
  65.         return True
  66.  
  67.  
  68. # Main script
  69. if __name__ == "__main__":
  70.     # Extract data from the webpage
  71.     extracted_data = extract_data()
  72.  
  73.     # Check if the data has changed
  74.     if is_data_changed(extracted_data):
  75.         # Save the sorted data to a CSV file
  76.         save_to_csv(extracted_data)
  77.  
  78.         # Update the hash file with the new hash
  79.         with open('forex_data_hash.txt', 'w') as hash_file:
  80.             hash_file.write(hashlib.md5(str(extracted_data).encode()).hexdigest())
  81.  
  82.         print("CSV file updated.")
  83.     else:
  84.         print("No changes in data. CSV file not updated.")
  85.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement