Advertisement
jarekmor

bexley_collection_scrape

Jan 10th, 2023 (edited)
830
0
Never
1
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.25 KB | None | 0 0
  1. from requests_html import HTMLSession
  2. import json
  3. from datetime import datetime
  4. import calendar
  5.  
  6. now = datetime.now()
  7. current_day = int(now.strftime("%d"))
  8. current_month_nr = int(now.strftime("%m"))
  9. current_month_name = calendar.month_name[current_month_nr]
  10. current_year = int(now.strftime("%Y"))
  11.  
  12. session = HTMLSession()
  13.  
  14. url = "https://www.bexley.gov.uk/services/rubbish-and-recycling/bin-collection-calendar/rotation-week-1"
  15.  
  16. r = session.get(url)
  17.  
  18. # Get all tables with calendar
  19. months = r.html.find("table.calendar")
  20.  
  21. # Iterate over every table
  22. calendar = {}
  23. for month in months:
  24.     period = month.find('caption')[0].text
  25.     month_dict = {}
  26.  
  27.     for i in month.find('span'):    
  28.         month_dict.update( {int(i.attrs['title'].split(':')[0].split(" ")[1]): i.attrs['title'].split(':')[1].strip()} )
  29.        
  30.     calendar.update( {period:month_dict} )
  31.  
  32. # Serializing json
  33. calendar_json = json.dumps(calendar, indent=4)
  34.  
  35. # Writing to bin_collection.json
  36. with open("bin_collection.json", "w") as outfile:
  37.     outfile.write(calendar_json)
  38.  
  39. # Example: Print output for January 2022, 15
  40. print(f"\n {current_month_name} {current_year}: \n", f"Day {current_day}: ",calendar[f"{current_month_name} {current_year}"][current_day].title(), "\n")
Advertisement
Comments
Add Comment
Please, Sign In to add comment
Advertisement