Advertisement
jarekmor

PKP_info

Nov 18th, 2021
186
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.83 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import pandas as pd
  4. import re
  5.  
  6. url="https://infopasazer.intercity.pl/?p=station&id=33605"
  7.  
  8. html_content = requests.get(url)
  9. html_content.encoding = "utf-8"
  10.  
  11. soup = BeautifulSoup(html_content.text, "lxml")
  12.  
  13. gdp = soup.find_all("table", attrs={"class": "table table-delay mbn"})
  14. print("Number of tables on site: ",len(gdp))
  15.  
  16. table1 = gdp[0]
  17. body = table1.find_all("tr")
  18. head = body[0]
  19. body_rows = body[1:]
  20. headings = ['Numer', 'Przewoznik', 'Data', 'Relacja', 'Przyjazd_plan', 'Opoznienie']
  21.  
  22. all_rows = []
  23. for row_num in range(len(body_rows)):
  24.     row = []
  25.     for row_item in body_rows[row_num].find_all("td"):
  26.         aa = re.sub("(\xa0)|(\n)|,","",row_item.text)
  27.         row.append(aa)
  28.     all_rows.append(row)
  29.  
  30. df = pd.DataFrame(data=all_rows,columns=headings)
  31. print(df)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement