Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import re
import sys

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Page that is fetched and scraped.
STATION_URL = "https://infopasazer.intercity.pl/?p=station&id=33605"

# Column labels applied to the scraped rows, in cell order.
HEADINGS = ['Numer', 'Przewoznik', 'Data', 'Relacja', 'Przyjazd_plan', 'Opoznienie']

# Seconds to wait for the server before giving up; without a timeout
# requests.get() may block forever.
REQUEST_TIMEOUT = 10

# Characters stripped from every cell: non-breaking space, newline, comma.
_CELL_NOISE = re.compile(r"[\xa0\n,]")


def clean_cell(text):
    """Return *text* with non-breaking spaces, newlines and commas removed."""
    return _CELL_NOISE.sub("", text)


def fetch_tables(url=STATION_URL, timeout=REQUEST_TIMEOUT):
    """Download *url* and return its ``table table-delay mbn`` tables.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page.
    response.raise_for_status()
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "lxml")
    return soup.find_all("table", attrs={"class": "table table-delay mbn"})


def parse_rows(table):
    """Return the cleaned ``<td>`` texts of every row of *table* but the first.

    The first ``<tr>`` is skipped; the script treats it as the header row.
    """
    return [
        [clean_cell(cell.text) for cell in tr.find_all("td")]
        for tr in table.find_all("tr")[1:]
    ]


def main():
    """Fetch the page, print the table count and the first table as a DataFrame.

    Returns:
        0 on success, 1 when the page contains no matching table.
    """
    tables = fetch_tables()
    print("Number of tables on site: ", len(tables))
    if not tables:
        # Previously this case crashed with a bare IndexError on tables[0].
        print("No matching table found at " + STATION_URL, file=sys.stderr)
        return 1

    df = pd.DataFrame(data=parse_rows(tables[0]), columns=HEADINGS)
    print(df)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement