Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Collect article links from VnExpress International category pages.
#
# For each category URL the script opens the page in Chrome, clicks the
# "load more" button a fixed number of times, then prints every link on the
# rendered page that starts with the category URL, followed by the link count.
#
# NOTE(review): the original paste lost its indentation. The nesting below
# (sleep after every click; links collected and printed once per category)
# is the most plausible reading -- confirm against the author's intent.

import time

import selenium.webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By

news_category = [
    "http://e.vnexpress.net/news/news",
    # Disabled categories, kept for reference:
    # "http://e.vnexpress.net/news/business",
    # "http://e.vnexpress.net/news/travel-life",
    # "http://e.vnexpress.net/news/world",
]

LOAD_MORE_BUTTON_ID = 'vnexpress_folder_load_more'
LOAD_MORE_CLICKS = 5
LOAD_MORE_WAIT_SECONDS = 2

driver = selenium.webdriver.Chrome()
try:
    for url in news_category:
        driver.get(url)

        for _ in range(LOAD_MORE_CLICKS):
            # find_element(By.ID, ...) replaces find_element_by_id, which was
            # removed in Selenium 4.3; this form works on older releases too.
            driver.find_element(By.ID, LOAD_MORE_BUTTON_ID).click()
            # Pause so the page can react to the click before the next one.
            time.sleep(LOAD_MORE_WAIT_SECONDS)

        soup = BeautifulSoup(driver.page_source, "html.parser")

        # link.get('href') is None for anchors without an href attribute;
        # skip those instead of crashing on None.startswith(...).
        hrefs = (link.get('href') for link in soup.find_all('a'))
        url_list = [href for href in hrefs if href and href.startswith(url)]

        print(url_list)
        print('count:', len(url_list))
finally:
    # Always shut the browser down, even if a page load or click fails.
    driver.quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement