Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import re
from urllib.parse import urljoin

import requests
def getPage(url, timeout=30):
    """Download a page and return its body as text.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` waits indefinitely, so one
            unresponsive host would stall the whole crawl.

    Returns:
        The decoded response body (``response.text``). The HTTP status is
        not checked, so the body of an error response is returned as-is.

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    return response.text
# Index page whose ".html" links are crawled for e-mail addresses.
LIST_OF_POSTS_URL = (
    "https://travel.state.gov/content/travel/en/us-visas/"
    "visa-information-resources/list-of-posts.html"
)

# Value of a double-quoted href up to its last ".html". Staying inside the
# quotes prevents one match from swallowing several links on the same line.
_HTML_LINK_RE = re.compile(r'href="([^"]+\.html)')

# Address inside a mailto: link. Whitespace after the colon is optional, and
# the address stops at quotes, angle brackets and "?" so trailing markup or
# a "?subject=..." query is never captured.
_MAILTO_RE = re.compile(
    r"""mailto:\s*([^\s"'<>?]+@[^\s"'<>?]+\.(?:gov|edu|com))"""
)


def main(url=LIST_OF_POSTS_URL, fetch=None):
    """Print every .gov/.edu/.com mailto address found on pages linked from *url*.

    The index page is downloaded, each distinct ".html" link on it is
    resolved against *url* and downloaded once, and every matching mailto
    address on those pages is printed on its own line.

    Args:
        url: Index page to start from.
        fetch: Callable taking a URL and returning that page's text.
            Defaults to ``getPage``.
    """
    if fetch is None:
        fetch = getPage

    index_html = fetch(url)
    visited = set()
    for link in _HTML_LINK_RE.findall(index_html):
        # urljoin handles root-relative, document-relative and absolute
        # links alike; plain concatenation only worked for the first kind.
        page_url = urljoin(url, link)
        if page_url in visited:
            # The same link often repeats on a page; fetch it only once.
            continue
        visited.add(page_url)
        for email in _MAILTO_RE.findall(fetch(page_url)):
            print(email)
# Start the crawl only when run as a script, so importing this module does
# not trigger network requests.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement