Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Fetch a web page and print the HTML comments and <p> paragraph bodies
# found in it, grouped by pattern name.
#
# Runs on both Python 2 and Python 3; the two try/except blocks below pick
# whichever spelling of urlopen / raw_input the interpreter provides.
import re
from contextlib import closing

try:  # Python 2
    from urllib2 import urlopen
except ImportError:  # Python 3
    from urllib.request import urlopen

try:  # Python 2
    _read_line = raw_input
except NameError:  # Python 3
    _read_line = input

# Pattern name -> regex with exactly one capture group (the text to keep).
# Quantifiers are non-greedy so that several comments/paragraphs on one line
# produce separate matches instead of one match spanning all of them.
REx = {
    "comments": r"<!--(.*?)-->",
    "paragraphs": r"<p>(.*?)</p>",
    # Require whitespace after "<p" so that only <p ...attributes> tags match,
    # not <pre>, <param>, or a plain <p>.
    "inline css paragraphs": r"<p\s[^>]*>(.*?)</p>",
}


def fetch(url):
    """Download *url* and return its body as text.

    The response is always closed, even if reading fails. On Python 3 the
    raw bytes are decoded as UTF-8 with undecodable bytes replaced (the
    page's declared charset is not consulted); on Python 2 the byte string
    is returned unchanged.
    """
    with closing(urlopen(url)) as response:
        sitedata = response.read()
    if not isinstance(sitedata, str):
        sitedata = sitedata.decode("utf-8", "replace")
    return sitedata


def extract_matches(sitedata):
    """Return a dict mapping each name in ``REx`` to its captured strings.

    Every pattern is applied to *sitedata* with ``re.DOTALL`` so that
    comments and paragraphs spanning several lines are found. Captures for
    the "comments" pattern have surrounding whitespace stripped; the other
    captures are returned verbatim. A name with no matches maps to ``[]``.
    """
    matches = {}
    for name, pattern in REx.items():
        found = re.findall(pattern, sitedata, re.DOTALL)
        if name == "comments":
            found = [text.strip() for text in found]
        matches[name] = found
    return matches


def main():
    """Prompt for a host name, fetch ``http://<host>/`` and print the matches."""
    url = "http://{}/".format(_read_line("http://"))
    print(extract_matches(fetch(url)))  #:^)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement