Advertisement
Jhynjhiruu

Crawler.py

Feb 12th, 2018
752
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.60 KB | None | 0 0
  1. import urllib.request
  2. import os
  3. x = int(input("Start number:\n"))
  4. end = int(input("End number:\n"))
  5. f1 = open("do.txt", "a")
  6. f2 = open("dont.txt", "a")
  7. if os.stat('dont.txt').st_size == 0:
  8.     f1.write("\n")
  9.     f2.write("\n")
  10. f1.close()
  11. f2.close()
  12. while x < end:
  13.     try:
  14.         urllib.request.urlretrieve ("http://cds.idc.ique.com:16963/cds/download?content_id=" + str(x), "download" + str(x))
  15.         print("http://cds.idc.ique.com:16963/cds/download?content_id=" + str(x) + " downloaded")
  16.         with open("do.txt", "r+") as file:
  17.             for line in file:
  18.                 if "\n" + str(x) + "\n" in line:
  19.                     print("http://cds.idc.ique.com:16963/cds/download?content_id=" + str(x) + " already known to exist")
  20.                     break
  21.                 else:
  22.                     file.write(str(x) + "\n")
  23.                     break
  24.     except urllib.error.HTTPError:
  25.         print("http://cds.idc.ique.com:16963/cds/download?content_id=" + str(x) + " not found")
  26.         with open("dont.txt", "r+") as file:
  27.             for line in file:
  28.                 if "\n" + str(x) + "\n" in line:
  29.                     print("http://cds.idc.ique.com:16963/cds/download?content_id=" + str(x) + " already known to not exist")
  30.                     break
  31.                 else:
  32.                     file.write(str(x) + "\n")
  33.                     break
  34.     except ConnectionResetError:
  35.         print("Connection reset error, retrying")
  36.         x -= 1
  37.     except urllib.error.URLError:
  38.         print("URLError, retrying")
  39.         x -= 1
  40.     x += 1
  41. exit = input("Press any button to exit...")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement