iStrzalka

downloader.py

Sep 26th, 2018
234
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.03 KB | None | 0 0
  1. import requests
  2. import os
  3. from re import finditer
  4.  
  5. headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) But it's actually python cuz i'm bored"}
  6.  
  7. def html_get(url):
  8.     r = requests.get(url, headers=headers)
  9.     return r.text
  10.  
  11. search_name = input("Name of manga : ")
  12. search_html = html_get("http://mangakakalot.com/search/" + search_name.replace(' ', '_'))
  13. found_iter = finditer('item-name', search_html)
  14.  
  15. links = []
  16. for iteration in found_iter:
  17.     index = iteration.start() + 21
  18.     url = ""
  19.     while search_html[index] != '"':
  20.         url += search_html[index]
  21.         index += 1
  22.     links.append(url)
  23.  
  24. if len(links) == 0:
  25.     print("None of that name found")
  26.     input("Press Enter to exit")
  27.     exit()
  28.  
  29. for i, url in enumerate(links):
  30.     print("{} : {}".format(i + 1, url.replace("_", " ")[30:]))
  31. user_input = int(input("Which one is the correct one? [1 : {}] : ".format(len(links))))
  32. #print(user_input)
  33. #print(links[user_input - 1])
  34. url_link = links[user_input - 1]
  35.  
  36. manga_name = links[user_input - 1].replace("_", " ")[30:].title()
  37.  
  38. if not os.path.exists(manga_name):
  39.     os.makedirs(manga_name)
  40. os.chdir(manga_name)
  41.  
  42. #url_link = input("Link to the manga [mangakakalot.com / manganelo.com source] : ")
  43. chapter_link = url_link[::-1].replace('/agnam/', '/retpahc/', 1)[::-1]
  44. html = html_get(url_link)
  45. index = html.find('div class="row">')
  46. html = html[index:]
  47. #print(chapter_link)
  48.  
  49. found_iter = finditer(chapter_link, html)
  50.  
  51. chapter_links = []
  52. for iteration in found_iter:
  53.     index = iteration.start() + len(chapter_link)
  54.     url = chapter_link
  55.     while html[index] != '"':
  56.         url += html[index]
  57.         index += 1
  58.     #print(url)
  59.     chapter_links.append(url)
  60. chapter_links = list(reversed(chapter_links))
  61.  
  62. print("Preparations complete")
  63. print("Chapters like 2.5 count as one just so you know")
  64. print("But the folders' chapter number will be fitting to the chapter")
  65. f_ch = int(input("From Chapter [{} is Last] : ".format(len(chapter_links))))
  66. t_ch = int(input("Till Chapter : "))
  67.  
  68. for i in range(f_ch, t_ch + 1):
  69.     ch_link = chapter_links[i - 1]
  70.     html = html_get(ch_link)
  71.    
  72.     index = html.find('<div class="vung-doc" id="vungdoc">')
  73.     index = html.find('1.jpg', index) - 1
  74.     sub_url = ""
  75.     while html[index] != '"':
  76.         sub_url += html[index]
  77.         index -= 1
  78.     sub_url = sub_url[::-1]
  79.  
  80.     Chapter_name = chapter_links[i-1][len(chapter_link) + 1:].replace("_", " ").title()
  81.  
  82.     if not os.path.exists("{}".format(Chapter_name)):
  83.         os.makedirs("{}".format(Chapter_name))
  84.     os.chdir("{}".format(Chapter_name))
  85.    
  86.     nr = 1
  87.     s = 2
  88.     while True:
  89.         r = requests.get("{}{}.jpg".format(sub_url, nr), headers=headers)
  90.         if r.status_code == 404:
  91.             break
  92.         with open("{}{}.jpg".format("0" * s, nr), "wb") as img_obj:
  93.            img_obj.write(r.content)
  94.         print("Done with {} Page {}".format(Chapter_name, nr))
  95.         nr += 1
  96.         if nr == 10 or nr == 100:
  97.             s -= 1
  98.     os.chdir("..")
  99.  
  100. input("Done\a")
Add Comment
Please, Sign In to add comment