Advertisement
Korotkodul

flow lock

Dec 1st, 2022
851
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.90 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3.  
  4. from threading import Thread, Lock
  5. from time import sleep
  6.  
  7. #link = "https://edition.cnn.com/"
  8. goto =    "https://www.povarenok.ru/recipes/kitchen/77/"
  9. #link = "https://www.povarenok.ru/recipes/show/70127/"
  10.  
  11. pages = [''] * 13
  12. pages[0] = goto
  13.  
  14. s = 'https://www.povarenok.ru/recipes/kitchen/77/~2/'
  15. pages[1] = s
  16.  
  17.  
  18.  
  19. for i in range(2, 13):
  20.     s = pages[i - 1]
  21.     s = goto + '~' + str(i + 1) + '/'
  22.     pages[i] = s
  23. print(pages)
  24. """в конце изменить goto:
  25.  
  26. """
  27. #for i in range(13):
  28.  
  29. names = []
  30. recep = []
  31. photo = []
  32. #<div class="ingredients-bl">
  33. debug = False
  34.  
  35. def ingrid(goto):
  36.     #print("goto = ", goto)
  37.     html = requests.get(goto)
  38.     html.encoding = 'windows-1251'
  39.     sp = BeautifulSoup(html.text, 'lxml')
  40.     ing = sp.find('div', "ingredients-bl")
  41.     all = ing.find_all('span')
  42.     #print("all")
  43.     #print(all)
  44.     ing_list = []
  45.  
  46.     for i in range(0, len(all), 2):
  47.         if i + 1 >= len(all):
  48.             break
  49.         one_ing = all[i]
  50.         one_ing = str(one_ing)
  51.         since = one_ing.find('n>') + 2
  52.         till = one_ing.find('</')
  53.         ing_str = one_ing[since: till]
  54.         amount = all[i + 1]
  55.         amount = str(amount)
  56.         since = amount.find('n>') + 2
  57.         till = amount.find('</')
  58.         amount_str = amount[since: till]
  59.         res = ing_str + "  " + amount_str
  60.         ing_list.append(res)
  61.         #print(one_ing, amount)
  62.     #print("ing list")
  63.     #print(ing_list)
  64.     recep.append(ing_list)
  65.     #print(ing)
  66.  
  67. def work(goto):
  68.     print("WORK")
  69.     html = requests.get(goto)
  70.     html.encoding = 'windows-1251'
  71.     sp = BeautifulSoup(html.text, 'lxml')
  72.     all = sp.find_all('div', "m-img desktop-img conima")
  73.     #print(all)
  74.     for rec in all:
  75.         if debug:
  76.             print("NEW REC")
  77.             print(rec)
  78.         #print(rec)
  79.         raw = rec.find('img')
  80.         #отсюда название и  ссылка на картинку
  81.         #print("IMAGE")
  82.         #print(img)
  83.         raw = str(raw)
  84.         since = raw.find('Ре')
  85.         till = raw.find('src')
  86.         name = raw[since: till]
  87.         names.append(name)
  88.  
  89.         since = raw.find('htt')
  90.         till = raw.find('/>') - 1
  91.         photo_link = raw[since: till]
  92.         photo.append(photo_link)
  93.         #КАК получить список ингридиентов???
  94.         link = rec.find('a')
  95.         if debug:
  96.             print("link")
  97.             print(link)
  98.             print("end link")
  99.         link = str(link)
  100.         till = link.find(">")
  101.         since = link.find("htt")
  102.         link = link[since: till - 1]
  103.         ingrid(link)
  104.         if debug:
  105.             print("new link")
  106.             print(link)
  107.         #print()
  108.         #print()
  109.         #break
  110.  
  111. stop = False
  112. lock = Lock()
  113.  
  114. cnt = -1
  115. def flow():
  116.     global cnt
  117.  
  118.     while cnt + 1 < len(pages):
  119.         print("flow1")
  120.         lock.acquire()
  121.         cnt += 1
  122.         print("cnt = ", cnt)
  123.         lock.release()
  124.         work(pages[cnt])
  125.  
  126.  
  127. flow1 = Thread(target=flow)
  128. flow1.start()
  129.  
  130. while cnt + 1 < len(pages):
  131.     print("flow0")
  132.     lock.acquire()
  133.     cnt += 1
  134.     print("cnt = ", cnt)
  135.     lock.release()
  136.     work(pages[cnt])
  137.  
  138.  
  139. print("photo", len(photo))
  140. print(photo)
  141. print("recep", len(recep))
  142. print(recep)
  143. print("names", len(names))
  144. print(names)
  145.  
  146. import lxml
  147. from xlwt import *
  148. workbook = Workbook(encoding = 'utf-8')
  149. table = workbook.add_sheet('data')
  150. table.write(0, 0, 'Название')
  151. table.write(0, 1, 'Рецепт')
  152. table.write(0, 2, 'Фото')
  153.  
  154. N = len(recep)
  155. line = 0
  156. for i in range(N):
  157.     line += 1
  158.     table.write(line, 0, names[i])
  159.     table.write(line, 2, photo[i])
  160.     table.write(line, 1, 'ингридиенты')
  161.     for j in range(len(recep[i])):
  162.         line += 1
  163.         table.write(line, 1, recep[i][j])
  164.     line += 1
  165.  
  166. workbook.save('recep4.xls')
  167. print("FILE SAVED")
  168.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement