kirya_shkolnik

Какой-то парсер

Dec 12th, 2022 (edited)
445
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.90 KB | None | 0 0
  1. import base64
  2. import pandas as pd
  3. import requests
  4. from bs4 import BeautifulSoup as bs
  5. import cv2
  6. import numpy as np
  7.  
  8. def readb64(uri): # Функция чтобы читать png:base64 изображения
  9.    encoded_data = uri.split(',')[1]
  10.    nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
  11.    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
  12.    return img
  13.  
  14.  
  15. main_url = "http://autosale2022.task-sss.krasilnikov.spb.ru"
  16. url = main_url + "/ads/6ed459ea169d96c38c7167d4cf471013"
  17.  
  18. response = requests.get(url)
  19. # print(response.text)
  20. task = []  # Массив для строк
  21. soup = bs(response.text,'html.parser')
  22. i = 0
  23. for link in soup.find_all('a'):
  24.     i+=1
  25.     print('Check ' + str(i) + ' car')
  26.     car_url = main_url+link['href']
  27.     response = requests.get(car_url)
  28.     car_soup = bs(response.text,'html.parser')
  29.     id = car_soup.find('td', string = "id").find_next_sibling().get_text()
  30.     name = car_soup.find('td', string = "name").find_next_sibling().get_text()
  31.     acceleration = car_soup.find('td', string = "acceleration").find_next_sibling().get_text()
  32.     year = car_soup.find('td', string = "year").find_next_sibling().get_text()
  33.     origin = car_soup.find('td', string = "origin").find_next_sibling().get_text()
  34.     displacement = car_soup.find('td', string = "displacement").find_next_sibling().get_text() # Не работает
  35.     # print(id,name,acceleration)
  36.  
  37.     # Манипуляции с QR кодом
  38.     img_url = car_soup.find('td', string = "weight").find_next_sibling().find('img')['src']
  39.  
  40.     img = readb64(img_url)
  41.     detector = cv2.QRCodeDetector()
  42.     weight = detector.detectAndDecode(img)[0]
  43.  
  44.  
  45.  
  46.     task.append((id,name,year,acceleration,origin,weight))
  47.     # task1_temp.append(name)
  48.     # task1_temp.append(year)
  49.     # task1_temp.append(acceleration)
  50.     # task1_temp.append(origin)
  51.  
  52. data_task1 = pd.DataFrame(task)
  53. print(data_task1)
  54.  
Add Comment
Please, Sign In to add comment