Advertisement
1nikitas

Untitled

Jun 11th, 2022
281
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.61 KB | None | 0 0
  1. import asyncio
  2. from CONFIG import token_id
  3. import os
  4. import openpyxl
  5. from aiogram import Bot, Dispatcher, types, executor
  6. from aiogram.dispatcher.filters import Command
  7. from selenium import webdriver
  8. from selenium.webdriver.common.by import By
  9. import shutil
  10. import schedule
  11. import time
  12. import datetime
  13.  
  14.  
  15.  
# Telegram bot client, authenticated with the token imported from CONFIG.
bot = Bot(token=token_id)
# Dispatcher on which the message handlers in this file are registered.
dp = Dispatcher(bot)
# Telegram user ids that pass the admin check for document upload and /getFile;
# the /admin1703 handler appends to this list at runtime.
a = [421048811, 582897416]
  19.  
  20.  
"""
1. Read the uploaded file
2. Take the links from the file
3. Parse the pages behind those links once a day, recording the prices
3.1 two columns:


"""
  29.  
  30.  
  31.  
  32.  
  33. @dp.message_handler(Command('admin1703'))
  34. async def admin(message: types.Message):
  35. a.append(message.from_user.id)
  36. print(a)
  37. await message.answer('ID добавлен: Вам доступны функции (загрузка файла и /getFile)')
  38. admins_ids = a
  39.  
  40.  
  41. @dp.message_handler(Command('my_id'))
  42. async def id(message: types.Message):
  43. await message.answer(message.from_user.id)
  44.  
  45. @dp.message_handler(Command('start'))
  46. async def start(message: types.Message):
  47. await message.answer("All Commands:\n/my_id\n/getFile")
  48.  
  49.  
  50. @dp.message_handler(content_types=['document'])
  51. async def doc(message: types.Message):
  52. if message.from_user.id in admins_ids:
  53. # os.mkdir('doc')
  54. path = os.path.abspath(os.path.dirname('doc'))
  55.  
  56. if os.path.exists(os.path.join(path, f'{message.from_user.id}')):
  57. print(f'{path}\{message.from_user.id}')
  58. # await message.document.download(f'{path}\{message.from_user.id}')
  59. # shutil.rmtree(os.path.join(path, f'{message.from_user.id}'))
  60. path = os.path.join(path, f'{message.from_user.id}')
  61. await message.document.download(os.path.join(path, f'{message.from_user.id}.xlsx'))
  62.  
  63. else:
  64.  
  65. os.mkdir(f'{message.from_user.id}')
  66.  
  67. await message.document.download(os.path.join(path, f'{message.from_user.id}.xlsx'))
  68. else:
  69. await message.answer("В доступе отказано!")
  70.  
  71. def check_url(string):
  72. if len(string) > 1 and "https" in string:
  73. return True
  74. else:
  75. return False
  76.  
  77.  
  78. async def parser(path):
  79.  
  80. options = webdriver.ChromeOptions()
  81. options.add_argument('--headless')
  82. browser = webdriver.Chrome('chromedriver', options=options)
  83. excel = openpyxl.load_workbook(path)
  84. sheet = excel['Sheet1']
  85.  
  86. urls = []
  87. for col in sheet['A']:
  88. if len(str(col.value)) > 5 and "https" in str(col.value):
  89. urls.append(col)
  90. days = sheet["A"][0].value
  91. time = sheet["C"][0].value
  92. n = 1
  93. for url in urls:
  94.  
  95. try:
  96. print(url.value)
  97. browser.get(url.value)
  98. except Exception as err:
  99. pass
  100. # print(f"Get error {err=}, {type(err)=}\n")
  101. try:
  102. product = browser.find_element(By.TAG_NAME, 'h1')
  103. except Exception as err:
  104. product = ""
  105. # print(f"Find element TAG {err=}, {type(err)=}\n")
  106. try:
  107. price = browser.find_element(By.CLASS_NAME, 'new-price')
  108. except Exception as err:
  109. price = ""
  110. # print(f"Find element CLASS_NAME NEW {err=}, {type(err)=}\n")
  111. try:
  112. product = product.text
  113. except Exception as err:
  114. product = 'Not Found'
  115. # print(f"product.text {err=}, {type(err)=}\n")
  116. try:
  117. price1 = price.text.split(" ")[0]
  118. except Exception as err:
  119. price1 = '0'
  120. print(product, price1)
  121.  
  122. sheet.cell(row=url.row, column=n+2).value = price1
  123. excel.save(f'Result{n}.xlsx')
  124. n += 1
  125.  
  126.  
  127.  
  128. @dp.message_handler(Command('getFile'))
  129. async def get_file(message: types.Message):
  130. if message.from_user.id in admins_ids:
  131. msg = await message.answer("Началась обработка запроса!")
  132. #path = r'C:\Users\North\PycharmProjects\Torg_Otdel_Ulibka_06_05_2022\documents'
  133. #path = r'/home/daniil/Torg_Otdel_Ulibka_06_05_2022/documents'
  134. path = os.path.abspath(os.path.dirname('doc'))
  135. path = os.path.join(path, os.path.join(f'{message.from_user.id}',
  136. f'{message.from_user.id}.xlsx'))
  137. excel = openpyxl.load_workbook(path)
  138. sheet = excel['Sheet1']
  139. days = sheet["A"][0].value
  140. time = sheet["C"][0].value
  141. count = 0
  142. await parser(path)
  143. # while True:
  144. # if count == days:
  145. # break
  146. # elif datetime.datetime.now() == time:
  147. # await parser(path)
  148. # count += 1
  149.  
  150.  
  151.  
  152.  
  153. # exel_file.close()
  154. else:
  155. await message.answer("В доступе отказано!")
  156. #
  157. # def parser1(url)
  158.  
  159. # async def sheduled(path, days=0):
  160. # if "/documents" in path:
  161. # pass
  162. # else:
  163. # path = f'{path}/documents'
  164. # files = os.listdir(path)
  165. # exel_file = openpyxl.load_workbook(f'{path}/{files[0]}')
  166. # sheet = exel_file.active
  167. # days = sheet.cell(row=1, column=1).value
  168. # parser = sheet.cell(row=1, column=2).value
  169. # time = sheet.cell(row=1, column=3).value
  170. # print(time, days, parser)
  171. # for i in range(int(days)):
  172. # await schedule.every().day().at(time).do(sheduled(path))
  173.  
  174.  
  175.  
  176. if __name__ == "__main__":
  177. # need user_id + file to
  178. # loop = asyncio.get_event_loop()
  179. # # loop.create_task(sheduled("582897416"))
  180. executor.start_polling(dp, skip_updates=True)
  181. schedule.run_pending()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement