Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import asyncio
- from CONFIG import token_id
- import os
- import openpyxl
- from aiogram import Bot, Dispatcher, types, executor
- from aiogram.dispatcher.filters import Command
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- import shutil
- import schedule
- import time
- import datetime
# Telegram bot client authenticated with the token imported from CONFIG, and
# the dispatcher that the message handlers in this file register on.
bot = Bot(token=token_id)
dp = Dispatcher(bot)

# Telegram user IDs that pass the access checks for document upload and
# /getFile (the list is extended at runtime by the /admin1703 command).
a = [
    421048811,
    582897416,
]
- """
- 1. Считывание файла
- 2. Берем ссылки из файла
- 3. По ссылкам из файла начинаем парсить раз в день, записывая цены
- 3.1 две колонки:
- """
@dp.message_handler(Command('admin1703'))
async def admin(message: types.Message):
    """Add the sender of /admin1703 to the in-memory admin list.

    The sender's Telegram user ID is appended to ``a`` (once), the current
    list is printed for debugging, and a confirmation is sent back.

    NOTE(review): anyone who knows this command name gains access, and the
    list lives only in memory, so additions are lost when the process exits.
    """
    user_id = message.from_user.id
    # Sending the command repeatedly must not accumulate duplicate IDs.
    if user_id not in a:
        a.append(user_id)
    print(a)
    await message.answer('ID добавлен: Вам доступны функции (загрузка файла и /getFile)')


# Name used by the access checks in the handlers. It refers to the same list
# object as ``a``, so IDs appended by ``admin`` are visible through it.
admins_ids = a
@dp.message_handler(Command('my_id'))
async def id(message: types.Message):
    """Reply to /my_id with the sender's Telegram user ID.

    NOTE: the handler name shadows the builtin ``id()`` for the rest of this
    module; it is kept unchanged so the module's public names stay the same.
    """
    # The message text must be a string, so convert the integer ID explicitly.
    await message.answer(str(message.from_user.id))
@dp.message_handler(Command('start'))
async def start(message: types.Message):
    """Answer /start with the list of commands the bot advertises."""
    command_overview = "All Commands:\n/my_id\n/getFile"
    await message.answer(command_overview)
@dp.message_handler(content_types=['document'])
async def doc(message: types.Message):
    """Save a document sent by an admin as ``<cwd>/<user_id>/<user_id>.xlsx``.

    Users whose ID is not in ``admins_ids`` get an access-denied reply and
    nothing is stored. For admins the per-user directory is created on demand
    and the uploaded file always lands inside it, which is the location the
    /getFile handler reads from. A later upload overwrites the earlier one.
    """
    user_id = message.from_user.id
    if user_id not in admins_ids:
        await message.answer("В доступе отказано!")
        return

    # os.path.dirname('doc') is '', so this resolves to the current working
    # directory; the expression is kept so it matches the /getFile handler.
    base_dir = os.path.abspath(os.path.dirname('doc'))
    user_dir = os.path.join(base_dir, f'{user_id}')

    # Previously the first upload created the directory but saved the file
    # next to it instead of inside it, so /getFile could not find the workbook.
    os.makedirs(user_dir, exist_ok=True)
    print(user_dir)
    await message.document.download(os.path.join(user_dir, f'{user_id}.xlsx'))
def check_url(string):
    """Return True when ``string`` is longer than one character and contains "https"."""
    return len(string) > 1 and "https" in string
def _looks_like_url(value):
    """Return True when a cell value, rendered as text, contains an https link."""
    text = str(value)
    return len(text) > 5 and "https" in text


def _element_text(browser, by, locator, default):
    """Return the text of the first element matching the locator, or ``default``."""
    try:
        return browser.find_element(by, locator).text
    except Exception as err:
        # Best-effort scraping: a page without the element must not abort the run.
        print(f"Element {locator!r} not available: {err!r}")
        return default


def _scrape_product(browser, url):
    """Open ``url`` and return ``(product title, price)`` with fallbacks."""
    try:
        browser.get(url)
    except Exception as err:
        # Without this early return the lookups below would read whatever page
        # was loaded previously and record its data for the wrong URL.
        print(f"Failed to open {url!r}: {err!r}")
        return 'Not Found', '0'

    product = _element_text(browser, By.TAG_NAME, 'h1', 'Not Found')
    price_text = _element_text(browser, By.CLASS_NAME, 'new-price', '0')
    # Only the part before the first space is kept.
    return product, price_text.split(" ")[0]


async def parser(path):
    """Scrape the price for every link in column A and save ``Result1.xlsx``.

    The workbook at ``path`` must contain a sheet named ``Sheet1``. Every cell
    in column A whose text contains "https" is opened in headless Chrome; the
    page's ``h1`` text and the first space-separated token of the element with
    class ``new-price`` are printed, and the price is written into column C of
    the link's row. Pages that fail to load or lack those elements are recorded
    as ``'Not Found'`` / ``'0'``. The result is saved to ``Result1.xlsx`` in
    the current working directory; the source workbook file is not modified.

    NOTE(review): the Selenium calls are synchronous, so this coroutine blocks
    the event loop for the whole scrape.
    NOTE(review): the original indentation was lost; this assumes one result
    file is saved after all links are processed - confirm against the author's
    intent.

    Args:
        path: Path of the ``.xlsx`` workbook to read.

    Raises:
        Whatever ``openpyxl.load_workbook`` raises for an unreadable file, and
        ``KeyError`` when the workbook has no ``Sheet1``.
    """
    # Load the workbook first so a bad path does not start a browser at all.
    excel = openpyxl.load_workbook(path)
    sheet = excel['Sheet1']
    url_cells = [cell for cell in sheet['A'] if _looks_like_url(cell.value)]

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    browser = webdriver.Chrome('chromedriver', options=options)
    try:
        # Run counter: selects the output column (n + 2) and the result file name.
        n = 1
        for cell in url_cells:
            print(cell.value)
            product, price1 = _scrape_product(browser, cell.value)
            print(product, price1)
            sheet.cell(row=cell.row, column=n + 2).value = price1
        excel.save(f'Result{n}.xlsx')
    finally:
        # Always shut the driver down; otherwise every call leaks a Chrome process.
        browser.quit()
@dp.message_handler(Command('getFile'))
async def get_file(message: types.Message):
    """Run the price parser on the workbook previously uploaded by an admin.

    Users whose ID is not in ``admins_ids`` get an access-denied reply. For
    admins the workbook is expected at ``<cwd>/<user_id>/<user_id>.xlsx``; if
    it is missing the user is told so instead of the handler failing with an
    unhandled ``FileNotFoundError``. Otherwise a "processing started" message
    is sent and ``parser`` is awaited on that path.
    """
    user_id = message.from_user.id
    if user_id not in admins_ids:
        await message.answer("В доступе отказано!")
        return

    # os.path.dirname('doc') is '', so this resolves to the current working
    # directory; the expression matches the one used when the file is saved.
    base_dir = os.path.abspath(os.path.dirname('doc'))
    path = os.path.join(base_dir, f'{user_id}', f'{user_id}.xlsx')
    if not os.path.isfile(path):
        await message.answer("Файл не найден! Сначала загрузите файл.")
        return

    await message.answer("Началась обработка запроса!")
    # TODO: cells A1 ("days") and C1 ("time") of Sheet1 were read here for a
    # planned once-a-day schedule that was never implemented; the parser is
    # run a single time per command.
    await parser(path)
- #
- # def parser1(url)
- # async def sheduled(path, days=0):
- # if "/documents" in path:
- # pass
- # else:
- # path = f'{path}/documents'
- # files = os.listdir(path)
- # exel_file = openpyxl.load_workbook(f'{path}/{files[0]}')
- # sheet = exel_file.active
- # days = sheet.cell(row=1, column=1).value
- # parser = sheet.cell(row=1, column=2).value
- # time = sheet.cell(row=1, column=3).value
- # print(time, days, parser)
- # for i in range(int(days)):
- # await schedule.every().day().at(time).do(sheduled(path))
if __name__ == "__main__":
    # Start long polling on the dispatcher; this call does not return until
    # the bot is stopped.
    executor.start_polling(dp, skip_updates=True)
    # NOTE: the former trailing schedule.run_pending() call was removed. It
    # could only run once, after polling had already ended, and no jobs are
    # registered with ``schedule`` anywhere in this file, so it did nothing.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement