View difference between Paste ID: Rf38vBQM and RXynXwJS
SHOW: | | - or go back to the newest paste.
1
import requests
2
3
4
def get_page(category: str, page_id: int, *, timeout: float = 30.0) -> str:
    """Download one brand listing page from ozon.ru and return its body.

    Args:
        category: Brand slug placed in the URL path
            (for example ``'puma-87235756'``).
        page_id: Value for the ``page`` query parameter. A falsy value
            (``0``) requests the brand URL without the parameter.
        timeout: Seconds to wait for the server before the request is
            aborted. Passed straight to ``requests.get``.

    Returns:
        The response body as text (``response.text``).

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when ``timeout`` elapses.

    Note:
        The HTTP status code is not checked; the body of an error response
        is returned like any other page.
    """
    if page_id:
        url = 'https://www.ozon.ru/brand/{0}/?page={1}'.format(category, page_id)
    else:
        url = 'https://www.ozon.ru/brand/{0}/'.format(category)
    print('get url: {0}'.format(url))
    # requests applies no timeout unless one is given, so a stalled
    # connection would otherwise block the whole crawl indefinitely.
    response = requests.get(url, timeout=timeout)
    return response.text
12
13
14
def load_data():
    """Fetch 50 listing pages (ids 0-49) for each hard-coded brand slug.

    Every page is downloaded through ``get_page``; the downloaded text is
    not used yet - processing and storage are still to be implemented.
    """
    brand_slugs = ('adidas-144082850', 'puma-87235756')
    # Page id 0 makes get_page request the brand URL without ``?page=``.
    # NOTE(review): if the site serves the same listing for that URL and for
    # ``?page=1``, the first page is downloaded twice - confirm.
    page_ids = range(50)
    for slug in brand_slugs:
        for number in page_ids:
            html = get_page(slug, number)
            # TODO: process the downloaded text and save it to a file/database
20
21
22
# Start the download only when the file is run as a script, not when imported.
if __name__ == '__main__':
    load_data()
24