Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- from pathlib import Path
- from urllib.parse import urljoin
- import bs4
- import requests
- BASE_URL = "https://ifm.com"
- REST_URL = "https://www.ifm.com/restservices/{}/productsAndAttributes"
def get_categories(timeout=30):
    """Return the top-level product categories found on the category page.

    Fetches ``/de/de/category`` relative to ``BASE_URL`` and collects every
    anchor matched by the CSS selector ``ul.sub-categories--secondary li a``.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping each anchor's whitespace-stripped text to the value of
        its ``href`` attribute.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(
        urljoin(BASE_URL, "/de/de/category"), timeout=timeout
    )
    # Fail loudly: an error page would otherwise parse to zero matches and be
    # indistinguishable from a genuinely empty category listing.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.content, "html.parser")
    return {
        anchor.text.strip(): anchor["href"]
        for anchor in soup.select("ul.sub-categories--secondary li a")
    }
def get_sub_categories(link, timeout=30):
    """Return the sub-category tiles found on one category page.

    Args:
        link: Category URL or path; it is resolved against ``BASE_URL``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping each tile's whitespace-stripped text to the value of
        its ``href`` attribute. The dict is empty when the page contains no
        element matching ``a.tile__link-wrapper``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(urljoin(BASE_URL, link), timeout=timeout)
    # Without this check an error page would look like "no sub-categories",
    # because it simply contains no matching tiles.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.content, "html.parser")
    return {
        tile.text.strip(): tile["href"]
        for tile in soup.select("a.tile__link-wrapper")
    }
# JSON cache of the collected category links; the per-product JSON files are
# written into the same directory.
product_file = Path.home().joinpath("Desktop", "ifm", "product_pages.json")
# parents=True so that a missing ~/Desktop does not abort the run.
product_file.parent.mkdir(parents=True, exist_ok=True)

if not product_file.exists():
    # First run: walk the category tree and remember every leaf link.
    product_pages = []
    for description, link in get_categories().items():
        print(description)
        print("=" * len(description))
        sub_categories = get_sub_categories(link)
        if sub_categories:
            # Distinct names so the outer loop variables are not shadowed.
            for sub_description, sub_link in sub_categories.items():
                print(sub_description)
                product_pages.append(sub_link)
            print("\n\n")
        else:
            # A category without sub-categories is itself a product page.
            print("\n")
            product_pages.append(link)
    with product_file.open("w", encoding="utf-8") as fd:
        # json.dump returns None, so its result must not be assigned back to
        # product_pages: the list is still needed by the loop below.
        json.dump(product_pages, fd)
else:
    # Later runs: reuse the cached links instead of crawling again.
    with product_file.open("r", encoding="utf-8") as fd:
        product_pages = json.load(fd)

for products in product_pages:
    # NOTE(review): the stored href is substituted into REST_URL unchanged;
    # confirm that this is the form the REST service expects.
    url = REST_URL.format(products)
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    for product in response.json().get("productResults", []):
        attributes = product.get("attributes", {})
        name = attributes.get("at_produktbezeichnung", "NoName")
        price = attributes.get("formattedPrice", "NoPrice")
        # Products without a productId are skipped: the id is the file name.
        if product_id := attributes.get("productId"):
            print(name, price)
            target = product_file.parent.joinpath(f"{product_id}.json")
            with target.open("w", encoding="utf-8") as fd:
                json.dump(attributes, fd)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement