Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import json
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium_stealth import stealth
# Path to the ChromeDriver binary, relative to the working directory.
# Author's Chrome version: 133.0.6943.54
# ChromeDriver download link: https://storage.googleapis.com/chrome-for-testing-public/133.0.6943.53/win64/
chrome_driver_path = "chromedriver-win64/chromedriver.exe"

# Product page to scrape.
# Alternative test page:
# https://www.academy.com/p/nike-womens-air-max-sc-se-running-shoes-139654084?sku=dark-red-10-b
PRODUCT_URL = "https://www.academy.com/p/nike-womens-court-legacy-next-nature-shoes"

# Where the scraped record is written.
OUTPUT_PATH = "product_data.json"

# Fixed waits, in seconds.
PAGE_LOAD_WAIT_SECONDS = 5
CAPTCHA_HOLD_SECONDS = 15

# Fake geolocation reported to the page - San Francisco.
FAKE_GEOLOCATION = {
    "latitude": 37.7749,
    "longitude": -122.4194,
    "accuracy": 100,
}

# First run of digits, with optional thousands separators and decimals.
_NUMBER_RE = re.compile(r"\d[\d,]*(?:\.\d+)?")


def first_number(text: str, default: float = 0.0) -> float:
    """Return the first number found in *text*, or *default* if there is none.

    Thousands separators are accepted ("$1,299.99" -> 1299.99) and any
    surrounding decoration (currency sign, brackets, whitespace, trailing
    words) is ignored. For a price range the lower bound is returned because
    it appears first.
    """
    match = _NUMBER_RE.search(text or "")
    if match is None:
        return default
    return float(match.group().replace(",", ""))


def parse_review_count(text: str) -> int:
    """Return the review count from a label such as "(1,234)"; 0 if absent."""
    return int(first_number(text))


def clean_colour_label(label: str) -> str:
    """Strip state markers from a colour swatch label.

    Removes the words "selected", "unavailable" and "Clearance" and collapses
    the whitespace they leave behind, so "Red Clearance selected" -> "Red".
    """
    for marker in ("selected", "unavailable", "Clearance"):
        label = label.replace(marker, "")
    return " ".join(label.split())


def extract_product_data(soup: "BeautifulSoup") -> dict:
    """Build the product record from the parsed product page.

    Missing elements fall back to "No name", 0.0, 0 or "No color" instead of
    raising, so a partially rendered page still yields a record.
    """
    # Product name: the part of <title> before the first "|".
    title_tag = soup.find('title')
    product_name = title_tag.text.split('|')[0].strip() if title_tag else "No name"

    price_tag = soup.find('span', class_='pricing nowPrice lg')
    price = first_number(price_tag.text) if price_tag else 0.0

    reviews_tag = soup.find('button', class_='ratingCount linkBtn focusable smallLink')
    reviews_count = parse_review_count(reviews_tag.text) if reviews_tag else 0

    rating_tag = soup.find('span', class_='ratingAvg textCaption')
    average_rating = first_number(rating_tag.text) if rating_tag else 0.0

    # Colour swatches: prefer the aria-label, fall back to the element id.
    colours = []
    for button in soup.find_all('button', class_='buttonWrapper--S9sgu'):
        label = button.get('aria-label', '').strip() or button.get('id', '').strip()
        cleaned = clean_colour_label(label)
        if cleaned:
            colours.append(cleaned)

    if colours:
        # The first swatch is treated as the main colour.
        main_colour = colours[0]
    else:
        # No swatch buttons: fall back to the swatch name in the page header.
        # select_one() returns None when the selector does not match.
        swatch = soup.select_one(".swatchName--KWu4Q")
        main_colour = (swatch.text.strip() if swatch else "") or "No color"

    return {
        "name": product_name,
        "price": price,
        "colour": main_colour,
        # The main colour is reported separately, so it is excluded here.
        "availableColours": colours[1:],
        "reviews_count": reviews_count,
        "reviews_score": average_rating,
    }


def build_driver() -> "webdriver.Chrome":
    """Start Chrome through the local ChromeDriver with the scraper's options."""
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--incognito")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # Intended to disable location requests.
    # NOTE(review): confirm Chrome actually recognises this switch.
    options.add_argument("--disable-geolocation")
    return webdriver.Chrome(service=Service(chrome_driver_path), options=options)


def hold_captcha_if_present(driver: "webdriver.Chrome") -> bool:
    """Press and hold the CAPTCHA button if the page shows one.

    Returns True when a button was found and the press-and-hold completed,
    False otherwise. Whether the challenge was actually accepted is not
    verified here.
    """
    # find_elements() returns an empty list rather than raising when the
    # element is absent, which is the normal case.
    buttons = driver.find_elements(By.ID, "px-captcha")  # ID may change
    if not buttons:
        print("CAPTCHA not found or already bypassed.")
        return False

    try:
        action = ActionChains(driver)
        action.click_and_hold(buttons[0]).perform()
        time.sleep(CAPTCHA_HOLD_SECONDS)  # Hold button for CAPTCHA
        action.release().perform()
    except WebDriverException as e:
        # Best effort: the element may vanish or go stale while held.
        print(f"CAPTCHA interaction failed. Error: {e}")
        return False

    print("CAPTCHA press-and-hold completed.")
    return True


def main() -> None:
    """Scrape the product page and save the record to product_data.json."""
    driver = build_driver()
    try:
        # Inside the try block so the browser is closed even if this fails.
        stealth(driver,
                languages=["en-US", "en"],
                vendor="Google Inc.",
                platform="Win32",
                webgl_vendor="Intel Inc.",
                renderer="Intel Iris OpenGL Engine",
                fix_hairline=True,
                )

        # Set the fake location before navigating so that it is already in
        # place for any location request made while the page loads.
        driver.execute_cdp_cmd("Emulation.setGeolocationOverride", FAKE_GEOLOCATION)

        driver.get(PRODUCT_URL)
        time.sleep(PAGE_LOAD_WAIT_SECONDS)  # Wait for the page to load

        if hold_captcha_if_present(driver):
            # Give the real product page time to load after the challenge.
            time.sleep(PAGE_LOAD_WAIT_SECONDS)

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        product_data = extract_product_data(soup)

        with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
            json.dump(product_data, f, indent=4)
        print("Data successfully saved to product_data.json!")
        print(json.dumps(product_data, indent=4))
    except Exception as e:
        # Top-level boundary of the script: report and fall through to cleanup.
        print(f"Error: {str(e)}")
    finally:
        driver.quit()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement