import time
import json
import os
import getpass
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Prompt user for the Instagram username to scrape
username = input("Enter the Instagram username you want to scrape: ")

# Set up Chrome options for headless operation
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run without opening a browser window
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")

# Point the Service at your local ChromeDriver binary (adjust the path if needed)
service = Service("/usr/local/bin/chromedriver")

# Initialize the WebDriver
driver = webdriver.Chrome(service=service, options=chrome_options)

# Instagram login URL
login_url = "https://www.instagram.com/accounts/login/"

# Go to the Instagram login page
driver.get(login_url)

# Wait until the login form is present
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, "username")))

# Prompt for Instagram credentials; getpass keeps the password out of the terminal echo
insta_username = input("Enter your Instagram username: ")
insta_password = getpass.getpass("Enter your Instagram password: ")

# Log in to Instagram
driver.find_element(By.NAME, "username").send_keys(insta_username)
driver.find_element(By.NAME, "password").send_keys(insta_password)
driver.find_element(By.NAME, "password").send_keys(Keys.RETURN)

# Wait for login to complete; the "edit profile" link only appears once logged in
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//a[contains(@href, '/accounts/edit/')]")))
print("Login successful.")
# Navigate to the target user's profile
driver.get(f"https://www.instagram.com/{username}/")

# Wait for the profile header to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//header//section//h2")))

# Scrape data from the profile (followers, following, posts count).
# Note: Instagram's obfuscated class names (e.g. '-vDIg') change frequently,
# so these selectors are brittle and may need updating.
profile_data = {}
profile_data['username'] = username

bio_elements = driver.find_elements(By.XPATH, "//div[@class='-vDIg']//span")
profile_data['bio'] = bio_elements[0].text if bio_elements else None

profile_data['full_name'] = driver.find_element(By.XPATH, "//h1").text
profile_data['followers'] = driver.find_element(By.XPATH, "//a[contains(@href,'/followers')]//span").text
profile_data['following'] = driver.find_element(By.XPATH, "//a[contains(@href,'/following')]//span").text
profile_data['posts'] = driver.find_element(By.XPATH, "//span[@class='-nal3']//span").text

# Save the profile data to a JSON file
with open(f"{username}_profile_data.json", "w") as outfile:
    json.dump(profile_data, outfile, indent=4)

print(f"Profile data for {username} has been saved to {username}_profile_data.json.")
# Scrape the latest posts from the profile (up to 5 posts)
posts_data = []
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)  # Give the page a moment to load more posts

# Collect the post thumbnails (class names are brittle and may need updating)
post_elements = driver.find_elements(By.XPATH, "//div[@class='v1Nh3 kIKUG _bz0w']")

for i, post in enumerate(post_elements[:5]):
    post_data = {}
    post.click()
    time.sleep(1)

    # Wait for the post modal to load
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//article//header//div[@class='tD3Xx']")))

    # Get the post's caption, like count, and comments
    post_data['caption'] = driver.find_element(By.XPATH, "//div[@class='C4VMK']/span").text
    post_data['likes'] = driver.find_element(By.XPATH, "//button[@class='wpO6b']").text
    post_data['comments'] = driver.find_element(By.XPATH, "//ul[@class='Mr508']").text

    # Scrape the post image/video URL
    media = driver.find_element(By.XPATH, "//div[@class='KL4Bh']")
    if media.find_elements(By.TAG_NAME, "img"):
        media_url = media.find_element(By.TAG_NAME, "img").get_attribute("src")
    else:
        media_url = media.find_element(By.TAG_NAME, "video").get_attribute("src")
    post_data['media_url'] = media_url

    posts_data.append(post_data)

    # Close the post modal with the Escape key instead of relying on a class-based selector
    ActionChains(driver).send_keys(Keys.ESCAPE).perform()
    time.sleep(1)

# Save the post data to a JSON file
with open(f"{username}_posts_data.json", "w") as outfile:
    json.dump(posts_data, outfile, indent=4)

print(f"Post data for {username} has been saved to {username}_posts_data.json.")

# Close the browser
driver.quit()