Advertisement
Sweetening

Instagram_scrape.py

Feb 10th, 2025
7
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.73 KB | None | 0 0
import getpass
import json
import os
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

  14. # Prompt user for Instagram username
  15. username = input("Enter the Instagram username you want to scrape: ")
  16.  
  17. # Set up Chrome options for headless operation
  18. chrome_options = Options()
  19. chrome_options.add_argument("--headless") # Run in headless mode
  20. chrome_options.add_argument("--disable-gpu")
  21. chrome_options.add_argument("--no-sandbox")
  22.  
  23. # Specify path to your ChromeDriver
  24. service = Service("/usr/local/bin/chromedriver") # Correct path to your chromedriver
  25.  
  26. # Initialize WebDriver
  27. driver = webdriver.Chrome(service=service, options=chrome_options)
  28.  
  29. # Instagram login URL
  30. login_url = "https://www.instagram.com/accounts/login/"
  31.  
  32. # Go to the Instagram login page
  33. driver.get(login_url)
  34.  
  35. # Wait until login form is present
  36. WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, "username")))
  37.  
  38. # Prompt user for Instagram credentials (use securely in real-world scenarios)
  39. insta_username = input("Enter your Instagram username: ")
  40. insta_password = input("Enter your Instagram password: ")
  41.  
  42. # Log in to Instagram
  43. driver.find_element(By.NAME, "username").send_keys(insta_username)
  44. driver.find_element(By.NAME, "password").send_keys(insta_password)
  45. driver.find_element(By.NAME, "password").send_keys(Keys.RETURN)
  46.  
  47. # Wait for login to complete and profile page to load
  48. WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//a[contains(@href, '/accounts/edit/')]")))
  49.  
  50. print("Login successful.")
  51.  
  52. # Navigate to the user's profile
  53. driver.get(f"https://www.instagram.com/{username}/")
  54.  
  55. # Wait for the page to load
  56. WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//header//section//h2")))
  57.  
  58. # Scrape data from the profile
  59. profile_data = {}
  60.  
  61. # Scrape the profile details (followers, following, posts count)
  62. profile_data['username'] = username
  63. profile_data['bio'] = driver.find_element(By.XPATH, "//div[@class='-vDIg']//span").text if len(driver.find_elements(By.XPATH, "//div[@class='-vDIg']//span")) > 0 else None
  64. profile_data['full_name'] = driver.find_element(By.XPATH, "//h1").text
  65. profile_data['followers'] = driver.find_element(By.XPATH, "//a[contains(@href,'/followers')]//span").text
  66. profile_data['following'] = driver.find_element(By.XPATH, "//a[contains(@href,'/following')]//span").text
  67. profile_data['posts'] = driver.find_element(By.XPATH, "//span[@class='-nal3']//span").text
  68.  
  69. # Optionally, save data to a JSON file
  70. with open(f"{username}_profile_data.json", "w") as outfile:
  71. json.dump(profile_data, outfile, indent=4)
  72.  
  73. print(f"Profile data for {username} has been saved to {username}_profile_data.json.")
  74.  
  75. # Scrape the latest posts from the profile (up to 5 posts in this case)
  76. posts_data = []
  77. driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
  78. time.sleep(2) # Let the page load more posts
  79.  
  80. # Scrape the first 5 posts
  81. post_elements = driver.find_elements(By.XPATH, "//div[@class='v1Nh3 kIKUG _bz0w']")
  82. for i, post in enumerate(post_elements[:5]):
  83. post_data = {}
  84. post.click()
  85. time.sleep(1)
  86.  
  87. # Wait for post details to load
  88. WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//article//header//div[@class='tD3Xx']")))
  89.  
  90. # Get the post's caption
  91. post_data['caption'] = driver.find_element(By.XPATH, "//div[@class='C4VMK']/span").text
  92. post_data['likes'] = driver.find_element(By.XPATH, "//button[@class='wpO6b']").text
  93. post_data['comments'] = driver.find_element(By.XPATH, "//ul[@class='Mr508']").text
  94.  
  95. # Scrape the post image/video URLs
  96. media = driver.find_element(By.XPATH, "//div[@class='KL4Bh']")
  97. media_url = media.find_element(By.TAG_NAME, "img").get_attribute("src") if media.find_elements(By.TAG_NAME, "img") else media.find_element(By.TAG_NAME, "video").get_attribute("src")
  98. post_data['media_url'] = media_url
  99.  
  100. posts_data.append(post_data)
  101.  
  102. # Close the post modal
  103. driver.find_element(By.XPATH, "//div[@class='Igw0E IwRSH eGOV_ _4EzTm']").click()
  104. time.sleep(1)
  105.  
  106. # Optionally, save post data to a JSON file
  107. with open(f"{username}_posts_data.json", "w") as outfile:
  108. json.dump(posts_data, outfile, indent=4)
  109.  
  110. print(f"Post data for {username} has been saved to {username}_posts_data.json.")
  111.  
  112. # Close the browser
  113. driver.quit()
  114.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement