xosski
Dec 4th, 2024
1
0
Never
This is comment for paste TikTok scrapper
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import csv
import os
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


  12. def main():
  13. # Path to ChromeDriver
  14. chromedriver_path = r"D:\Scolarité\Droit\M1\RECHERCHE\DSA TRANSPARENCY\ROUMANIE\tiktokscrap\chromedriver.exe"
  15.  
  16. # Chrome options
  17. options = ChromeOptions()
  18. options.add_argument("--disable-gpu")
  19. options.add_argument("--window-size=1920,1080")
  20. # options.add_argument("--headless") # Uncomment to run without a GUI
  21.  
  22. # Initialize WebDriver with Service
  23. service = Service(chromedriver_path)
  24. driver = webdriver.Chrome(service=service, options=options)
  25.  
  26. # CSV file for saving data
  27. output_file = "tiktok_video_data.csv"
  28. if not os.path.exists(output_file):
  29. with open(output_file, mode="w", newline="", encoding="utf-8") as file:
  30. writer = csv.writer(file)
  31. writer.writerow(["Video Link", "Views", "Likes", "Comments", "Bookmarks", "Date"])
  32.  
  33. try:
  34. # Access TikTok account page
  35. account_url = "https://www.tiktok.com/@calingeorgescuoficial"
  36. print(f"Navigating to TikTok account: {account_url}")
  37. driver.get(account_url)
  38.  
  39. # Wait for the page to load and potentially solve CAPTCHA manually
  40. print("Waiting for you to resolve CAPTCHA (if needed).")
  41. WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.TAG_NAME, "body"))) # Wait for body element to load
  42.  
  43. # Scroll to load all videos
  44. print("Scrolling to ensure all videos load...")
  45. scroll_pause_time = 2
  46. last_height = driver.execute_script("return document.body.scrollHeight")
  47.  
  48. while True:
  49. driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
  50. time.sleep(scroll_pause_time)
  51. new_height = driver.execute_script("return document.body.scrollHeight")
  52. if new_height == last_height:
  53. break
  54. last_height = new_height
  55.  
  56. # Scroll back to the top
  57. print("Scrolling back to the top...")
  58. driver.execute_script("window.scrollTo(0, 0);")
  59. time.sleep(2)
  60.  
  61. # Collect all video elements
  62. print("Collecting video elements...")
  63. video_containers = driver.find_elements(By.XPATH, '//div[contains(@class, "css-13fa1gi-DivWrapper")]//a[@href and contains(@class, "css-1g95xhm-AVideoContainer")]')
  64.  
  65. if not video_containers:
  66. print("No videos found on the page.")
  67. return
  68.  
  69. # Display available videos and ask user for a starting point
  70. print("Available videos:")
  71. video_links = [video.get_attribute("href") for video in video_containers]
  72. for idx, video_link in enumerate(video_links):
  73. print(f"{idx + 1}: {video_link}")
  74.  
  75. choice = int(input("Enter the number of the video to start from: ")) - 1
  76. if choice < 0 or choice >= len(video_links):
  77. print("Invalid choice. Exiting.")
  78. return
  79.  
  80. print(f"Starting from video: {video_links[choice]}")
  81.  
  82. # Process the selected video and subsequent ones
  83. for video_element in video_containers[choice:]:
  84. try:
  85. # Scroll to the video
  86. ActionChains(driver).move_to_element(video_element).perform()
  87. time.sleep(2)
  88.  
  89. # Collect video details
  90. video_link = video_element.get_attribute("href")
  91. print(f"Video Link: {video_link}")
  92.  
  93. # Wait for views to be visible
  94. views_xpath = './/strong[@data-e2e="video-views" and contains(@class, "video-count")]'
  95. video_views_element = WebDriverWait(video_element, 10).until(EC.presence_of_element_located((By.XPATH, views_xpath)))
  96. video_views = video_views_element.text
  97. print(f"Views: {video_views}")
  98.  
  99. # Click the video to get more details
  100. video_element.click()
  101. time.sleep(2)
  102.  
  103. # Pause the video
  104. print("Pausing the video...")
  105. video_tag = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "video")))
  106. video_tag.click()
  107. time.sleep(1)
  108.  
  109. # Collect likes, comments, bookmarks, and date
  110. like_count = driver.find_element(By.XPATH, '//strong[@data-e2e="browse-like-count" and contains(@class, "css-vc3yj-StrongText")]').text
  111. comment_count = driver.find_element(By.XPATH, '//strong[@data-e2e="browse-comment-count" and contains(@class, "css-vc3yj-StrongText")]').text
  112. bookmark_count = driver.find_element(By.XPATH, '//strong[@data-e2e="undefined-count" and contains(@class, "css-vc3yj-StrongText")]').text
  113. date = driver.find_element(By.XPATH, '//span[@data-e2e="browser-nickname"]/span[last()]').text
  114.  
  115. print(f"Likes: {like_count}")
  116. print(f"Comments: {comment_count}")
  117. print(f"Bookmarks: {bookmark_count}")
  118. print(f"Date: {date}")
  119.  
  120. # Write data to CSV
  121. with open(output_file, mode="a", newline="", encoding="utf-8") as file:
  122. writer = csv.writer(file)
  123. writer.writerow([video_link, video_views, like_count, comment_count, bookmark_count, date])
  124.  
  125. print("Data saved to CSV.")
  126.  
  127. # Return to the previous page
  128. driver.execute_script("window.history.go(-1)")
  129. time.sleep(2)
  130.  
  131. except Exception as e:
  132. print(f"Error processing video: {e}")
  133.  
  134. except Exception as e:
  135. print(f"An error occurred: {e}")
  136.  
  137. finally:
  138. # Close the browser
  139. driver.quit()
  140. print("Browser closed.")
  141.  
  142. if __name__ == "__main__":
  143. main()
Add Comment
Please, Sign In to add comment