Advertisement
xosski

TikTok scraper

Dec 4th, 2024
27
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.26 KB | None | 0 0
  1. import os
  2. import time
  3. import csv
  4. from selenium import webdriver
  5. from selenium.webdriver.chrome.service import Service
  6. from selenium.webdriver.chrome.options import Options as ChromeOptions
  7. from selenium.webdriver.common.by import By
  8. from selenium.webdriver.common.action_chains import ActionChains
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.support import expected_conditions as EC
  11.  
  12. def main():
  13. # Chemin vers ChromeDriver
  14. chromedriver_path = r"D:\Scolarité\Droit\M1\RECHERCHE\DSA TRANSPARENCY\ROUMANIE\tiktokscrap\chromedriver.exe" # Remplacez par le chemin exact
  15.  
  16. # Options pour Chrome
  17. options = ChromeOptions()
  18. options.add_argument("--disable-gpu")
  19. options.add_argument("--window-size=1920,1080")
  20. # Supprimez "--headless" pour voir le navigateur
  21. # options.add_argument("--headless")
  22.  
  23. # Initialisation du WebDriver avec Service
  24. service = Service(chromedriver_path)
  25. driver = webdriver.Chrome(service=service, options=options)
  26.  
  27. # Fichier CSV pour enregistrer les données
  28. output_file = "tiktok_video_data.csv"
  29. if not os.path.exists(output_file):
  30. with open(output_file, mode="w", newline="", encoding="utf-8") as file:
  31. writer = csv.writer(file)
  32. writer.writerow(["Video Link", "Views", "Likes", "Comments", "Bookmarks", "Date"])
  33.  
  34. try:
  35. # Accéder au compte TikTok spécifique
  36. account_url = "https://www.tiktok.com/@calingeorgescuoficial"
  37. print(f"Navigating to TikTok account: {account_url}")
  38. driver.get(account_url)
  39.  
  40. # Attendre que la page charge
  41. print("Waiting for page to load...")
  42. WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, '//div[contains(@class, "tiktok-1dykci5-DivWrapper")]')))
  43.  
  44. # Scrolling pour charger toutes les vidéos
  45. print("Scrolling to ensure all videos load...")
  46. scroll_pause_time = 2
  47. last_height = driver.execute_script("return document.body.scrollHeight")
  48.  
  49. while True:
  50. driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
  51. time.sleep(scroll_pause_time)
  52. new_height = driver.execute_script("return document.body.scrollHeight")
  53. if new_height == last_height:
  54. break
  55. last_height = new_height
  56.  
  57. # Scrolling vers le haut après avoir chargé toutes les vidéos
  58. print("Scrolling back to the top...")
  59. driver.execute_script("window.scrollTo(0, 0);")
  60. time.sleep(2)
  61.  
  62. # Collecter les vidéos
  63. video_containers = driver.find_elements(By.XPATH, '//div[contains(@class, "tiktok-1dykci5-DivWrapper")]//a[@href and contains(@class, "tiktok-1ghj7dv-AVideoContainer")]')
  64.  
  65. if video_containers:
  66. for video_element in video_containers:
  67. try:
  68. # Faire défiler jusqu'à l'élément
  69. ActionChains(driver).move_to_element(video_element).perform()
  70. time.sleep(2)
  71.  
  72. # Collecter le lien de la vidéo
  73. video_link = video_element.get_attribute("href")
  74. print(f"Video Link: {video_link}")
  75.  
  76. # Cliquer sur la vidéo pour ouvrir la page de détails
  77. video_element.click()
  78. WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "video")))
  79.  
  80. # Mettre la vidéo en pause
  81. video_tag = driver.find_element(By.TAG_NAME, "video")
  82. video_tag.click()
  83. time.sleep(1)
  84.  
  85. # Collecter les autres données
  86. like_xpath = '//strong[@data-e2e="browse-like-count"]'
  87. comment_xpath = '//strong[@data-e2e="browse-comment-count"]'
  88. bookmark_xpath = '//strong[@data-e2e="undefined-count"]'
  89. date_xpath = '//span[@data-e2e="browser-nickname"]/span[last()]'
  90.  
  91. like_count = driver.find_element(By.XPATH, like_xpath).text
  92. comment_count = driver.find_element(By.XPATH, comment_xpath).text
  93. bookmark_count = driver.find_element(By.XPATH, bookmark_xpath).text
  94. date = driver.find_element(By.XPATH, date_xpath).text
  95.  
  96. # Sauvegarder les données dans le fichier CSV
  97. with open(output_file, mode="a", newline="", encoding="utf-8") as file:
  98. writer = csv.writer(file)
  99. writer.writerow([video_link, "", like_count, comment_count, bookmark_count, date])
  100.  
  101. print("Data saved to CSV.")
  102. driver.back()
  103. WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//div[contains(@class, "tiktok-1dykci5-DivWrapper")]')))
  104.  
  105. except Exception as e:
  106. print(f"Error processing video: {e}")
  107. else:
  108. print("No videos found on the page.")
  109.  
  110. except Exception as e:
  111. print(f"An error occurred: {e}")
  112.  
  113. finally:
  114. # Fermer le navigateur
  115. driver.quit()
  116. print("Browser closed.")
  117.  
  118. if __name__ == "__main__":
  119. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement