Advertisement
xosski

TikTok scraper

Dec 4th, 2024
15
0
Never
1
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.15 KB | None | 0 0
  1. import os
  2. import time
  3. import csv
  4. from selenium import webdriver
  5. from selenium.webdriver.chrome.service import Service
  6. from selenium.webdriver.chrome.options import Options as ChromeOptions
  7. from selenium.webdriver.common.by import By
  8. from selenium.webdriver.common.action_chains import ActionChains
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.support import expected_conditions as EC
  11.  
  12. def main():
  13. # Path to ChromeDriver
  14. chromedriver_path = r"D:\Scolarité\Droit\M1\RECHERCHE\DSA TRANSPARENCY\ROUMANIE\tiktokscrap\chromedriver.exe"
  15.  
  16. # Chrome options
  17. options = ChromeOptions()
  18. options.add_argument("--disable-gpu")
  19. options.add_argument("--window-size=1920,1080")
  20. # options.add_argument("--headless") # Uncomment to run without a GUI
  21.  
  22. # Initialize WebDriver with Service
  23. service = Service(chromedriver_path)
  24. driver = webdriver.Chrome(service=service, options=options)
  25.  
  26. # CSV file for saving data
  27. output_file = "tiktok_video_data.csv"
  28. if not os.path.exists(output_file):
  29. with open(output_file, mode="w", newline="", encoding="utf-8") as file:
  30. writer = csv.writer(file)
  31. writer.writerow(["Video Link", "Views", "Likes", "Comments", "Bookmarks", "Date"])
  32.  
  33. try:
  34. # Access TikTok account page
  35. account_url = "https://www.tiktok.com/@calingeorgescuoficial"
  36. print(f"Navigating to TikTok account: {account_url}")
  37. driver.get(account_url)
  38.  
  39. # Wait for the page to load and potentially solve CAPTCHA manually
  40. print("Waiting for you to resolve CAPTCHA (if needed).")
  41. WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.TAG_NAME, "body"))) # Wait for body element to load
  42.  
  43. # Scroll to load all videos
  44. print("Scrolling to ensure all videos load...")
  45. scroll_pause_time = 2
  46. last_height = driver.execute_script("return document.body.scrollHeight")
  47.  
  48. while True:
  49. driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
  50. time.sleep(scroll_pause_time)
  51. new_height = driver.execute_script("return document.body.scrollHeight")
  52. if new_height == last_height:
  53. break
  54. last_height = new_height
  55.  
  56. # Scroll back to the top
  57. print("Scrolling back to the top...")
  58. driver.execute_script("window.scrollTo(0, 0);")
  59. time.sleep(2)
  60.  
  61. # Collect all video elements
  62. print("Collecting video elements...")
  63. video_containers = driver.find_elements(By.XPATH, '//div[contains(@class, "css-13fa1gi-DivWrapper")]//a[@href and contains(@class, "css-1g95xhm-AVideoContainer")]')
  64.  
  65. if not video_containers:
  66. print("No videos found on the page.")
  67. return
  68.  
  69. # Display available videos and ask user for a starting point
  70. print("Available videos:")
  71. video_links = [video.get_attribute("href") for video in video_containers]
  72. for idx, video_link in enumerate(video_links):
  73. print(f"{idx + 1}: {video_link}")
  74.  
  75. choice = int(input("Enter the number of the video to start from: ")) - 1
  76. if choice < 0 or choice >= len(video_links):
  77. print("Invalid choice. Exiting.")
  78. return
  79.  
  80. print(f"Starting from video: {video_links[choice]}")
  81.  
  82. # Process the selected video and subsequent ones
  83. for video_element in video_containers[choice:]:
  84. try:
  85. # Scroll to the video
  86. ActionChains(driver).move_to_element(video_element).perform()
  87. time.sleep(2)
  88.  
  89. # Collect video details
  90. video_link = video_element.get_attribute("href")
  91. print(f"Video Link: {video_link}")
  92.  
  93. # Wait for views to be visible
  94. views_xpath = './/strong[@data-e2e="video-views" and contains(@class, "video-count")]'
  95. video_views_element = WebDriverWait(video_element, 10).until(EC.presence_of_element_located((By.XPATH, views_xpath)))
  96. video_views = video_views_element.text
  97. print(f"Views: {video_views}")
  98.  
  99. # Click the video to get more details
  100. video_element.click()
  101. time.sleep(2)
  102.  
  103. # Pause the video
  104. print("Pausing the video...")
  105. video_tag = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "video")))
  106. video_tag.click()
  107. time.sleep(1)
  108.  
  109. # Collect likes, comments, bookmarks, and date
  110. like_count = driver.find_element(By.XPATH, '//strong[@data-e2e="browse-like-count" and contains(@class, "css-vc3yj-StrongText")]').text
  111. comment_count = driver.find_element(By.XPATH, '//strong[@data-e2e="browse-comment-count" and contains(@class, "css-vc3yj-StrongText")]').text
  112. bookmark_count = driver.find_element(By.XPATH, '//strong[@data-e2e="undefined-count" and contains(@class, "css-vc3yj-StrongText")]').text
  113. date = driver.find_element(By.XPATH, '//span[@data-e2e="browser-nickname"]/span[last()]').text
  114.  
  115. print(f"Likes: {like_count}")
  116. print(f"Comments: {comment_count}")
  117. print(f"Bookmarks: {bookmark_count}")
  118. print(f"Date: {date}")
  119.  
  120. # Write data to CSV
  121. with open(output_file, mode="a", newline="", encoding="utf-8") as file:
  122. writer = csv.writer(file)
  123. writer.writerow([video_link, video_views, like_count, comment_count, bookmark_count, date])
  124.  
  125. print("Data saved to CSV.")
  126.  
  127. # Return to the previous page
  128. driver.execute_script("window.history.go(-1)")
  129. time.sleep(2)
  130.  
  131. except Exception as e:
  132. print(f"Error processing video: {e}")
  133.  
  134. except Exception as e:
  135. print(f"An error occurred: {e}")
  136.  
  137. finally:
  138. # Close the browser
  139. driver.quit()
  140. print("Browser closed.")
  141.  
  142. if __name__ == "__main__":
  143. main()
Advertisement
Comments
  • xosski
    44 days
    # text 6.15 KB | 0 0
    1. import os
    2. import time
    3. import csv
    4. from selenium import webdriver
    5. from selenium.webdriver.chrome.service import Service
    6. from selenium.webdriver.chrome.options import Options as ChromeOptions
    7. from selenium.webdriver.common.by import By
    8. from selenium.webdriver.common.action_chains import ActionChains
    9. from selenium.webdriver.support.ui import WebDriverWait
    10. from selenium.webdriver.support import expected_conditions as EC
    11.  
    12. def main():
    13. # Path to ChromeDriver
    14. chromedriver_path = r"D:\Scolarité\Droit\M1\RECHERCHE\DSA TRANSPARENCY\ROUMANIE\tiktokscrap\chromedriver.exe"
    15.  
    16. # Chrome options
    17. options = ChromeOptions()
    18. options.add_argument("--disable-gpu")
    19. options.add_argument("--window-size=1920,1080")
    20. # options.add_argument("--headless") # Uncomment to run without a GUI
    21.  
    22. # Initialize WebDriver with Service
    23. service = Service(chromedriver_path)
    24. driver = webdriver.Chrome(service=service, options=options)
    25.  
    26. # CSV file for saving data
    27. output_file = "tiktok_video_data.csv"
    28. if not os.path.exists(output_file):
    29. with open(output_file, mode="w", newline="", encoding="utf-8") as file:
    30. writer = csv.writer(file)
    31. writer.writerow(["Video Link", "Views", "Likes", "Comments", "Bookmarks", "Date"])
    32.  
    33. try:
    34. # Access TikTok account page
    35. account_url = "https://www.tiktok.com/@calingeorgescuoficial"
    36. print(f"Navigating to TikTok account: {account_url}")
    37. driver.get(account_url)
    38.  
    39. # Wait for the page to load and potentially solve CAPTCHA manually
    40. print("Waiting for you to resolve CAPTCHA (if needed).")
    41. WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.TAG_NAME, "body"))) # Wait for body element to load
    42.  
    43. # Scroll to load all videos
    44. print("Scrolling to ensure all videos load...")
    45. scroll_pause_time = 2
    46. last_height = driver.execute_script("return document.body.scrollHeight")
    47.  
    48. while True:
    49. driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    50. time.sleep(scroll_pause_time)
    51. new_height = driver.execute_script("return document.body.scrollHeight")
    52. if new_height == last_height:
    53. break
    54. last_height = new_height
    55.  
    56. # Scroll back to the top
    57. print("Scrolling back to the top...")
    58. driver.execute_script("window.scrollTo(0, 0);")
    59. time.sleep(2)
    60.  
    61. # Collect all video elements
    62. print("Collecting video elements...")
    63. video_containers = driver.find_elements(By.XPATH, '//div[contains(@class, "css-13fa1gi-DivWrapper")]//a[@href and contains(@class, "css-1g95xhm-AVideoContainer")]')
    64.  
    65. if not video_containers:
    66. print("No videos found on the page.")
    67. return
    68.  
    69. # Display available videos and ask user for a starting point
    70. print("Available videos:")
    71. video_links = [video.get_attribute("href") for video in video_containers]
    72. for idx, video_link in enumerate(video_links):
    73. print(f"{idx + 1}: {video_link}")
    74.  
    75. choice = int(input("Enter the number of the video to start from: ")) - 1
    76. if choice < 0 or choice >= len(video_links):
    77. print("Invalid choice. Exiting.")
    78. return
    79.  
    80. print(f"Starting from video: {video_links[choice]}")
    81.  
    82. # Process the selected video and subsequent ones
    83. for video_element in video_containers[choice:]:
    84. try:
    85. # Scroll to the video
    86. ActionChains(driver).move_to_element(video_element).perform()
    87. time.sleep(2)
    88.  
    89. # Collect video details
    90. video_link = video_element.get_attribute("href")
    91. print(f"Video Link: {video_link}")
    92.  
    93. # Wait for views to be visible
    94. views_xpath = './/strong[@data-e2e="video-views" and contains(@class, "video-count")]'
    95. video_views_element = WebDriverWait(video_element, 10).until(EC.presence_of_element_located((By.XPATH, views_xpath)))
    96. video_views = video_views_element.text
    97. print(f"Views: {video_views}")
    98.  
    99. # Click the video to get more details
    100. video_element.click()
    101. time.sleep(2)
    102.  
    103. # Pause the video
    104. print("Pausing the video...")
    105. video_tag = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "video")))
    106. video_tag.click()
    107. time.sleep(1)
    108.  
    109. # Collect likes, comments, bookmarks, and date
    110. like_count = driver.find_element(By.XPATH, '//strong[@data-e2e="browse-like-count" and contains(@class, "css-vc3yj-StrongText")]').text
    111. comment_count = driver.find_element(By.XPATH, '//strong[@data-e2e="browse-comment-count" and contains(@class, "css-vc3yj-StrongText")]').text
    112. bookmark_count = driver.find_element(By.XPATH, '//strong[@data-e2e="undefined-count" and contains(@class, "css-vc3yj-StrongText")]').text
    113. date = driver.find_element(By.XPATH, '//span[@data-e2e="browser-nickname"]/span[last()]').text
    114.  
    115. print(f"Likes: {like_count}")
    116. print(f"Comments: {comment_count}")
    117. print(f"Bookmarks: {bookmark_count}")
    118. print(f"Date: {date}")
    119.  
    120. # Write data to CSV
    121. with open(output_file, mode="a", newline="", encoding="utf-8") as file:
    122. writer = csv.writer(file)
    123. writer.writerow([video_link, video_views, like_count, comment_count, bookmark_count, date])
    124.  
    125. print("Data saved to CSV.")
    126.  
    127. # Return to the previous page
    128. driver.execute_script("window.history.go(-1)")
    129. time.sleep(2)
    130.  
    131. except Exception as e:
    132. print(f"Error processing video: {e}")
    133.  
    134. except Exception as e:
    135. print(f"An error occurred: {e}")
    136.  
    137. finally:
    138. # Close the browser
    139. driver.quit()
    140. print("Browser closed.")
    141.  
    142. if __name__ == "__main__":
    143. main()
Add Comment
Please, Sign In to add comment
Advertisement