Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/python
- # Copyright 2023 Nitestryker
- # This software is provided 'as-is', without any express or implied
- # warranty. In no event will the authors be held liable for any damages
- # arising from the use of this software.
- # Permission is granted to anyone to use this software for any purpose,
- # including commercial applications, and to alter it and redistribute it
- # freely, subject to the following restrictions:
- # 1. The origin of this software must not be misrepresented; you must not
- # claim that you wrote the original software. If you use this software
- # in a product, an acknowledgment in the product documentation would be
- # appreciated but is not required.
- # 2. Altered source versions must be plainly marked as such, and must not be
- # misrepresented as being the original software.
- # 3. This notice may not be removed or altered from any source distribution.
- import requests
- import os
- def download_pdf(url, output_path):
- #response = requests.get(url, stream=True)
- response = requests.get(url, stream=True, verify=False) # Disable SSL certificate verification
- if response.status_code == 200:
- with open(output_path, 'wb') as file:
- for chunk in response.iter_content(chunk_size=1024):
- if chunk:
- file.write(chunk)
- print(f"Downloaded: {output_path}")
- else:
- print(f"Failed to download: {url} (Status Code: {response.status_code})")
- def search_and_download(query, num_results, base_url, output_dir):
- # Perform the Google search using the query
- search_url = f"{base_url}/search?q={query}"
- response = requests.get(search_url)
- search_results = response.text
- # Create the output directory if it doesn't exist
- if not os.path.exists(output_dir):
- os.makedirs(output_dir)
- # Keep track of the number of downloaded PDFs
- downloaded_count = 0
- # Extract and download PDF links
- start_index = 0
- while downloaded_count < num_results:
- start_link = search_results.find('<a href="/url?q=', start_index)
- end_link = search_results.find('&', start_link + 1)
- if start_link == -1 or end_link == -1:
- break
- url = search_results[start_link + 16: end_link]
- if url.endswith('.pdf'):
- filename = url.split("/")[-1]
- output_path = os.path.join(output_dir, filename)
- download_pdf(url, output_path)
- downloaded_count += 1
- start_index = end_link
- # Set the query, number of results to retrieve, base URL, and output directory
- query = 'filetype:pdf programming'
- num_results = 10
- base_url = "https://www.google.com"
- output_dir = "pdfs"
- # Call the search_and_download function
- search_and_download(query, num_results, base_url, output_dir)
Advertisement
Comments
-
- This version only works for PDFs because it searches for files that end with the .pdf file type I am going make another version you will be able to select what file type to download stay tuned
Add Comment
Please, Sign In to add comment
Advertisement