Advertisement
nitestryker

dork downloader (PDFs)

Dec 19th, 2023
1,139
0
Never
1
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.80 KB | None | 0 0
  1. #!/bin/python
  2. # Copyright 2023 Nitestryker
  3.  
  4. # This software is provided 'as-is', without any express or implied
  5. # warranty.  In no event will the authors be held liable for any damages
  6. #  arising from the use of this software.
  7.  
  8. #  Permission is granted to anyone to use this software for any purpose,
  9. #  including commercial applications, and to alter it and redistribute it
  10. #  freely, subject to the following restrictions:
  11.  
  12. #  1. The origin of this software must not be misrepresented; you must not
  13. #     claim that you wrote the original software. If you use this software
  14. #     in a product, an acknowledgment in the product documentation would be
  15. #    appreciated but is not required.
  16. #  2. Altered source versions must be plainly marked as such, and must not be
  17. #     misrepresented as being the original software.
  18. #  3. This notice may not be removed or altered from any source distribution.
  19.  
import os
from urllib.parse import quote_plus, unquote

import requests
  22.  
  23. def download_pdf(url, output_path):
  24.     #response = requests.get(url, stream=True)
  25.     response = requests.get(url, stream=True, verify=False) # Disable SSL certificate verification
  26.     if response.status_code == 200:
  27.         with open(output_path, 'wb') as file:
  28.             for chunk in response.iter_content(chunk_size=1024):
  29.                 if chunk:
  30.                     file.write(chunk)
  31.         print(f"Downloaded: {output_path}")
  32.     else:
  33.         print(f"Failed to download: {url} (Status Code: {response.status_code})")
  34.  
  35. def search_and_download(query, num_results, base_url, output_dir):
  36.     # Perform the Google search using the query
  37.     search_url = f"{base_url}/search?q={query}"
  38.     response = requests.get(search_url)
  39.     search_results = response.text
  40.  
  41.     # Create the output directory if it doesn't exist
  42.     if not os.path.exists(output_dir):
  43.         os.makedirs(output_dir)
  44.  
  45.     # Keep track of the number of downloaded PDFs
  46.     downloaded_count = 0
  47.  
  48.     # Extract and download PDF links
  49.     start_index = 0
  50.     while downloaded_count < num_results:
  51.         start_link = search_results.find('<a href="/url?q=', start_index)
  52.         end_link = search_results.find('&amp;', start_link + 1)
  53.         if start_link == -1 or end_link == -1:
  54.             break
  55.  
  56.         url = search_results[start_link + 16: end_link]
  57.         if url.endswith('.pdf'):
  58.             filename = url.split("/")[-1]
  59.             output_path = os.path.join(output_dir, filename)
  60.             download_pdf(url, output_path)
  61.             downloaded_count += 1
  62.  
  63.         start_index = end_link
  64.  
  65. # Set the query, number of results to retrieve, base URL, and output directory
  66. query = 'filetype:pdf programming'
  67. num_results = 10
  68. base_url = "https://www.google.com"
  69. output_dir = "pdfs"
  70.  
  71. # Call the search_and_download function
  72. search_and_download(query, num_results, base_url, output_dir)
  73.  
Advertisement
Comments
  • nitestryker
    1 year (edited)
    # text 0.19 KB | 0 0
    1. This version only works for PDFs because it searches for files that end with the .pdf extension. I am going to make another version in which you will be able to select which file type to download — stay tuned.
    2.  
Add Comment
Please, Sign In to add comment
Advertisement