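# webscrape_images2.py
#
# Crawls pages starting from `url`, downloads the images they link to, and
# re-saves the ones larger than 500x500 pixels as JPEG.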

import urllib.request
import os
import re
import time
import random
from PIL import Image
import subprocess

# Crawl bookkeeping shared by the functions below.
internal, keep = [], []  # internal: links still to visit; keep: every link queued so far
links_only = 1  # not read anywhere in the visible part of this script

def redirect(html):
    """Build candidate image URLs from raw page text.

    The text is cut at every occurrence of the site-specific marker
    ``***/``; each piece is truncated at its first double quote and then
    prefixed with the module-level ``target``.

    The piece in front of the first marker is processed like all the others,
    so element 0 of the returned list is not a real link.
    """
    # NOTE: the marker substring has to be adjusted now and then.
    marker = r'***/'
    image_links = []
    for fragment in html.split(marker):
        head = fragment.partition('"')[0]
        image_links.append(target + head)
    return image_links

def simulate_button_click(html_file_name, button_name):
    """POST a ``name=<button_name>`` form body to a local HTML file via curl.

    Runs ``curl -X POST`` against ``file://<html_file_name>`` and returns
    whatever curl wrote to its standard output.

    Args:
        html_file_name: Path of the HTML file; it is spliced into a
            ``file://`` URL unchanged.
        button_name: Value sent as the ``name`` form field.

    Returns:
        curl's standard output decoded as UTF-8.

    Raises:
        OSError: If the HTML file cannot be opened or curl cannot be started.
        UnicodeDecodeError: If curl's output is not valid UTF-8.
    """
    # Fail fast with the usual OSError when the file is missing or unreadable.
    # Its contents are not needed here because curl reads the file itself.
    with open(html_file_name, "rb"):
        pass

    data = "name={}".format(button_name)
    headers = {"Content-Type": "application/x-www-form-urlencoded"}

    command = ["curl", "-X", "POST", "-d", data]
    # curl wants every header as its own "-H 'Name: value'" argument; passing
    # str(headers) would hand it a Python dict repr instead.
    for field, value in headers.items():
        command += ["-H", "{}: {}".format(field, value)]
    command.append("file://{}".format(html_file_name))

    process = subprocess.run(command, capture_output=True)
    return process.stdout.decode("utf-8")

def get_data(link):
    """Fetch *link* and return the response body as text.

    The request is sent with the module-level ``headers`` dictionary.

    Args:
        link: URL to fetch.

    Returns:
        The body decoded with the charset announced in the response headers,
        falling back to UTF-8. Bytes that cannot be decoded are replaced
        instead of raising.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    req = urllib.request.Request(url=link, headers=headers)
    # The context manager closes the connection even when read() fails.
    with urllib.request.urlopen(req) as response:
        raw = response.read()
        charset = response.headers.get_content_charset() or 'utf-8'

    # str(raw) would yield the "b'...'" repr with escaped characters rather
    # than the page text, so decode explicitly.
    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # The server announced a charset Python does not know.
        return raw.decode('utf-8', errors='replace')

def addlinks(keyURL):
    """Queue the ``<img src="...">`` targets found on the page at *keyURL*.

    The page text is fetched with ``get_data`` and cut at every
    ``<img src="``; the text up to the next double quote of each piece is
    taken as a link. Links not seen before are appended to both ``keep``
    (everything queued so far) and ``internal`` (still to visit).

    Args:
        keyURL: URL of the page to scan.
    """
    pieces = get_data(keyURL).split('<img src="')

    # pieces[0] is the text in front of the first <img> tag, not a link.
    for piece in pieces[1:]:
        src = piece.split('"')[0]
        if src in keep:
            # Already queued once; queuing it again would let the crawl
            # revisit the same links over and over.
            continue
        keep.append(src)
        internal.append(src)

# Folder that receives every downloaded file.
base_folder = r'c:\Z-DL-TEMP'
# exist_ok=True avoids the check-then-create race and does nothing when the
# folder is already there.
os.makedirs(base_folder, exist_ok=True)

# URLs of images that were already saved; download_image() skips these.
dupl = set()

def download_image(url):
    """Download *url* into ``base_folder`` and re-save it as JPEG if it is large.

    The local file name is the part of the URL after ``target``, with every
    ``/`` and every dot except the last one replaced by ``_``.

    After the download the image size decides what happens:

    * wider AND taller than 500 px: the URL is recorded in ``dupl``, the image
      is converted to RGB when necessary and written back to the same path
      as JPEG (quality 90, optimized);
    * narrower AND shorter than 200 px: nothing else is done and 1 is returned;
    * anything else: reported as too small.

    In the last two cases the downloaded file stays on disk unchanged.

    Args:
        url: Image URL; it is expected to contain ``target``.

    Returns:
        1 for images below 200x200, otherwise None (also when *url* is
        already in ``dupl`` and nothing is downloaded).

    Raises:
        IndexError: If *url* does not contain ``target``.
    """
    if url in dupl:
        return

    file_name = url.split(target)[1].replace('/', '_')
    stem, dot, extension = file_name.rpartition('.')
    if dot:
        # Keep only the last dot so the extension stays recognizable.
        file_name = stem.replace('.', '_') + '.' + extension
    # Without any dot the name is used as it is (joining the empty stem used
    # to produce a name starting with '.').
    path = os.path.join(base_folder, file_name)

    # Download the image first.
    urllib.request.urlretrieve(url, path)

    # Inspect it with Pillow; the context manager releases the file handle
    # on every exit path, including the early return below.
    with Image.open(path) as img:
        width, height = img.size

        print([width, height])
        if width > 500 and height > 500:
            dupl.add(url)
            rgb = img if img.mode == 'RGB' else img.convert('RGB')
            rgb.save(path, format='JPEG', quality=90, optimize=True)
            print('+'*20, 'IMAGE SAVED', '+'*20, '\n')
        elif width < 200 and height < 200:
            print('')
            return 1
        else:
            print('__too small__')
    print('')

def from_page(u):
    """Print a banner for page *u*, then try to download each link found on it.

    The page text comes from ``get_data`` and the candidate links from
    ``redirect``; every candidate except the first is announced and passed
    to ``download_image``.
    """
    divider = '='*20
    print(divider)
    print("+++", u)
    print(divider)
    print('')

    candidates = redirect(get_data(u))
    # Element 0 comes from the text in front of the first marker, so skip it.
    for candidate in candidates[1:]:
        print("Trying:", candidate)
        download_image(candidate)

def main(z):
    """Crawl from the start link *z* until no queued links are left.

    *z* is appended to both ``internal`` and ``keep``. Each round shuffles
    ``internal`` and removes its first entry; entries containing ``***.``
    are handed to ``addlinks`` and then ``from_page``, all others are
    dropped.
    """
    for bucket in (internal, keep):
        bucket.append(z)

    while len(internal) > 0:
        # Shuffling before taking the head visits the queue in random order.
        random.shuffle(internal)
        current = internal.pop(0)
        if '***.' not in current:
            continue
        addlinks(current)
        from_page(current)

# Site-specific settings.
target = 'https://***'  # prefixed to links by redirect(), stripped again for file names
url = 'https://***'  # page the crawl starts from

# Request headers sent by get_data().
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
}

main(url)

done_rule = '_'*10
print('\n', done_rule, 'DONE!', done_rule, '\n')