odoo wkhtmltopdf debugging

#!/usr/bin/env python3

"""
Wrapper to debug wkhtmltopdf
"""

import sys
import logging
import logging.handlers
import shutil
import pwd
import os
from pathlib import Path
from stat import S_IROTH, S_IXOTH, S_IRGRP
from subprocess import Popen, PIPE
from urllib.parse import urljoin, urlparse

import requests
import setproctitle
from bs4 import BeautifulSoup


# Give the wrapper process a fixed title.
setproctitle.setproctitle("wkhtmltopdf.py")


# Account that receives ownership of the files this wrapper writes (see the
# os.chown() calls below). pwd.getpwnam() raises KeyError at import time if
# the "odoo" user does not exist.
PWD = pwd.getpwnam("odoo")
UID = PWD.pw_uid
GID = PWD.pw_gid
# When true, the inputs, arguments and outputs of each run are saved under
# /tmp and "--quiet" is stripped from the forwarded arguments.
DEBUG = True
# Snapshots of the most recent run.
LAST_BODY = Path("/tmp/last_body.html")
LAST_HEADER = Path("/tmp/last_header.html")
LAST_FOOTER = Path("/tmp/last_footer.html")
# NOTE(review): LAST_HTML is not referenced in the visible part of this file.
LAST_HTML = Path("/tmp/last_html.html")
LAST_ARGS = Path("/tmp/last_args.txt")
# NOTE(review): the file names below spell "wkhtmtopdf" (missing "l"); tools
# that read these files must use the same spelling.
STDOUT = Path("/tmp/last_wkhtmtopdf_stdout.txt")
STDERR = Path("/tmp/last_wkhtmtopdf_stderr.txt")
# The executable this wrapper launches with the forwarded arguments
# (presumably the real wkhtmltopdf binary, renamed -- confirm on the host).
WKHTMLTOPDF = Path("/usr/bin/wkhtmltopdf.bin")

# Log to the local syslog socket under the "wkhtmltopdf" logger name.
handler = logging.handlers.SysLogHandler(address="/dev/log")
logger = logging.getLogger("wkhtmltopdf")
logger.setLevel(logging.INFO)
logger.addHandler(handler)


def url_is_relative(url):
    """Report whether the path component of *url* lacks a leading slash.

    Only the parsed path is inspected; scheme and host are ignored. A URL
    whose path is empty (for example ``http://host``) therefore also yields
    ``True``.
    """
    path = urlparse(url).path
    if path.startswith("/"):
        return False
    return True


def url_no_base(url):
    """Report whether *url* is a root-relative reference such as ``/web/a.css``.

    The result is ``True`` only when the path starts with ``/`` and the URL
    carries neither a scheme nor a network location.
    """
    parts = urlparse(url)
    rooted = parts.path.startswith("/")
    has_origin = bool(parts.scheme or parts.netloc)
    return rooted and not has_origin


def download_files(html_file):
    """Fetch the ``<link>`` targets of *html_file* and point the links at local copies.

    The document is parsed, every ``<link href=...>`` whose href is
    root-relative (see ``url_no_base``) is resolved against the document's
    ``<base href>``, downloaded into ``/tmp/static`` and the href is replaced
    with the local file path. The document is then rewritten in place
    (pretty-printed) and made readable for group and others.

    Relies on the module-level ``session``, ``logger``, ``UID`` and ``GID``;
    ``session`` is only created by the ``__main__`` block of this script.

    Args:
        html_file: Path (``str`` or ``pathlib.Path``) of the HTML document to
            rewrite.

    Raises:
        ValueError: if the document has no ``<base href>`` or the base URL is
            not absolute (scheme and host are both required).
    """
    html_file = Path(html_file)
    static_dir = Path("/tmp/static")
    static_dir.mkdir(exist_ok=True)
    static_dir.chmod(static_dir.stat().st_mode | S_IROTH | S_IXOTH)
    with open(html_file, "rb") as fd:
        bs = BeautifulSoup(fd.read(), "html.parser")

    # A missing <base> (or one without href) used to surface as an opaque
    # TypeError/KeyError; report it explicitly instead.
    base_element = bs.find("base")
    base_url = base_element.get("href") if base_element is not None else None
    if not base_url:
        raise ValueError(f"{html_file} has no <base href=...> element.")
    # Downloads need a scheme and a host; whether the base has a path (for
    # example a trailing slash) is irrelevant for root-relative hrefs.
    parsed_base = urlparse(base_url)
    if not (parsed_base.scheme and parsed_base.netloc):
        raise ValueError("base_url must be absolute.")

    for element in bs.find_all("link", href=True):
        if not url_no_base(element["href"]):
            continue
        download_url = urljoin(base_url, element["href"])
        remote_path = Path(urlparse(download_url).path)
        # Web fonts are skipped; look at the URL path so that a query string
        # cannot hide the extension.
        if remote_path.suffix in (".woff", ".woff2"):
            continue
        req = session.get(download_url)
        logger.info("GET %s %s", download_url, req.status_code)
        if req.status_code != 200:
            continue
        # NOTE: files are stored by base name only, so two assets with the
        # same file name in different directories overwrite each other.
        file = static_dir / remote_path.name
        with file.open("wb") as fd:
            fd.write(req.content)
        os.chown(file, UID, GID)
        os.chmod(file, file.stat().st_mode | S_IRGRP | S_IROTH)
        element["href"] = str(file)

    # prettify() returns text; write it as UTF-8 explicitly rather than with
    # whatever encoding the process locale happens to select.
    with open(html_file, "wt", encoding="utf-8") as fd:
        fd.write(bs.prettify())
    os.chown(html_file, UID, GID)
    os.chmod(html_file, html_file.stat().st_mode | S_IRGRP | S_IROTH)


def _option_values(args, option, count=1):
    """Return the *count* arguments that follow *option* in *args*.

    Returns ``None`` when *option* is absent or fewer than *count* arguments
    follow it, so callers never index past the end of the command line.
    """
    if option not in args:
        return None
    start = args.index(option) + 1
    values = args[start:start + count]
    return values if len(values) == count else None


def _capture_document(uri, target):
    """Best-effort: save the document at *uri* to *target* and localise its links.

    *uri* is fetched through the module-level ``session`` when it starts with
    ``http`` and copied from disk otherwise. The snapshot is handed to the
    odoo user and passed through ``download_files``. Any failure is logged
    (syslog and ``/tmp/log.txt``) and swallowed, because capturing debug
    material must never prevent the real wkhtmltopdf run.

    Returns:
        ``True`` when the snapshot was written and processed, else ``False``.
    """
    try:
        if uri.startswith("http"):
            response = session.get(uri)
            if response.status_code != 200 or not response.content:
                # Nothing was written, so there is nothing to chown/process;
                # touching a stale snapshot from a previous run would mislead.
                logger.warning(
                    "GET %s %s: nothing captured", uri, response.status_code
                )
                return False
            target.write_bytes(response.content)
        else:
            shutil.copy(uri, target)
        os.chown(target, UID, GID)
        download_files(target)
    except Exception as exc:  # boundary of a debugging aid: log, do not crash
        logger.exception("could not capture %s as %s", uri, target)
        with open("/tmp/log.txt", "w") as fd:
            fd.write(repr(exc))
        return False
    return True


if __name__ == "__main__":
    args = sys.argv[1:]
    # download_files() and _capture_document() look this name up as a module
    # global, so it must be bound here at module level.
    session = requests.Session()

    if DEBUG:
        # wkhtmltopdf syntax: --cookie <name> <value>
        cookie = _option_values(args, "--cookie", 2)
        if cookie is not None:
            cookie_name, cookie_value = cookie
            session.cookies[cookie_name] = cookie_value

        header_uri = _option_values(args, "--header-html")
        if header_uri is not None:
            _capture_document(header_uri[0], LAST_HEADER)

        footer_uri = _option_values(args, "--footer-html")
        if footer_uri is not None:
            _capture_document(footer_uri[0], LAST_FOOTER)

        # The last two arguments are treated as <input> <output>.
        if len(args) >= 2:
            _capture_document(args[-2], LAST_BODY)

        # Let wkhtmltopdf report its progress and warnings while debugging.
        args = [arg for arg in args if arg != "--quiet"]

        with LAST_ARGS.open("w") as fd:
            fd.write("\n".join(args))
        os.chown(LAST_ARGS, UID, GID)

    proc = Popen([WKHTMLTOPDF, *args], stdin=None, stdout=PIPE, stderr=PIPE)
    # Always drain the pipes and wait: otherwise the wrapper exits while
    # wkhtmltopdf is still running (or blocked on a full pipe).
    stdout, stderr = proc.communicate()

    if DEBUG:
        with STDOUT.open("wb") as stdout_fd, STDERR.open("wb") as stderr_fd:
            stdout_fd.write(stdout)
            stderr_fd.write(stderr)
        os.chown(STDOUT, UID, GID)
        os.chown(STDERR, UID, GID)

    # Forward the child's output byte-for-byte; decoding could fail on
    # non-UTF-8 data and print() would append an extra newline.
    sys.stdout.buffer.write(stdout)
    sys.stdout.flush()
    sys.stderr.buffer.write(stderr)
    sys.stderr.flush()

    if DEBUG and args:
        output = Path(args[-1])
        # The output only exists when rendering succeeded.
        if output.is_file():
            shutil.copy(output, "/tmp/last_output.pdf")
            os.chmod("/tmp/last_output.pdf", 0o644)

    # Hand wkhtmltopdf's exit status to the caller instead of always
    # reporting success.
    sys.exit(proc.returncode)