Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- """
- Wrapper to debug wkhtmltopdf
- """
- import sys
- import logging
- import logging.handlers
- import shutil
- import pwd
- import os
- from pathlib import Path
- from stat import S_IROTH, S_IXOTH, S_IRGRP
- from subprocess import Popen, PIPE
- from urllib.parse import urljoin, urlparse
- import requests
- import setproctitle
- from bs4 import BeautifulSoup
# Give the wrapper process its own title.
setproctitle.setproctitle("wkhtmltopdf.py")

# Account whose uid/gid is applied (via os.chown) to the files written below.
PWD = pwd.getpwnam("odoo")
UID, GID = PWD.pw_uid, PWD.pw_gid

# Gates the header/footer snapshots, the "--quiet" stripping and the
# argument dump in the script section.
DEBUG = True

# Where the most recent inputs, arguments and outputs are kept.
LAST_BODY = Path("/tmp/last_body.html")
LAST_HEADER = Path("/tmp/last_header.html")
LAST_FOOTER = Path("/tmp/last_footer.html")
LAST_HTML = Path("/tmp/last_html.html")
LAST_ARGS = Path("/tmp/last_args.txt")
STDOUT = Path("/tmp/last_wkhtmtopdf_stdout.txt")
STDERR = Path("/tmp/last_wkhtmtopdf_stderr.txt")

# The executable this script launches with the forwarded arguments.
WKHTMLTOPDF = Path("/usr/bin/wkhtmltopdf.bin")

# Log through the local syslog socket at INFO level.
logger = logging.getLogger("wkhtmltopdf")
logger.setLevel(logging.INFO)
handler = logging.handlers.SysLogHandler(address="/dev/log")
logger.addHandler(handler)
def url_is_relative(url):
    """Report whether the path component of *url* lacks a leading slash.

    Only the parsed path is inspected; scheme and host play no part.
    Consequently ``http://host`` (empty path) and ``css/site.css`` give
    True, while ``http://host/`` and ``/web/site.css`` give False.
    """
    path = urlparse(url).path
    if path.startswith("/"):
        return False
    return True
def url_no_base(url):
    """Report whether *url* is a root-anchored path without scheme or host.

    True for references such as ``/web/static/site.css``; False for full
    URLs, protocol-relative ``//host/...`` references and paths that do
    not start with a slash.
    """
    parts = urlparse(url)
    if parts.scheme or parts.netloc:
        return False
    return parts.path.startswith("/")
def _hand_over(path):
    """Give *path* to UID/GID and add group/world read permission."""
    os.chown(path, UID, GID)
    os.chmod(path, path.stat().st_mode | S_IRGRP | S_IROTH)


def download_files(html_file):
    """Replace root-relative ``<link>`` targets of *html_file* by local copies.

    Each ``<link href="/...">`` (no scheme, no host) is resolved against the
    document's ``<base href>``, fetched with the module-level ``session``
    (created in the script section), stored in /tmp/static and its ``href``
    rewritten to that local file. The document is then written back in
    prettified form. Downloaded files and the document itself are chowned
    to UID/GID and made group/world readable. Links whose path ends in
    .woff/.woff2 are left untouched.

    A document without ``<base href>`` is not rewritten; only its ownership
    and permissions are adjusted.

    Args:
        html_file: Path of the HTML document; it is modified in place.

    Raises:
        ValueError: ``<base href>`` has no scheme or no host.
    """
    html_file = Path(html_file)
    static_dir = Path("/tmp/static")
    static_dir.mkdir(exist_ok=True)
    static_dir.chmod(static_dir.stat().st_mode | S_IROTH | S_IXOTH)

    soup = BeautifulSoup(html_file.read_bytes(), "html.parser")

    base_element = soup.find("base", href=True)
    if base_element is None:
        # Without a base there is nothing to resolve rooted links against.
        logger.warning("%s has no <base href>; assets not downloaded", html_file)
        _hand_over(html_file)
        return

    base_url = base_element["href"]
    parsed_base = urlparse(base_url)
    # Absolute means "has scheme and host"; looking only at the path would
    # reject "http://host/" and accept a bare "foo/bar".
    if not (parsed_base.scheme and parsed_base.netloc):
        raise ValueError("base_url must be absolute.")

    for element in soup.find_all("link", href=True):
        href = element["href"]
        if not url_no_base(href):
            continue
        download_url = urljoin(base_url, href)
        remote_path = Path(urlparse(download_url).path)
        # Judge the extension on the path so "font.woff?v=1" is skipped too.
        if remote_path.suffix.lower() in (".woff", ".woff2"):
            continue
        if not remote_path.name:
            # e.g. href="/": no file name to store the download under.
            continue
        response = session.get(download_url)
        logger.info("GET %s %s", download_url, response.status_code)
        if response.status_code != 200:
            continue
        local_copy = static_dir / remote_path.name
        local_copy.write_bytes(response.content)
        _hand_over(local_copy)
        element["href"] = str(local_copy)

    # Explicit encoding: the locale default may be unable to encode the page.
    html_file.write_text(soup.prettify(), encoding="utf-8")
    _hand_over(html_file)
def _option_value(args, flag, offset=1):
    """Return the argument *offset* places after *flag*, or None if absent."""
    try:
        return args[args.index(flag) + offset]
    except (ValueError, IndexError):
        return None


def _save_snapshot(http, uri, target):
    """Store the document behind *uri* at *target* and localise its assets.

    Best effort: any failure is logged (syslog and /tmp/log.txt) and
    swallowed so that a broken debug snapshot never prevents the real
    wkhtmltopdf run.
    """
    try:
        if uri.startswith("http"):
            response = http.get(uri)
            if response.status_code != 200 or not response.content:
                # Nothing fresh to save; do not post-process a stale file.
                logger.warning("GET %s %s: no snapshot", uri, response.status_code)
                return
            target.write_bytes(response.content)
        else:
            shutil.copy(uri, target)
        os.chown(target, UID, GID)
        download_files(target)
    except Exception as exc:  # debug aid only, must not abort the conversion
        logger.exception("snapshot of %s failed", uri)
        try:
            with open("/tmp/log.txt", "w") as fd:
                fd.write(repr(exc))
        except OSError:
            logger.exception("could not write /tmp/log.txt")


# Script entry point: snapshot the inputs, run the real wkhtmltopdf binary
# with the same arguments, keep its output for inspection and exit with its
# return code.
if __name__ == "__main__":
    args = sys.argv[1:]
    # Module-level on purpose: download_files() looks up ``session`` globally.
    session = requests.Session()

    if DEBUG:
        # The value is read two slots after "--cookie" (flag, name, value)
        # and stored as the "session_id" cookie.
        session_id = _option_value(args, "--cookie", 2)
        if session_id is not None:
            session.cookies["session_id"] = session_id

        for flag, target in (
            ("--header-html", LAST_HEADER),
            ("--footer-html", LAST_FOOTER),
        ):
            uri = _option_value(args, flag)
            if uri is not None:
                _save_snapshot(session, uri, target)

    # The second-to-last argument is treated as the body document.
    if len(args) >= 2:
        _save_snapshot(session, args[-2], LAST_BODY)

    if DEBUG:
        # Drop every "--quiet" so wkhtmltopdf's output can be captured.
        args = [arg for arg in args if arg != "--quiet"]
        try:
            LAST_ARGS.write_text("\n".join(args))
            os.chown(LAST_ARGS, UID, GID)
        except OSError:
            logger.exception("could not store %s", LAST_ARGS)

    proc = Popen([str(WKHTMLTOPDF), *args], stdin=None, stdout=PIPE, stderr=PIPE)
    stdout, stderr = proc.communicate()

    # Relay the captured output unchanged; decoding could fail on binary data.
    sys.stdout.buffer.write(stdout)
    sys.stdout.buffer.flush()
    sys.stderr.buffer.write(stderr)
    sys.stderr.buffer.flush()

    try:
        STDOUT.write_bytes(stdout)
        STDERR.write_bytes(stderr)
        os.chown(STDOUT, UID, GID)
        os.chown(STDERR, UID, GID)
        # The last argument is treated as the output file; it does not exist
        # for calls such as "--version" or when the conversion failed.
        output = Path(args[-1]) if args else None
        if output is not None and output.is_file():
            shutil.copy(output, "/tmp/last_output.pdf")
            os.chmod("/tmp/last_output.pdf", 0o644)
    except OSError:
        logger.exception("could not store wkhtmltopdf debug artefacts")

    # Behave like the wrapped binary towards the calling process.
    sys.exit(proc.returncode)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement