Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/home/ounis/pyapps/virtual_env/getrti/bin/python
# by OuNiS 04.2023
# Extracts text from a .pdf file and copies it to the clipboard.
#
# Modules:
# pyperclip https://pypi.org/project/pyperclip/
# pypdf https://pypi.org/project/pypdf/
# tqdm https://pypi.org/project/tqdm/
- import sys
- import os
- import argparse
- from time import sleep
- import pyperclip
- from pypdf import PdfReader
- from tqdm import tqdm
# Sample search phrase; nothing in the code visible here reads this constant.
TEXT_2_FIND = "abonamentowa za"

# Frames of the text spinner ("wiatraczek") drawn while pages are scanned:
# the four-character cycle - \ | / repeated twice.
# An alternative frame set would be [".", "o", "O", "o"].
WIATRACZEK = list("-\\|/") * 2
def arguments():
    """Parse the command line and return the resulting namespace.

    Two required positional arguments are declared, in this order:
    ``filename`` (the .pdf file) and ``text_2_find`` (the phrase to look
    for).  ``parse_args()`` is called without arguments, so the values are
    taken from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(
        prog="getrti.py",
        description="szuka frazy w pliku .pdf",
    )
    positionals = (
        ("filename", "plik .pdf"),
        ("text_2_find", "szukana fraza"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)
    return cli.parse_args()
def _find_matches(pages, phrase):
    """Return every line of *pages* that contains *phrase*.

    *pages* is an iterable of objects exposing ``extract_text()`` (here:
    ``PdfReader.pages``).  Each hit is recorded as a dict of the form
    ``{"line": <str>, "page_num": <int>, "line_num_on_page": <int>}``;
    page and line numbers are 1-based.  A spinner frame is written to
    stdout for each page and the terminal bell is rung for each hit.
    """
    matches = []
    frame_count = len(WIATRACZEK)
    for page_num, page in enumerate(pages, 1):
        page_lines = page.extract_text().split("\n")
        # The trailing "\r" returns the cursor to the start of the line, so
        # each page overwrites the previous spinner frame.
        sys.stdout.write('Wyszukiwanie... {0}\r'.format(WIATRACZEK[page_num % frame_count]))
        sys.stdout.flush()
        for line_num, line in enumerate(page_lines, 1):
            if phrase in line:
                matches.append({"line": line, "page_num": page_num, "line_num_on_page": line_num})
                # Audible bell for each hit, flushed immediately.
                print("\a", end="", flush=True)
    return matches


def _parse_choice(answer, count):
    """Turn a 1-based menu answer into a 0-based index, or None if invalid.

    ``str.isdecimal()`` is used instead of ``str.isnumeric()``: the latter
    also accepts characters such as "²" or "½", which ``int()`` rejects
    with ValueError.
    """
    if not answer.isdecimal():
        return None
    index = int(answer) - 1
    return index if 0 <= index < count else None


def _prompt_and_copy(matches):
    """Ask which match to copy to the clipboard and copy it.

    Re-prompts until the user enters a valid number or quits with "q"/"Q".
    A closed stdin (EOF) is treated like quitting instead of raising.
    """
    print(f"Wybierz numer w celu skopiowania do schowka(1 - {len(matches)}) (q - koniec) ")
    while True:
        try:
            answ = input(">").strip()
        except EOFError:
            return
        if answ in ("q", "Q"):
            return
        choice = _parse_choice(answ, len(matches))
        if choice is not None:
            pyperclip.copy(matches[choice]["line"])
            print("Skopiowane...")
            return


def main(args):
    """Search a .pdf file for a phrase and offer to copy one hit to the clipboard.

    Args:
        args: accepted for compatibility with ``main(sys.argv)``; the value
            is not used, because ``arguments()`` parses the command line
            itself.

    Returns:
        0 when the file was processed (whether or not anything was found),
        1 when the given file does not exist, so that ``sys.exit(main(...))``
        reports the failure to the calling shell.
    """
    # In case of a pyperclip error see:
    # https://pyperclip.readthedocs.io/en/latest/index.html#not-implemented-error
    args = arguments()
    filename = args.filename
    text_2_find = args.text_2_find

    if not os.path.isfile(filename):
        print("ERR: plik: '%s' nie istnieje!!!" % filename)
        return 1

    print("\nOtwieranie pliku: %s" % filename)
    reader = PdfReader(filename)
    number_of_pages = len(reader.pages)
    print("\nIlość stron w dokumencie: %d" % number_of_pages)
    print(f"Wyszukiwana fraza: \"{text_2_find}\"\n")

    founded_texts = _find_matches(reader.pages, text_2_find)

    # Empty list: nothing was found.
    if not founded_texts:
        print("\n\t - brak, nie znaleziono wzorca: '%s'" % text_2_find)
        return 0

    print()
    print("Znalezione frazy: ")
    for position, line_info in enumerate(founded_texts, 1):
        print(f'\t{position}. {line_info["line"]}, \n\t\tstrona {line_info["page_num"]}, linia {line_info["line_num_on_page"]}')

    _prompt_and_copy(founded_texts)
    return 0
# Script entry point: runs only when the file is executed directly, and
# hands whatever main() returns to sys.exit() as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement