Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import re
class bcolors:
    """ANSI SGR escape sequences for colouring and styling terminal text.

    Intended usage is ``f"{bcolors.X}some text{bcolors.ENDC}"``: the first
    code switches a style on and ``ENDC`` resets all styling afterwards.
    """

    # Bright foreground colours.
    HEADER = "\033[95m"     # magenta
    OKBLUE = "\033[94m"     # blue
    OKCYAN = "\033[96m"     # cyan
    OKGREEN = "\033[92m"    # green
    WARNING = "\033[93m"    # yellow
    FAIL = "\033[91m"       # red

    # Reset every attribute back to the terminal default.
    ENDC = "\033[0m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
def remove_links_and_colon(text):
    """Remove every ``<url>:`` prefix from *text*, leaving e.g. ``user:pass``.

    A URL is anything starting with ``http://``, ``https://`` or ``www.`` and
    running up to (and including) the next colon.

    The URL body may not contain whitespace. Without that restriction a URL
    that has no colon on its own line would swallow the following line up to
    that line's first colon (typically deleting a user name).

    Known limitation: for a URL with an explicit port
    (``https://host:8080/x:user:pass``) the match stops at the colon before
    the port, so ``8080/x:user:pass`` is left behind.

    Args:
        text: Text to clean; may span multiple lines.

    Returns:
        *text* with all ``<url>:`` occurrences removed.
    """
    url_pattern = re.compile(r'https?://[^:\s]+:|www\.[^:\s]+:')
    return url_pattern.sub('', text)
def process_files(keyword, files):
    """Collect every line that contains *keyword*, ignoring case.

    Args:
        keyword: Substring to look for. An empty keyword matches every line.
        files: Iterable of paths to text files, read as UTF-8.

    Returns:
        A list of matching lines (without their newline), in file order and,
        within each file, in line order.

    Raises:
        OSError: If one of the files cannot be opened or read.
    """
    # Lower-case the keyword once instead of once per scanned line.
    needle = keyword.lower()
    results = []
    for file_path in files:
        # errors='replace' keeps one badly encoded file from aborting the
        # whole scan and discarding the matches already collected; bytes that
        # are not valid UTF-8 show up as U+FFFD in the returned lines.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            contents = file.read()
        results.extend(
            line for line in contents.split('\n') if needle in line.lower()
        )
    return results
def save_results(keyword, directory, results):
    """Write *results* to ``<keyword>_results.txt`` inside *directory*.

    Lines are joined with ``\\n`` and written as UTF-8. When *results* is
    empty nothing is written and a warning is printed instead.

    Args:
        keyword: Search keyword; used to build the output file name.
        directory: Folder that receives the output file.
        results: List of matched lines to save.

    Raises:
        OSError: If the output file cannot be created or written.
    """
    if not results:
        print(f"{bcolors.WARNING}Nenhuma correspondência encontrada.{bcolors.ENDC}")
        return

    # Keywords are often URLs or paths ("site.com/login", "http://x"). Path
    # separators and other characters that are illegal in file names would
    # make open() fail or place the file outside `directory`, so they are
    # replaced with "_" for the file name only.
    safe_keyword = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', keyword)
    output_filename = f"{safe_keyword}_results.txt"
    output_path = os.path.join(directory, output_filename)
    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.write('\n'.join(results))
    print(f"{bcolors.OKGREEN}Os dados extraídos foram salvos em: {output_path}{bcolors.ENDC}")
def remove_url_from_data(file_path):
    """Strip ``<url>:`` prefixes from a file and save a ``_cleaned`` copy.

    The input is read as UTF-8, every ``http(s)://...:`` or ``www....:``
    prefix is removed, and the result is written next to the input as
    ``<name>_cleaned<ext>``. If *file_path* is not an existing file, an error
    message is printed and nothing is written.

    Args:
        file_path: Path of the text file to clean.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    if not os.path.isfile(file_path):
        print(f"{bcolors.FAIL}Arquivo não encontrado.{bcolors.ENDC}")
        return

    # Build the output name from the real extension. A plain
    # str.replace('.txt', ...) rewrites every ".txt" in the path and, for any
    # other extension, leaves the path unchanged so the input file would be
    # overwritten.
    root, ext = os.path.splitext(file_path)
    new_file_path = f"{root}_cleaned{ext}"

    # The URL body excludes whitespace so that a URL without a colon on its
    # own line cannot swallow the next line up to that line's first colon.
    url_pattern = re.compile(r'https?://[^:\s]+:|www\.[^:\s]+:')

    with open(file_path, 'r', encoding='utf-8') as file:
        contents = file.read()
    cleaned_contents = url_pattern.sub('', contents)
    with open(new_file_path, 'w', encoding='utf-8') as new_file:
        new_file.write(cleaned_contents)
    print(f"{bcolors.OKBLUE}Os links foram removidos e o arquivo foi salvo como: {new_file_path}{bcolors.ENDC}")
def main_menu():
    """Print the welcome banner followed by the numbered menu options."""
    # Options 1-3 share the cyan styling; the exit option is printed in red.
    cyan_options = (
        "1 - Extrair dados de um único arquivo",
        "2 - Extrair dados de todos os arquivos em uma pasta",
        "3 - Remover a parte do link e deixar 'nome:senha'",
    )

    print(f"{bcolors.HEADER}Bem-vindo ao Extrator de Dados!{bcolors.ENDC}")
    print("Escolha uma das seguintes opções:")
    for label in cyan_options:
        print(f"{bcolors.OKCYAN}{label}{bcolors.ENDC}")
    print(f"{bcolors.FAIL}4 - Sair{bcolors.ENDC}")
def main():
    """Run one interactive session: show the menu and execute the choice.

    Options:
        1 - search a single file for a keyword and save the matching lines.
        2 - search every ``.txt`` file in a folder and save the matching lines.
        3 - strip the URL part from a file, leaving ``name:password``.
        4 - exit.
    """
    main_menu()
    # Strip the answer so that "1 " or " 2" is still recognised.
    option = input(f"{bcolors.BOLD}Digite o número da opção desejada: {bcolors.ENDC}").strip()

    if option == '4':
        print(f"{bcolors.WARNING}Saindo do programa.{bcolors.ENDC}")
        return

    if option == '3':
        file_path = input("Digite o caminho completo do arquivo para remover a parte do link: ").strip()
        remove_url_from_data(file_path)
        return

    # Reject unknown choices before prompting for a keyword that would never
    # be used.
    if option not in ('1', '2'):
        print(f"{bcolors.FAIL}Opção inválida.{bcolors.ENDC}")
        return

    keyword = input("Digite a palavra-chave: ")

    if option == '1':
        file_path = input("Digite o caminho completo do arquivo: ").strip()
        if not os.path.isfile(file_path):
            print(f"{bcolors.FAIL}Arquivo não encontrado.{bcolors.ENDC}")
            return
        results = process_files(keyword, [file_path])
        save_results(keyword, os.path.dirname(file_path), results)
        return

    # option == '2'
    directory = input("Digite o caminho da pasta: ").strip()
    if not os.path.isdir(directory):
        print(f"{bcolors.FAIL}Pasta não encontrada.{bcolors.ENDC}")
        return

    # The results file is written into this same folder. Skip the one left by
    # a previous run with the same keyword, otherwise every rerun would read
    # its own output and duplicate the matches. Sorting makes the order of the
    # saved lines reproducible (os.listdir order is arbitrary).
    previous_output = f"{keyword}_results.txt"
    files = [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.endswith('.txt') and name != previous_output
    ]
    results = process_files(keyword, files)
    save_results(keyword, directory, results)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement