Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- # Filename: remove_diacritics.py
- # Author: Jeoi Reqi
- """
- This Python script removes diacritics (e.g., é, è, ê, ñ, ü, ç, ả, ă & å) from a specified text file and saves the result to a separate output file.
- Requirements:
- - Python 3
- - unidecode library (install using 'pip install unidecode')
- """
- import os
- from unidecode import unidecode
def remove_diacritics(input_file, output_file):
    """Write a diacritic-free copy of a UTF-8 text file.

    The whole of ``input_file`` is read as UTF-8, passed through
    ``unidecode`` and written to ``output_file`` as UTF-8. A status line
    is printed to standard output in both the success and failure case.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the file to create or overwrite.

    Returns:
        None. File-system and decoding failures are reported with an
        ``Error: ...`` line on standard output instead of being raised.
    """
    try:
        with open(input_file, 'r', encoding='utf-8') as source:
            text = source.read()

        # Convert before opening the destination so that a failure here
        # cannot leave a truncated output file behind.
        ascii_text = unidecode(text)

        with open(output_file, 'w', encoding='utf-8') as target:
            target.write(ascii_text)
    except (OSError, UnicodeError) as e:
        # Only I/O and encoding problems are expected at run time; anything
        # else is a programming error and should surface with a traceback.
        print(f"Error: {e}")
    else:
        print(f"Diacritics removed successfully. Output saved to '{output_file}'.")
if __name__ == "__main__":
    # File names may be given on the command line, e.g.
    #     python remove_diacritics.py notes.txt notes_ascii.txt
    # With no arguments the script falls back to input.txt / output.txt.
    import argparse

    parser = argparse.ArgumentParser(
        description="Remove diacritics from a UTF-8 text file."
    )
    parser.add_argument(
        "input_filename",
        nargs="?",
        default="input.txt",
        help="name of the file to read (default: input.txt)",
    )
    parser.add_argument(
        "output_filename",
        nargs="?",
        default="output.txt",
        help="name of the file to write (default: output.txt)",
    )
    args = parser.parse_args()

    input_filename = args.input_filename
    output_filename = args.output_filename

    # Relative names are anchored at the current working directory;
    # os.path.join leaves an absolute path unchanged.
    input_path = os.path.join(os.getcwd(), input_filename)
    output_path = os.path.join(os.getcwd(), output_filename)
    remove_diacritics(input_path, output_path)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement