Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # Filename: split_data_w_markers.py
- # Author: Jeoi Reqi
- # Split Data With Markers is a simple script to be used along with the Data Marker Python Script
- # Script: https://pastebin.com/VyfFfZiN
- import os
def split_file(input_file, output_directory, lines_per_chunk=100):
    """Split a text file into numbered chunk files of a fixed line count.

    The whole input file is read into memory, cut into consecutive groups
    of ``lines_per_chunk`` lines, and each group is written to
    ``chunk_<N>.txt`` inside ``output_directory``. ``<N>`` is 1-based and
    zero-padded to the width of the total chunk count so the files sort
    in order. The final chunk holds whatever lines remain and may be
    shorter than the others. An empty input file produces no chunk files.

    Args:
        input_file: Path of the UTF-8 text file to split.
        output_directory: Existing directory that receives the chunk files.
        lines_per_chunk: Maximum number of lines per chunk; must be >= 1.

    Raises:
        ValueError: If ``lines_per_chunk`` is less than 1.
        OSError: If the input cannot be read or a chunk cannot be written.
    """
    if lines_per_chunk < 1:
        raise ValueError("lines_per_chunk must be a positive integer")

    with open(input_file, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    total_lines = len(lines)
    # Ceiling division: a trailing partial chunk counts as one chunk, but an
    # exact multiple of lines_per_chunk must not add an extra, empty chunk.
    total_chunks = (total_lines + lines_per_chunk - 1) // lines_per_chunk
    print(f"Total number of chunks: {total_chunks}")

    # Calculate the number of digits needed for naming files
    num_digits = len(str(total_chunks))

    # Step through the start offset of every chunk; slicing past the end of
    # the list is safe, so the last chunk is naturally truncated.
    chunk_starts = range(0, total_lines, lines_per_chunk)
    for chunk_index, start_index in enumerate(chunk_starts, start=1):
        chunk = lines[start_index:start_index + lines_per_chunk]

        # Format the chunk number with leading zeros
        chunk_number = str(chunk_index).zfill(num_digits)
        output_file_path = os.path.join(output_directory, f'chunk_{chunk_number}.txt')

        with open(output_file_path, 'w', encoding='utf-8') as output:
            output.writelines(chunk)

    print("File split complete.")
if __name__ == "__main__":
    # Both paths are resolved against the directory the script is run from.
    base_dir = os.getcwd()

    # Edit the input name to your desired file name.
    source_path = os.path.join(base_dir, 'isoon_chat.txt')
    # Edit the output folder name to your desired folder name.
    target_dir = os.path.join(base_dir, 'split_isoon_chat_data_w_markers')

    # Create the output folder up front; an already existing folder is reused.
    os.makedirs(target_dir, exist_ok=True)

    # Write the input out as chunk files of 100 lines each.
    split_file(source_path, target_dir, lines_per_chunk=100)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement