Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import sys
- import os
def _summarize_column(column):
    """Return ``(unique_count, formatted_counts)`` for one DataFrame column.

    ``formatted_counts`` holds one `` label: count`` line per distinct value,
    in the order produced by ``value_counts()``.
    """
    counts = column.value_counts().to_dict()
    formatted = "\n".join(f" {label}: {count}" for label, count in counts.items())
    return column.nunique(), formatted


def analyze_csv(csv_path):
    """Write a short text report describing a CSV file.

    The report contains the row and column counts plus, for the last and the
    second-to-last columns, the number of distinct values and how often each
    one occurs. It is written to ``<csv base name>_info.txt`` in the current
    working directory (not next to the CSV file).

    Args:
        csv_path: Path to the CSV file; its first row is treated as the header.

    Raises:
        SystemExit: With exit code 1 when the CSV cannot be read, has fewer
            than two columns, or the report cannot be written. A message
            describing the problem is printed first.
    """
    try:
        # header=0: the first row supplies the column names.
        df = pd.read_csv(csv_path, header=0)
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        sys.exit(1)

    total_rows = len(df)
    total_columns = len(df.columns)

    # The report needs both the last and the second-to-last column; without
    # this guard ``df.iloc[:, -2]`` raises an uncaught IndexError.
    if total_columns < 2:
        print(
            f"Error: CSV file must have at least 2 columns, found {total_columns}"
        )
        sys.exit(1)

    # File name without directory or extension, reused for the report name.
    base_filename = os.path.splitext(os.path.basename(csv_path))[0]
    output_filename = f"{base_filename}_info.txt"

    unique_classes_last, class_count_str_last = _summarize_column(df.iloc[:, -1])
    unique_classes_second_last, class_count_str_second_last = _summarize_column(
        df.iloc[:, -2]
    )

    output_content = "\n".join(
        [
            f"0. Filename - {base_filename}",
            f"1. Count the number of rows in the dataset : {total_rows}",
            f"2. Count the number of columns in the dataset : {total_columns}",
            f"3. In the last column the number of unique classes : {unique_classes_last}",
            "4. In the last column the total count of each classes :",
            class_count_str_last,
            f"5. In the second last column the number of unique classes : {unique_classes_second_last}",
            "6. In the second last column the total count of each classes :",
            class_count_str_second_last,
        ]
    )

    try:
        # Explicit UTF-8 so non-ASCII class labels do not depend on the
        # platform's default encoding.
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(output_content)
    except Exception as e:
        print(f"Error writing output file: {e}")
        sys.exit(1)
    print(f"Analysis completed. Results written to {output_filename}")
def main():
    """Command-line entry point: validate the argument and run the analysis.

    Expects exactly one command-line argument, the path to a CSV file.
    Prints a usage or error message and exits with status 1 when the
    argument count is wrong or the path does not exist; otherwise hands
    the path to ``analyze_csv``.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python script.py <path_to_csv_file>")
        sys.exit(1)

    (target,) = cli_args
    if os.path.exists(target):
        analyze_csv(target)
        return

    print(f"Error: File {target} does not exist")
    sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement