Advertisement
mayankjoin3

dataset_properties_get_row_features_count

Feb 11th, 2025 (edited)
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.46 KB | None | 0 0
  1. import pandas as pd
  2. import sys
  3. import os
  4.  
  5. def analyze_csv(csv_path):
  6.     # Read CSV file
  7.     try:
  8.         # Skip first row as header
  9.         df = pd.read_csv(csv_path, header=0)
  10.     except Exception as e:
  11.         print(f"Error reading CSV file: {e}")
  12.         sys.exit(1)
  13.    
  14.     # Get filename without extension
  15.     base_filename = os.path.splitext(os.path.basename(csv_path))[0]
  16.     output_filename = f"{base_filename}_info.txt"
  17.    
  18.     # Get required information
  19.     total_rows = len(df)
  20.     total_columns = len(df.columns)
  21.    
  22.     # Last column analysis
  23.     last_column = df.iloc[:, -1]
  24.     unique_classes_last = last_column.nunique()
  25.     class_counts_last = last_column.value_counts().to_dict()
  26.    
  27.     # Second-to-last column analysis
  28.     second_last_column = df.iloc[:, -2]
  29.     unique_classes_second_last = second_last_column.nunique()
  30.     class_counts_second_last = second_last_column.value_counts().to_dict()
  31.    
  32.     # Format class counts for last column
  33.     class_count_str_last = "\n".join(f"    {class_name}: {count}" for class_name, count in class_counts_last.items())
  34.    
  35.     # Format class counts for second-to-last column
  36.     class_count_str_second_last = "\n".join(f"    {class_name}: {count}" for class_name, count in class_counts_second_last.items())
  37.    
  38.     # Create output content
  39.     output_content = f"""0. Filename - {base_filename}
  40. 1. Count the number of rows in the dataset : {total_rows}
  41. 2. Count the number of columns in the dataset : {total_columns}
  42. 3. In the last column the number of unique classes : {unique_classes_last}
  43. 4. In the last number the total count of each classes :
  44. {class_count_str_last}
  45. 5. In the second last column the number of unique classes : {unique_classes_second_last}
  46. 6. In the second last column the total count of each classes :
  47. {class_count_str_second_last}"""
  48.    
  49.     # Write to output file
  50.     try:
  51.         with open(output_filename, 'w') as f:
  52.             f.write(output_content)
  53.         print(f"Analysis completed. Results written to {output_filename}")
  54.     except Exception as e:
  55.         print(f"Error writing output file: {e}")
  56.         sys.exit(1)
  57.  
  58. def main():
  59.     if len(sys.argv) != 2:
  60.         print("Usage: python script.py <path_to_csv_file>")
  61.         sys.exit(1)
  62.    
  63.     csv_path = sys.argv[1]
  64.     if not os.path.exists(csv_path):
  65.         print(f"Error: File {csv_path} does not exist")
  66.         sys.exit(1)
  67.    
  68.     analyze_csv(csv_path)
  69.  
  70. if __name__ == "__main__":
  71.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement