Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Drop rows with infinite or missing numeric values from a CSV file.

Run as a script, this reads ``inputfile``, logs every infinite numeric cell
to ``error_log.txt``, removes each row that has an infinite or missing value
in any numeric column, and writes the remaining rows to
``cleaned_output.csv``.
"""
import logging
import sys

import numpy as np
import pandas as pd

inputfile = 'Wednesday-workingHours.pcap_ISCX.csv'
outputfile = 'cleaned_output.csv'
logfile = 'error_log.txt'

INFINITIES = [np.inf, -np.inf]


def find_infinite_values(df):
    """Return ``(row, column_name, value)`` for each infinite numeric cell.

    Only numeric columns are inspected, so string data is never compared
    against the infinities. ``row`` is the zero-based positional offset of
    the row within ``df``, not its index label.
    """
    numeric_df = df.select_dtypes(include=[np.number])
    # No finite float64 can exceed np.finfo(np.float64).max, so a separate
    # "extremely large value" comparison could only ever match +/-inf;
    # testing for the infinities alone covers both cases.
    mask = numeric_df.isin(INFINITIES).to_numpy()
    rows, cols = np.nonzero(mask)
    return [
        (int(row), numeric_df.columns[col], numeric_df.iat[row, col])
        for row, col in zip(rows, cols)
    ]


def drop_problematic_rows(df):
    """Return the rows of ``df`` that have no inf/NaN in any numeric column.

    Infinities are first turned into NaN so that a single ``isna`` test
    identifies every row to drop. ``df`` itself is left unmodified.
    """
    replaced = df.replace(INFINITIES, np.nan)
    numeric_df = replaced.select_dtypes(include=[np.number])
    rows_to_drop = numeric_df.isna().any(axis=1)
    return replaced[~rows_to_drop]


def main(path=inputfile):
    """Clean the CSV at ``path`` and write the result to ``outputfile``.

    Infinite values are reported in ``logfile`` before the affected rows are
    removed.

    Raises:
        SystemExit: with a non-zero status when ``path`` does not exist.
    """
    # Configure logging here rather than at import time so that importing
    # this module has no side effects.
    logging.basicConfig(filename=logfile, level=logging.INFO, format='%(message)s')

    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        message = f"Error: '{path}' not found."
        logging.error(message)
        # A string argument makes sys.exit print it to stderr and exit with
        # status 1, so callers and shells can detect the failure.
        sys.exit(message)

    findings = find_infinite_values(df)
    if findings:
        logging.info("Problematic values found in '%s':", path)
        for row, column, value in findings:
            logging.info("Row: %s, Column: '%s', Value: %s", row, column, value)
    else:
        logging.info("No infinity or extremely large values found in '%s'.", path)

    # Save the cleaned DataFrame without the problematic rows.
    drop_problematic_rows(df).to_csv(outputfile, index=False)
    print(
        "Data cleaning complete. Rows with problematic values have been removed. "
        f"Output saved to '{outputfile}' and errors logged to '{logfile}'."
    )


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement