Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Fit a normal distribution to a range of CSV columns and plot each fit
# on top of a histogram of the column's data, one subplot per column.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Replace 'B5 segmentSummary (1).csv' with the actual CSV file path
file_path = 'B5 segmentSummary (1).csv'

# Zero-based positions of the first and last column to analyze (inclusive).
start_column_position = 6  # Corresponds to column 7
end_column_position = 9  # Corresponds to column 10

# Read the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# iloc slices exclude the stop index, hence the "+ 1" to keep the end column.
selected_columns = df.iloc[:, start_column_position:end_column_position + 1]

# Size the subplot grid from the number of selected columns instead of
# assuming exactly four, so changing the column range above does not ask
# for a subplot index outside the grid. Four columns still give a 2x2 grid.
grid_cols = 2
plot_count = len(selected_columns.columns)
grid_rows = max(1, (plot_count + grid_cols - 1) // grid_cols)

# Create a figure that holds one subplot per analyzed column
plt.figure(figsize=(12, 6))

# All subplots share one x-axis range spanning the smallest and largest
# value found across every selected column.
x_range = np.linspace(
    selected_columns.min().min(),
    selected_columns.max().max(),
    1000,
)

# Iterate through the selected columns and analyze them
for i, col in enumerate(selected_columns.columns):
    plt.subplot(grid_rows, grid_cols, i + 1)

    # Drop missing values explicitly so the histogram and the fitted
    # parameters are computed from exactly the same samples.
    target_column = selected_columns[col].dropna()
    if target_column.empty:
        # Nothing to fit or draw for a column without any values.
        plt.title(f'Gaussian Fit for {col}')
        continue

    mean = target_column.mean()
    std_dev = target_column.std()

    # Evaluate the normal PDF parameterized by the sample mean and
    # sample standard deviation over the shared x range.
    fitted_data = norm.pdf(x_range, mean, std_dev)

    # Plot the Gaussian distribution
    plt.plot(x_range, fitted_data, 'b-', linewidth=2, label='Fitted Gaussian')

    # Plot the original data as a histogram. density=True normalizes the
    # bars so they are on the same scale as the PDF curve.
    plt.hist(target_column, bins=30, density=True, alpha=0.7, color='gray', label='Data Histogram')

    # Add labels and title with the target column name.
    # NOTE(review): with density=True the y-axis is a probability density
    # rather than a raw count; the label text is kept as originally written.
    plt.xlabel('Values')
    plt.ylabel('Frequency')
    plt.title(f'Gaussian Fit for {col}')

    # Mark the sample mean with a dashed vertical line
    plt.axvline(mean, color='red', linestyle='dashed', linewidth=2, label='Mean Shift')

    # Add a legend
    plt.legend()

# Adjust layout so subplot titles and axis labels do not overlap
plt.tight_layout()

# Show the plot
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement