Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm
# Path of the CSV file to analyse; replace with the actual file location.
file_path = 'B5 segmentSummary (1).csv'

# Inclusive range of column positions to plot. They are used as zero-based
# indices into df.columns, so 6..9 selects the 7th through 10th columns.
# NOTE(review): the original comment said "columns 6 through 9" - confirm
# whether zero-based or one-based positions were intended.
start_column_position = 6
end_column_position = 9


def gaussian_curve(values, num_points=200):
    """Fit a normal distribution to *values* and sample its density.

    Missing entries are dropped first. The mean and the sample standard
    deviation (pandas default, ddof=1) of the remaining values parameterise
    the distribution, and its pdf is evaluated on an evenly spaced, ascending
    grid spanning the observed minimum and maximum so that it can be drawn
    as a single smooth line.

    Args:
        values: A pandas Series, or anything ``pd.Series`` accepts.
        num_points: Number of grid points at which the pdf is evaluated.

    Returns:
        ``(x_grid, density, mean, std_dev)``, or ``None`` when no curve can
        be fitted: non-numeric data, fewer than two non-missing values, or a
        standard deviation that is zero or NaN.
    """
    data = pd.Series(values).dropna()
    if len(data) < 2 or not pd.api.types.is_numeric_dtype(data):
        return None

    data = data.astype(float)
    mean = data.mean()
    std_dev = data.std()
    # `not std_dev > 0` also rejects NaN, which a plain `<= 0` would let by.
    if not std_dev > 0:
        return None

    x_grid = np.linspace(data.min(), data.max(), num_points)
    density = norm.pdf(x_grid, mean, std_dev)
    return x_grid, density, mean, std_dev


def plot_gaussian_fit(values, col):
    """Show a density histogram of *values* with its fitted Gaussian curve.

    Opens a new figure containing the fitted curve, a 30-bin density
    histogram of the non-missing values and a dashed vertical line at the
    mean, then displays it. Columns for which ``gaussian_curve`` returns
    ``None`` are reported on stdout and skipped.

    Args:
        values: pandas Series holding the column's data.
        col: Column label, used in the figure title and the skip message.
    """
    fit = gaussian_curve(values)
    if fit is None:
        print(f"Skipping {col!r}: cannot fit a Gaussian to this column")
        return
    x_grid, density, mean, _ = fit

    plt.figure(figsize=(8, 6))
    plt.plot(x_grid, density, 'b-', label='Fitted Gaussian')
    plt.hist(values.dropna(), bins=30, density=True, alpha=0.7,
             color='gray', label='Data Histogram')
    plt.axvline(mean, color='red', linestyle='dashed', linewidth=2,
                label='Mean')
    plt.xlabel('Values')
    plt.ylabel('Density')
    plt.title(f'Gaussian Fit for {col}')
    plt.legend()
    plt.show()


def main():
    """Print a summary of 'Slide name' and plot fits for the chosen columns."""
    df = pd.read_csv(file_path)

    # Summary statistics for the 'Slide name' column.
    cases_summary = df['Slide name'].describe()
    print("Summary Statistics for 'Slide name' column:")
    print(cases_summary)

    # Clamp the range so a CSV with fewer columns does not raise IndexError.
    last_position = min(end_column_position, len(df.columns) - 1)
    for i in range(start_column_position, last_position + 1):
        col = df.columns[i]
        # 'Slide name' is summarised above rather than plotted.
        if col == 'Slide name':
            continue
        plot_gaussian_fit(df.iloc[:, i], col)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement