Advertisement
UF6

Case Protein 1-9 Violin/Distribution

UF6
Nov 14th, 2023
466
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.91 KB | Source Code | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5. from scipy.stats import norm
  6. import math
  7.  
  8. # Replace 'B5 segmentSummary (1).csv' with the actual CSV file path
  9. file_path = 'B5 segmentSummary (1).csv'
  10.  
  11. # Read the CSV file into a Pandas DataFrame
  12. df = pd.read_csv(file_path)
  13.  
  14. # Check the available column names
  15. print(df.columns)
  16.  
  17. # Example: Replace 'categorical_column' with the correct column name containing categorical data
  18. categorical_column = df.columns[0]  # Change this to the correct column name
  19.  
  20. # Define the range of column positions you want to analyze (columns 6 through 10)
  21. start_column_position = 6  # Corresponds to column 6
  22. end_column_position = 9  # Corresponds to column 10
  23.  
  24. # Create a list of unique categories in the categorical column
  25. categories = df[categorical_column].unique()
  26.  
  27. # Create a Seaborn color palette for differentiation
  28. colors = sns.color_palette('husl', n_colors=end_column_position - start_column_position + 1)
  29.  
  30. # Iterate through each category and create separate figures with six Raincloud plots and six Gaussian distributions per page
  31. for category in categories:
  32.     category_data = df[df[categorical_column] == category]
  33.     num_plots = end_column_position - start_column_position + 1
  34.     num_pages = math.ceil(num_plots / 6)  # Determine the number of pages needed
  35.    
  36.     for page in range(num_pages):
  37.         plt.figure(figsize=(18, 12))
  38.        
  39.         for i in range(6):
  40.             plot_num = page * 6 + i
  41.             if plot_num >= num_plots:
  42.                 break
  43.            
  44.             col = df.columns[start_column_position + plot_num]
  45.            
  46.             # Raincloud plot
  47.             plt.subplot(2, 6, i + 1)
  48.             sns.violinplot(data=category_data, y=col, color=colors[plot_num])
  49.             sns.boxplot(data=category_data, y=col, color='white', width=0.2)
  50.             plt.title(f'Raincloud Plot\n{category} - {col}')
  51.        
  52.             # Gaussian distribution plot
  53.             plt.subplot(2, 6, i + 7)
  54.             target_column = category_data[col]
  55.             mean = target_column.mean()
  56.             std_dev = target_column.std()
  57.             x_range = np.linspace(target_column.min(), target_column.max(), 1000)
  58.             fitted_data = norm.pdf(x_range, mean, std_dev)
  59.             plt.plot(x_range, fitted_data, 'b-', linewidth=2, label='Fitted Gaussian')
  60.             plt.hist(target_column, bins=30, density=True, alpha=0.7, color='gray', label='Data Histogram')
  61.             plt.xlabel('Values')
  62.             plt.ylabel('Frequency')
  63.             plt.title(f'Gaussian Fit\n{category} - {col}')
  64.             plt.axvline(mean, color='red', linestyle='dashed', linewidth=2, label='Mean')
  65.             plt.legend()
  66.  
  67.         plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # Adjust the layout to avoid title overlap
  68.         plt.suptitle(f'{category} Plots - Page {page + 1}', fontsize=16)
  69.         plt.show()
  70.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement