Advertisement
UF6

geomx_rna_analysis

UF6
Aug 9th, 2024
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.09 KB | Source Code | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5. from sklearn.preprocessing import QuantileTransformer
  6. import statsmodels.api as sm
  7. from statsmodels.stats.multitest import multipletests
  8. import scanpy as sc
  9.  
  10. # Step 1: Load the Data
  11. data_dir = 'path_to_your_data_directory/'
  12.  
  13. # Assuming your data is in CSV format; modify this as needed
  14. demo_data = pd.read_csv(f'{data_dir}/your_data.csv')
  15.  
  16. # Step 2: Data Normalization
  17. transformer = QuantileTransformer(output_distribution='normal')
  18. normalized_data = pd.DataFrame(transformer.fit_transform(demo_data.iloc[:, 1:]), columns=demo_data.columns[1:])
  19. normalized_data.insert(0, 'Sample', demo_data['Sample'])  # Preserving the sample column
  20.  
  21. # Step 3: Quality Control and Filtering
  22. filtered_data = normalized_data[normalized_data['ROICount'] > 100]  # Modify based on your QC metric
  23.  
  24. # Step 4: Exploratory Data Analysis - Boxplot
  25. plt.figure(figsize=(10, 6))
  26. sns.boxplot(data=filtered_data.melt(id_vars=['Sample']), x='variable', y='value')
  27. plt.xticks(rotation=90)
  28. plt.show()
  29.  
  30. # Step 5: Differential Expression Analysis
  31. # Assuming 'Group' is a categorical column in your data
  32. design_matrix = pd.get_dummies(filtered_data['Group'], drop_first=True)
  33. # Fit a linear model (example: expression vs group)
  34. model = sm.OLS(filtered_data.drop(columns=['Sample', 'Group']).values, design_matrix.values).fit()
  35. results = model.summary()
  36.  
  37. # Extract p-values for differential expression
  38. pvals = model.pvalues
  39. adjusted_pvals = multipletests(pvals, alpha=0.05, method='fdr_bh')[1]
  40. top_genes = filtered_data.columns[adjusted_pvals < 0.05]
  41.  
  42. # Step 6: Heatmap Visualization
  43. sns.clustermap(filtered_data.drop(columns=['Sample', 'Group']).T, cmap="viridis", figsize=(10,10))
  44. plt.show()
  45.  
  46. # Step 7: Save Results
  47. filtered_data.to_csv('filtered_data.csv', index=False)
  48.  
  49. # Optional Step: Advanced Analysis - Clustering with Scanpy (for single-cell RNA-seq data)
  50. adata = sc.AnnData(filtered_data.drop(columns=['Sample', 'Group']).values)
  51. sc.pp.neighbors(adata, n_neighbors=10)
  52. sc.tl.umap(adata)
  53. sc.pl.umap(adata, color='Sample')
  54.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement