Advertisement
1fractal

Untitled

Sep 28th, 2023
1,200
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.99 KB | None | 0 0
  1. import os
  2. import pandas as pd
  3.  
  4. # Specify the directory where your CSV files are located
  5. directory = '/path/to/your/directory'
  6.  
  7. # Get a list of all CSV files in the directory
  8. csv_files = [file for file in os.listdir(directory) if file.endswith('.csv')]
  9.  
  10. # Create an empty DataFrame to store the combined data
  11. combined_data = pd.DataFrame()
  12.  
  13. # Iterate through each CSV file and extract the specified columns
  14. for file in csv_files:
  15.     file_path = os.path.join(directory, file)
  16.    
  17.     # Read the CSV file with only the "SNP" and "P" columns
  18.     # Assuming the columns "SNP" and "P" exist in all CSV files
  19.     data = pd.read_csv(file_path, usecols=['SNP', 'P'])
  20.    
  21.     # Append the extracted data to the combined DataFrame
  22.     combined_data = combined_data.append(data, ignore_index=True)
  23.  
  24. # Print the first few rows of the combined data
  25. print(combined_data.head())
  26.  
  27. # Optionally, you can save the combined data to a new CSV file
  28. combined_data.to_csv('combined_data.csv', index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement