Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os

import pandas as pd

# Directory that holds the input CSV files.
DIRECTORY = '/path/to/your/directory'
# Columns extracted from every input file.
COLUMNS = ('SNP', 'P')
# File the combined table is written to.
OUTPUT_PATH = 'combined_data.csv'


def combine_csv_columns(directory, columns=COLUMNS):
    """Stack the selected columns of every ``.csv`` file in *directory*.

    Files are read in sorted file-name order so the row order of the result
    is reproducible (``os.listdir`` itself returns names in arbitrary order).

    Args:
        directory: Path of the folder to scan (not recursive).
        columns: Column names to load from each file; defaults to
            ``("SNP", "P")``.

    Returns:
        A DataFrame with the rows of all files concatenated under a fresh
        0..n-1 index. When the directory contains no ``.csv`` file, an empty
        DataFrame carrying the requested columns is returned.

    Raises:
        FileNotFoundError: If *directory* does not exist.
        ValueError: Raised by ``pandas.read_csv`` when a file lacks one of
            the requested columns.
    """
    wanted = list(columns)
    csv_paths = [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.endswith('.csv')
    ]
    # Only the requested columns are parsed from each file.
    frames = [pd.read_csv(path, usecols=wanted) for path in csv_paths]
    if not frames:
        # pd.concat rejects an empty list, so build the empty result directly.
        return pd.DataFrame(columns=wanted)
    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated rows on every loop iteration.
    return pd.concat(frames, ignore_index=True)


def main():
    """Combine the CSV files in ``DIRECTORY``, preview the result, save it."""
    combined_data = combine_csv_columns(DIRECTORY)
    # Print the first few rows of the combined data
    print(combined_data.head())
    # Save the combined data to a new CSV file
    combined_data.to_csv(OUTPUT_PATH, index=False)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement