# Untitled: combine the "SNP" and "P" columns from every CSV file in a directory.

import os
import pandas as pd

# Specify the directory where your CSV files are located
directory = '/path/to/your/directory'

# Get a list of all CSV files in the directory. The names are sorted because
# os.listdir returns entries in arbitrary order, and the row order of the
# combined output should be reproducible between runs.
csv_files = sorted(file for file in os.listdir(directory) if file.endswith('.csv'))

# Read only the "SNP" and "P" columns from each CSV file.
# Assumes both columns exist in every CSV file; pd.read_csv raises ValueError
# for a file that lacks either one.
frames = [
    pd.read_csv(os.path.join(directory, file), usecols=['SNP', 'P'])
    for file in csv_files
]

# Combine all per-file frames with a single pd.concat call.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# calling it inside a loop re-copied the accumulated rows on every iteration.
# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when the directory contains no CSV files.
combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Print the first few rows of the combined data
print(combined_data.head())

# Optionally, you can save the combined data to a new CSV file
# (written to the current working directory, without the index column)
combined_data.to_csv('combined_data.csv', index=False)