Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 15 23:01:33 2022
@author: Sanjit

Build nested train/test CSV feature groups from ``input.csv``.

The last numeric column of the input is treated as the target. The other
numeric columns are ranked by the absolute value of their correlation with
the target, strongest first. Group ``i`` holds the ``i`` best-ranked
columns plus the target and is written to::

    output/input Group<i>/trainGroup<i>.csv
    output/input Group<i>/testGroup<i>.csv

The train file receives the first ``nTrain`` rows; the test file receives
the remaining rows.
"""
import os

import pandas as pd

INPUT_CSV = 'input.csv'
OUTPUT_DIR = 'output'
# Share of the rows (taken from the top of the file) used for training.
TRAIN_FRACTION = 0.7


def rank_by_target_correlation(df):
    """Return *df*'s correlated columns, strongest first, target last.

    The target is the last column of ``df.corr()``. Only columns that
    appear in that correlation matrix are kept in the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table whose last correlated column is the target.

    Returns
    -------
    pandas.DataFrame
        The feature columns ordered by descending absolute correlation
        with the target, followed by the target column itself.
    """
    corr = df.corr()
    target = corr.columns[-1]
    # Drop the target's own row positionally instead of assuming its
    # self-correlation sorts first: a feature with |corr| == 1 could tie
    # with it and end up being mistaken for the target.
    strength = corr.iloc[:-1, -1].abs()
    # Stable sort so tied columns come out in a deterministic order.
    ranked = strength.sort_values(ascending=False, kind='mergesort')
    return df[list(ranked.index) + [target]]


def write_feature_groups(df, n_train, out_dir=OUTPUT_DIR):
    """Write one train/test CSV pair per nested feature group.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose last column is the target and whose other columns are
        already in the desired ranking order.
    n_train : int
        Number of leading rows written to each train file; the remaining
        rows go to the matching test file.
    out_dir : str, optional
        Directory that receives the ``input Group<i>`` sub-directories.
        It is created if it does not exist.
    """
    os.makedirs(out_dir, exist_ok=True)
    n_cols = df.shape[1]
    target_pos = n_cols - 1
    # Group i = the first i ranked columns + the target. Each file is
    # written exactly once; the file names depend only on i.
    for i in range(1, n_cols):
        group_dir = os.path.join(out_dir, 'input Group%s' % i)
        os.makedirs(group_dir, exist_ok=True)
        subset = df.iloc[:, list(range(i)) + [target_pos]]
        subset.iloc[:n_train].to_csv(
            os.path.join(group_dir, 'trainGroup%s.csv' % i), index=False)
        subset.iloc[n_train:].to_csv(
            os.path.join(group_dir, 'testGroup%s.csv' % i), index=False)


def main():
    """Read ``input.csv`` and write every feature group under ``output``."""
    inDF = pd.read_csv(INPUT_CSV)
    # For a fixed number of training rows instead of a fraction, assign
    # nTrain directly (e.g. ``nTrain = 5``).
    nTrain = int(len(inDF) * TRAIN_FRACTION)  # for 70%
    resDF = rank_by_target_correlation(inDF)
    write_feature_groups(resDF, nTrain)


if __name__ == '__main__':
    main()
Add Comment
Please, Sign In to add comment