Advertisement
pharmokan

pandas udemy course python helper functions

Sep 19th, 2019
1,037
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.06 KB | None | 0 0
  1. from ftplib import FTP, error_perm
  2. import os
  3. import glob
  4. import pandas
  5. import numpy
  6. import patoolib
  7. import seaborn as sns
  8. import simplekml
  9.  
  10. def ftpDownloader(stationId,startYear,endYear,url="ftp.pyclass.com",user="student@pyclass.com",passwd="student123"):
  11.     ftp=FTP(url)        
  12.     ftp.login(user,passwd)
  13.     if not os.path.exists("C:\\in"):          
  14.         os.makedirs("C:\\in")
  15.     os.chdir("C:\\in")
  16.     for year in range(startYear,endYear+1):
  17.         fullpath='/Data/%s/%s-%s.gz' % (year,stationId,year)    
  18.         filename=os.path.basename(fullpath)
  19.         try:
  20.             with open(filename,"wb") as file:
  21.                 ftp.retrbinary('RETR %s' % fullpath, file.write)
  22.             print("%s succesfully downloaded" % filename)
  23.         except error_perm:
  24.             print("%s is not available" % filename)
  25.             os.remove(filename)    
  26.     ftp.close()
  27.            
  28. def extractFiles(indir="C:\\in",out="C:\\in\\Extracted"):
  29.     os.chdir(indir)
  30.     archives=glob.glob("*.gz")
  31.     print (archives)
  32.     if not os.path.exists(out):
  33.        os.makedirs(out)
  34.     files=os.listdir("Extracted")
  35.     print(files)
  36.     for archive in archives:
  37.         if archive[:-3] not in files:
  38.             patoolib.extract_archive(archive,outdir=out)
  39.            
  40. def addField(indir="C:\\in\\Extracted"):
  41.     os.chdir(indir)
  42.     fileList=glob.glob("*")
  43.     print ("fileList" ,fileList)
  44.     for filename in fileList:
  45.         df=pandas.read_csv(filename,sep='\s+',header=None)
  46.         df["Station"]=[filename.rsplit("-",1)[0]]*df.shape[0]
  47.         df.to_csv(filename+".csv",index=None,header=None)
  48.         os.remove(filename)
  49.        
  50. def concatenate(indir="C:\\in\\Extracted",outfile="C:\\out\\Concatenated.csv"):
  51.     os.chdir(indir)
  52.     fileList=glob.glob("*.csv")
  53.     dfList=[]
  54.     colnames=["Year","Month","Day","Hour","Temp","DewTemp","Pressure","WindDir","WindSpeed","Sky","Precip1","Precip6","ID"]
  55.     for filename in fileList:
  56.         print (filename)
  57.         df=pandas.read_csv(filename,header=None)
  58.         dfList.append(df)
  59.     concatDf=pandas.concat(dfList,axis=0)    
  60.     concatDf.columns=colnames
  61.     concatDf.head()
  62.     concatDf.to_csv(outfile,index=None)
  63.    
  64. def merge(left="C:\\out\\Concatenated.csv",right="C:\\CS\\station-info.txt",output="C:\\out\\Concatenated-Merged.csv"):
  65.     leftDf=pandas.read_csv(left)  
  66.     rightDf=pandas.read_fwf(right,converters={"USAF":str,"WBAN":str})
  67.     rightDf["USAF_WBAN"]=rightDf["USAF"]+"-"+rightDf["WBAN"]
  68.     mergedDf=pandas.merge(leftDf,rightDf.ix[:,["USAF_WBAN","STATION NAME","LAT","LON"]],left_on="ID",right_on="USAF_WBAN")
  69.     mergedDf.to_csv(output)
  70.  
  71. def pivot(infile="C:\\out\\Concatenated-Merged.csv",outfile="C:\\out\\Pivoted.csv"):
  72.     df=pandas.read_csv(infile)
  73.     df=df.replace(-9999,numpy.nan)
  74.     df['Temp']=df["Temp"]/10.0
  75.     table=pandas.pivot_table(df,index=["ID"],columns="Year",values="Temp")
  76.     table.to_csv(outfile)
  77.     return table
  78.  
  79. def plot(outfigure="C:\\out\\Ploted.png"):
  80.     df=pivot()
  81.     df.T.plot(subplots=True,kind='bar')
  82.     sns.plt.savefig(outfigure,dpi=200)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement