Advertisement
tyler569

Finance

Mar 28th, 2013
200
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.57 KB | None | 0 0
  1. import urllib
  2. import html
  3. import re
  4. import sys
  5.  
  6. from urllib.request import urlopen
  7.  
  8.  
  9. def input_url():
  10.     '''
  11.    Returns the URL of a stock tiker from the ticker
  12.    Needs more...
  13.    '''
  14.     if len(sys.argv) != 2:
  15.         raise TypeError('One argument is needed for the ticker')
  16.     ticker = sys.argv[1]
  17.  
  18.     url = ('https://www.google.com/finance?q='+ticker+
  19.            '&fstype=ii&ei=1PVQUYDJO4jC0AH9Gw')
  20.  
  21.     return url
  22.  
  23. def parse(raw_input):
  24.     '''
  25.    Strips the passage to the raw <table></table>
  26.    '''
  27.     # Pulls the title
  28.     title = re.search(r'Financial Statements for .+? -', raw_input,
  29.                         re.DOTALL).group()
  30.     title = title[25:-2]
  31.    
  32.     # Pulls out only the blocks needed
  33.     bal_raw = re.search(r'balannualdiv.+casinterimdiv', raw_input,
  34.                         re.DOTALL).group()
  35.     inc_raw = re.search(r'incannualdiv.+balinterimdiv', raw_input,
  36.                         re.DOTALL).group()
  37.    
  38.     # Pulls out the exact needed parts
  39.     bal_out_text = re.search(r'\<table.+\<\/table\>', bal_raw,
  40.                              re.DOTALL).group()
  41.     inc_out_text = re.search(r'\<table.+\<\/table\>', inc_raw,
  42.                              re.DOTALL).group()
  43.  
  44.     # Pulls dates
  45.     bal_dates = re.findall(r'"rgt.+?\<\/th\>', bal_out_text, re.DOTALL)
  46.     inc_dates = re.findall(r'"rgt.+?\<\/th\>', inc_out_text, re.DOTALL)
  47.    
  48.     def parse_dates(dates):
  49.         out_dates = []
  50.         for date in dates:
  51.             out_date = re.search(r'^(\w|\s|-)+$', date, re.MULTILINE).group()
  52.             out_dates.append(out_date)
  53.         return out_dates
  54.        
  55.     dates_bal = parse_dates(bal_dates)
  56.     dates_inc = parse_dates(inc_dates)
  57.                              
  58.     # Seperates blocks
  59.     bal_blocks = re.findall(r'lft lm.+?\<\/tr\>', bal_out_text, re.DOTALL)
  60.     inc_blocks = re.findall(r'lft lm.+?\<\/tr\>', inc_out_text, re.DOTALL)
  61.    
  62.     def block_parse(blocks):
  63.         parsed = []
  64.         for block in blocks:
  65.             numbers_out = []
  66.             title = re.search(r'\>(\w|\s|[\,\;\&\/\-\(\)\.\#])+$', block,
  67.                               re.MULTILINE).group()
  68.             title = title[1:].replace('&amp;','&').replace('&#39','\'')
  69.             numbers = re.findall(r'\>[0-9.,-]+\<', block)
  70.             for number in numbers:
  71.                 num_out = number[1:-1]
  72.                 if num_out != '-':
  73.                     num_out = float(num_out.replace(',',''))
  74.                 numbers_out.append(num_out)
  75.             parsed.append((title,numbers_out))
  76.         return parsed
  77.    
  78.     parsed_bal = block_parse(bal_blocks)
  79.     parsed_inc = block_parse(inc_blocks)
  80.    
  81.     return [(dates_bal,parsed_bal),(dates_inc,parsed_inc),title]
  82.    
  83. def output(name,dates,data,title):
  84.     output = ',' + title + '\n,'
  85.     def average(list):
  86.         for i in range(len(list)):
  87.             if list[i] == '-':
  88.                 list[i] = 0
  89.         avgs = []
  90.         for i in range(len(list) - 1):
  91.             i += 1
  92.             if list[-i] == 0:
  93.                 continue
  94.             avgs.append((list[-i-1] - list[-i]) / list[-i])
  95.         if avgs:
  96.             avg_avg = round((sum(avgs) / len(avgs))*100, 2)
  97.         else:
  98.             avg_avg = 'N/A'
  99.         return avg_avg
  100.     def change(list):
  101.         start = False
  102.         end = False
  103.         for i in range(len(list)):
  104.             if isinstance(list[i],float) and not end:
  105.                 end = list[i]
  106.             if isinstance(list[-(i+1)],float) and not start:
  107.                 start = list[-(i+1)]
  108.         if end:
  109.             change = round(((end - start) / start)*100, 2)
  110.         else:
  111.             change = 'N/A'
  112.         return change
  113.     file = open(name,'w+')
  114.     for date in dates:
  115.         output = output + ',' + date
  116.     output = output + ',YoY Average,Total Change\n'
  117.     for datum in data:
  118.         if ',' in datum[0]:
  119.             new_datum = '"' + datum[0] + '"'
  120.         else:
  121.             new_datum = datum[0]
  122.         output = output + ',' + new_datum
  123.         for number in datum[1]:
  124.             output = output + ',' + str(number)
  125.         output = (output + ',' + str(average(datum[1])) + '%,' +
  126.                   str(change(datum[1])) + '%\n')
  127.     file.write(output)
  128.     file.close()
  129.  
  130. def main():
  131.     stock_url = input_url()
  132.    
  133.     site = urllib.request.urlopen(stock_url)
  134.     site_html_raw = site.read().decode()
  135.    
  136.     parsed = parse(site_html_raw)
  137.    
  138.     output('balence.csv', parsed[0][0], parsed[0][1],parsed[2])
  139.     output('income.csv', parsed[1][0], parsed[1][1],parsed[2])
  140.    
  141.    
  142. if __name__ == '__main__':
  143.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement