Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib
- import html
- import re
- import sys
- from urllib.request import urlopen
def input_url():
    """Build the Google Finance financial-statements URL for a ticker.

    The ticker symbol is read from the command line (``sys.argv[1]``).

    Returns:
        The statements-page URL, with the ticker percent-encoded into the
        ``q`` query parameter.

    Raises:
        TypeError: If the script was not given exactly one argument.
    """
    # Imported locally so this function does not rely on which urllib
    # submodules the rest of the file happens to have loaded.
    from urllib.parse import quote_plus

    if len(sys.argv) != 2:
        raise TypeError('One argument is needed for the ticker')
    # Escape the ticker: a raw '&', '#', '+' or space would otherwise end or
    # corrupt the query parameter.  ':' is left as-is so exchange-prefixed
    # symbols such as 'NASDAQ:GOOG' produce the same URL as before.
    ticker = quote_plus(sys.argv[1], safe=':')
    return ('https://www.google.com/finance?q=' + ticker +
            '&fstype=ii&ei=1PVQUYDJO4jC0AH9Gw')
def _first_match(pattern, text, what, flags=re.DOTALL):
    """Return the first match of *pattern* in *text*, or raise ValueError."""
    found = re.search(pattern, text, flags)
    if found is None:
        raise ValueError('Could not find ' + what + ' in the page')
    return found


def _parse_dates(table):
    """Extract the text of every ``"rgt`` header cell of *table*."""
    dates = []
    for cell in re.findall(r'"rgt.+?</th>', table, re.DOTALL):
        # The date is the first line of the cell made up only of word
        # characters, whitespace and hyphens (markup lines never qualify).
        found = _first_match(r'^[\w\s-]+$', cell, 'a column date',
                             re.MULTILINE)
        dates.append(found.group())
    return dates


def _parse_rows(table):
    """Extract ``(label, values)`` for every ``lft lm`` row of *table*."""
    rows = []
    for block in re.findall(r'lft lm.+?</tr>', table, re.DOTALL):
        # The label runs from a '>' to the end of its line.
        found = _first_match(r'>[\w\s,;&/\-().#]+$', block, 'a row label',
                             re.MULTILINE)
        # Drop the leading '>' and decode HTML entities such as '&amp;'
        # and '&#39;' back into plain characters.
        label = html.unescape(found.group()[1:])
        values = []
        for cell in re.findall(r'>[0-9.,-]+<', block):
            text = cell[1:-1]
            # A lone '-' marks an empty cell and is kept as the string
            # '-'; everything else becomes a float (thousands separators
            # removed).
            values.append(text if text == '-'
                          else float(text.replace(',', '')))
        rows.append((label, values))
    return rows


def parse(raw_input):
    """Extract the statement tables from a financial-statements page.

    Two sections are read from the HTML: the text between the markers
    ``balannualdiv`` and ``casinterimdiv``, and the text between
    ``incannualdiv`` and ``balinterimdiv`` (presumably the annual balance
    sheet and annual income statement).  From each section's ``<table>``
    the column dates and the data rows are pulled out.

    Args:
        raw_input: The page HTML as a string.

    Returns:
        ``[(bal_dates, bal_rows), (inc_dates, inc_rows), title]`` where
        each ``*_dates`` is a list of strings, each ``*_rows`` is a list of
        ``(label, values)`` tuples whose values are floats or the string
        ``'-'`` for an empty cell, and ``title`` is the text found between
        ``'Financial Statements for '`` and the following ``' -'``.

    Raises:
        ValueError: If an expected part of the page is missing, or a
            numeric cell cannot be converted to a float.
    """
    title = _first_match(r'Financial Statements for (.+?) -', raw_input,
                         'the statement title').group(1)

    # Narrow the page down to the two sections of interest ...
    bal_raw = _first_match(r'balannualdiv.+casinterimdiv', raw_input,
                           'the balannualdiv section').group()
    inc_raw = _first_match(r'incannualdiv.+balinterimdiv', raw_input,
                           'the incannualdiv section').group()

    # ... and then to the <table> inside each of them.
    bal_table = _first_match(r'<table.+</table>', bal_raw,
                             'the balannualdiv table').group()
    inc_table = _first_match(r'<table.+</table>', inc_raw,
                             'the incannualdiv table').group()

    return [(_parse_dates(bal_table), _parse_rows(bal_table)),
            (_parse_dates(inc_table), _parse_rows(inc_table)),
            title]
def _yoy_average(values):
    """Return the mean step-to-step change of *values* in percent.

    Each value is compared with the one that follows it in the list, the
    following value being the base: ``(value - next) / next``.
    (Presumably the columns are newest-first, making this year-over-year
    growth - confirm against the data source.)  ``'-'`` placeholders count
    as 0, and steps whose base is 0 are skipped.  Returns ``'N/A'`` when no
    step can be computed.  The input list is not modified.
    """
    filled = [0 if value == '-' else value for value in values]
    # Walk from the end of the list towards the front, pairing each base
    # with the value that precedes it.
    backwards = filled[::-1]
    rates = [(value - base) / base
             for base, value in zip(backwards, backwards[1:])
             if base != 0]
    if not rates:
        return 'N/A'
    return round((sum(rates) / len(rates)) * 100, 2)


def _total_change(values):
    """Return the percent change from the last float to the first float.

    Non-float entries (such as the ``'-'`` placeholder) are ignored.
    Returns ``'N/A'`` when there is no float at all or when the base (the
    last float) is 0, since the change is then undefined.
    """
    numbers = [value for value in values if isinstance(value, float)]
    if not numbers:
        return 'N/A'
    end, start = numbers[0], numbers[-1]
    if start == 0:
        return 'N/A'
    return round(((end - start) / start) * 100, 2)


def _percent(value):
    """Format a computed percentage, leaving ``'N/A'`` without a '%'."""
    return value if value == 'N/A' else str(value) + '%'


def output(name, dates, data, title):
    """Write one statement table to a CSV file.

    Layout: the first row holds *title* in the second column; the second
    row holds the *dates* starting in the third column, followed by the
    ``YoY Average`` and ``Total Change`` headings; every following row
    holds a label in the second column, its values, and the two computed
    percentages.

    Args:
        name: Path of the file to create (an existing file is overwritten).
        dates: Column headings, one per value column.
        data: Iterable of ``(label, values)`` pairs; each value is a float
            or the string ``'-'`` for an empty cell.
        title: Text for the title row.
    """
    import csv

    # The default newline handling is kept on purpose: together with
    # lineterminator='\n' it produces the platform's usual line endings,
    # exactly as the earlier hand-built text did.
    with open(name, 'w', encoding='utf-8') as handle:
        # csv takes care of quoting any field containing a comma, quote
        # or newline, not just labels containing a comma.
        writer = csv.writer(handle, lineterminator='\n')
        writer.writerow(['', title])
        writer.writerow(['', ''] + list(dates) +
                        ['YoY Average', 'Total Change'])
        for row in data:
            label, values = row[0], row[1]
            writer.writerow(['', label] +
                            [str(value) for value in values] +
                            [_percent(_yoy_average(values)),
                             _percent(_total_change(values))])
def main():
    """Fetch the statements page for the ticker and write both CSV files.

    The URL comes from ``input_url()``; the downloaded page is handed to
    ``parse()`` and its two tables are written with ``output()`` to
    ``balence.csv`` and ``income.csv`` in the current directory.
    """
    stock_url = input_url()
    # The context manager closes the HTTP response even if reading or
    # decoding fails.
    with urlopen(stock_url) as site:
        site_html_raw = site.read().decode()
    (bal_dates, bal_rows), (inc_dates, inc_rows), title = parse(site_html_raw)
    # NOTE: the 'balence.csv' spelling is kept so that existing consumers
    # of the output file keep working.
    output('balence.csv', bal_dates, bal_rows, title)
    output('income.csv', inc_dates, inc_rows, title)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement