# Finance

import urllib
import html
import re
import sys

from urllib.request import urlopen


def input_url():
    '''
    Returns the Google Finance URL for the ticker given on the command line.

    The ticker is read from ``sys.argv[1]`` and percent-encoded before it
    is placed in the query string, so symbols containing characters such
    as ``:``, ``&``, ``#`` or spaces cannot corrupt the URL.

    Returns:
        The URL string to request for that ticker.

    Raises:
        TypeError: if the script was not given exactly one argument.
    '''
    # Imported locally so the function does not depend on a module-level
    # import of urllib.parse
    from urllib.parse import quote_plus

    if len(sys.argv) != 2:
        raise TypeError('One argument is needed for the ticker')
    ticker = sys.argv[1]

    return ('https://www.google.com/finance?q=' + quote_plus(ticker) +
            '&fstype=ii&ei=1PVQUYDJO4jC0AH9Gw')

def parse(raw_input):
    '''
    Extracts the annual balance sheet and income statement from page HTML.

    Args:
        raw_input: the decoded HTML text of the statements page.

    Returns:
        A three item list
        ``[(bal_dates, bal_rows), (inc_dates, inc_rows), title]``.
        Each ``*_dates`` is a list of column heading strings. Each
        ``*_rows`` is a list of ``(label, values)`` tuples where
        ``values`` holds floats, or the string ``'-'`` for cells the page
        shows as a single dash. ``title`` is the text found after
        'Financial Statements for ' and before the following ' -'.

    Raises:
        AttributeError: if an expected section is missing, because
            ``re.search`` then returns ``None``.
    '''
    # Pulls the title; the slice drops the 25 character
    # 'Financial Statements for ' prefix and the trailing ' -'
    title = re.search(r'Financial Statements for .+? -', raw_input,
                      re.DOTALL).group()
    title = title[25:-2]

    def table_of(section_pattern):
        # Narrows the page to one statement section, then to the outermost
        # <table>...</table> span inside it (both matches are greedy)
        section = re.search(section_pattern, raw_input, re.DOTALL).group()
        return re.search(r'\<table.+\<\/table\>', section,
                         re.DOTALL).group()

    bal_out_text = table_of(r'balannualdiv.+casinterimdiv')
    inc_out_text = table_of(r'incannualdiv.+balinterimdiv')

    def parse_dates(table_text):
        # Each heading cell starts at "rgt and runs to its closing </th>;
        # the heading text is the first line made only of word characters,
        # whitespace and dashes
        headings = re.findall(r'"rgt.+?\<\/th\>', table_text, re.DOTALL)
        return [re.search(r'^(\w|\s|-)+$', heading, re.MULTILINE).group()
                for heading in headings]

    def block_parse(table_text):
        # One block per data row: from the 'lft lm' label cell to </tr>
        blocks = re.findall(r'lft lm.+?\<\/tr\>', table_text, re.DOTALL)
        parsed = []
        for block in blocks:
            label = re.search(r'\>(\w|\s|[\,\;\&\/\-\(\)\.\#])+$', block,
                              re.MULTILINE).group()
            # Drop the leading '>' and decode HTML entities. html.unescape
            # consumes the whole reference, so '&#39;' becomes "'" instead
            # of leaving a stray ';' behind.
            label = html.unescape(label[1:])
            values = []
            for cell in re.findall(r'\>[0-9.,-]+\<', block):
                text = cell[1:-1]
                # A lone dash is kept as a string; everything else is a
                # number with thousands separators
                if text != '-':
                    text = float(text.replace(',', ''))
                values.append(text)
            parsed.append((label, values))
        return parsed

    return [(parse_dates(bal_out_text), block_parse(bal_out_text)),
            (parse_dates(inc_out_text), block_parse(inc_out_text)),
            title]

def _csv_field(text):
    '''Quotes text for CSV when it holds a comma, quote or line break.'''
    if any(ch in text for ch in ',"\r\n'):
        return '"' + text.replace('"', '""') + '"'
    return text


def _yoy_average(values):
    '''
    Returns the mean change between adjacent values, as a percentage.

    Each value is compared with the one that follows it in the list,
    which serves as the base. '-' entries count as 0 and pairs whose base
    is 0 are skipped. Returns 'N/A' when no pair is usable.
    '''
    # Work on a copy so the caller's row is left untouched
    filled = [0 if value == '-' else value for value in values]
    rates = []
    # Walk from the end of the row towards the front
    for idx in range(len(filled) - 1, 0, -1):
        base = filled[idx]
        if base == 0:
            continue
        rates.append((filled[idx - 1] - base) / base)
    if not rates:
        return 'N/A'
    return round((sum(rates) / len(rates)) * 100, 2)


def _total_change(values):
    '''
    Returns the percentage change across the row, or 'N/A'.

    Compares the first non-zero float in the list against the last
    non-zero float, which is used as the base. Zero values are skipped on
    purpose: they would otherwise be a division by zero as the base.
    NOTE(review): columns are presumably ordered newest first - confirm.
    '''
    first = next((v for v in values if isinstance(v, float) and v), None)
    if first is None:
        return 'N/A'
    # A usable value exists, so the reverse scan always finds one too
    last = next(v for v in reversed(values) if isinstance(v, float) and v)
    return round(((first - last) / last) * 100, 2)


def output(name, dates, data, title):
    '''
    Writes one statement to a CSV file.

    The first line holds the title, the second the column headings
    (dates followed by 'YoY Average' and 'Total Change'), then one line
    per row of data. Every line starts with an empty first column.

    Args:
        name: path of the file to create or overwrite.
        dates: list of column heading strings.
        data: list of ``(label, values)`` tuples; ``values`` holds numbers
            or the string '-'. The lists are not modified.
        title: text written on the first line.
    '''
    lines = [
        ',' + _csv_field(title),
        ',' + ''.join(',' + _csv_field(date) for date in dates) +
        ',YoY Average,Total Change',
    ]
    for label, values in data:
        cells = ''.join(',' + str(value) for value in values)
        lines.append(',' + _csv_field(label) + cells +
                     ',' + str(_yoy_average(values)) + '%,' +
                     str(_total_change(values)) + '%')

    # Everything is computed before the file is opened, so a failure above
    # cannot leave a truncated file behind; 'with' guarantees the close
    with open(name, 'w') as handle:
        handle.write('\n'.join(lines) + '\n')

def main():
    '''
    Downloads the statements page for the ticker given on the command
    line and writes the parsed tables to 'balence.csv' and 'income.csv'.
    '''
    stock_url = input_url()

    # The context manager closes the HTTP response even if reading fails
    with urllib.request.urlopen(stock_url) as site:
        # Honour the charset declared by the server, falling back to the
        # UTF-8 default when none is given
        charset = site.headers.get_content_charset() or 'utf-8'
        site_html_raw = site.read().decode(charset)

    (bal_dates, bal_rows), (inc_dates, inc_rows), title = parse(site_html_raw)

    output('balence.csv', bal_dates, bal_rows, title)
    output('income.csv', inc_dates, inc_rows, title)


# Run the scraper only when this file is executed as a script, not when
# it is imported as a module
if __name__ == '__main__':
    main()