Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # import xlrd
- # # Open a workbook
- # workbook = xlrd.open_workbook('call.xls')
- # # Loads only current sheets to memory
- # workbook = xlrd.open_workbook('call.xls', on_demand = True)
- # print(workbook)
- # # Load a specific sheet by index
- # worksheet = workbook.sheet_by_index(0)
- # # Retrieve the value from cell at indices (0,0)
- # for i in worksheet:
- # print(worksheet.cell(i, i).value)
- from datetime import datetime
- import xlrd
- import re;
- from itertools import filterfalse
def formater_phone(string):
    """Return the phone value unchanged, mapping a missing value to ``None``.

    The function is currently a pass-through: the digit-only normalisation
    is commented out in the original source and is not applied here.

    Args:
        string: Raw phone cell value, or ``None``.

    Returns:
        ``None`` when ``string`` is ``None``; otherwise ``string`` itself.
    """
    # Identity test instead of ``== None``: equality could be hijacked by a
    # custom ``__eq__`` and is not the idiomatic way to test for None.
    if string is None:
        return None
    return string
def purge_dublicates(X):
    """Return the items of ``X`` with repeats removed.

    When a value occurs several times, only its last occurrence is kept;
    the relative order of the surviving items is preserved. Membership is
    tested with ``in`` on the remaining tail, so unhashable items work too.

    Args:
        X: A sliceable sequence (e.g. a list).

    Returns:
        A new list containing each distinct item once.
    """
    return [
        item
        for position, item in enumerate(X)
        # Keep the item only if nothing equal to it follows later on.
        if item not in X[position + 1:]
    ]
def drop_string(string):
    """Format a ``house-flat`` token as ``'<house>, кв <flat>'``.

    The original implementation split the token but then indexed the
    *characters* of the input, so ``'12-34'`` became ``'1, кв 2'``. The
    split parts are used here instead.

    Args:
        string: House token, optionally followed by ``-`` and a flat
            number, e.g. ``'12-34'``.

    Returns:
        ``'<house>, кв <flat>'`` when the token contains a dash, otherwise
        the token unchanged. Anything after a second dash is ignored.

    Raises:
        AttributeError: If ``string`` has no ``split`` method.
    """
    parts = string.split('-')
    if len(parts) < 2:
        # No dash: the token is just the house number.
        return parts[0]
    return parts[0] + ', кв ' + parts[1]
def convert_to_normal(string):
    """Join the first three address tokens into one comma-separated line.

    The third token is passed through ``drop_string`` before joining. The
    resulting address is printed. If anything fails (for example fewer
    than three tokens), the exception is printed and an empty string is
    returned instead.

    Args:
        string: Indexable collection of address tokens.

    Returns:
        The joined address, or ``''`` on any error.
    """
    try:
        pieces = (
            str(string[0]),
            str(string[1]),
            str(drop_string(string[2])),
        )
        address = ', '.join(pieces)
        print(address)
    except Exception as e:
        print(e)
        address = ''
    return address
def formater_address(string):
    """Normalise a raw address string, accepting only addresses in Самара.

    Steps, in order: strip any six-digit run (postcode-like), reject the
    address unless it mentions ``'Самара'``, delete a fixed list of noise
    fragments, split into tokens, drop duplicate tokens and stray
    abbreviations, merge a numeric second token with the token after it,
    and finally hand the tokens to ``convert_to_normal``.

    Args:
        string: Raw address text. Non-string values (e.g. a numeric or
            empty spreadsheet cell) are treated as "no address".

    Returns:
        ``None`` if ``string`` is not text or does not contain
        ``'Самара'``; otherwise whatever ``convert_to_normal`` returns for
        the cleaned tokens (``''`` when fewer than three tokens remain).
    """
    # A non-text cell used to raise TypeError inside re.sub and abort the
    # caller's whole parse; treat it like any other unusable address.
    if not isinstance(string, str):
        return None

    string = re.sub(r'\d{6}', '', string)
    if not re.search('Самара', string):
        return None

    # NOTE(review): as written this swaps a space for a space. The first
    # argument may originally have been another whitespace character
    # (double or non-breaking space) - confirm against the original file.
    string = string.replace(' ', ' ')
    string = string.replace('д.', ',')

    # Fragments are removed as plain substrings, in this exact order.
    # NOTE(review): the whitespace-only entries look like single spaces
    # here; if they really are, every space is removed before the split
    # below. Confirm what characters they were meant to be.
    list_exclude = ['cамарская','г.', ' ',' ','пр-кт',',','область','ул','ш','р-н','кв', 'р-н.','.','Россия','обл.', 'Самарская', 'УЛ.', 'УЛ', 'домофон','обл', ' ',' ','этаж','поъез','поъезд','подъезд','ПР-КТ','не работает','Страна','Промыленный','ПР','Красноглинский','промыленный']
    for fragment in list_exclude:
        string = string.replace(fragment, '')
    string = string.strip()

    tokens = [token for token in string.split(' ') if token != '']
    tokens = purge_dublicates(tokens)

    # Whole-token exclusions. Built as a new list rather than deleting
    # from the list while enumerating it, which skips the next element.
    stray_tokens = ['д','Карла','КАРЛА','п','ос', 'г','км','Стара','вор','СЗАГОРА','эт']
    tokens = [token for token in tokens if token not in stray_tokens]

    # A numeric second token is glued to the token that follows it. The
    # length guard avoids the IndexError the original raised when no
    # following token exists.
    if len(tokens) > 2 and tokens[1].isdigit():
        tokens[1:3] = [tokens[1] + ' ' + tokens[2]]

    return convert_to_normal(tokens)
def parse_data_exel(FILE):
    """Parse the first sheet of an Excel workbook into a list of records.

    The first row is skipped. For every other row, column 5 is passed to
    ``formater_address``; rows for which it returns ``None`` are dropped.
    Column 2 is decoded as an Excel date using the workbook's date mode.

    Args:
        FILE: Path of the workbook, passed to ``xlrd.open_workbook``.

    Returns:
        A list of dicts with the keys ``'FIO'``, ``'date_birth'``
        (``'Y-M-D'``, not zero-padded), ``'address'``, ``'addition'``,
        ``'phone'`` (``80000000000`` when the cell is empty) and
        ``'reason'``. If anything goes wrong the error is printed and an
        empty list is returned, so callers can always iterate the result.
    """
    rb = None
    try:
        rb = xlrd.open_workbook(FILE, on_demand=True)
        sheet = rb.sheet_by_index(0)

        pacient_list = []
        for rownum in range(1, sheet.nrows):
            val = sheet.row_values(rownum)

            address = formater_address(val[5])
            if address is None:
                # Decode the date only for rows that are kept, so a bad
                # date in a discarded row cannot abort the whole parse.
                continue

            # Only the date part is used; the time fields are ignored.
            y, m, d, _hour, _minute, _second = xlrd.xldate_as_tuple(
                val[2], rb.datemode
            )
            date_of_birth = '{0}-{1}-{2}'.format(y, m, d)

            phone = formater_phone(val[7])
            pacient_list.append({
                'FIO': val[1],
                'date_birth': date_of_birth,
                'address': address,
                'addition': val[6],
                'phone': phone if phone != '' else 80000000000,
                'reason': val[12],
            })
        return pacient_list
    except Exception as e:
        # Best-effort boundary: report the problem and return no records.
        print('Error parse document - %s' % (e))
        return []
    finally:
        # on_demand=True keeps the file resources held until released.
        if rb is not None:
            rb.release_resources()
# Parse the workbook next to the script and print each record's birth date.
pacient = parse_data_exel('call.xls')
# A failed parse has already been reported by parse_data_exel; fall back to
# an empty sequence so a missing result does not raise TypeError here.
for record in pacient or []:
    print(record['date_birth'])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement