Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- If this is helpful, please like this post.
- """
- import pandas as pd
- import numpy as np
def first_step_comp(data: pd.DataFrame) -> list:
    """Find (ogrn, name) pairs linked to more than one numeric identifier.

    Rows are grouped under the key ``f"{ogrn}_{_name}"``. For each key, only
    identifiers whose string form consists solely of digits are collected.
    Every key that ends up with more than one distinct identifier is written
    to ``out_more_one_first.xlsx`` (column ``ogrn_name``) and returned.

    Args:
        data: Frame with the columns ``ogrn``, ``_name`` and ``identifier``.

    Returns:
        The conflicting keys, in order of their first appearance in ``data``.
    """
    ids_by_key = {}
    # Walk the columns in lockstep instead of looking rows up by label, so the
    # frame's index does not have to be 0..n-1 (e.g. after filtering rows).
    rows = zip(data['ogrn'], data['_name'], data['identifier'])
    for ogrn, name, identifier in rows:
        collected = ids_by_key.setdefault(f'{ogrn}_{name}', [])
        if str(identifier).isdigit():
            collected.append(identifier)

    result_arr = []
    for key, collected in ids_by_key.items():
        distinct_ids = set(collected)
        print(distinct_ids)  # per-key trace, kept from the original output
        if len(distinct_ids) > 1:
            result_arr.append(key)

    pd.DataFrame({'ogrn_name': result_arr}).to_excel('out_more_one_first.xlsx')
    # The signature promises a list; hand the keys back like second_step_comp.
    return result_arr
def second_step_comp(data: pd.DataFrame) -> list:
    """Find names that are linked to more than one numeric identifier.

    Rows are grouped by ``str(_name)``. For each name, only identifiers whose
    string form consists solely of digits are collected. Every name that ends
    up with more than one distinct identifier is written to
    ``out_more_one_second.xlsx`` (column ``name``) and returned.

    Args:
        data: Frame with the columns ``_name`` and ``identifier``.

    Returns:
        The conflicting names, in order of their first appearance in ``data``.
    """
    ids_by_name = {}
    # Pair the columns positionally; a label lookup such as
    # data['identifier'][i] breaks when the index is not 0..n-1.
    for name, identifier in zip(data['_name'], data['identifier']):
        collected = ids_by_name.setdefault(f'{name}', [])
        if str(identifier).isdigit():
            collected.append(identifier)

    result_arr = []
    for name, collected in ids_by_name.items():
        distinct_ids = set(collected)
        print(distinct_ids)  # per-name trace, kept from the original output
        if len(distinct_ids) > 1:
            result_arr.append(name)

    pd.DataFrame({'name': result_arr}).to_excel('out_more_one_second.xlsx')
    return result_arr
def third_step_comp(data: pd.DataFrame):
    """Report, per agency, the names linked to more than one numeric identifier.

    For every distinct value in the ``agency`` column the matching rows are
    grouped by ``str(_name)``. Only identifiers whose string form consists
    solely of digits are collected. Names with more than one distinct
    identifier are written to ``out_more_one_{agency}.xlsx`` (column
    ``name``); one workbook is produced per agency, even when it has no
    conflicting names.

    Args:
        data: Frame with the columns ``agency``, ``_name`` and ``identifier``.
    """
    for agency in data['agency'].unique():
        agency_rows = data[data['agency'] == agency]

        ids_by_name = {}
        # Iterate the filtered columns positionally. This removes the need for
        # reset_index().drop(columns='index'), which raised KeyError whenever
        # the frame's index carried a name.
        for name, identifier in zip(agency_rows['_name'], agency_rows['identifier']):
            collected = ids_by_name.setdefault(f'{name}', [])
            if str(identifier).isdigit():
                collected.append(identifier)

        result_arr = []
        for name, collected in ids_by_name.items():
            distinct_ids = set(collected)
            print(distinct_ids)  # per-name trace, kept from the original output
            if len(distinct_ids) > 1:
                result_arr.append(name)

        pd.DataFrame({'name': result_arr}).to_excel(f'out_more_one_{agency}.xlsx')
if __name__ == '__main__':
    # Ad-hoc check for a single agency: collect the digit-only identifiers
    # seen for every name, then flag names whose consecutive identifiers are
    # not all equal. The workbook path and the agency are left blank here and
    # have to be filled in before running.
    input_data = pd.read_excel("", sheet_name='Sheet1')

    result_true = {}
    result_false = {}
    agency = ''

    data = input_data[input_data['agency'] == f'{agency}']
    print(data)
    # Renumber the rows from 0 so the positional counter below is also a
    # valid label for data['identifier'][...].
    data = data.reset_index().drop(columns=['index'])

    for row, name in enumerate(data['_name']):
        print(row)
        print(name)
        # Every name gets an entry in both dicts, even if nothing is added.
        result_true.setdefault(name, [])
        result_false.setdefault(name, [])
        if str(data['identifier'][row]).isdigit():
            result_true[name].append(data['identifier'][row])

    # Mark each adjacent pair of identifiers: '+' when equal, '-' otherwise.
    result = {}
    for name, ids in result_true.items():
        marks = result.setdefault(name, [])
        for left, right in zip(ids, ids[1:]):
            marks.append('+' if left == right else '-')

    # Names with at least one mismatching pair.
    result_upd = [name for name, marks in result.items() if '-' in marks]

    stop = 'here'  # convenient line for a debugger breakpoint
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement