Advertisement
Danila_lipatov

my_data_checker

Dec 2nd, 2023 (edited)
616
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.26 KB | None | 0 0
  1. """
  2. If it will be helpful, please like this post
  3. """
  4. import pandas as pd
  5. import numpy as np
  6.  
  7.  
  8. def first_step_comp(data: pd.DataFrame) -> list:
  9.     result = {}
  10.     result_arr = []
  11.     for i, value in enumerate(data['ogrn']):
  12.         if f'{value}_{data["_name"][i]}' not in result:
  13.             result[f'{value}_{data["_name"][i]}'] = []
  14.         if str(data['identifier'][i]).isdigit():
  15.             result[f'{value}_{data["_name"][i]}'].append(data['identifier'][i])
  16.  
  17.     for key, value in result.items():
  18.         print(set(value))
  19.         if len(set(value)) > 1:
  20.             result_arr.append(key)
  21.  
  22.     pd.DataFrame({'ogrn_name': result_arr}).to_excel('out_more_one_first.xlsx')
  23.  
  24. def second_step_comp(data: pd.DataFrame) -> list:
  25.     result = {}
  26.     result_arr = []
  27.     for i, value in enumerate(data['_name']):
  28.         if f'{value}' not in result:
  29.             result[f'{value}'] = []
  30.         if str(data['identifier'][i]).isdigit():
  31.             result[f'{value}'].append(data['identifier'][i])
  32.  
  33.     for key, value in result.items():
  34.         print(set(value))
  35.         if len(set(value)) > 1:
  36.             result_arr.append(key)
  37.  
  38.     pd.DataFrame({'name': result_arr}).to_excel('out_more_one_second.xlsx')
  39.  
  40.     return result_arr
  41.  
  42. def third_step_comp(data: pd.DataFrame):
  43.     # result_arr = []
  44.  
  45.     for agency in data['agency'].unique():
  46.         result_arr = []
  47.         result = {}
  48.         data_check = data[data['agency'] == agency].reset_index().drop(columns='index')
  49.         for i, value in enumerate(data_check['_name']):
  50.             if f'{value}' not in result:
  51.                 result[f'{value}'] = []
  52.             if str(data_check['identifier'][i]).isdigit():
  53.                 result[f'{value}'].append(data_check['identifier'][i])
  54.  
  55.         for key, value in result.items():
  56.             print(set(value))
  57.             if len(set(value)) > 1:
  58.                 result_arr.append(key)
  59.  
  60.         pd.DataFrame({'name': result_arr}).to_excel(f'out_more_one_{agency}.xlsx')
  61.  
  62.  
  63. if __name__ == '__main__':
  64.     input_data = pd.read_excel("", sheet_name='Sheet1')
  65.     #
  66.     # for value
  67.     # print(input_data)
  68.     result_true = {}
  69.     result_false = {}
  70.     agency = ''
  71.     data = input_data[input_data[f'agency'] == f'{agency}']
  72.     print(data)
  73.     data = data.reset_index().drop(columns=['index'])
  74.     for i, value in enumerate(data['_name']):
  75.         print(i)
  76.         print(value)
  77.         if value not in result_true:
  78.             result_true[value] = []
  79.         if value not in result_false:
  80.             result_false[value] = []
  81.         if str(data['identifier'][i]).isdigit():
  82.             result_true[value].append(data['identifier'][i])
  83.         # try:
  84.         #     result_true[value].append(data['identifier'][i])
  85.         # except:
  86.         #     result_false[value].append(i)
  87.         # print(result_true)
  88.     result = {}
  89.     for key, value in result_true.items():
  90.         if key not in result:
  91.             result[key] = []
  92.         for val in range(len(value) - 1):
  93.             if value[val] == value[val + 1]:
  94.                 result[key].append('+')
  95.             else:
  96.                 result[key].append('-')
  97.     result_upd = []
  98.     for key, value in result.items():
  99.         if '-' in value:
  100.             result_upd.append(key)
  101.     stop = 'here'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement