Advertisement
VssA

python_urfu

Nov 17th, 2022 (edited)
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.21 KB | None | 0 0
  1. import csv
  2. from openpyxl import Workbook
  3. from openpyxl.utils import get_column_letter
  4. from openpyxl.styles import Font, Border, Side
  5.  
  6.  
  7. class Vacancy:
  8. currency_to_rub = {
  9. "AZN": 35.68, "BYR": 23.91, "EUR": 59.90, "GEL": 21.74, "KGS": 0.76,
  10. "KZT": 0.13, "RUR": 1, "UAH": 1.64, "USD": 60.66, "UZS": 0.0055,
  11. }
  12.  
  13. def __init__(self, vacancy):
  14. self.name = vacancy['name']
  15. self.salary_from = int(float(vacancy['salary_from']))
  16. self.salary_to = int(float(vacancy['salary_to']))
  17. self.salary_currency = vacancy['salary_currency']
  18. self.salary_average = self.currency_to_rub[self.salary_currency] * (self.salary_from + self.salary_to) / 2
  19. self.area_name = vacancy['area_name']
  20. self.year = int(vacancy['published_at'][:4])
  21.  
  22.  
  23. class DataSet:
  24. def __init__(self, file_name, vacancy_name):
  25. self.file_name = file_name
  26. self.vacancy_name = vacancy_name
  27.  
  28. @staticmethod
  29. def increment(dictionary, key, amount):
  30. if key in dictionary:
  31. dictionary[key] += amount
  32. else:
  33. dictionary[key] = amount
  34.  
  35. @staticmethod
  36. def average(dictionary):
  37. new_dictionary = {}
  38. for key, values in dictionary.items():
  39. new_dictionary[key] = int(sum(values) / len(values))
  40. return new_dictionary
  41.  
  42. def csv_reader(self):
  43. with open(self.file_name, mode='r', encoding='utf-8-sig') as file:
  44. reader = csv.reader(file)
  45. header = next(reader)
  46. header_length = len(header)
  47. for row in reader:
  48. if '' not in row and len(row) == header_length:
  49. yield dict(zip(header, row))
  50.  
  51. def get_statistic(self):
  52. salary = {}
  53. salary_of_vacancy_name = {}
  54. salary_city = {}
  55. count_of_vacancies = 0
  56.  
  57. for vacancy_dictionary in self.csv_reader():
  58. vacancy = Vacancy(vacancy_dictionary)
  59. self.increment(salary, vacancy.year, [vacancy.salary_average])
  60. if vacancy.name.find(self.vacancy_name) != -1:
  61. self.increment(salary_of_vacancy_name, vacancy.year, [vacancy.salary_average])
  62. self.increment(salary_city, vacancy.area_name, [vacancy.salary_average])
  63. count_of_vacancies += 1
  64.  
  65. vacancies_number = dict([(key, len(value)) for key, value in salary.items()])
  66. vacancies_number_by_name = dict([(key, len(value)) for key, value in salary_of_vacancy_name.items()])
  67.  
  68. if not salary_of_vacancy_name:
  69. salary_of_vacancy_name = dict([(key, [0]) for key, value in salary.items()])
  70. vacancies_number_by_name = dict([(key, 0) for key, value in vacancies_number.items()])
  71.  
  72. stats = self.average(salary)
  73. stats2 = self.average(salary_of_vacancy_name)
  74. stats3 = self.average(salary_city)
  75.  
  76. stats4 = {}
  77. for year, salaries in salary_city.items():
  78. stats4[year] = round(len(salaries) / count_of_vacancies, 4)
  79. stats4 = list(filter(lambda a: a[-1] >= 0.01, [(key, value) for key, value in stats4.items()]))
  80. stats4.sort(key=lambda a: a[-1], reverse=True)
  81. stats5 = stats4.copy()
  82. stats4 = dict(stats4)
  83. stats3 = list(filter(lambda a: a[0] in list(stats4.keys()), [(key, value) for key, value in stats3.items()]))
  84. stats3.sort(key=lambda a: a[-1], reverse=True)
  85. stats3 = dict(stats3[:10])
  86. stats5 = dict(stats5[:10])
  87.  
  88. return stats, vacancies_number, stats2, vacancies_number_by_name, stats3, stats5
  89.  
  90. @staticmethod
  91. def print_statistic(stats1, stats2, stats3, stats4, stats5, stats6):
  92. print('Динамика уровня зарплат по годам: {0}'.format(stats1))
  93. print('Динамика количества вакансий по годам: {0}'.format(stats2))
  94. print('Динамика уровня зарплат по годам для выбранной профессии: {0}'.format(stats3))
  95. print('Динамика количества вакансий по годам для выбранной профессии: {0}'.format(stats4))
  96. print('Уровень зарплат по городам (в порядке убывания): {0}'.format(stats5))
  97. print('Доля вакансий по городам (в порядке убывания): {0}'.format(stats6))
  98.  
  99.  
  100. class InputConnect:
  101. def __init__(self):
  102. self.file_name = input('Введите название файла: ')
  103. self.vacancy_name = input('Введите название профессии: ')
  104.  
  105. dataset = DataSet(self.file_name, self.vacancy_name)
  106. stats1, stats2, stats3, stats4, stats5, stats6 = dataset.get_statistic()
  107. dataset.print_statistic(stats1, stats2, stats3, stats4, stats5, stats6)
  108.  
  109. report = Report(self.vacancy_name, stats1, stats2, stats3, stats4, stats5, stats6)
  110. report.generate_excel()
  111.  
  112.  
  113. class Report:
  114. def __init__(self, vacancy_name, stats1, stats2, stats3, stats4, stats5, stats6):
  115. self.wb = Workbook()
  116. self.vacancy_name = vacancy_name
  117. self.stats1 = stats1
  118. self.stats2 = stats2
  119. self.stats3 = stats3
  120. self.stats4 = stats4
  121. self.stats5 = stats5
  122. self.stats6 = stats6
  123.  
  124. def generate_excel(self):
  125. ws1 = self.wb.active
  126. ws1.title = 'Статистика по годам'
  127. ws1.append(['Год', 'Средняя зарплата', 'Средняя зарплата - ' + self.vacancy_name, 'Количество вакансий', 'Количество вакансий - ' + self.vacancy_name])
  128. for year in self.stats1.keys():
  129. ws1.append([year, self.stats1[year], self.stats3[year], self.stats2[year], self.stats4[year]])
  130.  
  131. data = [['Год ', 'Средняя зарплата ', ' Средняя зарплата - ' + self.vacancy_name, ' Количество вакансий', ' Количество вакансий - ' + self.vacancy_name]]
  132. column_widths = []
  133. for row in data:
  134. for i, cell in enumerate(row):
  135. if len(column_widths) > i:
  136. if len(cell) > column_widths[i]:
  137. column_widths[i] = len(cell)
  138. else:
  139. column_widths += [len(cell)]
  140.  
  141. for i, column_width in enumerate(column_widths, 1): # ,1 to start at 1
  142. ws1.column_dimensions[get_column_letter(i)].width = column_width + 2
  143.  
  144. data = []
  145. data.append(['Город', 'Уровень зарплат', '', 'Город', 'Доля вакансий'])
  146. for (city1, value1), (city2, value2) in zip(self.stats5.items(), self.stats6.items()):
  147. data.append([city1, value1, '', city2, value2])
  148. ws2 = self.wb.create_sheet('Статистика по городам')
  149. for row in data:
  150. ws2.append(row)
  151.  
  152. column_widths = []
  153. for row in data:
  154. for i, cell in enumerate(row):
  155. cell = str(cell)
  156. if len(column_widths) > i:
  157. if len(cell) > column_widths[i]:
  158. column_widths[i] = len(cell)
  159. else:
  160. column_widths += [len(cell)]
  161.  
  162. for i, column_width in enumerate(column_widths, 1): # ,1 to start at 1
  163. ws2.column_dimensions[get_column_letter(i)].width = column_width + 2
  164.  
  165. font_bold = Font(bold=True)
  166. for col in 'ABCDE':
  167. ws1[col + '1'].font = font_bold
  168. ws2[col + '1'].font = font_bold
  169.  
  170. for index, _ in enumerate(self.stats5):
  171. ws2['E' + str(index + 2)].number_format = '0.00%'
  172.  
  173. thin = Side(border_style='thin', color='00000000')
  174.  
  175. for row in range(len(data)):
  176. for col in 'ABDE':
  177. ws2[col + str(row + 1)].border = Border(left=thin, bottom=thin, right=thin, top=thin)
  178.  
  179. self.stats1[1] = 1
  180. for row, _ in enumerate(self.stats1):
  181. for col in 'ABCDE':
  182. ws1[col + str(row + 1)].border = Border(left=thin, bottom=thin, right=thin, top=thin)
  183.  
  184. self.wb.save('report.xlsx')
  185.  
  186.  
  187. if __name__ == '__main__':
  188. InputConnect()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement