Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- from openpyxl import Workbook
- from openpyxl.utils import get_column_letter
- from openpyxl.styles import Font, Border, Side
class InputConnect:
    """Interactive driver for the vacancy report.

    Instantiating the class runs the whole pipeline: it asks for a CSV file
    name and a profession name on the console, computes the statistics with
    ``DataSet``, prints them, and writes the Excel report with ``Report``.
    """

    def __init__(self):
        self.file_name = input('Введите название файла: ')
        self.vacancy_name = input('Введите название профессии: ')

        dataset = DataSet(self.file_name, self.vacancy_name)
        # get_statistic() returns six dictionaries in exactly the order that
        # print_statistic() and Report() expect them positionally.
        statistics = dataset.get_statistic()
        dataset.print_statistic(*statistics)

        Report(self.vacancy_name, *statistics).generate_excel()
class Report:
    """Write precomputed vacancy statistics to an Excel workbook.

    The workbook has two sheets: per-year statistics and per-city statistics.

    Attributes:
        wb: the openpyxl ``Workbook`` being filled.
        vacancy_name: profession name used in the column headers.
        stats1: year -> value written to the 'Средняя зарплата' column.
        stats2: year -> value written to the 'Количество вакансий' column.
        stats3: year -> value for the 'Средняя зарплата - <profession>' column.
        stats4: year -> value for the 'Количество вакансий - <profession>' column.
        stats5: city -> value written to the 'Уровень зарплат' column.
        stats6: city -> value written to the 'Доля вакансий' column
            (formatted as a percentage).
    """

    def __init__(self, vacancy_name, salary_dynamics_city, vacancy_dynamics_year,
                 vacancy_dynamics_salary, vacancy_dynamics_years, salary_level, vacancy_share):
        self.wb = Workbook()
        self.vacancy_name = vacancy_name
        self.stats1 = salary_dynamics_city
        self.stats2 = vacancy_dynamics_year
        self.stats3 = vacancy_dynamics_salary
        self.stats4 = vacancy_dynamics_years
        self.stats5 = salary_level
        self.stats6 = vacancy_share

    def generate_excel(self, file_name='report.xlsx'):
        """Fill both sheets, style them and save the workbook.

        Args:
            file_name: path of the file to write; defaults to 'report.xlsx'.
        """
        year_sheet = self.wb.active
        year_sheet.title = 'Статистика по годам'
        year_sheet.append(['Год', 'Средняя зарплата', 'Средняя зарплата - ' + self.vacancy_name,
                           'Количество вакансий', 'Количество вакансий - ' + self.vacancy_name])
        for year in self.stats1:
            # The profession may be absent in some years; write 0 for those
            # instead of failing with KeyError.
            year_sheet.append([year, self.stats1[year], self.stats3.get(year, 0),
                               self.stats2.get(year, 0), self.stats4.get(year, 0)])

        # Column widths of the first sheet are derived from these padded
        # header strings only (not from the data rows); the padding is kept
        # so the resulting widths stay exactly as before.
        year_width_source = [['Год ', 'Средняя зарплата ', ' Средняя зарплата - ' + self.vacancy_name,
                              ' Количество вакансий', ' Количество вакансий - ' + self.vacancy_name]]
        self._apply_column_widths(year_sheet, year_width_source)

        # Second sheet: two side-by-side tables separated by an empty column.
        city_rows = [['Город', 'Уровень зарплат', '', 'Город', 'Доля вакансий']]
        for (city1, value1), (city2, value2) in zip(self.stats5.items(), self.stats6.items()):
            city_rows.append([city1, value1, '', city2, value2])
        city_sheet = self.wb.create_sheet('Статистика по городам')
        for row in city_rows:
            city_sheet.append(row)
        self._apply_column_widths(city_sheet, city_rows)

        bold = Font(bold=True)
        for col in 'ABCDE':
            year_sheet[col + '1'].font = bold
            city_sheet[col + '1'].font = bold

        # Data rows start at row 2 (row 1 is the header).
        for row_number in range(2, len(self.stats5) + 2):
            city_sheet['E' + str(row_number)].number_format = '0.00%'

        thin = Side(border_style='thin', color='00000000')
        cell_border = Border(left=thin, bottom=thin, right=thin, top=thin)
        for row_number in range(1, len(city_rows) + 1):
            for col in 'ABDE':  # column C is the empty separator
                city_sheet[col + str(row_number)].border = cell_border
        # Header row plus one row per year; computed from the length so the
        # caller's dictionary is not modified.
        for row_number in range(1, len(self.stats1) + 2):
            for col in 'ABCDE':
                year_sheet[col + str(row_number)].border = cell_border

        self.wb.save(file_name)

    def _apply_column_widths(self, sheet, rows):
        """Set each column of ``sheet`` to the widest cell text in ``rows`` plus 2."""
        for index, width in enumerate(self.make_columns([], rows), 1):  # columns are 1-based
            sheet.column_dimensions[get_column_letter(index)].width = width + 2

    def make_columns(self, column_widths, data):
        """Return the maximum text length per column over all rows of ``data``.

        Args:
            column_widths: list of already known widths; it is extended and
                updated in place and also returned.
            data: iterable of rows; each cell is measured as ``len(str(cell))``.

        Returns:
            The updated ``column_widths`` list.
        """
        for row in data:
            for index, cell in enumerate(row):
                length = len(str(cell))
                if index < len(column_widths):
                    if length > column_widths[index]:
                        column_widths[index] = length
                else:
                    column_widths.append(length)
        return column_widths
class Vacancy:
    """A single vacancy built from one CSV row given as a dictionary.

    The constructor reads the keys 'name', 'salary_from', 'salary_to',
    'salary_currency', 'area_name' and 'published_at'.
    """

    # Multiplier applied to a salary in the given currency to express it in
    # roubles ('RUR' has multiplier 1).
    currency_in_rub = {
        "AZN": 35.68,
        "BYR": 23.91,
        "EUR": 59.90,
        "GEL": 21.74,
        "KGS": 0.76,
        "KZT": 0.13,
        "RUR": 1,
        "UAH": 1.64,
        "USD": 60.66,
        "UZS": 0.0055,
    }

    def __init__(self, vacancy):
        self.name = vacancy['name']
        # Salary bounds may be written like '1000.0'; keep the integer part.
        self.salary_from = int(float(vacancy['salary_from']))
        self.salary_to = int(float(vacancy['salary_to']))
        self.salary_currency = vacancy['salary_currency']
        rate = self.currency_in_rub[self.salary_currency]
        # Midpoint of the salary range, converted with the currency multiplier.
        self.salary_average = rate * ((self.salary_from + self.salary_to) / 2)
        self.area_name = vacancy['area_name']
        # The first four characters of 'published_at' are taken as the year.
        self.year = int(vacancy['published_at'][:4])
class DataSet:
    """Read vacancies from a CSV file and aggregate salary/vacancy statistics.

    Attributes:
        file_name: path of the CSV file to read.
        vacancy_name: substring looked for in a vacancy name to decide whether
            the vacancy belongs to the selected profession.
    """

    def __init__(self, file_name, vacancy_name):
        self.file_name = file_name
        self.vacancy_name = vacancy_name

    @staticmethod
    def increment(dictionary, key, amount):
        """Add ``amount`` to ``dictionary[key]``, creating the entry if missing.

        Works for any type supporting ``+=`` (numbers, lists, ...).
        """
        if key in dictionary:
            dictionary[key] += amount
        else:
            dictionary[key] = amount

    @staticmethod
    def average(dictionary):
        """Return a new dict mapping each key to the integer mean of its values."""
        return {key: int(sum(values) / len(values)) for key, values in dictionary.items()}

    def work_with_file(self):
        """Yield every complete CSV row as a ``{column name: value}`` dictionary.

        Rows that contain an empty field or whose length differs from the
        header are skipped. An empty file yields nothing.
        """
        # newline='' is required by the csv module so that line breaks inside
        # quoted fields are handled by the reader itself.
        with open(self.file_name, mode='r', encoding='utf-8-sig', newline='') as file:
            reader = csv.reader(file)
            # A bare next() on an empty file would raise StopIteration inside
            # this generator, which Python turns into RuntimeError.
            header = next(reader, None)
            if header is None:
                return
            header_length = len(header)
            for row in reader:
                if len(row) == header_length and '' not in row:
                    yield dict(zip(header, row))

    def get_statistic(self):
        """Aggregate the file into six statistics dictionaries.

        Returns:
            A tuple of, in order: average salary by year, vacancy count by
            year, average salary by year for the selected profession, vacancy
            count by year for the selected profession, average salary by city
            and vacancy share by city (the last two as produced by
            ``make_analytics``).
        """
        salary = {}
        salary_of_vacancy_name = {}
        salary_city = {}
        count_of_vacancies = 0
        for vacancy_dictionary in self.work_with_file():
            vacancy = Vacancy(vacancy_dictionary)
            self.increment(salary, vacancy.year, [vacancy.salary_average])
            if self.vacancy_name in vacancy.name:
                self.increment(salary_of_vacancy_name, vacancy.year, [vacancy.salary_average])
            self.increment(salary_city, vacancy.area_name, [vacancy.salary_average])
            count_of_vacancies += 1

        vacancies_number = {year: len(values) for year, values in salary.items()}
        vacancies_number_by_name = {year: len(values) for year, values in salary_of_vacancy_name.items()}
        if not salary_of_vacancy_name:
            # The profession was not found at all: report zeros for every year.
            salary_of_vacancy_name = {year: [0] for year in salary}
            vacancies_number_by_name = {year: 0 for year in vacancies_number}

        salary_by_year, share_by_city, salary_by_city, salary_by_year_for_vacancy = self.make_analytics(
            count_of_vacancies, salary, salary_city, salary_of_vacancy_name)
        return salary_by_year, vacancies_number, salary_by_year_for_vacancy, \
            vacancies_number_by_name, salary_by_city, share_by_city

    def make_analytics(self, count_of_vacancies, salary, salary_city, salary_of_vacancy_name):
        """Turn the collected salary lists into averages and city shares.

        Args:
            count_of_vacancies: total number of vacancies processed.
            salary: year -> list of salaries.
            salary_city: city -> list of salaries.
            salary_of_vacancy_name: year -> list of salaries for the profession.

        Returns:
            A tuple ``(average salary by year, share by city, average salary
            by city, average salary by year for the profession)``. Only cities
            holding at least 1% of all vacancies are kept; both city
            dictionaries are sorted in descending order of their value and
            limited to 10 entries.
        """
        salary_by_year = self.average(salary)
        salary_by_year_for_vacancy = self.average(salary_of_vacancy_name)
        salary_by_city = self.average(salary_city)

        shares = [(city, round(len(salaries) / count_of_vacancies, 4))
                  for city, salaries in salary_city.items()]
        shares = [pair for pair in shares if pair[1] >= 0.01]
        shares.sort(key=lambda pair: pair[1], reverse=True)

        # Set membership keeps the filter linear in the number of cities.
        major_cities = {city for city, _ in shares}
        ranked_salaries = [(city, value) for city, value in salary_by_city.items()
                           if city in major_cities]
        ranked_salaries.sort(key=lambda pair: pair[1], reverse=True)

        return salary_by_year, dict(shares[:10]), dict(ranked_salaries[:10]), salary_by_year_for_vacancy

    @staticmethod
    def print_statistic(salary_dynamics_city, vacancy_dynamics_year, vacancy_dynamics_salary,
                        vacancy_dynamics_years, salary_level, vacancy_share):
        """Print the six statistics dictionaries, one labelled line each."""
        print('Динамика уровня зарплат по годам: {0}'.format(salary_dynamics_city))
        print('Динамика количества вакансий по годам: {0}'.format(vacancy_dynamics_year))
        print('Динамика уровня зарплат по годам для выбранной профессии: {0}'.format(vacancy_dynamics_salary))
        print('Динамика количества вакансий по годам для выбранной профессии: {0}'.format(vacancy_dynamics_years))
        print('Уровень зарплат по городам (в порядке убывания): {0}'.format(salary_level))
        print('Доля вакансий по городам (в порядке убывания): {0}'.format(vacancy_share))
if __name__ == '__main__':
    # Run the interactive pipeline only when the file is executed as a
    # script, not when it is imported.
    InputConnect()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement