Advertisement
VssA

Untitled

Jan 14th, 2023
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 14.40 KB | None | 0 0
  1. from django.shortcuts import render
  2.  
  3. import math
  4. import billiard as multiprocessing
  5. import datetime
  6. import requests
  7.  
  8. import csv
  9. import threading
  10. from concurrent.futures import ProcessPoolExecutor
  11. from multiprocessing import cpu_count
  12.  
  13. from django.shortcuts import render
  14. # import multiprocessing
  15. import os
  16. import pandas as pd
  17. import matplotlib.pyplot as plt
  18. import numpy as np
  19.  
  20. # ------------------------ demand
  21. list_print1 = ['Динамика уровня зарплат по годам: ', 'Динамика количества вакансий по годам: ',
  22.                'Динамика уровня зарплат по годам для выбранной профессии: ',
  23.                'Динамика количества вакансий по годам для выбранной профессии: ',
  24.                'Уровень зарплат по городам (в порядке убывания): ',
  25.                'Доля вакансий по городам (в порядке убывания): ']
  26.  
  27.  
  28. class Solution:
  29.     def __init__(self, path_to_file, name_vacancy):
  30.         self.path_to_file = path_to_file
  31.         self.name_vacancy = name_vacancy
  32.         self.dynamics1 = {}
  33.         self.dynamics2 = {}
  34.         self.dynamics3 = {}
  35.         self.dynamics4 = {}
  36.         self.dynamics5 = {}
  37.         self.dynamics6 = {}
  38.  
  39.     def split_by_year(self):
  40.         data_of_file = pd.read_csv(self.path_to_file, low_memory=False)
  41.         data_of_file["year"] = data_of_file["published_at"].apply(lambda x: x[:4])
  42.         data_of_file = data_of_file.groupby("year")
  43.         for year, data in data_of_file:
  44.             data[["name", "salary_from", "salary_to", "salary_currency", "area_name", "published_at"]]. \
  45.                 to_csv(rf"templates/data/year_number_{year}.csv", index=False)
  46.  
  47.     def get_dynamics(self):
  48.         self.get_dynamics_by_year_with_multiprocessing()
  49.         self.get_dynamics_by_city()
  50.         return self.dynamics1, self.dynamics2, self.dynamics3, self.dynamics4, self.dynamics5, self.dynamics6
  51.  
  52.     def get_statistic_by_year(self, file_csv):
  53.  
  54.         data_of_file = pd.read_csv(file_csv, low_memory=False)
  55.         data_of_file["salary"] = data_of_file[["salary_from", "salary_to"]].mean(axis=1)
  56.         data_of_file["salary"] = data_of_file["salary"].apply(lambda s: 0 if math.isnan(s) else int(s))
  57.         data_of_file["published_at"] = data_of_file["published_at"].apply(lambda s: int(s[:4]))
  58.         data_of_file_vacancy = data_of_file[data_of_file["name"].str.contains(self.name_vacancy, case=False)]
  59.  
  60.         return data_of_file["published_at"].values[0], [int(data_of_file["salary"].mean()), len(data_of_file),
  61.                                                         int(data_of_file_vacancy["salary"].mean() if len(
  62.                                                             data_of_file_vacancy) != 0 else 0),
  63.                                                         len(data_of_file_vacancy)]
  64.  
  65.     def get_dynamics_by_year_with_multiprocessing(self):
  66.         files = [rf"templates/data/{file_name}" for file_name in
  67.                  os.listdir(rf"templates/data")]
  68.         pool = multiprocessing.Pool(4)
  69.         result = pool.starmap(self.get_statistic_by_year, [(file,) for file in files])
  70.         pool.close()
  71.         for year, data_dynamics in result:
  72.             self.dynamics1[year] = data_dynamics[0]
  73.             self.dynamics2[year] = data_dynamics[1]
  74.             self.dynamics3[year] = data_dynamics[2]
  75.             self.dynamics4[year] = data_dynamics[3]
  76.  
  77.     def get_dynamics_by_city(self):
  78.         data_of_file = pd.read_csv(self.path_to_file, low_memory=False)
  79.         total = len(data_of_file)
  80.         data_of_file["salary"] = data_of_file[["salary_from", "salary_to"]].mean(axis=1)
  81.         data_of_file["count"] = data_of_file.groupby("area_name")["area_name"].transform("count")
  82.         data_of_file = data_of_file[data_of_file["count"] > total * 0.01]
  83.         data_of_file = data_of_file.groupby("area_name", as_index=False)
  84.         data_of_file = data_of_file[["salary", "count"]].mean().sort_values("salary", ascending=False)
  85.         data_of_file["salary"] = data_of_file["salary"].apply(lambda s: 0 if math.isnan(s) else int(s))
  86.  
  87.         self.dynamics5 = dict(zip(data_of_file.head(10)["area_name"], data_of_file.head(10)["salary"]))
  88.  
  89.         data_of_file = data_of_file.sort_values("count", ascending=False)
  90.         data_of_file["count"] = round(data_of_file["count"] / total, 4)
  91.  
  92.         self.dynamics6 = dict(zip(data_of_file.head(10)["area_name"], data_of_file.head(10)["count"]))
  93.  
  94.     def get_statistic(self):
  95.         InputConnect(self.path_to_file, self.name_vacancy, self.dynamics1, self.dynamics2, self.dynamics3,
  96.                      self.dynamics4, self.dynamics5, self.dynamics6)
  97.         return self.dynamics1, self.dynamics2, self.dynamics3, self.dynamics4, self.dynamics5, self.dynamics6
  98.  
  99.  
  100. class InputConnect:
  101.     def __init__(self, path_to_file, name_vacancy, dynamics1, dynamics2, dynamics3, dynamics4, dynamics5,
  102.                  dynamics6):
  103.         self.path_to_file, self.name_vacancy = path_to_file, name_vacancy
  104.         dynamics1, dynamics2, dynamics3, dynamics4, dynamics5, dynamics6 = dynamics1, dynamics2, dynamics3, dynamics4, dynamics5, dynamics6
  105.         new_graphic = Report(self.name_vacancy, dynamics1, dynamics2, dynamics3, dynamics4, dynamics5, dynamics6)
  106.         new_graphic.generate_image_demand()
  107.         new_graphic.generate_image_geo()
  108.  
  109.  
  110. def dict_sort(dict):
  111.     arr = sorted(dict.items())
  112.     dict2 = {}
  113.     for cort in arr:
  114.         dict2[cort[0]] = cort[1]
  115.     return dict2
  116.  
  117.  
  118. class Report:
  119.     def __init__(self, name_vacancy, dynamics1, dynamics2, dynamics3, dynamics4, dynamics5, dynamics6):
  120.         self.name_vacancy = name_vacancy
  121.         self.dynamics1 = dict_sort(dynamics1)
  122.         self.dynamics2 = dict_sort(dynamics2)
  123.         self.dynamics3 = dict_sort(dynamics3)
  124.         self.dynamics4 = dict_sort(dynamics4)
  125.         self.dynamics5 = dynamics5
  126.         self.dynamics6 = dynamics6
  127.  
  128.     def generate_image_demand(self):
  129.         x = np.arange(len(self.dynamics1.keys()))
  130.         width = 0.35
  131.  
  132.         fig, axs = plt.subplots(1, 2)
  133.         axs[0].bar(x - width / 2, self.dynamics1.values(), width, label='средняя з/п')
  134.         axs[0].bar(x + width / 2, self.dynamics3.values(), width, label='з/п {0}'.format(self.name_vacancy))
  135.         plt.rcParams['font.size'] = '8'
  136.         for label in (axs[0].get_xticklabels() + axs[0].get_yticklabels()):
  137.             label.set_fontsize(7)
  138.         axs[0].set_title('Уровень зарплат по годам')
  139.         axs[0].set_xticks(x, self.dynamics1.keys(), rotation=90)
  140.         axs[0].grid(axis='y')
  141.         axs[0].legend(fontsize=7)
  142.  
  143.         axs[1].bar(x - width / 2, self.dynamics2.values(), width, label='количество вакансий')
  144.         axs[1].bar(x + width / 2, self.dynamics4.values(), width,
  145.                    label='количество вакансий {0}'.format(self.name_vacancy))
  146.         for label in (axs[1].get_xticklabels() + axs[1].get_yticklabels()):
  147.             label.set_fontsize(7)
  148.         axs[1].set_title('Количество вакансий по годам')
  149.         axs[1].set_xticks(x, self.dynamics2.keys(), rotation=90)
  150.         axs[1].grid(axis='y')
  151.         axs[1].legend(fontsize=7)
  152.         fig.tight_layout()
  153.  
  154.         plt.tight_layout()
  155.         plt.savefig('C:/Users/вадим/PycharmProjects/Android_dev/static/graph2.png', dpi=300)
  156.  
  157.     def generate_image_geo(self):
  158.         fig, axs = plt.subplots(ncols=2, nrows=1)
  159.         fig.tight_layout()
  160.         areas = []
  161.         for area in self.dynamics5.keys():
  162.             areas.append(str(area).replace(' ', '\n').replace('-', '-\n'))
  163.         y_pos = np.arange(len(areas))
  164.         performance = self.dynamics5.values()
  165.         error = np.random.rand(len(areas))
  166.         axs[0].barh(y_pos, performance, xerr=error, align='center')
  167.         for label in (axs[0].get_xticklabels() + axs[0].get_yticklabels()):
  168.             label.set_fontsize(7)
  169.         axs[0].set_yticks(y_pos, labels=areas, size=7)
  170.         axs[0].invert_yaxis()
  171.         axs[0].grid(axis='x')
  172.         axs[0].set_title('Уровень зарплат по городам')
  173.  
  174.         val = list(self.dynamics6.values()) + [1 - sum(list(self.dynamics6.values()))]
  175.         k = list(self.dynamics6.keys()) + ['Другие']
  176.         axs[1].pie(val, labels=k, startangle=150)
  177.         axs[1].set_title('Доля вакансий по городам')
  178.  
  179.         plt.tight_layout()
  180.         plt.savefig('C:/Users/вадим/PycharmProjects/Android_dev/static/graph3.png', dpi=300)
  181.  
  182.  
  183. def demand(request):
  184.     filename = 'C:/Users/вадим/PycharmProjects/Android_dev/templates/data/vacancies_with_skills.csv'
  185.     name_vacancy = "Android-разработчик"
  186.     solve = Solution(filename, name_vacancy)
  187.     solve.split_by_year()
  188.     solve.get_dynamics()
  189.     dynamics1, dynamics2, dynamics3, dynamics4, dynamics5, dynamics6 = solve.get_statistic()
  190.     dynamics = []
  191.  
  192.     for year in dynamics2.keys():
  193.         dynamics.append([year, dynamics1[year], dynamics2[year], dynamics3[year], dynamics4[year]])
  194.  
  195.     dynamics.sort()
  196.  
  197.     data = {'name': name_vacancy,
  198.             'path': 'C:/Users/вадим/PycharmProjects/Android_dev/static/graph2.png',
  199.             'val0': dynamics[0],
  200.             'val1': dynamics[1],
  201.             'val2': dynamics[2],
  202.             'val3': dynamics[3],
  203.             'val4': dynamics[4],
  204.             'val5': dynamics[5],
  205.             'val6': dynamics[6],
  206.             'val7': dynamics[7],
  207.             'val8': dynamics[8],
  208.             'val9': dynamics[9],
  209.             'val10': dynamics[10],
  210.             'val11': dynamics[11],
  211.             'val12': dynamics[12],
  212.             'val13': dynamics[13],
  213.             'val14': dynamics[14],
  214.             'val15': dynamics[15],
  215.             'val16': dynamics[16],
  216.             'val17': dynamics[17],
  217.             'val18': dynamics[18],
  218.             'val19': dynamics[19],
  219.             "dinamics": dynamics,
  220.             }
  221.  
  222.     return render(request, "demand.html", context=data)
  223.  
  224.  
  225. def main(request):
  226.     return render(request, "main.html")
  227.  
  228.  
  229. def geography(request):
  230.     filename = 'C:/Users/вадим/PycharmProjects/Android_dev/templates/data/vacancies_with_skills.csv'
  231.     name_vacancy = "Android-разработчик"
  232.     solve = Solution(filename, name_vacancy)
  233.     solve.get_dynamics()
  234.     dynamics1, dynamics2, dynamics3, dynamics4, dynamics5, dynamics6 = solve.get_statistic()
  235.     for key in dynamics6:
  236.         dynamics6[key] = round(dynamics6[key] * 100, 2)
  237.     data = {
  238.         'dynamics5': dynamics5.items(),
  239.         'dynamics6': dynamics6.items(),
  240.     }
  241.     return render(request, "geography.html", context=data)
  242.  
  243.  
  244. # ----------------------------------------
  245.  
  246.  
  247. vacancy_choices = '|'.join(['android'])
  248. data = pd.read_csv('C:/Users/вадим/PycharmProjects/Android_dev/templates/data/vacancies_with_skills.csv',
  249.                    usecols=['name', 'key_skills', 'published_at'])
  250.  
  251. data = data[data['name'].str.contains(vacancy_choices, case=False)]
  252. data = data[data['key_skills'].notnull()]
  253. data['year'] = data['published_at'].apply(lambda x: x[:4])
  254. groups = data.groupby(['year'])
  255.  
  256.  
  257. def write_group_to_csv(group):
  258.     year, data = group
  259.     data.to_csv(rf'skills_by_city/key_skillsby{year}.csv', index=False)
  260.  
  261.  
  262. thread_pool = []
  263.  
  264. for group in groups:
  265.     thread = threading.Thread(target=write_group_to_csv, args=(group,))
  266.     thread.start()
  267.     thread_pool.append(thread)
  268.  
  269. for thread in thread_pool:
  270.     thread.join()
  271.  
  272.  
  273. class Skills:
  274.     def __init__(self, name, amount):
  275.         self.name = name
  276.         self.amount = amount
  277.  
  278.     def __repr__(self):
  279.         return f'{self.name}: {self.amount}'
  280.  
  281.  
  282. def analyze_year(file_name):
  283.     all_skills = []
  284.     with open(file_name, 'r', encoding='utf-8-sig') as skills:
  285.         rows = csv.reader(skills)
  286.         next(rows)
  287.         for row in rows:
  288.             s = row[1]
  289.             for i in s.split('\n'):
  290.                 all_skills.append(i)
  291.     DATA = pd.DataFrame({'skills': all_skills}).value_counts().head(10).to_dict()
  292.     date = file_name[-8:-4]
  293.     return date, [Skills(skill[0], amount) for skill, amount in DATA.items()]
  294.  
  295.  
  296. def skills(request):
  297.     file_names = [rf'skills_by_city/{file}' for file in os.listdir('skills_by_city')]
  298.     with ProcessPoolExecutor(max_workers=cpu_count()) as ex:
  299.         res = ex.map(analyze_year, file_names)
  300.         data = {"res": sorted(res)}
  301.     return render(request, "skills.html", context=data)
  302. # ------------------------
  303.  
  304.  
  305. def clean_vacancy(vacancy):
  306.     # vacancy['area'] = vacancy['area']['name'] if vacancy['area'].__contains__('name') else 'Нет данных'
  307.     if vacancy['salary']['from'] != None and vacancy['salary']['to'] != None and vacancy['salary']['from'] != \
  308.             vacancy['salary']['to']:
  309.         vacancy[
  310.             'salary'] = f"от {'{0:,}'.format(vacancy['salary']['from']).replace(',', ' ')} до {'{0:,}'.format(vacancy['salary']['to']).replace(',', ' ')} {vacancy['salary']['currency']}"
  311.     elif vacancy['salary']['from'] != None:
  312.         vacancy[
  313.             'salary'] = f"{'{0:,}'.format(vacancy['salary']['from']).replace(',', ' ')} {vacancy['salary']['currency']}"
  314.     elif vacancy['salary']['to'] != None:
  315.         vacancy[
  316.             'salary'] = f"{'{0:,}'.format(vacancy['salary']['to']).replace(',', ' ')} {vacancy['salary']['currency']}"
  317.     else:
  318.         vacancy['salary'] = 'Нет данных'
  319.     vacancy['key_skills'] = ', '.join(map(lambda x: x['name'], vacancy['key_skills']))
  320.     return vacancy
  321.  
  322.  
  323. def get_vacancies():
  324.     try:
  325.         data = []
  326.         info = requests.get('https://api.hh.ru/vacancies?text=%22android%22&specialization=1&per_page=100').json()
  327.         for row in info['items']:
  328.             if row['name'].lower().__contains__('android') and not row['salary'] is None:
  329.                 data.append({'id': row['id'], 'published_at': row['published_at']})
  330.         data = sorted(data, key=lambda x: x['published_at'])
  331.         vacancies = []
  332.         for vacancy in data[len(data) - 10:]:
  333.             vacancies.append(clean_vacancy(requests.get(f'https://api.hh.ru/vacancies/{vacancy["id"]}').json()))
  334.         return vacancies
  335.     except Exception as e:
  336.         print(e)
  337.         print(datetime.datetime.now())
  338.         return []
  339.  
  340.  
  341. def last_vacansies(request):
  342.     return render(request, "last_vacansies.html", context={"vacansies": get_vacancies(), })
Tags: views
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement