Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import sqlite3
- database_name = input()
- csv_file = input()
- table_name = input()
- currency_table = input()
- def preprocess_vacancies_data(database_name, csv_file_name, table_name, currency_table):
- df_vacancies = pd.read_csv(csv_file_name)
- conn = sqlite3.connect(database_name)
- df_vacancies['salary_from'] = df_vacancies.apply(lambda row: convert_currency(row['salary_from'], row['salary_currency'],row['published_at'],database_name), axis=1)
- df_vacancies['salary_to'] = df_vacancies.apply(lambda row: convert_currency(row['salary_to'], row['salary_currency'],row['published_at'],database_name), axis=1)
- df_vacancies['salary'] = df_vacancies.apply(lambda row: calculate_average_salary(row['salary_from'], row['salary_to']), axis=1)
- df_vacancies = df_vacancies[['name', 'salary', 'area_name', 'published_at']]
- df_vacancies.to_sql(table_name, conn, if_exists='replace', index=False)
- conn.close()
- def convert_currency(value, currency, year,database_name):
- if pd.isnull(value) or pd.isnull(currency):
- return None
- coefficient = get_currency_coefficient(currency_table, currency, year,database_name)
- if coefficient is not None:
- return round(value * coefficient)
- else:
- return None
- def get_currency_coefficient(currency_table, currency, date,database_name):
- if currency == 'RUR':
- return 1
- if currency is not None and currency != 'RUR':
- conn = sqlite3.connect(database_name)
- cursor = conn.cursor()
- query = f"SELECT {currency} FROM {currency_table} WHERE date = '{date.split('T')[0][0:7]}'"
- cursor.execute(query)
- coefficient = cursor.fetchone()[0]
- conn.close()
- return coefficient
- else:
- return None
- def calculate_average_salary(salary_from, salary_to):
- if pd.isnull(salary_from) and pd.isnull(salary_to):
- return None
- elif pd.isnull(salary_from):
- return salary_to
- elif pd.isnull(salary_to):
- return salary_from
- else:
- return (salary_from + salary_to) // 2
- preprocess_vacancies_data(database_name, csv_file, table_name, currency_table)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement