Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import sqlite3
- database_name = input()
- csv_file = input()
- table_name = input()
- currency_table = input()
- def preprocess_vacancies_data(database_name, csv_file_name, table_name, currency_table):
- # Чтение данных о вакансиях из CSV-файла
- df_vacancies = pd.read_csv(csv_file_name)
- # Подключение к базе данных SQLite
- conn = sqlite3.connect(database_name)
- # Чтение данных о курсах валют из таблицы базы данных
- query = f"SELECT * FROM {currency_table}"
- df_currency = pd.read_sql_query(query, conn)
- df_vacancies['salary_from'] = df_vacancies.apply(lambda row: convert_currency(row['salary_from'], row['salary_currency'], df_currency,df_vacancies['published_at']), axis=1)
- df_vacancies['salary_to'] = df_vacancies.apply(lambda row: convert_currency(row['salary_to'], row['salary_currency'], df_currency,df_vacancies['published_at']), axis=1)
- df_vacancies['salary'] = df_vacancies.apply(lambda row: calculate_average_salary(row['salary_from'], row['salary_to']), axis=1)
- # Удаление ненужных столбцов
- df_vacancies = df_vacancies[['name', 'salary', 'area_name', 'published_at']]
- # Запись данных в таблицу базы данных
- df_vacancies.to_sql(table_name, conn, if_exists='replace', index=False)
- # Закрытие соединения с базой данных
- conn.close()
- def convert_currency(value, currency, df_currency,year):
- if pd.isnull(value) or pd.isnull(currency):
- return None
- # Получение коэффициента для преобразования валюты
- coefficient = get_currency_coefficient(currency, df_currency,year)
- # Преобразование валюты в рубли
- if coefficient is not None:
- return round(value * coefficient)
- else:
- return None
- def get_currency_coefficient(currency, df_currency,year):
- if currency in df_currency.columns:
- return df_currency[currency].iloc[0]
- if currency == 'RUR':
- return 1
- else:
- return None
- def calculate_average_salary(salary_from, salary_to):
- if pd.isnull(salary_from) and pd.isnull(salary_to):
- return None
- elif pd.isnull(salary_from):
- return salary_to
- elif pd.isnull(salary_to):
- return salary_from
- else:
- return (salary_from + salary_to) // 2
- # Пример использования функции
- preprocess_vacancies_data(database_name, csv_file, table_name, currency_table)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement