Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ################ CELL 1 ################
- import numpy as np
- import pandas as pd
- import os
- import boto3
- import os
- from pathlib import Path
- ##########################################
- ################ CELL 2 ################
# Keyword arguments with the access key pair for the S3 client.
# Secrets should not live in source: each value is taken from the standard
# AWS environment variable when it is set, and falls back to the placeholder
# literal that used to be hard-coded here otherwise.
S3_CREDS = {
    "aws_access_key_id": os.environ.get("AWS_ACCESS_KEY_ID", "........"),
    "aws_secret_access_key": os.environ.get("AWS_SECRET_ACCESS_KEY", "......"),
}

# Name of the bucket the script downloads from.
bucket = "test-bucket2"
def download_files(s3_client, bucket_name: str, local_path: str, file_name: str) -> None:
    """Download one object from a bucket into a local directory.

    The object is written to ``local_path / file_name``; every missing
    directory on the way to that file is created first.

    Args:
        s3_client: Object exposing ``download_file(bucket, key, filename)``.
        bucket_name: Name of the bucket to read from.
        local_path: Local directory under which the file is stored.
        file_name: Object key in the bucket; also used as the path of the
            downloaded file relative to ``local_path``.
    """
    destination = Path(local_path) / file_name
    # Creating the file's parent also creates ``local_path`` itself and
    # covers keys that contain sub-directories (e.g. "a/b.csv"), so a
    # single mkdir call is enough.
    destination.parent.mkdir(parents=True, exist_ok=True)
    s3_client.download_file(bucket_name, file_name, str(destination))
def get_mean_value(input_data, local_path: str = "from-s3-folder"):
    """Return the mean of one column of a CSV file stored locally.

    Args:
        input_data: Mapping with the keys ``"file_name"`` (CSV file name,
            relative to ``local_path``) and ``"column_name"`` (column to
            average).
        local_path: Directory that holds the CSV file. Defaults to
            ``"from-s3-folder"``, the directory that was previously
            hard-coded, so existing one-argument calls keep working.

    Returns:
        The value of pandas ``Series.mean()`` for the requested column.

    Raises:
        KeyError: If ``input_data`` lacks one of the two keys, or the CSV
            has no column named ``input_data["column_name"]``.
    """
    csv_path = Path(local_path) / input_data["file_name"]
    frame = pd.read_csv(csv_path)
    return frame[input_data["column_name"]].mean()
- ##########################################
- ########## CELL 3 (to pin) ##########
# Which file to fetch from the bucket and which of its columns to average.
input_data = {"file_name": "diabetes.csv", "column_name": "age"}

# S3 client that talks to the storage.yandexcloud.net endpoint using the
# key pair from S3_CREDS.
client = boto3.client(
    "s3",
    endpoint_url="https://storage.yandexcloud.net",
    **S3_CREDS,
)

# Fetch the file into the local "from-s3-folder" directory.
download_files(
    s3_client=client,
    bucket_name=bucket,
    local_path="from-s3-folder",
    file_name=input_data["file_name"],
)

# Compute the column mean from the downloaded file and show it.
output_data = get_mean_value(input_data)
print(output_data)
- ##########################################
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement