Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # *********************************************************************
- # *********************************************************************
- # Step 1 - Setup
- # *********************************************************************
- # *********************************************************************
- from google.cloud import bigquery
- # Create a "Client" object
- client = bigquery.Client()
- # Construct a reference to the "world_bank_intl_education" dataset
- dataset_ref = client.dataset("world_bank_intl_education", project="bigquery-public-data")
- # API request - fetch the dataset
- dataset = client.get_dataset(dataset_ref)
- # Construct a reference to the "international_education" table
- table_ref = dataset_ref.table("international_education")
- # API request - fetch the table
- table = client.get_table(table_ref)
- # Preview the first five lines of the "international_education" table
- client.list_rows(table, max_results=5).to_dataframe()
- # *********************************************************************
- # *********************************************************************
- # Step 2 - Group by, order by
- # *********************************************************************
- # *********************************************************************
- country_spend_pct_query = """
- SELECT country_name, AVG(value) AS avg_ed_spending_pct
- FROM `bigquery-public-data.world_bank_intl_education.international_education`
- WHERE indicator_code = 'SE.XPD.TOTL.GD.ZS' AND year>=2010 and year<=2017
- GROUP BY country_name
- ORDER BY avg_ed_spending_pct DESC
- """
- # Set up the query (cancel the query if it would use too much of
- # your quota, with the limit set to 1 GB)
- safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
- country_spend_pct_query_job = client.query(country_spend_pct_query, job_config=safe_config)
- # API request - run the query, and return a pandas DataFrame
- country_spending_results = country_spend_pct_query_job.to_dataframe()
- # View top few rows of results
- print(country_spending_results.head())
- # *********************************************************************
- # *********************************************************************
- # Step 3 - Group by a combination of columns (indicator_code, indicator_name)
- # *********************************************************************
- # *********************************************************************
- code_count_query = """
- SELECT indicator_code, indicator_name, COUNT(1) AS num_rows
- FROM `bigquery-public-data.world_bank_intl_education.international_education`
- WHERE year = 2016
- GROUP BY indicator_name, indicator_code
- HAVING COUNT(1) >= 175
- ORDER BY COUNT(1) DESC
- """
- # Set up the query
- safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
- code_count_query_job = client.query(code_count_query, job_config=safe_config)
- # API request - run the query, and return a pandas DataFrame
- code_count_results = code_count_query_job.to_dataframe()
- # View top few rows of results
- print(code_count_results.head())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement