Advertisement
makispaiktis

Python-based SQL queries - Group by, Count - Exercise 3

Jun 8th, 2023 (edited)
811
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.77 KB | None | 0 0
  1. # **********************************************************************
  2. # **********************************************************************
  3. # Step 1 - Setup
  4. # **********************************************************************
  5. # **********************************************************************
  6.  
  7.  
  8. # Import
  9. from google.cloud import bigquery
  10.  
  11. # Create a "Client" object
  12. client = bigquery.Client()
  13.  
  14. # Construct a reference to the "hacker_news" dataset
  15. dataset_ref = client.dataset("hacker_news", project="bigquery-public-data")
  16.  
  17. # API request - fetch the dataset
  18. dataset = client.get_dataset(dataset_ref)
  19.  
  20. # Construct a reference to the "comments" table
  21. table_ref = dataset_ref.table("comments")
  22.  
  23. # API request - fetch the table
  24. table = client.get_table(table_ref)
  25.  
  26. # Preview the first five lines of the "comments" table
  27. client.list_rows(table, max_results=5).to_dataframe()
  28.  
  29.  
  30.  
  31.  
  32.  
  33. # **********************************************************************
  34. # **********************************************************************
  35. # Step 2 - Group by
  36. # **********************************************************************
  37. # **********************************************************************
  38.  
  39. # Query to select prolific commenters and post counts
  40. prolific_commenters_query = """
  41.                            SELECT author, COUNT(1) as NumPosts
  42.                            FROM `bigquery-public-data.hacker_news.comments`
  43.                            GROUP BY author
  44.                            HAVING count(1)>10000
  45.                            """
  46.  
  47. # Set up the query (cancel the query if it would use too much of
  48. # your quota, with the limit set to 1 GB)
  49. safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
  50. query_job = client.query(prolific_commenters_query, job_config=safe_config)
  51.  
  52. # API request - run the query, and return a pandas DataFrame
  53. prolific_commenters = query_job.to_dataframe()
  54. # View top few rows of results
  55. print(prolific_commenters.head())
  56.  
  57.  
  58.  
  59. # **********************************************************************
  60. # **********************************************************************
  61. # Step 3 - Count
  62. # **********************************************************************
  63. # **********************************************************************
  64.  
  65. # Write your query here and figure out the answer
  66. deleted_query = """
  67.                SELECT COUNT(deleted) AS Deleted_Comments
  68.                FROM `bigquery-public-data.hacker_news.comments`
  69.                WHERE deleted = True
  70.                """
  71. safe_config = bigquery.QueryJobConfig(maximum_bytes_billed=10**10)
  72. query_job = client.query(deleted_query, job_config=safe_config)
  73.  
  74. deleted_commenters = query_job.to_dataframe()
  75. print(deleted_commenters.head())
  76.  
  77.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement