Advertisement
kieni17

Untitled

Apr 19th, 2020
2,729
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
SPARK 0.67 KB | None | 0 0
  // Query 3: (user_id, brand) pairs for which the number of the user's purchase
  // events of that brand equals the user's total number of brand-tagged purchase
  // events, i.e. every brand-tagged purchase by that user is of a single brand.
  //
  // Relies on names provided by the surrounding session: `octDF` (events
  // DataFrame), `spark` (SparkSession), and `col` / `count` from
  // org.apache.spark.sql.functions.
  // NOTE(review): assumes octDF has at least user_id, event_type and brand
  // columns, as referenced below; the rest of its schema is not visible here.

  // Per-user total of purchase events. count(<column>) skips null values, so
  // purchases whose brand is null do not contribute to num_all.
  // NOTE(review): if null-brand purchases should disqualify a user, this would
  // need count("*") instead; confirm the intended semantics.
  // groupBy + agg already yields exactly (user_id, num_all), so no extra select
  // is needed before the join.
  val purchasesPerUser = octDF.as("oct_sub")
      .where(col("oct_sub.event_type") === "purchase")
      .groupBy(col("oct_sub.user_id"))
      .agg(count(col("oct_sub.brand")).as("num_all"))

  // Brand-tagged purchases joined with the per-user total, then counted per
  // (user, brand). num_brand counts a subset of the rows counted by num_all, so
  // it can never exceed it; the >= filter therefore keeps only the groups where
  // the two counts are equal.
  // `val` instead of `var`: the query is defined once and never reassigned.
  val dataframe_query3 = octDF.as("oct")
      .where(col("oct.event_type") === "purchase" && col("oct.brand").isNotNull)
      .join(purchasesPerUser, Seq("user_id"))
      .groupBy(col("oct.user_id"), col("oct.brand"), col("num_all"))
      .agg(count("*").alias("num_brand"))
      .where(col("num_brand") >= col("num_all"))
      .select(col("user_id"), col("brand"))

  // Print the query plan, then execute the query and display the rows without
  // truncating column values; spark.time reports how long the action took.
  dataframe_query3.explain()
  spark.time(dataframe_query3.show(false))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement