Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Query 3: for every user, list the brand(s) that account for ALL of that
// user's purchase events carrying a non-null brand (i.e. users whose branded
// purchases all belong to a single brand, together with that brand).
val dataframe_query3 = {
  // Per-user total of purchase events that have a brand.
  // count(<column>) ignores nulls, so purchases without a brand are not counted.
  // groupBy + agg already yields exactly (user_id, num_all); no extra select needed.
  val purchasesPerUser = octDF.as("oct_sub")
    .where(col("oct_sub.event_type") === "purchase")
    .groupBy(col("oct_sub.user_id"))
    .agg(count(col("oct_sub.brand")).as("num_all"))

  octDF.as("oct")
    .where(col("oct.event_type") === "purchase" && col("oct.brand").isNotNull)
    // Using-column join: one row per user on the right side, so no row duplication.
    .join(purchasesPerUser, Seq("user_id"))
    // num_all is constant per user; grouping by it just carries it past the aggregation.
    .groupBy(col("oct.user_id"), col("oct.brand"), col("num_all"))
    .agg(count("*").alias("num_brand"))
    // num_brand can never exceed num_all (it counts a subset of the same rows),
    // so this predicate holds exactly when num_brand equals num_all.
    .where(col("num_brand") >= col("num_all"))
    .select(col("user_id"), col("brand"))
}

// Print the physical plan, then run the query and report wall-clock time.
dataframe_query3.explain()
spark.time(dataframe_query3.show(false))
Advertisement
Add Comment
Please sign in to add a comment
Advertisement