Exception in thread "main" org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(value#28, 200)
+- *HashAggregate(keys=[value#28], functions=[], output=[value#28])
   +- Union
      :- *SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true) AS value#28]
      :  +- *MapElements <function1>, obj#27: java.lang.String
      :     +- *DeserializeToObject createexternalrow(artist_id#2.toString, StructField(artist_id,StringType,false)), obj#26: org.apache.spark.sql.Row
      :        +- Scan ExistingRDD[artist_id#2]
      +- *SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true) AS value#33]
         +- *MapElements <function1>, obj#32: java.lang.String
            +- *DeserializeToObject createexternalrow(artist_id#14.toString, StructField(artist_id,StringType,false)), obj#31: org.apache.spark.sql.Row
               +- Scan ExistingRDD[artist_id#14]
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:112)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:235)
    at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141)
    at org.apache.spark.sql.execution.DeserializeToObjectExec.inputRDDs(objects.scala:74)
    at org.apache.spark.sql.execution.MapElementsExec.inputRDDs(objects.scala:205)
    at org.apache.spark.sql.execution.SerializeFromObjectExec.inputRDDs(objects.scala:111)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:368)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.UnionExec$$anonfun$doExecute$1.apply(basicPhysicalOperators.scala:491)
    at org.apache.spark.sql.execution.UnionExec$$anonfun$doExecute$1.apply(basicPhysicalOperators.scala:491)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:491)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.DeserializeToObjectExec.doExecute(objects.scala:90)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.MapElementsExec.doExecute(objects.scala:234)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
    at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:2570)
    at org.apache.spark.sql.Dataset.rdd(Dataset.scala:2567)
    at com.vertigo.mapping.job.SpotifyMapping$.main(SpotifyMapping.scala:65)
    at com.vertigo.mapping.job.SpotifyMapping.main(SpotifyMapping.scala)
Caused by: org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: s3://vmi-music-data/spotify/20170909000000/full/spotify_artist.tsv
    at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:251)
    at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:270)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:202)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.UnionRDD$$anonfun$1.apply(UnionRDD.scala:84)
    at org.apache.spark.rdd.UnionRDD$$anonfun$1.apply(UnionRDD.scala:84)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.immutable.List.map(List.scala:296)
    at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:84)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$.prepareShuffleDependency(ShuffleExchange.scala:261)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:84)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:121)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:112)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 54 more
Disconnected from the target VM, address: '127.0.0.1:41996', transport: 'socket'
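
Note on the failure: the TreeNodeException at the top is just Catalyst wrapping the real error. The Caused by block carries the root cause: org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: s3://vmi-music-data/spotify/20170909000000/full/spotify_artist.tsv. Because Spark reads are lazy, the missing file only surfaces when Dataset.rdd at SpotifyMapping.scala:65 forces execution and the Exchange tries to compute partitions for the shuffle. The fix is to point the S3 path (most likely the 20170909000000 date segment) at data that actually exists, or to fail fast before building the query. Below is a minimal sketch of such a pre-read check, assuming a SparkSession and an S3 filesystem connector on the classpath; the object and helper names are hypothetical and not part of the original job.

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession

object InputPathCheck {

  // Hypothetical helper: fail fast with a clear message when an input path is missing,
  // instead of hitting InvalidInputException deep inside the shuffle as in the trace above.
  def requirePathExists(spark: SparkSession, pathStr: String): Unit = {
    val path = new Path(pathStr)
    // Resolves the FileSystem matching the path's scheme (s3, hdfs, file, ...).
    val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration)
    require(fs.exists(path), s"Input path does not exist: $pathStr")
  }

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("InputPathCheck").getOrCreate()

    // Path copied from the stack trace; in the real job the date segment is presumably a parameter.
    val artistTsv = "s3://vmi-music-data/spotify/20170909000000/full/spotify_artist.tsv"
    requirePathExists(spark, artistTsv)

    // Only read once the path is known to exist.
    val artists = spark.read.option("sep", "\t").csv(artistTsv)
    artists.show(5, truncate = false)

    spark.stop()
  }
}

For reference, the physical plan above (a Union of two MapElements over artist_id feeding a HashAggregate with no aggregate functions and an Exchange on value#28) is consistent with a distinct union of artist ids, e.g. ds1.select("artist_id").map(_.getString(0)).union(ds2.select("artist_id").map(_.getString(0))).distinct(), so the query shape itself looks fine; only the input path is at fault.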