val cities6chars = cities.filter(_.name.length == 6).map(_.name.toUpperCase)
cities6chars.explain(true)
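This and the queries that follow operate on a Dataset[City] read from parquet. The setup is not shown here, but a minimal sketch consistent with the plans below (the City schema and the cities.parquet path are taken from the plan output; everything else is an assumption) could be:

// Sketch of the assumed setup; schema inferred from the plans below
case class City(id: Long, name: String)

import spark.implicits._  // Encoder for City (auto-imported in spark-shell)
val cities = spark.read.parquet("cities.parquet").as[City]

To see what Catalyst makes of the typed filter, inspect the optimized logical plan: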
scala> cities6chars.queryExecution.optimizedPlan
res33: org.apache.spark.sql.catalyst.plans.logical.LogicalPlan =
SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true) AS value#248]
+- MapElements <function1>, class City, [StructField(id,LongType,false), StructField(name,StringType,true)], obj#247: java.lang.String
   +- Filter <function1>.apply
      +- DeserializeToObject newInstance(class City), obj#246: City
         +- Relation[id#236L,name#237] parquet
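Note what the typed transformations cost: the Scala predicate is an opaque Filter <function1>.apply over deserialized City objects, so Spark deserializes every row (DeserializeToObject), invokes the function, and serializes the results back (SerializeFromObject). Nothing about the predicate reaches the parquet relation. The same happens even when the typed filter is a plain equality: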
val cities6chars = cities.filter(_.name == "Warsaw").map(_.name.toUpperCase)
cities6chars.explain(true)
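Contrast this with the equivalent Column-based query, where the predicate does get pushed down to the parquet scan: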
scala> cities.where('name === "Warsaw").queryExecution.executedPlan
res29: org.apache.spark.sql.execution.SparkPlan =
*Project [id#128L, name#129]
+- *Filter (isnotnull(name#129) && (name#129 = Warsaw))
   +- *FileScan parquet [id#128L,name#129] Batched: true, Format: ParquetFormat, InputPaths: file:/Users/jacek/dev/oss/spark/cities.parquet, PartitionFilters: [], PushedFilters: [IsNotNull(name), EqualTo(name,Warsaw)], ReadSchema: struct<id:bigint,name:string>
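This time the FileScan reports PushedFilters: [IsNotNull(name), EqualTo(name,Warsaw)], i.e. the predicate is evaluated by the parquet data source itself and non-matching rows never reach Spark. Any Column-based spelling of the predicate should preserve the pushdown; a couple of equivalent formulations (a sketch, assuming the same cities dataset):

// Both use only Column operators, so Catalyst can translate them
import org.apache.spark.sql.functions.col
cities.where(col("name") === "Warsaw")
cities.filter($"name" === "Warsaw")  // $-interpolator comes from spark.implicits._

UDFs, however, break the pushdown. Define one that performs exactly the same check: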
import org.apache.spark.sql.functions.udf

val isWarsaw = udf { (s: String) => s == "Warsaw" }
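To Catalyst, isWarsaw is just as much a black box as the typed filter's lambda: all the optimizer sees is a function to call once per row. Using it in where confirms this: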
scala> cities.where(isWarsaw('name)).queryExecution.executedPlan
res33: org.apache.spark.sql.execution.SparkPlan =
*Filter UDF(name#129)
+- *FileScan parquet [id#128L,name#129] Batched: true, Format: ParquetFormat, InputPaths: file:/Users/jacek/dev/oss/spark/cities.parquet, PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:bigint,name:string>
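The Project is gone and, more importantly, PushedFilters is empty: Spark reads every row of the parquet file and only then applies the UDF. The takeaway: prefer Column expressions and built-in functions over UDFs whenever an equivalent exists, and check PushedFilters in the executed plan when in doubt. For this query the rewrite is a one-liner:

cities.where(isWarsaw('name))     // full scan, filtered inside Spark
cities.where('name === "Warsaw")  // filtered by the parquet data source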