- scala> val df = spark.read.json("file:///opt/spark/examples/src/main/resources/people.json")
- df: org.apache.spark.sql.DataFrame = [age: bigint, name: string]
- scala> df.show()
- +----+-------+
- | age| name|
- +----+-------+
- |null|Michael|
- | 30| Andy|
- | 19| Justin|
- +----+-------+
- scala> df.select("name").show()
- +-------+
- | name|
- +-------+
- |Michael|
- | Andy|
- | Justin|
- +-------+
- scala> df.select(df("name"), df("age") + 1).show()
- +-------+---------+
- | name|(age + 1)|
- +-------+---------+
- |Michael| null|
- | Andy| 31|
- | Justin| 20|
- +-------+---------+
- scala> df.filter(df("age") > 21).show()
- +---+----+
- |age|name|
- +---+----+
- | 30|Andy|
- +---+----+
- scala> df.groupBy("age").count().show()
- +----+-----+
- | age|count|
- +----+-----+
- | 19| 1|
- |null| 1|
- | 30| 1|
- +----+-----+
- scala>
来源: http://www.bubuko.com/infodetail-3298288.html