Running a Spark Script File with spark-shell

Spark script example, mianfei.scala

import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{Dataset, SaveMode, SparkSession}

// Arguments are passed in through the custom spark.driver.args conf (see test.sh below)
val argArray = spark.sparkContext.getConf.get("spark.driver.args").split("\\s+")
println(argArray.mkString(" "))

// Load the JSON logs from the path given as the first argument and register a temp view
val logs = spark.read.json(argArray(0)).select("cats")
logs.cache()
logs.createOrReplaceTempView("tracker")

val sql1 = "select count(1) from tracker where cats.cat='store' and cats.act='aa'"
spark.sql(sql1).show(false)

val sql2 = "select count(1) from (select explode(cats) cats from tracker ) where cats.cat='store' and cats.act='bb'"
spark.sql(sql2).show(false)
spark.close()

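The script above reads its arguments from the custom spark.driver.args conf set by the launcher script below. As a minimal sketch (not part of the original file), the conf can be read defensively with getOption so the script still runs when the property is missing; the fallback path "/tmp/sample.json" is a hypothetical value used only for illustration:

// Sketch: read spark.driver.args defensively instead of failing when it is absent
val rawArgs = spark.sparkContext.getConf
  .getOption("spark.driver.args")
  .getOrElse("/tmp/sample.json")          // hypothetical fallback for illustration
val args = rawArgs.split("\\s+")
println(s"input path = ${args(0)}")
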
Run script example, test.sh

#!/bin/bash

TaskName="mianfei"
cd "$(dirname "$0")"

# Run mianfei.scala through spark-shell on YARN (client mode);
# the input path and an extra argument are forwarded via the custom spark.driver.args conf
/data/work/spark2.0/bin/spark-shell \
-i mianfei.scala \
--name ${TaskName} \
--master yarn \
--deploy-mode client \
--executor-memory 1G \
--num-executors 15 \
--executor-cores 2 \
--conf spark.driver.args="/data/logs/20180609/* helloworld"
exit 0
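
Note that on many Spark versions, spark-shell -i runs the file and then stays at the interactive prompt instead of exiting, so the wrapping shell script can hang waiting for input. A common workaround, sketched here, is to end the Scala file with an explicit JVM exit after closing the session:

// Appended at the end of mianfei.scala: stop the session and force the REPL to quit
spark.close()
System.exit(0)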