1.導入maven
<!-- Pin Spark and Scala versions in one place so all Spark artifacts stay in sync.
     NOTE(review): artifact suffix _2.11 must match scala.version's binary version (2.11.x). -->
<properties>
<spark.version>2.1.1</spark.version>
<scala.version>2.11.8</scala.version>
</properties>
<dependencies>
<!-- Core Spark engine (RDD API, scheduler). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- DataFrame/Dataset and SQL support (SparkSession lives here). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Hive integration: required for enableHiveSupport() / Hive metastore access. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
</dependencies>
2.讀hive數據
// Read rows from a Hive table via a Hive-enabled SparkSession.
// Fixes: the session is named `sparkSession`, but the original called an undefined
// `spark`; `Dataset<Row> data = ...` was Java generic syntax, not Scala.
val sparkSession: SparkSession = SparkSession.builder()
  .appName("MemberLogETL")
  .master("local[*]")      // local mode for development; drop when submitting to a cluster
  .enableHiveSupport()     // required to talk to the Hive metastore
  .getOrCreate()
import sparkSession.implicits._
sparkSession.sql("use db_hive_edu")
// Scala type-application syntax: Dataset[Row], declared with val.
val data: Dataset[Row] = sparkSession.sql("select * from user")
data.show()
sparkSession.close()
3.寫數據到hive
// Write a Dataset to a Hive table. `data` is a Dataset[Row] instance
// (e.g. the one produced by the read example above).
// Fixes: the original invoked .write on the *type* `Dataset`, which does not
// compile — DataFrameWriter is obtained from a Dataset instance.
data.write.saveAsTable("dwd_member")                          // creates the table from the Dataset schema
data.write.mode(SaveMode.Overwrite).insertInto("dwd_member")  // target table must already exist; rows are overwritten
如果不使用 Spark 自帶的 Hive,而是連接外部 Hive,需要將 hive-site.xml 放到項目的 resources 目錄(或 Spark 的 conf 目錄)中進行配置。