Two ways to connect Spark to Hive

Original post: https://blog.csdn.net/weixin_41804049/article/details/85063829


First, add the following dependencies to the pom.xml:


 
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>emg</groupId>
    <artifactId>emg.spark</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.1.1</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.1.1</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>2.1.1</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.8</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-compiler</artifactId>
            <version>2.11.8</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-reflect</artifactId>
            <version>2.11.8</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.2</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.38</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.1.0</version>
        </dependency>
    </dependencies>

    <build>
        <pluginManagement>
            <plugins>
                <!-- Plugin for compiling Scala -->
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.2</version>
                </plugin>
                <!-- Plugin for compiling Java -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.5.1</version>
                </plugin>
            </plugins>
        </pluginManagement>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <!-- Plugin for packaging the jar -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <!-- Specify your own main class here -->
                                    <mainClass>emg.branchs.EmgFilterDemo</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>

</project>

Method 1: connect to Hive directly through SparkSQL

    Note from my own testing: with the Hive metastore service down and only HiveServer2 running, this approach kept failing because it could not find the Hive metastore database.


 
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Enclosing object added so the snippet compiles standalone; its name is arbitrary
object SparkSqlHiveDemo {

  def main(args: Array[String]): Unit = {

    val Array(inpath, dt, hour) = args

    val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
      //.setMaster("local[*]")
      .setMaster("spark://192.168.40.52:7077")

    val session = SparkSession.builder()
      .config(conf)
      // Port of the Hive metastore; the default is 9083 (check hive-site.xml)
      .config("hive.metastore.uris", "thrift://192.168.40.51:9083")
      // Hive warehouse directory
      .config("spark.sql.warehouse.dir", "hdfs://192.168.40.51:9000/user/hive/warehouse")
      // Connect to Hive directly
      .enableHiveSupport()
      .getOrCreate()

    import session.implicits._

    val df1 = session.read.parquet(inpath)
    //df1.write.saveAsTable(s"tmp.tmp_app_log_1")
    df1.createOrReplaceTempView("tmp_app_log_test")

    // The real query is omitted here
    val sql1 =
      s"""
         |select *
         |from tmp_app_log_test
       """.stripMargin

    val hive_table = "dwb2.fact_mbk_offline_log_mbk_app_action_event_v2_i_h"
    val sql2 = s"alter table $hive_table add if not exists partition (dt='$dt', hour='$hour')"
    session.sql(sql2)

    val tmp_table = s"tmp.app_log_${dt}_${hour}"
    val sql3 = s"drop table if exists $tmp_table"
    session.sql(sql3)

    val df2 = session.sql(sql1)
    // Write the result into a temporary table first
    df2.write.saveAsTable(tmp_table)

    // Then move it from the temporary table into the partitioned table
    val sql4 =
      s"""INSERT OVERWRITE TABLE $hive_table
         |PARTITION (dt='$dt', hour='$hour')
         |select * from $tmp_table""".stripMargin
    session.sql(sql4)

    val sql5 = s"drop table if exists $tmp_table"
    session.sql(sql5)
  }
}
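
If the metastore connection is wired up correctly, a quick sanity check is to run a metastore-backed query before the real job. A minimal sketch, assuming the same session as above (the database name dwb2 is taken from the table used in the job):

    // Both statements go through the Hive metastore, so they fail fast
    // if hive.metastore.uris is wrong or the metastore service is down
    session.sql("show databases").show()
    session.sql("show tables in dwb2").show()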

Method 2: connect to Hive via JDBC

    Note from my own testing: with the Hive metastore service down and only HiveServer2 running, the JDBC connection works fine.

 


 
import java.sql.{Connection, DriverManager}

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Enclosing object added so the snippet compiles standalone;
// CurrentTime is the author's own date/time helper class
object SparkHiveJdbcDemo {

  def main(args: Array[String]): Unit = {

    // From my own testing: with the metastore down and only HiveServer2
    // running, the JDBC connection works fine
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
      //.setMaster("local[*]")
      .setMaster("spark://192.168.40.**:7077")
    val session = SparkSession.builder()
      .config(conf)
      .getOrCreate()

    // Note the URL format: it reportedly changed in later Hive versions,
    // so check the documentation for the version you run
    val url = "jdbc:hive2://192.168.40.**:10000/emg"
    val username = "root"
    val password = "123456"

    val driverName = "org.apache.hive.jdbc.HiveDriver"
    try {
      Class.forName(driverName)
    } catch {
      case e: ClassNotFoundException =>
        println("Missing driver class: " + e)
    }
    val con: Connection = DriverManager.getConnection(url, username, password)
    val state = con.createStatement()

    import session.implicits._

    val paths = "/user/emg/cxb_out/" + CurrentTime.getMonthDate() + "/" +
      CurrentTime.getYesterday() + "/" + CurrentTime.getHourDate() + "/"

    // Since the Hive metastore would not start and cannot be reached,
    // the result is loaded into the Hive table over JDBC instead
    val sql2 = "load data inpath '" + paths + "' into table result01"

    try {
      state.execute(sql2)
      println("=============================== saved to Hive successfully ==========================")
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      if (null != con) {
        con.close()
      }
    }

    /* Creating an external table over JDBC works the same way:
    val sql =
      """
        |create external table zz_result(id bigint, lat float, lon float, utc bigint, tags int)
        |row format delimited fields terminated by '\t' location '/user/hive/zz'
      """.stripMargin
    state.execute(sql)
    println("table created successfully")
    */

    session.close()
  }
}
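
Reading results back works over the same HiveServer2 connection. A minimal sketch, assuming it runs before con.close() above and that the first column of result01 can be read as a string:

    // Hypothetical read-back from the table loaded above
    val rs = state.executeQuery("select * from result01 limit 10")
    while (rs.next()) {
      println(rs.getString(1)) // first column only; adjust to result01's schema
    }
    rs.close()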

 
