spark讀取和存儲jdbc數據庫

原創

2018-11-03 23:54

框架

import java.sql.{DriverManager, SQLException}
import org.apache.spark.sql.DataFrame
     /**
      * Loads a table from a JDBC database as a DataFrame.
      *
      * @param source     value passed as the JDBC `dbtable` option (the source table to read)
      * @param connection holder of the JDBC settings (`url`, `driver`, `user`, `password`) and of
      *                   the `spark` handle used for reading (presumably a SparkSession — confirm
      *                   against the `Connection` definition)
      * @return the DataFrame returned by the `jdbc` data source
      */
      def read(source: String, connection: Connection): DataFrame = {
        val jdbcReader = connection.spark.read.format("jdbc")
        // Everything the JDBC source needs, gathered in one place.
        val jdbcOptions: Map[String, String] = Map(
          "url" -> connection.url,
          "driver" -> connection.driver,
          "dbtable" -> source,
          "user" -> connection.user,
          "password" -> connection.password
        )
        jdbcReader.options(jdbcOptions).load()
      }
      /**
       * Stores a DataFrame in a JDBC table.
       *
       * When `saveMode` is exactly `"overwrite"` the target table is truncated before the rows
       * are inserted; for any other value the rows are inserted without clearing the table.
       *
       * @param sourceDf      DataFrame holding the rows to store
       * @param numPartitions number of partitions, forwarded to `insert`
       * @param tabFormat     storage format; accepted but not used by this method
       * @param partionBy     partitioning; accepted but not used by this method
       * @param saveMode      storage mode; only `"overwrite"` is treated specially
       * @param targetTable   table the rows are written to
       * @param connection    holder of the JDBC settings
       */
      def save(sourceDf: DataFrame, numPartitions: Integer,tabFormat:String=null,partionBy: String=null,
                        saveMode: String, targetTable: String, connection: Connection) = {
        val overwriteRequested = saveMode == "overwrite"
        // Overwrite means "empty the table first"; the insert itself is the same in both modes.
        if (overwriteRequested) truncate(connection, targetTable)
        insert(sourceDf, numPartitions, targetTable, connection)
      }
    
      /**
       * Opens a new JDBC connection through `DriverManager` using the url, user and password
       * held by `connection`. The caller is responsible for closing the returned connection.
       *
       * @param connection holder of the JDBC settings
       * @return the connection returned by `DriverManager.getConnection`
       */
      def getConnection(connection: Connection) ={
        val jdbcUrl = connection.url
        val userName = connection.user
        val secret = connection.password
        DriverManager.getConnection(jdbcUrl, userName, secret)
      }
      
      /**
       * Removes every row from `targetTable` by executing `truncate table <targetTable>`.
       *
       * The JDBC connection and the statement are always closed, even when the statement fails.
       *
       * @param connection  holder of the JDBC settings used to open the connection
       * @param targetTable name of the table to empty
       */
      def truncate(connection: Connection, targetTable: String): Unit = {
        val conn = getConnection(connection)
        try {
          // A table name cannot be bound as a `?` parameter, so it is interpolated into the SQL.
          // NOTE(review): targetTable must therefore come from a trusted source.
          val statement = conn.prepareStatement(s"truncate table $targetTable")
          try statement.executeUpdate()
          finally statement.close()
        } finally {
          conn.close()
        }
      }
      
	   /**
	    * Inserts every row of `sourceDf` into `targetTable` over JDBC.
	    *
	    * The DataFrame is repartitioned into `numPartitions` partitions. Each partition opens its
	    * own connection, writes its rows in batches inside a single transaction, and commits at the
	    * end. If anything fails the transaction is rolled back and the error is rethrown; the
	    * statement and the connection are closed in every case.
	    *
	    * @param sourceDf      DataFrame holding the rows to insert
	    * @param numPartitions number of partitions (and therefore of connections/transactions)
	    * @param targetTable   table the rows are inserted into
	    * @param connection    holder of the JDBC settings used to open each connection
	    */
	   def insert(sourceDf: DataFrame, numPartitions: Integer, targetTable: String, connection: Connection): Unit = {
	     // Parameterised statement, e.g. insert into aa(name,age) values(?,?)
	     val sql = sqls(sourceDf, targetTable)
	     // Placeholders are generated in column order, so values are bound by position as well.
	     val columnCount = sourceDf.columns.length
	     // Rows sent to the database per executeBatch() round trip.
	     val batchSize = 1000
	     sourceDf.repartition(numPartitions).foreachPartition( it => {
	       val conn = getConnection(connection)
	       try {
	         // One transaction per partition.
	         conn.setAutoCommit(false)
	         val statement = conn.prepareStatement(sql)
	         try {
	           it.grouped(batchSize).foreach { rows =>
	             rows.foreach { row =>
	               for (i <- 0 until columnCount) {
	                 // Read by index: a map of values does not keep column order once there
	                 // are more than a handful of columns.
	                 row.get(i) match {
	                   case text: String => statement.setString(i + 1, text)
	                   case null => statement.setString(i + 1, null)
	                   // Non-string values are handed to the driver as-is; Row values are
	                   // already boxed, so the cast only satisfies the Java signature.
	                   case other => statement.setObject(i + 1, other.asInstanceOf[AnyRef])
	                 }
	               }
	               statement.addBatch()
	             }
	             // Flush once per chunk instead of once per row.
	             statement.executeBatch()
	           }
	           conn.commit()
	         } catch {
	           case scala.util.control.NonFatal(e) =>
	             // Undo the partial write, keeping the original failure as the primary error.
	             try conn.rollback()
	             catch { case scala.util.control.NonFatal(rollbackError) => e.addSuppressed(rollbackError) }
	             throw e
	         } finally {
	           statement.close()
	         }
	       } finally {
	         conn.close()
	       }
	     })
	   }
      
      /**
       * Builds the parameterised insert statement for `targetTable`, listing the DataFrame's
       * columns in order and emitting one `?` placeholder per column,
       * e.g. `insert into aa(name,age) values(?,?)`.
       *
       * @param sourceDf    DataFrame whose column names are used
       * @param targetTable table named in the statement
       * @return the insert statement text
       */
      def sqls(sourceDf: DataFrame, targetTable: String): String ={
        val fieldNames = sourceDf.columns
        // Outer whitespace of the joined column list is stripped.
        val columnList = fieldNames.mkString(",").trim
        val placeholders = Seq.fill(fieldNames.length)("?").mkString(",")
        s"insert into $targetTable($columnList) values($placeholders)"
      }

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

spark讀取和存儲jdbc數據庫

PDManer [元數建模]-v4.9.0 發佈：一款簡單好用的數據庫建模平臺

使用neovim打造go ide(支持代碼跳轉, 代碼補全, 實時語法檢查)

cs01 CSS Syntax

挑戰程序設計競賽 2.3章習題 poj 3046 Ant Counting

[MASM拾遺]Offset僞指令

h30 HTML Layout Elements

瞭解顯卡

一款基於C#開發的通訊調試工具（支持Modbus RTU、MQTT調試）

Linux/Golang/glibC系統調用

cs04 CSS Measurement Units

嘗試flume配置文件從啓動命令接收參數

Flink Table API和SQL的分析及使用（一）

Flink Table API和SQL的分析及使用（二）

centos7配置可訪問外網網絡

pom文件打包插件

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結