spark讀取和存儲jdbc數據庫

框架

import java.sql.{DriverManager, SQLException}
import org.apache.spark.sql.DataFrame
     /**
      * Loads a table through Spark's JDBC data source and returns it as a DataFrame.
      *
      * @param source     name of the source table, passed as the "dbtable" option
      * @param connection JDBC settings (session, url, driver, user, password)
      * @return the table contents as a DataFrame
      */
      def read(source: String, connection: Connection): DataFrame = {
        val reader = connection.spark.read.format("jdbc")
        val configured = reader
          .option("url", connection.url)
          .option("driver", connection.driver)
          .option("dbtable", source)
          .option("user", connection.user)
          .option("password", connection.password)
        configured.load()
      }
      /**
       * Stores a DataFrame into a JDBC table.
       *
       * When `saveMode` is exactly "overwrite" the target table is truncated first;
       * for any other value the rows are simply appended. The truncate runs as its own
       * step before the insert starts.
       *
       * @param sourceDf      DataFrame holding the rows to store
       * @param numPartitions number of partitions used while writing
       * @param tabFormat     storage format of the data (not used by this method)
       * @param partionBy     partitioning column (not used by this method)
       * @param saveMode      storage mode; "overwrite" empties the table before inserting
       * @param targetTable   table the rows are written to
       * @param connection    JDBC settings
       */
      def save(
          sourceDf: DataFrame,
          numPartitions: Integer,
          tabFormat: String = null,
          partionBy: String = null,
          saveMode: String,
          targetTable: String,
          connection: Connection) = {
        val replaceExisting = saveMode == "overwrite"
        // Empty the table only in overwrite mode.
        if (replaceExisting) truncate(connection, targetTable)
        // The insert itself is the same in every mode.
        insert(sourceDf, numPartitions, targetTable, connection)
      }
    
      /**
       * Opens a new JDBC connection from the url, user and password held by `connection`.
       * The caller is responsible for closing the returned connection.
       */
      def getConnection(connection: Connection) ={
        val jdbcUrl = connection.url
        val userName = connection.user
        val secret = connection.password
        DriverManager.getConnection(jdbcUrl, userName, secret)
      }
      
      /**
       * Removes all rows from `targetTable` by executing `truncate table`.
       *
       * The connection and statement opened here are always closed, even when the
       * statement fails, so repeated calls do not leak database connections.
       *
       * @param connection  JDBC settings used to open the connection
       * @param targetTable table to empty; it is interpolated into the SQL text as-is,
       *                    so it must come from a trusted source
       */
      def truncate(connection: Connection, targetTable: String): Unit = {
        val conn = getConnection(connection)
        try {
          val statement = conn.prepareStatement(s"truncate table $targetTable")
          try {
            statement.executeUpdate()
          } finally {
            statement.close()
          }
        } finally {
          conn.close()
        }
      }
      
	   /**
	    * Inserts every row of `sourceDf` into `targetTable` over JDBC.
	    *
	    * Each partition opens its own connection and writes its rows inside a single
	    * transaction: rows are sent in batches of `batchSize`, committed once at the end,
	    * and rolled back if anything fails. The statement and connection are always closed.
	    *
	    * Values are bound by column position so they always line up with the placeholders
	    * produced by `sqls`. Strings are bound with `setString`, other values with `setObject`.
	    *
	    * @param sourceDf      DataFrame whose rows are inserted
	    * @param numPartitions number of partitions the DataFrame is repartitioned into
	    * @param targetTable   table to insert into
	    * @param connection    JDBC settings used to open a connection in each partition
	    * @param batchSize     maximum number of rows sent per JDBC batch; must be positive
	    */
	   def insert(sourceDf: DataFrame, numPartitions: Integer, targetTable: String, connection: Connection,
	              batchSize: Int = 1000): Unit = {
	     require(batchSize > 0, s"batchSize must be positive, got $batchSize")
	     // Statement text of the form: insert into aa(name,age) values(?,?)
	     val sql = sqls(sourceDf, targetTable)
	     // One placeholder exists per DataFrame column, in column order.
	     val columnCount = sourceDf.columns.length
	     sourceDf.repartition(numPartitions).foreachPartition( it => {
	       val conn = getConnection(connection)
	       try {
	         // One transaction per partition.
	         conn.setAutoCommit(false)
	         val statement = conn.prepareStatement(sql)
	         try {
	           it.grouped(batchSize).foreach { chunk =>
	             chunk.foreach { row =>
	               for (i <- 0 until columnCount) {
	                 row.get(i) match {
	                   // Nulls keep the original binding call.
	                   case null => statement.setString(i + 1, null)
	                   case text: String => statement.setString(i + 1, text)
	                   case other => statement.setObject(i + 1, other)
	                 }
	               }
	               statement.addBatch()
	             }
	             // Send the accumulated rows in one round trip instead of one per row.
	             statement.executeBatch()
	           }
	           conn.commit()
	         } catch {
	           case scala.util.control.NonFatal(e) =>
	             // Undo the partial partition; keep the original failure as the primary error.
	             try conn.rollback()
	             catch { case scala.util.control.NonFatal(r) => e.addSuppressed(r) }
	             throw e
	         } finally {
	           statement.close()
	         }
	       } finally {
	         conn.close()
	       }
	     })
	   }
      
      /**
       * Builds the parameterised insert statement for `targetTable`, listing every
       * DataFrame column and one `?` placeholder per column,
       * e.g. insert into aa(name,age) values(?,?)
       *
       * @param sourceDf    DataFrame whose column names become the insert column list
       * @param targetTable table named in the statement
       * @return the insert statement text
       */
      def sqls(sourceDf: DataFrame, targetTable: String): String ={
        val fieldNames = sourceDf.columns
        // Comma-separated column list; surrounding whitespace is trimmed.
        val name = fieldNames.mkString(",").trim()
        // One placeholder per column.
        val value = fieldNames.map(_ => "?").mkString(",")
        s"insert into $targetTable($name) values($value)"
      }
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章