// Framework
import java.sql.{DriverManager, SQLException}
import org.apache.spark.sql.DataFrame
/** Reads a table through Spark's JDBC data source and returns it as a DataFrame.
  *
  * @param source     value passed as the JDBC `dbtable` option (the source table to read)
  * @param connection holder of the Spark session and the JDBC url / driver / user / password
  * @return the DataFrame produced by the JDBC reader's `load()`
  */
def read(source: String, connection: Connection): DataFrame = {
  // Obtain a reader configured for the JDBC format from the session carried by `connection`.
  val jdbcReader = connection.spark.read.format("jdbc")
  // All JDBC settings come from `connection`, except the table name, which is `source`.
  val jdbcOptions = Map(
    "url" -> connection.url,
    "driver" -> connection.driver,
    "dbtable" -> source,
    "user" -> connection.user,
    "password" -> connection.password)
  jdbcReader.options(jdbcOptions).load()
}
/** Stores a DataFrame into a JDBC table.
  *
  * When `saveMode` is exactly `"overwrite"` the target table is truncated first;
  * for every other value the rows are simply appended.
  *
  * @param sourceDf      DataFrame holding the rows to store
  * @param numPartitions number of partitions the DataFrame is repartitioned into before writing
  * @param tabFormat     storage format of the data (accepted but not used by this method)
  * @param partionBy     partition column (accepted but not used by this method)
  * @param saveMode      storage mode; only `"overwrite"` triggers a truncate
  * @param targetTable   table the rows are written into
  * @param connection    holder of the JDBC connection settings
  */
def save(sourceDf: DataFrame, numPartitions: Integer,tabFormat:String=null,partionBy: String=null,
saveMode: String, targetTable: String, connection: Connection) = {
  val replaceExisting = saveMode == "overwrite"
  if (replaceExisting) {
    // Empty the table before loading the new rows.
    truncate(connection, targetTable)
  }
  // The insert itself is the same in both modes.
  insert(sourceDf, numPartitions, targetTable, connection)
}
/** Opens a new JDBC connection via `DriverManager`.
  *
  * @param connection holder of the JDBC url, user and password
  * @return the connection returned by `DriverManager.getConnection`; closing it is the caller's job
  */
def getConnection(connection: Connection) ={
  val jdbcUrl = connection.url
  val userName = connection.user
  val secret = connection.password
  DriverManager.getConnection(jdbcUrl, userName, secret)
}
/** Removes every row from `targetTable` by executing `truncate table <targetTable>`.
  *
  * The connection and the statement opened here are always closed, even when the
  * statement fails, so repeated calls do not leak database connections.
  *
  * @param connection  holder of the JDBC connection settings
  * @param targetTable table to empty; it is interpolated into the SQL text (a table name
  *                    cannot be bound as a `?` parameter), so it must come from a trusted source
  */
def truncate(connection: Connection, targetTable: String): Unit = {
  val conn = getConnection(connection)
  try {
    val statement = conn.prepareStatement(s"truncate table $targetTable")
    try {
      statement.executeUpdate()
    } finally {
      statement.close()
    }
  } finally {
    conn.close()
  }
}
/** Inserts every row of `sourceDf` into `targetTable` over JDBC.
  *
  * The DataFrame is repartitioned into `numPartitions` partitions. Each non-empty partition
  * opens its own connection, writes its rows in batches inside a single transaction and
  * commits once at the end. If anything fails the transaction is rolled back, and the
  * statement and connection are closed in every case.
  *
  * @param sourceDf      DataFrame holding the rows to insert
  * @param numPartitions number of partitions (and therefore of per-partition transactions)
  * @param targetTable   table the rows are inserted into
  * @param connection    holder of the JDBC connection settings
  */
def insert(sourceDf: DataFrame, numPartitions: Integer, targetTable: String, connection: Connection)={
  // Statement text with one "?" placeholder per column, e.g. insert into aa(name,age) values(?,?)
  val sql = sqls(sourceDf, targetTable)
  val columnCount = sourceDf.columns.length
  // Rows accumulated per executeBatch() call, so a partition costs a few round trips, not one per row.
  val batchSize = 1000
  sourceDf.repartition(numPartitions).foreachPartition((it: Iterator[org.apache.spark.sql.Row]) => {
    // Do not open a connection for a partition that has nothing to write.
    if (it.hasNext) {
      val conn = getConnection(connection)
      var committed = false
      try {
        // One transaction per partition: nothing becomes visible until commit().
        conn.setAutoCommit(false)
        val statement = conn.prepareStatement(sql)
        try {
          var pending = 0
          it.foreach { row =>
            // Bind by position: the placeholders follow the DataFrame's column order, whereas
            // going through a Map of values does not guarantee that iteration order.
            for (i <- 0 until columnCount) {
              row.get(i) match {
                case text: String => statement.setString(i + 1, text)
                case null         => statement.setString(i + 1, null)
                // Non-string values are handed to the driver's standard object mapping.
                case other        => statement.setObject(i + 1, other)
              }
            }
            statement.addBatch()
            pending += 1
            if (pending >= batchSize) {
              statement.executeBatch()
              pending = 0
            }
          }
          // Flush the rows left over from the last, partially filled batch.
          if (pending > 0) {
            statement.executeBatch()
          }
        } finally {
          statement.close()
        }
        conn.commit()
        committed = true
      } finally {
        try {
          // Undo partial work explicitly; what close() does to an open transaction is driver-specific.
          if (!committed) {
            conn.rollback()
          }
        } catch {
          case _: SQLException => // ignore: the failure that caused the rollback is the one to report
        } finally {
          conn.close()
        }
      }
    }
  })
}
/** Builds the parameterised insert statement for `targetTable`.
  *
  * The column list is taken from `sourceDf.columns`, and one `?` placeholder is emitted
  * per column, e.g. `insert into aa(name,age) values(?,?)`.
  *
  * @param sourceDf    DataFrame whose column names form the column list
  * @param targetTable table named in the statement
  * @return the insert statement text
  */
def sqls(sourceDf: DataFrame, targetTable: String): String ={
  val columnNames = sourceDf.columns
  // Comma-separated column names, with surrounding whitespace removed from the joined text.
  val nameList = columnNames.mkString(",").trim
  // One "?" per column, comma-separated.
  val placeholders = Seq.fill(columnNames.length)("?").mkString(",")
  s"insert into $targetTable($nameList) values($placeholders)"
}