項目場景:需要從上報的單個文件中解析出上百萬條數據並入庫。項目中無論使用 JPA 還是 MyBatis,當寫入的數據量達到 10000 條時速度就明顯變慢,達到 100000 條時更是慢得讓人難以接受。因此考慮改用存儲過程或原生 JDBC 實現,本案例採用原生 JDBC 實現。
單線程實現案例:
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
@Slf4j
@Data
public abstract class BigDataInsert<T> {
// String driverClassName = "com.mysql.cj.jdbc.Driver";
String driverClassName = "";
// String url = "jdbc:mysql://localhost:3306/bigdata?useServerPrepStmts=false&rewriteBatchedStatements=true&useUnicode=true&characterEncoding=UTF-8";
String url = "";
// String user = "root";
String user = "";
// String password = "123456";
String password = "";
// String sql = "";
String sql = "";
public BigDataInsert() {
init();
}
public void insertBigData(List<T> list) {
//定義連接、statement對象
Connection conn = null;
PreparedStatement pstm = null;
try {
// 檢查初始化參數
checkInit();
//加載jdbc驅動
Class.forName(driverClassName);
//連接mysql
conn = DriverManager.getConnection(url, user, password);
//將自動提交關閉
conn.setAutoCommit(false);
//預編譯sql
pstm = conn.prepareStatement(sql);
//開始總計時
long bTime1 = System.currentTimeMillis();
List<List<T>> listList = null;
if (list.size() > 100000) {
listList = fixedGrouping(list, 100000);
} else {
listList.add(list);
}
//循環10次,每次十萬數據,一共100萬
for (int i = 0; i < listList.size(); i++) {
//開啓分段計時,計1W數據耗時
long bTime = System.currentTimeMillis();
//開始循環
for (T object : listList.get(i)) {
//賦值
pstmToSetValue(pstm, object);
//添加到同一個批處理中
pstm.addBatch();
}
//執行批處理
pstm.executeBatch();
//提交事務
conn.commit();
//關閉分段計時
long eTime = System.currentTimeMillis();
//輸出
System.out.println("成功插入" + listList.get(i).size() + "條數據耗時:" + (eTime - bTime));
}
//關閉總計時
long eTime1 = System.currentTimeMillis();
//輸出
System.out.println("插入" + list.size() + "數據共耗時:" + (eTime1 - bTime1));
} catch (SQLException e) {
e.printStackTrace();
} catch (ClassNotFoundException e1) {
e1.printStackTrace();
}finally {
try {
pstm.close();
conn.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
private void checkInit() {
if ("".equals(driverClassName)) {
log.warn("driverClassName未初始化!");
}
if ("".equals(url)) {
log.warn("url未初始化!");
}
if ("".equals(user)) {
log.warn("user未初始化!");
}
if ("".equals(password)) {
log.warn("password未初始化!");
}
if ("".equals(sql)) {
log.warn("sql未設置!");
}
}
/**
* 將一組數據固定分組,每組n個元素
*
* @param source 要分組的數據源
* @param n 每組n個元素
* @param <T>
* @return
*/
public static <T> List<List<T>> fixedGrouping(List<T> source, int n) {
if (null == source || source.size() == 0 || n <= 0)
return null;
List<List<T>> result = new ArrayList<List<T>>();
int remainder = source.size() % n;
int size = (source.size() / n);
for (int i = 0; i < size; i++) {
List<T> subset = null;
subset = source.subList(i * n, (i + 1) * n);
result.add(subset);
}
if (remainder > 0) {
List<T> subset = null;
subset = source.subList(size * n, size * n + remainder);
result.add(subset);
}
return result;
}
public abstract PreparedStatement pstmToSetValue(PreparedStatement pstm, T object);
public abstract void init();
}
上面的代碼是對批量保存大數據的單線程封裝,使用方法如下:
import *****.NetCollectData;
import org.springframework.stereotype.Component;
import java.sql.Date;
import java.sql.PreparedStatement;
import java.sql.SQLException;
/**
 * {@link BigDataInsert} implementation that bulk-inserts {@code NetCollectData} elements into the
 * {@code net_collect_data} table.
 */
@Component("netCollectDataBigDataInsert")
public class NetCollectDataBigDataInsertImpl extends BigDataInsert<NetCollectData> {

    /**
     * Supplies the JDBC driver, URL, credentials and the 15-column INSERT statement.
     *
     * <p>NOTE(review): the host, user and password are hard-coded in source; they should be
     * moved to external configuration before this is used outside a demo.
     */
    @Override
    public void init() {
        this.driverClassName = "com.mysql.cj.jdbc.Driver";
        this.url = "jdbc:mysql://10.1.1.149:3306/cnleb139?useServerPrepStmts=false&rewriteBatchedStatements=true&useUnicode=true&characterEncoding=UTF-8";
        this.user = "root";
        this.password = "Root123!!!";
        this.sql = "INSERT INTO net_collect_data(collect_time,mac,edu_id,ip,time,TS_UP_4G,TS_UP,TS_DOWN_4G,TS_DOWN,PKG_UP_4G,PKG_UP,PKG_DOWN_4G,PKG_DOWN,status_code,created_date) " +
                "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
    }

    /**
     * Binds the fields of one {@code NetCollectData} to the 15 statement parameters, in the
     * column order of the INSERT statement.
     *
     * @param pstm           prepared statement for the INSERT defined in {@link #init()}
     * @param netCollectData element whose values are bound
     * @return {@code pstm}
     * @throws IllegalStateException if a parameter cannot be bound; the original
     *         {@link SQLException} is attached as the cause
     */
    @Override
    public PreparedStatement pstmToSetValue(PreparedStatement pstm, NetCollectData netCollectData) {
        try {
            pstm.setString(1, netCollectData.getCOLLECT_TIME());
            pstm.setString(2, netCollectData.getMAC());
            pstm.setString(3, netCollectData.getEduId());
            pstm.setString(4, netCollectData.getIP());
            pstm.setString(5, netCollectData.getTIME());
            pstm.setLong(6, netCollectData.getTS_UP_4G());
            pstm.setLong(7, netCollectData.getTS_UP());
            pstm.setLong(8, netCollectData.getTS_DOWN_4G());
            pstm.setLong(9, netCollectData.getTS_DOWN());
            pstm.setLong(10, netCollectData.getPKG_UP_4G());
            pstm.setLong(11, netCollectData.getPKG_UP());
            pstm.setLong(12, netCollectData.getPKG_DOWN_4G());
            pstm.setLong(13, netCollectData.getPKG_DOWN());
            pstm.setInt(14, netCollectData.getStatus());
            // NOTE(review): setDate binds a SQL DATE; if created_date is a DATETIME/TIMESTAMP
            // column the time of day is presumably lost - confirm against the table schema.
            pstm.setDate(15, new Date(netCollectData.getDate().getTime()));
        } catch (SQLException e) {
            // Do not swallow: returning normally would let the caller add a half-bound
            // statement to the batch and store wrong or stale values for this row.
            throw new IllegalStateException("Failed to bind NetCollectData to the insert statement", e);
        }
        return pstm;
    }
}
繼承 BigDataInsert 並實現 init() 和 pstmToSetValue() 兩個方法:
init():初始化連接信息(包括插入語句)的方法,在實例被創建時調用。
pstmToSetValue():爲預編譯的 SQL 語句設置參數值,在調用 insertBigData() 時對每條數據執行一次。
經過測試,這裏是對16個字段的表插入數據,字段越多越慢,50w數據需要約15分鐘才能完成。
多線程版本:
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@Slf4j
@Data
public abstract class BigDataInsert<T> {
// String driverClassName = "com.mysql.cj.jdbc.Driver";
String driverClassName = "";
// String url = "jdbc:mysql://localhost:3306/bigdata?useServerPrepStmts=false&rewriteBatchedStatements=true&useUnicode=true&characterEncoding=UTF-8";
String url = "";
// String user = "root";
String user = "";
// String password = "123456";
String password = "";
// String sql = "";
String sql = "";
int groupCount = 100000;
int threadPoolCount = 5;
// 創建一個固定大小的線程池
private ExecutorService service = null;
public BigDataInsert() {
init();
}
public void insertBigData(List<T> list) {
// 檢查初始化參數
checkInit();
// 創建線程池對象
service = Executors.newFixedThreadPool(threadPoolCount);
// 將需保存集合分組
List<List<T>> listList = new ArrayList<>();
if (list.size() > groupCount) {
listList = fixedGrouping(list, groupCount);
} else {
listList.add(list);
}
// 計數器
final CountDownLatch latch = new CountDownLatch(listList.size());
//開始總計時
long bTime1 = System.currentTimeMillis();
//循環10次,每次十萬數據,一共100萬
for (int i = 0; i < listList.size(); i++) {
int finalI = i;
List<List<T>> finalListList = listList;
// 多線程保存
service.execute(() -> {
Connection conn = null;
PreparedStatement pstm = null;
try {
//加載jdbc驅動
Class.forName(driverClassName);
//連接mysql
conn = DriverManager.getConnection(url, user, password);
//將自動提交關閉
conn.setAutoCommit(false);
//預編譯sql
pstm = conn.prepareStatement(sql);
//開啓分段計時,計1W數據耗時
long bTime = System.currentTimeMillis();
//開始循環
for (T object : finalListList.get(finalI)) {
//賦值
pstmToSetValue(pstm, object);
//添加到同一個批處理中
pstm.addBatch();
}
//執行批處理
pstm.executeBatch();
//提交事務
conn.commit();
//關閉分段計時
long eTime = System.currentTimeMillis();
//輸出
System.out.println("成功插入" + finalListList.get(finalI).size() + "條數據耗時:" + (eTime - bTime));
} catch (Exception e) {
log.error("批量保存失敗!");
} finally {
latch.countDown();
try {
pstm.close();
conn.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
});
}
try {
latch.await();
} catch (Exception e) {
log.error("多線程分析數據中途異常!,{}", e);
}
//關閉總計時
long eTime1 = System.currentTimeMillis();
//輸出
System.out.println("插入" + list.size() + "數據共耗時:" + (eTime1 - bTime1));
}
private void checkInit() {
if ("".equals(driverClassName)) {
log.warn("driverClassName未初始化!");
}
if ("".equals(url)) {
log.warn("url未初始化!");
}
if ("".equals(user)) {
log.warn("user未初始化!");
}
if ("".equals(password)) {
log.warn("password未初始化!");
}
if ("".equals(sql)) {
log.warn("sql未設置!");
}
}
/**
* 將一組數據固定分組,每組n個元素
*
* @param source 要分組的數據源
* @param n 每組n個元素
* @param <T>
* @return
*/
public static <T> List<List<T>> fixedGrouping(List<T> source, int n) {
if (null == source || source.size() == 0 || n <= 0)
return null;
List<List<T>> result = new ArrayList<List<T>>();
int remainder = source.size() % n;
int size = (source.size() / n);
for (int i = 0; i < size; i++) {
List<T> subset = null;
subset = source.subList(i * n, (i + 1) * n);
result.add(subset);
}
if (remainder > 0) {
List<T> subset = null;
subset = source.subList(size * n, size * n + remainder);
result.add(subset);
}
return result;
}
public abstract PreparedStatement pstmToSetValue(PreparedStatement pstm, T object);
public abstract void init();
}
多線程版本允許在實現 init() 方法時修改線程數量(threadPoolCount,默認 5)以及每組保存的數據條數(groupCount,默認 100000)。
經過測試:50w數據3分鐘左右能夠完成(本人電腦性能一般,性能好的電腦會更快)。
參考blog: https://blog.csdn.net/q6834850/article/details/73726707