項目場景:
採集設備每小時將採集的信息以json文件的形式上報到指定位置,項目服務每小時定時讀取文件的內容,然後將文件中的信息解析入庫。前期由於上報的文件較小,所以採用直接將json字符串轉javaBean對象然後分析後入庫。等上了正式環境後發現每小時上報的文件高達200M~400M,此時jvm直接內存溢出。
問題分析:
200M的數據包含的對象信息有100w之多,所以解析json數據和解析後的數據入庫都是一個問題。
之前有寫過批量保存100W以上的數據,所以數據入庫問題不大,這裏主要介紹大文件json數據解析問題。(批量保存百萬級數據鏈接:https://blog.csdn.net/haohao_ding/article/details/102676528)
爲了保證服務不佔用大量的內存,我們不能使用傳統的一次性將文件讀取、解析、入庫這樣的步驟方法,只能採用分批讀取、分批解析、分批入庫,保證整個環節的內存使用保持在較爲合理的範圍內。這裏使用的是JsonReader對象,通過文件流的形式,一個一個讀取json文件中的對象,讀取一批對象後再進行分析、保存操作。等這一批入庫後接着讀取,再分析、入庫,直到整個文件讀取完爲止。
代碼實現:
import com.google.gson.stream.JsonReader;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
@Data
@Slf4j
public abstract class BigSingleJsonParse<T> {
Reader in = null;
int count = 100000;
ArrayList <T> objectList = new ArrayList<>();
public BigSingleJsonParse(Reader in) {
this.in = in;
}
public BigSingleJsonParse() {
}
public void readMessageArrayToDo(){
if (in != null) {
JsonReader reader = new JsonReader(in);
try {
reader.beginArray();
while (reader.hasNext()) {
if (objectList.size() > count) {
toDoSome(objectList);
objectList.clear();
}
objectList.add(readMessage(reader));
}
reader.endArray();
toDoSome(objectList);
objectList.clear();
} catch (IOException e) {
e.printStackTrace();
}
}else{
log.error("文件流傳入爲null。");
}
}
public void readMessageArrayToDo(Reader in){
this.in = in;
readMessageArrayToDo();
}
public abstract T readMessage(JsonReader reader);
public abstract void toDoSome(ArrayList <T> objectList);
}
上面的代碼是一個簡單的封裝:readMessageArrayToDo()方法負責讀取傳入的文件流(或者實例化對象時set的文件流),逐個讀取到count(默認100000)個對象後,調用toDoSome()方法對這批數據進行分析和保存;readMessage()方法則負責具體讀取每一個對象。
下面是具體的實現
@Component("netCollectDataJsonParse")
@Data
@Slf4j
public class NetCollectDataJsonParseImpl extends BigSingleJsonParse<NetCollectData> {
@Autowired
@Qualifier("netCollectDataBigDataInsert")
private BigDataInsertOrUpdate netCollectDataBigDataInsertOrUpdate;
@Autowired
@Qualifier("invalidNetCollectDataBigDataInsert")
private BigDataInsertOrUpdate invalidNetCollectDataBigDataInsertOrUpdate;
public NetCollectDataJsonParseImpl() {
}
public NetCollectDataJsonParseImpl(Reader in) {
super(in);
}
@Override
public NetCollectData readMessage(JsonReader reader) {
NetCollectData netCollectData = new NetCollectData();
try {
reader.beginObject();
while(reader.hasNext()){
String name = reader.nextName();
if("COLLECT_TIME".equals(name)){
netCollectData.setCOLLECT_TIME(reader.nextString());
}else if("MAC".equals(name)){
netCollectData.setMAC(reader.nextString());
}else if("IP".equals(name)){
netCollectData.setIP(reader.nextString());
}else if("TIME".equals(name)){
netCollectData.setTIME(reader.nextString());
}else if("TS_UP_4G".equals(name)){
netCollectData.setTS_UP_4G(reader.nextLong());
}else if("TS_UP".equals(name)){
netCollectData.setTS_UP(reader.nextLong());
}else if("TS_DOWN_4G".equals(name)){
netCollectData.setTS_DOWN_4G(reader.nextLong());
}else if("TS_DOWN".equals(name)){
netCollectData.setTS_DOWN(reader.nextLong());
}else if("PKG_UP_4G".equals(name)){
netCollectData.setPKG_UP_4G(reader.nextLong());
}else if("PKG_UP".equals(name)){
netCollectData.setPKG_UP(reader.nextLong());
}else if("PKG_DOWN_4G".equals(name)){
netCollectData.setPKG_DOWN_4G(reader.nextLong());
}else if("PKG_DOWN".equals(name)){
netCollectData.setPKG_DOWN(reader.nextLong());
}else{
reader.skipValue();
}
}
reader.endObject();
} catch (IOException e) {
e.printStackTrace();
}
return netCollectData;
}
@Override
public void toDoSome(ArrayList<NetCollectData> netCollectDataList) {
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss");
List<NetCollectData> netCollectDatas = new ArrayList<>();
List<InvalidNetCollectData> invalidNetCollectDatas = new ArrayList<>();
for (int i = 0; i < netCollectDataList.size(); i++) {
NetCollectData netCollectData = netCollectDataList.get(i);
// 時間爲1970.01.01 08:00:00代表斷開網絡
if ("1970.01.01 08:00:00".equals(netCollectData.getTIME())) {
InvalidNetCollectData invalidNetCollectData = netCollectDataToInvalidNetCollectData(netCollectData);
try {
invalidNetCollectData.setDate(dateFormat.parse(netCollectData.getCOLLECT_TIME()));
} catch (ParseException e) {
log.error("讀取到的採集時間轉換異常");
invalidNetCollectData.setDate(new Date());
}
invalidNetCollectDatas.add(invalidNetCollectData);
} else {
if(MacOfEduIdBucket.isContainsKey(netCollectData.getMAC())){
netCollectData.setEduId(MacOfEduIdBucket.getEduIdByMac(netCollectData.getMAC()));
try {
netCollectData.setDate(dateFormat.parse(netCollectData.getCOLLECT_TIME()));
} catch (ParseException e) {
log.error("讀取到的採集時間轉換異常");
netCollectData.setDate(new Date());
}
netCollectDatas.add(netCollectData);
} else {
log.warn("該mac[{}],在redis裏面找不到對應的學生信息", netCollectData.getMAC());
InvalidNetCollectData invalidNetCollectData = netCollectDataToInvalidNetCollectData(netCollectData);
try {
invalidNetCollectData.setDate(dateFormat.parse(netCollectData.getCOLLECT_TIME()));
} catch (ParseException e) {
log.error("讀取到的採集時間轉換異常");
invalidNetCollectData.setDate(new Date());
}
invalidNetCollectDatas.add(invalidNetCollectData);
}
}
}
if(CollectionUtils.isNotEmpty(netCollectDatas)){
netCollectDataBigDataInsertOrUpdate.insertOrUpdateBigData(netCollectDatas);
}
if(CollectionUtils.isNotEmpty(invalidNetCollectDatas)){
invalidNetCollectDataBigDataInsertOrUpdate.insertOrUpdateBigData(invalidNetCollectDatas);
}
}
/**
* 由netCollectData對象得到invalidNetCollectData對象
*
* @param netCollectData
* @return
*/
private InvalidNetCollectData netCollectDataToInvalidNetCollectData(NetCollectData netCollectData) {
InvalidNetCollectData invalidNetCollectData = new InvalidNetCollectData();
invalidNetCollectData.setCOLLECT_TIME(netCollectData.getCOLLECT_TIME());
invalidNetCollectData.setIP(netCollectData.getIP());
invalidNetCollectData.setMAC(netCollectData.getMAC());
invalidNetCollectData.setEduId(netCollectData.getEduId());
invalidNetCollectData.setTIME(netCollectData.getTIME());
invalidNetCollectData.setPKG_DOWN(netCollectData.getPKG_DOWN());
invalidNetCollectData.setPKG_DOWN_4G(netCollectData.getPKG_DOWN_4G());
invalidNetCollectData.setPKG_UP(netCollectData.getPKG_UP());
invalidNetCollectData.setPKG_UP_4G(netCollectData.getPKG_UP_4G());
invalidNetCollectData.setTS_DOWN(netCollectData.getTS_DOWN());
invalidNetCollectData.setTS_DOWN_4G(netCollectData.getTS_DOWN_4G());
invalidNetCollectData.setTS_UP(netCollectData.getTS_UP());
invalidNetCollectData.setTS_UP_4G(netCollectData.getTS_UP_4G());
return invalidNetCollectData;
}
}
使用方法:
@Autowired
private BigSingleJsonParse netCollectDataJsonParse;
.
.
.
.
.
.
netCollectDataJsonParse.setIn(new FileReader(filePath));
netCollectDataJsonParse.readMessageArrayToDo();
經過測試:400M的文件輕鬆解析入庫,不再出現內存溢出的情況。