消息存儲到 CommitLog 文件中後,需要及時更新 ConsumeQueue 和 Index 文件,保證消息能被及時消費和查詢。
文章目錄
更新消費隊列和索引文件
啓動 Broker時,會啓動一個 ReputMessageService 線程用來轉發新增加的消息給調度任務列表 dispatcherList 去處理
// Excerpt of DefaultMessageStore showing how ReputMessageService and the dispatcher list are
// created in the constructor and how start() picks the offset from which dispatching begins.
// The excerpt stitches several places of the class together; unrelated code is elided.
public class DefaultMessageStore implements MessageStore {
private final ReputMessageService reputMessageService;
private final LinkedList<CommitLogDispatcher> dispatcherList;
// Initialized in the DefaultMessageStore constructor
this.reputMessageService = new ReputMessageService();
this.dispatcherList = new LinkedList<>();
// Dispatchers are appended in order: the ConsumeQueue builder first, then the index builder
this.dispatcherList.addLast(new CommitLogDispatcherBuildConsumeQueue());
this.dispatcherList.addLast(new CommitLogDispatcherBuildIndex());
// DefaultMessageStore.start()
// Seed maxPhysicalPosInLogicQueue with the minimum physical offset of the CommitLog
long maxPhysicalPosInLogicQueue = commitLog.getMinOffset();
// Raise it to the largest getMaxPhysicOffset() reported by any ConsumeQueue
for (ConcurrentMap<Integer, ConsumeQueue> maps : this.consumeQueueTable.values()) {
for (ConsumeQueue logic : maps.values()) {
if (logic.getMaxPhysicOffset() > maxPhysicalPosInLogicQueue) {
maxPhysicalPosInLogicQueue = logic.getMaxPhysicOffset();
}
}
}
...忽略 DLedger
// Dispatching starts from this position
this.reputMessageService.setReputFromOffset(maxPhysicalPosInLogicQueue);
this.reputMessageService.start();
}
CommitLogDispatcherBuildConsumeQueue 更新 ConsumeQueue 消息消費隊列索引文件
CommitLogDispatcherBuildIndex 更新 IndexFile 索引文件
ReputMessageService 啓動之後,每隔 1ms 執行一次分發
/**
 * Service thread that reads CommitLog data starting at reputFromOffset and hands every message
 * it finds to the dispatchers registered on the enclosing DefaultMessageStore.
 *
 * This is an excerpt: lines consisting of "..." and the tail of doReput() are elided.
 */
class ReputMessageService extends ServiceThread {
    /** Dispatches, one message at a time, the CommitLog data found from reputFromOffset onward. */
    private void doReput() {
        ...
        // Keep going up to the maximum valid data position of the last CommitLog file
        for (boolean doNext = true; this.isCommitLogAvailable() && doNext; ) {
            ...
            // Fetch all data of this CommitLog file starting from the current offset
            SelectMappedBufferResult result = DefaultMessageStore.this.commitLog.getData(reputFromOffset);
            if (result != null) {
                try {
                    this.reputFromOffset = result.getStartOffset();
                    for (int readSize = 0; readSize < result.getSize() && doNext; ) {
                        // Read one message per iteration
                        DispatchRequest dispatchRequest =
                            DefaultMessageStore.this.commitLog.checkMessageAndReturnSize(result.getByteBuffer(), false, false);
                        // Prefer the buffer size; fall back to the message size when it is -1
                        int size = dispatchRequest.getBufferSize() == -1 ? dispatchRequest.getMsgSize() : dispatchRequest.getBufferSize();
                        if (dispatchRequest.isSuccess()) {
                            if (size > 0) {
                                // Only dispatch when a message was actually read.
                                // dispatcherList is a field of the enclosing DefaultMessageStore,
                                // and the request to forward is the one just parsed.
                                for (CommitLogDispatcher dispatcher : DefaultMessageStore.this.dispatcherList) {
                                    dispatcher.dispatch(dispatchRequest);
                                }
                                ...通知消費者消費
                                this.reputFromOffset += size;
                                readSize += size;
                                ...
                            } else if (size == 0) {
                                // Empty message: roll over to the next file and end the inner loop
                                this.reputFromOffset = DefaultMessageStore.this.commitLog.rollNextFile(this.reputFromOffset);
                                readSize = result.getSize();
                            }
                        } else if (!dispatchRequest.isSuccess()) {
                            ...特殊情況處理
                        }
    /** Runs doReput() after a 1 ms sleep, repeatedly, until the service is stopped. */
    public void run() {
        while (!this.isStopped()) {
            try {
                Thread.sleep(1);
                this.doReput();
            } catch (Exception e) {
                DefaultMessageStore.log.warn(this.getServiceName() + " service has exception. ", e);
            }
        }
    }
}
CommitLogDispatcherBuildConsumeQueue 構建消息消費隊列索引文件
class CommitLogDispatcherBuildConsumeQueue implements CommitLogDispatcher {
public void dispatch(DispatchRequest request) {
final int tranType = MessageSysFlag.getTransactionValue(request.getSysFlag());
switch (tranType) {
case MessageSysFlag.TRANSACTION_NOT_TYPE:
case MessageSysFlag.TRANSACTION_COMMIT_TYPE:
// 普通消息和提交事務的消息
DefaultMessageStore.this.putMessagePositionInfo(request);
break;
case MessageSysFlag.TRANSACTION_PREPARED_TYPE:
case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE:
break;
}
}
}
獲取到主題和隊列對應的 ConsumeQueue 後,寫入消息到 buffer
org.apache.rocketmq.store.ConsumeQueue#putMessagePositionInfoWrapper
/**
 * Writes the position info of a dispatched message into this ConsumeQueue, retrying up to
 * 30 times with a one-second pause after each failed attempt.
 *
 * If the thread is interrupted while pausing, the interrupt status is restored and the
 * method stops retrying.
 *
 * @param request the dispatch request describing the message
 */
public void putMessagePositionInfoWrapper(DispatchRequest request) {
    final int maxRetries = 30;
    // Writing is allowed as long as the running flags report the ConsumeQueue as writeable
    boolean canWrite = this.defaultMessageStore.getRunningFlags().isCQWriteable();
    // Retry up to maxRetries times on failure
    for (int i = 0; i < maxRetries && canWrite; i++) {
        long tagsCode = request.getTagsCode();
        ...
        // Append to the in-memory mapped file without flushing; ConsumeQueue is always flushed asynchronously
        boolean result = this.putMessagePositionInfo(request.getCommitLogOffset(),
            request.getMsgSize(), tagsCode, request.getConsumeQueueOffset());
        if (result) {
            // Record the consumeQueue timestamp in the checkpoint; recovery after an abnormal exit is based on it
            this.defaultMessageStore.getStoreCheckpoint().setLogicsMsgTimestamp(request.getStoreTimestamp());
            return;
        }
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Thread.sleep throws a checked exception: restore the flag and give up retrying
            Thread.currentThread().interrupt();
            return;
        }
    }
}
文件結構
存儲結構
默認包含 30w 個條目,每個條目 20B(8B CommitLog 物理偏移量 + 4B 消息長度 + 8B tagsCode)
org.apache.rocketmq.store.ConsumeQueue#putMessagePositionInfo
// Fills byteBufferIndex with one entry (offset, size, tagsCode) and appends it to the mapped file
// obtained for cqOffset. Returns the result of appendMessage, or false when no mapped file is
// available. Part of the body is elided in this excerpt.
private boolean putMessagePositionInfo(final long offset, final int size, final long tagsCode,
final long cqOffset) {
// Rewind the entry buffer and cap it at one entry (CQ_STORE_UNIT_SIZE bytes)
this.byteBufferIndex.flip();
this.byteBufferIndex.limit(CQ_STORE_UNIT_SIZE);
// Record the entry for this message: long offset + int size + long tagsCode
this.byteBufferIndex.putLong(offset);
this.byteBufferIndex.putInt(size);
this.byteBufferIndex.putLong(tagsCode);
// Logical byte position of the entry: entry number times entry size
final long expectLogicOffset = cqOffset * CQ_STORE_UNIT_SIZE;
// Get the last mapped file of the queue for this logical offset
MappedFile mappedFile = this.mappedFileQueue.getLastMappedFile(expectLogicOffset);
if (mappedFile != null) {
...
// Record the maximum consumable physical offset of the CommitLog
this.maxPhysicOffset = offset + size;
return mappedFile.appendMessage(this.byteBufferIndex.array());
}
return false;
}
消費隊列刷盤
org.apache.rocketmq.store.DefaultMessageStore#flushConsumeQueueService 服務用來執行 consumeQueue 刷盤,每次執行刷盤默認間隔 1s,默認髒頁至少爲2頁才執行刷盤。每次執行 doFlush 時,遍歷每個主題下的每個 consumeQueue,然後執行刷盤。
org.apache.rocketmq.store.DefaultMessageStore.FlushConsumeQueueService#doFlush
// Flushes every ConsumeQueue of every topic, trying each queue up to retryTimes times, and
// flushes the StoreCheckpoint when flushConsumeQueueLeastPages is 0.
// flushConsumeQueueLeastPages and logicsMsgTimestamp are declared in the elided part.
private void doFlush(int retryTimes) {
...
ConcurrentMap<String, ConcurrentMap<Integer, ConsumeQueue>> tables = DefaultMessageStore.this.consumeQueueTable;
for (ConcurrentMap<Integer, ConsumeQueue> maps : tables.values()) {
for (ConsumeQueue cq : maps.values()) {
boolean result = false;
// Stop retrying this queue as soon as one flush call returns true
for (int i = 0; i < retryTimes && !result; i++) {
result = cq.flush(flushConsumeQueueLeastPages);
}
}
}
// logicsMsgTimestamp was recorded from StoreCheckpoint before flushing
if (0 == flushConsumeQueueLeastPages) {
if (logicsMsgTimestamp > 0) {
// Guards against new messages entering the consumeQueue while the checkpoint is being flushed:
// that data has not actually gone through a ConsumeQueue flush, so the earlier timestamp is written back
DefaultMessageStore.this.getStoreCheckpoint().setLogicsMsgTimestamp(logicsMsgTimestamp);
}
DefaultMessageStore.this.getStoreCheckpoint().flush();
}
}
org.apache.rocketmq.store.StoreCheckpoint 存儲檢查時間點服務,對應的物理文件爲 checkpoint,用於存儲 commitLog、consumeQueue、index 文件的刷盤時間,用於 Broker 恢復。
// Holds the three timestamps described as the flush times of the commitLog, consumeQueue and
// index files. The mapped-file members are elided in this excerpt.
public class StoreCheckpoint {
...映射文件
private volatile long physicMsgTimestamp = 0; // commitLog file flush time
private volatile long logicsMsgTimestamp = 0; // consumeQueue file flush time
private volatile long indexMsgTimestamp = 0; // index file flush time
}
默認超過60s還沒有執行過 StoreCheckpoint 刷盤,就進行一次刷盤。
CommitLogDispatcherBuildIndex 構建索引
/** Dispatcher that asks the index service to index a dispatched message. */
class CommitLogDispatcherBuildIndex implements CommitLogDispatcher {
    /**
     * Builds the index for the request unless message indexing is disabled in the store config.
     *
     * @param request the dispatch request parsed from the CommitLog
     */
    @Override
    public void dispatch(DispatchRequest request) {
        final boolean indexEnabled = DefaultMessageStore.this.messageStoreConfig.isMessageIndexEnable();
        if (!indexEnabled) {
            return;
        }
        DefaultMessageStore.this.indexService.buildIndex(request);
    }
}
// Adds index entries for one dispatched message: one for its unique key (if present) and one
// for each non-empty key in its keys string. The excerpt is elided and ends before the
// method's closing braces.
public void buildIndex(DispatchRequest req) {
// Obtain the index file to write to (retryGetAndCreateIndexFile may return null)
IndexFile indexFile = retryGetAndCreateIndexFile();
if (indexFile != null) {
long endPhyOffset = indexFile.getEndPhyOffset();
DispatchRequest msg = req;
String topic = msg.getTopic();
String keys = msg.getKeys();
// Do not index again: the message offset lies before the file's end physical offset
if (msg.getCommitLogOffset() < endPhyOffset) {
return;
}
...
// If the unique key is not null, create one index entry for it
if (req.getUniqKey() != null) {
indexFile = putKey(indexFile, msg, buildKey(topic, req.getUniqKey()));
...
}
// If several keys were specified when the message was created, create one entry per key
if (keys != null && keys.length() > 0) {
String[] keyset = keys.split(MessageConst.KEY_SEPARATOR);
for (int i = 0; i < keyset.length; i++) {
String key = keyset[i];
// Empty fragments produced by the split are skipped
if (key.length() > 0) {
indexFile = putKey(indexFile, msg, buildKey(topic, key));
...
}
索引文件的具體結構
文件名 fileName 是以創建時的時間戳命名的,文件大小是固定的,等於 40 + 500W*4 + 2000W*20 = 420000040 個字節大小。40 Byte 的 Header 用於保存一些總的統計信息,4*500W 的 Slot Table 並不保存真正的索引數據,而是保存每個槽位對應的單向鏈表的頭。20*2000W 是真正的索引數據,即一個 Index File 可以保存 2000W 個索引。
新建一個索引
org.apache.rocketmq.store.index.IndexFile#putKey
/**
 * Creates a new index entry in this IndexFile.
 *
 * <p>The entry is written after the header and the slot table. The slot selected by the key's
 * hash is then updated to hold the new entry's index number, and the entry itself stores the
 * slot's previous value, so entries that share a slot are chained together.
 *
 * <p>Entry layout: key hash (int) + physical offset (long) + time diff (int)
 * + previous index number (int).
 *
 * @param key            message index key
 * @param phyOffset      physical offset of the message
 * @param storeTimestamp store time of the message
 * @return {@code true} if the entry was written; {@code false} if the file already holds
 *         {@code indexNum} entries or an exception occurred while writing
 */
public boolean putKey(final String key, final long phyOffset, final long storeTimestamp) {
    // File is full: return false so that a new index file gets created for further entries
    if (this.indexHeader.getIndexCount() >= this.indexNum) {
        log.warn("Over index file capacity: index count = " + this.indexHeader.getIndexCount()
            + "; index max num = " + this.indexNum);
        return false;
    }

    int keyHash = indexKeyHashMethod(key);
    // Position of the key's slot inside the slot table
    int slotPos = keyHash % this.hashSlotNum;
    // Absolute slot address = header size + size of all preceding slots
    int absSlotPos = IndexHeader.INDEX_HEADER_SIZE + slotPos * hashSlotSize;

    try {
        // Index number currently stored in the slot (head of the chain), normalised to
        // invalidIndex when it is out of range
        int slotValue = this.mappedByteBuffer.getInt(absSlotPos);
        if (slotValue <= invalidIndex || slotValue > this.indexHeader.getIndexCount()) {
            slotValue = invalidIndex;
        }

        // Difference between the message store time and the file's begin timestamp,
        // divided by 1000 and clamped to [0, Integer.MAX_VALUE]
        long timeDiff = storeTimestamp - this.indexHeader.getBeginTimestamp();
        timeDiff = timeDiff / 1000;
        if (this.indexHeader.getBeginTimestamp() <= 0) {
            timeDiff = 0;
        } else if (timeDiff > Integer.MAX_VALUE) {
            timeDiff = Integer.MAX_VALUE;
        } else if (timeDiff < 0) {
            timeDiff = 0;
        }

        // Absolute address of the new entry: header + whole slot table + entries already stored
        int absIndexPos =
            IndexHeader.INDEX_HEADER_SIZE + this.hashSlotNum * hashSlotSize
                + this.indexHeader.getIndexCount() * indexSize;

        // Entry layout: hashCode + phyOffset + timeDiff + previous index number
        this.mappedByteBuffer.putInt(absIndexPos, keyHash);
        this.mappedByteBuffer.putLong(absIndexPos + 4, phyOffset);
        this.mappedByteBuffer.putInt(absIndexPos + 4 + 8, (int) timeDiff);
        this.mappedByteBuffer.putInt(absIndexPos + 4 + 8 + 4, slotValue);

        // The slot now holds the index number of this entry
        this.mappedByteBuffer.putInt(absSlotPos, this.indexHeader.getIndexCount());

        // First entry: record the begin physical offset and begin timestamp
        if (this.indexHeader.getIndexCount() <= 1) {
            this.indexHeader.setBeginPhyOffset(phyOffset);
            this.indexHeader.setBeginTimestamp(storeTimestamp);
        }

        // Count the slot only when it held no valid entry before; a key that lands in an
        // occupied slot extends the existing chain and must not be counted again
        if (invalidIndex == slotValue) {
            this.indexHeader.incHashSlotCount();
        }
        this.indexHeader.incIndexCount();
        this.indexHeader.setEndPhyOffset(phyOffset);
        this.indexHeader.setEndTimestamp(storeTimestamp);

        return true;
    } catch (Exception e) {
        // Report the failure instead of swallowing it; the caller still receives false
        log.error("putKey exception, Key: " + key + " KeyHashCode: " + key.hashCode(), e);
    }

    return false;
}
按索引檢索消息
org.apache.rocketmq.store.index.IndexFile#selectPhyOffset
/**
 * Collects the physical offsets of messages indexed under {@code key} whose reconstructed
 * store time lies within {@code [begin, end]}.
 *
 * <p>Starting from the entry referenced by the key's slot, the chain of previous-index links
 * is followed until enough offsets were collected or the chain ends.
 *
 * @param phyOffsets output list receiving the matching physical offsets
 * @param key        message key
 * @param maxNum     maximum number of offsets to collect
 * @param begin      begin timestamp
 * @param end        end timestamp
 * @param lock       not used by this implementation
 */
public void selectPhyOffset(final List<Long> phyOffsets, final String key, final int maxNum,
    final long begin, final long end, boolean lock) {
    if (!this.mappedFile.hold()) {
        return;
    }

    final int keyHash = indexKeyHashMethod(key);
    // Locate the slot that belongs to the key
    final int slotPos = keyHash % this.hashSlotNum;
    final int absSlotPos = IndexHeader.INDEX_HEADER_SIZE + slotPos * hashSlotSize;

    try {
        // Index number of the chain head stored in the slot
        final int slotValue = this.mappedByteBuffer.getInt(absSlotPos);
        final boolean slotUsable = slotValue > invalidIndex
            && slotValue <= this.indexHeader.getIndexCount()
            && this.indexHeader.getIndexCount() > 1;

        if (slotUsable) {
            int current = slotValue;
            // Stop as soon as enough offsets were collected
            while (phyOffsets.size() < maxNum) {
                // Absolute address of the entry being read
                final int entryPos =
                    IndexHeader.INDEX_HEADER_SIZE + this.hashSlotNum * hashSlotSize
                        + current * indexSize;

                final int storedHash = this.mappedByteBuffer.getInt(entryPos);
                final long storedOffset = this.mappedByteBuffer.getLong(entryPos + 4);
                final long storedDiff = this.mappedByteBuffer.getInt(entryPos + 4 + 8);
                final int previous = this.mappedByteBuffer.getInt(entryPos + 4 + 8 + 4);

                // A negative time diff marks the entry as invalid
                if (storedDiff < 0) {
                    break;
                }

                // Rebuild the store time from the file's begin timestamp
                final long timeRead = this.indexHeader.getBeginTimestamp() + storedDiff * 1000L;
                final boolean inWindow = timeRead >= begin && timeRead <= end;

                // Both the hash and the time window must match
                if (keyHash == storedHash && inWindow) {
                    phyOffsets.add(storedOffset);
                }

                // The chain ends on an invalid or self-referencing link, or once entries are
                // older than the requested window
                final boolean chainEnds = previous <= invalidIndex
                    || previous > this.indexHeader.getIndexCount()
                    || previous == current
                    || timeRead < begin;
                if (chainEnds) {
                    break;
                }

                // Other entries may share this slot: continue with the previous one
                current = previous;
            }
        }
    } catch (Exception e) {
        log.error("selectPhyOffset exception ", e);
    } finally {
        this.mappedFile.release();
    }
}