磁盤管理
磁盤管理負責監控 ledger 目錄和 index 目錄的磁盤使用情況。BookKeeper 中有一個週期性任務負責執行目錄檢測,該任務由 LedgerDirsMonitor 實現:它藉助 DiskChecker 檢測每個 LedgerDirsManager 所管理的 ledger/index 目錄的磁盤使用情況,並對每個 LedgerDirsManager 執行下面的檢測邏輯:
/**
 * Creates a monitor that periodically checks the disk usage of ledger/index directories.
 *
 * @param conf         server configuration; supplies the disk check interval and the minimum
 *                     usable size required for high-priority writes
 * @param diskChecker  performs the actual disk check
 * @param dirsManagers the managers of the ledger/index directories to be checked
 */
public LedgerDirsMonitor(final ServerConfiguration conf,
                         final DiskChecker diskChecker,
                         final List<LedgerDirsManager> dirsManagers) {
    // Collaborators first:
    //   - LedgerDirsManager: manages the ledger/index directories
    //   - DiskChecker: executes the actual disk check logic
    this.dirsManagers = dirsManagers;
    this.diskChecker = diskChecker;
    this.conf = conf;

    // Minimum disk space needed for high-priority writes.
    // NOTE(review): the original notes say the default equals the minimum space needed to
    // create an entry log file -- confirm against ServerConfiguration.
    this.minUsableSizeForHighPriorityWrites = conf.getMinUsableSizeForHighPriorityWrites();
    this.interval = conf.getDiskCheckInterval();
}
...
/**
 * Runs one disk-check pass over the directories managed by {@code ldm}.
 *
 * <p>Phase 1 checks every writable directory, records its usage and notifies listeners about
 * failed / almost-full disks; out-of-space directories are handed to
 * {@code ldm.addToFilledDirs}. If no writable directory remains, listeners receive
 * {@code allDisksFull}. Phase 2 re-checks the full-filled directories and, when allowed,
 * hands them back via {@code ldm.addToWritableDirs}. An {@link IOException} in phase 2 is
 * reported to listeners through {@code fatalError()}.
 *
 * @param ldm the directories manager whose directories are checked
 */
private void check(final LedgerDirsManager ldm) {
final ConcurrentMap<File, Float> diskUsages = ldm.getDiskUsages();
try {
List<File> writableDirs = ldm.getWritableLedgerDirs();
// Check the disk space usage of every writable directory.
for (File dir : writableDirs) {
try {
// The actual check is delegated to diskChecker. When checkDir returns normally
// (no threshold exception was thrown), the returned usage is recorded.
diskUsages.put(dir, diskChecker.checkDir(dir));
} catch (DiskErrorException e) {
// Disk failure: notify every listener through diskFailed.
// NOTE(review): the original notes say the default listener shuts the bookie down -- confirm.
LOG.error("Ledger directory {} failed on disk checking : ", dir, e);
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.diskFailed(dir);
}
} catch (DiskWarnThresholdException e) {
// Usage is above the warn threshold: record the usage (logging only when it changed
// since the previous pass) and notify every listener through diskAlmostFull.
// NOTE(review): the original notes say the default listener sets
// shouldCreateNewEntryLog to true -- confirm.
diskUsages.compute(dir, (d, prevUsage) -> {
if (null == prevUsage || e.getUsage() != prevUsage) {
LOG.warn("Ledger directory {} is almost full : usage {}", dir, e.getUsage());
}
return e.getUsage();
});
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.diskAlmostFull(dir);
}
} catch (DiskOutOfSpaceException e) {
// The directory is out of space: record the usage (logging only when it changed
// since the previous pass) and add the directory to the filled dirs.
// Presumably addToFilledDirs also removes it from the writable dirs -- verify in
// LedgerDirsManager.
diskUsages.compute(dir, (d, prevUsage) -> {
if (null == prevUsage || e.getUsage() != prevUsage) {
LOG.error("Ledger directory {} is out-of-space : usage {}", dir, e.getUsage());
}
return e.getUsage();
});
ldm.addToFilledDirs(dir);
}
}
// After all directories were checked, ask for the writable dirs again so that a
// NoWritableLedgerDirException (handled below) surfaces right away instead of only
// on the next check iteration.
ldm.getWritableLedgerDirs();
} catch (NoWritableLedgerDirException e) {
LOG.warn("LedgerDirsMonitor check process: All ledger directories are non writable");
// No directory is writable. High-priority writes (per the original notes: compaction,
// journal replay and similar operations) start out as allowed.
boolean highPriorityWritesAllowed = true;
try {
// Look for directories whose usable space is above minUsableSizeForHighPriorityWrites.
// If there is none, NoWritableLedgerDirException is thrown and high-priority writes
// are disallowed as well. Either way the listeners' allDisksFull is invoked afterwards.
// NOTE(review): the original notes say this turns the bookie read-only by default -- confirm.
ldm.getDirsAboveUsableThresholdSize(minUsableSizeForHighPriorityWrites, false);
} catch (NoWritableLedgerDirException e1) {
highPriorityWritesAllowed = false;
}
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.allDisksFull(highPriorityWritesAllowed);
}
}
// Work on a snapshot copy of the full-filled dirs for the re-check loop below.
List<File> fullfilledDirs = new ArrayList<File>(ldm.getFullFilledLedgerDirs());
boolean makeWritable = ldm.hasWritableLedgerDirs();
// When bookie is in READONLY mode, i.e there are no writableLedgerDirs:
// - Update fullfilledDirs disk usage.
// - If the total disk usage is below DiskLowWaterMarkUsageThreshold
// add fullfilledDirs back to writableLedgerDirs list if their usage is < conf.getDiskUsageThreshold.
try {
if (!makeWritable) {
// There is no writable dir: compare the total disk usage of all ledger dirs with the
// low water mark threshold, and allow dirs to become writable again only when the
// total usage is below it.
float totalDiskUsage = diskChecker.getTotalDiskUsage(ldm.getAllLedgerDirs());
if (totalDiskUsage < conf.getDiskLowWaterMarkUsageThreshold()) {
makeWritable = true;
} else {
LOG.debug(
"Current TotalDiskUsage: {} is greater than LWMThreshold: {}."
+ " So not adding any filledDir to WritableDirsList",
totalDiskUsage, conf.getDiskLowWaterMarkUsageThreshold());
}
}
// Re-check the usage of every full-filled dir:
// 1. checkDir returns normally: record the usage and, if makeWritable, add the dir
//    back to the writable dirs.
// 2. Disk error: notify every listener through diskFailed.
// 3. Above the warn threshold: record the usage and, if makeWritable, add the dir
//    back to the writable dirs.
// 4. Still out of space: record the usage and do nothing else.
for (File dir : fullfilledDirs) {
try {
diskUsages.put(dir, diskChecker.checkDir(dir));
if (makeWritable) {
ldm.addToWritableDirs(dir, true);
}
} catch (DiskErrorException e) {
// Notify disk failure to all the listeners
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.diskFailed(dir);
}
} catch (DiskWarnThresholdException e) {
diskUsages.put(dir, e.getUsage());
// the full-filled dir become writable but still above the warn threshold
if (makeWritable) {
ldm.addToWritableDirs(dir, false);
}
} catch (DiskOutOfSpaceException e) {
// the full-filled dir is still full-filled
diskUsages.put(dir, e.getUsage());
}
}
} catch (IOException ioe) {
// An I/O failure while re-checking is reported to every listener as a fatal error.
LOG.error("Got IOException while monitoring Dirs", ioe);
for (LedgerDirsListener listener : ldm.getListeners()) {
listener.fatalError();
}
}
}
LedgerDirsMonitor 的檢測依賴 LedgerDirsManager 和 DiskChecker 提供的方法。LedgerDirsManager 的初始化如下:
/**
 * Creates a manager for the given ledger directories and registers usage gauges for them.
 *
 * @param conf        server configuration; supplies the entry log size limit and the minimum
 *                    usable sizes for index file / entry log creation
 * @param dirs        the configured ledger directories
 * @param diskChecker utility that performs the actual disk usage checks
 * @param statsLogger stats logger on which the per-directory usage gauges and the
 *                    writable-directory-count gauge are registered
 */
public LedgerDirsManager(ServerConfiguration conf, File[] dirs, DiskChecker diskChecker, StatsLogger statsLogger) {
    // List of ledger directories.
    this.ledgerDirectories = Arrays.asList(Bookie
            .getCurrentDirectories(dirs));
    // Writable ledger directories; initially every directory is considered writable.
    this.writableLedgerDirectories = new ArrayList<>(ledgerDirectories);
    // Directories that have filled up; starts out empty.
    this.filledDirs = new ArrayList<>();
    // Listeners that react to disk-check events.
    // NOTE(review): per the original notes these handle e.g. the bookie's writable -> readonly
    // transition -- confirm against the LedgerDirsListener implementations.
    this.listeners = new ArrayList<>();
    this.entryLogSize = conf.getEntryLogSizeLimit();
    // The minimum safe usable size to be available in index directory for Bookie to create
    // Index File while replaying journal at the time of Bookie Start in Readonly Mode (in bytes).
    this.minUsableSizeForIndexFileCreation = conf.getMinUsableSizeForIndexFileCreation();
    // Minimum usable disk size required to create an entry log file in a ledger directory.
    this.minUsableSizeForEntryLogCreation = conf.getMinUsableSizeForEntryLogCreation();
    // Every directory starts with a recorded usage of 0; register one usage gauge per directory.
    for (File dir : ledgerDirectories) {
        diskUsages.put(dir, 0f);
        // The gauge name is derived from the directory's parent path, with '/' replaced by '_'.
        String statName = "dir_" + dir.getParent().replace('/', '_') + "_usage";
        final File targetDir = dir;
        statsLogger.registerGauge(statName, new Gauge<Number>() {
            @Override
            public Number getDefaultValue() {
                return 0;
            }

            @Override
            public Number getSample() {
                // Reported as the recorded usage multiplied by 100.
                return diskUsages.get(targetDir) * 100;
            }
        });
    }
    // diskChecker is the utility that actually performs the directory usage check.
    this.diskChecker = diskChecker;
    // Register a gauge reporting the number of writable directories.
    statsLogger.registerGauge(LD_WRITABLE_DIRS, new Gauge<Number>() {
        @Override
        public Number getDefaultValue() {
            return 0;
        }

        @Override
        public Number getSample() {
            return writableLedgerDirectories.size();
        }
    });
}
實際執行檢測的類是 DiskChecker:
/**
 * Creates a disk checker configured with two usage thresholds.
 *
 * <p>Notes carried over from the original comment (NOTE(review): not verifiable from this
 * snippet -- confirm against the configuration defaults and callers):
 * <ol>
 *   <li>{@code threshold} is the maximum disk usage, said to default to 0.95. Once every
 *       directory exceeds it, the bookie turns read-only if it is configured to allow
 *       read-only mode, otherwise it stops.</li>
 *   <li>{@code warnThreshold} is the warning level, said to default to 0.90. Exceeding it
 *       raises {@code DiskWarnThresholdException} and triggers gc; once usage drops below
 *       it the directory becomes writable again.</li>
 * </ol>
 *
 * @param threshold     maximum disk usage fraction
 * @param warnThreshold disk usage fraction above which a warning is raised
 */
public DiskChecker(float threshold, float warnThreshold) {
    // Validation runs before any field is assigned.
    validateThreshold(threshold, warnThreshold);
    this.diskUsageWarnThreshold = warnThreshold;
    this.diskUsageThreshold = threshold;
}
...
/**
 * Computes the used-space fraction for {@code dir} and compares it with both thresholds.
 *
 * <ol>
 *   <li>If the usage exceeds neither threshold, the usage is returned.</li>
 *   <li>If it exceeds {@code diskUsageThreshold}, {@code DiskOutOfSpaceException} is thrown.</li>
 *   <li>Otherwise, if it exceeds {@code diskUsageWarnThreshold},
 *       {@code DiskWarnThresholdException} is thrown.</li>
 * </ol>
 *
 * <p>A directory that does not exist is checked through its parent instead; a {@code null}
 * directory yields {@code 0}.
 *
 * @param dir the directory to check, may be {@code null}
 * @return the used-space fraction, computed as {@code 1 - usableSpace / totalSpace}
 * @throws DiskOutOfSpaceException    if the usage is above {@code diskUsageThreshold}
 * @throws DiskWarnThresholdException if the usage is above {@code diskUsageWarnThreshold}
 *                                    but not above {@code diskUsageThreshold}
 */
float checkDiskFull(File dir) throws DiskOutOfSpaceException, DiskWarnThresholdException {
    if (dir == null) {
        return 0f;
    }
    if (!dir.exists()) {
        // Not created yet: walk up to the closest existing ancestor.
        return checkDiskFull(dir.getParentFile());
    }

    final long bytesUsable = dir.getUsableSpace();
    final long bytesTotal = dir.getTotalSpace();
    final float freeFraction = (float) bytesUsable / (float) bytesTotal;
    final float usedFraction = 1f - freeFraction;

    if (usedFraction > diskUsageThreshold) {
        LOG.error("Space left on device {} : {}, Used space fraction: {} > threshold {}.",
                dir, bytesUsable, usedFraction, diskUsageThreshold);
        final String outOfSpaceMessage = "Space left on device "
                + bytesUsable + " Used space fraction:" + usedFraction + " > threshold " + diskUsageThreshold;
        throw new DiskOutOfSpaceException(outOfSpaceMessage, usedFraction);
    }

    // Warn should be triggered only if disk usage threshold doesn't trigger first.
    if (usedFraction > diskUsageWarnThreshold) {
        LOG.warn("Space left on device {} : {}, Used space fraction: {} > WarnThreshold {}.",
                dir, bytesUsable, usedFraction, diskUsageWarnThreshold);
        final String warnMessage = "Space left on device:"
                + bytesUsable + " Used space fraction:" + usedFraction + " > WarnThreshold:" + diskUsageWarnThreshold;
        throw new DiskWarnThresholdException(warnMessage, usedFraction);
    }

    return usedFraction;
}
檢測邏輯主要依據兩個參數:磁盤使用率上限(diskUsageThreshold)和磁盤告警閾值(diskUsageWarnThreshold)。