Log文件結構
Log文件格式:
kBlockSize=32KB
rn :代表不同長度的record
P:代表填充(padding),當一個block剩餘的空間不足以放下一個record的header(7B)時,剩下的空間會用
\0填充;空的record最短長度爲7B(只有header),當剩餘空間恰好爲7B時,Writer會寫入一個不含用戶數據的first
record填滿剩餘空間,然後把所有的用戶數據寫入後續的block
kHeaderSize=4+2+1=7(CRC 4B + Size 2B + Type 1B),空record最短爲7B
Record記錄格式:
CRC:32位是有效負載的校驗值
Size:16位,有效數據的長度
Type:record的類型,
kZeroType=0:爲預分配(preallocated)文件保留的類型
kFullType=1:表示record記錄是完整的
kFirstType=2:record被拆分存儲的第一個碎片
kMiddleType=3: record被拆分存儲的其餘碎片
kLastType=4:record被拆分存儲的最後一個碎片
Payload:有效數據,大小由size可知
Log的寫入情況爲:
當要寫入一條日誌時,首先判斷當前block中剩餘空間是否足夠存放該條日誌
S1.如果足夠,那麼直接按照格式寫入(FULL類型);
S2.如果不夠,那麼計算除去頭部(header)以外還可以存放多少內容,將這部分內容組裝爲FIRST的Log type寫入;然後新取一個塊,判斷是否足夠存放剩下的日誌數據
while(數據未寫完)
S21. 如果足夠就組裝爲LAST的形式寫入;
S22. 如果仍然不夠就組裝爲MIDDLE的形式寫入
Write寫record流程:
Log文件寫入是通過Writer類把數據添加到WritableFileWriter類指針dest_,由dest_->Append()函數實現
LogBuffer類緩存的是info log(運行信息日誌)的日誌項,最後由FlushBufferToLog()寫入到info log中去;它與上述record格式的Log文件寫入流程是分開的
Reader讀record流程:
首先調用SkipToInitialBlock()來找到記錄的起始位置,然後調用ReadPhysicalRecord()函數,ReadPhysicalRecord()函數做的是解析一條條record,得到type大小和檢驗crc,返回值是record_type類型,再根據record_type來將record數據臨時緩存到*scratch字符串中,然後將整理得到的record保存到Slice類fragment,詳細流程見流程圖
Reader類
namespace rocksdb {
class SequentialFileReader;
using std::unique_ptr; // lets code inside namespace rocksdb write unique_ptr without the std:: prefix
namespace log {
// Reads log records back out of a log file. The actual file reads are done
// through the SequentialFileReader handed to the constructor.
class Reader {
 public:
  // Interface for reporting errors found while reading.
  class Reporter {
   public:
    // Virtual so that implementations are destroyed correctly through a
    // Reporter pointer.
    virtual ~Reporter();

    // Some corruption was detected. "bytes" is the size, in bytes, of the
    // data affected by the corruption; "status" describes the problem.
    virtual void Corruption(size_t bytes, const Status& status) = 0;
  };

  // Creates a reader that will return log records from "*file".
  // "*file" must remain live while this Reader is in use.
  // "*reporter" must remain live while this Reader is in use.
  //
  // The Reader starts reading at the first record located at physical
  // position initial_offset within the file.
  //
  // NOTE(review): "checksum" presumably enables CRC verification of each
  // record -- confirm against the implementation, which is not visible here.
  Reader(unique_ptr<SequentialFileReader>&& file, Reporter* reporter,
         bool checksum, uint64_t initial_offset);

  ~Reader();

  // Reads the next record into *record. Returns true if a record was read
  // successfully, false if the end of the input was reached. "*scratch" is
  // used as temporary storage.
  bool ReadRecord(Slice* record, std::string* scratch,
                  bool report_eof_inconsistency = false);

  // Returns the physical offset of the last record returned by ReadRecord.
  uint64_t LastRecordOffset();

  // Returns true once the reader has reached the end of the file.
  bool IsEOF() const { return eof_; }

  // When we know more data has been written to the file, this can be called
  // to force the reader to look at the file again.
  void UnmarkEOF();

  // Non-owning access to the underlying file reader.
  SequentialFileReader* file() { return file_.get(); }

 private:
  const unique_ptr<SequentialFileReader> file_;
  Reporter* const reporter_;
  bool const checksum_;
  char* const backing_store_;
  Slice buffer_;
  bool eof_;
  bool read_error_;
  size_t eof_offset_;

  // Offset of the last record returned by ReadRecord.
  uint64_t last_record_offset_;
  // Offset of the first location past the end of buffer_.
  uint64_t end_of_buffer_offset_;
  // Offset at which to start looking for the first record to return.
  uint64_t const initial_offset_;

  // Extend the record types with the following special values.
  // kMaxRecordType is declared elsewhere (the original annotation notes its
  // value as 4).
  enum {
    kEof = kMaxRecordType + 1,
    // Returned whenever we find an invalid physical record.
    // Currently there are three situations in which this happens:
    // * The record has an invalid CRC (ReadPhysicalRecord reports a drop)
    // * The record is a 0-length record (No drop is reported)
    // * The record is below constructor's initial_offset (No drop is reported)
    kBadRecord = kMaxRecordType + 2
  };

  // Skips ahead to the position where record reading should start.
  // Returns true on success.
  bool SkipToInitialBlock();

  // Returns the record type, or one of the preceding special values.
  unsigned int ReadPhysicalRecord(Slice* result,
                                  bool report_eof_inconsistency = false);

  // Reports dropped bytes to the reporter.
  // buffer_ must be updated to remove the dropped bytes prior to invocation.
  void ReportCorruption(size_t bytes, const char* reason);
  void ReportDrop(size_t bytes, const Status& reason);

  // No copying allowed. Deleted explicitly so that any attempted copy --
  // including one from inside the class -- is rejected at compile time.
  Reader(const Reader&) = delete;
  void operator=(const Reader&) = delete;
};
} // namespace log
Writer類
namespace rocksdb {
class WritableFileWriter;
using std::unique_ptr;
namespace log {
/**
 * Writer is a general purpose log stream writer. It provides an append-only
 * abstraction for writing data. The details of how the data is written are
 * handled by the WritableFile sub-class implementation.
 *
 * The file is broken down into variable sized records. The format of each
 * record is described below.
 *       +-----+-------------+--+----+----------+------+-- ... ----+
 * File  | r0  |        r1   |P | r2 |    r3    |  r4  |           |
 *       +-----+-------------+--+----+----------+------+-- ... ----+
 *       <--- kBlockSize ------>|<--  kBlockSize ------>|
 *  rn = variable size records
 *  P = Padding
 *
 * Data is written out in kBlockSize chunks. If the next record does not fit
 * into the space left, the leftover space will be padded with \0.
 *
 * Record format:
 *
 * +---------+-----------+-----------+--- ... ---+
 * |CRC (4B) | Size (2B) | Type (1B) | Payload   |
 * +---------+-----------+-----------+--- ... ---+
 *
 * CRC = 32bit checksum
 * Size = Length of the payload data
 * Type = Type of record
 *        (kZeroType, kFullType, kFirstType, kMiddleType, kLastType)
 *        The type is used to group a bunch of records together to represent
 *        blocks that are larger than kBlockSize
 * Payload = Byte stream as long as specified by the payload size
 */
class Writer {
 public:
  // Creates a writer that will append data to "*dest".
  // "*dest" must be initially empty.
  // "*dest" must remain live while this Writer is in use.
  explicit Writer(unique_ptr<WritableFileWriter>&& dest);
  ~Writer();

  // Appends "slice" to the log as one logical record and returns the
  // resulting Status.
  Status AddRecord(const Slice& slice);

  // Non-owning access to the underlying file writer.
  WritableFileWriter* file() { return dest_.get(); }
  const WritableFileWriter* file() const { return dest_.get(); }

 private:
  unique_ptr<WritableFileWriter> dest_;
  int block_offset_;  // Current offset within the current block

  // crc32c values for all supported record types.
  uint32_t type_crc_[kMaxRecordType + 1];

  // Writes a single physical record of the given type whose payload is the
  // "length" bytes starting at "ptr".
  Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);

  // No copying allowed. Deleted explicitly so that any attempted copy --
  // including one from inside the class -- is rejected at compile time.
  Writer(const Writer&) = delete;
  void operator=(const Writer&) = delete;
};
} // namespace log
} // namespace rocksdb
LogBuffer類
class Logger;
// LogBuffer buffers info log entries so that they can be flushed to the info
// log later, in one go.
class LogBuffer {
 public:
  // log_level: the log level of the buffered entries
  // info_log:  the Logger the entries are eventually written to
  LogBuffer(const InfoLogLevel log_level, Logger* info_log);

  // Adds one log entry, built from "format" and "ap", to the buffer.
  // max_log_size is the maximum size of the entry.
  // NOTE(review): the original annotation mentions a default maximum size;
  // how a default is chosen is not visible in this header -- confirm.
  void AddLogToBuffer(size_t max_log_size, const char* format, va_list ap);

  // Returns true when no entries are buffered. Declared bool because the
  // result is a predicate (logs_.empty()), not a count.
  bool IsEmpty() const { return logs_.empty(); }

  // Writes all buffered entries to the info log and clears the buffer.
  void FlushBufferToLog();

 private:
  // One buffered log entry.
  struct BufferedLog {
    struct timeval now_tv;  // Timestamp of the log entry
    // Beginning of the log message.
    // NOTE(review): presumably each entry is over-allocated so the message
    // text extends past this one-byte array -- confirm in the implementation.
    char message[1];
  };

  const InfoLogLevel log_level_;
  Logger* info_log_;
  Arena arena_;
  autovector<BufferedLog*> logs_;
};
// Adds a log entry to log_buffer (per the original note, by calling
// AddLogToBuffer()) so that it is written to the info log later rather than
// immediately. Intended for cases where some logging has to happen while a
// mutex is held. max_log_size is the maximum size of the log entry.
extern void LogToBuffer(LogBuffer* log_buffer, size_t max_log_size,
const char* format, ...);
// Overload of the function above that uses the default maximum log size.
extern void LogToBuffer(LogBuffer* log_buffer, const char* format, ...);
} // namespace rocksdb