提示:
由於工作原因文章還沒有弄完,請跳過,弄完後,會刪除此提示
基礎流程:
BackgroundWriterMain->
BgBufferSync->
SyncOneBuffer->
FlushBuffer()->
smgrwrite()
邏輯部分
首先代碼定義一些基礎類型和變量,並完成初始化工作,例如:信號處理函數的註冊、bgwriter的內存上下文、錯誤處理機制等。
然後,進入後臺寫進程的主循環。主循環是一個死循環,啓動後,如果沒有特殊情況(如:postmaster死亡、收到關機信號等)就會一直運行。
此函數主要做一些簡單的工作,如,監聽信號(重新加載、關機等信號)確定是否退出或者重載。
調用落盤函數接口(BgBufferSync),並將落盤過程中產生的統計數據發送給stats collector。
做一些簡單的判斷,確定是否要進入一次長時間的睡眠(如果WaitLatch是因爲超時(WL_TIMEOUT)返回的,並且本輪和上一輪的BgBufferSync都返回"可以休眠",就會進入一次長時間的睡眠)。
如果睡眠時間到,或者睡眠期間有緩衝區(buffer)的分配操作,就會醒來,然後再次循環。
代碼部分
void
BackgroundWriterMain(void)
{
sigjmp_buf local_sigjmp_buf;
//解析:定義錯誤恢復用的跳轉緩衝區,由後面的sigsetjmp保存現場,發生錯誤時跳回該處進行恢復
MemoryContext bgwriter_context;
//解析:定義內存上下文
bool prev_hibernate;
//解析:記錄上一輪循環的can_hibernate值,後面用來確定是否進入一次長時間的睡眠
WritebackContext wb_context;
//解析:定義回寫(writeback)上下文,後面由WritebackContextInit初始化,並傳給BgBufferSync使用
//內存上下文解析:
//1.pg是基於進程的,每個進程擁有一塊內存,分配內存上下文爲了方便內存的處理。
//2.方便錯誤處理,自己進行內存的管理,方便錯誤發生時進行恢復操作。
pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */
pqsignal(SIGINT, SIG_IGN);
pqsignal(SIGTERM, ReqShutdownHandler); /* shutdown */
pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, bgwriter_sigusr1_handler);
pqsignal(SIGUSR2, SIG_IGN);
/*
* Reset some signals that are accepted by postmaster but not here
*/
pqsignal(SIGCHLD, SIG_DFL);
pqsignal(SIGTTIN, SIG_DFL);
pqsignal(SIGTTOU, SIG_DFL);
pqsignal(SIGCONT, SIG_DFL);
pqsignal(SIGWINCH, SIG_DFL);
/* We allow SIGQUIT (quickdie) at all times */
sigdelset(&BlockSig, SIGQUIT);
//上面是對一些信號的註冊
CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Writer");
//創建一個資源所有者來與我們的資源保持聯繫
/*
* We just started, assume there has been either a shutdown or end-of-recovery snapshot.
*/
last_snapshot_ts = GetCurrentTimestamp();
bgwriter_context = AllocSetContextCreate(TopMemoryContext, "Background Writer",
ALLOCSET_DEFAULT_MINSIZE,ALLOCSET_DEFAULT_INITSIZE,ALLOCSET_DEFAULT_MAXSIZE);
//創建一個內存上下文,內存上下文的作用見:內存上下文篇
MemoryContextSwitchTo(bgwriter_context);
//切換內存上下文
WritebackContextInit(&wb_context, &bgwriter_flush_after);
//下面是做錯誤恢復和檢查用的,後期做整理
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* Since not using PG_TRY, must reset error stack by hand */
error_context_stack = NULL;
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
/*
* These operations are really just a minimal subset of AbortTransaction().
* We don't have very many resources to worry about in bgwriter, but we do have LWLocks, buffers, and temp files.
*/
LWLockReleaseAll();
AbortBufferIO();
UnlockBuffers();
/*釋放 buffer pins : */
ResourceOwnerRelease(CurrentResourceOwner,
RESOURCE_RELEASE_BEFORE_LOCKS,
false, true);
/* we needn't bother with the other ResourceOwnerRelease phases */
AtEOXact_Buffers(false);
AtEOXact_SMgr();
AtEOXact_Files();
AtEOXact_HashTables(false);
/*
* Now return to normal top-level context and clear ErrorContext for
* next time.
*/
MemoryContextSwitchTo(bgwriter_context);
FlushErrorState();
/* Flush any leaked data in the top-level context */
MemoryContextResetAndDeleteChildren(bgwriter_context);
/* re-initialize to avoid repeated errors causing problems */
WritebackContextInit(&wb_context, &bgwriter_flush_after);
/* Now we can allow interrupts again */
RESUME_INTERRUPTS();
/*
* Sleep at least 1 second after any error.
* A write error is likely to be repeated, and we don't want to be filling the error logs as fast as we can.
*/
pg_usleep(1000000L);
/*
* Close all open files after any error. This is helpful on Windows,
* where holding deleted files open causes various strange errors.
* It's not clear we need it elsewhere, but shouldn't hurt.
*/
smgrcloseall();
/* Report wait end here, when there is no further possibility of wait */
pgstat_report_wait_end();
}
/* We can now handle ereport(ERROR) */
PG_exception_stack = &local_sigjmp_buf;
/*
* Unblock signals (they were blocked when the postmaster forked us)
*/
PG_SETMASK(&UnBlockSig);
/*
* Reset hibernation state after any error.
*/
prev_hibernate = false;
//開始進入後臺寫進程的流程
for (;;)
{
bool can_hibernate;
//定義bool變量確定是否進入長時間的睡眠,可以認爲是一個開關變量
int rc;
/* Clear any already-pending wakeups */
ResetLatch(MyLatch);
//判斷是否接受到SIGHUP信號,接收到後重新加載配置文件
if (got_SIGHUP)
{
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
}
if (shutdown_requested)
{
/*
* From here on, elog(ERROR) should end with exit(1), not send
* control back to the sigsetjmp block above
*/
ExitOnAnyError = true;
/* Normal exit from the bgwriter is here */
proc_exit(0); /* done */
}
// 重中之重,進行一次髒數據的落盤操作,根據落盤狀況重置開關變量can_hibernate,確定是否需要進入一次長時間睡眠。
can_hibernate = BgBufferSync(&wb_context);
//將統計數據發送到stats collector
pgstat_send_bgwriter();
//判斷是否是checkpoint後的第一次操作
if (FirstCallSinceLastCheckpoint())
{
/*
* After any checkpoint, close all smgr files.
* This is so we won't hang onto smgr
* references to deleted files indefinitely.
*
*/
smgrcloseall();
}
//判斷當前是否需要記錄standby信息(XLogStandbyInfoActive),並且系統不處於recovery狀態。
if (XLogStandbyInfoActive() && !RecoveryInProgress())
{
TimestampTz timeout = 0;
TimestampTz now = GetCurrentTimestamp();
timeout =TimestampTzPlusMilliseconds(last_snapshot_ts, LOG_SNAPSHOT_INTERVAL_MS);
/*
* only log if enough time has passed and some xlog record has been inserted.
* 只有距離上次快照已經過了足夠長的時間,並且期間有新的xlog記錄被插入時,才記錄快照
*/
if (now >= timeout &&last_snapshot_lsn != GetXLogInsertRecPtr())
{
last_snapshot_lsn = LogStandbySnapshot();
last_snapshot_ts = now;
}
}
//在latch上等待,直到latch被設置、超時(BgWriterDelay毫秒)或postmaster死亡;rc記錄WaitLatch是由於哪種原因返回的
rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, BgWriterDelay /* ms */ );
//如果上面的等待是因超時返回(沒有latch事件),並且本輪和上一輪BgBufferSync都返回可以休眠,就會睡眠一個比較長的時間。一旦有緩衝區(buffer)的分配,它就會被喚醒。
if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate)
{
/* Ask for notification at next buffer allocation */
StrategyNotifyBgWriter(MyProc->pgprocno);
/* Sleep ... */
rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, BgWriterDelay * HIBERNATE_FACTOR);
/* Reset the notification request in case we timed out */
StrategyNotifyBgWriter(-1);
}
//如果是由於postmaster進程死亡造成的返回,進程直接退出;否則記錄本輪的can_hibernate,進入下一輪循環
if (rc & WL_POSTMASTER_DEATH)
exit(1);
prev_hibernate = can_hibernate;
}