golang源碼分析-啓動過程概述

golang是一種強類型語言，其併發模型基於CSP。本文通過一個簡單的實例來分析golang程序的啓動流程，爲後續深入瞭解與學習做鋪墊。

golang代碼示例

package main

import "fmt"

// main prints a fixed greeting; it is the sample program whose startup
// sequence is traced with gdb.
func main() {
	fmt.Println("hello,world")
}

編寫完示例代碼之後，進行編譯；

go build test.go

調試程序的方式有多種，可以使用gdb，也可以使用golang推薦的調試工具Delve。本文採用gdb調試方式；

gdb ./test
(gdb) info files
Symbols from "/root/test/test".
Local exec file:
	`/root/test/test', file type elf64-x86-64.
	Entry point: 0x454ae0
	0x0000000000401000 - 0x000000000048cba9 is .text
	0x000000000048d000 - 0x00000000004dc24c is .rodata
	0x00000000004dc420 - 0x00000000004dd084 is .typelink
	0x00000000004dd088 - 0x00000000004dd0d8 is .itablink
	0x00000000004dd0d8 - 0x00000000004dd0d8 is .gosymtab
	0x00000000004dd0e0 - 0x0000000000548426 is .gopclntab
	0x0000000000549000 - 0x0000000000549020 is .go.buildinfo
	0x0000000000549020 - 0x00000000005560f8 is .noptrdata
	0x0000000000556100 - 0x000000000055d0f0 is .data
	0x000000000055d100 - 0x0000000000578950 is .bss
	0x0000000000578960 - 0x000000000057b0b8 is .noptrbss
	0x0000000000400f9c - 0x0000000000401000 is .note.go.buildid
(gdb) b *0x454ae0
Breakpoint 1 at 0x454ae0: file /usr/lib/golang/src/runtime/rt0_linux_amd64.s, line 8.

此時我們查看rt0_linux_amd64.s中的內容；

#include "textflag.h"

// _rt0_amd64_linux does nothing itself: it immediately transfers control
// to _rt0_amd64.
TEXT _rt0_amd64_linux(SB),NOSPLIT,$-8
	JMP	_rt0_amd64(SB) 					// jump to _rt0_amd64 and continue there

// _rt0_amd64_linux_lib immediately transfers control to _rt0_amd64_lib.
TEXT _rt0_amd64_linux_lib(SB),NOSPLIT,$0
	JMP	_rt0_amd64_lib(SB)

_rt0_amd64的代碼位於runtime/asm_amd64.s中，跳轉到此處之後就進入了整個啓動與初始化過程。

runtime中的啓動與初始化

在位於runtime/asm_amd64.s中；

// _rt0_amd64 loads argc (the value at 0(SP)) into DI and the address of
// argv (8(SP)) into SI, then jumps to runtime·rt0_go.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	JMP	runtime·rt0_go(SB)   // jump to rt0_go and continue there

真正的初始化與執行的流程都是包含在了rt0_go的流程中。

rt0_go的執行流程

// rt0_go is the bootstrap routine. In order, it:
//   - saves argc/argv on an aligned stack,
//   - fills in g0's stack bounds and stack guards,
//   - queries CPUID,
//   - calls _cgo_init if present, otherwise sets up and verifies TLS,
//   - links g0 and m0 to each other,
//   - calls check, args, osinit and schedinit,
//   - calls newproc with mainPC as the entry, and
//   - calls mstart, which is not expected to return.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc (incoming argument)
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP		// 2args 2auto
	ANDQ	$~15, SP
	MOVQ	AX, 16(SP)
	MOVQ	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI             // DI = g0; the stores below fill in its stack bounds and guards
	LEAQ	(-64*1024+104)(SP), BX
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	// The comparisons below branch on the CPU vendor string.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// g0 already in DI
	MOVQ	DI, CX	// Win64 uses CX for first parameter
	MOVQ	$setg_gcc<>(SB), SI
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const__StackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX      // CX = address of g0
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX      // AX = address of m0

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)                // call the runtime's check routine

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)               	// pass argc/argv to runtime·args
	CALL	runtime·osinit(SB)              // OS-specific init (presumably CPU count and page size; confirm in osinit)
	CALL	runtime·schedinit(SB)           // scheduler and runtime environment initialization

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry: address of mainPC
	PUSHQ	AX
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)           // create the goroutine for that entry
	POPQ	AX
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)        		// start running on this M

	CALL	runtime·abort(SB)	// mstart should never return
	RET

	// Prevent dead-code elimination of debugCallV1, which is
	// intended to be called by debuggers.
	MOVQ	$runtime·debugCallV1(SB), AX
	RET

// mainPC is an 8-byte read-only datum holding the address of runtime·main.
DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)     	// mainPC = address of runtime.main
GLOBL	runtime·mainPC(SB),RODATA,$8

通過該流程可以看出，主要的流程是首先設置g0與m0的相關環境，接着初始化輸入參數(args)、初始化運行核數與頁大小(osinit)，再初始化調度器與運行環境(schedinit)，然後調用newproc創建一個以runtime.main(mainPC)爲入口的協程，最後調用mstart方法開始調度執行。

schedinit調度相關初始化

// schedinit runs the bootstrap initialization steps in a fixed order:
// traceback, module data, stack, allocator, the current m, CPU/alg/module
// tables, signal mask, arguments and environment, GC, and finally the Ps
// (sized from ncpu or GOMAXPROCS). It throws if procresize reports a
// runnable goroutine during bootstrap.
func schedinit() {
	// raceinit must be the first call to race detector.
	// In particular, it must be done before mallocinit below calls racemapshadow.
	_g_ := getg()                   // the current g
	if raceenabled {
		_g_.racectx, raceprocctx0 = raceinit()
	}

	sched.maxmcount = 10000        // upper bound on the number of Ms (OS threads)

	tracebackinit()                // traceback setup (details not shown here)
	moduledataverify()
	stackinit()                    // stack-related initialization
	mallocinit()                   // memory allocator initialization
	mcommoninit(_g_.m)             // initialize the current m (m0 during bootstrap)
	cpuinit()       // must run before alginit
	alginit()       // maps must not be used before this call
	modulesinit()   // provides activeModules
	typelinksinit() // uses maps, activeModules
	itabsinit()     // uses activeModules

	msigsave(_g_.m)
	initSigmask = _g_.m.sigmask

	goargs()          // command-line arguments
	goenvs()					// environment variables
	parsedebugvars()
	gcinit()    			// garbage collector initialization

	sched.lastpoll = uint64(nanotime())
	procs := ncpu   		// default number of Ps is ncpu
	if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
		procs = n         // a valid, positive GOMAXPROCS overrides the default
	}
	if procresize(procs) != nil {      // set up procs Ps
		throw("unknown runnable goroutine during bootstrap")
	}

	// For cgocheck > 1, we turn on the write barrier at all times
	// and check all pointer writes. We can't do this until after
	// procresize because the write barrier needs a P.
	if debug.cgocheck > 1 {
		writeBarrier.cgo = true
		writeBarrier.enabled = true
		for _, p := range allp {
			p.wbBuf.reset()
		}
	}

	if buildVersion == "" {
		// Condition should never trigger. This code just serves
		// to ensure runtime·buildVersion is kept in the resulting binary.
		buildVersion = "unknown"
	}
}

該函數主要完成了棧、內存分配、m0、命令行參數、環境變量、gc以及p的初始化等操作，都是爲了後續執行做準備。

newproc函數

// newproc captures the argument pointer, the calling g and the caller's PC,
// then runs newproc1 on the system stack to create a g that runs fn with
// siz bytes of arguments.
//go:nosplit
func newproc(siz int32, fn *funcval) {
	argp := add(unsafe.Pointer(&fn), sys.PtrSize) // one pointer-size past &fn
	gp := getg()                                // the current g (the caller)
	pc := getcallerpc()                         // the caller's PC
	systemstack(func() {
		newproc1(fn, (*uint8)(argp), siz, gp, pc)   // runs on the system stack; during bootstrap rt0_go passes mainPC (runtime.main) as fn
	})
}

// Create a new g running fn with narg bytes of arguments starting
// at argp. callerpc is the address of the go statement that created
// this. The new g is put on the queue of g's waiting to run.
//
// Throws if fn is nil, if the arguments are too large for a new
// goroutine's stack, or if the obtained g has no stack or is not in
// the Gdead state.
func newproc1(fn *funcval, argp *uint8, narg int32, callergp *g, callerpc uintptr) {
	_g_ := getg()        	// the current g

	if fn == nil {
		_g_.m.throwing = -1 // do not dump full stacks
		throw("go of nil func value")
	}
	_g_.m.locks++ // disable preemption because it can be holding p in a local var
	siz := narg                           // argument size, rounded up to a multiple of 8 on the next line
	siz = (siz + 7) &^ 7

	// We could allocate a larger initial stack if necessary.
	// Not worth it: this is almost always an error.
	// 4*sizeof(uintreg): extra space added below
	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
	if siz >= _StackMin-4*sys.RegSize-sys.RegSize {
		throw("newproc: function arguments too large for new goroutine")
	}

	_p_ := _g_.m.p.ptr()              // the current P (reached through the current m)
	newg := gfget(_p_)                // ask gfget for a g; nil means a new one is allocated below
	if newg == nil {
		newg = malg(_StackMin)
		casgstatus(newg, _Gidle, _Gdead)
		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
	}
	if newg.stack.hi == 0 {
		throw("newproc1: newg missing stack")
	}

	if readgstatus(newg) != _Gdead {
		throw("newproc1: new g is not Gdead")
	}

	totalSize := 4*sys.RegSize + uintptr(siz) + sys.MinFrameSize // extra space in case of reads slightly beyond frame; this is the size of the initial frame
	totalSize += -totalSize & (sys.SpAlign - 1)                  // align to spAlign
	sp := newg.stack.hi - totalSize      // initial stack pointer for the new g
	spArg := sp
	if usesLR {
		// caller's LR
		*(*uintptr)(unsafe.Pointer(sp)) = 0
		prepGoExitFrame(sp)
		spArg += sys.MinFrameSize
	}
	if narg > 0 {          // there are arguments to copy onto the new stack
		memmove(unsafe.Pointer(spArg), unsafe.Pointer(argp), uintptr(narg))
		// This is a stack-to-stack copy. If write barriers
		// are enabled and the source stack is grey (the
		// destination is always black), then perform a
		// barrier copy. We do this *after* the memmove
		// because the destination stack may have garbage on
		// it.
		if writeBarrier.needed && !_g_.m.curg.gcscandone {
			f := findfunc(fn.fn)                              // look up fn's function info to reach its argument pointer map
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
			if stkmap.nbit > 0 {
				// We're in the prologue, so it's always stack map index 0.
				bv := stackmapdata(stkmap, 0)
				bulkBarrierBitmap(spArg, spArg, uintptr(bv.n)*sys.PtrSize, 0, bv.bytedata)
			}
		}
	}

	memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
	newg.sched.sp = sp                             // initial SP
	newg.stktopsp = sp
	newg.sched.pc = funcPC(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function; pc starts out pointing into goexit
	newg.sched.g = guintptr(unsafe.Pointer(newg))   // back-pointer to the new g
	gostartcallfn(&newg.sched, fn)                  // adjust sched so that fn runs when this g is scheduled (see gostartcallfn)
	newg.gopc = callerpc
	newg.ancestors = saveAncestors(callergp)
	newg.startpc = fn.fn   													// record fn's address as the start PC
	if _g_.m.curg != nil {
		newg.labels = _g_.m.curg.labels
	}
	if isSystemGoroutine(newg, false) {
		atomic.Xadd(&sched.ngsys, +1)
	}
	newg.gcscanvalid = false                        // mark the g's GC scan state as not valid
	casgstatus(newg, _Gdead, _Grunnable)            // Gdead -> Grunnable

	if _p_.goidcache == _p_.goidcacheend {
		// Sched.goidgen is the last allocated id,
		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
		// At startup sched.goidgen=0, so main goroutine receives goid=1.
		_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
		_p_.goidcache -= _GoidCacheBatch - 1
		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
	}
	newg.goid = int64(_p_.goidcache) 							// assign the goroutine id from the P's cache
	_p_.goidcache++
	if raceenabled {
		newg.racectx = racegostart(callerpc)
	}
	if trace.enabled {
		traceGoCreate(newg, newg.startpc)
	}
	runqput(_p_, newg, true)                     // queue newg on the P; the true argument presumably requests the "run next" slot (confirm in runqput)

	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 && mainStarted { // wakep is only reached once mainStarted is set, so never during bootstrap
		wakep()
	}
	_g_.m.locks--
	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
		_g_.stackguard0 = stackPreempt
	}
}

newproc主要就是新生成一個g，設置該g的執行函數入口、初始化棧並將其設置爲可運行狀態，然後加入到隊列中等待被調度執行。在啓動階段，第一個g傳入的函數其實就是runtime.main函數，接着就會調用mstart來調度該新生成的g，執行被包裹的runtime.main。

mstart函數

// mstart initializes the current g's stack bounds (when stack.lo is zero)
// and stack guards, runs mstart1, and then exits the thread via mexit.
//go:nosplit
//go:nowritebarrierrec
func mstart() {
	_g_ := getg()        					// the current g

	osStack := _g_.stack.lo == 0
	if osStack {
		// Initialize stack bounds from system stack.
		// Cgo may have left stack size in stack.hi.
		// minit may update the stack bounds.
		size := _g_.stack.hi
		if size == 0 {
			size = 8192 * sys.StackGuardMultiplier
		}
		_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
		_g_.stack.lo = _g_.stack.hi - size + 1024
	}
	// Initialize stack guards so that we can start calling
	// both Go and C functions with stack growth prologues.
	_g_.stackguard0 = _g_.stack.lo + _StackGuard
	_g_.stackguard1 = _g_.stackguard0
	mstart1()    			// continue in mstart1

	// Exit this thread.
	if GOOS == "windows" || GOOS == "solaris" || GOOS == "plan9" || GOOS == "darwin" || GOOS == "aix" {
		// Window, Solaris, Darwin, AIX and Plan 9 always system-allocate
		// the stack, but put it in _g_.stack before mstart,
		// so the logic above hasn't set osStack yet.
		osStack = true
	}
	mexit(osStack)       // exit the thread
}

// mstart1 must run on the m's g0 (it throws otherwise). It saves the
// caller's pc/sp, runs asminit and minit, runs mstartm0 when the m is m0,
// runs the m's mstartfn if set, acquires nextp for any m other than m0,
// and then calls schedule.
func mstart1() {
	_g_ := getg()  								// the current g

	if _g_ != _g_.m.g0 {
		throw("bad runtime·mstart")
	}

	// Record the caller for use as the top of stack in mcall and
	// for terminating the thread.
	// We're never coming back to mstart1 after we call schedule,
	// so other calls can reuse the current frame.
	save(getcallerpc(), getcallersp())
	asminit()
	minit()    	// per-thread init; prepares the thread for signal handling (see the comment below)

	// Install signal handlers; after minit so that minit can
	// prepare the thread to be able to handle the signals.
	if _g_.m == &m0 {
		mstartm0()
	}

	if fn := _g_.m.mstartfn; fn != nil {
		fn()
	}

	if _g_.m != &m0 {
		acquirep(_g_.m.nextp.ptr())
		_g_.m.nextp = 0
	}
	schedule()   		// enter the scheduler to run a runnable g; schedule itself is not covered in this article
}

mstart函數主要就是開始調度可以運行的g來執行，在啓動階段可執行的g就是被包裹的runtime.main函數，此時繼續瞭解runtime.main函數。

main函數

// main is the runtime's main goroutine body (runtime.main, the function
// mainPC points at). It sets the maximum stack size, starts sysmon on a
// new M (except on wasm), checks it is on m0, runs runtime_init, enables
// the GC, performs the cgo checks, then calls main_init and main_main
// indirectly and finally exits with status 0. For c-archive/c-shared
// builds it returns after main_init without calling main_main.
func main() {
	g := getg()

	// Racectx of m0->g0 is used only as the parent of the main goroutine.
	// It must not be used for anything else.
	g.m.g0.racectx = 0

	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
	// Using decimal instead of binary GB and MB because
	// they look nicer in the stack overflow failure message.
	// This sets the maximum stack size.
	if sys.PtrSize == 8 {
		maxstacksize = 1000000000
	} else {
		maxstacksize = 250000000
	}

	// Allow newproc to start new Ms.
	mainStarted = true               						// from here on newproc1 may call wakep

	if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
		systemstack(func() {    									// start sysmon on a new M (a thread, not a goroutine)
			newm(sysmon, nil)
		})
	}

	// Lock the main goroutine onto this, the main OS thread,
	// during initialization. Most programs won't care, but a few
	// do require certain calls to be made by the main thread.
	// Those can arrange for main.main to run in the main thread
	// by calling runtime.LockOSThread during initialization
	// to preserve the lock.
	lockOSThread()

	if g.m != &m0 { 												// must be running on m0
		throw("runtime.main not on m0")
	}

	runtime_init() // must be before defer; presumably the runtime package's own init (confirm via its linkname)
	if nanotime() == 0 {
		throw("nanotime returning zero")
	}

	// Defer unlock so that runtime.Goexit during init does the unlock too.
	needUnlock := true
	defer func() {
		if needUnlock {
			unlockOSThread()
		}
	}()

	// Record when the world started.
	runtimeInitTime = nanotime()      // record the current time

	gcenable()     										// enable garbage collection

	main_init_done = make(chan bool)
	if iscgo {
		if _cgo_thread_start == nil {
			throw("_cgo_thread_start missing")
		}
		if GOOS != "windows" {
			if _cgo_setenv == nil {
				throw("_cgo_setenv missing")
			}
			if _cgo_unsetenv == nil {
				throw("_cgo_unsetenv missing")
			}
		}
		if _cgo_notify_runtime_init_done == nil {
			throw("_cgo_notify_runtime_init_done missing")
		}
		// Start the template thread in case we enter Go from
		// a C-created thread and need to create a new thread.
		startTemplateThread()
		cgocall(_cgo_notify_runtime_init_done, nil)
	}

	fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
	fn()          		// run the main package's init
	close(main_init_done)

	needUnlock = false
	unlockOSThread()

	if isarchive || islibrary {
		// A program compiled with -buildmode=c-archive or c-shared
		// has a main, but it is not executed.
		return
	}
	fn = main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
	fn()    						// run the program's own main function
	if raceenabled {
		racefini()
	}

	// Make racy client program work: if panicking on
	// another goroutine at the same time as main returns,
	// let the other goroutine finish printing the panic trace.
	// Once it does, it will exit. See issues 3934 and 20018.
	if atomic.Load(&runningPanicDefers) != 0 {
		// Running deferred functions should not take long.
		for c := 0; c < 1000; c++ {
			if atomic.Load(&runningPanicDefers) == 0 {
				break
			}
			Gosched()
		}
	}
	if atomic.Load(&panicking) != 0 {       	// a panic is in progress: park this goroutine instead of exiting
		gopark(nil, nil, waitReasonPanicWait, traceEvGoStop, 1)
	}

	exit(0)
	for {
		var x *int32
		*x = 0
	}
}

runtime.main函數最終會執行go程序中定義的main函數：在執行過程中首先執行runtime包的init(runtime_init)，然後通過main_init執行main包（及其依賴包）的init函數，最後通過main_main執行用戶定義的main函數，至此啓動過程中的基本執行流程就完成。

總結

本文主要就是簡單查看了一下go程序的啓動過程，go中涉及到部分彙編知識，在彙編代碼中一步步查找到runtime中的相關的go的源碼的實現，本文也參考了大量網上已有的內容，大家有興趣可自行查看。由於本人才疏學淺，如有錯誤請批評指正。

golang源碼分析-啓動過程概述

golang源碼分析-啓動過程概述

golang代碼示例

runtime中的啓動與初始化

rt0_go的執行流程

schedinit調度相關初始化

newproc函數

mstart函數

main函數

總結

使用neovim打造go ide(支持代碼跳轉, 代碼補全, 實時語法檢查)

挑戰程序設計競賽 2.3章習題 poj 3046 Ant Counting

Shell/Python中的用戶名獲取

Redis的rdb格式學習

遍歷百萬級Redis的鍵值的大結局

租約-代碼實踐

golang源碼分析：調度器chan調度

兩階段提交實際項目V1

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結