golang源碼分析:編譯過程詞法解析的流程

golang編譯

golang是靜態編譯型語言,執行go build時會生成編譯完成之後的可執行文件。那麼這個編譯過程大致會做哪些事情,golang的編譯又大致分爲哪些流程呢?

golang示例代碼
package main

import "fmt"

// main declares a ten-element int array whose first two elements are set
// explicitly (the remaining eight keep the zero value) and prints it.
func main() {
	arr := [10]int{0: 2, 1: 1}
	fmt.Println(arr)
}

在終端中,通過如下命令編譯該文件(-n 參數只打印將要執行的命令,而不實際執行);

wuzideMacBook-Pro:gofilebeats wuzi$ go build -n main_t.go

#
# command-line-arguments
#

mkdir -p $WORK/b001/
cat >$WORK/b001/importcfg << 'EOF' # internal
# import config
packagefile fmt=/usr/local/go/pkg/darwin_amd64/fmt.a
packagefile runtime=/usr/local/go/pkg/darwin_amd64/runtime.a
EOF
cd /Users/wuzi/goproject/gofilebeats
/usr/local/go/pkg/tool/darwin_amd64/compile -o $WORK/b001/_pkg_.a -trimpath $WORK/b001 -p main -complete -buildid oIIjfgnlQc12b1a7cedK/oIIjfgnlQc12b1a7cedK -goversion go1.12.5 -D _/Users/wuzi/goproject/gofilebeats -importcfg $WORK/b001/importcfg -pack -c=4 ./main_t.go
/usr/local/go/pkg/tool/darwin_amd64/buildid -w $WORK/b001/_pkg_.a # internal
cat >$WORK/b001/importcfg.link << 'EOF' # internal
packagefile command-line-arguments=$WORK/b001/_pkg_.a
packagefile fmt=/usr/local/go/pkg/darwin_amd64/fmt.a
packagefile runtime=/usr/local/go/pkg/darwin_amd64/runtime.a
packagefile errors=/usr/local/go/pkg/darwin_amd64/errors.a
packagefile internal/fmtsort=/usr/local/go/pkg/darwin_amd64/internal/fmtsort.a
packagefile io=/usr/local/go/pkg/darwin_amd64/io.a
packagefile math=/usr/local/go/pkg/darwin_amd64/math.a
packagefile os=/usr/local/go/pkg/darwin_amd64/os.a
packagefile reflect=/usr/local/go/pkg/darwin_amd64/reflect.a
packagefile strconv=/usr/local/go/pkg/darwin_amd64/strconv.a
packagefile sync=/usr/local/go/pkg/darwin_amd64/sync.a
packagefile unicode/utf8=/usr/local/go/pkg/darwin_amd64/unicode/utf8.a
packagefile internal/bytealg=/usr/local/go/pkg/darwin_amd64/internal/bytealg.a
packagefile internal/cpu=/usr/local/go/pkg/darwin_amd64/internal/cpu.a
packagefile runtime/internal/atomic=/usr/local/go/pkg/darwin_amd64/runtime/internal/atomic.a
packagefile runtime/internal/math=/usr/local/go/pkg/darwin_amd64/runtime/internal/math.a
packagefile runtime/internal/sys=/usr/local/go/pkg/darwin_amd64/runtime/internal/sys.a
packagefile sort=/usr/local/go/pkg/darwin_amd64/sort.a
packagefile sync/atomic=/usr/local/go/pkg/darwin_amd64/sync/atomic.a
packagefile math/bits=/usr/local/go/pkg/darwin_amd64/math/bits.a
packagefile internal/poll=/usr/local/go/pkg/darwin_amd64/internal/poll.a
packagefile internal/syscall/unix=/usr/local/go/pkg/darwin_amd64/internal/syscall/unix.a
packagefile internal/testlog=/usr/local/go/pkg/darwin_amd64/internal/testlog.a
packagefile syscall=/usr/local/go/pkg/darwin_amd64/syscall.a
packagefile time=/usr/local/go/pkg/darwin_amd64/time.a
packagefile unicode=/usr/local/go/pkg/darwin_amd64/unicode.a
packagefile internal/race=/usr/local/go/pkg/darwin_amd64/internal/race.a
EOF
mkdir -p $WORK/b001/exe/
cd .
/usr/local/go/pkg/tool/darwin_amd64/link -o $WORK/b001/exe/a.out -importcfg $WORK/b001/importcfg.link -buildmode=exe -buildid=tzzv5K5t4eVH1SJYuR6c/oIIjfgnlQc12b1a7cedK/oIIjfgnlQc12b1a7cedK/tzzv5K5t4eVH1SJYuR6c -extld=clang $WORK/b001/_pkg_.a
/usr/local/go/pkg/tool/darwin_amd64/buildid -w $WORK/b001/exe/a.out # internal
mv $WORK/b001/exe/a.out main_t

通過上面的輸出可知,go build主要是依次調用編譯好的工具來完成的:先使用compile進行編譯,接着執行buildid,然後使用link進行鏈接,最後再執行一次buildid,從而完成golang程序的編譯過程。通用的靜態語言的編譯過程一般分爲如下幾個步驟;

詞法語法分析
語義分析
中間代碼
代碼優化

在golang的整個編譯過程中,依次是詞法與語法分析(生成AST)、類型檢查、生成SSA以及機器代碼生成這幾個步驟。本文會簡單分析一下這些流程。

編譯的啓動過程

build的啓動過程

在終端中執行go build時,執行的入口位於src/cmd/go的main.go文件中;

	// Command table of the go tool (fragment; the enclosing function is not
	// shown). main's lookup loop walks this slice and compares each entry's
	// Name() with args[0]. work.CmdBuild is the entry used for "go build".
	// NOTE(review): the second group (Help*) looks like help-topic entries
	// rather than runnable commands — confirm against cmd/go.
	base.Go.Commands = []*base.Command{
		bug.CmdBug,
		work.CmdBuild,
		clean.CmdClean,
		doc.CmdDoc,
		envcmd.CmdEnv,
		fix.CmdFix,
		fmtcmd.CmdFmt,
		generate.CmdGenerate,
		get.CmdGet,
		work.CmdInstall,
		list.CmdList,
		modcmd.CmdMod,
		run.CmdRun,
		test.CmdTest,
		tool.CmdTool,
		version.CmdVersion,
		vet.CmdVet,

		help.HelpBuildmode,
		help.HelpC,
		help.HelpCache,
		help.HelpEnvironment,
		help.HelpFileType,
		modload.HelpGoMod,
		help.HelpGopath,
		get.HelpGopathGet,
		modfetch.HelpGoproxy,
		help.HelpImportPath,
		modload.HelpModules,
		modget.HelpModuleGet,
		help.HelpPackages,
		test.HelpTestflag,
		test.HelpTestfunc,
	}

// main is the entry point of the go command (excerpt; elided parts are
// marked with "..."). The visible loop resolves args[0] against the
// registered command table and runs the matching command.
func main() {
	_ = go11tag
	...

BigCmdLoop:
	for bigCmd := base.Go; ; {
		// Scan every command registered under the current command group.
		for _, cmd := range bigCmd.Commands {
			// Skip entries whose name does not match the requested command.
			if cmd.Name() != args[0] {
				continue
			}
			// The matching command has subcommands of its own: descend into
			// it and restart the lookup with the remaining arguments.
			if len(cmd.Commands) > 0 {
				bigCmd = cmd
				args = args[1:]
				// No subcommand was given: print usage and exit with status 2.
				if len(args) == 0 {
					help.PrintUsage(os.Stderr, bigCmd)
					base.SetExitStatus(2)
					base.Exit()
				}
				if args[0] == "help" {
					// Accept 'go mod help' and 'go mod help foo' for 'go help mod' and 'go help mod foo'.
					help.Help(os.Stdout, append(strings.Split(cfg.CmdName, " "), args[1:]...))
					return
				}
				cfg.CmdName += " " + args[0]
				continue BigCmdLoop
			}
			if !cmd.Runnable() {
				continue
			}
			// Flag-usage output is routed to the command's own Usage method.
			cmd.Flag.Usage = func() { cmd.Usage() }
			if cmd.CustomFlags {
				// The command receives the raw arguments after its name.
				args = args[1:]
			} else {
				base.SetFromGOFLAGS(cmd.Flag)
				cmd.Flag.Parse(args[1:])
				args = cmd.Flag.Args()
			}
			// Run the selected command, then exit.
			cmd.Run(cmd, args)
			base.Exit()
			return
		}
		...
	}
}

在初始化的過程中其實已經註冊了所有的命令,build對應的就是work.CmdBuild這個命令,該命令的初始化代碼如下;

// init wires up the build and install commands: it assigns their Run
// functions and registers their command-line flags.
func init() {
	// break init cycle
	// "go build" is executed by runBuild, "go install" by runInstall.
	CmdBuild.Run = runBuild
	CmdInstall.Run = runInstall

	// Flags specific to build: -i and -o.
	CmdBuild.Flag.BoolVar(&cfg.BuildI, "i", false, "")
	CmdBuild.Flag.StringVar(&cfg.BuildO, "o", "", "output file")

	CmdInstall.Flag.BoolVar(&cfg.BuildI, "i", false, "")

	// Both commands additionally get the flag set added by AddBuildFlags.
	AddBuildFlags(CmdBuild)
	AddBuildFlags(CmdInstall)
}

此時主要運行的流程就是runBuild函數,

// runBuild implements "go build" (excerpt; elided parts are marked with
// "..."). It creates a root action, adds one dependency action per
// requested package, and hands the root to b.Do for execution.
func runBuild(cmd *base.Command, args []string) {
	...
	a := &Action{Mode: "go build"}
	for _, p := range pkgs {
		// AutoAction chooses the action for package p; it becomes a
		// dependency of the root action.
		a.Deps = append(a.Deps, b.AutoAction(ModeBuild, depMode, p))
	}
	if cfg.BuildBuildmode == "shared" {
		a = b.buildmodeShared(ModeBuild, depMode, args, pkgs, a)
	}
	b.Do(a)
}

golang的編譯過程是通過一系列的Action串聯起來的,此時a.Deps中添加的就是AutoAction返回的action,接着調用b.Do(a)開始執行編譯;

// Do runs the action graph rooted at root (excerpt; elided parts are
// marked with "..."). Actions without pending dependencies are queued as
// ready, and par worker goroutines pop ready actions and run them.
func (b *Builder) Do(root *Action) {
	...

	b.readySema = make(chan bool, len(all))

	// Initialize per-action execution state.
	for _, a := range all {
		for _, a1 := range a.Deps {
			// Register a on each of its dependencies' trigger lists.
			a1.triggers = append(a1.triggers, a)
		}
		a.pending = len(a.Deps)
		// An action with no dependencies can run immediately.
		if a.pending == 0 {
			b.ready.push(a)
			b.readySema <- true
		}
	}

	// Handle runs a single action and takes care of triggering
	// any actions that are runnable as a result.
	handle := func(a *Action) {
		var err error

		if a.Func != nil && (!a.Failed || a.IgnoreFail) {
			if err == nil {
				// Invoke the action's callback (for example the build or link step).
				err = a.Func(b, a)
			}
		}
		...
	}

	...
	for i := 0; i < par; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				select {
				case _, ok := <-b.readySema:
					if !ok {
						return
					}
					// Receiving a value from b.readySema entitles
					// us to take from the ready queue.
					b.exec.Lock()
					a := b.ready.pop()
					b.exec.Unlock()
					// Execute the action that was just dequeued.
					handle(a)
				case <-base.Interrupted:
					base.SetExitStatus(1)
					return
				}
			}
		}()
	}

	...
}

這些Action是在AutoAction中定義的;

// AutoAction returns the "right" action for go build or go install of p.
// A package named "main" gets a link action; every other package gets a
// compile action.
func (b *Builder) AutoAction(mode, depMode BuildMode, p *load.Package) *Action {
	if p.Name == "main" {
		// Package main: return the link action.
		return b.LinkAction(mode, depMode, p)
	}
	// Any other package is only compiled.
	return b.CompileAction(mode, depMode, p)
}

繼續查看LinkAction的執行流程;

// LinkAction returns the action that links package p into an executable.
// The link action depends on the compile action for p, shares its object
// directory, and writes its target below an "exe" subdirectory. For the
// install modes the result is wrapped by b.installAction.
func (b *Builder) LinkAction(mode, depMode BuildMode, p *load.Package) *Action {
	// Construct link action.
	a := b.cacheAction("link", p, func() *Action {
		a := &Action{
			Mode:    "link",
			Package: p,
		}

		// Build the compile action for p; the link action depends on it.
		a1 := b.CompileAction(ModeBuild, depMode, p)
		// The callback executed for this action is the link step.
		a.Func = (*Builder).link
		a.Deps = []*Action{a1}
		a.Objdir = a1.Objdir

		// An executable file. (This is the name of a temporary file.)
		// Because we run the temporary file in 'go run' and 'go test',
		// the name will show up in ps listings. If the caller has specified
		// a name, use that instead of a.out. The binary is generated
		// in an otherwise empty subdirectory named exe to avoid
		// naming conflicts. The only possible conflict is if we were
		// to create a top-level package named exe.
		name := "a.out"
		if p.Internal.ExeName != "" {
			name = p.Internal.ExeName
		} else if (cfg.Goos == "darwin" || cfg.Goos == "windows") && cfg.BuildBuildmode == "c-shared" && p.Target != "" {
			// On OS X, the linker output name gets recorded in the
			// shared library's LC_ID_DYLIB load command.
			// The code invoking the linker knows to pass only the final
			// path element. Arrange that the path element matches what
			// we'll install it as; otherwise the library is only loadable as "a.out".
			// On Windows, DLL file name is recorded in PE file
			// export section, so do like on OS X.
			_, name = filepath.Split(p.Target)
		}
		a.Target = a.Objdir + filepath.Join("exe", name) + cfg.ExeSuffix
		a.built = a.Target
		b.addTransitiveLinkDeps(a, a1, "")

		// Sequence the build of the main package (a1) strictly after the build
		// of all other dependencies that go into the link. It is likely to be after
		// them anyway, but just make sure. This is required by the build ID-based
		// shortcut in (*Builder).useCache(a1), which will call b.linkActionID(a).
		// In order for that linkActionID call to compute the right action ID, all the
		// dependencies of a (except a1) must have completed building and have
		// recorded their build IDs.
		a1.Deps = append(a1.Deps, &Action{Mode: "nop", Deps: a.Deps[1:]})
		return a
	})

	if mode == ModeInstall || mode == ModeBuggyInstall {
		a = b.installAction(a, mode)
	}

	return a
}

繼續查看CompileAction

// CompileAction returns the action that compiles package p (excerpt;
// elided parts are marked with "..."). The mode is downgraded to
// ModeBuild for packages without a permanent target and for main packages.
func (b *Builder) CompileAction(mode, depMode BuildMode, p *load.Package) *Action {
	if mode != ModeBuild && (p.Internal.Local || p.Module != nil) && p.Target == "" {
		// Imported via local path or using modules. No permanent target.
		mode = ModeBuild
	}
	if mode != ModeBuild && p.Name == "main" {
		// We never install the .a file for a main package.
		mode = ModeBuild
	}

	// Construct package build action.
	a := b.cacheAction("build", p, func() *Action {
		a := &Action{
			Mode:    "build",
			Package: p,
			// The callback executed for this action is (*Builder).build.
			Func:    (*Builder).build,
			Objdir:  b.NewObjdir(),
		}
		...

		return a
	})

	...

	return a
}

繼續查看build的執行流程;

// build is the action for building a single package.
// Note that any new influence on this logic must be reported in b.buildActionID above as well.
//
// Excerpt: only the Go compilation step is shown; elided parts are marked
// with "...".
func (b *Builder) build(a *Action) (err error) {
	...

	// Compile Go.
	// The compiler is invoked through the configured toolchain and writes
	// the package archive objdir + "_pkg_.a".
	objpkg := objdir + "_pkg_.a"
	ofile, out, err := BuildToolchain.gc(b, a, objpkg, icfg.Bytes(), symabis, len(sfiles) > 0, gofiles)
	// Any compiler output is shown to the user; if the compile also
	// failed, errPrintedOutput is returned instead of err.
	if len(out) > 0 {
		output := b.processOutput(out)
		if p.Module != nil && !allowedVersion(p.Module.GoVersion) {
			output += "note: module requires Go " + p.Module.GoVersion + "\n"
		}
		b.showOutput(a, a.Package.Dir, a.Package.Desc(), output)
		if err != nil {
			return errPrintedOutput
		}
	}


}

在省略了大量細節處理之後,可以看到編譯的過程其實調用了BuildToolchain.gc函數,而BuildToolchain就是在初始化時定義的gcToolchain,因此這裏調用的是gcToolchain的gc方法,

// The Go toolchain.

// gcToolchain is an empty struct; the gc toolchain operations are
// implemented as methods on it.
type gcToolchain struct{}

// compiler returns the value of base.Tool("compile"), i.e. the "compile" tool.
func (gcToolchain) compiler() string {
	return base.Tool("compile")
}

// linker returns the value of base.Tool("link"), i.e. the "link" tool.
func (gcToolchain) linker() string {
	return base.Tool("link")
}

// gc assembles the command line for the compile tool for the package of
// action a and runs it through b.runOut.
//
// It returns the object file path (archive when non-empty, otherwise
// objdir + "_go_.o"), the output produced by the tool, and any error.
// When importcfg is non-nil it is first written to objdir + "importcfg".
func (gcToolchain) gc(b *Builder, a *Action, archive string, importcfg []byte, symabis string, asmhdr bool, gofiles []string) (ofile string, output []byte, err error) {
	p := a.Package
	objdir := a.Objdir
	if archive != "" {
		ofile = archive
	} else {
		out := "_go_.o"
		ofile = objdir + out
	}

	// Package path passed with -p: the plugin path in plugin mode, "main"
	// for a main package that is not forced to be a library, otherwise the
	// import path.
	pkgpath := p.ImportPath
	if cfg.BuildBuildmode == "plugin" {
		pkgpath = pluginPath(a)
	} else if p.Name == "main" && !p.Internal.ForceLibrary {
		pkgpath = "main"
	}
	gcargs := []string{"-p", pkgpath}
	if p.Module != nil && p.Module.GoVersion != "" && allowedVersion(p.Module.GoVersion) {
		gcargs = append(gcargs, "-lang=go"+p.Module.GoVersion)
	}
	if p.Standard {
		gcargs = append(gcargs, "-std")
	}
	compilingRuntime := p.Standard && (p.ImportPath == "runtime" || strings.HasPrefix(p.ImportPath, "runtime/internal"))
	// The runtime package imports a couple of general internal packages.
	if p.Standard && (p.ImportPath == "internal/cpu" || p.ImportPath == "internal/bytealg") {
		compilingRuntime = true
	}
	if compilingRuntime {
		// runtime compiles with a special gc flag to check for
		// memory allocations that are invalid in the runtime package,
		// and to implement some special compiler pragmas.
		gcargs = append(gcargs, "-+")
	}

	// If we're giving the compiler the entire package (no C etc files), tell it that,
	// so that it can give good error messages about forward declarations.
	// Exceptions: a few standard packages have forward declarations for
	// pieces supplied behind-the-scenes by package runtime.
	extFiles := len(p.CgoFiles) + len(p.CFiles) + len(p.CXXFiles) + len(p.MFiles) + len(p.FFiles) + len(p.SFiles) + len(p.SysoFiles) + len(p.SwigFiles) + len(p.SwigCXXFiles)
	if p.Standard {
		switch p.ImportPath {
		case "bytes", "internal/poll", "net", "os", "runtime/pprof", "runtime/trace", "sync", "syscall", "time":
			extFiles++
		}
	}
	if extFiles == 0 {
		gcargs = append(gcargs, "-complete")
	}
	if cfg.BuildContext.InstallSuffix != "" {
		gcargs = append(gcargs, "-installsuffix", cfg.BuildContext.InstallSuffix)
	}
	if a.buildID != "" {
		gcargs = append(gcargs, "-buildid", a.buildID)
	}
	platform := cfg.Goos + "/" + cfg.Goarch
	if p.Internal.OmitDebug || platform == "nacl/amd64p32" || cfg.Goos == "plan9" || cfg.Goarch == "wasm" {
		gcargs = append(gcargs, "-dwarf=false")
	}
	if strings.HasPrefix(runtimeVersion, "go1") && !strings.Contains(os.Args[0], "go_bootstrap") {
		gcargs = append(gcargs, "-goversion", runtimeVersion)
	}
	if symabis != "" {
		gcargs = append(gcargs, "-symabis", symabis)
	}

	gcflags := str.StringList(forcedGcflags, p.Internal.Gcflags)
	if compilingRuntime {
		// Remove -N, if present.
		// It is not possible to build the runtime with no optimizations,
		// because the compiler cannot eliminate enough write barriers.
		for i := 0; i < len(gcflags); i++ {
			if gcflags[i] == "-N" {
				copy(gcflags[i:], gcflags[i+1:])
				gcflags = gcflags[:len(gcflags)-1]
				i--
			}
		}
	}

	// Assemble the full command line: optional toolexec wrapper, the
	// compile tool, output file, -trimpath, user and computed flags, -D.
	args := []interface{}{cfg.BuildToolexec, base.Tool("compile"), "-o", ofile, "-trimpath", trimDir(a.Objdir), gcflags, gcargs, "-D", p.Internal.LocalPrefix}
	if importcfg != nil {
		if err := b.writeFile(objdir+"importcfg", importcfg); err != nil {
			return "", nil, err
		}
		args = append(args, "-importcfg", objdir+"importcfg")
	}
	// -pack is passed only when the output file is the archive itself.
	if ofile == archive {
		args = append(args, "-pack")
	}
	if asmhdr {
		args = append(args, "-asmhdr", objdir+"go_asm.h")
	}

	// Add -c=N to use concurrent backend compilation, if possible.
	if c := gcBackendConcurrency(gcflags); c > 1 {
		args = append(args, fmt.Sprintf("-c=%d", c))
	}

	// The Go source files come last, each passed through mkAbs with p.Dir.
	for _, f := range gofiles {
		args = append(args, mkAbs(p.Dir, f))
	}

	// Run the compile tool with the assembled arguments in directory p.Dir.
	output, err = b.runOut(p.Dir, nil, args...)
	return ofile, output, err
}

繼續查看b.runOut的函數執行;

// runOut runs the command described by cmdargs as an external process and
// returns its combined standard output and standard error together with
// the resulting error (excerpt; elided parts are marked with "...").
func (b *Builder) runOut(dir string, env []string, cmdargs ...interface{}) ([]byte, error) {
	...

	var buf bytes.Buffer
	// Launch the tool as a child process; cmdline[0] is the program and
	// the remaining elements are its arguments.
	cmd := exec.Command(cmdline[0], cmdline[1:]...)
	// Both output streams are captured into the same buffer.
	cmd.Stdout = &buf
	cmd.Stderr = &buf
	...
	return buf.Bytes(), err
}

此時執行的就是compile命令,接着就分析一下compile的流程

compile編譯的主要流程

此時編譯的入口位於src/cmd/compile的main.go的main函數中,

// main is the entry point of the compile tool. It looks up the
// architecture initializer for objabi.GOARCH, exits with status 2 if the
// architecture is unknown, and otherwise runs gc.Main with it.
func main() {
	// disable timestamps for reproducible output
	log.SetFlags(0)
	log.SetPrefix("compile: ")

	archInit, ok := archInits[objabi.GOARCH]
	if !ok {
		fmt.Fprintf(os.Stderr, "compile: unknown architecture %q\n", objabi.GOARCH)
		os.Exit(2)
	}

	// Compile using the initializer selected for the target architecture.
	gc.Main(archInit)
	gc.Exit(0)
}

此時Main就是整個編譯過程的執行流程,包括詞法與語法分析、類型檢查、中間代碼生成與優化等功能。

// Main parses flags and Go source files specified in the command-line
// arguments, type-checks the parsed Go package, compiles functions to machine
// code, and finally writes the compiled package definition to disk.
//
// Excerpt: only the parsing phase is shown; elided parts are marked with
// "...".
func Main(archInit func(*Arch)) {
	...

	// Parse all source files named on the command line; the phase is timed
	// under the "fe"/"parse" label and the line count is recorded.
	timings.Start("fe", "parse")
	lines := parseFiles(flag.Args())
	timings.Stop()
	timings.AddEvent(int64(lines), "lines")

	finishUniverse()

	typecheckok = true

	...
}

省略了一些參數配置初始化,與內建模塊的初始化導入過程。接着就是調用parseFiles來解析文件,

// parseFiles parses the given source files concurrently, one goroutine
// per file, then processes the results sequentially in the order of
// filenames. It returns the total number of lines over all files.
func parseFiles(filenames []string) uint {
	var noders []*noder
	// Limit the number of simultaneously open files.
	// sem is a buffered channel used as a counting semaphore by the
	// goroutines below.
	sem := make(chan struct{}, runtime.GOMAXPROCS(0)+10)

	for _, filename := range filenames {
		p := &noder{
			basemap: make(map[*syntax.PosBase]*src.PosBase),
			err:     make(chan syntax.Error),
		}
		// Remember the noder so its result can be collected below.
		noders = append(noders, p)

		go func(filename string) {
			sem <- struct{}{}
			defer func() { <-sem }()
			// Closing p.err ends the collecting loop's range over it.
			defer close(p.err)
			// Position base for this file.
			base := syntax.NewFileBase(filename)

			f, err := os.Open(filename)
			if err != nil {
				p.error(syntax.Error{Pos: syntax.MakePos(base, 0, 0), Msg: err.Error()})
				return
			}
			defer f.Close()

			// Parse the file into a syntax tree.
			p.file, _ = syntax.Parse(base, f, p.error, p.pragma, syntax.CheckBranches) // errors are tracked via p.error
		}(filename)
	}

	var lines uint
	for _, p := range noders {
		// Report every syntax error sent on p.err; the loop ends once
		// the goroutine for this file closes the channel.
		for e := range p.err {
			p.yyerrorpos(e.Pos, "%s", e.Msg)
		}

		// Process the parsed file (see (*noder).node).
		p.node()
		lines += p.file.Lines
		p.file = nil // release memory

		if nsyntaxerrors != 0 {
			errorexit()
		}
		// Always run testdclstack here, even when debug_dclstack is not set, as a sanity measure.
		testdclstack()
	}

	localpkg.Height = myheight

	// Total number of lines across all parsed files.
	return lines
}

解析的核心的邏輯就是通過syntax.Parse來驅動的;

// Parse parses a single source file read from src and returns the
// resulting File together with the first error recorded by the parser.
// A panic carrying an Error value is recovered and returned as first;
// any other panic is re-raised.
func Parse(base *PosBase, src io.Reader, errh ErrorHandler, pragh PragmaHandler, mode Mode) (_ *File, first error) {
	defer func() {
		if p := recover(); p != nil {
			if err, ok := p.(Error); ok {
				first = err
				return
			}
			panic(p)
		}
	}()

	var p parser
	// Set up the parser for this source.
	p.init(base, src, errh, pragh, mode)
	// Read the first token so that fileOrNil starts with a current token.
	p.next()
	// Parse the whole file; p.first holds the first error, if any.
	return p.fileOrNil(), p.first
}

此時首先運行了next,由於parser內嵌了scanner,實際調用的是scanner的next方法;

// next scans the next token from the source and stores its kind in s.tok
// (and, for some tokens, a literal in s.lit). s.nlsemi records whether a
// following newline must be turned into a semicolon token.
//
// Excerpt: the remaining switch cases are elided ("...").
func (s *scanner) next() {
   nlsemi := s.nlsemi
   s.nlsemi = false

redo:
   // skip white space
   // Read characters until one is found that is not skipped below.
   c := s.getr()
   for c == ' ' || c == '\t' || c == '\n' && !nlsemi || c == '\r' {
      // Blanks, tabs and carriage returns are always skipped; a newline
      // is skipped only when it does not have to become a semicolon.
      c = s.getr()
   }

   // token start
   s.line, s.col = s.source.line0, s.source.col0

   if isLetter(c) || c >= utf8.RuneSelf && s.isIdentRune(c, true) {
     // A letter (or identifier rune) starts an identifier or keyword;
     // brackets, punctuation etc. are handled by the switch below.
     s.ident()
      return
   }

   switch c {
   case -1:
      if nlsemi {
         s.lit = "EOF"
         s.tok = _Semi
         break
      }
      s.tok = _EOF

   case '\n':
      s.lit = "newline"
      s.tok = _Semi

   case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
      s.number(c)

   case '"':
      s.stdString()

   case '`':
      s.rawString()

   case '\'':
      s.rune()

   case '(':
      s.tok = _Lparen

   case '[':
      s.tok = _Lbrack

   case '{':
      s.tok = _Lbrace

   case ',':
      s.tok = _Comma

   case ';':
      s.lit = "semicolon"
      s.tok = _Semi

   case ')':
      s.nlsemi = true
      s.tok = _Rparen

   case ']':
      s.nlsemi = true
      s.tok = _Rbrack

   case '}':
      s.nlsemi = true
      s.tok = _Rbrace

	...
}

ident方法用於獲取標識符或關鍵字,即除了標點和運算符等符號之外的輸入數據;

// ident scans an identifier. If the scanned text is a keyword, s.tok is
// set to that keyword's token; otherwise s.tok is _Name and s.lit holds
// the identifier text.
func (s *scanner) ident() {
	s.startLit()

	// accelerate common case (7bit ASCII)
	// Read the next character.
	c := s.getr()
	for isLetter(c) || isDigit(c) {
		// Keep reading while the character is a letter or a digit.
		c = s.getr()
	}

	// general case
	if c >= utf8.RuneSelf {
		for s.isIdentRune(c, false) {
			c = s.getr()
		}
	}
	// The last character read is not part of the identifier; put it back.
	s.ungetr()

	lit := s.stopLit()

	// possibly a keyword
	if len(lit) >= 2 {
		if tok := keywordMap[hash(lit)]; tok != 0 && tokStrFast(tok) == string(lit) {
			// It is a keyword: only break, continue, fallthrough and return
			// make a following newline act as a semicolon.
			s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok)
			s.tok = tok
			return
		}
	}

	// Not a keyword: keep the text and mark the token as _Name.
	s.nlsemi = true
	s.lit = string(lit)
	s.tok = _Name
}

整個文件的語法分析則是由fileOrNil()函數來驅動的;

// fileOrNil parses a complete source file: the package clause, the import
// declarations, and then all top-level declarations up to EOF. It returns
// nil if the package clause is missing or has errors.
func (p *parser) fileOrNil() *File {
	if trace {
		defer p.trace("file")()
	}

	f := new(File)
	// Record the position at which the file starts.
	f.pos = p.pos()

	// PackageClause
	// The first token must be the package keyword.
	if !p.got(_Package) {
		p.syntaxError("package statement must be first")
		return nil
	}
	// The package name follows, terminated by a semicolon.
	f.PkgName = p.name()
	p.want(_Semi)

	// don't bother continuing if package clause has errors
	// p.first is non-nil once an error has been recorded.
	if p.first != nil {
		return nil
	}

	// { ImportDecl ";" }
	// Consume every leading import declaration and append it to DeclList.
	for p.got(_Import) {
		f.DeclList = p.appendGroup(f.DeclList, p.importDecl)
		p.want(_Semi)
	}

	// { TopLevelDecl ";" }
	// Main loop: parse declarations until the end of the file.
	for p.tok != _EOF {
		switch p.tok {
		case _Const:
			// const declaration (possibly a group): append to DeclList.
			p.next()
			f.DeclList = p.appendGroup(f.DeclList, p.constDecl)

		case _Type:
			// type declaration: append to DeclList.
			p.next()
			f.DeclList = p.appendGroup(f.DeclList, p.typeDecl)

		case _Var:
			// var declaration: append to DeclList.
			p.next()
			f.DeclList = p.appendGroup(f.DeclList, p.varDecl)

		case _Func:
			p.next()
			// funcDeclOrNil returns nil when no declaration was produced;
			// only non-nil results are appended to DeclList.
			if d := p.funcDeclOrNil(); d != nil {
				f.DeclList = append(f.DeclList, d)
			}

		default:
			if p.tok == _Lbrace && len(f.DeclList) > 0 && isEmptyFuncDecl(f.DeclList[len(f.DeclList)-1]) {
				// opening { of function declaration on next line
				p.syntaxError("unexpected semicolon or newline before {")
			} else {
				p.syntaxError("non-declaration statement outside function body")
			}
			// Error recovery. NOTE(review): advance presumably skips ahead
			// to the next const/type/var/func keyword — confirm.
			p.advance(_Const, _Type, _Var, _Func)
			continue
		}

		// Reset p.pragma BEFORE advancing to the next token (consuming ';')
		// since comments before may set pragmas for the next function decl.
		p.pragma = 0

		if p.tok != _EOF && !p.got(_Semi) {
			p.syntaxError("after top level declaration")
			p.advance(_Const, _Type, _Var, _Func)
		}
	}
	// p.tok == _EOF

	f.Lines = p.source.line

	return f
}

經過這些分析之後,源文件就被解析成一個個單獨的var、type、func等聲明。然後在parseFiles中,每個文件解析完成之後,就會調用noder的p.node()方法,將分析得到的內容放置到xtop中,

// node processes the parsed file held in p.file: it sets up the package,
// converts the file's declaration list and appends the result to the
// global xtop, applies the collected linknames, and records cgo export
// pragmas.
func (p *noder) node() {
	types.Block = 1
	imported_unsafe = false

	p.setlineno(p.file.PkgName)
	mkpackage(p.file.PkgName.Value)

	// Append everything collected in DeclList to xtop for the later phases
	// of the main compile flow.
	xtop = append(xtop, p.decls(p.file.DeclList)...)

	// Linknames are applied only if imported_unsafe is set; otherwise an
	// error is reported for each of them.
	for _, n := range p.linknames {
		if imported_unsafe {
			lookup(n.local).Linkname = n.remote
		} else {
			p.yyerrorpos(n.pos, "//go:linkname only allowed in Go files that import \"unsafe\"")
		}
	}

	// The linker expects an ABI0 wrapper for all cgo-exported
	// functions.
	for _, prag := range p.pragcgobuf {
		switch prag[0] {
		case "cgo_export_static", "cgo_export_dynamic":
			if symabiRefs == nil {
				symabiRefs = make(map[string]obj.ABI)
			}
			symabiRefs[prag[1]] = obj.ABI0
		}
	}

	// Merge this file's cgo pragmas into the global buffer and reset
	// per-file state.
	pragcgobuf = append(pragcgobuf, p.pragcgobuf...)
	lineno = src.NoXPos
	clearImports()
}

這樣主要的詞法語法分析大致就完成了。有關xtop的處理流程,後續有機會再繼續瞭解學習。

總結

golang的編譯過程相對比較複雜,本文只是大致梳理了一下golang詞法與語法解析的過程:scanner逐個字符地掃描並識別出標識符、關鍵字和各類符號,parser再通過一個大的for循環,依據不同的關鍵字分別解析對應的聲明。這一過程相對而言還是比較容易理解,但是其他的內容還是很複雜,有好多有關係統層面的知識需要加強補充。由於本人才疏學淺,如有錯誤請批評指正。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章