Linux在被bootloader加載到內存後,CPU最初執行的內核代碼是arch/x86/boot/header.S彙編文件中的_start例程,該文件設置好了包含大量bootloader參數的頭部(header)。接着執行其中的start_of_setup例程,這個例程在做了一些準備工作後會通過call main跳轉到arch/x86/boot/main.c:main()函數處執行,這就是衆所周知的x86下的main函數,它們都工作在實模式下。在這個main函數中我們可以第一次看到與內存管理相關的代碼,這段代碼調用detect_memory()函數檢測系統物理內存。如下:
- void main(void) /* Real-mode C entry point of the x86 setup code: runs the boot steps below in order and finishes by entering protected mode. */
- {
- /* First, copy the boot header into the "zeropage" */
- copy_boot_params(); /* copy the header parameters into the boot_params variable */
- /* End of heap check */
- init_heap();
- /* Make sure we have all the proper CPU support; a non-zero result
-    from validate_cpu() prints the message below and calls die(). */
- if (validate_cpu()) {
- puts("Unable to boot - please use a kernel appropriate "
- "for your CPU.\n");
- die();
- }
- /* Tell the BIOS what CPU mode we intend to run in. */
- set_bios_mode();
- /* Detect memory layout */
- detect_memory(); /* memory detection routine; its return value is ignored here */
- /* Set keyboard repeat rate (why?) */
- keyboard_set_repeat();
- /* Query MCA information */
- query_mca();
- /* Query Intel SpeedStep (IST) information */
- query_ist();
- /* Query APM information (only when APM support is configured) */
- #if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
- query_apm_bios();
- #endif
- /* Query EDD information (only when EDD support is configured) */
- #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
- query_edd();
- #endif
- /* Set the video mode */
- set_video();
- /* Parse command line for 'quiet' and pass it to decompressor
-    by setting QUIET_FLAG in the header's loadflags. */
- if (cmdline_find_option_bool("quiet"))
- boot_params.hdr.loadflags |= QUIET_FLAG;
- /* Do the last things and invoke protected mode */
- go_to_protected_mode();
- }
- int detect_memory(void) /* Run all three memory probes; return 0 if at least one reported success, -1 if none did. */
- {
- int err = -1; /* stays -1 unless one of the probes below succeeds */
- if (detect_memory_e820() > 0) /* e820 probe: a positive return (entry count) is success */
- err = 0;
- if (!detect_memory_e801()) /* e801 probe: a zero return is treated as success */
- err = 0;
- if (!detect_memory_88()) /* 88 probe: a zero return is treated as success */
- err = 0;
- return err;
- }
- #define SMAP 0x534d4150 /* ASCII "SMAP": signature sent in edx and checked in the returned eax */
- static int detect_memory_e820(void) /* Collect memory ranges into boot_params.e820_map with repeated int 0x15 (ax = 0xe820) calls; returns the number of entries stored, which is also written to boot_params.e820_entries. */
- {
- int count = 0; /* number of e820 entries collected so far */
- struct biosregs ireg, oreg;
- struct e820entry *desc = boot_params.e820_map;
- static struct e820entry buf; /* static so it is zeroed */
- initregs(&ireg); /* initialise the relevant registers in ireg */
- ireg.ax = 0xe820;
- ireg.cx = sizeof buf; /* size of the e820entry structure */
- ireg.edx = SMAP; /* signature */
- ireg.di = (size_t)&buf; /* where the int 0x15 result is stored */
- /*
- * Note: at least one BIOS is known which assumes that the
- * buffer pointed to by one e820 call is the same one as
- * the previous call, and only changes modified fields. Therefore,
- * we use a temporary buffer and copy the results entry by entry.
- *
- * This routine deliberately does not try to account for
- * ACPI 3+ extended attributes. This is because there are
- * BIOSes in the field which report zero for the valid bit for
- * all ranges, and we don't currently make any use of the
- * other attribute bits. Revisit this if we see the extended
- * attribute bits deployed in a meaningful way in the future.
- */
- do {
- /* Register image handed to the int 0x15 call below (set up in ireg):
-    ax  = 0xe820
-    edx = SMAP signature
-    di  = address of buf
-    cx  = sizeof buf
-    ebx = value carried over from the previous call's oreg.ebx
-          (on the first pass, whatever initregs() left there)
-    Values read back from oreg after the call:
-    eflags = carry flag is tested to detect the end of the chain
-    eax    = compared against SMAP
-    ebx    = continuation value for the next call
-    The entry itself is expected in buf and is copied to *desc below.
- */
- intcall(0x15, &ireg, &oreg);
- ireg.ebx = oreg.ebx; /* select the next entry */
- /* BIOSes which terminate the chain with CF = 1 as opposed
- to %ebx = 0 don't always report the SMAP signature on
- the final, failing, probe. */
- if (oreg.eflags & X86_EFLAGS_CF)
- break;
- /* Some BIOSes stop returning SMAP in the middle of
- the search loop. We don't know exactly how the BIOS
- screwed up the map at that point, we might have a
- partial map, the full map, or complete garbage, so
- just return failure. */
- if (oreg.eax != SMAP) {
- count = 0;
- break;
- }
- *desc++ = buf; /* copy buf into the slot desc points at, then advance desc */
- count++; /* one more entry recorded */
- } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
- /* store the number of entries in boot_params.e820_entries and return it */
- return boot_params.e820_entries = count;
- }
這裏存放中斷返回值的e820entry結構,以及表示內存圖的e820map結構均位於arch/x86/include/asm/e820.h中,如下:
- struct e820entry { /* one memory range as stored in the e820 map */
- __u64 addr; /* start of the memory segment */
- __u64 size; /* size of the memory segment */
- __u32 type; /* type of the memory segment */
- } __attribute__((packed));
- struct e820map { /* the memory map: a counted array of e820entry ranges */
- __u32 nr_map; /* number of entries currently used in map[] */
- struct e820entry map[E820_X_MAX]; /* the entries; capacity is E820_X_MAX */
- };
對於32位的系統,通過調用鏈arch/x86/boot/main.c:main()--->arch/x86/boot/pm.c:go_to_protected_mode()--->arch/x86/boot/pmjump.S:protected_mode_jump()--->arch/x86/boot/compressed/head_32.S:startup_32()--->arch/x86/kernel/head_32.S:startup_32()--->arch/x86/kernel/head32.c:i386_start_kernel()--->init/main.c:start_kernel(),到達衆所周知的Linux內核啓動函數start_kernel(),這裏會調用setup_arch()完成與體系結構相關的一系列初始化工作,其中就包括各種內存的初始化工作,如內存圖的建立、管理區的初始化等等。對x86體系結構,setup_arch()函數在arch/x86/kernel/setup.c中,如下:
- /*
-  * setup_arch() - architecture-specific early setup (abridged listing).
-  *
-  * Each "......" comment marks code left out of this excerpt.  The
-  * memory-related steps that remain visible are, in order:
-  *   - setup_memory_map() builds the memory map;
-  *   - e820_end_of_ram_pfn() supplies max_pfn (read again when
-  *     mtrr_trim_uncached_memory() returns non-zero);
-  *   - on CONFIG_X86_32, find_low_pfn_range() updates max_low_pfn;
-  *   - init_memory_mapping() sets up the mapping and yields
-  *     max_low_pfn_mapped / max_pfn_mapped;
-  *   - initmem_init() starts the memory allocator;
-  *   - paging_init() builds the full page tables.
-  *
-  * @cmdline_p: not referenced in the lines shown here.
-  */
- void __init setup_arch(char **cmdline_p)
- {
- /* ...... */
- x86_init.oem.arch_setup();
- setup_memory_map(); /* build the memory map */
- parse_setup_data();
- /* update the e820_saved too */
- e820_reserve_setup_data();
- /* ...... */
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
- max_pfn = e820_end_of_ram_pfn(); /* highest usable page frame number */
- /* preallocate 4k for mptable mpc */
- early_reserve_e820_mpc_new();
- /* update e820 for memory not covered by WB MTRRs */
- mtrr_bp_init();
- if (mtrr_trim_uncached_memory(max_pfn))
- max_pfn = e820_end_of_ram_pfn(); /* map was trimmed, so recompute max_pfn */
- #ifdef CONFIG_X86_32
- /* max_low_pfn is updated here */
- find_low_pfn_range(); /* find the highest page frame number of low memory */
- #else
- num_physpages = max_pfn;
- /* ...... */
- #endif /* CONFIG_X86_32: closes the conditional opened above; it was lost when the listing was abridged */
- /* max_pfn_mapped is updated here */
- /* initialise the memory mapping */
- max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
- max_pfn_mapped = max_low_pfn_mapped;
- #ifdef CONFIG_X86_64
- if (max_pfn > max_low_pfn) {
- max_pfn_mapped = init_memory_mapping(1UL<<32,
- max_pfn<<PAGE_SHIFT);
- /* can we preserve max_low_pfn ?*/
- max_low_pfn = max_pfn;
- }
- #endif
- /* ...... */
- initmem_init(0, max_pfn); /* start the memory allocator */
- /* ...... */
- x86_init.paging.pagetable_setup_start(swapper_pg_dir);
- paging_init(); /* build the complete page tables */
- x86_init.paging.pagetable_setup_done(swapper_pg_dir);
- /* ...... */
- }
setup_arch()中與內存初始化相關的主要步驟如下:
(1)建立內存圖:setup_memory_map();
(2)調用e820_end_of_ram_pfn()找出最大可用頁幀號max_pfn,調用find_low_pfn_range()找出低端內存區的最大可用頁幀號max_low_pfn;
(3)初始化內存映射機制:init_memory_mapping();
(4)初始化內存分配器:initmem_init();
(5)建立完整的頁表:paging_init()。
2、建立內存圖
內存探測完之後,就要建立描述各內存塊情況的全局內存圖結構了。函數爲setup_arch()--->arch/x86/kernel/e820.c:setup_memory_map(),如下:
- void __init setup_memory_map(void) /* Run the platform memory_setup hook, keep a copy of the resulting e820 map in e820_saved, and print the map. */
- {
- char *who; /* string returned by the hook; passed on to e820_print_map() */
- /* call the x86 memory_setup hook (presumably
-    default_machine_specific_memory_setup() by default; the hook
-    assignment is not shown here) */
- who = x86_init.resources.memory_setup();
- /* save a copy in e820_saved */
- memcpy(&e820_saved, &e820, sizeof(struct e820map));
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- /* print the map */
- e820_print_map(who);
- }
- char *__init default_machine_specific_memory_setup(void) /* Fill the global e820 map from boot_params, or from a two-range fake map if that fails; returns "BIOS-e820", "BIOS-88" or "BIOS-e801" to name the source used. */
- {
- char *who = "BIOS-e820";
- u32 new_nr;
- /*
- * Try to copy the BIOS-supplied e820 map; otherwise fake a memory
- * map: one range from 0 to 640k, and the next one from 1mb up to
- * appropriate_mem_k.
- */
- new_nr = boot_params.e820_entries;
- /* remove overlapping entries */
- sanitize_e820_map(boot_params.e820_map,
- ARRAY_SIZE(boot_params.e820_map),
- &new_nr);
- /* number of entries left after removing overlaps */
- boot_params.e820_entries = new_nr;
- /* copy them into the global e820; a negative return means failure
-    and is handled by the fallback below */
- if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
- < 0) {
- u64 mem_size;
- /* compare results from other methods and take the greater */
- if (boot_params.alt_mem_k
- < boot_params.screen_info.ext_mem_k) {
- mem_size = boot_params.screen_info.ext_mem_k;
- who = "BIOS-88";
- } else {
- mem_size = boot_params.alt_mem_k;
- who = "BIOS-e801";
- }
- e820.nr_map = 0; /* discard anything already added, then build the fake map */
- e820_add_region(0, LOWMEMSIZE(), E820_RAM); /* first range: starts at 0, LOWMEMSIZE() bytes long */
- e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM); /* second range at HIGH_MEMORY; << 10 presumably converts KiB to bytes, per the *_k field names */
- }
- /* In case someone cares... */
- return who;
- }
- /*
- * Copy the BIOS e820 map into a safe place, sanity-checking it while
- * we are at it.
- * If we are lucky and live on a modern system, the setup code will
- * have given us a memory map that we can use to properly set up
- * memory. If we are not, we will fake a memory map.
- *
- * Returns -1 without copying anything when nr_map is less than 2;
- * otherwise returns the result of __append_e820_map().
- */
- static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
- {
- /* Only one memory region (or negative)? Ignore it */
- if (nr_map < 2)
- return -1;
- return __append_e820_map(biosmap, nr_map);
- }
- static int __init __append_e820_map(struct e820entry *biosmap, int nr_map) /* Add nr_map entries starting at biosmap to the global e820 map; returns 0, or -1 as soon as an entry's start + size wraps around (entries added before that point stay in the map). */
- {
- while (nr_map) { /* one iteration per entry: add it to e820 */
- u64 start = biosmap->addr;
- u64 size = biosmap->size;
- u64 end = start + size;
- u32 type = biosmap->type;
- /* Overflow in 64 bits? Ignore the memory map. */
- if (start > end)
- return -1;
- /* add this range to the map */
- e820_add_region(start, size, type);
- biosmap++;
- nr_map--;
- }
- return 0;
- }
- void __init e820_add_region(u64 start, u64 size, int type) /* Add one range to the global e820 map; thin wrapper around __e820_add_region(). */
- {
- __e820_add_region(&e820, start, size, type);
- }
- /*
- * Add one memory range (start, size, type) to the e820 map *e820x.
- * If the map already holds ARRAY_SIZE(e820x->map) entries, an error
- * is logged and the range is dropped without changing the map.
- */
- static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
- int type)
- {
- int x = e820x->nr_map; /* index of the first unused slot */
- if (x >= ARRAY_SIZE(e820x->map)) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
- return;
- }
- e820x->map[x].addr = start;
- e820x->map[x].size = size;
- e820x->map[x].type = type;
- e820x->nr_map++; /* the new entry is now part of the map */
- }