Kernel Boot Flow Source Code Analysis 5: start_kernel (Part 2)


1 start_kernel

start_kernel is the first C function executed during kernel boot; it completes the kernel's initialization by calling a long series of init functions. This second part analyzes the code after local_irq_enable().

1.0 start_kernel

Defined in init/main.c

asmlinkage void __init start_kernel(void)
{
    char * command_line;
    extern const struct kernel_param __start___param[], __stop___param[];

    /*
     * Need to run as early as possible, to initialize the
     * lockdep hash:
     */
    lockdep_init(); // initialize the hash tables of the kernel lock-dependency validator (lockdep)
    smp_setup_processor_id(); // get the current CPU number; 0 on a single-core CPU
    debug_objects_early_init(); // early initialization of the debug-objects facility

    cgroup_init_early(); // early initialization of control groups (cgroups)

    local_irq_disable(); // disable interrupts on the current CPU
    early_boot_irqs_disabled = true;

/*
 * Interrupts are still disabled. Do necessary setups, then
 * enable them
 */
    boot_cpu_init(); // mark the boot CPU as active/online
    page_address_init();  // initialize the high-memory page address map; unused on ARM
    pr_notice("%s", linux_banner);
    setup_arch(&command_line); // architecture-specific kernel initialization
    /*
     * Set up the the initial canary ASAP:
     */
    boot_init_stack_canary(); // initialize the stack canary, a guard value that protects against stack-overflow attacks
    mm_init_owner(&init_mm, &init_task); // init_mm.owner = &init_task
    mm_init_cpumask(&init_mm);
    setup_command_line(command_line); // make backup copies of the command line
    setup_nr_cpu_ids(); // set nr_cpu_ids (highest possible CPU number plus one)
    setup_per_cpu_areas(); // allocate space for each CPU's copy of the per-cpu variables
    smp_prepare_boot_cpu();    /* arch-specific boot-cpu hooks */

    build_all_zonelists(NULL, NULL); // build the memory zone (zonelist) lists
    page_alloc_init(); // page allocator initialization

    pr_notice("Kernel command line: %s\n", boot_command_line);
    parse_early_param(); // parse the boot parameters that need 'early' handling; setup_arch has already called this once
    parse_args("Booting kernel", static_command_line, __start___param,
           __stop___param - __start___param,
           -1, -1, &unknown_bootoption); // parse the boot parameters on the command line

    jump_label_init(); // process statically defined jump labels

    /*
     * These use large bootmem allocations and must precede
     * kmem_cache_init()
     */
    setup_log_buf(0); // allocate a boot-time log buffer with memblock_alloc
    pidhash_init(); // initialize the PID hash table
    vfs_caches_init_early(); // initialize the dentry and inode hash tables
    sort_main_extable(); // sort the kernel exception table
    trap_init(); // initialize kernel trap handling; empty on ARM
    mm_init(); // initialize the kernel memory allocators: hand over to the buddy system, bring up slab, set up the vmalloc (non-contiguous) area

    /*
     * Set up the scheduler prior starting any interrupts (such as the
     * timer interrupt). Full topology setup happens at smp_init()
     * time - but meanwhile we still have a functioning scheduler.
     */
    sched_init(); // initialize the process scheduler
    /*
     * Disable preemption - early bootup scheduling is extremely
     * fragile until we cpu_idle() for the first time.
     */
    preempt_disable(); // disable kernel preemption
    if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
        local_irq_disable(); // disable local interrupts
    idr_init_cache(); // create the idr (integer ID management) slab cache
    perf_event_init(); // initialize the perf events subsystem
    rcu_init(); // initialize RCU (Read-Copy-Update)
    tick_nohz_init(); // initialize the dynamic tick (NOHZ) framework
    radix_tree_init(); // initialize the kernel radix-tree implementation
    /* init some links before init_ISA_irqs() */
    early_irq_init(); // not used on arm64
    init_IRQ(); // initialize interrupt handling
    tick_init(); // initialize the clock tick controller
    init_timers(); // initialize kernel timers
    hrtimers_init(); // initialize high-resolution timers
    softirq_init(); // initialize softirqs
    timekeeping_init();  // initialize the many timekeeping-related global variables
    time_init(); // clock initialization
    profile_init(); // initialize the kernel profiling tool
    call_function_init(); // initialize SMP cross-CPU function calls
    WARN(!irqs_disabled(), "Interrupts were enabled early\n");
    early_boot_irqs_disabled = false;
    local_irq_enable(); // enable interrupts on the current CPU
// ---------------- this part's analysis starts here ----------------
    kmem_cache_init_late(); // complete the slab allocator's cache setup

    /*
     * HACK ALERT! This is early. We're enabling the console before
     * we've done PCI setups etc, and console_init() must be aware of
     * this. But we do want output early, in case something goes wrong.
     */
    console_init(); // initialize the console
    if (panic_later)
        panic(panic_later, panic_param);

    lockdep_info(); // print lock dependency validator information

    /*
     * Need to run this when irqs are enabled, because it wants
     * to self-test [hard/soft]-irqs on/off lock inversion bugs
     * too:
     */
    locking_selftest(); // locking API self-test (deadlock detection)

#ifdef CONFIG_BLK_DEV_INITRD // check that the initrd location is sane
    if (initrd_start && !initrd_below_start_ok &&
        page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
        pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
            page_to_pfn(virt_to_page((void *)initrd_start)),
            min_low_pfn);
        initrd_start = 0;
    }
#endif
    page_cgroup_init(); // allocate storage for the page_cgroup structures
    debug_objects_mem_init(); // create the debug_obj slab cache
    kmemleak_init(); // initialize the kernel memory-leak detector
    setup_per_cpu_pageset(); // set up and initialize each CPU's page sets
    numa_policy_init(); // initialize the NUMA memory-access policy
    if (late_time_init) // empty on arm64
        late_time_init();
    sched_clock_init(); // initialize the scheduler clock
    calibrate_delay(); // calibrate the delay loop (BogoMIPS)
    pidmap_init(); // initialize the process PID bitmap
    anon_vma_init(); // create the anon_vma slab cache
#ifdef CONFIG_X86
    if (efi_enabled(EFI_RUNTIME_SERVICES))
        efi_enter_virtual_mode();
#endif
    thread_info_cache_init(); // create the thread_info slab cache
    cred_init(); // create the credentials slab cache
    fork_init(totalram_pages); // initialize the process-creation machinery
    proc_caches_init(); // create the slab caches for the structures a process needs
    buffer_init(); // create the buffer_head slab cache
    key_init(); // initialize the kernel key management system
    security_init(); // initialize the kernel security framework
    dbg_late_init(); // initialize the kernel debugger (kdb)
    vfs_caches_init(totalram_pages); // initialize the virtual filesystem
    signals_init(); // create the signal-queue slab cache
    /* rootfs populating might need page-writeback */
    page_writeback_init(); // initialize the page writeback mechanism
#ifdef CONFIG_PROC_FS
    proc_root_init(); // initialize the proc filesystem
#endif
    cgroup_init(); // full control-group initialization
    cpuset_init(); // initialize cpusets
    taskstats_init_early(); // early taskstats init: create the slab cache and initialize the per-cpu listener locks
    delayacct_init(); // initialize per-task delay accounting

    check_bugs(); // empty on arm64

    acpi_early_init(); /* before LAPIC and SMP init */ // initialize ACPI power management
    sfi_init_late(); // Simple Firmware Interface late init
    if (efi_enabled(EFI_RUNTIME_SERVICES)) { // not used on ARM for now
        efi_late_init();
        efi_free_boot_services();
    }

    ftrace_init(); // initialize ftrace

    /* Do the rest non-__init'ed, we're now alive */
    rest_init(); // the rest of the initialization; analyzed separately
}

1.1 kmem_cache_init_late

Defined in mm/slab.c
void __init kmem_cache_init_late(void)
{
    struct kmem_cache *cachep;

    slab_state = UP;

    /* 6) resize the head arrays to their final sizes */
    mutex_lock(&slab_mutex);
    list_for_each_entry(cachep, &slab_caches, list)
        if (enable_cpucache(cachep, GFP_NOWAIT))
            BUG();
    mutex_unlock(&slab_mutex);

    /* Annotate slab for lockdep -- annotate the malloc caches */
    init_lock_keys();

    /* Done! */
    slab_state = FULL;

    /*
     * Register a cpu startup notifier callback that initializes
     * cpu_cache_get for all new cpus
     */
    register_cpu_notifier(&cpucache_notifier);

#ifdef CONFIG_NUMA
    /*
     * Register a memory hotplug callback that initializes and frees
     * node.
     */
    hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

    /*
     * The reap timers are started later, with a module init call: That part
     * of the kernel is not yet operational.
     */
}


1.2 console_init

Defined in drivers/tty/tty_io.c
void __init console_init(void)
{
    initcall_t *call;

    /* Setup the default TTY line discipline. */
    tty_ldisc_begin();

    /*
     * set up the console device so that later boot sequences can
     * inform about problems etc..
     */
    call = __con_initcall_start;
    while (call < __con_initcall_end) {
        (*call)();
        call++;
    }
}
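
The pointers walked in this loop are planted by the console_initcall() macro: each built-in console driver drops a function pointer into the .con_initcall.init linker section, and __con_initcall_start/__con_initcall_end bracket that section. A sketch of how a driver registers itself, assuming the macro body of the 3.x-era include/linux/init.h (the driver names are illustrative):

#define console_initcall(fn) \
    static initcall_t __initcall_##fn \
    __used __section(.con_initcall.init) = fn

/* in a hypothetical serial driver: */
static int __init my_serial_console_init(void)
{
    register_console(&my_serial_console); /* hypothetical struct console */
    return 0;
}
console_initcall(my_serial_console_init);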

1.3 lockdep_info

Defined in kernel/lockdep.c
void __init lockdep_info(void)
{
    printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");

    printk("... MAX_LOCKDEP_SUBCLASSES:  %lu\n", MAX_LOCKDEP_SUBCLASSES);
    printk("... MAX_LOCK_DEPTH:          %lu\n", MAX_LOCK_DEPTH);
    printk("... MAX_LOCKDEP_KEYS:        %lu\n", MAX_LOCKDEP_KEYS);
    printk("... CLASSHASH_SIZE:          %lu\n", CLASSHASH_SIZE);
    printk("... MAX_LOCKDEP_ENTRIES:     %lu\n", MAX_LOCKDEP_ENTRIES);
    printk("... MAX_LOCKDEP_CHAINS:      %lu\n", MAX_LOCKDEP_CHAINS);
    printk("... CHAINHASH_SIZE:          %lu\n", CHAINHASH_SIZE);

    printk(" memory used by lock dependency info: %lu kB\n",
        (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
        sizeof(struct list_head) * CLASSHASH_SIZE +
        sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
        sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
        sizeof(struct list_head) * CHAINHASH_SIZE
#ifdef CONFIG_PROVE_LOCKING
        + sizeof(struct circular_queue)
#endif
        ) / 1024
        );

    printk(" per task-struct memory footprint: %lu bytes\n",
        sizeof(struct held_lock) * MAX_LOCK_DEPTH);

#ifdef CONFIG_DEBUG_LOCKDEP
    if (lockdep_init_error) {
        printk("WARNING: lockdep init error! lock-%s was acquired"
            "before lockdep_init\n", lock_init_error);
        printk("Call stack leading to lockdep invocation was:\n");
        print_stack_trace(&lockdep_init_trace, 0);
    }
#endif
}

1.4 locking_selftest

Defined in lib/locking-selftest.c
void locking_selftest(void)
{
    /*
     * Got a locking failure before the selftest ran?
     */
    if (!debug_locks) {
        printk("----------------------------------\n");
        printk("| Locking API testsuite disabled |\n");
        printk("----------------------------------\n");
        return;
    }

    /*
     * Run the testsuite:
     */
    printk("------------------------\n");
    printk("| Locking API testsuite:\n");
    printk("----------------------------------------------------------------------------\n");
    printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |\n");
    printk("  --------------------------------------------------------------------------\n");

    init_shared_classes();
    debug_locks_silent = !debug_locks_verbose;

    DO_TESTCASE_6R("A-A deadlock", AA);
    DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
    DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
    DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
    DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
    DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
    DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
    DO_TESTCASE_6("double unlock", double_unlock);
    DO_TESTCASE_6("initialize held", init_held);
    DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);

    printk("  --------------------------------------------------------------------------\n");
    print_testname("recursive read-lock");
    printk("             |");
    dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
    printk("             |");
    dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
    printk("\n");

    print_testname("recursive read-lock #2");
    printk("             |");
    dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
    printk("             |");
    dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
    printk("\n");

    print_testname("mixed read-write-lock");
    printk("             |");
    dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
    printk("             |");
    dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
    printk("\n");

    print_testname("mixed write-read-lock");
    printk("             |");
    dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
    printk("             |");
    dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
    printk("\n");

    printk("  --------------------------------------------------------------------------\n");

    /*
     * irq-context testcases:
     */
    DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
    DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
    DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
    DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
    DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
    DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion);

    DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
//    DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);

    if (unexpected_testcase_failures) {
        printk("-----------------------------------------------------------------\n");
        debug_locks = 0;
        printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
            unexpected_testcase_failures, testcase_total);
        printk("-----------------------------------------------------------------\n");
    } else if (expected_testcase_failures && testcase_successes) {
        printk("--------------------------------------------------------\n");
        printk("%3d out of %3d testcases failed, as expected. |\n",
            expected_testcase_failures, testcase_total);
        printk("----------------------------------------------------\n");
        debug_locks = 1;
    } else if (expected_testcase_failures && !testcase_successes) {
        printk("--------------------------------------------------------\n");
        printk("All %3d testcases failed, as expected. |\n",
            expected_testcase_failures);
        printk("----------------------------------------\n");
        debug_locks = 1;
    } else {
        printk("-------------------------------------------------------\n");
        printk("Good, all %3d testcases passed! |\n",
            testcase_successes);
        printk("---------------------------------\n");
        debug_locks = 1;
    }
    debug_locks_silent = 0;
}


1.5 page_cgroup_init

Defined in mm/page_cgroup.c
void __init page_cgroup_init(void)
{
    unsigned long pfn;
    int nid;

    if (mem_cgroup_disabled())
        return;

    for_each_node_state(nid, N_MEMORY) {
        unsigned long start_pfn, end_pfn;

        start_pfn = node_start_pfn(nid);
        end_pfn = node_end_pfn(nid);
        /*
         * start_pfn and end_pfn may not be aligned to SECTION and the
         * page->flags of out of node pages are not initialized.  So we
         * scan [start_pfn, the biggest section's pfn < end_pfn) here.
         */
        for (pfn = start_pfn;
             pfn < end_pfn;
                     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {

            if (!pfn_valid(pfn))
                continue;
            /*
             * Nodes's pfns can be overlapping.
             * We know some arch can have a nodes layout such as
             * -------------pfn-------------->
             * N0 | N1 | N2 | N0 | N1 | N2|....
             */
            if (pfn_to_nid(pfn) != nid)
                continue;
            if (init_section_page_cgroup(pfn, nid))
                goto oom;
        }
    }
    hotplug_memory_notifier(page_cgroup_callback, 0);
    printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
    printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
             "don't want memory cgroups\n");
    return;
oom:
    printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
    panic("Out of memory");
}

1.6 debug_objects_mem_init

Defined in lib/debugobjects.c
void __init debug_objects_mem_init(void)
{
    if (!debug_objects_enabled)
        return;

    obj_cache = kmem_cache_create("debug_objects_cache",
                      sizeof (struct debug_obj), 0,
                      SLAB_DEBUG_OBJECTS, NULL);

    if (!obj_cache || debug_objects_replace_static_objects()) {
        debug_objects_enabled = 0;
        if (obj_cache)
            kmem_cache_destroy(obj_cache);
        printk(KERN_WARNING "ODEBUG: out of memory.\n");
    } else
        debug_objects_selftest();
}

1.7 kmemleak_init

Defined in mm/kmemleak.c
void __init kmemleak_init(void)
{
    int i;
    unsigned long flags;

#ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
    if (!kmemleak_skip_disable) {
        atomic_set(&kmemleak_early_log, 0);
        kmemleak_disable();
        return;
    }
#endif

    jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
    jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);

    object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
    scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);

    if (crt_early_log >= ARRAY_SIZE(early_log))
        pr_warning("Early log buffer exceeded (%d), please increase "
               "DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n", crt_early_log);

    /* the kernel is still in UP mode, so disabling the IRQs is enough */
    local_irq_save(flags);
    atomic_set(&kmemleak_early_log, 0);
    if (atomic_read(&kmemleak_error)) {
        local_irq_restore(flags);
        return;
    } else
        atomic_set(&kmemleak_enabled, 1);
    local_irq_restore(flags);

    /*
     * This is the point where tracking allocations is safe. Automatic
     * scanning is started during the late initcall. Add the early logged
     * callbacks to the kmemleak infrastructure.
     */
    for (i = 0; i < crt_early_log; i++) {
        struct early_log *log = &early_log[i];

        switch (log->op_type) {
        case KMEMLEAK_ALLOC:
            early_alloc(log);
            break;
        case KMEMLEAK_ALLOC_PERCPU:
            early_alloc_percpu(log);
            break;
        case KMEMLEAK_FREE:
            kmemleak_free(log->ptr);
            break;
        case KMEMLEAK_FREE_PART:
            kmemleak_free_part(log->ptr, log->size);
            break;
        case KMEMLEAK_FREE_PERCPU:
            kmemleak_free_percpu(log->ptr);
            break;
        case KMEMLEAK_NOT_LEAK:
            kmemleak_not_leak(log->ptr);
            break;
        case KMEMLEAK_IGNORE:
            kmemleak_ignore(log->ptr);
            break;
        case KMEMLEAK_SCAN_AREA:
            kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
            break;
        case KMEMLEAK_NO_SCAN:
            kmemleak_no_scan(log->ptr);
            break;
        default:
            kmemleak_warn("Unknown early log operation: %d\n",
                      log->op_type);
        }

        if (atomic_read(&kmemleak_warning)) {
            print_log_trace(log);
            atomic_set(&kmemleak_warning, 0);
        }
    }
}
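
Once kmemleak_enabled is set above, the annotation calls that were replayed from the early log become live API. A hedged sketch of how kernel code typically uses one of them (demo_kmemleak and its buffer are hypothetical):

#include <linux/slab.h>
#include <linux/kmemleak.h>

static void demo_kmemleak(void)
{
    /* a buffer that is intentionally never freed */
    void *buf = kmalloc(1024, GFP_KERNEL);

    if (buf)
        kmemleak_not_leak(buf); /* suppress the would-be leak report */
}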

1.8 setup_per_cpu_pageset

Defined in mm/page_alloc.c
void __init setup_per_cpu_pageset(void)
{
    struct zone *zone;

    for_each_populated_zone(zone)
        setup_zone_pageset(zone);
}

1.9 numa_policy_init

Defined in mm/mempolicy.c
void __init numa_policy_init(void)
{
    nodemask_t interleave_nodes;
    unsigned long largest = 0;
    int nid, prefer = 0;

    policy_cache = kmem_cache_create("numa_policy",
                     sizeof(struct mempolicy),
                     0, SLAB_PANIC, NULL);

    sn_cache = kmem_cache_create("shared_policy_node",
                     sizeof(struct sp_node),
                     0, SLAB_PANIC, NULL);

    for_each_node(nid) {
        preferred_node_policy[nid] = (struct mempolicy) {
            .refcnt = ATOMIC_INIT(1),
            .mode = MPOL_PREFERRED,
            .flags = MPOL_F_MOF | MPOL_F_MORON,
            .v = { .preferred_node = nid, },
        };
    }

    /*
     * Set interleaving policy for system init. Interleaving is only
     * enabled across suitably sized nodes (default is >= 16MB), or
     * fall back to the largest node if they're all smaller.
     */
    nodes_clear(interleave_nodes);
    for_each_node_state(nid, N_MEMORY) {
        unsigned long total_pages = node_present_pages(nid);

        /* Preserve the largest node */
        if (largest < total_pages) {
            largest = total_pages;
            prefer = nid;
        }

        /* Interleave this node? */
        if ((total_pages << PAGE_SHIFT) >= (16 << 20))
            node_set(nid, interleave_nodes);
    }

    /* All too small, use the largest */
    if (unlikely(nodes_empty(interleave_nodes)))
        node_set(prefer, interleave_nodes);

    if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
        printk("numa_policy_init: interleaving failed\n");

    check_numabalancing_enable();
}

1.10 sched_clock_init

Defined in kernel/sched/clock.c
void sched_clock_init(void)
{
    u64 ktime_now = ktime_to_ns(ktime_get());
    int cpu;

    for_each_possible_cpu(cpu) {
        struct sched_clock_data *scd = cpu_sdc(cpu);

        scd->tick_raw = 0;
        scd->tick_gtod = ktime_now;
        scd->clock = ktime_now;
    }

    sched_clock_running = 1;
}

1.11 calibrate_delay

Defined in init/calibrate.c
void __cpuinit calibrate_delay(void)
{
    unsigned long lpj;
    static bool printed;
    int this_cpu = smp_processor_id();

    if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
        lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
        if (!printed)
            pr_info("Calibrating delay loop (skipped) "
                "already calibrated this CPU");
    } else if (preset_lpj) {
        lpj = preset_lpj;
        if (!printed)
            pr_info("Calibrating delay loop (skipped) "
                "preset value.. ");
    } else if ((!printed) && lpj_fine) {
        lpj = lpj_fine;
        pr_info("Calibrating delay loop (skipped), "
            "value calculated using timer frequency.. ");
    } else if ((lpj = calibrate_delay_is_known())) {
        ;
    } else if ((lpj = calibrate_delay_direct()) != 0) {
        if (!printed)
            pr_info("Calibrating delay using timer "
                "specific routine.. ");
    } else {
        if (!printed)
            pr_info("Calibrating delay loop... ");
        lpj = calibrate_delay_converge();
    }
    per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
    if (!printed)
        pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
            lpj/(500000/HZ),
            (lpj/(5000/HZ)) % 100, lpj);

    loops_per_jiffy = lpj;
    printed = true;
}
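
The closing pr_cont converts lpj (loops per jiffy) into the familiar BogoMIPS figure, BogoMIPS = lpj * HZ / 500000, printed to two decimal places with pure integer arithmetic. A standalone check of that formula, assuming the example values HZ = 100 and lpj = 4997120:

#include <stdio.h>

int main(void)
{
    unsigned long lpj = 4997120; /* assumed loops_per_jiffy */
    unsigned long hz = 100;      /* assumed HZ */

    /* same integer arithmetic as the kernel's pr_cont above */
    printf("%lu.%02lu BogoMIPS (lpj=%lu)\n",
           lpj / (500000 / hz),
           (lpj / (5000 / hz)) % 100, lpj);
    return 0; /* prints: 999.42 BogoMIPS (lpj=4997120) */
}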


1.12 pidmap_init

Defined in kernel/pid.c
void __init pidmap_init(void)
{
    /* Veryify no one has done anything silly */
    BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);

    /* bump default and minimum pid_max based on number of cpus */
    pid_max = min(pid_max_max, max_t(int, pid_max,
                PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
    pid_max_min = max_t(int, pid_max_min,
                PIDS_PER_CPU_MIN * num_possible_cpus());
    pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);

    init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
    /* Reserve PID 0. We never call free_pidmap(0) */
    set_bit(0, init_pid_ns.pidmap[0].page);
    atomic_dec(&init_pid_ns.pidmap[0].nr_free);
    init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;

    init_pid_ns.pid_cachep = KMEM_CACHE(pid,
            SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}
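
pidmap_init only reserves PID 0 here; PIDs are handed out later by scanning the bitmap page for a clear bit. A simplified sketch of that idea (a toy function, not the kernel's actual alloc_pidmap, which also handles wrap-around and multiple bitmap pages):

static int toy_alloc_pid(struct pidmap *map, int max_pids)
{
    int nr = find_first_zero_bit(map->page, max_pids);

    if (nr < max_pids && !test_and_set_bit(nr, map->page)) {
        atomic_dec(&map->nr_free); /* claimed one PID */
        return nr;
    }
    return -1; /* none free, or lost the race */
}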

1.13 anon_vma_init

Defined in mm/rmap.c
void __init anon_vma_init(void)
{
    anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
            0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
    anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
}

1.14 thread_info_cache_init

Defined in kernel/fork.c
void thread_info_cache_init(void)
{
    thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
                          THREAD_SIZE, 0, NULL);
    BUG_ON(thread_info_cache == NULL);
}

1.15 cred_init

Defined in kernel/cred.c
void __init cred_init(void)
{
    /* allocate a slab in which we can store credentials */
    cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),
                     0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
}
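
thread_info_cache_init, cred_init, and most of the *_init functions that follow repeat the same pattern: create a dedicated slab cache once at boot so that later allocations of that object type are fast and cache-friendly. A hedged sketch of the pattern with a hypothetical object type:

#include <linux/slab.h>

struct foo { int a; };

static struct kmem_cache *foo_cachep;

static void __init foo_cache_init(void)
{
    /* SLAB_PANIC: panic at boot rather than return NULL */
    foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
                                   0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
}

static void foo_use(void)
{
    struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);

    if (f)
        kmem_cache_free(foo_cachep, f);
}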

1.16 fork_init

Defined in kernel/fork.c
void __init fork_init(unsigned long mempages)
{
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN  L1_CACHE_BYTES
#endif
    /* create a slab on which task_structs can be allocated */
    task_struct_cachep =
        kmem_cache_create("task_struct", sizeof(struct task_struct),
            ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
#endif

    /* do the arch specific task caches init */
    arch_task_cache_init();

    /*
     * The default maximum number of threads is set to a safe
     * value: the thread structures can take up at most half
     * of memory.
     */
    max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);

    /*
     * we need to allow at least 20 threads to boot a system
     */
    if (max_threads < 20)
        max_threads = 20;

    init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
    init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
    init_task.signal->rlim[RLIMIT_SIGPENDING] =
        init_task.signal->rlim[RLIMIT_NPROC];
}
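
The max_threads formula caps the memory that all thread structures together may consume. A standalone arithmetic check, assuming PAGE_SIZE = 4096 and THREAD_SIZE = 8192 (typical 32-bit ARM values):

#include <stdio.h>

int main(void)
{
    unsigned long page_size = 4096, thread_size = 8192;
    unsigned long mempages = 262144; /* assume 1 GB of RAM in 4 KB pages */
    unsigned long max_threads = mempages / (8 * thread_size / page_size);

    /* 262144 / 16 = 16384 threads; 16384 * 8 KB of stacks = 128 MB, 1/8 of RAM */
    printf("max_threads = %lu\n", max_threads);
    return 0;
}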

1.17 proc_caches_init

Defined in kernel/fork.c
void __init proc_caches_init(void)
{
    sighand_cachep = kmem_cache_create("sighand_cache",
            sizeof(struct sighand_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
            SLAB_NOTRACK, sighand_ctor);
    signal_cachep = kmem_cache_create("signal_cache",
            sizeof(struct signal_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    files_cachep = kmem_cache_create("files_cache",
            sizeof(struct files_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    fs_cachep = kmem_cache_create("fs_cache",
            sizeof(struct fs_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    /*
     * FIXME! The "sizeof(struct mm_struct)" currently includes the
     * whole struct cpumask for the OFFSTACK case. We could change
     * this to *only* allocate as much of it as required by the
     * maximum number of CPU's we can ever have.  The cpumask_allocation
     * is at the end of the structure, exactly for that reason.
     */
    mm_cachep = kmem_cache_create("mm_struct",
            sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
    mmap_init();
    nsproxy_cache_init();
}

1.18 buffer_init

Defined in fs/buffer.c
void __init buffer_init(void)
{
    unsigned long nrpages;

    bh_cachep = kmem_cache_create("buffer_head",
            sizeof(struct buffer_head), 0,
                (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
                SLAB_MEM_SPREAD),
                NULL);

    /*
     * Limit the bh occupancy to 10% of ZONE_NORMAL
     */
    nrpages = (nr_free_buffer_pages() * 10) / 100;
    max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
    hotcpu_notifier(buffer_cpu_notify, 0);
}

1.19 key_init

Defined in security/keys/key.c
void __init key_init(void)
{
    /* allocate a slab in which we can store keys */
    key_jar = kmem_cache_create("key_jar", sizeof(struct key),
            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

    /* add the special key types */
    list_add_tail(&key_type_keyring.link, &key_types_list);
    list_add_tail(&key_type_dead.link, &key_types_list);
    list_add_tail(&key_type_user.link, &key_types_list);
    list_add_tail(&key_type_logon.link, &key_types_list);

    /* record the root user tracking */
    rb_link_node(&root_key_user.node,
             NULL,
             &key_user_tree.rb_node);

    rb_insert_color(&root_key_user.node,
            &key_user_tree);
}

1.20 security_init

Defined in security/security.c
int __init security_init(void)
{
    printk(KERN_INFO "Security Framework initialized\n");

    security_fixup_ops(&default_security_ops);
    security_ops = &default_security_ops;
    do_security_initcalls();

    return 0;
}

1.21 dbg_late_init

Defined in kernel/debug/debug_core.c
void __init dbg_late_init(void)
{
    dbg_is_early = false;
    if (kgdb_io_module_registered)
        kgdb_arch_late();
    kdb_init(KDB_INIT_FULL);
}


1.22 vfs_caches_init

Defined in fs/dcache.c
void __init vfs_caches_init(unsigned long mempages)
{
    unsigned long reserve;

    /* Base hash sizes on available memory, with a reserve equal to
           150% of current kernel size */

    reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
    mempages -= reserve;

    names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

    dcache_init();
    inode_init();
    files_init(mempages);
    mnt_init();
    bdev_cache_init();
    chrdev_init();
}

1.23 page_writeback_init

Defined in mm/page-writeback.c
void __init page_writeback_init(void)
{
    writeback_set_ratelimit();
    register_cpu_notifier(&ratelimit_nb);

    fprop_global_init(&writeout_completions);
}

1.24 proc_root_init

Defined in fs/proc/root.c
void __init proc_root_init(void)
{
    int err;

    proc_init_inodecache();
    err = register_filesystem(&proc_fs_type);
    if (err)
        return;

    proc_self_init();
    proc_symlink("mounts", NULL, "self/mounts");

    proc_net_init();

#ifdef CONFIG_SYSVIPC
    proc_mkdir("sysvipc", NULL);
#endif
    proc_mkdir("fs", NULL);
    proc_mkdir("driver", NULL);
    proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
    /* just give it a mountpoint */
    proc_mkdir("openprom", NULL);
#endif
    proc_tty_init();
#ifdef CONFIG_PROC_DEVICETREE
    proc_device_tree_init();
#endif
    proc_mkdir("bus", NULL);
    proc_sys_init();
}
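
Everything under /proc hangs off the tree created here, and drivers extend it with proc_mkdir()/proc_create(). A hedged module-style sketch using the file_operations-based procfs API of this kernel era (all names are hypothetical):

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int hello_show(struct seq_file *m, void *v)
{
    seq_puts(m, "hello from procfs\n");
    return 0;
}

static int hello_open(struct inode *inode, struct file *file)
{
    return single_open(file, hello_show, NULL);
}

static const struct file_operations hello_fops = {
    .open    = hello_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = single_release,
};

static int __init demo_proc_init(void)
{
    struct proc_dir_entry *dir = proc_mkdir("demo_dir", NULL);

    if (!dir)
        return -ENOMEM;
    proc_create("hello", 0444, dir, &hello_fops); /* /proc/demo_dir/hello */
    return 0;
}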

1.25 cgroup_init

Defined in kernel/cgroup.c
int __init cgroup_init(void)
{
    int err;
    int i;
    unsigned long key;

    err = bdi_init(&cgroup_backing_dev_info);
    if (err)
        return err;

    for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
        struct cgroup_subsys *ss = subsys[i];

        /* at bootup time, we don't worry about modular subsystems */
        if (!ss || ss->module)
            continue;
        if (!ss->early_init)
            cgroup_init_subsys(ss);
        if (ss->use_id)
            cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
    }

    /* Add init_css_set to the hash table */
    key = css_set_hash(init_css_set.subsys);
    hash_add(css_set_table, &init_css_set.hlist, key);
    BUG_ON(!init_root_id(&rootnode));

    cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
    if (!cgroup_kobj) {
        err = -ENOMEM;
        goto out;
    }

    err = register_filesystem(&cgroup_fs_type);
    if (err < 0) {
        kobject_put(cgroup_kobj);
        goto out;
    }

    proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
    if (err)
        bdi_destroy(&cgroup_backing_dev_info);

    return err;
}


1.26 cpuset_init

Defined in kernel/cpuset.c
int __init cpuset_init(void)
{
    int err = 0;

    if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
        BUG();

    cpumask_setall(top_cpuset.cpus_allowed);
    nodes_setall(top_cpuset.mems_allowed);

    fmeter_init(&top_cpuset.fmeter);
    set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
    top_cpuset.relax_domain_level = -1;

    err = register_filesystem(&cpuset_fs_type);
    if (err < 0)
        return err;

    if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
        BUG();

    number_of_cpusets = 1;
    return 0;
}

1.27 taskstats_init_early

Defined in kernel/taskstats.c
void __init taskstats_init_early(void)
{
    unsigned int i;

    taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
    for_each_possible_cpu(i) {
        INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
        init_rwsem(&(per_cpu(listener_array, i).sem));
    }
}
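
listener_array is a per-cpu variable, so the loop above initializes one private copy for every possible CPU. A hedged sketch of the general per-cpu pattern used here (demo_counter is hypothetical):

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_counter);

static unsigned long demo_sum(void)
{
    unsigned long total = 0;
    int cpu;

    /* per_cpu(var, cpu) names the copy belonging to the given cpu */
    for_each_possible_cpu(cpu)
        total += per_cpu(demo_counter, cpu);

    return total;
}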

1.28 acpi_early_init

Defined in drivers/acpi/bus.c
void __init acpi_early_init(void)
{
    acpi_status status = AE_OK;

    if (acpi_disabled)
        return;

    printk(KERN_INFO PREFIX "Core revision %08x\n", ACPI_CA_VERSION);

    /* enable workarounds, unless strict ACPI spec. compliance */
    if (!acpi_strict)
        acpi_gbl_enable_interpreter_slack = TRUE;

    acpi_gbl_permanent_mmap = 1;

    /*
     * If the machine falls into the DMI check table,
     * DSDT will be copied to memory
     */
    dmi_check_system(dsdt_dmi_table);

    status = acpi_reallocate_root_table();
    if (ACPI_FAILURE(status)) {
        printk(KERN_ERR PREFIX
               "Unable to reallocate ACPI tables\n");
        goto error0;
    }

    status = acpi_initialize_subsystem();
    if (ACPI_FAILURE(status)) {
        printk(KERN_ERR PREFIX
               "Unable to initialize the ACPI Interpreter\n");
        goto error0;
    }

    status = acpi_load_tables();
    if (ACPI_FAILURE(status)) {
        printk(KERN_ERR PREFIX
               "Unable to load the System Description Tables\n");
        goto error0;
    }

#ifdef CONFIG_X86
    if (!acpi_ioapic) {
        /* compatible (0) means level (3) */
        if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) {
            acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK;
            acpi_sci_flags |= ACPI_MADT_TRIGGER_LEVEL;
        }
        /* Set PIC-mode SCI trigger type */
        acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt,
                     (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2);
    } else {
        /*
         * now that acpi_gbl_FADT is initialized,
         * update it with result from INT_SRC_OVR parsing
         */
        acpi_gbl_FADT.sci_interrupt = acpi_sci_override_gsi;
    }
#endif

    status = acpi_enable_subsystem(~ACPI_NO_ACPI_ENABLE);
    if (ACPI_FAILURE(status)) {
        printk(KERN_ERR PREFIX "Unable to enable ACPI\n");
        goto error0;
    }

    /*
     * If the system is using ACPI then we can be reasonably
     * confident that any regulators are managed by the firmware
     * so tell the regulator core it has everything it needs to
     * know.
     */
    regulator_has_full_constraints();

    return;

      error0:
    disable_acpi();
    return;
}

1.29 sfi_init_late

Defined in drivers/sfi/sfi_core.c
void __init sfi_init_late(void)
{
    int length;

    if (sfi_disabled)
        return;

    length = syst_va->header.len;
    sfi_unmap_memory(syst_va, sizeof(struct sfi_table_simple));

    /* Use ioremap now after it is ready */
    sfi_use_ioremap = 1;
    syst_va = sfi_map_memory(syst_pa, length);

    sfi_acpi_init();
}

1.30 ftrace_init

Defined in kernel/trace/ftrace.c
void __init ftrace_init(void)
{
    unsigned long count, addr, flags;
    int ret;

    /* Keep the ftrace pointer to the stub */
    addr = (unsigned long)ftrace_stub;

    local_irq_save(flags);
    ftrace_dyn_arch_init(&addr);
    local_irq_restore(flags);

    /* ftrace_dyn_arch_init places the return code in addr */
    if (addr)
        goto failed;

    count = __stop_mcount_loc - __start_mcount_loc;

    ret = ftrace_dyn_table_alloc(count);
    if (ret)
        goto failed;

    last_ftrace_enabled = ftrace_enabled = 1;

    ret = ftrace_process_locs(NULL,
                  __start_mcount_loc,
                  __stop_mcount_loc);

    ret = register_module_notifier(&ftrace_module_enter_nb);
    if (ret)
        pr_warning("Failed to register trace ftrace module enter notifier\n");

    ret = register_module_notifier(&ftrace_module_exit_nb);
    if (ret)
        pr_warning("Failed to register trace ftrace module exit notifier\n");

    set_ftrace_early_filters();

    return;
 failed:
    ftrace_disabled = 1;
}
