一 rest_init
1.0 rest_init
定義在init/main.c中
/*
 * rest_init - last step of start-up on the boot CPU.
 *
 * Spawns the kernel_init thread (so that it obtains pid 1) and then the
 * kthreadd thread, stores kthreadd's task_struct in kthreadd_task, raises
 * its scheduling policy to SCHED_FIFO priority 1, and signals kthreadd_done.
 * Afterwards the calling context is placed in the idle scheduling class,
 * calls the scheduler once, and enters cpu_startup_entry().
 */
static noinline void __init_refok rest_init(void)
{
	int pid;
	const struct sched_param param = { .sched_priority = 1 };

	rcu_scheduler_starting();	/* sets rcu_scheduler_active */
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	/* Create the kernel_init thread, i.e. init; it waits on kthreadd_done. */
	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
	/* Set the default NUMA memory policy. */
	numa_default_policy();
	/*
	 * Create the kthreadd thread, which services the creation requests
	 * queued on kthread_create_list.
	 */
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
	/* find_task_by_pid_ns() asserts that the RCU read lock is held. */
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();
	/* Pass the address of the local param (previously a garbled token). */
	sched_setscheduler_nocheck(kthreadd_task, SCHED_FIFO, &param);
	/* Tell kernel_init that kthreadd has been created. */
	complete(&kthreadd_done);

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	/* Put the current task into the idle scheduling class. */
	init_idle_bootup_task(current);
	/* Call the scheduler voluntarily; preemption is disabled on return. */
	schedule_preempt_disabled();
	/* Call into cpu_idle with preempt disabled */
	cpu_startup_entry(CPUHP_ONLINE);	/* enters the idle loop */
}
1.1 rcu_scheduler_starting
定義在kernel/rcutree.c中
/*
 * rcu_scheduler_starting - mark the RCU scheduler as active.
 *
 * Warns if the number of online CPUs is not exactly one or if any context
 * switch has already been counted, then sets rcu_scheduler_active to 1.
 */
void rcu_scheduler_starting(void)
{
WARN_ON(num_online_cpus() != 1); // expect exactly one online CPU at this point
WARN_ON(nr_context_switches() > 0); // expect that no context switch has happened yet
rcu_scheduler_active = 1; // flag the RCU scheduler as active
}
1.2 kernel_thread
定義在kernel/fork.c
/*
 * kernel_thread - create a kernel thread through do_fork().
 * @fn:    function the new thread runs
 * @arg:   opaque argument handed to @fn
 * @flags: clone flags requested by the caller
 *
 * CLONE_VM and CLONE_UNTRACED are always added to @flags. Returns whatever
 * do_fork() returns.
 */
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	const unsigned long clone_flags = flags | CLONE_VM | CLONE_UNTRACED;
	const unsigned long entry = (unsigned long)fn;
	const unsigned long entry_arg = (unsigned long)arg;

	return do_fork(clone_flags, entry, entry_arg, NULL, NULL);
}
1.3 kthreadd
定義在kernel/kthread.c中
/*
 * kthreadd - main loop of the thread that creates other kernel threads.
 * @unused: not referenced by the body
 *
 * After setting up its own task (name, ignored signals, allowed CPUs and
 * memory nodes, PF_NOFREEZE), it loops forever: it sleeps while
 * kthread_create_list is empty, then removes each queued
 * kthread_create_info entry and passes it to create_kthread().
 */
int kthreadd(void *unused)
{
struct task_struct *tsk = current;
/* Setup a clean context for our children to inherit. */
set_task_comm(tsk, "kthreadd");
ignore_signals(tsk);
set_cpus_allowed_ptr(tsk, cpu_all_mask);
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
for (;;) {
/*
 * The task state is set before the list is tested.
 * NOTE(review): presumably so that a wake-up arriving between the
 * test and schedule() is not lost - confirm.
 */
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&kthread_create_list))
schedule();
__set_current_state(TASK_RUNNING);
spin_lock(&kthread_create_lock);
while (!list_empty(&kthread_create_list)) {
struct kthread_create_info *create;
/* Take the first queued request and unlink it from the list. */
create = list_entry(kthread_create_list.next,
struct kthread_create_info, list);
list_del_init(&create->list);
/* The lock is dropped around create_kthread() and retaken before the list is tested again. */
spin_unlock(&kthread_create_lock);
create_kthread(create);
spin_lock(&kthread_create_lock);
}
spin_unlock(&kthread_create_lock);
}
/* Not reached: the for (;;) loop above has no exit. */
return 0;
}
1.4 rcu_read_lock & rcu_read_unlock
定義在include/linux/rcupdate.h中
/*
 * rcu_read_lock - enter an RCU read-side critical section.
 *
 * Calls __rcu_read_lock(), records the acquisition for rcu_lock_map, and
 * asserts that the CPU is not idle from RCU's point of view.
 */
static inline void rcu_read_lock(void)
{
__rcu_read_lock();
/* NOTE(review): __acquire() is presumably a static-checker annotation - confirm. */
__acquire(RCU);
rcu_lock_acquire(&rcu_lock_map);
rcu_lockdep_assert(!rcu_is_cpu_idle(),
"rcu_read_lock() used illegally while idle");
}
/*
 * rcu_read_unlock - leave an RCU read-side critical section.
 *
 * Asserts that the CPU is not idle from RCU's point of view, records the
 * release for rcu_lock_map, and calls __rcu_read_unlock() last.
 */
static inline void rcu_read_unlock(void)
{
rcu_lockdep_assert(!rcu_is_cpu_idle(),
"rcu_read_unlock() used illegally while idle");
rcu_lock_release(&rcu_lock_map);
/* NOTE(review): __release() is presumably a static-checker annotation - confirm. */
__release(RCU);
__rcu_read_unlock();
}
1.5 find_task_by_pid_ns
定義在kernel/pid.c中
/*
 * find_task_by_pid_ns - look up a task by pid number within a pid namespace.
 * @nr: pid number to look up
 * @ns: pid namespace in which @nr is interpreted
 *
 * Asserts (via lockdep) that the caller holds rcu_read_lock(). Returns the
 * result of pid_task() for the PIDTYPE_PID entry of the pid found by
 * find_pid_ns().
 */
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
	struct pid *found;

	rcu_lockdep_assert(rcu_read_lock_held(),
			   "find_task_by_pid_ns() needs rcu_read_lock()"
			   " protection");

	found = find_pid_ns(nr, ns);
	return pid_task(found, PIDTYPE_PID);
}
/*
 * init_pid_ns - statically initialized pid namespace.
 *
 * It is at level 0, its child reaper is init_task, and it belongs to
 * init_user_ns. rest_init() passes it to find_task_by_pid_ns() to look up
 * kthreadd.
 */
struct pid_namespace init_pid_ns = {
.kref = {
.refcount = ATOMIC_INIT(2),
},
/* Every pidmap entry starts with a counter of BITS_PER_PAGE and a NULL pointer. */
.pidmap = {
[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
},
.last_pid = 0,
.level = 0,
.child_reaper = &init_task,
.user_ns = &init_user_ns,
.proc_inum = PROC_PID_INIT_INO,
};
1.6 sched_setscheduler_nocheck
定義在kernel/sched/core.c中
int sched_setscheduler_nocheck(struct task_struct *p, int policy,
const struct sched_param *param)
{
return __sched_setscheduler(p, policy, param, false);
}
1.7 kthreadd_done
定義在init/main.c中
/*
 * kthreadd_done - completion that rest_init() signals with complete() after
 * creating kthreadd, and that kernel_init_freeable() waits on.
 */
static __initdata DECLARE_COMPLETION(kthreadd_done);
/*
 * DECLARE_COMPLETION - define a struct completion named @work, initialized
 * with COMPLETION_INITIALIZER(work).
 */
#define DECLARE_COMPLETION(work) \
struct completion work = COMPLETION_INITIALIZER(work)
1.8 init_idle_bootup_task
定義在kernel/sched/core.c中
/*
 * init_idle_bootup_task - assign the idle scheduling class to a task.
 * @idle: task whose sched_class is set to &idle_sched_class
 */
void __cpuinit init_idle_bootup_task(struct task_struct *idle)
{
idle->sched_class = &idle_sched_class;
}
/* get_current() yields the task pointer stored in the current thread_info. */
#define get_current() (current_thread_info()->task)
/* current is shorthand for get_current(). */
#define current get_current()
1.9 schedule_preempt_disabled
/*
 * schedule_preempt_disabled - call schedule() from a preempt-disabled
 * context and return with preemption disabled again.
 */
void __sched schedule_preempt_disabled(void)
{
sched_preempt_enable_no_resched(); // decrement preempt_count without rescheduling immediately
schedule(); // voluntarily give up the CPU; per the original notes, this is where kernel_init (PID 1) gets to run during boot
preempt_disable(); // disable preemption again before returning
}
/*
 * sched_preempt_enable_no_resched - issue a compiler barrier, then
 * decrement the preempt count. No reschedule is requested here.
 */
#define sched_preempt_enable_no_resched() \
do { \
barrier(); \
dec_preempt_count(); \
} while (0)
/*
 * preempt_disable - increment the preempt count, then issue a compiler
 * barrier (the mirror image of the macro above).
 */
#define preempt_disable() \
do { \
inc_preempt_count(); \
barrier(); \
} while (0)
1.10 cpu_startup_entry
定義在kernel/cpu/idle.c中
/*
 * cpu_startup_entry - entry point into the idle loop for a CPU.
 * @state: CPU hotplug state passed by the caller; not referenced in this body
 *
 * On CONFIG_X86 it (re)initializes the stack canary, then marks the current
 * task as polling, runs the architecture's idle preparation hook, and
 * enters cpu_idle_loop().
 */
void cpu_startup_entry(enum cpuhp_state state)
{
/*
* This #ifdef needs to die, but it's too late in the cycle to
* make this generic (arm and sh have never invoked the canary
* init for the non boot cpus!). Will be fixed in 3.11
*/
#ifdef CONFIG_X86
/*
* If we're the non-boot CPU, nothing set the stack canary up
* for us. The boot CPU already has it initialized but no harm
* in doing it again. This is a good place for updating it, as
* we wont ever return from this function (so the invalid
* canaries already on the stack wont ever trigger).
*/
boot_init_stack_canary();
#endif
__current_set_polling();
arch_cpu_idle_prepare();
cpu_idle_loop(); // the calling task (PID 0 on the boot CPU) enters the idle loop
}
二 kernel_init
2.0 kernel_init
定義在init/main.c中
/*
 * kernel_init - body of the init thread created by rest_init().
 * @unused: not referenced by the body
 *
 * Runs the freeable part of initialization, releases init memory, marks the
 * system as running, and then tries to execute an init program: first
 * ramdisk_execute_command, then execute_command, then a fixed list of
 * fallback paths. Returns 0 as soon as one run_init_process() call returns
 * 0; panics if none does.
 */
static int __ref kernel_init(void *unused)
{
	kernel_init_freeable();		/* important; described in detail below */
	/* need to finish all async __init code before freeing the memory */
	async_synchronize_full();	/* wait for all asynchronous calls to finish */
	free_initmem();			/* release the memory of the init.* sections */
	mark_rodata_ro();		/* per the original notes: empty on arm64 */
	system_state = SYSTEM_RUNNING;	/* the system is now in the running state */
	numa_default_policy();

	flush_delayed_fput();		/* flush all delayed fput work */

	if (ramdisk_execute_command) {
		/*
		 * On success the init program runs and this kernel thread
		 * turns into a user process.
		 */
		if (run_init_process(ramdisk_execute_command) == 0)
			return 0;
		pr_err("Failed to execute %s\n", ramdisk_execute_command);
	}

	/*
	 * We try each of these until one succeeds.
	 *
	 * The Bourne shell can be used instead of init if we are
	 * trying to recover a really broken machine.
	 */
	if (execute_command) {
		if (run_init_process(execute_command) == 0)
			return 0;
		pr_err("Failed to execute %s. Attempting defaults...\n",
		       execute_command);
	}

	/* Every fallback failing (non-zero result) leaves nothing to run. */
	if (run_init_process("/sbin/init") &&
	    run_init_process("/etc/init") &&
	    run_init_process("/bin/init") &&
	    run_init_process("/bin/sh"))
		panic("No init found. Try passing init= option to kernel. "
		      "See Linux Documentation/init.txt for guidance.");

	return 0;
}
2.1 kernel_init_freeable
定義在init/main.c中
/*
 * kernel_init_freeable - the __init portion of the init thread's work.
 *
 * Waits for kthreadd to exist, lifts the allocation and CPU/memory-node
 * restrictions on the init task, brings up SMP, runs the basic setup
 * (drivers and initcalls), opens /dev/console as file descriptors 0-2, and
 * decides whether an early user-space init (ramdisk_execute_command) is
 * available; if not, prepare_namespace() is called.
 */
static noinline void __init kernel_init_freeable(void)
{
	/*
	 * Wait until kthreadd is all set-up.
	 */
	/* kthreadd_done is completed by rest_init() after kthreadd is created. */
	wait_for_completion(&kthreadd_done);

	/* Now the scheduler is fully set up and can do blocking allocations */
	gfp_allowed_mask = __GFP_BITS_MASK;

	/*
	 * init can allocate pages on any node
	 */
	set_mems_allowed(node_states[N_MEMORY]);
	/*
	 * init can run on any cpu.
	 */
	set_cpus_allowed_ptr(current, cpu_all_mask);

	/* cad = ctrl-alt-del: record init as the task that handles ctrl-alt-del. */
	cad_pid = task_pid(current);

	/*
	 * Per the original notes: calls the cpu_prepare hook for every
	 * available core and marks it present.
	 */
	smp_prepare_cpus(setup_max_cpus);

	/* Per the original notes: runs the initcalls whose level is below 0. */
	do_pre_smp_initcalls();
	lockup_detector_init();		/* enable the watchdog */

	smp_init();			/* start the CPUs other than cpu0 */
	sched_init_smp();		/* initialize the scheduling domains */

	do_basic_setup();		/* important; described in detail below */

	/* Open the /dev/console on the rootfs, this should never fail */
	/* The descriptor returned here is expected to be 0: standard input. */
	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		pr_err("Warning: unable to open an initial console.\n");

	(void) sys_dup(0);		/* expected to yield fd 1: standard output */
	(void) sys_dup(0);		/* expected to yield fd 2: standard error */
	/*
	 * check if there is an early userspace init. If yes, let it do all
	 * the work
	 */

	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";	/* default init program */

	if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
		ramdisk_execute_command = NULL;
		prepare_namespace();
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 */

	/* rootfs is available now, try loading default modules */
	/* Per the original notes: loads the I/O scheduler (elevator) module. */
	load_default_modules();
}
/*
 * do_basic_setup - bring up the core kernel subsystems once the scheduler
 * and SMP are available: cpusets, usermodehelper, shmem, the driver model,
 * /proc/irq, constructors, and finally the initcalls.
 *
 * The per-call descriptions below are translated from the original notes.
 */
static void __init do_basic_setup(void)
{
	/* Initialize the cpuset subsystem of kernel control groups. */
	cpuset_init_smp();
	/*
	 * Create the khelper single-threaded workqueue, which helps to
	 * create and run user-space programs.
	 */
	usermodehelper_init();
	shmem_init();			/* initialize shared memory */
	driver_init();			/* initialize the device drivers */
	/* Create /proc/irq and a subdirectory for every interrupt. */
	init_irq_proc();
	do_ctors();			/* run the kernel's constructors */
	usermodehelper_enable();	/* enable usermodehelper */
	/*
	 * Run the level 0 to level 7 initcalls. The level names are, in
	 * order, "early", "core", "postcore", "arch", "subsys", "fs",
	 * "device", "late". Note that the kernel's naming here is slightly
	 * misleading: early_initcall corresponds to a level below 0, and
	 * pure_initcall is the one that corresponds to level 0.
	 */
	do_initcalls();
	random_int_secret_init();	/* initialize the random number pool */
}
/*
 * driver_init - initialize the driver model core, called from
 * do_basic_setup().
 *
 * The per-call descriptions below are translated from the original notes.
 */
void __init driver_init(void)
{
	/* These are the core pieces */
	/* Register the devtmpfs filesystem and start the kdevtmpfs thread. */
	devtmpfs_init();
	/*
	 * Initialize part of the driver model: kset "devices" and kobjects
	 * "dev", "dev/block" and "dev/char".
	 */
	devices_init();
	/* Bus subsystem: ksets "bus" and "devices/system". */
	buses_init();
	classes_init();			/* class subsystem: kset "class" */
	firmware_init();		/* firmware subsystem: kobject "firmware" */
	hypervisor_init();		/* hypervisor subsystem: kobject "hypervisor" */

	/* These are also core pieces, but must come after the
	 * core core pieces.
	 */
	platform_bus_init();		/* bus/platform subsystem */
	cpu_dev_init();			/* devices/system/cpu subsystem */
	memory_dev_init();		/* per the original notes: currently an empty function */
	container_dev_init();		/* devices/system/container subsystem */
}
2.2 free_initmem
定義在arch/arm64/mm/init.c中
/*
 * free_initmem - release the memory between __init_begin and __init_end.
 *
 * The region is first passed to poison_init_mem(), then
 * free_initmem_default(0) is called.
 */
void free_initmem(void)
{
poison_init_mem(__init_begin, __init_end - __init_begin);
free_initmem_default(0);
}
2.3 run_init_process
定義在init/main.c中
static int run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
return do_execve(init_filename,
(const char __user *const __user *)argv_init,
(const char __user *const __user *)envp_init);
}
/* Argument vector for the init program; run_init_process() overwrites element 0. */
static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
/* Environment for the init program, starting with HOME=/ and TERM=linux. */
const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };