EAS k5.4 (Part 5): v4.20 - Runtime sched_domain flag detection

The AOSP 4.14 kernel already had this feature; kernel 4.20, following the AOSP kernel, supports it as well:

e1799a8 sched/topology, arch/arm: Rebuild sched_domain hierarchy when CPU capacity changes
3ba09df sched/topology, arch/arm64: Rebuild the sched_domain hierarchy when the CPU capacity changes
bb1fbdd sched/topology, drivers/base/arch_topology: Rebuild the sched_domain hierarchy when capacities change
05484e0 sched/topology: Add SD_ASYM_CPUCAPACITY flag detection
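
The core of commit bb1fbdd is a deferred rebuild: when CPU capacities change (for example, once cpufreq reports the real maximum frequencies), a work item re-runs the domain build so the flag can be re-detected. Below is a minimal sketch of that mechanism; the identifiers capacity_update_work, capacity_update_workfn and cpu_capacities_changed are illustrative, not the exact names in drivers/base/arch_topology.c:

#include <linux/workqueue.h>
#include <linux/cpuset.h>

/*
 * Deferred rebuild, sketched: rebuild_sched_domains() cannot safely be
 * called from the capacity-update context itself, so it is pushed to a
 * work item.
 */
static void capacity_update_workfn(struct work_struct *work)
{
    /* Re-runs build_sched_domains(), which re-detects SD_ASYM_CPUCAPACITY. */
    rebuild_sched_domains();
}
static DECLARE_WORK(capacity_update_work, capacity_update_workfn);

/* Illustrative hook, called after per-CPU capacities have changed: */
static void cpu_capacities_changed(void)
{
    schedule_work(&capacity_update_work);
}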
 

  • Data structures

  • Building the sched_domain

  1. Function call graph

    
    |rebuild_sched_domains()  /*kernel/cgroup/cpuset.c*/                    (1)
    |update_cpumasks_hier()
    |update_relax_domain_level()
    |update_flag()
    |update_prstate() /*(kernel5.4)*/
      \-|rebuild_sched_domains_locked(void)
        | 
        |\-|partition_sched_domains() /*kernel4.14/kernel/sched/topology.c*/
        |    \-|build_sched_domains()
        |
         \-|generate_sched_domains(&doms, &attr) /*kernel5.4/kernel/cgroup/cpuset.c*/
           |partition_and_rebuild_sched_domains() 
           |
            \-|partition_sched_domains_locked() /*kernel5.4/kernel/sched/topology.c*/
              | \-|build_sched_domains()
              |
              |rebuild_root_domains()
    
    |cpuhp_hp_states[CPUHP_AP_ACTIVE] /*kernel/cpu.c*/                        (2)
    | 
     \-|sched_cpu_activate()  /*kernel/sched/core.c*/
       |sched_cpu_deactivate()
          \-|cpuset_cpu_active()
            |cpuset_cpu_inactive(cpu)
           | 
            \-|partition_sched_domains() /*kernel/sched/topology.c*/
              |
               \-|partition_sched_domains_locked()
                 \-|build_sched_domains()
    
    |__init sched_init_smp(void) /*kernel/sched/core.c*/                        (3)
    |
     \-|sched_init_domains(cpu_active_mask) /*kernel/sched/topology.c*/
        \-|build_sched_domains()

    (1) cpuset operations (changing a cpuset's CPUs, its sched-domain attributes or flags, or a partition's state) tear down and rebuild the sched domains.

    (2) CPU hotplug: sched_cpu_activate()/sched_cpu_deactivate() rebuild the domains whenever a CPU comes online or goes offline.

    (3) At system initialization, sched domains are built for all CPUs in cpu_active_mask (excluding housekeeping CPUs, HK_FLAG_DOMAIN).
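
Once rebuilt, the result is consumed cheaply at runtime: build_sched_domains() bumps the sched_asym_cpucapacity static key when any level received the flag, so hot scheduler paths can gate asymmetric-capacity logic on it. A sketch follows; the helper name sd_sees_asym_capacity is illustrative, while sched_asym_cpucapacity and SD_ASYM_CPUCAPACITY are the real kernel symbols:

/*
 * Illustrative helper: true when asymmetric-capacity handling applies to
 * this domain. The static key keeps the check nearly free on symmetric
 * systems.
 */
static inline bool sd_sees_asym_capacity(struct sched_domain *sd)
{
    return static_branch_unlikely(&sched_asym_cpucapacity) &&
           (sd->flags & SD_ASYM_CPUCAPACITY);
}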

  2. Key functions

    build_sched_domains()
        |__visit_domain_allocation_hell()
        |    |__sdt_alloc()
        |    |alloc_rootdomain()
        |        |init_rootdomain()
        |            |init_irq_work()
        |            |init_dl_bw()
        |            |cpudl_init()
        |            |cpupri_init()
        |            |init_max_cpu_capacity()
        |
        |build_sched_domain()
        |    |sd_init()
        |
        |build_sched_groups()
        |    |get_group()
        |
        |init_sched_groups_capacity()
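
sd_init() is where the runtime-detected flag actually lands in sd->flags: the topology table supplies per-level flags through its ->sd_flags() callback, and the dflags argument passed down from build_sched_domains() is OR-ed on top. A condensed sketch; the wrapper function sd_level_flags is illustrative (in kernel 5.4 this logic sits inline in sd_init()):

/*
 * Condensed from kernel 5.4 sd_init(), not verbatim: per-level flags come
 * from the topology table, and the caller's dflags (SD_ASYM_CPUCAPACITY
 * from build_sched_domains()) are merged in.
 */
static int sd_level_flags(struct sched_domain_topology_level *tl, int dflags)
{
    int sd_flags = 0;

    if (tl->sd_flags)
        sd_flags = (*tl->sd_flags)();   /* e.g. cpu_core_flags() for MC */

    sd_flags |= dflags;                 /* runtime-detected flags */
    return sd_flags;
}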
    
    

     

/*
 * Topology list, bottom-up.
 */
static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_SMT
    { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
    { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
    { cpu_cpu_mask, SD_INIT_NAME(DIE) },
    { NULL, },
};
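
default_topology is only the fallback; an architecture can install its own table with set_sched_topology() (kernel/sched/topology.c). A sketch of how a no-SMT platform could do so; the table and the init function here are illustrative, not copied from any arch tree:

static struct sched_domain_topology_level no_smt_topology[] = {
#ifdef CONFIG_SCHED_MC
    { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
    { cpu_cpu_mask, SD_INIT_NAME(DIE) },
    { NULL, },
};

static void __init illustrative_arch_topology_init(void)
{
    /* Must run before sched_init_domains() builds the hierarchy. */
    set_sched_topology(no_smt_topology);
}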

static int
build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
{
    enum s_alloc alloc_state = sa_none;
    struct sched_domain *sd;
    struct s_data d;
    int i, ret = -ENOMEM;
    struct sched_domain_topology_level *tl_asym;
    bool has_asym = false;

    alloc_state = __visit_domain_allocation_hell(&d, cpu_map);

    tl_asym = asym_cpu_capacity_level(cpu_map);    /* commit 05484e09 */    (1)

    /* Set up domains for CPUs specified by the cpu_map: */
    for_each_cpu(i, cpu_map) {                     /* loop over every CPU */    (2)
        struct sched_domain_topology_level *tl;

        sd = NULL;
        for_each_sd_topology(tl) {                 /* loop over every topology level */    (3)
            int dflags = 0;

            if (tl == tl_asym) {
                dflags |= SD_ASYM_CPUCAPACITY;     /* flag the asymmetric topology level's sched domain */    (4)
                has_asym = true;
            }

            if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
                goto error;

            /* initialize the sched domain of CPU i at this tl (MC or DIE) */    (5)
            sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);

            if (tl == sched_domain_topology)
                /* set s_data->sd to each CPU's lowest-level sched domain */    (6)
                *per_cpu_ptr(d.sd, i) = sd;
            if (tl->flags & SDTL_OVERLAP)
                sd->flags |= SD_OVERLAP;
            if (cpumask_equal(cpu_map, sched_domain_span(sd)))
                break;
        }
    }

    /* Build the groups for the domains */    (7)
    for_each_cpu(i, cpu_map) {
        for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
            sd->span_weight = cpumask_weight(sched_domain_span(sd));
            if (sd->flags & SD_OVERLAP) {
                if (build_overlap_sched_groups(sd, i))
                    goto error;
            } else {
                if (build_sched_groups(sd, i))
                    goto error;
            }
        }
    }

    /* Calculate CPU capacity for physical packages and nodes */    (8)
    for (i = nr_cpumask_bits-1; i >= 0; i--) {
        if (!cpumask_test_cpu(i, cpu_map))
            continue;

        for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
            claim_allocations(i, sd);
            init_sched_groups_capacity(i, sd);
        }
    }

    /* Attach the domains */    (9)
    rcu_read_lock();
    for_each_cpu(i, cpu_map) {
        sd = *per_cpu_ptr(d.sd, i);
        cpu_attach_domain(sd, d.rd, i);
    }
    rcu_read_unlock();

    if (has_asym)
        static_branch_inc_cpuslocked(&sched_asym_cpucapacity);

    ret = 0;
error:
    __free_domain_allocs(&d, alloc_state, cpu_map);

    return ret;
}

 

(1) Walks the topology levels to find the lowest level at which CPU capacity asymmetry is visible. On a 4+4 big.LITTLE system without SMT there are only MC and DIE domains, so the DIE level is returned (a condensed version of this function follows the list).
(2)(3) Iterate as cpu0->MC, cpu0->DIE, cpu1->MC, cpu1->DIE, and so on.
(4) The DIE domain spans the two CPU types of cluster0 and cluster1, so SD_ASYM_CPUCAPACITY is set on it; each MC domain contains only identical CPUs, so the flag stays clear there.
(5) The key function that builds each sched domain.
(6) Initializes s_data->sd to the MC domain of each of cpu0~cpu7.
(7) Loops over each CPU's s_data->sd (always the MC domain) and its parent (the DIE domain), i.e. cpu0->MC, cpu0->DIE, cpu1->MC, cpu1->DIE, ..., and builds the sched groups for every domain visited.
(8) For every domain in each CPU's hierarchy, hands the per-CPU allocations over to the domains (claim_allocations()) and initializes the group capacities.
(9) Attaches each CPU's lowest-level sched domain and the root domain to its runqueue.
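
For reference, annotation (1)'s asym_cpu_capacity_level(), as added by commit 05484e09, condensed from kernel 5.4 (lightly trimmed, not a verbatim copy): it returns the lowest topology level at which every CPU can see a highest-capacity CPU in its span, or NULL when all capacities are equal.

static struct sched_domain_topology_level
*asym_cpu_capacity_level(const struct cpumask *cpu_map)
{
    struct sched_domain_topology_level *tl, *asym_tl = NULL;
    int i, j, asym_level = 0;
    unsigned long cap;
    bool asym = false;

    /* Is there any capacity asymmetry at all? */
    cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));
    for_each_cpu(i, cpu_map) {
        if (arch_scale_cpu_capacity(i) != cap) {
            asym = true;
            break;
        }
    }
    if (!asym)
        return NULL;

    /*
     * Examine the topology from every CPU's point of view and keep the
     * highest (widest) level at which some CPU first sees a CPU of higher
     * capacity; that is where SD_ASYM_CPUCAPACITY must live.
     */
    for_each_cpu(i, cpu_map) {
        unsigned long max_capacity = arch_scale_cpu_capacity(i);
        int tl_id = 0;

        for_each_sd_topology(tl) {
            if (tl_id < asym_level)
                goto next_level;

            for_each_cpu_and(j, tl->mask(i), cpu_map) {
                unsigned long capacity;

                capacity = arch_scale_cpu_capacity(j);
                if (capacity <= max_capacity)
                    continue;

                max_capacity = capacity;
                asym_level = tl_id;
                asym_tl = tl;
            }
next_level:
            tl_id++;
        }
    }

    return asym_tl;
}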

/*
 * build_sched_groups will build a circular linked list of the groups
 * covered by the given span, will set each group's ->cpumask correctly,
 * and will initialize their ->sgc.
 *
 * Assumes the sched_domain tree is fully constructed
 */

/* Given CPU i and a sched domain, build the sched groups; called as
   cpu0->MC, cpu0->DIE, cpu1->MC, cpu1->DIE, ... */

static int
build_sched_groups(struct sched_domain *sd, int cpu)
{
    struct sched_group *first = NULL, *last = NULL;
    struct sd_data *sdd = sd->private;
    const struct cpumask *span = sched_domain_span(sd);
    struct cpumask *covered;
    int i;

    lockdep_assert_held(&sched_domains_mutex);
    covered = sched_domains_tmpmask;

    cpumask_clear(covered);

    /* starting at @cpu, loop over the CPUs within the span mask */
    for_each_cpu_wrap(i, span, cpu) {
        struct sched_group *sg;

        if (cpumask_test_cpu(i, covered))
            continue;

        sg = get_group(i, sdd);

        cpumask_or(covered, covered, sched_group_span(sg));

        if (!first)
            first = sg;
        if (last)
            last->next = sg;
        last = sg;
    }
    last->next = first;
    sd->groups = first;

    return 0;
}

static struct sched_group *get_group(int cpu, struct sd_data *sdd)
{
    struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
    struct sched_domain *child = sd->child;
    struct sched_group *sg;
    bool already_visited;

    if (child)
        /* If this domain (the DIE domain) has a child, use the first CPU
           of this CPU's child domain span; so cpu0~3 map to cpu=0 and
           cpu4~7 map to cpu=4. */
        cpu = cpumask_first(sched_domain_span(child));

    /* Wire up sg and sgc from sd_data. At the DIE topology level there are
       only two groups, keyed by cpu0 (cpu0~3) and cpu4 (cpu4~7); at the MC
       topology level there are eight, one group per CPU's MC domain. */
    sg = *per_cpu_ptr(sdd->sg, cpu);
    sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);

    /* Increase refcounts for claim_allocations: */
    already_visited = atomic_inc_return(&sg->ref) > 1;
    /* sgc visits should follow a similar trend as sg */
    WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1));

    /* If we have already visited that group, it's already initialized. */
    if (already_visited)
        return sg;

    if (child) {
        /* A DIE domain with a child: sg->cpumask = the child domain's span
           (0x0F or 0xF0), and sg->sgc->cpumask = sg->cpumask. */
        cpumask_copy(sched_group_span(sg), sched_domain_span(child));
        cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
    } else {
        /* An MC domain: add just the current CPU to sg->cpumask and
           sg->sgc->cpumask. */
        cpumask_set_cpu(cpu, sched_group_span(sg));
        cpumask_set_cpu(cpu, group_balance_mask(sg));
    }

    /* Initialize the sgc capacity from the group weight: each MC-level
       group holds a single CPU (weight = 1); the two DIE-level groups hold
       cpu0~3 and cpu4~7 respectively (weight = 4). */
    sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
    sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
    sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;

    return sg;
}
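
To visualize the result, a throwaway debugging helper of our own (not kernel code; sd->name requires CONFIG_SCHED_DEBUG) can walk the circular list built above. On the 4+4 example, the DIE domain prints two groups (0-3 and 4-7), while each CPU's MC domain prints four single-CPU groups:

/* Illustrative debug helper: dump the circular sched_group list of @sd. */
static void dump_sched_groups(struct sched_domain *sd, int cpu)
{
    struct sched_group *sg = sd->groups;

    do {
        pr_info("cpu%d %s: group %*pbl (capacity %lu)\n",
                cpu, sd->name, cpumask_pr_args(sched_group_span(sg)),
                sg->sgc->capacity);
        sg = sg->next;
    } while (sg != sd->groups);
}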

 
