Commit 70a232a5 authored by Hui Tang, committed by Yongqiang Liu

sched: Adjust wakeup cpu range according to CPU util dynamically

hulk inclusion
category: feature
bugzilla: 187173, https://gitee.com/openeuler/kernel/issues/I5G4IH


CVE: NA

--------------------------------

Compare the task group's 'util_avg' on the preferred CPUs with the capacity
of the preferred CPUs, and dynamically adjust the CPU range for the task
wakeup process.
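
The core of the change is a single threshold test: the task group's 'util_avg' summed over the preferred CPUs is compared against sysctl_sched_util_low_pct percent of their combined capacity. As an illustration only (the helper and its parameter names below are mine, not part of the patch, which performs this test inline in set_task_select_cpus()):

/*
 * Illustration: return true when the preferred CPUs are lightly loaded,
 * i.e. their summed task-group utilization is at or below low_pct percent
 * of their combined capacity, so the wakeup may stay inside prefer_cpus.
 */
static inline bool prefer_cpus_lightly_loaded(unsigned long util_avg_sum,
					      unsigned long tg_capacity,
					      int low_pct)
{
	return util_avg_sum * 100 <= tg_capacity * (unsigned long)low_pct;
}

For example, with four preferred CPUs of capacity 1024 each (tg_capacity = 4096) and the default low_pct of 85, wakeups stay inside prefer_cpus as long as util_avg_sum does not exceed 3481.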

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
parent 243865da
......@@ -1251,13 +1251,15 @@ struct task_struct {
#if !defined(__GENKSYMS__)
#if defined(CONFIG_QOS_SCHED_DYNAMIC_AFFINITY)
cpumask_t *prefer_cpus;
const cpumask_t *select_cpus;
#else
KABI_RESERVE(6)
KABI_RESERVE(7)
#endif
#else
KABI_RESERVE(6)
#endif
KABI_RESERVE(7)
#endif
KABI_RESERVE(8)
/* CPU-specific state of this task: */
......
......@@ -32,6 +32,10 @@ extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
extern int sysctl_sched_util_low_pct;
#endif
enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,
SCHED_TUNABLESCALING_LOG,
......
......@@ -1775,6 +1775,9 @@ static void task_numa_compare(struct task_numa_env *env,
* can be used from IRQ context.
*/
local_irq_disable();
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
env->p->select_cpus = &env->p->cpus_allowed;
#endif
env->dst_cpu = select_idle_sibling(env->p, env->src_cpu,
env->dst_cpu);
local_irq_enable();
......@@ -5955,8 +5958,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
int i;
/* Skip over this group if it has no CPUs allowed */
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (!cpumask_intersects(sched_group_span(group),
p->select_cpus))
#else
if (!cpumask_intersects(sched_group_span(group),
&p->cpus_allowed))
#endif
continue;
local_group = cpumask_test_cpu(this_cpu,
......@@ -6088,7 +6096,11 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
return cpumask_first(sched_group_span(group));
/* Traverse only the allowed CPUs */
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
for_each_cpu_and(i, sched_group_span(group), p->select_cpus) {
#else
for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
#endif
if (sched_idle_cpu(i))
return i;
......@@ -6131,7 +6143,11 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
{
int new_cpu = cpu;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (!cpumask_intersects(sched_domain_span(sd), p->select_cpus))
#else
if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
#endif
return prev_cpu;
/*
......@@ -6248,7 +6264,11 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
if (!test_idle_cores(target, false))
return -1;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_and(cpus, sched_domain_span(sd), p->select_cpus);
#else
cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
#endif
for_each_cpu_wrap(core, cpus, target) {
bool idle = true;
......@@ -6282,8 +6302,13 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
return -1;
for_each_cpu(cpu, cpu_smt_mask(target)) {
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (!cpumask_test_cpu(cpu, p->select_cpus) ||
!cpumask_test_cpu(cpu, sched_domain_span(sd)))
#else
if (!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
!cpumask_test_cpu(cpu, sched_domain_span(sd)))
#endif
continue;
if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
return cpu;
......@@ -6344,7 +6369,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
time = local_clock();
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_and(cpus, sched_domain_span(sd), p->select_cpus);
#else
cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
#endif
for_each_cpu_wrap(cpu, cpus, target) {
if (!--nr)
......@@ -6383,7 +6412,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
struct sched_domain *sd;
int i, recent_used_cpu;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
cpumask_test_cpu(target, p->select_cpus)) {
#else
if (available_idle_cpu(target) || sched_idle_cpu(target)) {
#endif
SET_STAT(found_idle_cpu_easy);
return target;
}
......@@ -6391,8 +6425,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (prev != target && cpus_share_cache(prev, target) &&
cpumask_test_cpu(prev, p->select_cpus) &&
(available_idle_cpu(prev) || sched_idle_cpu(prev))) {
#else
if (prev != target && cpus_share_cache(prev, target) &&
(available_idle_cpu(prev) || sched_idle_cpu(prev))) {
#endif
SET_STAT(found_idle_cpu_easy);
return prev;
}
......@@ -6403,7 +6443,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
(available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
cpumask_test_cpu(p->recent_used_cpu, p->select_cpus)) {
#else
cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
#endif
/*
* Replace recent_used_cpu with prev as it is a potential
* candidate for the next wake:
......@@ -6605,7 +6649,85 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
sync_entity_load_avg(&p->se);
return min_cap * 1024 < task_util(p) * capacity_margin;
}
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
/*
 * Low utilization threshold for CPU
 *
 * (default: 85%, units: percentage of CPU utilization)
 */
int sysctl_sched_util_low_pct = 85;
static inline bool prefer_cpus_valid(struct task_struct *p)
{
return p->prefer_cpus &&
!cpumask_empty(p->prefer_cpus) &&
!cpumask_equal(p->prefer_cpus, &p->cpus_allowed) &&
cpumask_subset(p->prefer_cpus, &p->cpus_allowed);
}
/*
 * set_task_select_cpus: select the CPU range for a task
 * @p: the task whose available CPU range will be set
 * @idlest_cpu: the idlest CPU among the preferred CPUs
 *
 * If the sum of 'util_avg' across the preferred CPUs is lower than
 * 'sysctl_sched_util_low_pct' percent of their total capacity, select
 * the 'prefer_cpus' range for the task; otherwise fall back to
 * 'cpus_allowed'.
 *
 * The available CPU range is stored in p->select_cpus. The idlest CPU
 * among the preferred CPUs is stored in @idlest_cpu, which is used as the
 * wakeup CPU when the fast path picks a CPU outside p->select_cpus.
 */
static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
int sd_flag)
{
unsigned long util_avg_sum = 0;
unsigned long tg_capacity = 0;
long min_util = INT_MIN;
struct task_group *tg;
long spare;
int cpu;
p->select_cpus = &p->cpus_allowed;
if (!prefer_cpus_valid(p))
return;
rcu_read_lock();
tg = task_group(p);
for_each_cpu(cpu, p->prefer_cpus) {
if (unlikely(!tg->se[cpu]))
continue;
if (idlest_cpu && available_idle_cpu(cpu)) {
*idlest_cpu = cpu;
} else if (idlest_cpu) {
spare = (long)(capacity_of(cpu) - tg->se[cpu]->avg.util_avg);
if (spare > min_util) {
min_util = spare;
*idlest_cpu = cpu;
}
}
if (available_idle_cpu(cpu)) {
rcu_read_unlock();
p->select_cpus = p->prefer_cpus;
return;
}
util_avg_sum += tg->se[cpu]->avg.util_avg;
tg_capacity += capacity_of(cpu);
}
rcu_read_unlock();
if (tg_capacity > cpumask_weight(p->prefer_cpus) &&
util_avg_sum * 100 <= tg_capacity * sysctl_sched_util_low_pct) {
p->select_cpus = p->prefer_cpus;
}
}
#endif
/*
* select_task_rq_fair: Select target runqueue for the waking task in domains
......@@ -6628,13 +6750,24 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
int new_cpu = prev_cpu;
int want_affine = 0;
int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
int idlest_cpu = 0;
#endif
time = schedstat_start_time();
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
set_task_select_cpus(p, &idlest_cpu, sd_flag);
#endif
if (sd_flag & SD_BALANCE_WAKE) {
record_wakee(p);
want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
&& cpumask_test_cpu(cpu, p->select_cpus);
#else
&& cpumask_test_cpu(cpu, &p->cpus_allowed);
#endif
}
rcu_read_lock();
......@@ -6648,7 +6781,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
*/
if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
new_cpu = cpu;
if (cpu != prev_cpu &&
cpumask_test_cpu(prev_cpu, p->select_cpus))
#else
if (cpu != prev_cpu)
#endif
new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
sd = NULL; /* Prefer wake_affine over balance flags */
......@@ -6673,6 +6812,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
current->recent_used_cpu = cpu;
}
rcu_read_unlock();
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
if (!cpumask_test_cpu(new_cpu, p->select_cpus))
new_cpu = idlest_cpu;
#endif
schedstat_end_time(cpu_rq(cpu), time);
return new_cpu;
......
......@@ -1325,6 +1325,17 @@ static struct ctl_table kern_table[] = {
.extra1 = &one_hundred,
.extra2 = &one_thousand,
},
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
{
.procname = "sched_util_low_pct",
.data = &sysctl_sched_util_low_pct,
.maxlen = sizeof(sysctl_sched_util_low_pct),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one_hundred,
},
#endif
{ }
};
......
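
Usage note, not part of the patch: because the new entry is added to kern_table with proc_dointvec_minmax and bounds of zero and one_hundred, the tunable should appear as /proc/sys/kernel/sched_util_low_pct, accept values from 0 to 100, and default to 85. A higher value keeps wakeups inside prefer_cpus under heavier load; a lower value falls back to the full cpus_allowed mask sooner.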