From 709d65618750e239e76c7ceac081673f36b265d0 Mon Sep 17 00:00:00 2001
From: Zhang Qiao <zhangqiao22@huawei.com>
Date: Fri, 30 Apr 2021 18:43:28 +0800
Subject: [PATCH] sched: Introduce qos scheduler for co-location

hulk inclusion
category: feature
bugzilla: 51828
CVE: NA

--------------------------------

We introduce the concept of a qos level to the scheduler, which is
currently expressed through different scheduling policies. When the
qos level of a task group is modified via the cpu.qos_level cpu
cgroup file, the qos scheduler changes the policy of the tasks in
that group accordingly. In this way, tasks at different qos levels
can be served according to their needs.
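
For example, with the cpu cgroup controller mounted at
/sys/fs/cgroup/cpu (an illustrative path; the actual path depends on
the local cgroup setup), a group of offline tasks could be demoted
like this:

  mkdir /sys/fs/cgroup/cpu/offline_group
  echo $PID > /sys/fs/cgroup/cpu/offline_group/tasks
  # -1: run the group's tasks with the SCHED_IDLE policy
  echo -1 > /sys/fs/cgroup/cpu/offline_group/cpu.qos_level
  # 0 (default): run them with the SCHED_NORMAL policy
  echo 0 > /sys/fs/cgroup/cpu/offline_group/cpu.qos_level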

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
Reviewed-by: Hui Chen <clare.chenhui@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 init/Kconfig         |  9 ++++
 kernel/sched/core.c  | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |  5 ++
 3 files changed, 113 insertions(+)

diff --git a/init/Kconfig b/init/Kconfig
index 1386cf410c6a..e57c7d70328b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -776,6 +776,15 @@ menuconfig CGROUP_SCHED
 	  tasks.
 
 if CGROUP_SCHED
+config QOS_SCHED
+	bool "Qos task scheduling"
+	depends on CGROUP_SCHED
+	default n
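+	help
+	  This feature enables the qos scheduler, which switches the
+	  scheduling policy of the tasks in a task group when the
+	  group's cpu.qos_level cgroup file is changed.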
+
 config FAIR_GROUP_SCHED
 	bool "Group scheduling for SCHED_OTHER"
 	depends on CGROUP_SCHED
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 41fee321ef83..8c1af86a3b21 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6329,6 +6329,19 @@ void ia64_set_curr_task(int cpu, struct task_struct *p)
 /* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
+#ifdef CONFIG_QOS_SCHED
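+/*
+ * As with alloc_fair_sched_group(), return 1 on success. The child
+ * group just inherits its parent's qos level, so this cannot fail.
+ */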
+static int alloc_qos_sched_group(struct task_group *tg, struct task_group *parent)
+{
+	tg->qos_level = parent->qos_level;
+
+	return 1;
+}
+#endif
+
 static void sched_free_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
@@ -6349,6 +6362,11 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_fair_sched_group(tg, parent))
 		goto err;
 
+#ifdef CONFIG_QOS_SCHED
+	if (!alloc_qos_sched_group(tg, parent))
+		goto err;
+#endif
+
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
@@ -6417,6 +6435,28 @@ static void sched_change_group(struct task_struct *tsk, int type)
 	tg = autogroup_task_group(tsk, tg);
 	tsk->sched_task_group = tg;
 
+#ifdef CONFIG_QOS_SCHED
+	/*
+	 * No need to call __setscheduler() when a task is exiting or
+	 * the task is in an autogroup.
+	 */
+	if (!rt_task(tsk) && !(tsk->flags & PF_EXITING) &&
+	    !task_group_is_autogroup(tg)) {
+		struct rq *rq = task_rq(tsk);
+		struct sched_attr attr = {
+			.sched_priority = 0,
+		};
+
+		if (tg->qos_level == -1)
+			attr.sched_policy = SCHED_IDLE;
+		else
+			attr.sched_policy = SCHED_NORMAL;
+		attr.sched_nice = PRIO_TO_NICE(tsk->static_prio);
+
+		__setscheduler(rq, tsk, &attr, 0);
+	}
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->task_change_group)
 		tsk->sched_class->task_change_group(tsk, type);
@@ -6877,6 +6917,58 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+#ifdef CONFIG_QOS_SCHED
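+/*
+ * Switch the non-RT tasks of a task group between SCHED_NORMAL
+ * (qos_level == 0) and SCHED_IDLE (qos_level == -1). Tasks that
+ * join the group later pick up the policy in sched_change_group().
+ */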
+static int cpu_qos_write(struct cgroup_subsys_state *css,
+			struct cftype *cftype, s64 qos_level)
+{
+	struct css_task_iter it;
+	struct task_struct *tsk;
+	struct task_group *tg = css_tg(css);
+	struct sched_param param;
+	int pid, policy;
+
+	if (!tg->se[0])
+		return -EINVAL;
+
+	if (qos_level != -1 && qos_level != 0)
+		return -EINVAL;
+
+	if (tg->qos_level == qos_level)
+		return 0;
+
+	if (qos_level == -1)
+		policy = SCHED_IDLE;
+	else
+		policy = SCHED_NORMAL;
+
+	tg->qos_level = qos_level;
+
+	param.sched_priority = 0;
+	css_task_iter_start(css, 0, &it);
+	while ((tsk = css_task_iter_next(&it))) {
+		pid = task_tgid_vnr(tsk);
+
+		if (pid > 0 && !rt_task(tsk))
+			sched_setscheduler(tsk, policy, &param);
+	}
+	css_task_iter_end(&it);
+
+	return 0;
+}
+
+static s64 cpu_qos_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	struct task_group *tg = css_tg(css);
+
+	return tg->qos_level;
+}
+#endif /* CONFIG_QOS_SCHED */
+
 static struct cftype cpu_legacy_files[] = {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	{
@@ -6912,6 +7004,13 @@ static struct cftype cpu_legacy_files[] = {
 		.read_u64 = cpu_rt_period_read_uint,
 		.write_u64 = cpu_rt_period_write_uint,
 	},
+#endif
+#ifdef CONFIG_QOS_SCHED
+	{
+		.name = "qos_level",
+		.read_s64 = cpu_qos_read,
+		.write_s64 = cpu_qos_write,
+	},
 #endif
 	{ }	/* Terminate */
 };
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ca2fd2d6171f..502af7c7373e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -400,7 +400,12 @@ struct task_group {
 
 	struct cfs_bandwidth	cfs_bandwidth;
 
+#if defined(CONFIG_QOS_SCHED) && !defined(__GENKSYMS__)
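+	/* -1: run tasks as SCHED_IDLE; 0: run tasks as SCHED_NORMAL */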
+	long qos_level;
+#else
 	KABI_RESERVE(1)
+#endif
 	KABI_RESERVE(2)
 };
 
-- 
GitLab