diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index c8b52b3ec86543bc452d85447e6f9544703d90d4..1c2c099e393b0f2509e847e6aa7a66862b97be5f 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -33,7 +33,7 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 
 extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
-extern void sched_post_fork(struct task_struct *p);
+extern void sched_cgroup_fork(struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 void __noreturn do_task_dead(void);
diff --git a/kernel/fork.c b/kernel/fork.c
index 88463fd56930820139346d994a32d865439b313a..231b01eba6e1ea9e3e2237ec6169bde123e0e774 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2063,6 +2063,17 @@ static __latent_entropy struct task_struct *copy_process(
 	if (retval)
 		goto bad_fork_free_futex_mutex;
 
+	/*
+	 * Now that the cgroups are pinned, re-clone the parent cgroup and put
+	 * the new task on the correct runqueue. All this *before* the task
+	 * becomes visible.
+	 *
+	 * This isn't part of ->can_fork() because while the re-cloning is
+	 * cgroup specific, it unconditionally needs to place the task on a
+	 * runqueue.
+	 */
+	sched_cgroup_fork(p);
+
 	/*
 	 * From this point on we must avoid any synchronous user-space
 	 * communication until we take the tasklist-lock. In particular, we do
@@ -2171,7 +2182,6 @@ static __latent_entropy struct task_struct *copy_process(
 
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
-	sched_post_fork(p);
 	cgroup_threadgroup_change_end(current);
 	perf_event_fork(p);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 496ce71f93a7a48bbc3f6a5fd12ed944a7704bb8..b091537102590fa56c7f57a29a525bea9d992948 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2357,8 +2357,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	return 0;
 }
 
-void sched_post_fork(struct task_struct *p)
+void sched_cgroup_fork(struct task_struct *p)
 {
 	unsigned long flags;
 
 	/*
@@ -2369,6 +2369,9 @@ void sched_post_fork(struct task_struct *p)
 	 * Silence PROVE_RCU.
 	 */
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
+#ifdef CONFIG_CGROUP_SCHED
+	p->sched_task_group = task_group(current);
+#endif
 	rseq_migrate(p);
 	/*
 	 * We're setting the CPU for the first time, we don't migrate,
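
For reference, the ordering this change establishes in copy_process() condenses to the sketch below. It is illustrative, not tree source: sched_cgroup_fork(), cgroup_post_fork(), and the error label are taken from the hunks above; cgroup_can_fork() is the mainline name for the cgroup-pinning step the new comment refers to; all intervening steps are elided.

	retval = cgroup_can_fork(p);	/* pins the cgroups; may fail */
	if (retval)
		goto bad_fork_free_futex_mutex;

	sched_cgroup_fork(p);		/* copy the parent's task_group and
					 * place the task on a runqueue,
					 * all before the task is visible */

	/* ... task is added to the pid hash and becomes visible ... */

	proc_fork_connector(p);
	cgroup_post_fork(p);		/* sched_post_fork() no longer runs here */
	cgroup_threadgroup_change_end(current);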