Skip to content
Snippets Groups Projects
Commit 828cad8e authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main changes in this (fairly busy) cycle were:

   - There was a class of scheduler bugs related to forgetting to update
     the rq-clock timestamp which can cause weird and hard to debug
     problems, so there's a new debug facility for this: which uncovered
     a whole lot of bugs which convinced us that we want to keep the
     debug facility.

     (Peter Zijlstra, Matt Fleming)

   - Various cputime related updates: eliminate cputime and use u64
     nanoseconds directly, simplify and improve the arch interfaces,
     implement delayed accounting more widely, etc. - (Frederic
     Weisbecker)

   - Move code around for better structure plus cleanups (Ingo Molnar)

   - Move IO schedule accounting deeper into the scheduler plus related
     changes to improve the situation (Tejun Heo)

   - ... plus a round of sched/rt and sched/deadline fixes, plus other
     fixes, updates and cleanups"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (85 commits)
  sched/core: Remove unlikely() annotation from sched_move_task()
  sched/autogroup: Rename auto_group.[ch] to autogroup.[ch]
  sched/topology: Split out scheduler topology code from core.c into topology.c
  sched/core: Remove unnecessary #include headers
  sched/rq_clock: Consolidate the ordering of the rq_clock methods
  delayacct: Include <uapi/linux/taskstats.h>
  sched/core: Clean up comments
  sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds
  sched/clock: Add dummy clear_sched_clock_stable() stub function
  sched/cputime: Remove generic asm headers
  sched/cputime: Remove unused nsec_to_cputime()
  s390, sched/cputime: Remove unused cputime definitions
  powerpc, sched/cputime: Remove unused cputime definitions
  s390, sched/cputime: Make arch_cpu_idle_time() to return nsecs
  ia64, sched/cputime: Remove unused cputime definitions
  ia64: Convert vtime to use nsec units directly
  ia64, sched/cputime: Move the nsecs based cputime headers to the last arch using it
  sched/cputime: Remove jiffies based cputime
  sched/cputime, vtime: Return nsecs instead of cputime_t to account
  sched/cputime: Complete nsec conversion of tick based accounting
  ...
parents 60c906ba bb3bac2c
No related branches found
No related tags found
No related merge requests found
Showing
with 77 additions and 46 deletions
...@@ -408,6 +408,11 @@ CONTENTS ...@@ -408,6 +408,11 @@ CONTENTS
* the new scheduling related syscalls that manipulate it, i.e., * the new scheduling related syscalls that manipulate it, i.e.,
sched_setattr() and sched_getattr() are implemented. sched_setattr() and sched_getattr() are implemented.
For debugging purposes, the leftover runtime and absolute deadline of a
SCHED_DEADLINE task can be retrieved through /proc/<pid>/sched (entries
dl.runtime and dl.deadline, both values in ns). A programmatic way to
retrieve these values from production code is under discussion.
4.3 Default behavior 4.3 Default behavior
--------------------- ---------------------
...@@ -476,6 +481,7 @@ CONTENTS ...@@ -476,6 +481,7 @@ CONTENTS
Still missing: Still missing:
- programmatic way to retrieve current runtime and absolute deadline
- refinements to deadline inheritance, especially regarding the possibility - refinements to deadline inheritance, especially regarding the possibility
of retaining bandwidth isolation among non-interacting tasks. This is of retaining bandwidth isolation among non-interacting tasks. This is
being studied from both theoretical and practical points of view, and being studied from both theoretical and practical points of view, and
......
...@@ -158,11 +158,11 @@ as its prone to starvation without deadline scheduling. ...@@ -158,11 +158,11 @@ as its prone to starvation without deadline scheduling.
Consider two sibling groups A and B; both have 50% bandwidth, but A's Consider two sibling groups A and B; both have 50% bandwidth, but A's
period is twice the length of B's. period is twice the length of B's.
* group A: period=100000us, runtime=10000us * group A: period=100000us, runtime=50000us
- this runs for 0.01s once every 0.1s - this runs for 0.05s once every 0.1s
* group B: period= 50000us, runtime=10000us * group B: period= 50000us, runtime=25000us
- this runs for 0.01s twice every 0.1s (or once every 0.05 sec). - this runs for 0.025s twice every 0.1s (or once every 0.05 sec).
This means that currently a while (1) loop in A will run for the full period of This means that currently a while (1) loop in A will run for the full period of
B and can starve B's tasks (assuming they are of lower priority) for a whole B and can starve B's tasks (assuming they are of lower priority) for a whole
......
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += exec.h generic-y += exec.h
generic-y += export.h generic-y += export.h
generic-y += irq_work.h generic-y += irq_work.h
......
...@@ -1145,7 +1145,7 @@ struct rusage32 { ...@@ -1145,7 +1145,7 @@ struct rusage32 {
SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
{ {
struct rusage32 r; struct rusage32 r;
cputime_t utime, stime; u64 utime, stime;
unsigned long utime_jiffies, stime_jiffies; unsigned long utime_jiffies, stime_jiffies;
if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
...@@ -1155,16 +1155,16 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) ...@@ -1155,16 +1155,16 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
switch (who) { switch (who) {
case RUSAGE_SELF: case RUSAGE_SELF:
task_cputime(current, &utime, &stime); task_cputime(current, &utime, &stime);
utime_jiffies = cputime_to_jiffies(utime); utime_jiffies = nsecs_to_jiffies(utime);
stime_jiffies = cputime_to_jiffies(stime); stime_jiffies = nsecs_to_jiffies(stime);
jiffies_to_timeval32(utime_jiffies, &r.ru_utime); jiffies_to_timeval32(utime_jiffies, &r.ru_utime);
jiffies_to_timeval32(stime_jiffies, &r.ru_stime); jiffies_to_timeval32(stime_jiffies, &r.ru_stime);
r.ru_minflt = current->min_flt; r.ru_minflt = current->min_flt;
r.ru_majflt = current->maj_flt; r.ru_majflt = current->maj_flt;
break; break;
case RUSAGE_CHILDREN: case RUSAGE_CHILDREN:
utime_jiffies = cputime_to_jiffies(current->signal->cutime); utime_jiffies = nsecs_to_jiffies(current->signal->cutime);
stime_jiffies = cputime_to_jiffies(current->signal->cstime); stime_jiffies = nsecs_to_jiffies(current->signal->cstime);
jiffies_to_timeval32(utime_jiffies, &r.ru_utime); jiffies_to_timeval32(utime_jiffies, &r.ru_utime);
jiffies_to_timeval32(stime_jiffies, &r.ru_stime); jiffies_to_timeval32(stime_jiffies, &r.ru_stime);
r.ru_minflt = current->signal->cmin_flt; r.ru_minflt = current->signal->cmin_flt;
......
...@@ -2,7 +2,6 @@ generic-y += auxvec.h ...@@ -2,7 +2,6 @@ generic-y += auxvec.h
generic-y += bitsperlong.h generic-y += bitsperlong.h
generic-y += bugs.h generic-y += bugs.h
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += device.h generic-y += device.h
generic-y += div64.h generic-y += div64.h
generic-y += emergency-restart.h generic-y += emergency-restart.h
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
generic-y += bitsperlong.h generic-y += bitsperlong.h
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h generic-y += current.h
generic-y += early_ioremap.h generic-y += early_ioremap.h
generic-y += emergency-restart.h generic-y += emergency-restart.h
......
generic-y += bugs.h generic-y += bugs.h
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += delay.h generic-y += delay.h
generic-y += div64.h generic-y += div64.h
generic-y += dma.h generic-y += dma.h
......
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += delay.h generic-y += delay.h
generic-y += device.h generic-y += device.h
generic-y += div64.h generic-y += div64.h
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
generic-y += auxvec.h generic-y += auxvec.h
generic-y += bitsperlong.h generic-y += bitsperlong.h
generic-y += bugs.h generic-y += bugs.h
generic-y += cputime.h
generic-y += current.h generic-y += current.h
generic-y += device.h generic-y += device.h
generic-y += div64.h generic-y += div64.h
......
...@@ -5,7 +5,6 @@ generic-y += barrier.h ...@@ -5,7 +5,6 @@ generic-y += barrier.h
generic-y += bitsperlong.h generic-y += bitsperlong.h
generic-y += bugs.h generic-y += bugs.h
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h generic-y += current.h
generic-y += device.h generic-y += device.h
generic-y += div64.h generic-y += div64.h
......
...@@ -4,7 +4,6 @@ generic-y += barrier.h ...@@ -4,7 +4,6 @@ generic-y += barrier.h
generic-y += bitsperlong.h generic-y += bitsperlong.h
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cmpxchg.h generic-y += cmpxchg.h
generic-y += cputime.h
generic-y += device.h generic-y += device.h
generic-y += div64.h generic-y += div64.h
generic-y += errno.h generic-y += errno.h
......
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += exec.h generic-y += exec.h
generic-y += irq_work.h generic-y += irq_work.h
generic-y += mcs_spinlock.h generic-y += mcs_spinlock.h
......
...@@ -5,7 +5,6 @@ generic-y += bugs.h ...@@ -5,7 +5,6 @@ generic-y += bugs.h
generic-y += cacheflush.h generic-y += cacheflush.h
generic-y += checksum.h generic-y += checksum.h
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h generic-y += current.h
generic-y += delay.h generic-y += delay.h
generic-y += device.h generic-y += device.h
......
...@@ -6,7 +6,6 @@ generic-y += barrier.h ...@@ -6,7 +6,6 @@ generic-y += barrier.h
generic-y += bug.h generic-y += bug.h
generic-y += bugs.h generic-y += bugs.h
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h generic-y += current.h
generic-y += device.h generic-y += device.h
generic-y += div64.h generic-y += div64.h
......
...@@ -18,11 +18,7 @@ ...@@ -18,11 +18,7 @@
#ifndef __IA64_CPUTIME_H #ifndef __IA64_CPUTIME_H
#define __IA64_CPUTIME_H #define __IA64_CPUTIME_H
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
# include <asm-generic/cputime.h>
#else
# include <asm/processor.h>
# include <asm-generic/cputime_nsecs.h>
extern void arch_vtime_task_switch(struct task_struct *tsk); extern void arch_vtime_task_switch(struct task_struct *tsk);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
......
...@@ -27,6 +27,12 @@ struct thread_info { ...@@ -27,6 +27,12 @@ struct thread_info {
mm_segment_t addr_limit; /* user-level address space limit */ mm_segment_t addr_limit; /* user-level address space limit */
int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
__u64 utime;
__u64 stime;
__u64 gtime;
__u64 hardirq_time;
__u64 softirq_time;
__u64 idle_time;
__u64 ac_stamp; __u64 ac_stamp;
__u64 ac_leave; __u64 ac_leave;
__u64 ac_stime; __u64 ac_stime;
......
...@@ -1031,7 +1031,7 @@ GLOBAL_ENTRY(ia64_native_sched_clock) ...@@ -1031,7 +1031,7 @@ GLOBAL_ENTRY(ia64_native_sched_clock)
END(ia64_native_sched_clock) END(ia64_native_sched_clock)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
GLOBAL_ENTRY(cycle_to_cputime) GLOBAL_ENTRY(cycle_to_nsec)
alloc r16=ar.pfs,1,0,0,0 alloc r16=ar.pfs,1,0,0,0
addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
;; ;;
...@@ -1047,7 +1047,7 @@ GLOBAL_ENTRY(cycle_to_cputime) ...@@ -1047,7 +1047,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
;; ;;
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp br.ret.sptk.many rp
END(cycle_to_cputime) END(cycle_to_nsec)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_IA64_BRL_EMU #ifdef CONFIG_IA64_BRL_EMU
......
...@@ -619,6 +619,8 @@ setup_arch (char **cmdline_p) ...@@ -619,6 +619,8 @@ setup_arch (char **cmdline_p)
check_sal_cache_flush(); check_sal_cache_flush();
#endif #endif
paging_init(); paging_init();
clear_sched_clock_stable();
} }
/* /*
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <linux/timex.h> #include <linux/timex.h>
#include <linux/timekeeper_internal.h> #include <linux/timekeeper_internal.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/cputime.h>
#include <asm/machvec.h> #include <asm/machvec.h>
#include <asm/delay.h> #include <asm/delay.h>
...@@ -59,18 +60,43 @@ static struct clocksource *itc_clocksource; ...@@ -59,18 +60,43 @@ static struct clocksource *itc_clocksource;
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
extern cputime_t cycle_to_cputime(u64 cyc); extern u64 cycle_to_nsec(u64 cyc);
void vtime_account_user(struct task_struct *tsk) void vtime_flush(struct task_struct *tsk)
{ {
cputime_t delta_utime;
struct thread_info *ti = task_thread_info(tsk); struct thread_info *ti = task_thread_info(tsk);
u64 delta;
if (ti->ac_utime) { if (ti->utime)
delta_utime = cycle_to_cputime(ti->ac_utime); account_user_time(tsk, cycle_to_nsec(ti->utime));
account_user_time(tsk, delta_utime);
ti->ac_utime = 0; if (ti->gtime)
account_guest_time(tsk, cycle_to_nsec(ti->gtime));
if (ti->idle_time)
account_idle_time(cycle_to_nsec(ti->idle_time));
if (ti->stime) {
delta = cycle_to_nsec(ti->stime);
account_system_index_time(tsk, delta, CPUTIME_SYSTEM);
} }
if (ti->hardirq_time) {
delta = cycle_to_nsec(ti->hardirq_time);
account_system_index_time(tsk, delta, CPUTIME_IRQ);
}
if (ti->softirq_time) {
 delta = cycle_to_nsec(ti->softirq_time);
account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
}
ti->utime = 0;
ti->gtime = 0;
ti->idle_time = 0;
ti->stime = 0;
ti->hardirq_time = 0;
ti->softirq_time = 0;
} }
/* /*
...@@ -83,7 +109,7 @@ void arch_vtime_task_switch(struct task_struct *prev) ...@@ -83,7 +109,7 @@ void arch_vtime_task_switch(struct task_struct *prev)
struct thread_info *pi = task_thread_info(prev); struct thread_info *pi = task_thread_info(prev);
struct thread_info *ni = task_thread_info(current); struct thread_info *ni = task_thread_info(current);
pi->ac_stamp = ni->ac_stamp; ni->ac_stamp = pi->ac_stamp;
ni->ac_stime = ni->ac_utime = 0; ni->ac_stime = ni->ac_utime = 0;
} }
...@@ -91,18 +117,15 @@ void arch_vtime_task_switch(struct task_struct *prev) ...@@ -91,18 +117,15 @@ void arch_vtime_task_switch(struct task_struct *prev)
* Account time for a transition between system, hard irq or soft irq state. * Account time for a transition between system, hard irq or soft irq state.
* Note that this function is called with interrupts enabled. * Note that this function is called with interrupts enabled.
*/ */
static cputime_t vtime_delta(struct task_struct *tsk) static __u64 vtime_delta(struct task_struct *tsk)
{ {
struct thread_info *ti = task_thread_info(tsk); struct thread_info *ti = task_thread_info(tsk);
cputime_t delta_stime; __u64 now, delta_stime;
__u64 now;
WARN_ON_ONCE(!irqs_disabled()); WARN_ON_ONCE(!irqs_disabled());
now = ia64_get_itc(); now = ia64_get_itc();
delta_stime = now - ti->ac_stamp;
delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
ti->ac_stime = 0;
ti->ac_stamp = now; ti->ac_stamp = now;
return delta_stime; return delta_stime;
...@@ -110,15 +133,25 @@ static cputime_t vtime_delta(struct task_struct *tsk) ...@@ -110,15 +133,25 @@ static cputime_t vtime_delta(struct task_struct *tsk)
void vtime_account_system(struct task_struct *tsk) void vtime_account_system(struct task_struct *tsk)
{ {
cputime_t delta = vtime_delta(tsk); struct thread_info *ti = task_thread_info(tsk);
__u64 stime = vtime_delta(tsk);
account_system_time(tsk, 0, delta);
if ((tsk->flags & PF_VCPU) && !irq_count())
ti->gtime += stime;
else if (hardirq_count())
ti->hardirq_time += stime;
else if (in_serving_softirq())
ti->softirq_time += stime;
else
ti->stime += stime;
} }
EXPORT_SYMBOL_GPL(vtime_account_system); EXPORT_SYMBOL_GPL(vtime_account_system);
void vtime_account_idle(struct task_struct *tsk) void vtime_account_idle(struct task_struct *tsk)
{ {
account_idle_time(vtime_delta(tsk)); struct thread_info *ti = task_thread_info(tsk);
ti->idle_time += vtime_delta(tsk);
} }
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
......
generic-y += clkdev.h generic-y += clkdev.h
generic-y += cputime.h
generic-y += exec.h generic-y += exec.h
generic-y += irq_work.h generic-y += irq_work.h
generic-y += kvm_para.h generic-y += kvm_para.h
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment