rpms/kernel/F-9 linux-2.6-sched-clock-fix-nohz-interaction.patch, NONE, 1.1 linux-2.6-sched-fix-process-time-monotonicity.patch, NONE, 1.1 kernel.spec, 1.763, 1.764

Chuck Ebbert cebbert at fedoraproject.org
Sun Sep 14 01:29:53 UTC 2008


Author: cebbert

Update of /cvs/pkgs/rpms/kernel/F-9
In directory cvs1.fedora.phx.redhat.com:/tmp/cvs-serv6200

Modified Files:
	kernel.spec 
Added Files:
	linux-2.6-sched-clock-fix-nohz-interaction.patch 
	linux-2.6-sched-fix-process-time-monotonicity.patch 
Log Message:
Fix problems with scheduler clock going backwards (#453257)

linux-2.6-sched-clock-fix-nohz-interaction.patch:

--- NEW FILE linux-2.6-sched-clock-fix-nohz-interaction.patch ---
From: Peter Zijlstra <a.p.zijlstra at chello.nl>
Date: Mon, 1 Sep 2008 14:44:23 +0000 (+0200)
Subject: sched_clock: fix NOHZ interaction
X-Git-Tag: v2.6.27-rc6~15^2~2
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=56c7426b3951e4f35a71d695f1c982989399d6fd

sched_clock: fix NOHZ interaction

If HLT stops the TSC, we'll fail to account idle time, thereby inflating the
actual process times. Fix this by re-calibrating the clock against GTOD when
leaving nohz mode.

Signed-off-by: Peter Zijlstra <a.p.zijlstra at chello.nl>
Tested-by: Avi Kivity <avi at qumranet.com>
Signed-off-by: Ingo Molnar <mingo at elte.hu>
---

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7a46bde..a87b046 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -162,6 +162,8 @@ void tick_nohz_stop_idle(int cpu)
 		ts->idle_lastupdate = now;
 		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
 		ts->idle_active = 0;
+
+		sched_clock_idle_wakeup_event(0);
 	}
 }
 
@@ -177,6 +179,7 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
 	}
 	ts->idle_entrytime = now;
 	ts->idle_active = 1;
+	sched_clock_idle_sleep_event();
 	return now;
 }
 

linux-2.6-sched-fix-process-time-monotonicity.patch:

--- NEW FILE linux-2.6-sched-fix-process-time-monotonicity.patch ---
From: Balbir Singh <balbir at linux.vnet.ibm.com>
Date: Fri, 5 Sep 2008 16:12:23 +0000 (+0200)
Subject: sched: fix process time monotonicity
X-Git-Tag: v2.6.27-rc6~15^2~1
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=49048622eae698e5c4ae61f7e71200f265ccc529

sched: fix process time monotonicity

Spencer reported a problem where utime and stime were going negative despite
the fixes in commit b27f03d4bdc145a09fb7b0c0e004b29f1ee555fa. The suspected
reason for the problem is that signal_struct maintains it's own utime and
stime (of exited tasks), these are not updated using the new task_utime()
routine, hence sig->utime can go backwards and cause the same problem
to occur (sig->utime, adds tsk->utime and not task_utime()). This patch
fixes the problem

TODO: using max(task->prev_utime, derived utime) works for now, but a more
generic solution is to implement cputime_max() and use the cputime_gt()
function for comparison.

Reported-by: spencer at bluehost.com
Signed-off-by: Balbir Singh <balbir at linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra at chello.nl>
Signed-off-by: Ingo Molnar <mingo at elte.hu>
---

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0d6eb33..71c9be5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 	return 0;
 }
 
-/*
- * Use precise platform statistics if available:
- */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-static cputime_t task_utime(struct task_struct *p)
-{
-	return p->utime;
-}
-
-static cputime_t task_stime(struct task_struct *p)
-{
-	return p->stime;
-}
-#else
-static cputime_t task_utime(struct task_struct *p)
-{
-	clock_t utime = cputime_to_clock_t(p->utime),
-		total = utime + cputime_to_clock_t(p->stime);
-	u64 temp;
-
-	/*
-	 * Use CFS's precise accounting:
-	 */
-	temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
-
-	if (total) {
-		temp *= utime;
-		do_div(temp, total);
-	}
-	utime = (clock_t)temp;
-
-	p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
-	return p->prev_utime;
-}
-
-static cputime_t task_stime(struct task_struct *p)
-{
-	clock_t stime;
-
-	/*
-	 * Use CFS's precise accounting. (we subtract utime from
-	 * the total, to make sure the total observed by userspace
-	 * grows monotonically - apps rely on that):
-	 */
-	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
-			cputime_to_clock_t(task_utime(p));
-
-	if (stime >= 0)
-		p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
-
-	return p->prev_stime;
-}
-#endif
-
-static cputime_t task_gtime(struct task_struct *p)
-{
-	return p->gtime;
-}
-
 static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task, int whole)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cfb0d87..3d9120c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1475,6 +1475,10 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
+extern cputime_t task_utime(struct task_struct *p);
+extern cputime_t task_stime(struct task_struct *p);
+extern cputime_t task_gtime(struct task_struct *p);
+
 /*
  * Per process flags
  */
diff --git a/kernel/exit.c b/kernel/exit.c
index 25ed2ad..1639564 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,9 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, tsk->utime);
-		sig->stime = cputime_add(sig->stime, tsk->stime);
-		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+		sig->utime = cputime_add(sig->utime, task_utime(tsk));
+		sig->stime = cputime_add(sig->stime, task_stime(tsk));
+		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
diff --git a/kernel/sched.c b/kernel/sched.c
index 9a1ddb8..1a5f73c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4179,6 +4179,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 }
 
 /*
+ * Use precise platform statistics if available:
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+cputime_t task_utime(struct task_struct *p)
+{
+	return p->utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+	return p->stime;
+}
+#else
+cputime_t task_utime(struct task_struct *p)
+{
+	clock_t utime = cputime_to_clock_t(p->utime),
+		total = utime + cputime_to_clock_t(p->stime);
+	u64 temp;
+
+	/*
+	 * Use CFS's precise accounting:
+	 */
+	temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+
+	if (total) {
+		temp *= utime;
+		do_div(temp, total);
+	}
+	utime = (clock_t)temp;
+
+	p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
+	return p->prev_utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+	clock_t stime;
+
+	/*
+	 * Use CFS's precise accounting. (we subtract utime from
+	 * the total, to make sure the total observed by userspace
+	 * grows monotonically - apps rely on that):
+	 */
+	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+			cputime_to_clock_t(task_utime(p));
+
+	if (stime >= 0)
+		p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+
+	return p->prev_stime;
+}
+#endif
+
+inline cputime_t task_gtime(struct task_struct *p)
+{
+	return p->gtime;
+}
+
+/*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
  *


Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/F-9/kernel.spec,v
retrieving revision 1.763
retrieving revision 1.764
diff -u -r1.763 -r1.764
--- kernel.spec	14 Sep 2008 01:01:23 -0000	1.763
+++ kernel.spec	14 Sep 2008 01:29:23 -0000	1.764
@@ -576,7 +576,10 @@
 Patch22: linux-2.6-utrace.patch
 
 Patch41: linux-2.6-sysrq-c.patch
-Patch42: linux-2.6-x86-tune-generic.patch
+Patch42: linux-2.6-sched-clock-fix-nohz-interaction.patch
+Patch43: linux-2.6-sched-fix-process-time-monotonicity.patch
+
+Patch70: linux-2.6-x86-tune-generic.patch
 Patch75: linux-2.6-x86-debug-boot.patch
 Patch87: linux-2.6-x86-apic-dump-all-regs-v3.patch
 Patch88: linux-2.6-x86-64-fix-overlap-of-modules-and-fixmap-areas.patch
@@ -1029,6 +1032,10 @@
 # enable sysrq-c on all kernels, not only kexec
 ApplyPatch linux-2.6-sysrq-c.patch
 
+# fix sched clock monotonicity bugs
+ApplyPatch linux-2.6-sched-clock-fix-nohz-interaction.patch
+ApplyPatch linux-2.6-sched-fix-process-time-monotonicity.patch
+
 # Architecture patches
 # x86(-64)
 # Compile 686 kernels tuned for Pentium4.
@@ -1843,6 +1850,9 @@
 %kernel_variant_files -a /%{image_install_path}/xen*-%{KVERREL}.xen -e /etc/ld.so.conf.d/kernelcap-%{KVERREL}.xen.conf %{with_xen} xen
 
 %changelog
+* Sat Sep 13 2008 Chuck Ebbert <cebbert at redhat.com> 2.6.26.5-37
+- Fix problems with scheduler clock going backwards (#453257)
+
 * Sat Sep 13 2008 Chuck Ebbert <cebbert at redhat.com> 2.6.26.5-36
 - x86: fix memmap=exactmap argument, fixing kdump in some cases (#459103)
 




More information about the fedora-extras-commits mailing list