rpms/kernel/devel kernel-2.6.spec, 1.3279, 1.3280 linux-2.6-debug-nmi-timeout.patch, 1.1, 1.2 linux-2.6-sched-cfs.patch, 1.9, 1.10

Dave Jones (davej) fedora-extras-commits at redhat.com
Fri Jul 6 21:13:38 UTC 2007


Author: davej

Update of /cvs/pkgs/rpms/kernel/devel
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv21824

Modified Files:
	kernel-2.6.spec linux-2.6-debug-nmi-timeout.patch 
	linux-2.6-sched-cfs.patch 
Log Message:
* Fri Jul 06 2007 Dave Jones <davej at redhat.com>
- Update CFS to v19.



Index: kernel-2.6.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/kernel-2.6.spec,v
retrieving revision 1.3279
retrieving revision 1.3280
diff -u -r1.3279 -r1.3280
--- kernel-2.6.spec	6 Jul 2007 18:58:01 -0000	1.3279
+++ kernel-2.6.spec	6 Jul 2007 21:13:02 -0000	1.3280
@@ -2052,9 +2052,12 @@
 %endif
 
 %changelog
+* Fri Jul 06 2007 Dave Jones <davej at redhat.com>
+- Update CFS to v19.
+
 * Fri Jul  6 2007 Jeremy Katz <katzj at redhat.com>
-- Add minimal patch from markmc for ICH9 support in e1000 while the new 
- driver works itself out upstream
+- Add minimal patch from markmc for ICH9 support in e1000 while the new
+  driver works itself out upstream
 
 * Fri Jul 06 2007 Chuck Ebbert <cebbert at redhat.com>
 - add Intel ICH8M (Santa Rosa) PCI ID to ata_piix driver

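For context on the ata_piix changelog entry above: adding a PCI ID to a libata driver is normally a one-line addition to the driver's PCI device table in drivers/ata/ata_piix.c. A hedged illustration of the shape of such an entry (the device ID and the board-type constant below are assumptions for illustration, not taken from the actual commit):

    static const struct pci_device_id piix_pci_tbl[] = {
        /* ... existing entries elided ... */
        /* ICH8M (Santa Rosa) SATA - ID 0x2828 here is illustrative */
        { 0x8086, 0x2828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci },
        { }    /* terminating all-zero entry */
    };

The kernel matches a probed device against this table at PCI enumeration time, so no other driver change is needed for a compatible new chipset revision.
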
linux-2.6-debug-nmi-timeout.patch:

Index: linux-2.6-debug-nmi-timeout.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/linux-2.6-debug-nmi-timeout.patch,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- linux-2.6-debug-nmi-timeout.patch	13 Apr 2007 20:40:51 -0000	1.1
+++ linux-2.6-debug-nmi-timeout.patch	6 Jul 2007 21:13:02 -0000	1.2
@@ -20,11 +20,11 @@
  			die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
  				panic_on_timeout);
  	} else {
---- linux-2.6.20.noarch/lib/Kconfig.debug~	2007-04-13 15:53:48.000000000 -0400
-+++ linux-2.6.20.noarch/lib/Kconfig.debug	2007-04-13 15:55:18.000000000 -0400
-@@ -133,6 +133,14 @@ config DETECT_SOFTLOCKUP
- 	   can be detected via the NMI-watchdog, on platforms that
- 	   support it.)
+--- linux-2.6.21.noarch/lib/Kconfig.debug~	2007-07-06 17:05:46.000000000 -0400
++++ linux-2.6.21.noarch/lib/Kconfig.debug	2007-07-06 17:06:07.000000000 -0400
+@@ -126,6 +126,14 @@ config SCHEDSTATS
+ 	  application, you can say N to avoid the very slight overhead
+ 	  this adds.
  
 +config DEBUG_NMI_TIMEOUT
 +	int "Number of seconds before NMI timeout"
@@ -34,6 +34,6 @@
 +	  This value is the number of seconds the NMI watchdog will tick
 +	  before it decides the machine has hung.
 +
- config SCHEDSTATS
- 	bool "Collect scheduler statistics"
+ config TIMER_STATS
+ 	bool "Collect kernel timers statistics"
  	depends on DEBUG_KERNEL && PROC_FS

linux-2.6-sched-cfs.patch:

View full diff with command:
/usr/bin/cvs -f diff  -kk -u -N -r 1.9 -r 1.10 linux-2.6-sched-cfs.patch
Index: linux-2.6-sched-cfs.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/linux-2.6-sched-cfs.patch,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- linux-2.6-sched-cfs.patch	29 Jun 2007 07:03:02 -0000	1.9
+++ linux-2.6-sched-cfs.patch	6 Jul 2007 21:13:02 -0000	1.10
@@ -1,40 +1,8 @@
----
- Documentation/kernel-parameters.txt |   43 
- Documentation/sched-design-CFS.txt  |  119 +
- Makefile                            |    2 
- arch/i386/kernel/smpboot.c          |   12 
- arch/i386/kernel/tsc.c              |    9 
- arch/ia64/kernel/setup.c            |    6 
- arch/mips/kernel/smp.c              |   11 
- arch/sparc/kernel/smp.c             |   10 
- arch/sparc64/kernel/smp.c           |   27 
- block/cfq-iosched.c                 |    3 
- fs/proc/array.c                     |   61 
- fs/proc/base.c                      |   64 
- include/asm-generic/bitops/sched.h  |   21 
- include/linux/hardirq.h             |   13 
- include/linux/sched.h               |  159 +-
- include/linux/topology.h            |   14 
- init/main.c                         |    5 
- kernel/delayacct.c                  |   10 
- kernel/exit.c                       |    3 
- kernel/fork.c                       |    5 
- kernel/posix-cpu-timers.c           |   34 
- kernel/sched.c                      | 2843 ++++++++++++------------------------
- kernel/sched_debug.c                |  260 +++
- kernel/sched_fair.c                 |  884 +++++++++++
- kernel/sched_idletask.c             |   68 
- kernel/sched_rt.c                   |  215 ++
- kernel/sched_stats.h                |  235 ++
- kernel/softirq.c                    |    1 
- kernel/sysctl.c                     |   76 
- 29 files changed, 3165 insertions(+), 2048 deletions(-)
-
 Index: linux/Documentation/kernel-parameters.txt
 ===================================================================
 --- linux.orig/Documentation/kernel-parameters.txt
 +++ linux/Documentation/kernel-parameters.txt
-@@ -1019,49 +1019,6 @@ and is between 256 and 4096 characters. 
+@@ -1014,49 +1014,6 @@ and is between 256 and 4096 characters. 
  
  	mga=		[HW,DRM]
  
@@ -440,7 +408,7 @@
  	       	p->tgid, p->pid,
  	       	pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
  		pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
-@@ -312,6 +310,43 @@ int proc_pid_status(struct task_struct *
+@@ -312,6 +310,41 @@ int proc_pid_status(struct task_struct *
  	return buffer - orig;
  }
  
@@ -448,19 +416,18 @@
 +{
 +	clock_t utime = cputime_to_clock_t(p->utime),
 +		total = utime + cputime_to_clock_t(p->stime);
++	u64 temp;
 +
 +	/*
-+	 * Use CFS's precise accounting, if available:
++	 * Use CFS's precise accounting:
 +	 */
-+	if (!(sysctl_sched_features & 128)) {
-+		u64 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
++	temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
 +
-+		if (total) {
-+			temp *= utime;
-+			do_div(temp, total);
-+		}
-+		utime = (clock_t)temp;
++	if (total) {
++		temp *= utime;
++		do_div(temp, total);
 +	}
++	utime = (clock_t)temp;
 +
 +	return utime;
 +}
@@ -470,12 +437,11 @@
 +	clock_t stime = cputime_to_clock_t(p->stime);
 +
 +	/*
-+	 * Use CFS's precise accounting, if available (we subtract
-+	 * utime from the total, to make sure the total observed
-+	 * by userspace grows monotonically - apps rely on that):
++	 * Use CFS's precise accounting. (we subtract utime from
++	 * the total, to make sure the total observed by userspace
++	 * grows monotonically - apps rely on that):
 +	 */
-+	if (!(sysctl_sched_features & 128))
-+		stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
++	stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
 +
 +	return stime;
 +}
@@ -484,7 +450,7 @@
  static int do_task_stat(struct task_struct *task, char * buffer, int whole)
  {
  	unsigned long vsize, eip, esp, wchan = ~0UL;
-@@ -326,7 +361,8 @@ static int do_task_stat(struct task_stru
+@@ -326,7 +359,8 @@ static int do_task_stat(struct task_stru
  	unsigned long long start_time;
  	unsigned long cmin_flt = 0, cmaj_flt = 0;
  	unsigned long  min_flt = 0,  maj_flt = 0;
@@ -494,7 +460,7 @@
  	unsigned long rsslim = 0;
  	char tcomm[sizeof(task->comm)];
  	unsigned long flags;
-@@ -344,7 +380,8 @@ static int do_task_stat(struct task_stru
+@@ -344,7 +378,8 @@ static int do_task_stat(struct task_stru
  
  	sigemptyset(&sigign);
  	sigemptyset(&sigcatch);
@@ -504,7 +470,7 @@
  
  	rcu_read_lock();
  	if (lock_task_sighand(task, &flags)) {
-@@ -370,15 +407,15 @@ static int do_task_stat(struct task_stru
+@@ -370,15 +405,15 @@ static int do_task_stat(struct task_stru
  			do {
  				min_flt += t->min_flt;
  				maj_flt += t->maj_flt;
@@ -524,7 +490,7 @@
  		}
  
  		sid = signal_session(sig);
-@@ -394,8 +431,8 @@ static int do_task_stat(struct task_stru
+@@ -394,8 +429,8 @@ static int do_task_stat(struct task_stru
  	if (!whole) {
  		min_flt = task->min_flt;
  		maj_flt = task->maj_flt;
@@ -535,7 +501,7 @@
  	}
  
  	/* scale priority and nice values from timeslices to -20..20 */
-@@ -426,8 +463,8 @@ static int do_task_stat(struct task_stru
+@@ -426,8 +461,8 @@ static int do_task_stat(struct task_stru
  		cmin_flt,
  		maj_flt,
  		cmaj_flt,
@@ -559,10 +525,11 @@
  			task->sched_info.cpu_time,
  			task->sched_info.run_delay,
  			task->sched_info.pcnt);
-@@ -929,6 +929,66 @@ static const struct file_operations proc
+@@ -929,6 +929,69 @@ static const struct file_operations proc
  };
  #endif
  
++#ifdef CONFIG_SCHED_DEBUG
 +/*
 + * Print out various scheduling related per-task fields:
 + */
@@ -623,22 +590,28 @@
 +	.release	= seq_release,
 +};
 +
++#endif
++
  static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
  	struct inode *inode = dentry->d_inode;
-@@ -1963,6 +2023,7 @@ static const struct pid_entry tgid_base_
+@@ -1963,6 +2026,9 @@ static const struct pid_entry tgid_base_
  	INF("environ",    S_IRUSR, pid_environ),
  	INF("auxv",       S_IRUSR, pid_auxv),
  	INF("status",     S_IRUGO, pid_status),
++#ifdef CONFIG_SCHED_DEBUG
 +	REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
++#endif
  	INF("cmdline",    S_IRUGO, pid_cmdline),
  	INF("stat",       S_IRUGO, tgid_stat),
  	INF("statm",      S_IRUGO, pid_statm),
-@@ -2247,6 +2308,7 @@ static const struct pid_entry tid_base_s
+@@ -2247,6 +2313,9 @@ static const struct pid_entry tid_base_s
  	INF("environ",   S_IRUSR, pid_environ),
  	INF("auxv",      S_IRUSR, pid_auxv),
  	INF("status",    S_IRUGO, pid_status),
++#ifdef CONFIG_SCHED_DEBUG
 +	REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
++#endif
  	INF("cmdline",   S_IRUGO, pid_cmdline),
  	INF("stat",      S_IRUGO, tid_stat),
  	INF("statm",     S_IRUGO, pid_statm),
@@ -733,17 +706,34 @@
  struct sched_param {
  	int sched_priority;
  };
-@@ -130,6 +133,9 @@ extern unsigned long nr_active(void);
+@@ -130,6 +133,26 @@ extern unsigned long nr_active(void);
  extern unsigned long nr_iowait(void);
  extern unsigned long weighted_cpuload(const int cpu);
[...3447 lines suppressed...]
 +
 +	/* dequeue is not valid, we print a debug message there: */
@@ -6496,8 +7034,7 @@
 +	.pick_next_task		= pick_next_task_idle,
 +	.put_prev_task		= put_prev_task_idle,
 +
-+	.load_balance_start	= load_balance_start_idle,
-+	/* no .load_balance_next for idle tasks */
++	.load_balance		= load_balance_idle,
 +
 +	.task_tick		= task_tick_idle,
 +	/* no .task_new for idle tasks */
@@ -6506,7 +7043,7 @@
 ===================================================================
 --- /dev/null
 +++ linux/kernel/sched_rt.c
-@@ -0,0 +1,215 @@
+@@ -0,0 +1,255 @@
 +/*
 + * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 + * policies)
@@ -6521,7 +7058,7 @@
 +	struct task_struct *curr = rq->curr;
 +	u64 delta_exec;
 +
-+	if (!has_rt_policy(curr))
++	if (!task_has_rt_policy(curr))
 +		return;
 +
 +	delta_exec = now - curr->se.exec_start;
@@ -6616,8 +7153,9 @@
 + * achieve that by always pre-iterating before returning
 + * the current task:
 + */
-+static struct task_struct * load_balance_start_rt(struct rq *rq)
++static struct task_struct *load_balance_start_rt(void *arg)
 +{
++	struct rq *rq = arg;
 +	struct prio_array *array = &rq->rt.active;
 +	struct list_head *head, *curr;
 +	struct task_struct *p;
@@ -6641,8 +7179,9 @@
 +	return p;
 +}
 +
-+static struct task_struct * load_balance_next_rt(struct rq *rq)
++static struct task_struct *load_balance_next_rt(void *arg)
 +{
++	struct rq *rq = arg;
 +	struct prio_array *array = &rq->rt.active;
 +	struct list_head *head, *curr;
 +	struct task_struct *p;
@@ -6679,6 +7218,44 @@
 +	return p;
 +}
 +
++static int
++load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
++			unsigned long max_nr_move, unsigned long max_load_move,
++			struct sched_domain *sd, enum cpu_idle_type idle,
++			int *all_pinned, unsigned long *load_moved)
++{
++	int this_best_prio, best_prio, best_prio_seen = 0;
++	int nr_moved;
++	struct rq_iterator rt_rq_iterator;
++
++	best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
++	this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
++
++	/*
++	 * Enable handling of the case where there is more than one task
++	 * with the best priority.   If the current running task is one
++	 * of those with prio==best_prio we know it won't be moved
++	 * and therefore it's safe to override the skip (based on load)
++	 * of any task we find with that prio.
++	 */
++	if (busiest->curr->prio == best_prio)
++		best_prio_seen = 1;
++
++	rt_rq_iterator.start = load_balance_start_rt;
++	rt_rq_iterator.next = load_balance_next_rt;
++	/* pass 'busiest' rq argument into
++	 * load_balance_[start|next]_rt iterators
++	 */
++	rt_rq_iterator.arg = busiest;
++
++	nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
++			max_load_move, sd, idle, all_pinned, load_moved,
++			this_best_prio, best_prio, best_prio_seen,
++			&rt_rq_iterator);
++
++	return nr_moved;
++}
++
 +static void task_tick_rt(struct rq *rq, struct task_struct *p)
 +{
 +	/*
@@ -6688,13 +7265,14 @@
 +	if (p->policy != SCHED_RR)
 +		return;
 +
-+	if (!(--p->time_slice)) {
-+		p->time_slice = static_prio_timeslice(p->static_prio);
-+		set_tsk_need_resched(p);
++	if (--p->time_slice)
++		return;
 +
-+		/* put it at the end of the queue: */
-+		requeue_task_rt(rq, p);
-+	}
++	p->time_slice = static_prio_timeslice(p->static_prio);
++	set_tsk_need_resched(p);
++
++	/* put it at the end of the queue: */
++	requeue_task_rt(rq, p);
 +}
 +
 +/*
@@ -6716,8 +7294,7 @@
 +	.pick_next_task		= pick_next_task_rt,
 +	.put_prev_task		= put_prev_task_rt,
 +
-+	.load_balance_start	= load_balance_start_rt,
-+	.load_balance_next	= load_balance_next_rt,
++	.load_balance		= load_balance_rt,
 +
 +	.task_tick		= task_tick_rt,
 +	.task_new		= task_new_rt,
@@ -7063,3 +7640,71 @@
  		.ctl_name	= KERN_PANIC,
  		.procname	= "panic",
  		.data		= &panic_timeout,
+Index: linux/kernel/time.c
+===================================================================
+--- linux.orig/kernel/time.c
++++ linux/kernel/time.c
+@@ -57,14 +57,17 @@ EXPORT_SYMBOL(sys_tz);
+  */
+ asmlinkage long sys_time(time_t __user * tloc)
+ {
+-	time_t i;
+-	struct timeval tv;
++	/*
++	 * We read xtime.tv_sec atomically - it's updated
++	 * atomically by update_wall_time(), so no need to
++	 * even read-lock the xtime seqlock:
++	 */
++	time_t i = xtime.tv_sec;
+ 
+-	do_gettimeofday(&tv);
+-	i = tv.tv_sec;
++	smp_rmb(); /* sys_time() results are coherent */
+ 
+ 	if (tloc) {
+-		if (put_user(i,tloc))
++		if (put_user(i, tloc))
+ 			i = -EFAULT;
+ 	}
+ 	return i;
+@@ -373,6 +376,20 @@ void do_gettimeofday (struct timeval *tv
+ 
+ 	tv->tv_sec = sec;
+ 	tv->tv_usec = usec;
++
++	/*
++	 * Make sure xtime.tv_sec [returned by sys_time()] always
++	 * follows the gettimeofday() result precisely. This
++	 * condition is extremely unlikely, it can hit at most
++	 * once per second:
++	 */
++	if (unlikely(xtime.tv_sec != tv->tv_sec)) {
++		unsigned long flags;
++
++		write_seqlock_irqsave(&xtime_lock, flags);
++		update_wall_time();
++		write_sequnlock_irqrestore(&xtime_lock, flags);
++	}
+ }
+ 
+ EXPORT_SYMBOL(do_gettimeofday);
+Index: linux/lib/Kconfig.debug
+===================================================================
+--- linux.orig/lib/Kconfig.debug
++++ linux/lib/Kconfig.debug
+@@ -105,6 +105,15 @@ config DETECT_SOFTLOCKUP
+ 	   can be detected via the NMI-watchdog, on platforms that
+ 	   support it.)
+ 
++config SCHED_DEBUG
++	bool "Collect scheduler debugging info"
++	depends on DEBUG_KERNEL && PROC_FS
++	default y
++	help
++	  If you say Y here, the /proc/sched_debug file will be provided
++	  that can help debug the scheduler. The runtime overhead of this
++	  option is minimal.
++
+ config SCHEDSTATS
+ 	bool "Collect scheduler statistics"
+ 	depends on DEBUG_KERNEL && PROC_FS



