RE: condvar wakeups



On Wed, 14 May 2003, Perez-Gonzalez, Inaky wrote:

> > you are completely right - the counts are messed up.
> 
> Do you want a patch for this, or do you want to tackle it yourself?

a patch would be more than welcome. Here are my current futex/nptl
changes, against BK-curr. Could you also add your vcache-related futex
fixes?
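
For context, a rough sketch of how the new FUTEX_REQUEUE op is meant to be
driven from userspace, e.g. by a pthread_cond_broadcast() style path: wake a
single waiter on the condvar word and requeue the remaining waiters onto the
mutex word, so they get released one at a time by the normal mutex unlock
instead of stampeding all at once. The raw syscall wrapper and the names
below are illustrative only (they assume __NR_futex is visible via
<sys/syscall.h>), not part of the patch:

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <time.h>

	#define FUTEX_WAIT	0
	#define FUTEX_WAKE	1
	#define FUTEX_REQUEUE	3

	/* illustrative wrapper for the 5-argument syscall added below */
	static long futex(int *uaddr, int op, int val,
			  const struct timespec *timeout, int *uaddr2)
	{
		return syscall(__NR_futex, uaddr, op, val, timeout, uaddr2);
	}

	/*
	 * Broadcast: wake one waiter blocked on the condvar word and move
	 * the rest onto the mutex word; the mutex unlock path then wakes
	 * them one at a time with its usual FUTEX_WAKE.
	 */
	static void cond_broadcast(int *cond_word, int *mutex_word)
	{
		futex(cond_word, FUTEX_REQUEUE, 1, NULL, mutex_word);
	}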

	Ingo

--- linux/include/linux/futex.h.orig	
+++ linux/include/linux/futex.h	
@@ -5,7 +5,8 @@
 #define FUTEX_WAIT (0)
 #define FUTEX_WAKE (1)
 #define FUTEX_FD (2)
+#define FUTEX_REQUEUE (3)
 
-extern asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, struct timespec __user *utime);
+extern asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, struct timespec __user *utime, u32 __user *uaddr2);
 
 #endif
--- linux/include/linux/wait.h.orig	
+++ linux/include/linux/wait.h	
@@ -111,15 +111,12 @@ extern void FASTCALL(__wake_up_sync(wait
 #define wake_up(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
 #define wake_up_nr(x, nr)		__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
 #define wake_up_all(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
+#define wake_up_all_sync(x)			__wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
 #define wake_up_interruptible(x)	__wake_up((x),TASK_INTERRUPTIBLE, 1)
 #define wake_up_interruptible_nr(x, nr)	__wake_up((x),TASK_INTERRUPTIBLE, nr)
 #define wake_up_interruptible_all(x)	__wake_up((x),TASK_INTERRUPTIBLE, 0)
 #define	wake_up_locked(x)		__wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)
-#ifdef CONFIG_SMP
 #define wake_up_interruptible_sync(x)   __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
-#else
-#define wake_up_interruptible_sync(x)   __wake_up((x),TASK_INTERRUPTIBLE, 1)
-#endif
 
 #define __wait_event(wq, condition) 					\
 do {									\
--- linux/include/linux/sched.h.orig	
+++ linux/include/linux/sched.h	
@@ -530,6 +530,7 @@ extern void do_timer(struct pt_regs *);
 
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
+extern int FASTCALL(wake_up_process_kick(struct task_struct * tsk));
 extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
 extern void FASTCALL(sched_exit(task_t * p));
 
--- linux/kernel/futex.c.orig	
+++ linux/kernel/futex.c	
@@ -93,14 +93,6 @@ static inline struct list_head *hash_fut
 							FUTEX_HASHBITS)];
 }
 
-/* Waiter either waiting in FUTEX_WAIT or poll(), or expecting signal */
-static inline void tell_waiter(struct futex_q *q)
-{
-	wake_up_all(&q->waiters);
-	if (q->filp)
-		send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
-}
-
 /*
  * Get kernel address of the user page and pin it.
  *
@@ -181,7 +173,9 @@ static int futex_wake(unsigned long uadd
 		if (this->page == page && this->offset == offset) {
 			list_del_init(i);
 			__detach_vcache(&this->vcache);
-			tell_waiter(this);
+			wake_up_all_sync(&this->waiters);
+			if (this->filp)
+				send_sigio(&this->filp->f_owner, this->fd, POLL_IN);
 			ret++;
 			if (ret >= num)
 				break;
@@ -216,6 +210,60 @@ static void futex_vcache_callback(vcache
 	spin_unlock(&futex_lock);
 }
 
+/*
+ * Requeue all waiters hashed on one physical page to another
+ * physical page.
+ */
+static int futex_requeue(unsigned long uaddr1, int offset1, unsigned long uaddr2, int offset2, int num)
+{
+	struct list_head *i, *next, *head1, *head2;
+	struct page *page1, *page2;
+	int ret = 0;
+
+	lock_futex_mm();
+
+	page1 = __pin_page(uaddr1 - offset1);
+	if (!page1) {
+		unlock_futex_mm();
+		return -EFAULT;
+	}
+	page2 = __pin_page(uaddr2 - offset2);
+	if (!page2) {
+		unlock_futex_mm();
+		unpin_page(page1);
+		return -EFAULT;
+	}
+
+	head1 = hash_futex(page1, offset1);
+	head2 = hash_futex(page2, offset2);
+
+	list_for_each_safe(i, next, head1) {
+		struct futex_q *this = list_entry(i, struct futex_q, list);
+
+		if (this->page == page1 && this->offset == offset1) {
+			list_del_init(i);
+			__detach_vcache(&this->vcache);
+			if (++ret <= num) {
+				wake_up_all_sync(&this->waiters);
+				if (this->filp)
+					send_sigio(&this->filp->f_owner, this->fd, POLL_IN);
+			} else {
+				list_add_tail(i, head2);
+				__attach_vcache(&this->vcache, uaddr2, current->mm, futex_vcache_callback);
+				this->offset = offset2;
+				this->page = page2;
+			}
+		}
+	}
+
+	unlock_futex_mm();
+
+	unpin_page(page1);
+	unpin_page(page2);
+
+	return ret;
+}
+
 static inline void __queue_me(struct futex_q *q, struct page *page,
 				unsigned long uaddr, int offset,
 				int fd, struct file *filp)
@@ -273,14 +320,17 @@ static int futex_wait(unsigned long uadd
 	}
 	__queue_me(&q, page, uaddr, offset, -1, NULL);
 
-	unlock_futex_mm();
-
-	/* Page is pinned, but may no longer be in this address space. */
+	/*
+	 * Page is pinned, but may no longer be in this address space.
+	 * The access cannot schedule, so we do it with the spinlock held.
+	 */
 	if (get_user(curval, (int *)uaddr) != 0) {
+		unlock_futex_mm();
 		ret = -EFAULT;
 		goto out;
 	}
 	if (curval != val) {
+		unlock_futex_mm();
 		ret = -EWOULDBLOCK;
 		goto out;
 	}
@@ -293,8 +343,10 @@ static int futex_wait(unsigned long uadd
 	 */
 	add_wait_queue(&q.waiters, &wait);
 	set_current_state(TASK_INTERRUPTIBLE);
-	if (!list_empty(&q.list))
+	if (!list_empty(&q.list)) {
+		unlock_futex_mm();
 		time = schedule_timeout(time);
+	}
 	set_current_state(TASK_RUNNING);
 	/*
 	 * NOTE: we don't remove ourselves from the waitqueue because
@@ -420,9 +472,9 @@ out:
 	return ret;
 }
 
-long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout)
+long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2)
 {
-	unsigned long pos_in_page;
+	unsigned long pos_in_page, pos_in_page2;
 	int ret;
 
 	pos_in_page = uaddr % PAGE_SIZE;
@@ -438,6 +490,14 @@ long do_futex(unsigned long uaddr, int o
 	case FUTEX_WAKE:
 		ret = futex_wake(uaddr, pos_in_page, val);
 		break;
+	case FUTEX_REQUEUE:
+		pos_in_page2 = uaddr2 % PAGE_SIZE;
+
+		/* Must be "naturally" aligned */
+		if (pos_in_page2 % sizeof(u32))
+			return -EINVAL;
+		ret = futex_requeue(uaddr, pos_in_page, uaddr2, pos_in_page2, val);
+		break;
 	case FUTEX_FD:
 		/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
 		ret = futex_fd(uaddr, pos_in_page, val);
@@ -448,7 +508,7 @@ long do_futex(unsigned long uaddr, int o
 	return ret;
 }
 
-asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, struct timespec __user *utime)
+asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, struct timespec __user *utime, u32 __user *uaddr2)
 {
 	struct timespec t;
 	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -458,7 +518,7 @@ asmlinkage long sys_futex(u32 __user *ua
 			return -EFAULT;
 		timeout = timespec_to_jiffies(&t) + 1;
 	}
-	return do_futex((unsigned long)uaddr, op, val, timeout);
+	return do_futex((unsigned long)uaddr, op, val, timeout, (unsigned long)uaddr2);
 }
 
 static struct super_block *
--- linux/kernel/fork.c.orig	
+++ linux/kernel/fork.c	
@@ -457,7 +457,7 @@ void mm_release(struct task_struct *tsk,
 		 * not set up a proper pointer then tough luck.
 		 */
 		put_user(0, tidptr);
-		sys_futex(tidptr, FUTEX_WAKE, 1, NULL);
+		sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL);
 	}
 }
 
--- linux/kernel/compat.c.orig	
+++ linux/kernel/compat.c	
@@ -214,7 +214,7 @@ asmlinkage long compat_sys_sigprocmask(i
-extern long do_futex(unsigned long, int, int, unsigned long);
+extern long do_futex(unsigned long, int, int, unsigned long, unsigned long);
 
 asmlinkage long compat_sys_futex(u32 *uaddr, int op, int val,
-		struct compat_timespec *utime)
+		struct compat_timespec *utime, u32 *uaddr2)
 {
 	struct timespec t;
 	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -224,7 +224,7 @@ asmlinkage long compat_sys_futex(u32 *ua
 			return -EFAULT;
 		timeout = timespec_to_jiffies(&t) + 1;
 	}
-	return do_futex((unsigned long)uaddr, op, val, timeout);
+	return do_futex((unsigned long)uaddr, op, val, timeout, (unsigned long)uaddr2);
 }
 
 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim);
--- linux/kernel/sched.c.orig	
+++ linux/kernel/sched.c	
@@ -475,6 +475,7 @@ void kick_if_running(task_t * p)
  * @p: the to-be-woken-up thread
  * @state: the mask of task states that can be woken
  * @sync: do a synchronous wakeup?
+ * @kick: kick the CPU if the task is already running?
  *
  * Put it on the run-queue if it's not already there. The "current"
  * thread is always on the run-queue (except when the actual
@@ -484,7 +485,7 @@ void kick_if_running(task_t * p)
  *
  * returns failure only if the task is already active.
  */
-static int try_to_wake_up(task_t * p, unsigned int state, int sync)
+static int try_to_wake_up(task_t * p, unsigned int state, int sync, int kick)
 {
 	int success = 0, requeue_waker = 0;
 	unsigned long flags;
@@ -518,7 +519,9 @@ repeat_lock_task:
 					resched_task(rq->curr);
 			}
 			success = 1;
-		}
+		} else
+			if (unlikely(kick) && task_running(rq, p))
+				resched_task(rq->curr);
 		p->state = TASK_RUNNING;
 	}
 	task_rq_unlock(rq, &flags);
@@ -543,12 +546,17 @@ repeat_lock_task:
 
 int wake_up_process(task_t * p)
 {
-	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
+	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 0);
+}
+
+int wake_up_process_kick(task_t * p)
+{
+	return try_to_wake_up(p, TASK_STOPPED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 1);
 }
 
 int wake_up_state(task_t *p, unsigned int state)
 {
-	return try_to_wake_up(p, state, 0);
+	return try_to_wake_up(p, state, 0, 0);
 }
 
 /*
@@ -1389,7 +1397,7 @@ need_resched:
 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync)
 {
 	task_t *p = curr->task;
-	return try_to_wake_up(p, mode, sync);
+	return try_to_wake_up(p, mode, sync, 0);
 }
 
 /*
@@ -1440,8 +1448,6 @@ void __wake_up_locked(wait_queue_head_t 
 	__wake_up_common(q, mode, 1, 0);
 }
 
-#ifdef CONFIG_SMP
-
 /**
  * __wake_up - sync- wake up threads blocked on a waitqueue.
  * @q: the waitqueue
@@ -1452,6 +1458,8 @@ void __wake_up_locked(wait_queue_head_t 
  * away soon, so while the target thread will be woken up, it will not
  * be migrated to another CPU - ie. the two threads are 'synchronized'
  * with each other. This can prevent needless bouncing between CPUs.
+ *
+ * On UP it can prevent extra preemption.
  */
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 {
@@ -1468,8 +1476,6 @@ void __wake_up_sync(wait_queue_head_t *q
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 
-#endif
-
 void complete(struct completion *x)
 {
 	unsigned long flags;
--- linux/kernel/signal.c.orig	
+++ linux/kernel/signal.c	
@@ -521,18 +521,6 @@ inline void signal_wake_up(struct task_s
 	set_tsk_thread_flag(t,TIF_SIGPENDING);
 
 	/*
-	 * If the task is running on a different CPU
-	 * force a reschedule on the other CPU to make
-	 * it notice the new signal quickly.
-	 *
-	 * The code below is a tad loose and might occasionally
-	 * kick the wrong CPU if we catch the process in the
-	 * process of changing - but no harm is done by that
-	 * other than doing an extra (lightweight) IPI interrupt.
-	 */
-	if (t->state == TASK_RUNNING)
-		kick_if_running(t);
-	/*
 	 * If resume is set, we want to wake it up in the TASK_STOPPED case.
 	 * We don't check for TASK_STOPPED because there is a race with it
 	 * executing another processor and just now entering stopped state.
@@ -543,7 +531,7 @@ inline void signal_wake_up(struct task_s
 	if (resume)
 		mask |= TASK_STOPPED;
 	if (t->state & mask) {
-		wake_up_process(t);
+		wake_up_process_kick(t);
 		return;
 	}
 }



