[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

RE: Poor thread performance on Linux vs. Solaris



You could try this patch, which splits the global futex_lock into
per-hash-bucket locks. It is against linux-2.6.0-test4. :)


boris

--- linux-2.6.0-test4.orig/kernel/futex.c	2003-08-23 07:53:39.000000000 +0800
+++ linux-2.6.0-test4/kernel/futex.c	2003-09-08 11:12:16.000000000 +0800
@@ -57,9 +57,16 @@
 	struct file *filp;
 };
 
+/*
+ * Replace the single global futex_lock with one spinlock per hash bucket.
+ */
+struct futex_hash_bucket {
+	struct list_head	chain;
+	spinlock_t		lock;
+};
+
 /* The key for the hash is the address + index + offset within page */
-static struct list_head futex_queues[1<<FUTEX_HASHBITS];
-static spinlock_t futex_lock = SPIN_LOCK_UNLOCKED;
+static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
 
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
@@ -74,12 +81,10 @@
 {
 	spin_lock(&current->mm->page_table_lock);
 	spin_lock(&vcache_lock);
-	spin_lock(&futex_lock);
 }
 
 static inline void unlock_futex_mm(void)
 {
-	spin_unlock(&futex_lock);
 	spin_unlock(&vcache_lock);
 	spin_unlock(&current->mm->page_table_lock);
 }
@@ -87,7 +92,7 @@
 /*
  * The physical page is shared, so we can hash on its address:
  */
-static inline struct list_head *hash_futex(struct page *page, int
offset)
+static inline struct futex_hash_bucket *hash_futex(struct page *page,
int offset)
 {
 	return &futex_queues[hash_long((unsigned long)page + offset,
 
FUTEX_HASHBITS)];
@@ -153,6 +158,7 @@
 static inline int futex_wake(unsigned long uaddr, int offset, int num)
 {
 	struct list_head *i, *next, *head;
+	struct futex_hash_bucket *bh;
 	struct page *page;
 	int ret = 0;
 
@@ -164,7 +170,9 @@
 		return -EFAULT;
 	}
 
-	head = hash_futex(page, offset);
+	bh = hash_futex(page, offset);
+	spin_lock(&bh->lock);
+	head = &bh->chain;
 
 	list_for_each_safe(i, next, head) {
 		struct futex_q *this = list_entry(i, struct futex_q,
list);
@@ -180,7 +188,7 @@
 				break;
 		}
 	}
-
+	spin_unlock(&bh->lock);
 	unlock_futex_mm();
 	put_page(page);
 
@@ -196,19 +204,19 @@
 static void futex_vcache_callback(vcache_t *vcache, struct page
*new_page)
 {
 	struct futex_q *q = container_of(vcache, struct futex_q,
vcache);
-	struct list_head *head = hash_futex(new_page, q->offset);
+	struct futex_hash_bucket *head = hash_futex(new_page,
q->offset);
 
-	spin_lock(&futex_lock);
+	spin_lock(&head->lock);
 
 	if (!list_empty(&q->list)) {
 		put_page(q->page);
 		q->page = new_page;
 		__pin_page_atomic(new_page);
 		list_del(&q->list);
-		list_add_tail(&q->list, head);
+		list_add_tail(&q->list, &head->chain);
 	}
 
-	spin_unlock(&futex_lock);
+	spin_unlock(&head->lock);
 }
 
 /*
@@ -219,6 +227,7 @@
 	unsigned long uaddr2, int offset2, int nr_wake, int nr_requeue)
 {
 	struct list_head *i, *next, *head1, *head2;
+	struct futex_hash_bucket *bh1, *bh2;
 	struct page *page1 = NULL, *page2 = NULL;
 	int ret = 0;
 
@@ -231,9 +240,13 @@
 	if (!page2)
 		goto out;
 
-	head1 = hash_futex(page1, offset1);
-	head2 = hash_futex(page2, offset2);
-
+	bh1 = hash_futex(page1, offset1);
+	bh2 = hash_futex(page2, offset2);
+	spin_lock(&bh1->lock);
+	spin_lock(&bh2->lock);
+	head1 = &bh1->chain;
+	head2 = &bh2->chain;
+       
 	list_for_each_safe(i, next, head1) {
 		struct futex_q *this = list_entry(i, struct futex_q,
list);
 
@@ -260,6 +273,8 @@
 	}
 
 out:
+	spin_unlock(&bh2->lock);
+	spin_unlock(&bh1->lock);
 	unlock_futex_mm();
 
 	if (page1)
@@ -274,7 +289,7 @@
 				unsigned long uaddr, int offset,
 				int fd, struct file *filp)
 {
-	struct list_head *head = hash_futex(page, offset);
+	struct list_head *head = &hash_futex(page, offset)->chain;
 
 	q->offset = offset;
 	q->fd = fd;
@@ -293,15 +308,16 @@
 static inline int unqueue_me(struct futex_q *q)
 {
 	int ret = 0;
+	struct futex_hash_bucket *bh = hash_futex(q->page, q->offset);
 
 	spin_lock(&vcache_lock);
-	spin_lock(&futex_lock);
+	spin_lock(&bh->lock);
 	if (!list_empty(&q->list)) {
 		list_del(&q->list);
 		__detach_vcache(&q->vcache);
 		ret = 1;
 	}
-	spin_unlock(&futex_lock);
+	spin_unlock(&bh->lock);
 	spin_unlock(&vcache_lock);
 	return ret;
 }
@@ -315,6 +331,7 @@
 	int ret = 0, curval;
 	struct page *page;
 	struct futex_q q;
+	struct futex_hash_bucket *bh;
 
 	init_waitqueue_head(&q.waiters);
 
@@ -325,6 +342,10 @@
 		unlock_futex_mm();
 		return -EFAULT;
 	}
+	
+	bh = hash_futex(page, offset);
+	spin_lock(&bh->lock);
+
 	__queue_me(&q, page, uaddr, offset, -1, NULL);
 
 	/*
@@ -332,11 +353,13 @@
 	 * It cannot schedule, so we access it with the spinlock held.
 	 */
 	if (get_user(curval, (int *)uaddr) != 0) {
+		spin_unlock(&bh->lock);
 		unlock_futex_mm();
 		ret = -EFAULT;
 		goto out;
 	}
 	if (curval != val) {
+		spin_unlock(&bh->lock);
 		unlock_futex_mm();
 		ret = -EWOULDBLOCK;
 		goto out;
@@ -351,6 +374,7 @@
 	add_wait_queue(&q.waiters, &wait);
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (!list_empty(&q.list)) {
+		spin_unlock(&bh->lock);
 		unlock_futex_mm();
 		time = schedule_timeout(time);
 	}
@@ -389,13 +413,14 @@
 			       struct poll_table_struct *wait)
 {
 	struct futex_q *q = filp->private_data;
+	struct futex_hash_bucket *bh = hash_futex(q->page, q->offset);
 	int ret = 0;
 
 	poll_wait(filp, &q->waiters, wait);
-	spin_lock(&futex_lock);
+	spin_lock(&bh->lock);
 	if (list_empty(&q->list))
 		ret = POLLIN | POLLRDNORM;
-	spin_unlock(&futex_lock);
+	spin_unlock(&bh->lock);
 
 	return ret;
 }
@@ -411,6 +436,7 @@
 {
 	struct page *page = NULL;
 	struct futex_q *q;
+	struct futex_hash_bucket *bh;
 	struct file *filp;
 	int ret;
 
@@ -466,8 +492,12 @@
 	init_waitqueue_head(&q->waiters);
 	filp->private_data = q;
 
-	__queue_me(q, page, uaddr, offset, ret, filp);
+	bh = hash_futex(page, offset);
+	spin_lock(&bh->lock);
 
+	__queue_me(q, page, uaddr, offset, ret, filp);
+	
+	spin_unlock(&bh->lock);
 	unlock_futex_mm();
 
 	/* Now we map fd to filp, so userspace can access it */
@@ -563,8 +593,10 @@
 	register_filesystem(&futex_fs_type);
 	futex_mnt = kern_mount(&futex_fs_type);
 
-	for (i = 0; i < ARRAY_SIZE(futex_queues); i++)
-		INIT_LIST_HEAD(&futex_queues[i]);
+	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
+		futex_queues[i].lock = SPIN_LOCK_UNLOCKED;
+		INIT_LIST_HEAD(&futex_queues[i].chain);
+	}
 	return 0;
 }
 __initcall(init);


> 
> On Thursday 04 September 2003 8:36 pm, Perez-Gonzalez, Inaky wrote:
> 
> > > From: Bill Soudan [mailto:bsoudan brass com]
> > >
> > > 4) try to eliminate the global locks in the kernel/futex.c file.
> > > I'm afraid it might not be that easy though - the weird freeze makes me
> > > wonder if the scheduler may play a role here as well.
> >
> > A coworker of mine played with the notion of removing global lock
> > for the futexes, replacing it with a per-hash-queue lock. He did it
> > on our rtfutex patch, and I don't know how easy would it be for
> > the mainstream futexes to adapt, but it might be worth a try; it
> > will not resolve fully the problem, but it might help a lot.
> 
> Any chance I could get my hands on this code?  From the list archives,
> I
> found this link:
> 
> http://developer.osdl.org/dev/robustmutexes/
> 
> but the rtfutex code I found still has the global lock.  Am I looking
> in
> the wrong place, or is this unreleased code?
> 
> Bill
> 
> 
> STATEMENT OF CONFIDENTIALITY
> 
> The information contained in this electronic message and any
> attachments to this message are intended for the exclusive use of the
> addressee(s)
> and may contain confidential or privileged information. If you are not
> the intended recipient, please notify SunGard Trading Systems
> immediately
> at (201) 499-5900 and destroy all copies of this message and any
> attachments.
> 
> 
> --
> Phil-list mailing list
> Phil-list redhat com
> https://www.redhat.com/mailman/listinfo/phil-list

Attachment: futex_q_lock.diff
Description: futex_q_lock.diff


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]