[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[PATCH] Cache aligned locks for PPC64



On PowerPC, the memory reservations used for atomic updates are usually associated with the (L1) cache line. For all current 64-bit implementations this is 128 bytes. Since pthread_* locks are normally smaller (32-56 bytes) than the cache line, we can see false sharing between adjacent locks in static data and arrays. In extreme cases (100 threads contending for 10 mutexes) we can measure degradations of 40-70%.

One way to address this is to apply __attribute__ ((__aligned__ (__CACHE_ALIGN_SIZE))) to each lock type in sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h. This does not change sizeof or the array stride, but does force 128-byte alignment in static data and within containing structs. This ensures that adjacent pthread locks are in different cache lines.

PowerPC32 is more complicated. Older 32-bit processors have 32-byte cache lines, but 32-bit applications will also run on 64-bit processors with 128-byte cache lines. The compromise is to set __CACHE_ALIGN_SIZE to 32 bytes for powerpc32. This is correct for 32-bit processors and may reduce some false sharing on 64-bit processors. The alternative would be to define __CACHE_ALIGN_SIZE to empty (no attribute) for powerpc32.

2003-05-12 Steven Munroe <sjmunroe us ibm com>

	* sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
	(__CACHE_ALIGN_SIZE): Define.
	(pthread_mutex_t): Add __aligned__ attribute.
	(pthread_cond_t): Add __aligned__ attribute.
	(pthread_rwlock_t): Add __aligned__ attribute.
	(pthread_barrier_t): Add __aligned__ attribute.
diff -urN nptl-0.38/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h libc23/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h
--- nptl-0.38/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h	2003-05-03 00:02:23.000000000 -0500
+++ libc23/nptl/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h	2003-05-12 15:13:39.000000000 -0500
@@ -33,6 +33,7 @@
 # define __SIZEOF_PTHREAD_RWLOCKATTR_T 8
 # define __SIZEOF_PTHREAD_BARRIER_T 32
 # define __SIZEOF_PTHREAD_BARRIERATTR_T 4
+# define __CACHE_ALIGN_SIZE 128
 #else
 # define __SIZEOF_PTHREAD_ATTR_T 36
 # define __SIZEOF_PTHREAD_MUTEX_T 24
@@ -43,6 +44,7 @@
 # define __SIZEOF_PTHREAD_RWLOCKATTR_T 8
 # define __SIZEOF_PTHREAD_BARRIER_T 20
 # define __SIZEOF_PTHREAD_BARRIERATTR_T 4
+# define __CACHE_ALIGN_SIZE 32
 #endif
 
 
@@ -73,7 +75,7 @@
   } __data;
   char __size[__SIZEOF_PTHREAD_MUTEX_T];
   long int __align;
-} pthread_mutex_t;
+} pthread_mutex_t __attribute__((__aligned__(__CACHE_ALIGN_SIZE)));
 
 typedef union
 {
@@ -96,7 +98,7 @@
   } __data;
   char __size[__SIZEOF_PTHREAD_COND_T];
   long long int __align;
-} pthread_cond_t;
+} pthread_cond_t __attribute__((__aligned__(__CACHE_ALIGN_SIZE)));
 
 typedef union
 {
@@ -151,7 +153,7 @@
 # endif
   char __size[__SIZEOF_PTHREAD_RWLOCK_T];
   long int __align;
-} pthread_rwlock_t;
+} pthread_rwlock_t __attribute__((__aligned__(__CACHE_ALIGN_SIZE)));
 
 typedef union
 {
@@ -172,7 +174,7 @@
 {
   char __size[__SIZEOF_PTHREAD_BARRIER_T];
   long int __align;
-} pthread_barrier_t;
+} pthread_barrier_t __attribute__((__aligned__(__CACHE_ALIGN_SIZE)));
 
 typedef union
 {

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]