* [PATCHv3] Implement gcc 64bit atomic operations and fix the broken 8/16 bit ones
@ 2014-09-12 16:02 Guy Martin
  2014-09-15 21:17 ` Helge Deller
  0 siblings, 1 reply; 3+ messages in thread
From: Guy Martin @ 2014-09-12 16:02 UTC (permalink / raw)
  To: linux-parisc

Hi,

Here is the latest set of patches for the 8/16/64 bit atomic gcc builtins.

On the kernel side, a new LWS is implemented, allowing for variable size
CAS.
On gcc's side, the new LWS is used for the 8/16/64 bit atomic builtins,
keeping the old LWS for 32bit ops.
Since the 8 and 16 bit ops were broken, it's best to use the new LWS
unconditionally for these.
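
For illustration, these are the type-generic __sync builtins on sub-word
and 64bit operands; the compiler picks the _1/_2/_8 variants from the
operand size. A minimal usage sketch (not part of the patches):

    /* parisc user code like this now routes through the new LWS. */
    unsigned char  flag  = 1;
    unsigned short count = 0;
    long long      total = 0;

    __sync_fetch_and_add (&count, 1);                /* 16 bit: was broken */
    __sync_bool_compare_and_swap (&total, 0LL, 7LL); /* 64 bit: new        */
    __sync_lock_release (&flag);                     /* 8 bit: was broken  */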

Hopefully, these are the final patches!

Regards,
   Guy

[-- Attachment #2: gcc-64bit-atomic-cas2_v3.patch --]
[-- Type: text/x-diff; name=gcc-64bit-atomic-cas2_v3.patch, Size: 13218 bytes --]


2014-09-12  Guy Martin  <gmsoft@tuxicoman.be>

	* config/pa/linux-atomic.c: Use new LWS CAS implementation for atomic
	builtins. Implement 64bit atomic builtins.


Index: libgcc/config/pa/linux-atomic.c
===================================================================
--- libgcc/config/pa/linux-atomic.c	(revision 212942)
+++ libgcc/config/pa/linux-atomic.c	(working copy)
@@ -75,6 +75,30 @@
   return lws_errno;
 }
 
+static inline long
+__kernel_cmpxchg2 (void * oldval, void * newval, void *mem, int val_size)
+{
+  register unsigned long lws_mem asm("r26") = (unsigned long) (mem);
+  register long lws_ret   asm("r28");
+  register long lws_errno asm("r21");
+  register unsigned long lws_old asm("r25") = (unsigned long) oldval;
+  register unsigned long lws_new asm("r24") = (unsigned long) newval;
+  register int lws_size asm("r23") = val_size;
+  asm volatile (	"ble	0xb0(%%sr2, %%r0)	\n\t"
+			"ldi	%2, %%r20		\n\t"
+	: "=r" (lws_ret), "=r" (lws_errno)
+	: "i" (2), "r" (lws_mem), "r" (lws_old), "r" (lws_new), "r" (lws_size)
+	: "r1", "r20", "r22", "r29", "r31", "fr4", "memory"
+  );
+  if (__builtin_expect (lws_errno == -EFAULT || lws_errno == -ENOSYS, 0))
+    ABORT_INSTRUCTION;
+
+  /* If the kernel LWS call succeeded but the CAS failed, return -EBUSY.  */
+  if (!lws_errno && lws_ret)
+    lws_errno = -EBUSY;
+
+  return lws_errno;
+}
 #define HIDDEN __attribute__ ((visibility ("hidden")))
 
 /* Big endian masks  */
@@ -84,69 +108,101 @@
 #define MASK_1 0xffu
 #define MASK_2 0xffffu
 
-#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP)				\
-  int HIDDEN								\
-  __sync_fetch_and_##OP##_4 (int *ptr, int val)				\
+#define FETCH_AND_OP_2(OP, PFX_OP, INF_OP, TYPE, WIDTH, INDEX)		\
+  TYPE HIDDEN								\
+  __sync_fetch_and_##OP##_##WIDTH (TYPE *ptr, TYPE val)			\
   {									\
-    int failure, tmp;							\
+    TYPE tmp, newval;							\
+    int failure;							\
 									\
     do {								\
       tmp = *ptr;							\
-      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);	\
+      newval = PFX_OP (tmp INF_OP val);					\
+      failure = __kernel_cmpxchg2 (&tmp, &newval, ptr, INDEX);		\
     } while (failure != 0);						\
 									\
     return tmp;								\
   }
 
-FETCH_AND_OP_WORD (add,   , +)
-FETCH_AND_OP_WORD (sub,   , -)
-FETCH_AND_OP_WORD (or,    , |)
-FETCH_AND_OP_WORD (and,   , &)
-FETCH_AND_OP_WORD (xor,   , ^)
-FETCH_AND_OP_WORD (nand, ~, &)
+FETCH_AND_OP_2 (add,   , +, long long, 8, 3)
+FETCH_AND_OP_2 (sub,   , -, long long, 8, 3)
+FETCH_AND_OP_2 (or,    , |, long long, 8, 3)
+FETCH_AND_OP_2 (and,   , &, long long, 8, 3)
+FETCH_AND_OP_2 (xor,   , ^, long long, 8, 3)
+FETCH_AND_OP_2 (nand, ~, &, long long, 8, 3)
 
-#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
-#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+FETCH_AND_OP_2 (add,   , +, unsigned short, 2, 1)
+FETCH_AND_OP_2 (sub,   , -, unsigned short, 2, 1)
+FETCH_AND_OP_2 (or,    , |, unsigned short, 2, 1)
+FETCH_AND_OP_2 (and,   , &, unsigned short, 2, 1)
+FETCH_AND_OP_2 (xor,   , ^, unsigned short, 2, 1)
+FETCH_AND_OP_2 (nand, ~, &, unsigned short, 2, 1)
 
-/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
-   subword-sized quantities.  */
+FETCH_AND_OP_2 (add,   , +, unsigned char, 1, 0)
+FETCH_AND_OP_2 (sub,   , -, unsigned char, 1, 0)
+FETCH_AND_OP_2 (or,    , |, unsigned char, 1, 0)
+FETCH_AND_OP_2 (and,   , &, unsigned char, 1, 0)
+FETCH_AND_OP_2 (xor,   , ^, unsigned char, 1, 0)
+FETCH_AND_OP_2 (nand, ~, &, unsigned char, 1, 0)
 
-#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN)	\
+#define OP_AND_FETCH_2(OP, PFX_OP, INF_OP, TYPE, WIDTH, INDEX)		\
   TYPE HIDDEN								\
-  NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val)			\
+  __sync_##OP##_and_fetch_##WIDTH (TYPE *ptr, TYPE val)			\
   {									\
-    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
-    unsigned int mask, shift, oldval, newval;				\
+    TYPE tmp, newval;							\
     int failure;							\
 									\
-    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
-    mask = MASK_##WIDTH << shift;					\
+    do {								\
+      tmp = *ptr;							\
+      newval = PFX_OP (tmp INF_OP val);					\
+      failure = __kernel_cmpxchg2 (&tmp, &newval, ptr, INDEX);		\
+    } while (failure != 0);						\
 									\
+    return PFX_OP (tmp INF_OP val);					\
+  }
+
+OP_AND_FETCH_2 (add,   , +, long long, 8, 3)
+OP_AND_FETCH_2 (sub,   , -, long long, 8, 3)
+OP_AND_FETCH_2 (or,    , |, long long, 8, 3)
+OP_AND_FETCH_2 (and,   , &, long long, 8, 3)
+OP_AND_FETCH_2 (xor,   , ^, long long, 8, 3)
+OP_AND_FETCH_2 (nand, ~, &, long long, 8, 3)
+
+OP_AND_FETCH_2 (add,   , +, unsigned short, 2, 1)
+OP_AND_FETCH_2 (sub,   , -, unsigned short, 2, 1)
+OP_AND_FETCH_2 (or,    , |, unsigned short, 2, 1)
+OP_AND_FETCH_2 (and,   , &, unsigned short, 2, 1)
+OP_AND_FETCH_2 (xor,   , ^, unsigned short, 2, 1)
+OP_AND_FETCH_2 (nand, ~, &, unsigned short, 2, 1)
+
+OP_AND_FETCH_2 (add,   , +, unsigned char, 1, 0)
+OP_AND_FETCH_2 (sub,   , -, unsigned char, 1, 0)
+OP_AND_FETCH_2 (or,    , |, unsigned char, 1, 0)
+OP_AND_FETCH_2 (and,   , &, unsigned char, 1, 0)
+OP_AND_FETCH_2 (xor,   , ^, unsigned char, 1, 0)
+OP_AND_FETCH_2 (nand, ~, &, unsigned char, 1, 0)
+
+#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP)				\
+  int HIDDEN								\
+  __sync_fetch_and_##OP##_4 (int *ptr, int val)				\
+  {									\
+    int failure, tmp;							\
+									\
     do {								\
-      oldval = *wordptr;						\
-      newval = ((PFX_OP (((oldval & mask) >> shift)			\
-                         INF_OP (unsigned int) val)) << shift) & mask;	\
-      newval |= oldval & ~mask;						\
-      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
+      tmp = *ptr;							\
+      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);	\
     } while (failure != 0);						\
 									\
-    return (RETURN & mask) >> shift;					\
+    return tmp;								\
   }
 
-SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
+FETCH_AND_OP_WORD (add,   , +)
+FETCH_AND_OP_WORD (sub,   , -)
+FETCH_AND_OP_WORD (or,    , |)
+FETCH_AND_OP_WORD (and,   , &)
+FETCH_AND_OP_WORD (xor,   , ^)
+FETCH_AND_OP_WORD (nand, ~, &)
 
-SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
-
 #define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP)				\
   int HIDDEN								\
   __sync_##OP##_and_fetch_4 (int *ptr, int val)				\
@@ -168,20 +224,42 @@
 OP_AND_FETCH_WORD (xor,   , ^)
 OP_AND_FETCH_WORD (nand, ~, &)
 
-SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval)
+typedef unsigned char bool;
 
-SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
+#define COMPARE_AND_SWAP_2(TYPE, WIDTH, INDEX)					\
+  TYPE HIDDEN									\
+  __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, TYPE newval)	\
+  {										\
+    TYPE actual_oldval;								\
+    int fail;									\
+										\
+    while (1)									\
+      {										\
+	actual_oldval = *ptr;							\
+										\
+	if (__builtin_expect (oldval != actual_oldval, 0))			\
+	  return actual_oldval;							\
+										\
+	fail = __kernel_cmpxchg2 (&actual_oldval, &newval, ptr, INDEX);		\
+										\
+	if (__builtin_expect (!fail, 1))					\
+	  return actual_oldval;							\
+    }										\
+  }										\
+										\
+  bool HIDDEN									\
+  __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,	TYPE newval)	\
+  {										\
+    int failure = __kernel_cmpxchg2 (&oldval, &newval, ptr, INDEX);		\
+    return (failure == 0);							\
+  }
 
+
+
+COMPARE_AND_SWAP_2(long long,      8, 3)
+COMPARE_AND_SWAP_2(unsigned short, 2, 1)
+COMPARE_AND_SWAP_2(unsigned char,  1, 0)
+
 int HIDDEN
 __sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
 {
@@ -201,41 +279,6 @@
     }
 }
 
-#define SUBWORD_VAL_CAS(TYPE, WIDTH)					\
-  TYPE HIDDEN								\
-  __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,		\
-				       TYPE newval)			\
-  {									\
-    int *wordptr = (int *)((unsigned long) ptr & ~3), fail;		\
-    unsigned int mask, shift, actual_oldval, actual_newval;		\
-									\
-    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
-    mask = MASK_##WIDTH << shift;					\
-									\
-    while (1)								\
-      {									\
-	actual_oldval = *wordptr;					\
-									\
-	if (__builtin_expect (((actual_oldval & mask) >> shift)		\
-			      != (unsigned int) oldval, 0))		\
-	  return (actual_oldval & mask) >> shift;			\
-									\
-	actual_newval = (actual_oldval & ~mask)				\
-			| (((unsigned int) newval << shift) & mask);	\
-									\
-	fail = __kernel_cmpxchg (actual_oldval, actual_newval,		\
-				 wordptr);				\
-									\
-	if (__builtin_expect (!fail, 1))				\
-	  return (actual_oldval & mask) >> shift;			\
-      }									\
-  }
-
-SUBWORD_VAL_CAS (unsigned short, 2)
-SUBWORD_VAL_CAS (unsigned char,  1)
-
-typedef unsigned char bool;
-
 bool HIDDEN
 __sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval)
 {
@@ -243,18 +286,25 @@
   return (failure == 0);
 }
 
-#define SUBWORD_BOOL_CAS(TYPE, WIDTH)					\
-  bool HIDDEN								\
-  __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,		\
-					TYPE newval)			\
+
+#define SYNC_LOCK_TEST_AND_SET_2(TYPE, WIDTH, INDEX)			\
+TYPE HIDDEN								\
+  __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val)		\
   {									\
-    TYPE actual_oldval							\
-      = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval);	\
-    return (oldval == actual_oldval);					\
+    TYPE oldval;							\
+    int failure;							\
+									\
+    do {								\
+      oldval = *ptr;							\
+      failure = __kernel_cmpxchg2 (&oldval, &val, ptr, INDEX);		\
+    } while (failure != 0);						\
+									\
+    return oldval;							\
   }
 
-SUBWORD_BOOL_CAS (unsigned short, 2)
-SUBWORD_BOOL_CAS (unsigned char,  1)
+SYNC_LOCK_TEST_AND_SET_2(long long,      8, 3)
+SYNC_LOCK_TEST_AND_SET_2(unsigned short, 2, 1)
+SYNC_LOCK_TEST_AND_SET_2(unsigned char,  1, 0)
 
 int HIDDEN
 __sync_lock_test_and_set_4 (int *ptr, int val)
@@ -269,29 +319,22 @@
   return oldval;
 }
 
-#define SUBWORD_TEST_AND_SET(TYPE, WIDTH)				\
-  TYPE HIDDEN								\
-  __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val)		\
-  {									\
-    int failure;							\
-    unsigned int oldval, newval, shift, mask;				\
-    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
-									\
-    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
-    mask = MASK_##WIDTH << shift;					\
-									\
-    do {								\
-      oldval = *wordptr;						\
-      newval = (oldval & ~mask)						\
-	       | (((unsigned int) val << shift) & mask);		\
-      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
-    } while (failure != 0);						\
-									\
-    return (oldval & mask) >> shift;					\
+
+#define SYNC_LOCK_RELEASE_2(TYPE, WIDTH, INDEX)			\
+  void HIDDEN							\
+  __sync_lock_release_##WIDTH (TYPE *ptr)			\
+  {								\
+    TYPE failure, oldval, zero = 0;				\
+								\
+    do {							\
+      oldval = *ptr;						\
+      failure = __kernel_cmpxchg2 (&oldval, &zero, ptr, INDEX);	\
+    } while (failure != 0);					\
   }
 
-SUBWORD_TEST_AND_SET (unsigned short, 2)
-SUBWORD_TEST_AND_SET (unsigned char,  1)
+SYNC_LOCK_RELEASE_2 (long long, 8, 3)
+SYNC_LOCK_RELEASE_2 (short,     2, 1)
+SYNC_LOCK_RELEASE_2 (char,      1, 0)
 
 void HIDDEN
 __sync_lock_release_4 (int *ptr)
@@ -304,23 +347,3 @@
   } while (failure != 0);
 }
 
-#define SYNC_LOCK_RELEASE(TYPE, WIDTH)					\
-  void HIDDEN								\
-  __sync_lock_release_##WIDTH (TYPE *ptr)				\
-  {									\
-    int failure;							\
-    unsigned int oldval, newval, shift, mask;				\
-    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
-									\
-    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
-    mask = MASK_##WIDTH << shift;					\
-									\
-    do {								\
-      oldval = *wordptr;						\
-      newval = oldval & ~mask;						\
-      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
-    } while (failure != 0);						\
-  }
-
-SYNC_LOCK_RELEASE (short, 2)
-SYNC_LOCK_RELEASE (char,  1)

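For reference, retrying on the -EBUSY/-EAGAIN returns of the new helper
works the same way as with the old 32bit one. A minimal sketch, assuming
the __kernel_cmpxchg2 defined above (illustrative only, not part of the
file):

    /* Hypothetical 64bit atomic swap built on the new helper. */
    static long long
    swap64_sketch (long long *ptr, long long val)
    {
      long long oldval;
      int failure;

      do {
        oldval = *ptr;
        /* Index 3 selects the 64bit case in the kernel LWS. */
        failure = __kernel_cmpxchg2 (&oldval, &val, ptr, 3);
      } while (failure != 0);	/* -EBUSY/-EAGAIN: reload and retry */

      return oldval;
    }
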
[-- Attachment #3: linux-hppa-atomic-cas2_v3.patch --]
[-- Type: text/x-diff; name=linux-hppa-atomic-cas2_v3.patch, Size: 7688 bytes --]

parisc: Implement new LWS CAS supporting 64 bit operations.

The current LWS CAS only works correctly for 32bit. The new LWS allows for
CAS operations of variable size.

Signed-off-by: Guy Martin <gmsoft@tuxicoman.be>

diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 8387860..7ef22e3 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -74,7 +74,7 @@ ENTRY(linux_gateway_page)
 	/* ADDRESS 0xb0 to 0xb8, lws uses two insns for entry */
 	/* Light-weight-syscall entry must always be located at 0xb0 */
 	/* WARNING: Keep this number updated with table size changes */
-#define __NR_lws_entries (2)
+#define __NR_lws_entries (3)
 
 lws_entry:
 	gate	lws_start, %r0		/* increase privilege */
@@ -502,7 +502,7 @@ lws_exit:
 
 	
 	/***************************************************
-		Implementing CAS as an atomic operation:
+		Implementing 32bit CAS as an atomic operation:
 
 		%r26 - Address to examine
 		%r25 - Old value to check (old)
@@ -659,6 +659,230 @@ cas_action:
 	ASM_EXCEPTIONTABLE_ENTRY(2b-linux_gateway_page, 3b-linux_gateway_page)
 
 
+	/***************************************************
+		New CAS implementation which uses pointers and variable size
+		information. The values pointed to by old and new MUST NOT change
+		while performing CAS. The lock only protects the value at %r26.
+
+		%r26 - Address to examine
+		%r25 - Pointer to the value to check (old)
+		%r24 - Pointer to the value to set (new)
+		%r23 - Size of the variable (0/1/2/3 for 8/16/32/64 bit)
+		%r28 - Return non-zero on failure
+		%r21 - Kernel error code
+
+		%r21 has the following meanings:
+
+		EAGAIN - CAS is busy, ldcw failed, try again.
+		EFAULT - Read or write failed.
+
+		Scratch: r20, r22, r28, r29, r1, fr4 (32bit for 64bit CAS only)
+
+	****************************************************/
+
+	/* ELF32 Process entry path */
+lws_compare_and_swap_2:
+#ifdef CONFIG_64BIT
+	/* Clip the input registers */
+	depdi	0, 31, 32, %r26
+	depdi	0, 31, 32, %r25
+	depdi	0, 31, 32, %r24
+	depdi	0, 31, 32, %r23
+#endif
+
+	/* Check the validity of the size pointer */
+	subi,>>= 4, %r23, %r0
+	b,n	lws_exit_nosys
+
+	/* Jump to the functions which will load the old and new values into
+	   registers depending on their size */
+	shlw	%r23, 2, %r29
+	blr	%r29, %r0
+	nop
+
+	/* 8bit load */
+4:	ldb	0(%sr3,%r25), %r25
+	b	cas2_lock_start
+5:	ldb	0(%sr3,%r24), %r24
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	/* 16bit load */
+6:	ldh	0(%sr3,%r25), %r25
+	b	cas2_lock_start
+7:	ldh	0(%sr3,%r24), %r24
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	/* 32bit load */
+8:	ldw	0(%sr3,%r25), %r25
+	b	cas2_lock_start
+9:	ldw	0(%sr3,%r24), %r24
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	/* 64bit load */
+#ifdef CONFIG_64BIT
+10:	ldd	0(%sr3,%r25), %r25
+11:	ldd	0(%sr3,%r24), %r24
+#else
+	/* Load new value into r22/r23 - high/low */
+10:	ldw	0(%sr3,%r25), %r22
+11:	ldw	4(%sr3,%r25), %r23
+	/* Load new value into fr4 for atomic store later */
+12:	flddx	0(%sr3,%r24), %fr4
+#endif
+
+cas2_lock_start:
+	/* Load start of lock table */
+	ldil	L%lws_lock_start, %r20
+	ldo	R%lws_lock_start(%r20), %r28
+
+	/* Extract four bits from r26 and hash lock (Bits 4-7) */
+	extru  %r26, 27, 4, %r20
+
+	/* Find the lock to use: the hash is one of 0 to
+	   15, multiplied by 16 (keeping it 16-byte aligned),
+	   and added to the lock table offset. */
+	shlw	%r20, 4, %r20
+	add	%r20, %r28, %r20
+
+	rsm	PSW_SM_I, %r0			/* Disable interrupts */
+	/* COW breaks can cause contention on UP systems */
+	LDCW	0(%sr2,%r20), %r28		/* Try to acquire the lock */
+	cmpb,<>,n	%r0, %r28, cas2_action	/* Did we get it? */
+cas2_wouldblock:
+	ldo	2(%r0), %r28			/* 2nd case */
+	ssm	PSW_SM_I, %r0
+	b	lws_exit			/* Contended... */
+	ldo	-EAGAIN(%r0), %r21		/* Spin in userspace */
+
+	/*
+		prev = *addr;
+		if ( prev == old )
+		  *addr = new;
+		return prev;
+	*/
+
+	/* NOTES:
+		This all works because intr_do_signal
+		and schedule both check the return iasq
+		and see that we are on the kernel page
+		so this process is never scheduled off
+		or ever sent any signal of any sort,
+		thus it is wholly atomic from userspace's
+		perspective
+	*/
+cas2_action:
+	/* Jump to the correct function */
+	blr	%r29, %r0
+	/* Set %r28 as non-zero for now */
+	ldo	1(%r0),%r28
+
+	/* 8bit CAS */
+13:	ldb,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r25, %r0
+	b,n	cas2_end
+14:	stb,ma	%r24, 0(%sr3,%r26)
+	b	cas2_end
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 16bit CAS */
+15:	ldh,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r25, %r0
+	b,n	cas2_end
+16:	sth,ma	%r24, 0(%sr3,%r26)
+	b	cas2_end
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 32bit CAS */
+17:	ldw,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r25, %r0
+	b,n	cas2_end
+18:	stw,ma	%r24, 0(%sr3,%r26)
+	b	cas2_end
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 64bit CAS */
+#ifdef CONFIG_64BIT
+19:	ldd,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r25, %r0
+	b,n	cas2_end
+20:	std,ma	%r24, 0(%sr3,%r26)
+	copy	%r0, %r28
+#else
+	/* Compare first word */
+19:	ldw,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r22, %r0
+	b,n	cas2_end
+	/* Compare second word */
+20:	ldw,ma	4(%sr3,%r26), %r29
+	sub,=	%r29, %r23, %r0
+	b,n	cas2_end
+	/* Perform the store */
+21:	fstdx	%fr4, 0(%sr3,%r26)
+	copy	%r0, %r28
+#endif
+
+cas2_end:
+	/* Free lock */
+	stw,ma	%r20, 0(%sr2,%r20)
+	/* Enable interrupts */
+	ssm	PSW_SM_I, %r0
+	/* Return to userspace, set no error */
+	b	lws_exit
+	copy	%r0, %r21
+
+22:
+	/* Error occurred on load or store */
+	/* Free lock */
+	stw	%r20, 0(%sr2,%r20)
+	ssm	PSW_SM_I, %r0
+	ldo	1(%r0),%r28
+	b	lws_exit
+	ldo	-EFAULT(%r0),%r21	/* set errno */
+	nop
+	nop
+	nop
+
+	/* Exception table entries, for the load and store, return EFAULT.
+	   Each of the entries must be relocated. */
+	ASM_EXCEPTIONTABLE_ENTRY(4b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(5b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(6b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(7b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(8b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(9b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(10b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(11b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(13b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(14b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(15b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(16b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(17b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(18b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(19b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(20b-linux_gateway_page, 22b-linux_gateway_page)
+#ifndef CONFIG_64BIT
+	ASM_EXCEPTIONTABLE_ENTRY(12b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(21b-linux_gateway_page, 22b-linux_gateway_page)
+#endif
+
 	/* Make sure nothing else is placed on this page */
 	.align PAGE_SIZE
 END(linux_gateway_page)
@@ -675,8 +899,9 @@ ENTRY(end_linux_gateway_page)
 	/* Light-weight-syscall table */
 	/* Start of lws table. */
 ENTRY(lws_table)
-	LWS_ENTRY(compare_and_swap32)	/* 0 - ELF32 Atomic compare and swap */
-	LWS_ENTRY(compare_and_swap64)	/* 1 - ELF64 Atomic compare and swap */
+	LWS_ENTRY(compare_and_swap32)		/* 0 - ELF32 Atomic 32bit CAS */
+	LWS_ENTRY(compare_and_swap64)		/* 1 - ELF64 Atomic 32bit CAS */
+	LWS_ENTRY(compare_and_swap_2)		/* 2 - ELF32 Atomic 64bit CAS */
 END(lws_table)
 	/* End of lws table */
 

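For readers following the assembly, each size case of cas2_action above
reduces to the following C model (hypothetical code for exposition only;
the real implementation runs on the gateway page, under the hashed lock,
with interrupts disabled):

	/* C model of the 8bit case; names here are made up. */
	static int cas2_model_u8 (unsigned char *mem, unsigned char old,
				  unsigned char new)
	{
		/* hashed ldcw lock on (mem >> 4) & 0xf is held, IRQs off */
		unsigned char prev = *mem;	/* a fault here -> -EFAULT  */
		if (prev != old)
			return 1;		/* %r28 != 0, gcc -> -EBUSY */
		*mem = new;			/* a fault here -> -EFAULT  */
		return 0;			/* %r28 == 0, success       */
	}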

* Re: [PATCHv3] Implement gcc 64bit atomic operations and fix the broken 8/16 bit ones
  2014-09-12 16:02 [PATCHv3] Implement gcc 64bit atomic operations and fix the broken 8/16 bit ones Guy Martin
@ 2014-09-15 21:17 ` Helge Deller
  2014-09-16  1:37   ` John David Anglin
  0 siblings, 1 reply; 3+ messages in thread
From: Helge Deller @ 2014-09-15 21:17 UTC (permalink / raw)
  To: Guy Martin, linux-parisc

Hi Guy,

On 09/12/2014 06:02 PM, Guy Martin wrote:
> Here is the latest set of patches for the 8/16/64 bit atomic gcc builtins.
>
> On the kernel side, a new LWS is implemented, allowing for variable size CAS.
> On gcc's side, the new LWS is used for the 8/16/64 bit atomic builtins, keeping the old LWS for 32bit ops.
> Since the 8 and 16 bit ops were broken, it's best to use the new LWS unconditionally for these.

I've pushed your kernel patch upstream for kernel 3.17.
Furthermore, I've scheduled it for backporting into kernels >= 3.13.
I would have liked to go back to kernel 3.0, but then more patches would have needed to be backported.

Maybe someone can now start to push the gcc patch upstream?

Thanks!
Helge


* Re: [PATCHv3] Implement gcc 64bit atomic operations and fix the broken 8/16 bit ones
  2014-09-15 21:17 ` Helge Deller
@ 2014-09-16  1:37   ` John David Anglin
  0 siblings, 0 replies; 3+ messages in thread
From: John David Anglin @ 2014-09-16  1:37 UTC (permalink / raw)
  To: Helge Deller; +Cc: Guy Martin, linux-parisc

On 15-Sep-14, at 5:17 PM, Helge Deller wrote:

> Maybe someone can now start to push the gcc patch upstream?

Although I don't have anything specific, this is a subtle area and I have
had to fix bugs in this code before.  I would like to see that this code
actually works in the real world in code that uses the GCC atomic support
(e.g., glib2.0).  I'm still not sure we have correct types in our code
(ARM revamped some of the types used in their implementation).
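
A minimal smoke test for the new widths might look like this (a sketch
only; a real test should also exercise contention across threads, which
is where glib2.0 would help):

    /* smoke.c -- sketch; build with the patched gcc on parisc. */
    #include <assert.h>

    int main (void)
    {
      unsigned char  c  = 0;
      unsigned short s  = 0;
      long long      ll = 0;

      assert (__sync_fetch_and_add (&c, 1) == 0 && c == 1);
      assert (__sync_fetch_and_add (&s, 1) == 0 && s == 1);
      assert (__sync_bool_compare_and_swap (&ll, 0LL, 7LL) && ll == 7);
      return 0;
    }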

We have some time before gcc-5.0 is released.  As a new feature, it would
normally not be backported without strong justification.

Dave
--
John David Anglin	dave.anglin@bell.net




