* Re: [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
@ 2019-06-24  6:41 Jerin Jacob Kollanukkaran
  2019-06-24 15:43 ` Phil Yang (Arm Technology China)
  0 siblings, 1 reply; 10+ messages in thread
From: Jerin Jacob Kollanukkaran @ 2019-06-24  6:41 UTC (permalink / raw)
  To: Phil Yang, dev
  Cc: thomas, hemant.agrawal, Honnappa.Nagarahalli, gavin.hu, nd, gage.eads

> -----Original Message-----
> From: Phil Yang <phil.yang@arm.com>
> Sent: Sunday, June 23, 2019 8:46 AM
> To: dev@dpdk.org
> Cc: thomas@monjalon.net; Jerin Jacob Kollanukkaran <jerinj@marvell.com>;
> hemant.agrawal@nxp.com; Honnappa.Nagarahalli@arm.com;
> gavin.hu@arm.com; nd@arm.com; gage.eads@intel.com
> Subject: [EXT] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare
> exchange
> 
> Add 128-bit atomic compare exchange on aarch64.
> 
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
> This patch depends on 'eal/stack: fix 'pointer-sign' warning'
> http://patchwork.dpdk.org/patch/54840/
> 
> +
> +#ifdef __ARM_FEATURE_ATOMICS
> +static inline rte_int128_t
> +__rte_casp(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated, int mo)
> +{

Better to change to "const int mo".

> +
> +	/* The caspX instructions' register pair must start from an
> +	 * even-numbered register at operand 1, so specify registers
> +	 * for the local variables here.
> +	 */
> +	register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];
> +	register uint64_t x1 __asm("x1") = (uint64_t)old.val[1];
> +	register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0];
> +	register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1];
> +
> +	if (mo ==  __ATOMIC_RELAXED) {
> +		asm volatile(
> +				"casp %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> +				: [old0] "+r" (x0),
> +				  [old1] "+r" (x1)
> +				: [upd0] "r" (x2),
> +				  [upd1] "r" (x3),
> +				  [dst] "r" (dst)
> +				: "memory");
> +	} else if (mo == __ATOMIC_ACQUIRE) {
> +		asm volatile(
> +				"caspa %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> +				: [old0] "+r" (x0),
> +				  [old1] "+r" (x1)
> +				: [upd0] "r" (x2),
> +				  [upd1] "r" (x3),
> +				  [dst] "r" (dst)
> +				: "memory");
> +	} else if (mo == __ATOMIC_ACQ_REL) {
> +		asm volatile(
> +				"caspal %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> +				: [old0] "+r" (x0),
> +				  [old1] "+r" (x1)
> +				: [upd0] "r" (x2),
> +				  [upd1] "r" (x3),
> +				  [dst] "r" (dst)
> +				: "memory");
> +	} else if (mo == __ATOMIC_RELEASE) {
> +		asm volatile(
> +				"caspl %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> +				: [old0] "+r" (x0),
> +				  [old1] "+r" (x1)
> +				: [upd0] "r" (x2),
> +				  [upd1] "r" (x3),
> +				  [dst] "r" (dst)
> +				: "memory");

I think this duplicated code can be avoided with a macro that takes the casp/caspa/caspal/caspl mnemonic as an argument.
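Something like this rough sketch could work (the macro and function names
here are illustrative, not from the patch):

#define __RTE_CASP_OP(name, op_string)                                    \
static inline rte_int128_t                                                 \
name(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated)            \
{                                                                          \
	/* caspX register pairs must start at an even-numbered register */ \
	register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];           \
	register uint64_t x1 __asm("x1") = (uint64_t)old.val[1];           \
	register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0];       \
	register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1];       \
	asm volatile(                                                      \
		op_string " %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"  \
		: [old0] "+r" (x0),                                        \
		  [old1] "+r" (x1)                                         \
		: [upd0] "r" (x2),                                         \
		  [upd1] "r" (x3),                                         \
		  [dst] "r" (dst)                                          \
		: "memory");                                               \
	old.val[0] = x0;                                                   \
	old.val[1] = x1;                                                   \
	return old;                                                        \
}

__RTE_CASP_OP(__rte_casp_relaxed, "casp")
__RTE_CASP_OP(__rte_casp_acquire, "caspa")
__RTE_CASP_OP(__rte_casp_release, "caspl")
__RTE_CASP_OP(__rte_casp_acq_rel, "caspal")

__rte_casp() would then just dispatch on mo to one of the four generated
functions.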

> +	} else {
> +		rte_panic("Invalid memory order\n");


rte_panic should be removed from libraries. In this case, I think an invalid mo can fall back to the strongest barrier.

> +	}
> +
> +	old.val[0] = x0;
> +	old.val[1] = x1;
> +
> +	return old;
> +}
> +#else
> +static inline rte_int128_t
> +__rte_ldx128(const rte_int128_t *src, int mo)
> +{
> +	rte_int128_t ret;
> +	if (mo == __ATOMIC_ACQUIRE)
> +		asm volatile(
> +				"ldaxp %0, %1, %2"
> +				: "=&r" (ret.val[0]),
> +				  "=&r" (ret.val[1])
> +				: "Q" (src->val[0])
> +				: "memory");
> +	else if (mo == __ATOMIC_RELAXED)
> +		asm volatile(
> +				"ldxp %0, %1, %2"
> +				: "=&r" (ret.val[0]),
> +				  "=&r" (ret.val[1])
> +				: "Q" (src->val[0])
> +				: "memory");

Same as above comment.

> +	else
> +		rte_panic("Invalid memory order\n");

Same as above comment.

> +
> +	return ret;
> +}
> +
> +static inline uint32_t
> +__rte_stx128(rte_int128_t *dst, const rte_int128_t src, int mo)
> +{
> +	uint32_t ret;
> +	if (mo == __ATOMIC_RELEASE)
> +		asm volatile(
> +				"stlxp %w0, %1, %2, %3"
> +				: "=&r" (ret)
> +				: "r" (src.val[0]),
> +				  "r" (src.val[1]),
> +				  "Q" (dst->val[0])
> +				: "memory");
> +	else if (mo == __ATOMIC_RELAXED)
> +		asm volatile(
> +				"stxp %w0, %1, %2, %3"
> +				: "=&r" (ret)
> +				: "r" (src.val[0]),
> +				  "r" (src.val[1]),
> +				  "Q" (dst->val[0])
> +				: "memory");
> +	else
> +		rte_panic("Invalid memory order\n");

Same as above comment.

> +
> +	/* Return 0 on success, 1 on failure */
> +	return ret;
> +}
> +#endif
> +
> +static inline int __rte_experimental
> +rte_atomic128_cmp_exchange(rte_int128_t *dst,
> +				rte_int128_t *exp,
> +				const rte_int128_t *src,
> +				unsigned int weak,
> +				int success,
> +				int failure)
> +{
> +	// Always do strong CAS

Remove C++ style code comment.



* Re: [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
  2019-06-24  6:41 [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange Jerin Jacob Kollanukkaran
@ 2019-06-24 15:43 ` Phil Yang (Arm Technology China)
  2019-06-24 16:12   ` Honnappa Nagarahalli
  0 siblings, 1 reply; 10+ messages in thread
From: Phil Yang (Arm Technology China) @ 2019-06-24 15:43 UTC (permalink / raw)
  To: jerinj, dev
  Cc: thomas, hemant.agrawal, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China),
	nd, gage.eads, nd

Hi Jerin,

Thank you for your comments. I will modify these in the next version.

Thanks,
Phil

> -----Original Message-----
> From: Jerin Jacob Kollanukkaran <jerinj@marvell.com>
> Sent: Monday, June 24, 2019 2:41 PM
> To: Phil Yang (Arm Technology China) <Phil.Yang@arm.com>; dev@dpdk.org
> Cc: thomas@monjalon.net; hemant.agrawal@nxp.com; Honnappa
> Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm Technology
> China) <Gavin.Hu@arm.com>; nd <nd@arm.com>; gage.eads@intel.com
> Subject: RE: [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
> 
> > -----Original Message-----
> > From: Phil Yang <phil.yang@arm.com>
> > Sent: Sunday, June 23, 2019 8:46 AM
> > To: dev@dpdk.org
> > Cc: thomas@monjalon.net; Jerin Jacob Kollanukkaran
> > <jerinj@marvell.com>; hemant.agrawal@nxp.com;
> > Honnappa.Nagarahalli@arm.com; gavin.hu@arm.com; nd@arm.com;
> > gage.eads@intel.com
> > Subject: [EXT] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare
> > exchange
> >
> > Add 128-bit atomic compare exchange on aarch64.
> >
> > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > ---
> > This patch depends on 'eal/stack: fix 'pointer-sign' warning'
> > http://patchwork.dpdk.org/patch/54840/
> >
> > +
> > +#ifdef __ARM_FEATURE_ATOMICS
> > +static inline rte_int128_t
> > +__rte_casp(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated, int mo)
> > +{
> 
> Better to change to "const int mo".
> 
> > +
> > +	/* The caspX instructions' register pair must start from an
> > +	 * even-numbered register at operand 1, so specify registers
> > +	 * for the local variables here.
> > +	 */
> > +	register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];
> > +	register uint64_t x1 __asm("x1") = (uint64_t)old.val[1];
> > +	register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0];
> > +	register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1];
> > +
> > +	if (mo ==  __ATOMIC_RELAXED) {
> > +		asm volatile(
> > +				"casp %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> > +				: [old0] "+r" (x0),
> > +				  [old1] "+r" (x1)
> > +				: [upd0] "r" (x2),
> > +				  [upd1] "r" (x3),
> > +				  [dst] "r" (dst)
> > +				: "memory");
> > +	} else if (mo == __ATOMIC_ACQUIRE) {
> > +		asm volatile(
> > +				"caspa %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> > +				: [old0] "+r" (x0),
> > +				  [old1] "+r" (x1)
> > +				: [upd0] "r" (x2),
> > +				  [upd1] "r" (x3),
> > +				  [dst] "r" (dst)
> > +				: "memory");
> > +	} else if (mo == __ATOMIC_ACQ_REL) {
> > +		asm volatile(
> > +				"caspal %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> > +				: [old0] "+r" (x0),
> > +				  [old1] "+r" (x1)
> > +				: [upd0] "r" (x2),
> > +				  [upd1] "r" (x3),
> > +				  [dst] "r" (dst)
> > +				: "memory");
> > +	} else if (mo == __ATOMIC_RELEASE) {
> > +		asm volatile(
> > +				"caspl %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> > +				: [old0] "+r" (x0),
> > +				  [old1] "+r" (x1)
> > +				: [upd0] "r" (x2),
> > +				  [upd1] "r" (x3),
> > +				  [dst] "r" (dst)
> > +				: "memory");
> 
> I think this duplicated code can be avoided with a macro that takes the
> casp/caspa/caspal/caspl mnemonic as an argument.
> 
> > +	} else {
> > +		rte_panic("Invalid memory order\n");
> 
> 
> rte_panic should be removed from libraries. In this case, I think an invalid
> mo can fall back to the strongest barrier.
> 
> > +	}
> > +
> > +	old.val[0] = x0;
> > +	old.val[1] = x1;
> > +
> > +	return old;
> > +}
> > +#else
> > +static inline rte_int128_t
> > +__rte_ldx128(const rte_int128_t *src, int mo)
> > +{
> > +	rte_int128_t ret;
> > +	if (mo == __ATOMIC_ACQUIRE)
> > +		asm volatile(
> > +				"ldaxp %0, %1, %2"
> > +				: "=&r" (ret.val[0]),
> > +				  "=&r" (ret.val[1])
> > +				: "Q" (src->val[0])
> > +				: "memory");
> > +	else if (mo == __ATOMIC_RELAXED)
> > +		asm volatile(
> > +				"ldxp %0, %1, %2"
> > +				: "=&r" (ret.val[0]),
> > +				  "=&r" (ret.val[1])
> > +				: "Q" (src->val[0])
> > +				: "memory");
> 
> Same as above comment.
> 
> > +	else
> > +		rte_panic("Invalid memory order\n");
> 
> Same as above comment.
> 
> > +
> > +	return ret;
> > +}
> > +
> > +static inline uint32_t
> > +__rte_stx128(rte_int128_t *dst, const rte_int128_t src, int mo)
> > +{
> > +	uint32_t ret;
> > +	if (mo == __ATOMIC_RELEASE)
> > +		asm volatile(
> > +				"stlxp %w0, %1, %2, %3"
> > +				: "=&r" (ret)
> > +				: "r" (src.val[0]),
> > +				  "r" (src.val[1]),
> > +				  "Q" (dst->val[0])
> > +				: "memory");
> > +	else if (mo == __ATOMIC_RELAXED)
> > +		asm volatile(
> > +				"stxp %w0, %1, %2, %3"
> > +				: "=&r" (ret)
> > +				: "r" (src.val[0]),
> > +				  "r" (src.val[1]),
> > +				  "Q" (dst->val[0])
> > +				: "memory");
> > +	else
> > +		rte_panic("Invalid memory order\n");
> 
> Same as above comment.
> 
> > +
> > +	/* Return 0 on success, 1 on failure */
> > +	return ret;
> > +}
> > +#endif
> > +
> > +static inline int __rte_experimental
> > +rte_atomic128_cmp_exchange(rte_int128_t *dst,
> > +				rte_int128_t *exp,
> > +				const rte_int128_t *src,
> > +				unsigned int weak,
> > +				int success,
> > +				int failure)
> > +{
> > +	// Always do strong CAS
> 
> Remove C++ style code comment.



* Re: [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
  2019-06-24 15:43 ` Phil Yang (Arm Technology China)
@ 2019-06-24 16:12   ` Honnappa Nagarahalli
  2019-06-24 16:25     ` Thomas Monjalon
  0 siblings, 1 reply; 10+ messages in thread
From: Honnappa Nagarahalli @ 2019-06-24 16:12 UTC (permalink / raw)
  To: Phil Yang (Arm Technology China), jerinj, dev
  Cc: thomas, hemant.agrawal, Gavin Hu (Arm Technology China),
	Honnappa Nagarahalli, nd, gage.eads, nd

<snip>

> > >
> > > Add 128-bit atomic compare exchange on aarch64.
> > >
> > > Signed-off-by: Phil Yang <phil.yang@arm.com>
> > > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > ---
> > > This patch depends on 'eal/stack: fix 'pointer-sign' warning'
> > > http://patchwork.dpdk.org/patch/54840/
> > >
> > > +
> > > +#ifdef __ARM_FEATURE_ATOMICS
> > > +static inline rte_int128_t
> > > +__rte_casp(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated, int mo)
> > > +{
> >
> > Better to change to "const int mo".
> >
> > > +
> > > +	/* The caspX instructions' register pair must start from an
> > > +	 * even-numbered register at operand 1, so specify registers
> > > +	 * for the local variables here.
> > > +	 */
> > > +	register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];
> > > +	register uint64_t x1 __asm("x1") = (uint64_t)old.val[1];
> > > +	register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0];
> > > +	register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1];
> > > +
> > > +	if (mo ==  __ATOMIC_RELAXED) {
> > > +		asm volatile(
> > > +				"casp %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> > > +				: [old0] "+r" (x0),
> > > +				  [old1] "+r" (x1)
> > > +				: [upd0] "r" (x2),
> > > +				  [upd1] "r" (x3),
> > > +				  [dst] "r" (dst)
> > > +				: "memory");
> > > +	} else if (mo == __ATOMIC_ACQUIRE) {
> > > +		asm volatile(
> > > +				"caspa %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> > > +				: [old0] "+r" (x0),
> > > +				  [old1] "+r" (x1)
> > > +				: [upd0] "r" (x2),
> > > +				  [upd1] "r" (x3),
> > > +				  [dst] "r" (dst)
> > > +				: "memory");
> > > +	} else if (mo == __ATOMIC_ACQ_REL) {
> > > +		asm volatile(
> > > +				"caspal %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> > > +				: [old0] "+r" (x0),
> > > +				  [old1] "+r" (x1)
> > > +				: [upd0] "r" (x2),
> > > +				  [upd1] "r" (x3),
> > > +				  [dst] "r" (dst)
> > > +				: "memory");
> > > +	} else if (mo == __ATOMIC_RELEASE) {
> > > +		asm volatile(
> > > +				"caspl %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
> > > +				: [old0] "+r" (x0),
> > > +				  [old1] "+r" (x1)
> > > +				: [upd0] "r" (x2),
> > > +				  [upd1] "r" (x3),
> > > +				  [dst] "r" (dst)
> > > +				: "memory");
> >
> > I think this duplicated code can be avoided with a macro that takes the
> > casp/caspa/caspal/caspl mnemonic as an argument.
> >
> > > +	} else {
> > > +		rte_panic("Invalid memory order\n");
> >
> >
> > rte_panic should be removed from libraries. In this case, I think
> > an invalid mo can fall back to the strongest barrier.
It is added here to capture programming errors. The memory order can be passed at compile time or at run time; 'rte_panic' catches both cases.
Falling back to the strongest memory order would mask the programming error.

> >
> > > +	}
> > > +
> > > +	old.val[0] = x0;
> > > +	old.val[1] = x1;
> > > +
> > > +	return old;
> > > +}
> > > +#else

<snip>



* Re: [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
  2019-06-24 16:12   ` Honnappa Nagarahalli
@ 2019-06-24 16:25     ` Thomas Monjalon
  2019-06-24 17:41       ` Honnappa Nagarahalli
  0 siblings, 1 reply; 10+ messages in thread
From: Thomas Monjalon @ 2019-06-24 16:25 UTC (permalink / raw)
  To: Honnappa Nagarahalli, Phil Yang (Arm Technology China)
  Cc: jerinj, dev, hemant.agrawal, Gavin Hu (Arm Technology China),
	nd, gage.eads

24/06/2019 18:12, Honnappa Nagarahalli:
> > > > +	} else {
> > > > +		rte_panic("Invalid memory order\n");
> > >
> > >
> > > rte_panic should be removed from libraries. In this case, I think
> > > an invalid mo can fall back to the strongest barrier.
>
> It is added here to capture programming errors. The memory order can be
> passed at compile time or at run time; 'rte_panic' catches both cases.
> Falling back to the strongest memory order would mask the programming
> error.

An error must be reported by returning a specific code from the function.
rte_panic is really forbidden in libraries.
We are in the process of removing all of them.





* Re: [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
  2019-06-24 16:25     ` Thomas Monjalon
@ 2019-06-24 17:41       ` Honnappa Nagarahalli
  2019-06-25  6:15         ` Jerin Jacob Kollanukkaran
  0 siblings, 1 reply; 10+ messages in thread
From: Honnappa Nagarahalli @ 2019-06-24 17:41 UTC (permalink / raw)
  To: thomas, Phil Yang (Arm Technology China)
  Cc: jerinj, dev, hemant.agrawal, Gavin Hu (Arm Technology China),
	Honnappa Nagarahalli, nd, gage.eads, nd

> 
> 24/06/2019 18:12, Honnappa Nagarahalli:
> > > > > +	} else {
> > > > > +		rte_panic("Invalid memory order\n");
> > > >
> > > >
> > > > rte_panic should be removed from libraries. In this case, I think
> > > > an invalid mo can fall back to the strongest barrier.
> >
> > It is added here to capture programming errors. The memory order can be
> > passed at compile time or at run time; 'rte_panic' catches both cases.
> > Falling back to the strongest memory order would mask the programming
> > error.
> 
> An error must be reported by returning a specific code from the function.
> rte_panic is really forbidden in libraries.
> We are in the process of removing all of them.
Thank you for clarifying.
In this particular use case, the API is similar to the '__atomic_compare_exchange' built-in, so users would expect similar behavior. If we differ from the standard behavior, we should document it in the API definition.
 
> 
> 


* Re: [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
  2019-06-24 17:41       ` Honnappa Nagarahalli
@ 2019-06-25  6:15         ` Jerin Jacob Kollanukkaran
  2019-06-26 10:10           ` Phil Yang (Arm Technology China)
  0 siblings, 1 reply; 10+ messages in thread
From: Jerin Jacob Kollanukkaran @ 2019-06-25  6:15 UTC (permalink / raw)
  To: Honnappa Nagarahalli, thomas, Phil Yang (Arm Technology China)
  Cc: dev, hemant.agrawal, Gavin Hu (Arm Technology China), nd, gage.eads, nd

> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Monday, June 24, 2019 11:11 PM
> To: thomas@monjalon.net; Phil Yang (Arm Technology China)
> <Phil.Yang@arm.com>
> Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; dev@dpdk.org;
> hemant.agrawal@nxp.com; Gavin Hu (Arm Technology China)
> <Gavin.Hu@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>; gage.eads@intel.com;
> nd <nd@arm.com>
> Subject: [EXT] RE: [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare
> exchange
> >
> > 24/06/2019 18:12, Honnappa Nagarahalli:
> > > > > > +	} else {
> > > > > > +		rte_panic("Invalid memory order\n");
> > > > >
> > > > >
> > > > > rte_panic should be removed from libraries. In this case, I think
> > > > > an invalid mo can fall back to the strongest barrier.
> > >
> > > It is added here to capture programming errors. The memory order can
> > > be passed at compile time or at run time; 'rte_panic' catches both
> > > cases.
> > > Falling back to the strongest memory order would mask the programming
> > > error.
> >
> > An error must be reported by returning a specific code from the function.
> > rte_panic is really forbidden in libraries.
> > We are in the process of removing all of them.
> Thank you for clarifying.
> In this particular use case, the API is similar to the
> '__atomic_compare_exchange' built-in, so users would expect similar
> behavior. If we differ from the standard behavior, we should document
> it in the API definition.

IMO, we should not differ from the standard behavior (return type) of __atomic_compare_exchange, and we should not have rte_panic in a library. IMO, the best options would be:
1) if mo is a compile-time constant, then check it with RTE_BUILD_BUG_ON (a static assert) to catch an invalid mo;
2) if mo is a runtime value and it is invalid, then fall back to the strongest memory order to stay functionally correct.
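
A minimal sketch of option 2 (an illustration only, not patch code):

/* Clamp an unrecognized runtime memory order to the strongest one so
 * that the operation stays functionally correct. */
static inline int
__rte_mo_or_seq_cst(int mo)
{
	switch (mo) {
	case __ATOMIC_RELAXED:
	case __ATOMIC_ACQUIRE:
	case __ATOMIC_RELEASE:
	case __ATOMIC_ACQ_REL:
	case __ATOMIC_SEQ_CST:
		return mo;
	default:
		return __ATOMIC_SEQ_CST; /* strongest barrier */
	}
}

For option 1, RTE_BUILD_BUG_ON() on the constant mo can reject the invalid
value at build time.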


> 
> >
> >


* Re: [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
  2019-06-25  6:15         ` Jerin Jacob Kollanukkaran
@ 2019-06-26 10:10           ` Phil Yang (Arm Technology China)
  0 siblings, 0 replies; 10+ messages in thread
From: Phil Yang (Arm Technology China) @ 2019-06-26 10:10 UTC (permalink / raw)
  To: jerinj, Honnappa Nagarahalli, thomas
  Cc: dev, hemant.agrawal, Gavin Hu (Arm Technology China),
	nd, gage.eads, nd, nd

> -----Original Message-----
> From: Jerin Jacob Kollanukkaran <jerinj@marvell.com>
> Sent: Tuesday, June 25, 2019 2:16 PM
> To: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>;
> thomas@monjalon.net; Phil Yang (Arm Technology China)
> <Phil.Yang@arm.com>
> Cc: dev@dpdk.org; hemant.agrawal@nxp.com; Gavin Hu (Arm Technology
> China) <Gavin.Hu@arm.com>; nd <nd@arm.com>; gage.eads@intel.com; nd
> <nd@arm.com>
> Subject: RE: [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
> 
> > -----Original Message-----
> > From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> > Sent: Monday, June 24, 2019 11:11 PM
> > To: thomas@monjalon.net; Phil Yang (Arm Technology China)
> > <Phil.Yang@arm.com>
> > Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; dev@dpdk.org;
> > hemant.agrawal@nxp.com; Gavin Hu (Arm Technology China)
> > <Gavin.Hu@arm.com>; Honnappa Nagarahalli
> > <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>; gage.eads@intel.com;
> > nd <nd@arm.com>
> > Subject: [EXT] RE: [PATCH v2 1/3] eal/arm64: add 128-bit atomic
> > compare exchange
> > >
> > > 24/06/2019 18:12, Honnappa Nagarahalli:
> > > > > > > +	} else {
> > > > > > > +		rte_panic("Invalid memory order\n");
> > > > > >
> > > > > >
> > > > > > rte_panic should be removed from libraries. In this case, I think
> > > > > > an invalid mo can fall back to the strongest barrier.
> > > >
> > > > It is added here to capture programming errors. The memory order can
> > > > be passed at compile time or at run time; 'rte_panic' catches both
> > > > cases.
> > > > Falling back to the strongest memory order would mask the programming
> > > > error.
> > >
> > > An error must be reported by returning a specific code from the function.
> > > rte_panic is really forbidden in libraries.
> > > We are in the process of removing all of them.
> > Thank you for clarifying.
> > In this particular use case, the API is similar to the
> > '__atomic_compare_exchange' built-in, so users would expect similar
> > behavior. If we differ from the standard behavior, we should
> > document it in the API definition.
> 
> IMO, we should not differ from the standard behavior (return type) of
> __atomic_compare_exchange, and we should not have rte_panic in a library.
> IMO, the best options would be:
> 1) if mo is a compile-time constant, then check it with RTE_BUILD_BUG_ON
> (a static assert) to catch an invalid mo;
I think we need to add the static assert in rte_atomic128_cmp_exchange to check for an invalid mo, rather than putting it in these three internal functions, because the mo parameter is passed down from rte_atomic128_cmp_exchange.

> 2) if mo is a runtime value and it is invalid, then fall back to the
> strongest memory order to stay functionally correct
When mo is a runtime value, the compiler always uses the strongest memory order for the atomic operation. Please check the URL (https://gcc.godbolt.org/z/GDx6Ob) for the details.
So I think it is sufficient for these three internal functions (__rte_ldx128, __rte_stx128, __rte_casp) to just follow this rule.
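
For example, a sketch of what the godbolt link shows (the wrapper function
is mine, not from the patch):

#include <stdint.h>

/* With a runtime 'mo', GCC cannot specialize the barriers and compiles
 * the builtin as if the strongest memory order had been requested. */
int
cas64(uint64_t *dst, uint64_t *exp, uint64_t val, int mo)
{
	return __atomic_compare_exchange_n(dst, exp, val, 0, mo, mo);
}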

Thanks,
Phil Yang
> 
> 
> >
> > >
> > >


* Re: [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
  2019-06-24 14:46   ` Eads, Gage
@ 2019-06-24 15:35     ` Phil Yang (Arm Technology China)
  0 siblings, 0 replies; 10+ messages in thread
From: Phil Yang (Arm Technology China) @ 2019-06-24 15:35 UTC (permalink / raw)
  To: Eads, Gage, dev
  Cc: thomas, jerinj, hemant.agrawal, Honnappa Nagarahalli,
	Gavin Hu (Arm Technology China),
	nd, nd

> -----Original Message-----
> From: Eads, Gage <gage.eads@intel.com>
> Sent: Monday, June 24, 2019 10:46 PM
> To: Phil Yang (Arm Technology China) <Phil.Yang@arm.com>; dev@dpdk.org
> Cc: thomas@monjalon.net; jerinj@marvell.com; hemant.agrawal@nxp.com;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Gavin Hu (Arm
> Technology China) <Gavin.Hu@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
> 
> Hi Phil,
> 
> > diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h
> > b/lib/librte_eal/common/include/generic/rte_atomic.h
> > index 9958543..7dd1aa4 100644
> > --- a/lib/librte_eal/common/include/generic/rte_atomic.h
> > +++ b/lib/librte_eal/common/include/generic/rte_atomic.h
> > @@ -1081,6 +1081,18 @@ static inline void
> > rte_atomic64_clear(rte_atomic64_t *v)
> >
> >  /*------------------------ 128 bit atomic operations
> > -------------------------*/
> >
> > +/**
> > + * 128-bit integer structure.
> > + */
> > +RTE_STD_C11
> > +typedef struct {
> > +	RTE_STD_C11
> > +	union {
> > +		uint64_t val[2];
> > +		__extension__ __int128 int128;
> > +	};
> > +} __rte_aligned(16) rte_int128_t;
> > +
> >  #ifdef __DOXYGEN__
> >
> 
Hi Gage,

> This change breaks 32-bit x86 builds*. A couple of ways to resolve this are
> 1) guard it with RTE_ARCH_* ifdefs, or 2) keep duplicate definitions of the
> struct in the aarch64 and x86 header files.
OK. Let's follow the first approach. I will update it in the new version. Thanks!

> 
> Thanks,
> Gage
> 
> *http://mails.dpdk.org/archives/test-report/2019-June/086586.html

Thanks,
Phil


* Re: [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
  2019-06-23  3:15 ` [dpdk-dev] [PATCH v2 " Phil Yang
@ 2019-06-24 14:46   ` Eads, Gage
  2019-06-24 15:35     ` Phil Yang (Arm Technology China)
  0 siblings, 1 reply; 10+ messages in thread
From: Eads, Gage @ 2019-06-24 14:46 UTC (permalink / raw)
  To: Phil Yang, dev
  Cc: thomas, jerinj, hemant.agrawal, Honnappa.Nagarahalli, gavin.hu, nd

Hi Phil,

> diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h
> b/lib/librte_eal/common/include/generic/rte_atomic.h
> index 9958543..7dd1aa4 100644
> --- a/lib/librte_eal/common/include/generic/rte_atomic.h
> +++ b/lib/librte_eal/common/include/generic/rte_atomic.h
> @@ -1081,6 +1081,18 @@ static inline void
> rte_atomic64_clear(rte_atomic64_t *v)
> 
>  /*------------------------ 128 bit atomic operations -------------------------*/
> 
> +/**
> + * 128-bit integer structure.
> + */
> +RTE_STD_C11
> +typedef struct {
> +	RTE_STD_C11
> +	union {
> +		uint64_t val[2];
> +		__extension__ __int128 int128;
> +	};
> +} __rte_aligned(16) rte_int128_t;
> +
>  #ifdef __DOXYGEN__
> 

This change breaks 32-bit x86 builds*. A couple of ways to resolve this are 1) guard it with RTE_ARCH_* ifdefs, or 2) keep duplicate definitions of the struct in the aarch64 and x86 header files.
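
A sketch of option 1 (illustrative; the exact guard macros would need
checking):

/* Expose the 128-bit type only where 16-byte atomics are implemented. */
#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
RTE_STD_C11
typedef struct {
	RTE_STD_C11
	union {
		uint64_t val[2];
		__extension__ __int128 int128;
	};
} __rte_aligned(16) rte_int128_t;
#endif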

Thanks,
Gage

*http://mails.dpdk.org/archives/test-report/2019-June/086586.html


* [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange
  2019-06-23  2:41 [dpdk-dev] [PATCH v1 " Phil Yang
@ 2019-06-23  3:15 ` Phil Yang
  2019-06-24 14:46   ` Eads, Gage
  0 siblings, 1 reply; 10+ messages in thread
From: Phil Yang @ 2019-06-23  3:15 UTC (permalink / raw)
  To: dev
  Cc: thomas, jerinj, hemant.agrawal, Honnappa.Nagarahalli, gavin.hu,
	nd, gage.eads

Add 128-bit atomic compare exchange on aarch64.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
This patch depends on 'eal/stack: fix 'pointer-sign' warning'
http://patchwork.dpdk.org/patch/54840/

v2:
Fixed coding style warning.

 .../common/include/arch/arm/rte_atomic_64.h        | 184 +++++++++++++++++++++
 .../common/include/arch/x86/rte_atomic_64.h        |  12 --
 lib/librte_eal/common/include/generic/rte_atomic.h |  15 +-
 3 files changed, 198 insertions(+), 13 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
index 97060e4..ae29ce6 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2015 Cavium, Inc
+ * Copyright(c) 2019 Arm Limited
  */
 
 #ifndef _RTE_ATOMIC_ARM64_H_
@@ -14,6 +15,9 @@ extern "C" {
 #endif
 
 #include "generic/rte_atomic.h"
+#include <rte_branch_prediction.h>
+#include <rte_compat.h>
+#include <rte_debug.h>
 
 #define dsb(opt) asm volatile("dsb " #opt : : : "memory")
 #define dmb(opt) asm volatile("dmb " #opt : : : "memory")
@@ -40,6 +44,186 @@ extern "C" {
 
 #define rte_cio_rmb() dmb(oshld)
 
+/*----------------------- 128 bit atomic operations -------------------------*/
+
+#define RTE_HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
+#define RTE_HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || \
+			 (mo) == __ATOMIC_ACQ_REL || \
+			 (mo) == __ATOMIC_SEQ_CST)
+
+#define RTE_MO_LOAD(mo)  (RTE_HAS_ACQ((mo)) \
+		? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED)
+#define RTE_MO_STORE(mo) (RTE_HAS_RLS((mo)) \
+		? __ATOMIC_RELEASE : __ATOMIC_RELAXED)
+
+#ifdef __ARM_FEATURE_ATOMICS
+static inline rte_int128_t
+__rte_casp(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated, int mo)
+{
+
+	/* The caspX instructions' register pair must start from an
+	 * even-numbered register at operand 1, so specify registers
+	 * for the local variables here.
+	 */
+	register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];
+	register uint64_t x1 __asm("x1") = (uint64_t)old.val[1];
+	register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0];
+	register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1];
+
+	if (mo ==  __ATOMIC_RELAXED) {
+		asm volatile(
+				"casp %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
+				: [old0] "+r" (x0),
+				  [old1] "+r" (x1)
+				: [upd0] "r" (x2),
+				  [upd1] "r" (x3),
+				  [dst] "r" (dst)
+				: "memory");
+	} else if (mo == __ATOMIC_ACQUIRE) {
+		asm volatile(
+				"caspa %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
+				: [old0] "+r" (x0),
+				  [old1] "+r" (x1)
+				: [upd0] "r" (x2),
+				  [upd1] "r" (x3),
+				  [dst] "r" (dst)
+				: "memory");
+	} else if (mo == __ATOMIC_ACQ_REL) {
+		asm volatile(
+				"caspal %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
+				: [old0] "+r" (x0),
+				  [old1] "+r" (x1)
+				: [upd0] "r" (x2),
+				  [upd1] "r" (x3),
+				  [dst] "r" (dst)
+				: "memory");
+	} else if (mo == __ATOMIC_RELEASE) {
+		asm volatile(
+				"caspl %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
+				: [old0] "+r" (x0),
+				  [old1] "+r" (x1)
+				: [upd0] "r" (x2),
+				  [upd1] "r" (x3),
+				  [dst] "r" (dst)
+				: "memory");
+	} else {
+		rte_panic("Invalid memory order\n");
+	}
+
+	old.val[0] = x0;
+	old.val[1] = x1;
+
+	return old;
+}
+#else
+static inline rte_int128_t
+__rte_ldx128(const rte_int128_t *src, int mo)
+{
+	rte_int128_t ret;
+	if (mo == __ATOMIC_ACQUIRE)
+		asm volatile(
+				"ldaxp %0, %1, %2"
+				: "=&r" (ret.val[0]),
+				  "=&r" (ret.val[1])
+				: "Q" (src->val[0])
+				: "memory");
+	else if (mo == __ATOMIC_RELAXED)
+		asm volatile(
+				"ldxp %0, %1, %2"
+				: "=&r" (ret.val[0]),
+				  "=&r" (ret.val[1])
+				: "Q" (src->val[0])
+				: "memory");
+	else
+		rte_panic("Invalid memory order\n");
+
+	return ret;
+}
+
+static inline uint32_t
+__rte_stx128(rte_int128_t *dst, const rte_int128_t src, int mo)
+{
+	uint32_t ret;
+	if (mo == __ATOMIC_RELEASE)
+		asm volatile(
+				"stlxp %w0, %1, %2, %3"
+				: "=&r" (ret)
+				: "r" (src.val[0]),
+				  "r" (src.val[1]),
+				  "Q" (dst->val[0])
+				: "memory");
+	else if (mo == __ATOMIC_RELAXED)
+		asm volatile(
+				"stxp %w0, %1, %2, %3"
+				: "=&r" (ret)
+				: "r" (src.val[0]),
+				  "r" (src.val[1]),
+				  "Q" (dst->val[0])
+				: "memory");
+	else
+		rte_panic("Invalid memory order\n");
+
+	/* Return 0 on success, 1 on failure */
+	return ret;
+}
+#endif
+
+static inline int __rte_experimental
+rte_atomic128_cmp_exchange(rte_int128_t *dst,
+				rte_int128_t *exp,
+				const rte_int128_t *src,
+				unsigned int weak,
+				int success,
+				int failure)
+{
+	// Always do strong CAS
+	RTE_SET_USED(weak);
+	/* Ignore the memory order for failure; the memory order for
+	 * success must be stronger or equal.
+	 */
+	RTE_SET_USED(failure);
+
+#ifdef __ARM_FEATURE_ATOMICS
+	rte_int128_t expected = *exp;
+	rte_int128_t desired = *src;
+	rte_int128_t old;
+
+	old = __rte_casp(dst, expected, desired, success);
+#else
+	int ldx_mo = RTE_MO_LOAD(success);
+	int stx_mo = RTE_MO_STORE(success);
+	uint32_t ret = 1;
+	register rte_int128_t expected = *exp;
+	register rte_int128_t desired = *src;
+	register rte_int128_t old;
+
+	/* ldx128 cannot guarantee that the 128-bit load is atomic, so
+	 * src or old must be written back to verify its atomicity.
+	 */
+	do {
+		old = __rte_ldx128(dst, ldx_mo);
+		if (likely(old.int128 == expected.int128))
+			ret = __rte_stx128(dst, desired, stx_mo);
+		else
+			/* In the failure case (since 'weak' is ignored and only
+			 * weak == 0 is implemented), expected should contain the
+			 * atomically read value of dst. This means 'old' needs
+			 * to be stored back to ensure it was read atomically.
+			 */
+			ret = __rte_stx128(dst, old, stx_mo);
+	} while (unlikely(ret));
+#endif
+
+	/* Unconditionally updating expected removes
+	 * an 'if' statement.
+	 * expected should already be in a register,
+	 * if not in the cache.
+	 */
+	*exp = old;
+
+	return (old.int128 == expected.int128);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
index 6232c57..23cf48f 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
@@ -212,18 +212,6 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
 
 /*------------------------ 128 bit atomic operations -------------------------*/
 
-/**
- * 128-bit integer structure.
- */
-RTE_STD_C11
-typedef struct {
-	RTE_STD_C11
-	union {
-		uint64_t val[2];
-		__extension__ __int128 int128;
-	};
-} __rte_aligned(16) rte_int128_t;
-
 static inline int __rte_experimental
 rte_atomic128_cmp_exchange(rte_int128_t *dst,
 			   rte_int128_t *exp,
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
index 9958543..7dd1aa4 100644
--- a/lib/librte_eal/common/include/generic/rte_atomic.h
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -1081,6 +1081,18 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
 
 /*------------------------ 128 bit atomic operations -------------------------*/
 
+/**
+ * 128-bit integer structure.
+ */
+RTE_STD_C11
+typedef struct {
+	RTE_STD_C11
+	union {
+		uint64_t val[2];
+		__extension__ __int128 int128;
+	};
+} __rte_aligned(16) rte_int128_t;
+
 #ifdef __DOXYGEN__
 
 /**
@@ -1093,7 +1105,8 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
  *     *exp = *dst
  * @endcode
  *
- * @note This function is currently only available for the x86-64 platform.
+ * @note This function is currently available for the x86-64 and aarch64
+ * platforms.
  *
  * @note The success and failure arguments must be one of the __ATOMIC_* values
  * defined in the C++11 standard. For details on their behavior, refer to the
-- 
2.7.4
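
For reference, a usage sketch of the new API (illustrative, not part of
the patch):

/* Atomically add to both halves of a 16-byte counter pair. */
static void
add_pair(rte_int128_t *dst, uint64_t a, uint64_t b)
{
	rte_int128_t exp = *dst;	/* snapshot; refreshed on CAS failure */
	rte_int128_t des;
	int ok;

	do {
		des.val[0] = exp.val[0] + a;
		des.val[1] = exp.val[1] + b;
		/* strong CAS; on failure 'exp' holds the value actually read */
		ok = rte_atomic128_cmp_exchange(dst, &exp, &des, 0,
						__ATOMIC_ACQ_REL,
						__ATOMIC_RELAXED);
	} while (!ok);
}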



Thread overview: 10+ messages
2019-06-24  6:41 [dpdk-dev] [PATCH v2 1/3] eal/arm64: add 128-bit atomic compare exchange Jerin Jacob Kollanukkaran
2019-06-24 15:43 ` Phil Yang (Arm Technology China)
2019-06-24 16:12   ` Honnappa Nagarahalli
2019-06-24 16:25     ` Thomas Monjalon
2019-06-24 17:41       ` Honnappa Nagarahalli
2019-06-25  6:15         ` Jerin Jacob Kollanukkaran
2019-06-26 10:10           ` Phil Yang (Arm Technology China)
  -- strict thread matches above, loose matches on Subject: below --
2019-06-23  2:41 [dpdk-dev] [PATCH v1 " Phil Yang
2019-06-23  3:15 ` [dpdk-dev] [PATCH v2 " Phil Yang
2019-06-24 14:46   ` Eads, Gage
2019-06-24 15:35     ` Phil Yang (Arm Technology China)
