> I'm not mad keen on propagating these sorts of asm atomic operations throughout
> our code base. Other options would be:

gcc has atomic builtins to do this kind of work. I don't know about arm, but
they do the job on x86:

http://gcc.gnu.org/onlinedocs/gcc-4.1.1/gcc/Atomic-Builtins.html

so atomic_inc(val) -> __sync_fetch_and_add(val, 1), and likewise for dec/sub.

Andres

> - use libatomic-ops, http://www.hpl.hp.com/research/linux/atomic_ops/, although
>   this doesn't seem to be as widespread as I would like (not in RHEL5 for
>   example)
> - use a pthread lock. This is probably the simplest/best option but I wasn't
>   able to figure out the locking hierarchy of this code
>
> So I've copped out and just copied the appropriate inlines here.
>
> I also nuked some stray ia64 support and fixed a comment in the arm32 version
> while I was here.
>
> Signed-off-by: Ian Campbell
> ---
>  tools/memshr/bidir-hash.c |   48 +++++++++++++++++++++++++++++++++-----------
>  1 files changed, 36 insertions(+), 12 deletions(-)
>
> diff --git a/tools/memshr/bidir-hash.c b/tools/memshr/bidir-hash.c
> index 45d473e..bed8179 100644
> --- a/tools/memshr/bidir-hash.c
> +++ b/tools/memshr/bidir-hash.c
> @@ -100,22 +100,13 @@ int __hash_iterator(struct __hash *h,
>                        void *d);
>  static void hash_resize(struct __hash *h);
>
> -#if defined(__ia64__)
> -#define ia64_fetchadd4_rel(p, inc) do {                         \
> -    uint64_t ia64_intri_res;                                    \
> -    asm volatile ("fetchadd4.rel %0=[%1],%2"                    \
> -                : "=r"(ia64_intri_res) : "r"(p), "i" (inc)      \
> -                : "memory");                                    \
> -} while (0)
> -static inline void atomic_inc(uint32_t *v) { ia64_fetchadd4_rel(v, 1); }
> -static inline void atomic_dec(uint32_t *v) { ia64_fetchadd4_rel(v, -1); }
> -#elif defined(__arm__)
> +#if defined(__arm__)
>  static inline void atomic_inc(uint32_t *v)
>  {
>      unsigned long tmp;
>      int result;
>
> -    __asm__ __volatile__("@ atomic_add\n"
> +    __asm__ __volatile__("@ atomic_inc\n"
>  "1:     ldrex   %0, [%3]\n"
>  "       add     %0, %0, #1\n"
>  "       strex   %1, %0, [%3]\n"
> @@ -130,7 +121,7 @@ static inline void atomic_dec(uint32_t *v)
>      unsigned long tmp;
>      int result;
>
> -    __asm__ __volatile__("@ atomic_sub\n"
> +    __asm__ __volatile__("@ atomic_dec\n"
>  "1:     ldrex   %0, [%3]\n"
>  "       sub     %0, %0, #1\n"
>  "       strex   %1, %0, [%3]\n"
> @@ -140,6 +131,39 @@ static inline void atomic_dec(uint32_t *v)
>          : "r" (v)
>          : "cc");
>  }
> +
> +#elif defined(__aarch64__)
> +
> +static inline void atomic_inc(uint32_t *v)
> +{
> +    unsigned long tmp;
> +    int result;
> +
> +    asm volatile("// atomic_inc\n"
> +"1:     ldxr    %w0, [%3]\n"
> +"       add     %w0, %w0, #1\n"
> +"       stxr    %w1, %w0, [%3]\n"
> +"       cbnz    %w1, 1b"
> +        : "=&r" (result), "=&r" (tmp), "+o" (v)
> +        : "r" (v)
> +        : "cc");
> +}
> +
> +static inline void atomic_dec(uint32_t *v)
> +{
> +    unsigned long tmp;
> +    int result;
> +
> +    asm volatile("// atomic_dec\n"
> +"1:     ldxr    %w0, [%3]\n"
> +"       sub     %w0, %w0, #1\n"
> +"       stxr    %w1, %w0, [%3]\n"
> +"       cbnz    %w1, 1b"
> +        : "=&r" (result), "=&r" (tmp), "+o" (v)
> +        : "r" (v)
> +        : "cc");
> +}
> +
>  #else /* __x86__ */
>  static inline void atomic_inc(uint32_t *v)
>  {
> --
> 1.7.2.5
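
For illustration, helpers along the lines Andres suggests above would look
roughly like this. This is only a sketch, not build-tested, and assumes a
gcc new enough (>= 4.1) to provide the __sync builtins on the target
architectures:

    #include <stdint.h>

    /* Builtin-based replacements for the asm helpers in bidir-hash.c;
     * gcc emits the appropriate locked/exclusive sequence per target. */
    static inline void atomic_inc(uint32_t *v)
    {
        (void)__sync_fetch_and_add(v, 1);
    }

    static inline void atomic_dec(uint32_t *v)
    {
        (void)__sync_fetch_and_sub(v, 1);
    }

That would let the arm32/aarch64/x86 #ifdef blocks collapse into a single
definition, at the cost of requiring a new-enough compiler.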
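
The pthread option listed above would look something like the following.
Again just a sketch: the single process-wide mutex and its name are made up
here, and the right locking granularity would depend on the hash's locking
hierarchy, which hasn't been worked out:

    #include <stdint.h>
    #include <pthread.h>

    /* One lock serialising all counter updates; simple, but coarser
     * grained than the lock-free asm versions it would replace. */
    static pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER;

    static inline void atomic_inc(uint32_t *v)
    {
        pthread_mutex_lock(&atomic_lock);
        (*v)++;
        pthread_mutex_unlock(&atomic_lock);
    }

    static inline void atomic_dec(uint32_t *v)
    {
        pthread_mutex_lock(&atomic_lock);
        (*v)--;
        pthread_mutex_unlock(&atomic_lock);
    }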