From mboxrd@z Thu Jan 1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:36312) by lists.gnu.org
 with esmtp (Exim 4.71) (envelope-from ) id 1d6B8E-0007el-L8
 for qemu-devel@nongnu.org; Thu, 04 May 2017 03:25:00 -0400
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
 (envelope-from ) id 1d6B8D-0007um-4d for qemu-devel@nongnu.org;
 Thu, 04 May 2017 03:24:58 -0400
References: <20170420120058.28404-1-pbonzini@redhat.com>
 <20170420120058.28404-10-pbonzini@redhat.com>
 <20170504071957.GD19184@lemon.lan>
From: Paolo Bonzini
Message-ID: 
Date: Thu, 4 May 2017 09:24:44 +0200
MIME-Version: 1.0
In-Reply-To: <20170504071957.GD19184@lemon.lan>
Content-Type: text/plain; charset=windows-1252
Content-Transfer-Encoding: 7bit
Subject: Re: [Qemu-devel] [PATCH 09/17] util: add stats64 module
List-Id: 
List-Unsubscribe: ,
List-Archive: 
List-Post: 
List-Help: 
List-Subscribe: ,
To: Fam Zheng
Cc: qemu-devel@nongnu.org, qemu-block@nongnu.org

On 04/05/2017 09:19, Fam Zheng wrote:
> On Thu, 04/20 14:00, Paolo Bonzini wrote:
>> This module provides fast paths for 64-bit atomic operations on machines
>> that only have 32-bit atomic access.
> 
> Interesting patch!
> 
> Out of curiosity: what are the machines here, besides i386? It strikes me
> that they are old and slow anyway, so why this optimization?

I guess ARM too.  I figured it might be useful for TCG too in the future,
and it was fun to write. :)

Paolo

> 
>>
>> Signed-off-by: Paolo Bonzini
>> ---
>>  include/qemu/stats64.h | 210 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  util/Makefile.objs     |   1 +
>>  util/stats64.c         | 135 ++++++++++++++++++++++++++++++++
>>  3 files changed, 346 insertions(+)
>>  create mode 100644 include/qemu/stats64.h
>>  create mode 100644 util/stats64.c
>>
>> diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h
>> new file mode 100644
>> index 0000000..70963f4
>> --- /dev/null
>> +++ b/include/qemu/stats64.h
>> @@ -0,0 +1,210 @@
>> +/*
>> + * Atomic operations on 64-bit quantities.
>> + *
>> + * Copyright (C) 2017 Red Hat, Inc.
>> + *
>> + * Author: Paolo Bonzini
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#ifndef QEMU_STATS64_H
>> +#define QEMU_STATS64_H 1
>> +
>> +#include "qemu/atomic.h"
> 
> Include qemu/osdep.h first, to honor scripts/clean-includes?
> 
>> +
>> +/* FIXME: i386 doesn't need the spinlock.  Are there any others? */
>> +#if __SIZEOF_LONG__ < 8
>> +#define STAT64_NEED_SPINLOCK 1
>> +#endif
>> +
>> +/* This provides atomic operations on a 64-bit type, using a reader-writer
>> + * spinlock on architectures that do not have 64-bit accesses.  However,
>> + * it tries hard not to take the lock.
>> + */
>> +
>> +typedef struct Stat64 {
>> +#ifdef STAT64_NEED_SPINLOCK
>> +    uint32_t low, high;
>> +    uint32_t lock;
>> +#else
>> +    uint64_t value;
>> +#endif
>> +} Stat64;
>> +
>> +#ifndef STAT64_NEED_SPINLOCK
>> +static inline void stat64_init(Stat64 *s, uint64_t value)
>> +{
>> +    /* This is not guaranteed to be atomic! */
>> +    *s = (Stat64) { value };
>> +}
>> +
>> +static inline uint64_t stat64_get(const Stat64 *s)
>> +{
>> +    return atomic_read(&s->value);
>> +}
>> +
>> +static inline void stat64_add(Stat64 *s, uint64_t value)
>> +{
>> +    atomic_add(&s->value, value);
>> +}
>> +
>> +static inline void stat64_min(Stat64 *s, uint64_t value)
>> +{
>> +    for (;;) {
>> +        uint64_t orig = atomic_read(&s->value);
>> +        if (orig <= value) {
>> +            break;
>> +        }
>> +        orig = atomic_cmpxchg(&s->value, orig, value);
>> +        if (orig <= value) {
>> +            break;
>> +        }
>> +    }
>> +}
>> +
>> +static inline void stat64_max(Stat64 *s, uint64_t value)
>> +{
>> +    for (;;) {
>> +        uint64_t orig = atomic_read(&s->value);
>> +        if (orig >= value) {
>> +            break;
>> +        }
>> +        orig = atomic_cmpxchg(&s->value, orig, value);
>> +        if (orig >= value) {
>> +            break;
>> +        }
>> +    }
>> +}
>> +#else
>> +uint64_t stat64_get(const Stat64 *s);
>> +bool stat64_min_slow(Stat64 *s, uint64_t value);
>> +bool stat64_max_slow(Stat64 *s, uint64_t value);
>> +bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high);
>> +
>> +static inline void stat64_init(Stat64 *s, uint64_t value)
>> +{
>> +    /* This is not guaranteed to be atomic! */
>> +    *s = (Stat64) { .low = value, .high = value >> 32, .lock = 0 };
>> +}
>> +
>> +static inline void stat64_add(Stat64 *s, uint64_t value)
>> +{
>> +    uint32_t low, high;
>> +    high = value >> 32;
>> +    low = (uint32_t) value;
>> +    if (!low) {
>> +        if (high) {
>> +            atomic_add(&s->high, high);
>> +        }
>> +        return;
>> +    }
>> +
>> +    for (;;) {
>> +        uint32_t orig = s->low;
>> +        uint32_t result = orig + low;
>> +        uint32_t old;
>> +
>> +        if (result < low || high) {
>> +            /* If the high part is affected, take the lock. */
>> +            if (stat64_add32_carry(s, low, high)) {
>> +                return;
>> +            }
>> +            continue;
>> +        }
>> +
>> +        /* No carry, try with a 32-bit cmpxchg.  The result is independent of
>> +         * the high 32 bits, so it can race just fine with stat64_add32_carry
>> +         * and even stat64_get!
>> +         */
>> +        old = atomic_cmpxchg(&s->low, orig, result);
>> +        if (orig == old) {
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>> +static inline void stat64_min(Stat64 *s, uint64_t value)
>> +{
>> +    uint32_t low, high;
>> +    uint32_t orig_low, orig_high;
>> +
>> +    high = value >> 32;
>> +    low = (uint32_t) value;
>> +    do {
>> +        orig_high = atomic_read(&s->high);
>> +        if (orig_high < high) {
>> +            return;
>> +        }
>> +
>> +        if (orig_high == high) {
>> +            /* High 32 bits are equal.  Read low after high, otherwise we
>> +             * can get a false positive (e.g. 0x1235,0x0000 changes to
>> +             * 0x1234,0x8000 and we read it as 0x1234,0x0000).  Pairs with
>> +             * the write barrier in stat64_min_slow.
>> +             */
>> +            smp_rmb();
>> +            orig_low = atomic_read(&s->low);
>> +            if (orig_low <= low) {
>> +                return;
>> +            }
>> +
>> +            /* See if we were lucky and a writer raced against us.  The
>> +             * barrier is theoretically unnecessary, but if we remove it
>> +             * we may miss being lucky.
>> +             */
>> +            smp_rmb();
>> +            orig_high = atomic_read(&s->high);
>> +            if (orig_high < high) {
>> +                return;
>> +            }
>> +        }
>> +
>> +        /* If the value changes in any way, we have to take the lock. */
>> +    } while (!stat64_min_slow(s, value));
>> +}
>> +
>> +static inline void stat64_max(Stat64 *s, uint64_t value)
>> +{
>> +    uint32_t low, high;
>> +    uint32_t orig_low, orig_high;
>> +
>> +    high = value >> 32;
>> +    low = (uint32_t) value;
>> +    do {
>> +        orig_high = atomic_read(&s->high);
>> +        if (orig_high > high) {
>> +            return;
>> +        }
>> +
>> +        if (orig_high == high) {
>> +            /* High 32 bits are equal.  Read low after high, otherwise we
>> +             * can get a false positive (e.g. 0x1234,0x8000 changes to
>> +             * 0x1235,0x0000 and we read it as 0x1235,0x8000).  Pairs with
>> +             * the write barrier in stat64_max_slow.
>> +             */
>> +            smp_rmb();
>> +            orig_low = atomic_read(&s->low);
>> +            if (orig_low >= low) {
>> +                return;
>> +            }
>> +
>> +            /* See if we were lucky and a writer raced against us.  The
>> +             * barrier is theoretically unnecessary, but if we remove it
>> +             * we may miss being lucky.
>> +             */
>> +            smp_rmb();
>> +            orig_high = atomic_read(&s->high);
>> +            if (orig_high > high) {
>> +                return;
>> +            }
>> +        }
>> +
>> +        /* If the value changes in any way, we have to take the lock. */
>> +    } while (!stat64_max_slow(s, value));
>> +}
>> +
>> +#endif
>> +
>> +#endif
>> diff --git a/util/Makefile.objs b/util/Makefile.objs
>> index c6205eb..8a333d3 100644
>> --- a/util/Makefile.objs
>> +++ b/util/Makefile.objs
>> @@ -42,4 +42,5 @@ util-obj-y += log.o
>>  util-obj-y += qdist.o
>>  util-obj-y += qht.o
>>  util-obj-y += range.o
>> +util-obj-y += stats64.o
>>  util-obj-y += systemd.o
>> diff --git a/util/stats64.c b/util/stats64.c
>> new file mode 100644
>> index 0000000..b9238d7
>> --- /dev/null
>> +++ b/util/stats64.c
>> @@ -0,0 +1,135 @@
>> +/*
>> + * Atomic operations on 64-bit quantities.
>> + *
>> + * Copyright (C) 2017 Red Hat, Inc.
>> + *
>> + * Author: Paolo Bonzini
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/atomic.h"
>> +#include "qemu/stats64.h"
>> +
>> +#ifdef STAT64_NEED_SPINLOCK
>> +static inline void stat64_rdlock(Stat64 *s)
>> +{
>> +    /* Keep out incoming writers to avoid them starving us. */
>> +    atomic_add(&s->lock, 2);
>> +
>> +    /* If there is a concurrent writer, wait for it. */
>> +    while (atomic_read(&s->lock) & 1) {
>> +        g_usleep(5);
>> +    }
>> +}
>> +
>> +static inline void stat64_rdunlock(Stat64 *s)
>> +{
>> +    atomic_sub(&s->lock, 2);
>> +}
>> +
>> +static inline bool stat64_wrtrylock(Stat64 *s)
>> +{
>> +    return atomic_cmpxchg(&s->lock, 0, 1) == 0;
>> +}
>> +
>> +static inline void stat64_wrunlock(Stat64 *s)
>> +{
>> +    atomic_dec(&s->lock);
>> +}
>> +
>> +uint64_t stat64_get(const Stat64 *s)
>> +{
>> +    uint32_t high, low;
>> +
>> +    stat64_rdlock((Stat64 *)s);
>> +
>> +    /* 64-bit writes always take the lock, so we can read in
>> +     * any order.
>> +     */
>> +    high = atomic_read(&s->high);
>> +    low = atomic_read(&s->low);
>> +    stat64_rdunlock((Stat64 *)s);
>> +
>> +    return ((uint64_t)high << 32) | low;
>> +}
>> +
>> +bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high)
> 
> Maybe add "try" in the name too, for this, and the two below?
> 
>> +{
>> +    uint32_t old;
>> +
>> +    if (!stat64_wrtrylock(s)) {
>> +        return false;
>> +    }
>> +
>> +    /* 64-bit reads always take the lock, so they don't care about the
>> +     * order of our update.  By updating s->low first, we can check
>> +     * whether we have to carry into s->high.
>> +     */
>> +    old = atomic_fetch_add(&s->low, low);
>> +    high += (old + low < old);
>> +    atomic_add(&s->high, high);
>> +    stat64_wrunlock(s);
>> +    return true;
>> +}
>> +
>> +bool stat64_min_slow(Stat64 *s, uint64_t value)
>> +{
>> +    uint32_t high, low;
>> +    uint64_t orig;
>> +
>> +    if (!stat64_wrtrylock(s)) {
>> +        return false;
>> +    }
>> +
>> +    high = atomic_read(&s->high);
>> +    low = atomic_read(&s->low);
>> +
>> +    orig = ((uint64_t)high << 32) | low;
>> +    if (value < orig) {
>> +        /* The value may become higher temporarily, but stat64_get does not
>> +         * notice (it takes the lock) and the only effect on stat64_min is
>> +         * that the slow path may be triggered unnecessarily.
>> +         *
>> +         * But, we have to set low before high, just like stat64_min reads
>> +         * high before low.
>> +         */
>> +        atomic_set(&s->low, (uint32_t)value);
>> +        smp_wmb();
>> +        atomic_set(&s->high, value >> 32);
>> +    }
>> +    stat64_wrunlock(s);
>> +    return true;
>> +}
>> +
>> +bool stat64_max_slow(Stat64 *s, uint64_t value)
>> +{
>> +    uint32_t high, low;
>> +    uint64_t orig;
>> +
>> +    if (!stat64_wrtrylock(s)) {
>> +        return false;
>> +    }
>> +
>> +    high = atomic_read(&s->high);
>> +    low = atomic_read(&s->low);
>> +
>> +    orig = ((uint64_t)high << 32) | low;
>> +    if (value > orig) {
>> +        /* The value may become lower temporarily, but stat64_get does not
>> +         * notice (it takes the lock) and the only effect on stat64_max is
>> +         * that the slow path may be triggered unnecessarily.
>> +         *
>> +         * But, we have to set low before high, just like stat64_max reads
>> +         * high before low.
>> +         */
>> +        atomic_set(&s->low, (uint32_t)value);
>> +        smp_wmb();
>> +        atomic_set(&s->high, value >> 32);
>> +    }
>> +    stat64_wrunlock(s);
>> +    return true;
>> +}
>> +#endif
>> --
>> 2.9.3
>> 
>> 
>> 
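
For reference, a minimal caller-side sketch of how the API quoted above
would be used.  The IOStats struct and the iostats_* helpers are made up
for illustration only; the Stat64 type and the stat64_* calls are the ones
declared in the patch:

    #include "qemu/osdep.h"
    #include "qemu/stats64.h"

    typedef struct {
        Stat64 bytes;         /* running byte count, bumped from several threads */
        Stat64 min_latency;   /* smallest latency seen so far */
        Stat64 max_latency;   /* largest latency seen so far */
    } IOStats;

    static void iostats_init(IOStats *st)
    {
        /* stat64_init is the only call here that is not thread-safe. */
        stat64_init(&st->bytes, 0);
        stat64_init(&st->min_latency, UINT64_MAX);  /* so the first sample wins */
        stat64_init(&st->max_latency, 0);
    }

    static void iostats_account(IOStats *st, uint64_t bytes, uint64_t latency_ns)
    {
        /* These can race freely with each other and with iostats_report. */
        stat64_add(&st->bytes, bytes);
        stat64_min(&st->min_latency, latency_ns);
        stat64_max(&st->max_latency, latency_ns);
    }

    static void iostats_report(IOStats *st)
    {
        /* stat64_get returns a consistent 64-bit snapshot even on 32-bit hosts. */
        printf("bytes=%" PRIu64 " min=%" PRIu64 " max=%" PRIu64 "\n",
               stat64_get(&st->bytes),
               stat64_get(&st->min_latency),
               stat64_get(&st->max_latency));
    }

A counter that only ever grows can skip stat64_min entirely; the UINT64_MAX
seed is only needed when a minimum is being tracked.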
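
And a small, self-contained illustration of the unsigned-overflow test that
both add paths rely on.  This is plain C, independent of QEMU, and only
restates the "old + low < old" / "result < low" trick from the patch:

    #include <stdint.h>
    #include <stdio.h>
    #include <assert.h>

    /* Returns 1 when adding 'add' to 'old' wraps around 32 bits, i.e. when
     * the sum needs a carry into the high word.  Unsigned addition wraps
     * mod 2^32, and the wrapped result is smaller than either operand
     * exactly when a carry happened, which is the same test as
     * "old + low < old" in stat64_add32_carry and "result < low" in the
     * stat64_add fast path. */
    static uint32_t carries(uint32_t old, uint32_t add)
    {
        uint32_t result = old + add;
        return result < old;
    }

    int main(void)
    {
        assert(carries(0xffffffffu, 1) == 1);           /* wraps to 0 */
        assert(carries(0x80000000u, 0x80000000u) == 1); /* wraps to 0 */
        assert(carries(123, 456) == 0);                 /* no carry */
        printf("carry checks pass\n");
        return 0;
    }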