linux-acpi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: Jia He <justin.he@arm.com>
Cc: Len Brown <lenb@kernel.org>, Tony Luck <tony.luck@intel.com>,
	Borislav Petkov <bp@alien8.de>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Robert Richter <rric@kernel.org>,
	Robert Moore <robert.moore@intel.com>,
	Qiuxu Zhuo <qiuxu.zhuo@intel.com>,
	Yazen Ghannam <yazen.ghannam@amd.com>,
	Jan Luebbe <jlu@pengutronix.de>,
	Khuong Dinh <khuong@os.amperecomputing.com>,
	Kani Toshi <toshi.kani@hpe.com>,
	James Morse <james.morse@arm.com>,
	linux-acpi@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org, devel@acpica.org,
	"Rafael J . Wysocki" <rafael@kernel.org>,
	Shuai Xue <xueshuai@linux.alibaba.com>,
	Jarkko Sakkinen <jarkko@kernel.org>,
	linux-efi@vger.kernel.org, nd@arm.com,
	Peter Zijlstra <peterz@infradead.org>
Subject: Re: [PATCH v9 6/7] apei/ghes: Use xchg_release() for updating new cache slot instead of cmpxchg()
Date: Mon, 17 Oct 2022 16:39:22 +0200	[thread overview]
Message-ID: <CAMj1kXEge+PtOHug2FhnbAetDLOTJ4jguC4uwA4oORbu-25YiA@mail.gmail.com> (raw)
In-Reply-To: <20221017130140.420986-7-justin.he@arm.com>

On Mon, 17 Oct 2022 at 15:02, Jia He <justin.he@arm.com> wrote:
>
> From: Ard Biesheuvel <ardb@kernel.org>
>
> From: Ard Biesheuvel <ardb@kernel.org>
>
> ghes_estatus_cache_add() selects a slot, and either succeeds in
> replacing its contents with a pointer to a new cached item, or it just
> gives up and frees the new item again, without attempting to select
> another slot even if one might be available.
>
> Since only inserting new items is needed, the race can only cause a failure
> if the selected slot was updated with another new item concurrently,
> which means that it is arbitrary which of those two items gets
> dropped. This means the cmpxchg() and the special case are not necessary,
> and hence just drop the existing item unconditionally. Note that this
> does not result in loss of error events, it simply means we might
> cause a false cache miss, and report the same event one additional
> time in quick succession even if the cache should have prevented that.
>
> Move the xchg_release() and call_rcu out of rcu_read_lock/unlock section
> since there is no actually dereferencing the pointer at all.
>
> Co-developed-by: Jia He <justin.he@arm.com>
> Signed-off-by: Jia He <justin.he@arm.com>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  drivers/acpi/apei/ghes.c | 47 +++++++++++++++++++++-------------------
>  1 file changed, 25 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 27c72b175e4b..5d7754053ca0 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -150,7 +150,7 @@ struct ghes_vendor_record_entry {
>  static struct gen_pool *ghes_estatus_pool;
>  static unsigned long ghes_estatus_pool_size_request;
>
> -static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
> +static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
>  static atomic_t ghes_estatus_cache_alloced;
>
>  static int ghes_panic_timeout __read_mostly = 30;
> @@ -785,31 +785,26 @@ static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
>         return cache;
>  }
>
> -static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
> +static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
>  {
> +       struct ghes_estatus_cache *cache;
>         u32 len;
>
> +       cache = container_of(head, struct ghes_estatus_cache, rcu);
>         len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
>         len = GHES_ESTATUS_CACHE_LEN(len);
>         gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
>         atomic_dec(&ghes_estatus_cache_alloced);
>  }
>
> -static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
> -{
> -       struct ghes_estatus_cache *cache;
> -
> -       cache = container_of(head, struct ghes_estatus_cache, rcu);
> -       ghes_estatus_cache_free(cache);
> -}
> -
>  static void ghes_estatus_cache_add(
>         struct acpi_hest_generic *generic,
>         struct acpi_hest_generic_status *estatus)
>  {
>         int i, slot = -1, count;
>         unsigned long long now, duration, period, max_period = 0;
> -       struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
> +       struct ghes_estatus_cache *cache, *new_cache;
> +       struct ghes_estatus_cache __rcu *victim;
>
>         new_cache = ghes_estatus_cache_alloc(generic, estatus);
>         if (new_cache == NULL)
> @@ -820,13 +815,11 @@ static void ghes_estatus_cache_add(
>                 cache = rcu_dereference(ghes_estatus_caches[i]);
>                 if (cache == NULL) {
>                         slot = i;
> -                       slot_cache = NULL;
>                         break;
>                 }
>                 duration = now - cache->time_in;
>                 if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
>                         slot = i;
> -                       slot_cache = cache;
>                         break;
>                 }
>                 count = atomic_read(&cache->count);
> @@ -835,18 +828,28 @@ static void ghes_estatus_cache_add(
>                 if (period > max_period) {
>                         max_period = period;
>                         slot = i;
> -                       slot_cache = cache;
>                 }
>         }
> -       /* new_cache must be put into array after its contents are written */
> -       smp_wmb();
> -       if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
> -                                 slot_cache, new_cache) == slot_cache) {
> -               if (slot_cache)
> -                       call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
> -       } else
> -               ghes_estatus_cache_free(new_cache);
>         rcu_read_unlock();
> +
> +       if (slot != -1) {
> +               /*
> +                * Use release semantics to ensure that ghes_estatus_cached()
> +                * running on another CPU will see the updated cache fields if
> +                * it can see the new value of the pointer.
> +                */
> +               victim = xchg_release(&ghes_estatus_caches[slot], new_cache);
> +

This still lacks the RCU_INITIALIZER()

> +               /*
> +                * At this point, victim may point to a cached item different
> +                * from the one based on which we selected the slot. Instead of
> +                * going to the loop again to pick another slot, let's just
> +                * drop the other item anyway: this may cause a false cache
> +                * miss later on, but that won't cause any problems.
> +                */
> +               if (victim)
> +                       call_rcu(unrcu_pointer(&victim->rcu), ghes_estatus_cache_rcu_free);

Please use &unrcu_pointer(victim)->rcu here.

> +       }
>  }
>
>  static void __ghes_panic(struct ghes *ghes,
> --
> 2.25.1
>

  reply	other threads:[~2022-10-17 14:39 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-17 13:01 [PATCH v9 0/7] Make ghes_edac a proper module Jia He
2022-10-17 13:01 ` [PATCH v9 1/7] efi/cper: export several helpers for ghes_edac to use Jia He
2022-10-17 13:01 ` [PATCH v9 2/7] EDAC/ghes: Add a notifier for reporting memory errors Jia He
2022-10-17 13:01 ` [PATCH v9 3/7] EDAC/ghes: Prepare to make ghes_edac a proper module Jia He
2022-10-17 13:01 ` [PATCH v9 4/7] EDAC/ghes: Make ghes_edac a proper module to remove the dependency on ghes Jia He
2022-10-17 13:01 ` [PATCH v9 5/7] EDAC: Add the ghes_get_devices() check for chipset-specific edac drivers Jia He
2022-10-17 13:01 ` [PATCH v9 6/7] apei/ghes: Use xchg_release() for updating new cache slot instead of cmpxchg() Jia He
2022-10-17 14:39   ` Ard Biesheuvel [this message]
2022-10-17 13:01 ` [PATCH v9 7/7] EDAC/igen6: Return consistent errno when another edac driver is enabled Jia He

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAMj1kXEge+PtOHug2FhnbAetDLOTJ4jguC4uwA4oORbu-25YiA@mail.gmail.com \
    --to=ardb@kernel.org \
    --cc=bp@alien8.de \
    --cc=devel@acpica.org \
    --cc=james.morse@arm.com \
    --cc=jarkko@kernel.org \
    --cc=jlu@pengutronix.de \
    --cc=justin.he@arm.com \
    --cc=khuong@os.amperecomputing.com \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mchehab@kernel.org \
    --cc=nd@arm.com \
    --cc=peterz@infradead.org \
    --cc=qiuxu.zhuo@intel.com \
    --cc=rafael@kernel.org \
    --cc=robert.moore@intel.com \
    --cc=rric@kernel.org \
    --cc=tony.luck@intel.com \
    --cc=toshi.kani@hpe.com \
    --cc=xueshuai@linux.alibaba.com \
    --cc=yazen.ghannam@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).