Xen-Devel Archive on lore.kernel.org
 help / color / Atom feed
From: Jan Beulich <jbeulich@suse.com>
To: Chao Gao <chao.gao@intel.com>
Cc: "Kevin Tian" <kevin.tian@intel.com>,
	"Ashok Raj" <ashok.raj@intel.com>, "Wei Liu" <wl@xen.org>,
	"Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Jun Nakajima" <jun.nakajima@intel.com>,
	xen-devel@lists.xenproject.org,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Borislav Petkov" <bp@suse.de>,
	"Roger Pau Monné" <roger.pau@citrix.com>
Subject: Re: [Xen-devel] [PATCH v9 13/15] x86/microcode: Synchronize late microcode loading
Date: Thu, 29 Aug 2019 14:06:39 +0200
Message-ID: <2441e448-5fe1-bdbc-f0b6-720401fd0bf0@suse.com> (raw)
In-Reply-To: <1566177928-19114-14-git-send-email-chao.gao@intel.com>

On 19.08.2019 03:25, Chao Gao wrote:
> @@ -232,6 +276,34 @@ bool microcode_update_cache(struct microcode_patch *patch)
>      return true;
>  }
>  
> +/* Wait for a condition to be met with a timeout (us). */
> +static int wait_for_condition(int (*func)(void *data), void *data,
> +                         unsigned int timeout)
> +{
> +    while ( !func(data) )
> +    {
> +        if ( !timeout-- )
> +        {
> +            printk("CPU%u: Timeout in %pS\n",
> +                   smp_processor_id(), __builtin_return_address(0));
> +            return -EBUSY;
> +        }
> +        udelay(1);
> +    }
> +
> +    return 0;
> +}
> +
> +static int wait_cpu_callin(void *nr)
> +{
> +    return cpumask_weight(&cpu_callin_map) >= (unsigned long)nr;
> +}
> +
> +static int wait_cpu_callout(void *nr)
> +{
> +    return atomic_read(&cpu_out) >= (unsigned long)nr;
> +}

Since wait_for_condition() is used with only these two functions
as callbacks, they should imo return bool and take const void *.

> @@ -265,37 +337,155 @@ static int microcode_update_cpu(const struct microcode_patch *patch)
>      return err;
>  }
>  
> -static long do_microcode_update(void *patch)
> +static int slave_thread_fn(void)
> +{
> +    unsigned int cpu = smp_processor_id();
> +    unsigned int master = cpumask_first(this_cpu(cpu_sibling_mask));
> +
> +    while ( loading_state != LOADING_CALLIN )
> +        cpu_relax();
> +
> +    cpumask_set_cpu(cpu, &cpu_callin_map);
> +
> +    while ( loading_state != LOADING_EXIT )
> +        cpu_relax();
> +
> +    /* Copy update revision from the "master" thread. */
> +    this_cpu(cpu_sig).rev = per_cpu(cpu_sig, master).rev;
> +
> +    return 0;
> +}
> +
> +static int master_thread_fn(const struct microcode_patch *patch)
> +{
> +    unsigned int cpu = smp_processor_id();
> +    int ret = 0;
> +
> +    while ( loading_state != LOADING_CALLIN )
> +        cpu_relax();
> +
> +    cpumask_set_cpu(cpu, &cpu_callin_map);
> +
> +    while ( loading_state != LOADING_ENTER )
> +        cpu_relax();
> +
> +    /*
> +     * If an error happened, control thread would set 'loading_state'
> +     * to LOADING_EXIT. Don't perform ucode loading for this case
> +     */
> +    if ( loading_state == LOADING_EXIT )
> +        return ret;

Even if the producer transitions this through ENTER to EXIT, the
observer here may never get to see the ENTER state, and hence
never exit the loop above. You want either < ENTER or == CALLIN.

> +    ret = microcode_ops->apply_microcode(patch);
> +    if ( !ret )
> +        atomic_inc(&cpu_updated);
> +    atomic_inc(&cpu_out);
> +
> +    while ( loading_state != LOADING_EXIT )
> +        cpu_relax();
> +
> +    return ret;
> +}

As a cosmetic remark, I don't think "master" and "slave" are
suitable terms here. "primary" and "secondary" would imo come
closer to what the threads' relationship is.

> +static int control_thread_fn(const struct microcode_patch *patch)
>  {
> -    unsigned int cpu;
> +    unsigned int cpu = smp_processor_id(), done;
> +    unsigned long tick;
> +    int ret;
>  
> -    /* Store the patch after a successful loading */
> -    if ( !microcode_update_cpu(patch) && patch )
> +    /* Allow threads to call in */
> +    loading_state = LOADING_CALLIN;
> +    smp_mb();

Why not just smp_wmb()? (Same further down then.)

> +    cpumask_set_cpu(cpu, &cpu_callin_map);
> +
> +    /* Waiting for all threads calling in */
> +    ret = wait_for_condition(wait_cpu_callin,
> +                             (void *)(unsigned long)num_online_cpus(),
> +                             MICROCODE_CALLIN_TIMEOUT_US);
> +    if ( ret ) {

Misplaced brace.

> +static int do_microcode_update(void *patch)

const?

> @@ -326,19 +523,67 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
>      {
>          ret = PTR_ERR(patch);
>          printk(XENLOG_INFO "Parsing microcode blob error %d\n", ret);
> -        goto free;
> +        goto put;
>      }
>  
>      if ( !patch )
>      {
>          printk(XENLOG_INFO "No ucode found. Update aborted!\n");
>          ret = -EINVAL;
> -        goto free;
> +        goto put;
> +    }
> +
> +    cpumask_clear(&cpu_callin_map);
> +    atomic_set(&cpu_out, 0);
> +    atomic_set(&cpu_updated, 0);
> +    loading_state = LOADING_PREPARE;
> +
> +    /* Calculate the number of online CPU core */
> +    nr_cores = 0;
> +    for_each_online_cpu(cpu)
> +        if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
> +            nr_cores++;
> +
> +    printk(XENLOG_INFO "%u cores are to update their microcode\n", nr_cores);
> +
> +    /*
> +     * We intend to disable interrupt for long time, which may lead to
> +     * watchdog timeout.
> +     */
> +    watchdog_disable();
> +    /*
> +     * Late loading dance. Why the heavy-handed stop_machine effort?
> +     *
> +     * - HT siblings must be idle and not execute other code while the other
> +     *   sibling is loading microcode in order to avoid any negative
> +     *   interactions cause by the loading.
> +     *
> +     * - In addition, microcode update on the cores must be serialized until
> +     *   this requirement can be relaxed in the future. Right now, this is
> +     *   conservative and good.
> +     */
> +    ret = stop_machine_run(do_microcode_update, patch, NR_CPUS);
> +    watchdog_enable();

Considering that stop_machine_run() doesn't itself disable the watchdog,
did you consider having the control thread disable/enable the watchdog,
thus shortening the period where it's not active?

> +    updated = atomic_read(&cpu_updated);
> +    if ( updated > 0 )
> +    {
> +        spin_lock(&microcode_mutex);
> +        microcode_update_cache(patch);
> +        spin_unlock(&microcode_mutex);
>      }
> +    else
> +        microcode_free_patch(patch);
>  
> -    ret = continue_hypercall_on_cpu(cpumask_first(&cpu_online_map),
> -                                    do_microcode_update, patch);
> +    if ( updated && updated != nr_cores )
> +        printk(XENLOG_ERR "ERROR: Updating microcode succeeded on %u cores and failed\n"
> +               XENLOG_ERR "on other %u cores. A system with differing microcode \n"

Stray blank before newline.

> +               XENLOG_ERR "revisions is considered unstable. Please reboot and do not\n"
> +               XENLOG_ERR "load the microcode that triggersthis warning!\n",

Missing blank before "this".

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply index

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-19  1:25 [Xen-devel] [PATCH v9 00/15] improve " Chao Gao
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 01/15] microcode/intel: extend microcode_update_match() Chao Gao
2019-08-28 15:12   ` Jan Beulich
2019-08-29  7:15     ` Chao Gao
2019-08-29  7:14       ` Jan Beulich
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 02/15] microcode/amd: fix memory leak Chao Gao
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 03/15] microcode/amd: distinguish old and mismatched ucode in microcode_fits() Chao Gao
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 04/15] microcode: introduce a global cache of ucode patch Chao Gao
2019-08-22 11:11   ` Roger Pau Monné
2019-08-28 15:21   ` Jan Beulich
2019-08-29 10:18   ` Jan Beulich
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 05/15] microcode: clean up microcode_resume_cpu Chao Gao
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 06/15] microcode: remove struct ucode_cpu_info Chao Gao
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 07/15] microcode: remove pointless 'cpu' parameter Chao Gao
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 08/15] microcode/amd: call svm_host_osvw_init() in common code Chao Gao
2019-08-22 13:08   ` Roger Pau Monné
2019-08-28 15:26   ` Jan Beulich
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 09/15] microcode: pass a patch pointer to apply_microcode() Chao Gao
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 10/15] microcode: split out apply_microcode() from cpu_request_microcode() Chao Gao
2019-08-22 13:59   ` Roger Pau Monné
2019-08-29 10:06     ` Jan Beulich
2019-08-30  3:22       ` Chao Gao
2019-08-30  7:25         ` Jan Beulich
2019-08-29 10:19   ` Jan Beulich
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 11/15] microcode: unify loading update during CPU resuming and AP wakeup Chao Gao
2019-08-22 14:10   ` Roger Pau Monné
2019-08-22 16:44     ` Chao Gao
2019-08-23  9:09       ` Roger Pau Monné
2019-08-29  7:37         ` Chao Gao
2019-08-29  8:16           ` Roger Pau Monné
2019-08-29 10:26           ` Jan Beulich
2019-08-29 10:29   ` Jan Beulich
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 12/15] microcode: reduce memory allocation and copy when creating a patch Chao Gao
2019-08-23  8:11   ` Roger Pau Monné
2019-08-26  7:03     ` Chao Gao
2019-08-26  8:11       ` Roger Pau Monné
2019-08-29 10:47   ` Jan Beulich
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 13/15] x86/microcode: Synchronize late microcode loading Chao Gao
2019-08-19 10:27   ` Sergey Dyasli
2019-08-19 14:49     ` Chao Gao
2019-08-29 12:06   ` Jan Beulich [this message]
2019-08-30  3:30     ` Chao Gao
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 14/15] microcode: remove microcode_update_lock Chao Gao
2019-08-19  1:25 ` [Xen-devel] [PATCH v9 15/15] microcode: block #NMI handling when loading an ucode Chao Gao
2019-08-23  8:46   ` Sergey Dyasli
2019-08-26  8:07     ` Chao Gao
2019-08-27  4:52       ` Chao Gao
2019-08-28  8:52         ` Sergey Dyasli
2019-08-29 12:11         ` Jan Beulich
2019-08-30  6:35           ` Chao Gao
2019-09-09  5:52             ` Chao Gao
2019-09-09  6:16               ` Jan Beulich
2019-08-29 12:22   ` Jan Beulich
2019-08-30  6:33     ` Chao Gao
2019-08-30  7:30       ` Jan Beulich
2019-08-22  7:51 ` [Xen-devel] [PATCH v9 00/15] improve late microcode loading Sergey Dyasli
2019-08-22 15:39   ` Chao Gao

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2441e448-5fe1-bdbc-f0b6-720401fd0bf0@suse.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=ashok.raj@intel.com \
    --cc=bp@suse.de \
    --cc=chao.gao@intel.com \
    --cc=jun.nakajima@intel.com \
    --cc=kevin.tian@intel.com \
    --cc=roger.pau@citrix.com \
    --cc=tglx@linutronix.de \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Xen-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/xen-devel/0 xen-devel/git/0.git
	git clone --mirror https://lore.kernel.org/xen-devel/1 xen-devel/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 xen-devel xen-devel/ https://lore.kernel.org/xen-devel \
		xen-devel@lists.xenproject.org xen-devel@lists.xen.org xen-devel@archiver.kernel.org
	public-inbox-index xen-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.xenproject.lists.xen-devel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox