Re: [PATCH v4 6/6] x86/microcode: Synchronize late microcode loading

From: "Roger Pau Monné" <roger.pau@citrix.com>
To: Chao Gao <chao.gao@intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>, Wei Liu <wei.liu2@citrix.com>,
	Jan Beulich <jbeulich@suse.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Jun Nakajima <jun.nakajima@intel.com>,
	xen-devel@lists.xenproject.org,
	Thomas Gleixner <tglx@linutronix.de>,
	Borislav Petkov <bp@suse.de>, Ashok Raj <ashok.raj@intel.com>
Subject: Re: [PATCH v4 6/6] x86/microcode: Synchronize late microcode loading
Date: Wed, 28 Nov 2018 16:22:09 +0100	[thread overview]
Message-ID: <20181128152209.hqpgqtuy2mx7ytk6@mac> (raw)
In-Reply-To: <1543383256-12371-7-git-send-email-chao.gao@intel.com>

On Wed, Nov 28, 2018 at 01:34:16PM +0800, Chao Gao wrote:
> This patch ports microcode improvement patches from linux kernel.
> 
> Before you read any further: the early loading method is still the
> preferred one and you should always do that. The following patch is
> improving the late loading mechanism for long running jobs and cloud use
> cases.
> 
> Gather all cores and serialize the microcode update on them by doing it
> one-by-one to make the late update process as reliable as possible and
> avoid potential issues caused by the microcode update.
> 
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> Tested-by: Chao Gao <chao.gao@intel.com>
> [linux commit: a5321aec6412b20b5ad15db2d6b916c05349dbff]
> [linux commit: bb8c13d61a629276a162c1d2b1a20a815cbcfbb7]

If this patch is the squash of two Linux commits, please post the
ported versions of the two commits separately.

> Cc: Kevin Tian <kevin.tian@intel.com>
> Cc: Jun Nakajima <jun.nakajima@intel.com>
> Cc: Ashok Raj <ashok.raj@intel.com>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Jan Beulich <jbeulich@suse.com>
> ---
>  xen/arch/x86/microcode.c | 123 +++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 97 insertions(+), 26 deletions(-)
> 
> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
> index 0b435f4..d5a2a94 100644
> --- a/xen/arch/x86/microcode.c
> +++ b/xen/arch/x86/microcode.c
> @@ -22,6 +22,7 @@
>   */
>  
>  #include <xen/cpu.h>
> +#include <xen/cpumask.h>
>  #include <xen/lib.h>
>  #include <xen/kernel.h>
>  #include <xen/init.h>
> @@ -30,18 +31,25 @@
>  #include <xen/smp.h>
>  #include <xen/softirq.h>
>  #include <xen/spinlock.h>
> +#include <xen/stop_machine.h>
>  #include <xen/tasklet.h>
>  #include <xen/guest_access.h>
>  #include <xen/earlycpio.h>
> +#include <xen/watchdog.h>
>  
> +#include <asm/delay.h>
>  #include <asm/msr.h>
>  #include <asm/processor.h>
>  #include <asm/setup.h>
>  #include <asm/microcode.h>
>  
> +/* By default, wait for 30000us */
> +#define MICROCODE_DEFAULT_TIMEOUT_US 30000
> +
>  static module_t __initdata ucode_mod;
>  static signed int __initdata ucode_mod_idx;
>  static bool_t __initdata ucode_mod_forced;
> +static unsigned int nr_cores;
>  
>  /*
>   * If we scan the initramfs.cpio for the early microcode code
> @@ -189,8 +197,7 @@ static DEFINE_SPINLOCK(microcode_mutex);
>  DEFINE_PER_CPU(struct ucode_cpu_info, ucode_cpu_info);
>  
>  struct microcode_info {
> -    unsigned int cpu;
> -    int error;
> +    atomic_t cpu_in, cpu_out;

Can you make this variables global to the file and just remove
microcode_info?

>  };
>  
>  static void __microcode_fini_cpu(unsigned int cpu)
> @@ -242,31 +249,62 @@ static int microcode_update_cpu(void)
>      return err;
>  }
>  
> -static long do_microcode_update(void *_info)
> +/* Wait for all CPUs to rendezvous with a timeout (us) */
> +static int wait_for_cpus(atomic_t *cnt, unsigned int timeout)
>  {
> -    struct microcode_info *info = _info;
> -    int error;
> +    unsigned int cpus = num_online_cpus();
>  
> -    BUG_ON(info->cpu != smp_processor_id());
> +    atomic_inc(cnt);
>  
> -    error = microcode_update_cpu();
> -    if ( error )
> -        info->error = error;
> +    while ( atomic_read(cnt) != cpus )
> +    {
> +        if ( timeout <= 0 )
> +        {
> +            printk("Timeout when waiting for CPUs calling in\n");
> +            return -EBUSY;
> +        }
> +        udelay(1);
> +        timeout--;
> +    }
>  
> -    info->cpu = cpumask_next(info->cpu, &cpu_online_map);
> -    if ( info->cpu < nr_cpu_ids )
> -        return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
> +    return 0;
> +}
>  
> -    error = info->error;
> -    xfree(info);
> -    return error;
> +static int do_microcode_update(void *_info)
> +{
> +    struct microcode_info *info = _info;
> +    unsigned int cpu = smp_processor_id();
> +    int ret;
> +
> +    ret = wait_for_cpus(&info->cpu_in, MICROCODE_DEFAULT_TIMEOUT_US);
> +    if ( ret )
> +        return ret;
> +
> +    /*
> +     * Initiate an update on all processors which don't have an online sibling
> +     * thread with a lower thread id. Other sibling threads just await the
> +     * completion of microcode update.
> +     */
> +    if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
> +        ret = microcode_update_cpu();
> +    /*
> +     * Increase the wait timeout to a safe value here since we're serializing
> +     * the microcode update and that could take a while on a large number of
> +     * CPUs. And that is fine as the *actual* timeout will be determined by
> +     * the last CPU finished updating and thus cut short
> +     */
> +    if ( wait_for_cpus(&info->cpu_out, MICROCODE_DEFAULT_TIMEOUT_US *
> +                                       nr_cores) )

Isn't this likely to trigger the watchdog on big systems? Oh I see
below that you disable the watchdog.

> +        panic("Timeout when finishing updating microcode");
> +
> +    return ret;
>  }
>  
>  int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
>  {
>      int ret;
> -    struct microcode_info *info;
>      unsigned int cpu = smp_processor_id();
> +    struct microcode_info *info;
>      struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu);
>      void * buffer;
>  
> @@ -283,19 +321,20 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
>  
>      ret = copy_from_guest(buffer, buf, len);
>      if ( ret != 0 )
> +        goto free;
> +
> +    /* cpu_online_map must not change during update */
> +    if ( !get_cpu_maps() )
>      {
> -        xfree(info);
> -        return ret;
> +        ret = -EBUSY;
> +        goto free;
>      }
>  
>      if ( microcode_ops->start_update )
>      {
>          ret = microcode_ops->start_update();
>          if ( ret != 0 )
> -        {
> -            xfree(info);
> -            return ret;
> -        }
> +            goto put;
>      }
>  
>      spin_lock(&microcode_mutex);
> @@ -311,13 +350,45 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
>      if ( ret <= 0 )
>      {
>          printk("No valid or newer microcode found. Update abort!\n");
> -        return -EINVAL;
> +        ret = -EINVAL;
> +        goto put;
>      }
>  
> -    info->error = 0;
> -    info->cpu = cpumask_first(&cpu_online_map);
> +    atomic_set(&info->cpu_in, 0);
> +    atomic_set(&info->cpu_out, 0);
> +
> +    /* Calculate the number of online CPU core */
> +    nr_cores = 0;
> +    for_each_online_cpu(cpu)
> +        if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
> +            nr_cores++;
> +
> +    printk("%d cores are to update its microcode\n", nr_cores);
>  
> -    return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
> +    /*
> +     * We intend to disable interrupt for long time, which may lead to
> +     * watchdog timeout.
> +     */
> +    watchdog_disable();
> +    /*
> +     * Late loading dance. Why the heavy-handed stop_machine effort?
> +     *
> +     * - HT siblings must be idle and not execute other code while the other
> +     *   sibling is loading microcode in order to avoid any negative
> +     *   interactions cause by the loading.

Well, the HT siblings will be executing code, since they are in a
while loop waiting for the non-siblings cores to finish updating.

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel