All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] x86/microcode: Synchronize late microcode loading
@ 2018-04-25 11:46 Chao Gao
  2018-04-30 15:25 ` Jan Beulich
  0 siblings, 1 reply; 4+ messages in thread
From: Chao Gao @ 2018-04-25 11:46 UTC (permalink / raw)
  To: xen-devel
  Cc: Kevin Tian, Ashok Raj, Andrew Cooper, Jan Beulich, Jun Nakajima,
	Thomas Gleixner, Borislav Petkov, Gao Chao

From: Gao Chao <chao.gao@intel.com>

This patch ports microcode improvement patches from linux kernel.

Before you read any further: the early loading method is still the
preferred one and you should always do that. The following patch is
improving the late loading mechanism for long running jobs and cloud use
cases.

Gather all cores and serialize the microcode update on them by doing it
one-by-one to make the late update process as reliable as possible and
avoid potential issues caused by the microcode update.

This patch is also in accord with Andrew's suggestion,
"Rendezvous all online cpus in an IPI to apply the patch, and keep the
processors in until all have completed the patch.", in [1].

[1]:https://wiki.xenproject.org/wiki/XenParavirtOps/microcode_update#Run_time_microcode_updates

Signed-off-by: Chao Gao <chao.gao@intel.com>
Tested-by: Chao Gao <chao.gao@intel.com>
[linux commit: a5321aec6412b20b5ad15db2d6b916c05349dbff]
[linux commit: bb8c13d61a629276a162c1d2b1a20a815cbcfbb7]
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
---
Resending because I forgot to send this to the mailing list.

v2:
 - Reduce the timeout from 1s to 30ms
 - update microcode with better parallelism; only one logical thread of each
 core tries to take the lock and do the update.
 - remove 'error' field from struct microcode_info
 - correct coding style issues
---
 xen/arch/x86/microcode.c | 117 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 91 insertions(+), 26 deletions(-)

diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
index 4163f50..fddce1e 100644
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -22,6 +22,7 @@
  */
 
 #include <xen/cpu.h>
+#include <xen/cpumask.h>
 #include <xen/lib.h>
 #include <xen/kernel.h>
 #include <xen/init.h>
@@ -30,15 +31,21 @@
 #include <xen/smp.h>
 #include <xen/softirq.h>
 #include <xen/spinlock.h>
+#include <xen/stop_machine.h>
 #include <xen/tasklet.h>
 #include <xen/guest_access.h>
 #include <xen/earlycpio.h>
+#include <xen/watchdog.h>
 
+#include <asm/delay.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/microcode.h>
 
+/* By default, wait for 30000us */
+#define MICROCODE_DEFAULT_TIMEOUT 30000
+
 static module_t __initdata ucode_mod;
 static signed int __initdata ucode_mod_idx;
 static bool_t __initdata ucode_mod_forced;
@@ -187,9 +194,9 @@ static DEFINE_SPINLOCK(microcode_mutex);
 DEFINE_PER_CPU(struct ucode_cpu_info, ucode_cpu_info);
 
 struct microcode_info {
-    unsigned int cpu;
+    cpumask_t cpus;
+    atomic_t cpu_in, cpu_out;
     uint32_t buffer_size;
-    int error;
     char buffer[1];
 };
 
@@ -281,24 +288,56 @@ static int microcode_update_cpu(const void *buf, size_t size)
     return err;
 }
 
-static long do_microcode_update(void *_info)
+/* Wait for all CPUs to rendezvous with a timeout (us) */
+static int wait_for_cpus(atomic_t *cnt, int timeout)
 {
-    struct microcode_info *info = _info;
-    int error;
+    unsigned int cpus = num_online_cpus();
 
-    BUG_ON(info->cpu != smp_processor_id());
+    atomic_inc(cnt);
 
-    error = microcode_update_cpu(info->buffer, info->buffer_size);
-    if ( error )
-        info->error = error;
+    while ( atomic_read(cnt) != cpus )
+    {
+        if ( timeout <= 0 )
+        {
+            printk("Timeout when waiting for CPUs calling in\n");
+            return -EBUSY;
+        }
+        udelay(1);
+        timeout--;
+    }
 
-    info->cpu = cpumask_next(info->cpu, &cpu_online_map);
-    if ( info->cpu < nr_cpu_ids )
-        return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+    return 0;
+}
 
-    error = info->error;
-    xfree(info);
-    return error;
+static int do_microcode_update(void *_info)
+{
+    struct microcode_info *info = _info;
+    unsigned int cpu = smp_processor_id();
+    int ret;
+
+    ret = wait_for_cpus(&info->cpu_in, MICROCODE_DEFAULT_TIMEOUT);
+    if ( ret )
+        return ret;
+
+    /*
+     * Logical threads which set the first bit in cpu_sibling_mask can do
+     * the update. Other sibling threads just await the completion of
+     * microcode update.
+     */
+    if ( cpumask_test_and_set_cpu(
+                cpumask_first(per_cpu(cpu_sibling_mask, cpu)), &info->cpus) )
+        ret = microcode_update_cpu(info->buffer, info->buffer_size);
+    /*
+     * Increase the wait timeout to a safe value here since we're serializing
+     * the microcode update and that could take a while on a large number of
+     * CPUs. And that is fine as the *actual* timeout will be determined by
+     * the last CPU finished updating and thus cut short
+     */
+    if ( wait_for_cpus(&info->cpu_out, MICROCODE_DEFAULT_TIMEOUT *
+                                       num_online_cpus()) )
+        panic("Timeout when finishing updating microcode");
+
+    return ret;
 }
 
 int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
@@ -318,26 +357,52 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
 
     ret = copy_from_guest(info->buffer, buf, len);
     if ( ret != 0 )
-    {
-        xfree(info);
-        return ret;
-    }
+        goto free;
 
     info->buffer_size = len;
-    info->error = 0;
-    info->cpu = cpumask_first(&cpu_online_map);
+
+    /* cpu_online_map must not change during update */
+    if ( !get_cpu_maps() )
+    {
+        ret = -EBUSY;
+        goto free;
+    }
 
     if ( microcode_ops->start_update )
     {
         ret = microcode_ops->start_update();
         if ( ret != 0 )
-        {
-            xfree(info);
-            return ret;
-        }
+            goto put;
     }
 
-    return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+    cpumask_empty(&info->cpus);
+    atomic_set(&info->cpu_in, 0);
+    atomic_set(&info->cpu_out, 0);
+
+    /*
+     * We intend to disable interrupt for long time, which may lead to
+     * watchdog timeout.
+     */
+    watchdog_disable();
+    /*
+     * Late loading dance. Why the heavy-handed stop_machine effort?
+     *
+     * -HT siblings must be idle and not execute other code while the other
+     *  sibling is loading microcode in order to avoid any negative
+     *  interactions cause by the loading.
+     *
+     * -In addition, microcode update on the cores must be serialized until
+     *  this requirement can be relaxed in the feature. Right now, this is
+     *  conservative and good.
+     */
+    stop_machine_run(do_microcode_update, info, NR_CPUS);
+    watchdog_enable();
+
+ put:
+    put_cpu_maps();
+ free:
+    xfree(info);
+    return ret;
 }
 
 static int __init microcode_init(void)
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] x86/microcode: Synchronize late microcode loading
  2018-04-25 11:46 [PATCH v2] x86/microcode: Synchronize late microcode loading Chao Gao
@ 2018-04-30 15:25 ` Jan Beulich
  2018-05-01  8:15   ` Chao Gao
  0 siblings, 1 reply; 4+ messages in thread
From: Jan Beulich @ 2018-04-30 15:25 UTC (permalink / raw)
  To: Chao Gao
  Cc: Kevin Tian, Ashok Raj, Andrew Cooper, xen-devel, Jun Nakajima,
	tglx, Borislav Petkov

>>> On 25.04.18 at 13:46, <chao.gao@intel.com> wrote:
> @@ -281,24 +288,56 @@ static int microcode_update_cpu(const void *buf, size_t size)
>      return err;
>  }
>  
> -static long do_microcode_update(void *_info)
> +/* Wait for all CPUs to rendezvous with a timeout (us) */
> +static int wait_for_cpus(atomic_t *cnt, int timeout)

unsigned int

> +static int do_microcode_update(void *_info)
> +{
> +    struct microcode_info *info = _info;
> +    unsigned int cpu = smp_processor_id();
> +    int ret;
> +
> +    ret = wait_for_cpus(&info->cpu_in, MICROCODE_DEFAULT_TIMEOUT);
> +    if ( ret )
> +        return ret;
> +    /*
> +     * Logical threads which set the first bit in cpu_sibling_mask can do
> +     * the update. Other sibling threads just await the completion of
> +     * microcode update.
> +     */
> +    if ( cpumask_test_and_set_cpu(
> +                cpumask_first(per_cpu(cpu_sibling_mask, cpu)), &info->cpus) )
> +        ret = microcode_update_cpu(info->buffer, info->buffer_size);

Isn't the condition inverted (i.e. missing a ! )?

Also I take it that you've confirmed that loading ucode in parallel on multiple
cores of the same socket is not a problem? The comment in the last hunk
suggests otherwise.

> +    /*
> +     * Increase the wait timeout to a safe value here since we're serializing
> +     * the microcode update and that could take a while on a large number of
> +     * CPUs. And that is fine as the *actual* timeout will be determined by
> +     * the last CPU finished updating and thus cut short
> +     */
> +    if ( wait_for_cpus(&info->cpu_out, MICROCODE_DEFAULT_TIMEOUT *
> +                                       num_online_cpus()) )
> +        panic("Timeout when finishing updating microcode");

A 3s timeout (as an example for a system with 100 CPU threads) is still
absurdly high to me, but considering you panic() anyway if you hit the
timeout the question mainly is whether there's a slim chance for this to
complete a brief moment before the timeout expires. If all goes well,
you won't come close to even 1s, but as said before - there may be
guests running, and they may become utterly confused if they don't
get any time within a second or more.

With you no longer doing things sequentially I don't, however, see why
you need to scale the timeout by CPU count.

> +
> +    return ret;
>  }

You're losing this return value (once for every CPU making it into this
function).

> @@ -318,26 +357,52 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
>  
>      ret = copy_from_guest(info->buffer, buf, len);
>      if ( ret != 0 )
> -    {
> -        xfree(info);
> -        return ret;
> -    }
> +        goto free;
>  
>      info->buffer_size = len;
> -    info->error = 0;
> -    info->cpu = cpumask_first(&cpu_online_map);
> +
> +    /* cpu_online_map must not change during update */
> +    if ( !get_cpu_maps() )
> +    {
> +        ret = -EBUSY;
> +        goto free;
> +    }
>  
>      if ( microcode_ops->start_update )
>      {
>          ret = microcode_ops->start_update();
>          if ( ret != 0 )
> -        {
> -            xfree(info);
> -            return ret;
> -        }
> +            goto put;
>      }
>  
> -    return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
> +    cpumask_empty(&info->cpus);

DYM cpumask_clear()?

> +    atomic_set(&info->cpu_in, 0);
> +    atomic_set(&info->cpu_out, 0);
> +
> +    /*
> +     * We intend to disable interrupt for long time, which may lead to
> +     * watchdog timeout.
> +     */
> +    watchdog_disable();
> +    /*
> +     * Late loading dance. Why the heavy-handed stop_machine effort?
> +     *
> +     * -HT siblings must be idle and not execute other code while the other
> +     *  sibling is loading microcode in order to avoid any negative
> +     *  interactions cause by the loading.
> +     *
> +     * -In addition, microcode update on the cores must be serialized until
> +     *  this requirement can be relaxed in the feature. Right now, this is
> +     *  conservative and good.

This is the comment I've referred to above.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] x86/microcode: Synchronize late microcode loading
  2018-04-30 15:25 ` Jan Beulich
@ 2018-05-01  8:15   ` Chao Gao
  2018-05-02  6:48     ` Jan Beulich
  0 siblings, 1 reply; 4+ messages in thread
From: Chao Gao @ 2018-05-01  8:15 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kevin Tian, Ashok Raj, Andrew Cooper, xen-devel, Jun Nakajima,
	tglx, Borislav Petkov

On Mon, Apr 30, 2018 at 09:25:26AM -0600, Jan Beulich wrote:
>>>> On 25.04.18 at 13:46, <chao.gao@intel.com> wrote:
>> @@ -281,24 +288,56 @@ static int microcode_update_cpu(const void *buf, size_t size)
>>      return err;
>>  }
>>  
>> -static long do_microcode_update(void *_info)
>> +/* Wait for all CPUs to rendezvous with a timeout (us) */
>> +static int wait_for_cpus(atomic_t *cnt, int timeout)
>
>unsigned int
>
>> +static int do_microcode_update(void *_info)
>> +{
>> +    struct microcode_info *info = _info;
>> +    unsigned int cpu = smp_processor_id();
>> +    int ret;
>> +
>> +    ret = wait_for_cpus(&info->cpu_in, MICROCODE_DEFAULT_TIMEOUT);
>> +    if ( ret )
>> +        return ret;
>> +    /*
>> +     * Logical threads which set the first bit in cpu_sibling_mask can do
>> +     * the update. Other sibling threads just await the completion of
>> +     * microcode update.
>> +     */
>> +    if ( cpumask_test_and_set_cpu(
>> +                cpumask_first(per_cpu(cpu_sibling_mask, cpu)), &info->cpus) )
>> +        ret = microcode_update_cpu(info->buffer, info->buffer_size);
>
>Isn't the condition inverted (i.e. missing a ! )?

Yes.

>
>Also I take it that you've confirmed that loading ucode in parallel on multiple
>cores of the same socket is not a problem? The comment in the last hunk
>suggests otherwise.

No. In microcode_update_cpu(), microcode_mutex makes the update
sequential.

>
>> +    /*
>> +     * Increase the wait timeout to a safe value here since we're serializing
>> +     * the microcode update and that could take a while on a large number of
>> +     * CPUs. And that is fine as the *actual* timeout will be determined by
>> +     * the last CPU finished updating and thus cut short
>> +     */
>> +    if ( wait_for_cpus(&info->cpu_out, MICROCODE_DEFAULT_TIMEOUT *
>> +                                       num_online_cpus()) )
>> +        panic("Timeout when finishing updating microcode");
>
>A 3s timeout (as an example for a system with 100 CPU threads) is still
>absurdly high to me, but considering you panic() anyway if you hit the
>timeout the question mainly is whether there's a slim chance for this to
>complete a brief moment before the timeout expires. If all goes well,
>you won't come close to even 1s, but as said before - there may be
>guests running, and they may become utterly confused if they don't
>get any time within a second or more.
>
>With you no longer doing things sequentially I don't, however, see why

No. It is still sequential. And only one thread in a core will try to
acquire the lock -- microcode_mutex.

>you need to scale the timeout by CPU count.

Maybe by the number of cores. But I didn't find an existing variable to
count cores.

>
>> +
>> +    return ret;
>>  }
>
>You're losing this return value (once for every CPU making it into this
>function).

I don't understand this remark. This function is called by
stop_machine_run(). stop_machine_run() could return an error if
any CPU failed during the update. We don't care about the specific CPU
or how many CPUs failed to do the update.

Thanks
Chao

>
>> @@ -318,26 +357,52 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
>>  
>>      ret = copy_from_guest(info->buffer, buf, len);
>>      if ( ret != 0 )
>> -    {
>> -        xfree(info);
>> -        return ret;
>> -    }
>> +        goto free;
>>  
>>      info->buffer_size = len;
>> -    info->error = 0;
>> -    info->cpu = cpumask_first(&cpu_online_map);
>> +
>> +    /* cpu_online_map must not change during update */
>> +    if ( !get_cpu_maps() )
>> +    {
>> +        ret = -EBUSY;
>> +        goto free;
>> +    }
>>  
>>      if ( microcode_ops->start_update )
>>      {
>>          ret = microcode_ops->start_update();
>>          if ( ret != 0 )
>> -        {
>> -            xfree(info);
>> -            return ret;
>> -        }
>> +            goto put;
>>      }
>>  
>> -    return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
>> +    cpumask_empty(&info->cpus);
>
>DYM cpumask_clear()?
>
>> +    atomic_set(&info->cpu_in, 0);
>> +    atomic_set(&info->cpu_out, 0);
>> +
>> +    /*
>> +     * We intend to disable interrupt for long time, which may lead to
>> +     * watchdog timeout.
>> +     */
>> +    watchdog_disable();
>> +    /*
>> +     * Late loading dance. Why the heavy-handed stop_machine effort?
>> +     *
>> +     * -HT siblings must be idle and not execute other code while the other
>> +     *  sibling is loading microcode in order to avoid any negative
>> +     *  interactions cause by the loading.
>> +     *
>> +     * -In addition, microcode update on the cores must be serialized until
>> +     *  this requirement can be relaxed in the feature. Right now, this is
>> +     *  conservative and good.
>
>This is the comment I've referred to above.
>
>Jan
>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] x86/microcode: Synchronize late microcode loading
  2018-05-01  8:15   ` Chao Gao
@ 2018-05-02  6:48     ` Jan Beulich
  0 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2018-05-02  6:48 UTC (permalink / raw)
  To: Chao Gao
  Cc: Kevin Tian, Ashok Raj, Andrew Cooper, xen-devel, Jun Nakajima,
	tglx, Borislav Petkov

>>> On 01.05.18 at 10:15, <chao.gao@intel.com> wrote:
> On Mon, Apr 30, 2018 at 09:25:26AM -0600, Jan Beulich wrote:
>>>>> On 25.04.18 at 13:46, <chao.gao@intel.com> wrote:
>>> +static int do_microcode_update(void *_info)
>>> +{
>>> +    struct microcode_info *info = _info;
>>> +    unsigned int cpu = smp_processor_id();
>>> +    int ret;
>>> +
>>> +    ret = wait_for_cpus(&info->cpu_in, MICROCODE_DEFAULT_TIMEOUT);
>>> +    if ( ret )
>>> +        return ret;
>>> +    /*
>>> +     * Logical threads which set the first bit in cpu_sibling_mask can do
>>> +     * the update. Other sibling threads just await the completion of
>>> +     * microcode update.
>>> +     */
>>> +    if ( cpumask_test_and_set_cpu(
>>> +                cpumask_first(per_cpu(cpu_sibling_mask, cpu)), &info->cpus) )
>>> +        ret = microcode_update_cpu(info->buffer, info->buffer_size);
>>
>>Isn't the condition inverted (i.e. missing a ! )?
> 
> Yes.
> 
>>
>>Also I take it that you've confirmed that loading ucode in parallel on multiple
>>cores of the same socket is not a problem? The comment in the last hunk
>>suggests otherwise.
> 
> No. In microcode_update_cpu(), microcode_mutex makes the update
> sequential.

Oh, right, of course.

>>> +
>>> +    return ret;
>>>  }
>>
>>You're losing this return value (once for every CPU making it into this
>>function).
> 
> I don't understand this remark. This function is called by
> stop_machine_run(). stop_machine_run() could return error if
> any cpu failed during update. We don't care the specific CPU and
> how many CPUs failed to do the update.

Then please check your stop_machine_run() invocation again, in particular
what happens with that function's return value.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-05-02  6:48 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-25 11:46 [PATCH v2] x86/microcode: Synchronize late microcode loading Chao Gao
2018-04-30 15:25 ` Jan Beulich
2018-05-01  8:15   ` Chao Gao
2018-05-02  6:48     ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.