* [Xen-devel] [PATCH] x86: optimize loading of GDT at context switch
From: Juergen Gross @ 2019-07-22 13:22 UTC
  To: xen-devel
  Cc: Juergen Gross, Andrew Cooper, Wei Liu, Jan Beulich, Roger Pau Monné

Instead of deciding dynamically whether the previous vcpu was using the
full or the default GDT, just add a percpu variable for that purpose.
This at the same time removes the need for testing twice whether the
vcpu_ids differ.

This change improves performance by 0.5% - 1% on my test machine when
doing parallel compilation.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
This patch was originally part of my core scheduling series, but it
neither depends on any patch of that series, nor does any patch of the
series depend on this one, so I'm sending it on its own.

I have removed latching the need_full_gdt(nd) value in a local variable,
as doing so has been shown to impact performance slightly in a negative
way.
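
For illustration, the resulting GDT handling in __context_switch() boils
down to the following (a condensed sketch of the hunks below, not the
literal code; the intervening statements are elided):

    /* Unload the full GDT only if one is actually loaded on this CPU. */
    if ( per_cpu(full_gdt_loaded, cpu) &&
         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(nd)) )
        load_default_gdt(cpu);    /* sets full_gdt_loaded to false */

    /* ... */

    /* Load the full GDT only if it isn't loaded already. */
    if ( need_full_gdt(nd) && !per_cpu(full_gdt_loaded, cpu) )
        load_full_gdt(n, cpu);    /* sets full_gdt_loaded to true */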
---
 xen/arch/x86/cpu/common.c  |  3 +++
 xen/arch/x86/domain.c      | 14 ++++++++------
 xen/include/asm-x86/desc.h |  1 +
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 1db96d959c..b0bb9292fd 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -49,6 +49,8 @@ unsigned int vaddr_bits __read_mostly = VADDR_BITS;
 static unsigned int cleared_caps[NCAPINTS];
 static unsigned int forced_caps[NCAPINTS];
 
+DEFINE_PER_CPU(bool, full_gdt_loaded);
+
 void __init setup_clear_cpu_cap(unsigned int cap)
 {
 	const uint32_t *dfs;
@@ -756,6 +758,7 @@ void load_system_tables(void)
 		offsetof(struct tss_struct, __cacheline_filler) - 1,
 		SYS_DESC_tss_busy);
 
+        per_cpu(full_gdt_loaded, cpu) = false;
 	lgdt(&gdtr);
 	lidt(&idtr);
 	ltr(TSS_ENTRY << 3);
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index ea55160887..353a6e24fb 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1670,7 +1670,7 @@ static void update_xen_slot_in_full_gdt(const struct vcpu *v, unsigned int cpu)
                                    : per_cpu(compat_gdt_table_l1e, cpu));
 }
 
-static void load_full_gdt(const struct vcpu *v)
+static void load_full_gdt(const struct vcpu *v, unsigned int cpu)
 {
     struct desc_ptr gdt_desc = {
         .limit = LAST_RESERVED_GDT_BYTE,
@@ -1678,6 +1678,8 @@ static void load_full_gdt(const struct vcpu *v)
     };
 
     lgdt(&gdt_desc);
+
+    per_cpu(full_gdt_loaded, cpu) = true;
 }
 
 static void load_default_gdt(unsigned int cpu)
@@ -1689,6 +1691,8 @@ static void load_default_gdt(unsigned int cpu)
     };
 
     lgdt(&gdt_desc);
+
+    per_cpu(full_gdt_loaded, cpu) = false;
 }
 
 static void __context_switch(void)
@@ -1739,8 +1743,7 @@ static void __context_switch(void)
 
     if ( need_full_gdt(nd) )
         update_xen_slot_in_full_gdt(n, cpu);
-
-    if ( need_full_gdt(pd) &&
+    if ( per_cpu(full_gdt_loaded, cpu) &&
          ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(nd)) )
         load_default_gdt(cpu);
 
@@ -1753,9 +1756,8 @@ static void __context_switch(void)
         svm_load_segs(0, 0, 0, 0, 0, 0, 0);
 #endif
 
-    if ( need_full_gdt(nd) &&
-         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(pd)) )
-        load_full_gdt(n);
+    if ( need_full_gdt(nd) && !per_cpu(full_gdt_loaded, cpu) )
+        load_full_gdt(n, cpu);
 
     if ( pd != nd )
         cpumask_clear_cpu(cpu, pd->dirty_cpumask);
diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h
index e565727dc0..c011c03ae2 100644
--- a/xen/include/asm-x86/desc.h
+++ b/xen/include/asm-x86/desc.h
@@ -210,6 +210,7 @@ DECLARE_PER_CPU(l1_pgentry_t, gdt_table_l1e);
 extern seg_desc_t boot_cpu_compat_gdt_table[];
 DECLARE_PER_CPU(seg_desc_t *, compat_gdt_table);
 DECLARE_PER_CPU(l1_pgentry_t, compat_gdt_table_l1e);
+DECLARE_PER_CPU(bool, full_gdt_loaded);
 
 extern void load_TR(void);
 
-- 
2.16.4



* Re: [Xen-devel] [PATCH] x86: optimize loading of GDT at context switch
From: Jan Beulich @ 2019-07-25 10:29 UTC
  To: Juergen Gross; +Cc: xen-devel, Roger Pau Monné, Wei Liu, Andrew Cooper

On 22.07.2019 15:22, Juergen Gross wrote:
> @@ -756,6 +758,7 @@ void load_system_tables(void)
>   		offsetof(struct tss_struct, __cacheline_filler) - 1,
>   		SYS_DESC_tss_busy);
>   
> +        per_cpu(full_gdt_loaded, cpu) = false;
>   	lgdt(&gdtr);
>   	lidt(&idtr);
>   	ltr(TSS_ENTRY << 3);

As per the surrounding code, a hard tab should be used for indentation
here.
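
I.e., matching the surrounding lines, something like:

	per_cpu(full_gdt_loaded, cpu) = false;
	lgdt(&gdtr);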

> @@ -1739,8 +1743,7 @@ static void __context_switch(void)
>   
>       if ( need_full_gdt(nd) )
>           update_xen_slot_in_full_gdt(n, cpu);
> -
> -    if ( need_full_gdt(pd) &&
> +    if ( per_cpu(full_gdt_loaded, cpu) &&
>            ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(nd)) )
>           load_default_gdt(cpu);

I think it wouldn't be bad if the blank line was kept.
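
I.e. (illustrative, with the blank line retained):

     if ( need_full_gdt(nd) )
         update_xen_slot_in_full_gdt(n, cpu);

     if ( per_cpu(full_gdt_loaded, cpu) &&
          ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(nd)) )
         load_default_gdt(cpu);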

If I end up committing this, I'll try to remember to do both
adjustments, unless you object for some reason.

Jan
