* [PATCH] x86: allow NMI injection
From: Jan Beulich @ 2007-02-28 11:39 UTC (permalink / raw)
  To: xen-devel

NetWare's internal debugger needs the ability to send NMI IPIs, and
there is no reason not to allow domUs, or dom0's vCPUs other than vCPU 0,
to handle NMIs (they just will never see hardware-generated ones).

While it currently has no frontend, the added hypercall can also be used
to inject NMIs into foreign VMs.

Along the way, this also fixes a potential race condition caused by the
previously non-atomic accesses to the VCPU flags field in entry.S.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
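
For illustration only (not part of the patch): a guest could invoke the new
operation roughly as sketched below, assuming the usual Linux-style
HYPERVISOR_physdev_op() hypercall wrapper and the definitions added to
public/physdev.h further down.

    #include <xen/interface/xen.h>      /* DOMID_SELF */
    #include <xen/interface/physdev.h>  /* physdev_send_nmi, PHYSDEVOP_send_nmi */

    /* Send a virtual NMI to every vCPU of the calling domain except the
     * current one, mirroring a hardware "all-but-self" NMI IPI. */
    static int send_nmi_all_but_self(void)
    {
        struct physdev_send_nmi op = {
            .domain = DOMID_SELF,
            .vcpu   = XEN_SEND_NMI_ALL_BUT_SELF,
        };

        return HYPERVISOR_physdev_op(PHYSDEVOP_send_nmi, &op);
    }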

Index: 2007-02-27/xen/arch/x86/physdev.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/physdev.c	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/physdev.c	2007-02-28 12:14:58.000000000 +0100
@@ -143,6 +143,57 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         break;
     }
 
+    case PHYSDEVOP_send_nmi: {
+        struct physdev_send_nmi send_nmi;
+        struct domain *d;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&send_nmi, arg, 1) != 0 )
+            break;
+
+        ret = -EPERM;
+        if ( send_nmi.domain == DOMID_SELF )
+            d = current->domain;
+        else if ( !IS_PRIV(current->domain) )
+            break;
+        else
+            d = get_domain_by_id(send_nmi.domain);
+        ret = -ESRCH;
+        if ( d == NULL )
+            break;
+
+        switch ( send_nmi.vcpu )
+        {
+            struct vcpu *v;
+
+        case XEN_SEND_NMI_ALL:
+        case XEN_SEND_NMI_ALL_BUT_SELF:
+            for_each_vcpu(d, v)
+            {
+                if ( (send_nmi.vcpu == XEN_SEND_NMI_ALL || v != current) &&
+                     !test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) )
+                    vcpu_kick(v);
+            }
+            ret = 0;
+            break;
+        case 0 ... MAX_VIRT_CPUS - 1:
+            if ( (v = d->vcpu[send_nmi.vcpu]) != NULL )
+            {
+                if ( !test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) )
+                    vcpu_kick(v);
+                ret = 0;
+            }
+            break;
+        default:
+            ret = -EINVAL;
+            break;
+        }
+
+        if ( send_nmi.domain != DOMID_SELF )
+            put_domain(d);
+        break;
+    }
+
     default:
         ret = -ENOSYS;
         break;
Index: 2007-02-27/xen/arch/x86/traps.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/traps.c	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/traps.c	2007-02-28 12:08:44.000000000 +0100
@@ -2123,6 +2123,12 @@ long do_set_trap_table(XEN_GUEST_HANDLE(
         if ( cur.address == 0 )
             break;
 
+        if ( cur.vector == 2 && !TI_GET_IF(&cur) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+
         fixup_guest_code_selector(current->domain, cur.cs);
 
         memcpy(&dst[cur.vector], &cur, sizeof(cur));
Index: 2007-02-27/xen/arch/x86/x86_32/asm-offsets.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_32/asm-offsets.c	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_32/asm-offsets.c	2007-02-28 12:08:44.000000000 +0100
@@ -68,6 +68,7 @@ void __dummy__(void)
     OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
     OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
     OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+    OFFSET(VCPU_nmi_cs, struct vcpu, arch.guest_context.trap_ctxt[2].cs);
     DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
     DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
     DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
Index: 2007-02-27/xen/arch/x86/x86_32/entry.S
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_32/entry.S	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_32/entry.S	2007-02-28 12:08:44.000000000 +0100
@@ -232,7 +232,7 @@ test_all_events:
         shl  $IRQSTAT_shift,%eax
         test %ecx,irq_stat(%eax,1)
         jnz  process_softirqs
-        btr  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+        lock btrl $_VCPUF_nmi_pending,VCPU_flags(%ebx)
         jc   process_nmi
 test_guest_events:
         movl VCPU_vcpu_info(%ebx),%eax
@@ -259,19 +259,20 @@ process_softirqs:
 
         ALIGN
 process_nmi:
-        movl VCPU_nmi_addr(%ebx),%eax
+        movzwl VCPU_nmi_cs(%ebx),%eax
+        movl VCPU_nmi_addr(%ebx),%ecx
         test %eax,%eax
         jz   test_all_events
-        bts  $_VCPUF_nmi_masked,VCPU_flags(%ebx)
+        lock btsl $_VCPUF_nmi_masked,VCPU_flags(%ebx)
         jc   1f
         sti
         leal VCPU_trap_bounce(%ebx),%edx
-        movl %eax,TRAPBOUNCE_eip(%edx)
-        movw $FLAT_KERNEL_CS,TRAPBOUNCE_cs(%edx)
+        movl %ecx,TRAPBOUNCE_eip(%edx)
+        movw %ax,TRAPBOUNCE_cs(%edx)
         movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
         call create_bounce_frame
         jmp  test_all_events
-1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+1:      lock btsl $_VCPUF_nmi_pending,VCPU_flags(%ebx)
         jmp  test_guest_events
 
 bad_hypercall:
Index: 2007-02-27/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/asm-offsets.c	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/asm-offsets.c	2007-02-28 12:08:44.000000000 +0100
@@ -76,6 +76,7 @@ void __dummy__(void)
     OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
     OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
     OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+    OFFSET(VCPU_nmi_cs, struct vcpu, arch.guest_context.trap_ctxt[2].cs);
     DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
     DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
     DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
Index: 2007-02-27/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/compat/entry.S	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/compat/entry.S	2007-02-28 12:08:44.000000000 +0100
@@ -87,7 +87,7 @@ ENTRY(compat_test_all_events)
         leaq  irq_stat(%rip),%rcx
         testl $~0,(%rcx,%rax,1)
         jnz   compat_process_softirqs
-        btrq  $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+        lock btrl $_VCPUF_nmi_pending,VCPU_flags(%rbx)
         jc    compat_process_nmi
 compat_test_guest_events:
         movq  VCPU_vcpu_info(%rbx),%rax
@@ -101,7 +101,7 @@ compat_test_guest_events:
         movl  VCPU_event_addr(%rbx),%eax
         movl  %eax,TRAPBOUNCE_eip(%rdx)
         movl  VCPU_event_sel(%rbx),%eax
-        movl  %eax,TRAPBOUNCE_cs(%rdx)
+        movw  %ax,TRAPBOUNCE_cs(%rdx)
         movw  $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
         call  compat_create_bounce_frame
         jmp   compat_test_all_events
@@ -116,20 +116,21 @@ compat_process_softirqs:
 	ALIGN
 /* %rbx: struct vcpu */
 compat_process_nmi:
-        movl  VCPU_nmi_addr(%rbx),%eax
+        movzwl VCPU_nmi_cs(%rbx),%eax
+        movl  VCPU_nmi_addr(%rbx),%ecx
         testl %eax,%eax
         jz    compat_test_all_events
-        btsq  $_VCPUF_nmi_masked,VCPU_flags(%rbx)
+        lock btsl $_VCPUF_nmi_masked,VCPU_flags(%rbx)
         jc    1f
         sti
         leaq  VCPU_trap_bounce(%rbx),%rdx
-        movl  %eax,TRAPBOUNCE_eip(%rdx)
-        movl  $FLAT_COMPAT_KERNEL_CS,TRAPBOUNCE_cs(%rdx)
+        movl  %ecx,TRAPBOUNCE_eip(%rdx)
+        movw  %ax,TRAPBOUNCE_cs(%rdx)
         movw  $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
         call  compat_create_bounce_frame
         jmp   compat_test_all_events
 1:
-        btsq  $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+        lock btsl $_VCPUF_nmi_pending,VCPU_flags(%rbx)
         jmp   compat_test_guest_events
 
 compat_bad_hypercall:
@@ -164,7 +165,7 @@ compat_failsafe_callback:
         movl  VCPU_failsafe_addr(%rbx),%eax
         movl  %eax,TRAPBOUNCE_eip(%rdx)
         movl  VCPU_failsafe_sel(%rbx),%eax
-        movl  %eax,TRAPBOUNCE_cs(%rdx)
+        movw  %ax,TRAPBOUNCE_cs(%rdx)
         movw  $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
         btq   $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx)
         jnc   1f
Index: 2007-02-27/xen/arch/x86/x86_64/compat/traps.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/compat/traps.c	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/compat/traps.c	2007-02-28 12:08:44.000000000 +0100
@@ -287,6 +287,12 @@ int compat_set_trap_table(XEN_GUEST_HAND
         if ( cur.address == 0 )
             break;
 
+        if ( cur.vector == 2 && !TI_GET_IF(&cur) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+
         fixup_guest_code_selector(current->domain, cur.cs);
 
         XLAT_trap_info(dst + cur.vector, &cur);
Index: 2007-02-27/xen/arch/x86/x86_64/entry.S
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/entry.S	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/entry.S	2007-02-28 12:08:44.000000000 +0100
@@ -177,7 +177,7 @@ test_all_events:
         leaq  irq_stat(%rip),%rcx
         testl $~0,(%rcx,%rax,1)
         jnz   process_softirqs
-        btr   $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+        lock btrl $_VCPUF_nmi_pending,VCPU_flags(%rbx)
         jc    process_nmi
 test_guest_events:
         movq  VCPU_vcpu_info(%rbx),%rax
@@ -207,7 +207,7 @@ process_nmi:
         movq VCPU_nmi_addr(%rbx),%rax
         test %rax,%rax
         jz   test_all_events
-        bts  $_VCPUF_nmi_masked,VCPU_flags(%rbx)
+        lock btsl $_VCPUF_nmi_masked,VCPU_flags(%rbx)
         jc   1f
         sti
         leaq VCPU_trap_bounce(%rbx),%rdx
@@ -215,7 +215,7 @@ process_nmi:
         movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
         call create_bounce_frame
         jmp  test_all_events
-1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+1:      lock btsl $_VCPUF_nmi_pending,VCPU_flags(%rbx)
         jmp  test_guest_events
 
 bad_hypercall:
Index: 2007-02-27/xen/arch/x86/x86_64/physdev.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/physdev.c	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/physdev.c	2007-02-28 12:08:44.000000000 +0100
@@ -30,6 +30,10 @@
 #define physdev_irq_status_query   compat_physdev_irq_status_query
 #define physdev_irq_status_query_t physdev_irq_status_query_compat_t
 
+#define xen_physdev_send_nmi       physdev_send_nmi
+CHECK_physdev_send_nmi;
+#undef xen_physdev_send_nmi
+
 #define COMPAT
 #undef guest_handle_okay
 #define guest_handle_okay          compat_handle_okay
Index: 2007-02-27/xen/common/kernel.c
===================================================================
--- 2007-02-27.orig/xen/common/kernel.c	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/common/kernel.c	2007-02-28 12:08:44.000000000 +0100
@@ -246,16 +246,20 @@ long register_guest_nmi_callback(unsigne
     struct vcpu *v = current;
     struct domain *d = current->domain;
 
-    if ( (d->domain_id != 0) || (v->vcpu_id != 0) )
-        return -EINVAL;
-
     v->nmi_addr = address;
 #ifdef CONFIG_X86
+    v->arch.guest_context.trap_ctxt[2].vector = 2;
+    v->arch.guest_context.trap_ctxt[2].flags = 0;
+    TI_SET_IF(v->arch.guest_context.trap_ctxt + 2, 1);
+    v->arch.guest_context.trap_ctxt[2].cs =
+        !IS_COMPAT(d) ? FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS;
+
     /*
      * If no handler was registered we can 'lose the NMI edge'. Re-assert it
      * now.
      */
-    if ( arch_get_nmi_reason(d) != 0 )
+    if ( d->domain_id == 0 && v->vcpu_id == 0 &&
+         arch_get_nmi_reason(d) != 0 )
         set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
 #endif
 
@@ -266,6 +270,11 @@ long unregister_guest_nmi_callback(void)
 {
     struct vcpu *v = current;
 
+#ifdef CONFIG_X86
+    v->arch.guest_context.trap_ctxt[2].cs = 0;
+    v->arch.guest_context.trap_ctxt[2].vector = 0;
+    v->arch.guest_context.trap_ctxt[2].flags = 0;
+#endif
     v->nmi_addr = 0;
 
     return 0;
Index: 2007-02-27/xen/include/public/physdev.h
===================================================================
--- 2007-02-27.orig/xen/include/public/physdev.h	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/include/public/physdev.h	2007-02-28 12:08:44.000000000 +0100
@@ -119,6 +119,22 @@ typedef struct physdev_irq physdev_irq_t
 DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
 
 /*
+ * Inject an NMI into the specified VCPU(s) of the specified domain.
+ * @arg == pointer to physdev_send_nmi structure.
+ */
+#define PHYSDEVOP_send_nmi              13
+struct physdev_send_nmi {
+    /* IN */
+    domid_t domain;
+    uint32_t vcpu;
+};
+typedef struct physdev_send_nmi physdev_send_nmi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_send_nmi_t);
+
+#define XEN_SEND_NMI_ALL          (~(uint32_t)0)
+#define XEN_SEND_NMI_ALL_BUT_SELF (~(uint32_t)1)
+
+/*
  * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
  * hypercall since 0x00030202.
  */
Index: 2007-02-27/xen/include/xen/sched.h
===================================================================
--- 2007-02-27.orig/xen/include/xen/sched.h	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/include/xen/sched.h	2007-02-28 12:08:44.000000000 +0100
@@ -108,7 +108,11 @@ struct vcpu 
     /* Bitmask of CPUs on which this VCPU may run. */
     cpumask_t        cpu_affinity;
 
+#ifndef CONFIG_X86
     unsigned long    nmi_addr;      /* NMI callback address. */
+#else
+# define nmi_addr arch.guest_context.trap_ctxt[2].address
+#endif
 
     /* Bitmask of CPUs which are holding onto this VCPU's state. */
     cpumask_t        vcpu_dirty_cpumask;
Index: 2007-02-27/xen/include/xlat.lst
===================================================================
--- 2007-02-27.orig/xen/include/xlat.lst	2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/include/xlat.lst	2007-02-28 12:08:44.000000000 +0100
@@ -36,6 +36,7 @@
 !	memory_map			memory.h
 !	memory_reservation		memory.h
 !	translate_gpfn_list		memory.h
+?	physdev_send_nmi		physdev.h
 !	sched_poll			sched.h
 ?	sched_remote_shutdown		sched.h
 ?	sched_shutdown			sched.h


* Re: [PATCH] x86: allow NMI injection
From: Keir Fraser @ 2007-02-28 11:59 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 28/2/07 11:39, "Jan Beulich" <jbeulich@novell.com> wrote:

> NetWare's internal debugger needs the ability to send NMI IPIs, and
> there is no reason not to allow domUs, or dom0's vCPUs other than vCPU 0,
> to handle NMIs (they just will never see hardware-generated ones).
> 
> While it currently has no frontend, the added hypercall can also be used
> to inject NMIs into foreign VMs.
> 
> Along the way, this also fixes a potential race condition caused by the
> previously non-atomic accesses to the VCPU flags field in entry.S.
> 
> Signed-off-by: Jan Beulich <jbeulich@novell.com>

Notably this patch changes the way that an NMI handler is registered, to use
the native-like vector 2. This changes the guest interface though. Do you
really need to be able to specify a custom CS? Can you not vector to the
flat CS and then far jump?

I'm not sure about making the IPI function a physdev_op(), since this is
still a virtual NMI (it has nothing to do with real hardware NMIs). It might
be better to make it a vcpu_op. Then it would not be a great fit to allow the
send-to-all / send-to-all-but-self overrides, but I'm not sure how important
that optimisation is (or how important it is to make the NMI deliveries as
simultaneous as possible)?

 -- Keir


* Re: [PATCH] x86: allow NMI injection
From: Jan Beulich @ 2007-02-28 12:21 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>Notably this patch changes the way that an NMI handler is registered, to use
>the native-like vector 2. This changes the guest interface though. Do you

But only in a benign fashion - registering the old way is still possible, and
registering through set_trap_table now works where it previously didn't.
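
As a rough illustration (assumed Linux-style HYPERVISOR_set_trap_table()
wrapper and handler name; not part of the patch), a guest registration via
the trap table could look like this - note that with the patch, vector 2 is
only accepted if the "IF" flag is set, i.e. the handler is entered with
event delivery disabled:

    /* Register an NMI handler through the ordinary trap table. */
    static int register_nmi_handler(void (*handler)(void))
    {
        struct trap_info ti[2] = { { 0 } };   /* all-zero entry terminates the table */

        ti[0].vector  = 2;                    /* NMI */
        ti[0].cs      = FLAT_KERNEL_CS;       /* or whatever selector the guest prefers */
        ti[0].address = (unsigned long)handler;
        TI_SET_IF(&ti[0], 1);                 /* required for vector 2 */

        return HYPERVISOR_set_trap_table(ti);
    }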

>really need to be able to specify a custom CS? Can you not vector to the
>flat CS and then far jump?

I probably could, but I'm generally opposed to making assumptions about
the guest where this is unnecessary (i.e. I dislike all callback pointers
registered without selectors).

>I'm not sure about making the IPI function a physdev_op(), since this is
>still a virtual NMI (it has nothing to do with real hardware NMIs). It might

I didn't really find a good fit. This seemed (from the guest's perspective)
the most reasonable choice.

>be better to make it a vcpu_op. Then it would not be a great fit to allow the
>send-to-all / send-to-all-but-self overrides, but I'm not sure how important
>that optimisation is (or how important it is to make the NMI deliveries as
>simultaneous as possible)?

The delivery shortcuts aren't a strict requirement for our NetWare guys,
yet keeping this close to how hardware IPIs can be sent seemed like a
good idea to me. For that reason, vcpu_op didn't get into the set of
hypercalls I considered putting this under. If you really feel like not
having the shortcuts, then it could certainly go there.
Also, the cross-domain sending of NMIs, if I saw this correctly, has just
recently had a domctl added (though currently implemented only for
ia64 and only for INIT), so maybe that part should really be split out.

Jan


* Re: [PATCH] x86: allow NMI injection
From: Keir Fraser @ 2007-02-28 13:15 UTC (permalink / raw)
  To: Jan Beulich, Keir Fraser; +Cc: xen-devel

On 28/2/07 12:21, "Jan Beulich" <jbeulich@novell.com> wrote:

>> Notably this patch changes the way that an NMI handler is registered, to use
>> the native-like vector 2. This changes the guest interface though. Do you
> 
> But only in a benign fashion - registering the old way is still possible, and
> registering through set_trap_table now works where it previously didn't.
> 
>> really need to be able to specify a custom CS? Can you not vector to the
>> flat CS and then far jump?
> 
> I probably could, but I'm generally opposed to making assumptions about
> the guest where this is unnecessary (i.e. I dislike all callback pointers
> registered without selectors).

Oh I see. That sounds reasonable.

vcpu_op() perhaps isn't a great match, but then neither is physdev_op()
really. I'll have a think about which is better.

 -- Keir

