* [PATCH] x86: allow NMI injection
@ 2007-02-28 11:39 Jan Beulich
2007-02-28 11:59 ` Keir Fraser
0 siblings, 1 reply; 4+ messages in thread
From: Jan Beulich @ 2007-02-28 11:39 UTC (permalink / raw)
To: xen-devel
NetWare's internal debugger needs the ability to send NMI IPIs, and
there is no reason not to allow domUs or dom0's vCPUs other than vCPU 0
to handle NMIs (they just will never see hardware generated ones).
While currently not having a frontend, the added hypercall also allows
for being used to inject NMIs into foreign VMs.
Along the lines, this fixes a potential race condition caused by
previously accessing the VCPU flags field non-atomically in entry.S.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Index: 2007-02-27/xen/arch/x86/physdev.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/physdev.c 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/physdev.c 2007-02-28 12:14:58.000000000 +0100
@@ -143,6 +143,57 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
break;
}
+ case PHYSDEVOP_send_nmi: {
+ struct physdev_send_nmi send_nmi;
+ struct domain *d;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&send_nmi, arg, 1) != 0 )
+ break;
+
+ ret = -EPERM;
+ if ( send_nmi.domain == DOMID_SELF )
+ d = current->domain;
+ else if ( !IS_PRIV(current->domain) )
+ break;
+ else
+ d = get_domain_by_id(send_nmi.domain);
+ ret = -ESRCH;
+ if ( d == NULL )
+ break;
+
+ switch ( send_nmi.vcpu )
+ {
+ struct vcpu *v;
+
+ case XEN_SEND_NMI_ALL:
+ case XEN_SEND_NMI_ALL_BUT_SELF:
+ for_each_vcpu(d, v)
+ {
+ if ( (send_nmi.vcpu == XEN_SEND_NMI_ALL || v != current) &&
+ !test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) )
+ vcpu_kick(v);
+ }
+ ret = 0;
+ break;
+ case 0 ... MAX_VIRT_CPUS - 1:
+ if ( (v = d->vcpu[send_nmi.vcpu]) != NULL )
+ {
+ if ( !test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) )
+ vcpu_kick(v);
+ ret = 0;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if ( send_nmi.domain != DOMID_SELF )
+ put_domain(d);
+ break;
+ }
+
default:
ret = -ENOSYS;
break;
Index: 2007-02-27/xen/arch/x86/traps.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/traps.c 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/traps.c 2007-02-28 12:08:44.000000000 +0100
@@ -2123,6 +2123,12 @@ long do_set_trap_table(XEN_GUEST_HANDLE(
if ( cur.address == 0 )
break;
+ if ( cur.vector == 2 && !TI_GET_IF(&cur) )
+ {
+ rc = -EINVAL;
+ break;
+ }
+
fixup_guest_code_selector(current->domain, cur.cs);
memcpy(&dst[cur.vector], &cur, sizeof(cur));
Index: 2007-02-27/xen/arch/x86/x86_32/asm-offsets.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_32/asm-offsets.c 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_32/asm-offsets.c 2007-02-28 12:08:44.000000000 +0100
@@ -68,6 +68,7 @@ void __dummy__(void)
OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+ OFFSET(VCPU_nmi_cs, struct vcpu, arch.guest_context.trap_ctxt[2].cs);
DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
Index: 2007-02-27/xen/arch/x86/x86_32/entry.S
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_32/entry.S 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_32/entry.S 2007-02-28 12:08:44.000000000 +0100
@@ -232,7 +232,7 @@ test_all_events:
shl $IRQSTAT_shift,%eax
test %ecx,irq_stat(%eax,1)
jnz process_softirqs
- btr $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+ lock btrl $_VCPUF_nmi_pending,VCPU_flags(%ebx)
jc process_nmi
test_guest_events:
movl VCPU_vcpu_info(%ebx),%eax
@@ -259,19 +259,20 @@ process_softirqs:
ALIGN
process_nmi:
- movl VCPU_nmi_addr(%ebx),%eax
+ movzwl VCPU_nmi_cs(%ebx),%eax
+ movl VCPU_nmi_addr(%ebx),%ecx
test %eax,%eax
jz test_all_events
- bts $_VCPUF_nmi_masked,VCPU_flags(%ebx)
+ lock btsl $_VCPUF_nmi_masked,VCPU_flags(%ebx)
jc 1f
sti
leal VCPU_trap_bounce(%ebx),%edx
- movl %eax,TRAPBOUNCE_eip(%edx)
- movw $FLAT_KERNEL_CS,TRAPBOUNCE_cs(%edx)
+ movl %ecx,TRAPBOUNCE_eip(%edx)
+ movw %ax,TRAPBOUNCE_cs(%edx)
movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
call create_bounce_frame
jmp test_all_events
-1: bts $_VCPUF_nmi_pending,VCPU_flags(%ebx)
+1: lock btsl $_VCPUF_nmi_pending,VCPU_flags(%ebx)
jmp test_guest_events
bad_hypercall:
Index: 2007-02-27/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/asm-offsets.c 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/asm-offsets.c 2007-02-28 12:08:44.000000000 +0100
@@ -76,6 +76,7 @@ void __dummy__(void)
OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
+ OFFSET(VCPU_nmi_cs, struct vcpu, arch.guest_context.trap_ctxt[2].cs);
DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
Index: 2007-02-27/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/compat/entry.S 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/compat/entry.S 2007-02-28 12:08:44.000000000 +0100
@@ -87,7 +87,7 @@ ENTRY(compat_test_all_events)
leaq irq_stat(%rip),%rcx
testl $~0,(%rcx,%rax,1)
jnz compat_process_softirqs
- btrq $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+ lock btrl $_VCPUF_nmi_pending,VCPU_flags(%rbx)
jc compat_process_nmi
compat_test_guest_events:
movq VCPU_vcpu_info(%rbx),%rax
@@ -101,7 +101,7 @@ compat_test_guest_events:
movl VCPU_event_addr(%rbx),%eax
movl %eax,TRAPBOUNCE_eip(%rdx)
movl VCPU_event_sel(%rbx),%eax
- movl %eax,TRAPBOUNCE_cs(%rdx)
+ movw %ax,TRAPBOUNCE_cs(%rdx)
movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
call compat_create_bounce_frame
jmp compat_test_all_events
@@ -116,20 +116,21 @@ compat_process_softirqs:
ALIGN
/* %rbx: struct vcpu */
compat_process_nmi:
- movl VCPU_nmi_addr(%rbx),%eax
+ movzwl VCPU_nmi_cs(%rbx),%eax
+ movl VCPU_nmi_addr(%rbx),%ecx
testl %eax,%eax
jz compat_test_all_events
- btsq $_VCPUF_nmi_masked,VCPU_flags(%rbx)
+ lock btsl $_VCPUF_nmi_masked,VCPU_flags(%rbx)
jc 1f
sti
leaq VCPU_trap_bounce(%rbx),%rdx
- movl %eax,TRAPBOUNCE_eip(%rdx)
- movl $FLAT_COMPAT_KERNEL_CS,TRAPBOUNCE_cs(%rdx)
+ movl %ecx,TRAPBOUNCE_eip(%rdx)
+ movw %ax,TRAPBOUNCE_cs(%rdx)
movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
call compat_create_bounce_frame
jmp compat_test_all_events
1:
- btsq $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+ lock btsl $_VCPUF_nmi_pending,VCPU_flags(%rbx)
jmp compat_test_guest_events
compat_bad_hypercall:
@@ -164,7 +165,7 @@ compat_failsafe_callback:
movl VCPU_failsafe_addr(%rbx),%eax
movl %eax,TRAPBOUNCE_eip(%rdx)
movl VCPU_failsafe_sel(%rbx),%eax
- movl %eax,TRAPBOUNCE_cs(%rdx)
+ movw %ax,TRAPBOUNCE_cs(%rdx)
movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
btq $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx)
jnc 1f
Index: 2007-02-27/xen/arch/x86/x86_64/compat/traps.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/compat/traps.c 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/compat/traps.c 2007-02-28 12:08:44.000000000 +0100
@@ -287,6 +287,12 @@ int compat_set_trap_table(XEN_GUEST_HAND
if ( cur.address == 0 )
break;
+ if ( cur.vector == 2 && !TI_GET_IF(&cur) )
+ {
+ rc = -EINVAL;
+ break;
+ }
+
fixup_guest_code_selector(current->domain, cur.cs);
XLAT_trap_info(dst + cur.vector, &cur);
Index: 2007-02-27/xen/arch/x86/x86_64/entry.S
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/entry.S 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/entry.S 2007-02-28 12:08:44.000000000 +0100
@@ -177,7 +177,7 @@ test_all_events:
leaq irq_stat(%rip),%rcx
testl $~0,(%rcx,%rax,1)
jnz process_softirqs
- btr $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+ lock btrl $_VCPUF_nmi_pending,VCPU_flags(%rbx)
jc process_nmi
test_guest_events:
movq VCPU_vcpu_info(%rbx),%rax
@@ -207,7 +207,7 @@ process_nmi:
movq VCPU_nmi_addr(%rbx),%rax
test %rax,%rax
jz test_all_events
- bts $_VCPUF_nmi_masked,VCPU_flags(%rbx)
+ lock btsl $_VCPUF_nmi_masked,VCPU_flags(%rbx)
jc 1f
sti
leaq VCPU_trap_bounce(%rbx),%rdx
@@ -215,7 +215,7 @@ process_nmi:
movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
call create_bounce_frame
jmp test_all_events
-1: bts $_VCPUF_nmi_pending,VCPU_flags(%rbx)
+1: lock btsl $_VCPUF_nmi_pending,VCPU_flags(%rbx)
jmp test_guest_events
bad_hypercall:
Index: 2007-02-27/xen/arch/x86/x86_64/physdev.c
===================================================================
--- 2007-02-27.orig/xen/arch/x86/x86_64/physdev.c 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/arch/x86/x86_64/physdev.c 2007-02-28 12:08:44.000000000 +0100
@@ -30,6 +30,10 @@
#define physdev_irq_status_query compat_physdev_irq_status_query
#define physdev_irq_status_query_t physdev_irq_status_query_compat_t
+#define xen_physdev_send_nmi physdev_send_nmi
+CHECK_physdev_send_nmi;
+#undef xen_physdev_send_nmi
+
#define COMPAT
#undef guest_handle_okay
#define guest_handle_okay compat_handle_okay
Index: 2007-02-27/xen/common/kernel.c
===================================================================
--- 2007-02-27.orig/xen/common/kernel.c 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/common/kernel.c 2007-02-28 12:08:44.000000000 +0100
@@ -246,16 +246,20 @@ long register_guest_nmi_callback(unsigne
struct vcpu *v = current;
struct domain *d = current->domain;
- if ( (d->domain_id != 0) || (v->vcpu_id != 0) )
- return -EINVAL;
-
v->nmi_addr = address;
#ifdef CONFIG_X86
+ v->arch.guest_context.trap_ctxt[2].vector = 2;
+ v->arch.guest_context.trap_ctxt[2].flags = 0;
+ TI_SET_IF(v->arch.guest_context.trap_ctxt + 2, 1);
+ v->arch.guest_context.trap_ctxt[2].cs =
+ !IS_COMPAT(d) ? FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS;
+
/*
* If no handler was registered we can 'lose the NMI edge'. Re-assert it
* now.
*/
- if ( arch_get_nmi_reason(d) != 0 )
+ if ( d->domain_id == 0 && v->vcpu_id == 0 &&
+ arch_get_nmi_reason(d) != 0 )
set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
#endif
@@ -266,6 +270,11 @@ long unregister_guest_nmi_callback(void)
{
struct vcpu *v = current;
+#ifdef CONFIG_X86
+ v->arch.guest_context.trap_ctxt[2].cs = 0;
+ v->arch.guest_context.trap_ctxt[2].vector = 0;
+ v->arch.guest_context.trap_ctxt[2].flags = 0;
+#endif
v->nmi_addr = 0;
return 0;
Index: 2007-02-27/xen/include/public/physdev.h
===================================================================
--- 2007-02-27.orig/xen/include/public/physdev.h 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/include/public/physdev.h 2007-02-28 12:08:44.000000000 +0100
@@ -119,6 +119,22 @@ typedef struct physdev_irq physdev_irq_t
DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
/*
+ * Inject a (virtual) NMI into the given VCPU(s) of the specified domain.
+ * @arg == pointer to physdev_send_nmi structure.
+ */
+#define PHYSDEVOP_send_nmi 13
+struct physdev_send_nmi {
+ /* IN */
+ domid_t domain;
+ uint32_t vcpu;
+};
+typedef struct physdev_send_nmi physdev_send_nmi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_send_nmi_t);
+
+#define XEN_SEND_NMI_ALL (~(uint32_t)0)
+#define XEN_SEND_NMI_ALL_BUT_SELF (~(uint32_t)1)
+
+/*
* Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
* hypercall since 0x00030202.
*/
Index: 2007-02-27/xen/include/xen/sched.h
===================================================================
--- 2007-02-27.orig/xen/include/xen/sched.h 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/include/xen/sched.h 2007-02-28 12:08:44.000000000 +0100
@@ -108,7 +108,11 @@ struct vcpu
/* Bitmask of CPUs on which this VCPU may run. */
cpumask_t cpu_affinity;
+#ifndef CONFIG_X86
unsigned long nmi_addr; /* NMI callback address. */
+#else
+# define nmi_addr arch.guest_context.trap_ctxt[2].address
+#endif
/* Bitmask of CPUs which are holding onto this VCPU's state. */
cpumask_t vcpu_dirty_cpumask;
Index: 2007-02-27/xen/include/xlat.lst
===================================================================
--- 2007-02-27.orig/xen/include/xlat.lst 2007-02-28 12:14:26.000000000 +0100
+++ 2007-02-27/xen/include/xlat.lst 2007-02-28 12:08:44.000000000 +0100
@@ -36,6 +36,7 @@
! memory_map memory.h
! memory_reservation memory.h
! translate_gpfn_list memory.h
+? physdev_send_nmi physdev.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] x86: allow NMI injection
2007-02-28 11:39 [PATCH] x86: allow NMI injection Jan Beulich
@ 2007-02-28 11:59 ` Keir Fraser
2007-02-28 12:21 ` Jan Beulich
0 siblings, 1 reply; 4+ messages in thread
From: Keir Fraser @ 2007-02-28 11:59 UTC (permalink / raw)
To: Jan Beulich, xen-devel
On 28/2/07 11:39, "Jan Beulich" <jbeulich@novell.com> wrote:
> NetWare's internal debugger needs the ability to send NMI IPIs, and
> there is no reason to not allow domUs or dom0's vCPUs other than vCPU 0
> to handle NMIs (they just will never see hardware generated ones).
>
> While currently not having a frontend, the added hypercall also allows
> for being used to inject NMIs into foreign VMs.
>
> Along the lines, this fixes a potential race condition caused by
> previously accessing the VCPU flags field non-atomically in entry.S.
>
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
Notably this patch changes the way that a NMI handler is registered to use
the native-like vector 2. This changes the guest interface though. Do you
really need to be able to specify a custom CS? Can you not vector to the
flat CS and then far jump?
I'm not sure about making the IPI function a physdev_op(), since this is
still a virtual NMI (it has nothing to do with real hardware NMIs). It might
be better to make it a vcpu_op. Then it would not be a great fit to allow
send-to-all / send-to-all-but-self overrides, but I'm not sure how important
that optimisation is (or is to make the NMI deliveries as simultaneous as
possible)?
-- Keir
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] x86: allow NMI injection
2007-02-28 11:59 ` Keir Fraser
@ 2007-02-28 12:21 ` Jan Beulich
2007-02-28 13:15 ` Keir Fraser
0 siblings, 1 reply; 4+ messages in thread
From: Jan Beulich @ 2007-02-28 12:21 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel
>Notably this patch changes the way that a NMI handler is registered to use
>the native-like vector 2. This changes the guest interface though. Do you
But only in a benign fashion - registering the old way is still possible,
registering through set_trap_table now works where it previously didn't.
>really need to be able to specify a custom CS? Can you not vector to the
>flat CS and then far jump?
I probably could, but I'm generally opposed to making assumptions about
the guest where this is unnecessary (i.e. I dislike all callback pointers
registered without selectors).
>I'm not sure about making the IPI function a physdev_op(), since this is
>still a virtual NMI (it has nothing to do with real hardware NMIs). It might
I didn't find a good fit really. This seemed (from guest perspective) the
most reasonable choice.
>be better to make it a vcpu_op. Then it would not be a great fit to allow
>send-to-all send-to-all-butself overrides, but I'm not sure how important
>that optimisation is (or is to make the NMI deliveries as simultaneous as
>possible)?
The delivery shortcuts aren't a strict requirement to our NetWare guys,
yet keeping this close to how hardware IPIs can be sent seemed like a
good idea to me. For that reason, vcpu_op didn't get into the set of
hypercalls I considered for putting this under. If you really feel like not
having the shortcuts, then it could certainly go there.
Also, the cross domain sending of NMIs, if I saw this correctly, has just
recently got a domctl added (though currently implemented only for
ia64 and only for INIT), so maybe that part should really be split out.
Jan
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] x86: allow NMI injection
2007-02-28 12:21 ` Jan Beulich
@ 2007-02-28 13:15 ` Keir Fraser
0 siblings, 0 replies; 4+ messages in thread
From: Keir Fraser @ 2007-02-28 13:15 UTC (permalink / raw)
To: Jan Beulich, Keir Fraser; +Cc: xen-devel
On 28/2/07 12:21, "Jan Beulich" <jbeulich@novell.com> wrote:
>> Notably this patch changes the way that a NMI handler is registered to use
>> the native-like vector 2. This changes the guest interface though. Do you
>
> But only in a benign fashion - registering the old way is still possible,
> registering through set_trap_table now works where it previously didn't.
>
>> really need to be able to specify a custom CS? Can you not vector to the
>> flat CS and then far jump?
>
> I probably could, but I'm generally opposed to making assumptions about
> the guest where this is unnecessary (i.e. I dislike all callback pointers
> registered without selectors).
Oh I see. That sounds reasonable.
vcpu_op() isn't perhaps a great match but then neither is physdev_op()
really. I'll have a think about which is better.
-- Keir
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2007-02-28 13:15 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-28 11:39 [PATCH] x86: allow NMI injection Jan Beulich
2007-02-28 11:59 ` Keir Fraser
2007-02-28 12:21 ` Jan Beulich
2007-02-28 13:15 ` Keir Fraser
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.