From mboxrd@z Thu Jan 1 00:00:00 1970 From: Razvan Cojocaru Subject: [PATCH RFC V11 2/5] xen: Optimize introspection access to guest state Date: Tue, 2 Sep 2014 18:37:16 +0300 Message-ID: <1409672239-19520-2-git-send-email-rcojocaru@bitdefender.com> References: <1409672239-19520-1-git-send-email-rcojocaru@bitdefender.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from mail6.bemta4.messagelabs.com ([85.158.143.247]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1XOq9O-0000TQ-QE for xen-devel@lists.xenproject.org; Tue, 02 Sep 2014 15:37:43 +0000 In-Reply-To: <1409672239-19520-1-git-send-email-rcojocaru@bitdefender.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel@lists.xenproject.org Cc: kevin.tian@intel.com, ian.campbell@citrix.com, Razvan Cojocaru , stefano.stabellini@eu.citrix.com, jun.nakajima@intel.com, andrew.cooper3@citrix.com, eddie.dong@intel.com, tim@xen.org, JBeulich@suse.com, ian.jackson@eu.citrix.com List-Id: xen-devel@lists.xenproject.org Speed optimization for introspection purposes: a handful of registers are sent along with each mem_event. This requires enlargement of the mem_event_request / mem_event_response stuctures, and additional code to fill in relevant values. Since the EPT event processing code needs more data than CR3 or MSR event processors, hvm_mem_event_fill_regs() fills in less data than p2m_mem_event_fill_regs(), in order to avoid overhead. Struct hvm_hw_cpu has been considered instead of the custom struct mem_event_regs_st, but its size would cause quick filling up of the mem_event ring buffer. Signed-off-by: Razvan Cojocaru Acked-by: Jan Beulich --- Changes since V5: - Const-correctness. Changes since V4: - Renamed "x86_mem_event_regs" to "mem_event_regs_x86" and removed a typedef. Changes since V3: - Changed the name of VCPU variables referring to the current VCPU to "curr". - Renamed "mem_event_regs" to "x86_mem_event_regs" to make it explicit. Changes since V2: - Removed inline function compiler suggestions. - Removed superfluous memset(&ctxt, 0, sizeof(struct hvm_hw_cpu)). - Padded struct mem_event_regs_st. - Renamed mem_event_regs_t to mem_event_regs. - Readability changes. Changes since V1: - Replaced guest_x86_mode with cs_arbytes in the mem_event_regs_st structure. - Removed superfluous preprocessor check for __x86_64__ in p2m_mem_event_fill_regs(). --- tools/libxc/xc_mem_access.c | 10 +++++++++- tools/libxc/xc_mem_event.c | 7 +++++-- tools/libxc/xc_private.h | 2 +- tools/libxc/xenctrl.h | 2 ++ xen/arch/x86/hvm/vmx/vmcs.c | 25 +++++++++++++++++++++++++ xen/arch/x86/hvm/vmx/vmx.c | 13 +++++++++++++ xen/arch/x86/mm/mem_event.c | 11 +++++++++++ xen/include/asm-x86/hvm/domain.h | 1 + xen/include/asm-x86/hvm/hvm.h | 2 ++ xen/include/asm-x86/hvm/vmx/vmcs.h | 7 +++++++ xen/include/public/domctl.h | 7 ++++--- 11 files changed, 80 insertions(+), 7 deletions(-) diff --git a/tools/libxc/xc_mem_access.c b/tools/libxc/xc_mem_access.c index 461f0e9..55d0e9f 100644 --- a/tools/libxc/xc_mem_access.c +++ b/tools/libxc/xc_mem_access.c @@ -26,7 +26,15 @@ void *xc_mem_access_enable(xc_interface *xch, domid_t domain_id, uint32_t *port) { - return xc_mem_event_enable(xch, domain_id, HVM_PARAM_ACCESS_RING_PFN, port); + return xc_mem_event_enable(xch, domain_id, HVM_PARAM_ACCESS_RING_PFN, + port, 0); +} + +void *xc_mem_access_enable_introspection(xc_interface *xch, domid_t domain_id, + uint32_t *port) +{ + return xc_mem_event_enable(xch, domain_id, HVM_PARAM_ACCESS_RING_PFN, + port, 1); } int xc_mem_access_disable(xc_interface *xch, domid_t domain_id) diff --git a/tools/libxc/xc_mem_event.c b/tools/libxc/xc_mem_event.c index faf1cc6..8c0be4e 100644 --- a/tools/libxc/xc_mem_event.c +++ b/tools/libxc/xc_mem_event.c @@ -57,7 +57,7 @@ int xc_mem_event_memop(xc_interface *xch, domid_t domain_id, } void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param, - uint32_t *port) + uint32_t *port, int enable_introspection) { void *ring_page = NULL; uint64_t pfn; @@ -120,7 +120,10 @@ void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param, break; case HVM_PARAM_ACCESS_RING_PFN: - op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE; + if ( enable_introspection ) + op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION; + else + op = XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE; mode = XEN_DOMCTL_MEM_EVENT_OP_ACCESS; break; diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h index c50a7c9..94df688 100644 --- a/tools/libxc/xc_private.h +++ b/tools/libxc/xc_private.h @@ -381,6 +381,6 @@ int xc_mem_event_memop(xc_interface *xch, domid_t domain_id, * param can be HVM_PARAM_PAGING/ACCESS/SHARING_RING_PFN */ void *xc_mem_event_enable(xc_interface *xch, domid_t domain_id, int param, - uint32_t *port); + uint32_t *port, int enable_introspection); #endif /* __XC_PRIVATE_H__ */ diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h index 1c5d0db..28b5562 100644 --- a/tools/libxc/xenctrl.h +++ b/tools/libxc/xenctrl.h @@ -2263,6 +2263,8 @@ int xc_mem_paging_load(xc_interface *xch, domid_t domain_id, * Caller has to unmap this page when done. */ void *xc_mem_access_enable(xc_interface *xch, domid_t domain_id, uint32_t *port); +void *xc_mem_access_enable_introspection(xc_interface *xch, domid_t domain_id, + uint32_t *port); int xc_mem_access_disable(xc_interface *xch, domid_t domain_id); int xc_mem_access_resume(xc_interface *xch, domid_t domain_id); diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index 4a4f4e1..fc1f882 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -39,6 +39,7 @@ #include #include #include +#include static bool_t __read_mostly opt_vpid_enabled = 1; boolean_param("vpid", opt_vpid_enabled); @@ -71,6 +72,18 @@ u32 vmx_vmexit_control __read_mostly; u32 vmx_vmentry_control __read_mostly; u64 vmx_ept_vpid_cap __read_mostly; +const u32 vmx_introspection_force_enabled_msrs[] = { + MSR_IA32_SYSENTER_EIP, + MSR_IA32_SYSENTER_ESP, + MSR_IA32_SYSENTER_CS, + MSR_IA32_MC0_CTL, + MSR_STAR, + MSR_LSTAR +}; + +const unsigned int vmx_introspection_force_enabled_msrs_size = + ARRAY_SIZE(vmx_introspection_force_enabled_msrs); + static DEFINE_PER_CPU_READ_MOSTLY(struct vmcs_struct *, vmxon_region); static DEFINE_PER_CPU(struct vmcs_struct *, current_vmcs); static DEFINE_PER_CPU(struct list_head, active_vmcs_list); @@ -695,11 +708,23 @@ static void vmx_set_host_env(struct vcpu *v) void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr, int type) { unsigned long *msr_bitmap = v->arch.hvm_vmx.msr_bitmap; + struct domain *d = v->domain; /* VMX MSR bitmap supported? */ if ( msr_bitmap == NULL ) return; + if ( unlikely(d->arch.hvm_domain.introspection_enabled) && + mem_event_check_ring(&d->mem_event->access) ) + { + unsigned int i; + + /* Filter out MSR-s needed for memory introspection */ + for ( i = 0; i < vmx_introspection_force_enabled_msrs_size; i++ ) + if ( msr == vmx_introspection_force_enabled_msrs[i] ) + return; + } + /* * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals * have the write-low and read-high bitmap offsets the wrong way round. diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 26b1ad5..b08664e 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1683,6 +1683,18 @@ void vmx_hypervisor_cpuid_leaf(uint32_t sub_idx, *eax |= XEN_HVM_CPUID_X2APIC_VIRT; } +static void vmx_enable_msr_exit_interception(struct domain *d) +{ + struct vcpu *v; + unsigned int i; + + /* Enable interception for MSRs needed for memory introspection. */ + for_each_vcpu ( d, v ) + for ( i = 0; i < vmx_introspection_force_enabled_msrs_size; i++ ) + vmx_enable_intercept_for_msr(v, vmx_introspection_force_enabled_msrs[i], + MSR_TYPE_W); +} + static struct hvm_function_table __initdata vmx_function_table = { .name = "VMX", .cpu_up_prepare = vmx_cpu_up_prepare, @@ -1741,6 +1753,7 @@ static struct hvm_function_table __initdata vmx_function_table = { .handle_eoi = vmx_handle_eoi, .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m, .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf, + .enable_msr_exit_interception = vmx_enable_msr_exit_interception, }; const struct hvm_function_table * __init start_vmx(void) diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c index ba7e71e..fdd5ff6 100644 --- a/xen/arch/x86/mm/mem_event.c +++ b/xen/arch/x86/mm/mem_event.c @@ -587,6 +587,7 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, switch( mec->op ) { case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE: + case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION: { rc = -ENODEV; /* Only HAP is supported */ @@ -600,13 +601,23 @@ int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, rc = mem_event_enable(d, mec, med, _VPF_mem_access, HVM_PARAM_ACCESS_RING_PFN, mem_access_notification); + + if ( mec->op != XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE && + rc == 0 && hvm_funcs.enable_msr_exit_interception ) + { + d->arch.hvm_domain.introspection_enabled = 1; + hvm_funcs.enable_msr_exit_interception(d); + } } break; case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE: { if ( med->ring_page ) + { rc = mem_event_disable(d, med); + d->arch.hvm_domain.introspection_enabled = 0; + } } break; diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h index 291a2e0..30d4aa3 100644 --- a/xen/include/asm-x86/hvm/domain.h +++ b/xen/include/asm-x86/hvm/domain.h @@ -134,6 +134,7 @@ struct hvm_domain { bool_t mem_sharing_enabled; bool_t qemu_mapcache_invalidate; bool_t is_s3_suspended; + bool_t introspection_enabled; /* * TSC value that VCPUs use to calculate their tsc_offset value. diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index 1123857..121d053 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -205,6 +205,8 @@ struct hvm_function_table { void (*hypervisor_cpuid_leaf)(uint32_t sub_idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); + + void (*enable_msr_exit_interception)(struct domain *d); }; extern struct hvm_function_table hvm_funcs; diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h index 215d93c..6a99dca 100644 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -471,6 +471,13 @@ enum vmcs_field { HOST_RIP = 0x00006c16, }; +/* + * A set of MSR-s that need to be enabled for memory introspection + * to work. + */ +extern const u32 vmx_introspection_force_enabled_msrs[]; +extern const unsigned int vmx_introspection_force_enabled_msrs_size; + #define VMCS_VPID_WIDTH 16 #define MSR_TYPE_R 1 diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index 5b11bbf..a3431ec 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -772,10 +772,11 @@ struct xen_domctl_gdbsx_domstatus { * ENODEV - host lacks HAP support (EPT/NPT) or HAP is disabled in guest * EBUSY - guest has or had access enabled, ring buffer still active */ -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2 +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2 -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0 -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1 +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1 +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE_INTROSPECTION 2 /* * Sharing ENOMEM helper. -- 1.7.9.5