* [PATCH v3 0/3] x86/AMD support data breakpoint extension registers
@ 2014-04-07  9:31 Jan Beulich
  2014-04-07  9:38 ` [PATCH v3 1/3] SVM: " Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Jan Beulich @ 2014-04-07  9:31 UTC (permalink / raw)
  To: xen-devel
  Cc: Ian Campbell, Ian Jackson, Keir Fraser, Aravind Gopalakrishnan,
	suravee.suthikulpanit

1: SVM: support data breakpoint extension registers
2: PV: support data breakpoint extension registers
3: libxc/PV: save/restore data breakpoint extension registers

Aravind - it wasn't clear from your most recent response whether the
Reviewed-by applied only to the feature mask changes (previously in
a single series with the changes here) or also to this series, so I
left it off.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
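
[Editorial note, not part of the series: the feature being exposed is AMD's
data breakpoint extension (CPUID leaf 0x80000001, ECX bit 26, "DBEXT"), i.e.
the per-breakpoint address-mask MSRs (0xc0011027 for DR0, 0xc0011019...0xc001101b
for DR1-DR3) that let a debug register match an address range instead of a
single address. A purely illustrative guest-side sketch, assuming the usual
mask semantics (address bits set in the mask MSR are ignored when the CPU
compares an access against the corresponding DRn):

    /* Illustrative only: trap accesses anywhere in a 4KiB-aligned buffer. */
    #define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027

    static inline void wrmsr(unsigned int msr, unsigned long long val)
    {
        asm volatile ( "wrmsr" :: "c" (msr),
                       "a" ((unsigned int)val),
                       "d" ((unsigned int)(val >> 32)) );
    }

    static void watch_page(const void *buf)    /* must run at CPL0 */
    {
        asm volatile ( "mov %0, %%dr0" :: "r" (buf) );
        wrmsr(MSR_AMD64_DR0_ADDRESS_MASK, 0xfff); /* ignore address bits 11:0 */
        /* ...then enable DR0 as a data breakpoint via DR7 as usual... */
    }
]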


* [PATCH v3 1/3] SVM: support data breakpoint extension registers
  2014-04-07  9:31 [PATCH v3 0/3] x86/AMD support data breakpoint extension registers Jan Beulich
@ 2014-04-07  9:38 ` Jan Beulich
  2014-04-11 14:32   ` George Dunlap
  2014-04-07  9:38 ` [PATCH v3 2/3] x86/PV: " Jan Beulich
  2014-04-07  9:39 ` [PATCH v3 3/3] libxc/PV: save/restore " Jan Beulich
  2 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2014-04-07  9:38 UTC (permalink / raw)
  To: xen-devel
  Cc: Ian Campbell, Ian Jackson, Keir Fraser, Aravind Gopalakrishnan,
	suravee.suthikulpanit

[-- Attachment #1: Type: text/plain, Size: 11484 bytes --]

Leveraging the generic MSR save/restore logic introduced a little while
ago.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
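
[Editorial note: the patch plugs into that generic logic through the three new
hvm_function_table hooks added below (.init_msr / .save_msr / .load_msr). A
rough sketch of the expected calling sequence; the driver-side helper names
are made up for illustration, only the hook contract is taken from the patch:

    /* Save side (illustrative): size the per-vCPU MSR context, then let
     * the hook append only those MSRs whose values are non-zero.        */
    unsigned int max = hvm_funcs.init_msr();       /* 4 iff DBEXT present */
    struct hvm_msr *ctxt = alloc_hvm_msr_ctxt(max);  /* hypothetical helper */

    ctxt->count = 0;
    hvm_funcs.save_msr(v, ctxt);       /* appends up to 4 dr_mask entries */

    /* Load side (illustrative): the hook consumes the entries it
     * recognises and flags each via ->_rsvd; it fails with -ENXIO if
     * DBEXT is unavailable and -EDOM if bits 63:32 of a value are set.  */
    rc = hvm_funcs.load_msr(v, ctxt);
]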

--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -125,6 +125,7 @@
 #define X86_FEATURE_NODEID_MSR  19 /* NodeId MSR */
 #define X86_FEATURE_TBM         21 /* trailing bit manipulations */
 #define X86_FEATURE_TOPOEXT     22 /* topology extensions CPUID leafs */
+#define X86_FEATURE_DBEXT       26 /* data breakpoint extension */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
 #define X86_FEATURE_FSGSBASE     0 /* {RD,WR}{FS,GS}BASE instructions */
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -110,9 +110,10 @@ static void amd_xc_cpuid_policy(
                     bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
                     bitmaskof(X86_FEATURE_OSVW) |
                     bitmaskof(X86_FEATURE_XOP) |
+                    bitmaskof(X86_FEATURE_LWP) |
                     bitmaskof(X86_FEATURE_FMA4) |
                     bitmaskof(X86_FEATURE_TBM) |
-                    bitmaskof(X86_FEATURE_LWP));
+                    bitmaskof(X86_FEATURE_DBEXT));
         regs[3] &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
                     (is_pae ? bitmaskof(X86_FEATURE_NX) : 0) |
                     (is_64bit ? bitmaskof(X86_FEATURE_LM) : 0) |
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3082,6 +3082,9 @@ void hvm_cpuid(unsigned int input, unsig
         /* Only provide PSE36 when guest runs in 32bit PAE or in long mode */
         if ( !(hvm_pae_enabled(v) || hvm_long_mode_enabled(v)) )
             *edx &= ~cpufeat_mask(X86_FEATURE_PSE36);
+        /* Hide data breakpoint extensions if the hardware has no support. */
+        if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+            *ecx &= ~cpufeat_mask(X86_FEATURE_DBEXT);
         break;
 
     case 0x80000008:
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -160,14 +160,28 @@ void svm_intercept_msr(struct vcpu *v, u
 static void svm_save_dr(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned int flag_dr_dirty = v->arch.hvm_vcpu.flag_dr_dirty;
 
-    if ( !v->arch.hvm_vcpu.flag_dr_dirty )
+    if ( !flag_dr_dirty )
         return;
 
     /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
     v->arch.hvm_vcpu.flag_dr_dirty = 0;
     vmcb_set_dr_intercepts(vmcb, ~0u);
 
+    if ( flag_dr_dirty & 2 )
+    {
+        svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_RW);
+        svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_RW);
+        svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_RW);
+        svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_RW);
+
+        rdmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[0]);
+        rdmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[1]);
+        rdmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[2]);
+        rdmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[3]);
+    }
+
     v->arch.debugreg[0] = read_debugreg(0);
     v->arch.debugreg[1] = read_debugreg(1);
     v->arch.debugreg[2] = read_debugreg(2);
@@ -178,12 +192,32 @@ static void svm_save_dr(struct vcpu *v)
 
 static void __restore_debug_registers(struct vmcb_struct *vmcb, struct vcpu *v)
 {
+    unsigned int ecx;
+
     if ( v->arch.hvm_vcpu.flag_dr_dirty )
         return;
 
     v->arch.hvm_vcpu.flag_dr_dirty = 1;
     vmcb_set_dr_intercepts(vmcb, 0);
 
+    ASSERT(v == current);
+    hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
+    if ( test_bit(X86_FEATURE_DBEXT & 31, &ecx) )
+    {
+        svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+        svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+        svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+        svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+
+        wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[0]);
+        wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[1]);
+        wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[2]);
+        wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[3]);
+
+        /* Can't use hvm_cpuid() in svm_save_dr(): v != current. */
+        v->arch.hvm_vcpu.flag_dr_dirty |= 2;
+    }
+
     write_debugreg(0, v->arch.debugreg[0]);
     write_debugreg(1, v->arch.debugreg[1]);
     write_debugreg(2, v->arch.debugreg[2]);
@@ -355,6 +389,72 @@ static int svm_load_vmcb_ctxt(struct vcp
     return 0;
 }
 
+static unsigned int __init svm_init_msr(void)
+{
+    return boot_cpu_has(X86_FEATURE_DBEXT) ? 4 : 0;
+}
+
+static void svm_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
+{
+    if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+    {
+        ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[0];
+        if ( ctxt->msr[ctxt->count].val )
+            ctxt->msr[ctxt->count++].index = MSR_AMD64_DR0_ADDRESS_MASK;
+
+        ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[1];
+        if ( ctxt->msr[ctxt->count].val )
+            ctxt->msr[ctxt->count++].index = MSR_AMD64_DR1_ADDRESS_MASK;
+
+        ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[2];
+        if ( ctxt->msr[ctxt->count].val )
+            ctxt->msr[ctxt->count++].index = MSR_AMD64_DR2_ADDRESS_MASK;
+
+        ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[3];
+        if ( ctxt->msr[ctxt->count].val )
+            ctxt->msr[ctxt->count++].index = MSR_AMD64_DR3_ADDRESS_MASK;
+    }
+}
+
+static int svm_load_msr(struct vcpu *v, struct hvm_msr *ctxt)
+{
+    unsigned int i, idx;
+    int err = 0;
+
+    for ( i = 0; i < ctxt->count; ++i )
+    {
+        switch ( idx = ctxt->msr[i].index )
+        {
+        case MSR_AMD64_DR0_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+                err = -ENXIO;
+            else if ( ctxt->msr[i].val >> 32 )
+                err = -EDOM;
+            else
+                v->arch.hvm_svm.dr_mask[0] = ctxt->msr[i].val;
+            break;
+
+        case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+                err = -ENXIO;
+            else if ( ctxt->msr[i].val >> 32 )
+                err = -EDOM;
+            else
+                v->arch.hvm_svm.dr_mask[idx - MSR_AMD64_DR1_ADDRESS_MASK + 1] =
+                    ctxt->msr[i].val;
+            break;
+
+        default:
+            continue;
+        }
+        if ( err )
+            break;
+        ctxt->msr[i]._rsvd = 1;
+    }
+
+    return err;
+}
+
 static void svm_fpu_enter(struct vcpu *v)
 {
     struct vmcb_struct *n1vmcb = vcpu_nestedhvm(v).nv_n1vmcx;
@@ -1451,6 +1551,8 @@ static int svm_msr_read_intercept(unsign
 
     switch ( msr )
     {
+        unsigned int ecx;
+
     case MSR_IA32_SYSENTER_CS:
         *msr_content = v->arch.hvm_svm.guest_sysenter_cs;
         break;
@@ -1526,6 +1628,21 @@ static int svm_msr_read_intercept(unsign
         vpmu_do_rdmsr(msr, msr_content);
         break;
 
+    case MSR_AMD64_DR0_ADDRESS_MASK:
+        hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
+        if ( !test_bit(X86_FEATURE_DBEXT & 31, &ecx) )
+            goto gpf;
+        *msr_content = v->arch.hvm_svm.dr_mask[0];
+        break;
+
+    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+        hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
+        if ( !test_bit(X86_FEATURE_DBEXT & 31, &ecx) )
+            goto gpf;
+        *msr_content =
+            v->arch.hvm_svm.dr_mask[msr - MSR_AMD64_DR1_ADDRESS_MASK + 1];
+        break;
+
     case MSR_AMD_OSVW_ID_LENGTH:
     case MSR_AMD_OSVW_STATUS:
         ret = svm_handle_osvw(v, msr, msr_content, 1);
@@ -1594,6 +1711,8 @@ static int svm_msr_write_intercept(unsig
 
     switch ( msr )
     {
+        unsigned int ecx;
+
     case MSR_IA32_SYSENTER_CS:
         vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = msr_content;
         break;
@@ -1669,6 +1788,21 @@ static int svm_msr_write_intercept(unsig
          */
         break;
 
+    case MSR_AMD64_DR0_ADDRESS_MASK:
+        hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
+        if ( !test_bit(X86_FEATURE_DBEXT & 31, &ecx) || (msr_content >> 32) )
+            goto gpf;
+        v->arch.hvm_svm.dr_mask[0] = msr_content;
+        break;
+
+    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+        hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
+        if ( !test_bit(X86_FEATURE_DBEXT & 31, &ecx) || (msr_content >> 32) )
+            goto gpf;
+        v->arch.hvm_svm.dr_mask[msr - MSR_AMD64_DR1_ADDRESS_MASK + 1] =
+            msr_content;
+        break;
+
     case MSR_AMD_OSVW_ID_LENGTH:
     case MSR_AMD_OSVW_STATUS:
         ret = svm_handle_osvw(v, msr, &msr_content, 0);
@@ -2022,6 +2156,9 @@ static struct hvm_function_table __initd
     .vcpu_destroy         = svm_vcpu_destroy,
     .save_cpu_ctxt        = svm_save_vmcb_ctxt,
     .load_cpu_ctxt        = svm_load_vmcb_ctxt,
+    .init_msr             = svm_init_msr,
+    .save_msr             = svm_save_msr,
+    .load_msr             = svm_load_msr,
     .get_interrupt_shadow = svm_get_interrupt_shadow,
     .set_interrupt_shadow = svm_set_interrupt_shadow,
     .guest_x86_mode       = svm_guest_x86_mode,
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -134,6 +134,7 @@
 #define X86_FEATURE_NODEID_MSR  (6*32+19) /* NodeId MSR */
 #define X86_FEATURE_TBM         (6*32+21) /* trailing bit manipulations */
 #define X86_FEATURE_TOPOEXT     (6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_DBEXT       (6*32+26) /* data breakpoint extension */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */
 #define X86_FEATURE_FSGSBASE	(7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -515,6 +515,9 @@ struct arch_svm_struct {
     uint64_t guest_lwp_cfg;      /* guest version */
     uint64_t cpu_lwp_cfg;        /* CPU version */
 
+    /* data breakpoint extension MSRs */
+    uint32_t dr_mask[4];
+
     /* OSVW MSRs */
     struct {
         u64 length;
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -140,7 +140,7 @@ struct hvm_vcpu {
 
     int                 xen_port;
 
-    bool_t              flag_dr_dirty;
+    u8                  flag_dr_dirty;
     bool_t              debug_state_latch;
     bool_t              single_step;
 
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -206,6 +206,11 @@
 #define MSR_AMD64_DC_CFG		0xc0011022
 #define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT	46
 
+#define MSR_AMD64_DR0_ADDRESS_MASK	0xc0011027
+#define MSR_AMD64_DR1_ADDRESS_MASK	0xc0011019
+#define MSR_AMD64_DR2_ADDRESS_MASK	0xc001101a
+#define MSR_AMD64_DR3_ADDRESS_MASK	0xc001101b
+
 /* AMD Family10h machine check MSRs */
 #define MSR_F10_MC4_MISC1		0xc0000408
 #define MSR_F10_MC4_MISC2		0xc0000409




* [PATCH v3 2/3] x86/PV: support data breakpoint extension registers
  2014-04-07  9:31 [PATCH v3 0/3] x86/AMD support data breakpoint extension registers Jan Beulich
  2014-04-07  9:38 ` [PATCH v3 1/3] SVM: " Jan Beulich
@ 2014-04-07  9:38 ` Jan Beulich
  2014-04-11 14:58   ` George Dunlap
  2014-04-07  9:39 ` [PATCH v3 3/3] libxc/PV: save/restore " Jan Beulich
  2 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2014-04-07  9:38 UTC (permalink / raw)
  To: xen-devel
  Cc: Ian Campbell, Ian Jackson, Keir Fraser, Aravind Gopalakrishnan,
	suravee.suthikulpanit

[-- Attachment #1: Type: text/plain, Size: 13498 bytes --]

Introducing an extension to XEN_DOMCTL_[gs]et_ext_vcpucontext similar
to the generic MSR save/restore logic recently added for HVM.

This also moves some debug register related declarations/definitions to
the header intended for them.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
---
v3: Split off save/restore code to a separate patch, allowing this one
    to be reviewed/applied independently.
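
[Editorial note on the resulting interface: the get operation copies out only
as many MSR entries as the caller's msr_count allows, reports the number
actually needed back in msr_count, and fails with -ENOBUFS if the supplied
array was too small, so a caller can probe with an empty buffer and retry.
A hedged sketch of such a caller (hypothetical do_domctl() wrapper; the real
consumer is the libxc code in patch 3, which additionally has to bounce the
array through hypercall-safe memory):

    struct xen_domctl domctl = {
        .cmd = XEN_DOMCTL_get_ext_vcpucontext,
        .domain = domid,
        .u.ext_vcpucontext.vcpu = vcpu,
        /* msr_count == 0, msrs handle unset: probe only */
    };

    if ( do_domctl(&domctl) < 0 && errno == ENOBUFS )
    {
        uint16_t n = domctl.u.ext_vcpucontext.msr_count;   /* slots needed */
        xen_domctl_ext_vcpu_msr_t *msrs = calloc(n, sizeof(*msrs));

        set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, msrs);
        domctl.u.ext_vcpucontext.msr_count = n;
        do_domctl(&domctl);              /* now also fills the msrs array */
    }
]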

--- a/xen/arch/x86/acpi/suspend.c
+++ b/xen/arch/x86/acpi/suspend.c
@@ -9,6 +9,7 @@
 #include <xen/smp.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
+#include <asm/debugreg.h>
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1341,14 +1341,7 @@ static void paravirt_ctxt_switch_to(stru
         write_cr4(cr4);
 
     if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
-    {
-        write_debugreg(0, v->arch.debugreg[0]);
-        write_debugreg(1, v->arch.debugreg[1]);
-        write_debugreg(2, v->arch.debugreg[2]);
-        write_debugreg(3, v->arch.debugreg[3]);
-        write_debugreg(6, v->arch.debugreg[6]);
-        write_debugreg(7, v->arch.debugreg[7]);
-    }
+        activate_debugregs(v);
 
     if ( (v->domain->arch.tsc_mode ==  TSC_MODE_PVRDTSCP) &&
          boot_cpu_has(X86_FEATURE_RDTSCP) )
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -52,6 +52,7 @@ long arch_do_domctl(
 {
     long ret = 0;
     bool_t copyback = 0;
+    unsigned long i;
 
     switch ( domctl->cmd )
     {
@@ -319,7 +320,6 @@ long arch_do_domctl(
 
     case XEN_DOMCTL_getmemlist:
     {
-        int i;
         unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
         uint64_t mfn;
         struct page_info *page;
@@ -645,7 +645,6 @@ long arch_do_domctl(
         unsigned long mfn = domctl->u.memory_mapping.first_mfn;
         unsigned long nr_mfns = domctl->u.memory_mapping.nr_mfns;
         int add = domctl->u.memory_mapping.add_mapping;
-        unsigned long i;
 
         ret = -EINVAL;
         if ( (mfn + nr_mfns - 1) < mfn || /* wrap? */
@@ -809,6 +808,7 @@ long arch_do_domctl(
     {
         struct xen_domctl_ext_vcpucontext *evc;
         struct vcpu *v;
+        struct xen_domctl_ext_vcpu_msr msr;
 
         evc = &domctl->u.ext_vcpucontext;
 
@@ -854,7 +854,42 @@ long arch_do_domctl(
             evc->vmce.mci_ctl2_bank0 = v->arch.vmce.bank[0].mci_ctl2;
             evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
 
-            ret = 0;
+            i = ret = 0;
+            if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+            {
+                unsigned int j;
+
+                if ( v->arch.pv_vcpu.dr_mask[0] )
+                {
+                    if ( i < evc->msr_count && !ret )
+                    {
+                        msr.index = MSR_AMD64_DR0_ADDRESS_MASK;
+                        msr.reserved = 0;
+                        msr.value = v->arch.pv_vcpu.dr_mask[0];
+                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
+                            ret = -EFAULT;
+                    }
+                    ++i;
+                }
+                for ( j = 0; j < 3; ++j )
+                {
+                    if ( !v->arch.pv_vcpu.dr_mask[1 + j] )
+                        continue;
+                    if ( i < evc->msr_count && !ret )
+                    {
+                        msr.index = MSR_AMD64_DR1_ADDRESS_MASK + j;
+                        msr.reserved = 0;
+                        msr.value = v->arch.pv_vcpu.dr_mask[1 + j];
+                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
+                            ret = -EFAULT;
+                    }
+                    ++i;
+                }
+            }
+            if ( i > evc->msr_count && !ret )
+                ret = -ENOBUFS;
+            evc->msr_count = i;
+
             vcpu_unpause(v);
             copyback = 1;
         }
@@ -909,9 +944,49 @@ long arch_do_domctl(
 
                 ret = vmce_restore_vcpu(v, &vmce);
             }
+            else if ( evc->size > offsetof(typeof(*evc), vmce) )
+                ret = -EINVAL;
             else
                 ret = 0;
 
+            if ( ret || evc->size <= offsetof(typeof(*evc), msrs) )
+                /* nothing */;
+            else if ( evc->size < offsetof(typeof(*evc), msrs) +
+                                  sizeof(evc->msrs) )
+                ret = -EINVAL;
+            else
+            {
+                for ( i = 0; i < evc->msr_count; ++i )
+                {
+                    ret = -EFAULT;
+                    if ( copy_from_guest_offset(&msr, evc->msrs, i, 1) )
+                        break;
+                    ret = -EINVAL;
+                    if ( msr.reserved )
+                        break;
+                    switch ( msr.index )
+                    {
+                    case MSR_AMD64_DR0_ADDRESS_MASK:
+                        if ( !boot_cpu_has(X86_FEATURE_DBEXT) ||
+                             (msr.value >> 32) )
+                            break;
+                        v->arch.pv_vcpu.dr_mask[0] = msr.value;
+                        continue;
+                    case MSR_AMD64_DR1_ADDRESS_MASK ...
+                         MSR_AMD64_DR3_ADDRESS_MASK:
+                        if ( !boot_cpu_has(X86_FEATURE_DBEXT) ||
+                             (msr.value >> 32) )
+                            break;
+                        msr.index -= MSR_AMD64_DR1_ADDRESS_MASK - 1;
+                        v->arch.pv_vcpu.dr_mask[msr.index] = msr.value;
+                        continue;
+                    }
+                    break;
+                }
+                if ( i == evc->msr_count )
+                    ret = 0;
+            }
+
             domain_unpause(d);
         }
     }
@@ -921,7 +996,6 @@ long arch_do_domctl(
     {
         xen_domctl_cpuid_t *ctl = &domctl->u.cpuid;
         cpuid_input_t *cpuid = NULL; 
-        int i;
 
         for ( i = 0; i < MAX_CPUID_INPUT; i++ )
         {
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2498,6 +2498,23 @@ static int emulate_privileged_op(struct 
             if ( wrmsr_safe(regs->ecx, msr_content) != 0 )
                 goto fail;
             break;
+
+        case MSR_AMD64_DR0_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) || (msr_content >> 32) )
+                goto fail;
+            v->arch.pv_vcpu.dr_mask[0] = msr_content;
+            if ( v->arch.debugreg[7] & DR7_ACTIVE_MASK )
+                wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, msr_content);
+            break;
+        case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) || (msr_content >> 32) )
+                goto fail;
+            v->arch.pv_vcpu.dr_mask
+                [regs->_ecx - MSR_AMD64_DR1_ADDRESS_MASK + 1] = msr_content;
+            if ( v->arch.debugreg[7] & DR7_ACTIVE_MASK )
+                wrmsrl(regs->_ecx, msr_content);
+            break;
+
         default:
             if ( wrmsr_hypervisor_regs(regs->ecx, msr_content) == 1 )
                 break;
@@ -2585,6 +2602,21 @@ static int emulate_privileged_op(struct 
             regs->eax = (uint32_t)msr_content;
             regs->edx = (uint32_t)(msr_content >> 32);
             break;
+
+        case MSR_AMD64_DR0_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+                goto fail;
+            regs->eax = v->arch.pv_vcpu.dr_mask[0];
+            regs->edx = 0;
+            break;
+        case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+                goto fail;
+            regs->eax = v->arch.pv_vcpu.dr_mask
+                            [regs->_ecx - MSR_AMD64_DR1_ADDRESS_MASK + 1];
+            regs->edx = 0;
+            break;
+
         default:
             if ( rdmsr_hypervisor_regs(regs->ecx, &val) )
             {
@@ -3628,7 +3660,27 @@ long do_set_trap_table(XEN_GUEST_HANDLE_
     return rc;
 }
 
-long set_debugreg(struct vcpu *v, int reg, unsigned long value)
+void activate_debugregs(const struct vcpu *curr)
+{
+    ASSERT(curr == current);
+
+    write_debugreg(0, curr->arch.debugreg[0]);
+    write_debugreg(1, curr->arch.debugreg[1]);
+    write_debugreg(2, curr->arch.debugreg[2]);
+    write_debugreg(3, curr->arch.debugreg[3]);
+    write_debugreg(6, curr->arch.debugreg[6]);
+    write_debugreg(7, curr->arch.debugreg[7]);
+
+    if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+    {
+        wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[0]);
+        wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[1]);
+        wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[2]);
+        wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[3]);
+    }
+}
+
+long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
 {
     int i;
     struct vcpu *curr = current;
@@ -3709,11 +3761,8 @@ long set_debugreg(struct vcpu *v, int re
             if ( (v == curr) &&
                  !(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
             {
-                write_debugreg(0, v->arch.debugreg[0]);
-                write_debugreg(1, v->arch.debugreg[1]);
-                write_debugreg(2, v->arch.debugreg[2]);
-                write_debugreg(3, v->arch.debugreg[3]);
-                write_debugreg(6, v->arch.debugreg[6]);
+                activate_debugregs(curr);
+                break;
             }
         }
         if ( v == curr )
--- a/xen/include/asm-x86/debugreg.h
+++ b/xen/include/asm-x86/debugreg.h
@@ -64,4 +64,16 @@
 #define DR_GLOBAL_EXACT_ENABLE   (0x00000200ul) /* Global exact enable */
 #define DR_GENERAL_DETECT        (0x00002000ul) /* General detect enable */
 
+#define write_debugreg(reg, val) do {                       \
+    unsigned long __val = val;                              \
+    asm volatile ( "mov %0,%%db" #reg : : "r" (__val) );    \
+} while (0)
+#define read_debugreg(reg) ({                               \
+    unsigned long __val;                                    \
+    asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) );  \
+    __val;                                                  \
+})
+long set_debugreg(struct vcpu *, unsigned int reg, unsigned long value);
+void activate_debugregs(const struct vcpu *);
+
 #endif /* _X86_DEBUGREG_H */
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -374,6 +374,9 @@ struct pv_vcpu
     unsigned long shadow_ldt_mapcnt;
     spinlock_t shadow_ldt_lock;
 
+    /* data breakpoint extension MSRs */
+    uint32_t dr_mask[4];
+
     /* Deferred VA-based update state. */
     bool_t need_update_runstate_area;
     struct vcpu_time_info pending_system_time;
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -462,17 +462,6 @@ long set_gdt(struct vcpu *d, 
              unsigned long *frames, 
              unsigned int entries);
 
-#define write_debugreg(reg, val) do {                       \
-    unsigned long __val = val;                              \
-    asm volatile ( "mov %0,%%db" #reg : : "r" (__val) );    \
-} while (0)
-#define read_debugreg(reg) ({                               \
-    unsigned long __val;                                    \
-    asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) );  \
-    __val;                                                  \
-})
-long set_debugreg(struct vcpu *p, int reg, unsigned long value);
-
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
 static always_inline void rep_nop(void)
 {
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -36,7 +36,7 @@
 #include "grant_table.h"
 #include "hvm/save.h"
 
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
+#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
 
 /*
  * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
@@ -563,6 +563,16 @@ typedef struct xen_domctl_pin_mem_cachea
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
 
 
+#if defined(__i386__) || defined(__x86_64__)
+struct xen_domctl_ext_vcpu_msr {
+    uint32_t         index;
+    uint32_t         reserved;
+    uint64_aligned_t value;
+};
+typedef struct xen_domctl_ext_vcpu_msr xen_domctl_ext_vcpu_msr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpu_msr_t);
+#endif
+
 /* XEN_DOMCTL_set_ext_vcpucontext */
 /* XEN_DOMCTL_get_ext_vcpucontext */
 struct xen_domctl_ext_vcpucontext {
@@ -582,6 +592,7 @@ struct xen_domctl_ext_vcpucontext {
     uint16_t         sysenter_callback_cs;
     uint8_t          syscall32_disables_events;
     uint8_t          sysenter_disables_events;
+    uint16_t         msr_count;
 #if defined(__GNUC__)
     union {
         uint64_aligned_t mcg_cap;
@@ -590,6 +601,7 @@ struct xen_domctl_ext_vcpucontext {
 #else
     struct hvm_vmce_vcpu vmce;
 #endif
+    XEN_GUEST_HANDLE_64(xen_domctl_ext_vcpu_msr_t) msrs;
 #endif
 };
 typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;




* [PATCH v3 3/3] libxc/PV: save/restore data breakpoint extension registers
  2014-04-07  9:31 [PATCH v3 0/3] x86/AMD support data breakpoint extension registers Jan Beulich
  2014-04-07  9:38 ` [PATCH v3 1/3] SVM: " Jan Beulich
  2014-04-07  9:38 ` [PATCH v3 2/3] x86/PV: " Jan Beulich
@ 2014-04-07  9:39 ` Jan Beulich
  2014-04-11 16:37   ` George Dunlap
  2 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2014-04-07  9:39 UTC (permalink / raw)
  To: xen-devel
  Cc: Ian Campbell, Ian Jackson, Keir Fraser, Aravind Gopalakrishnan,
	suravee.suthikulpanit

[-- Attachment #1: Type: text/plain, Size: 7701 bytes --]

Requiring the handling of the generic MSR extension to
XEN_DOMCTL_[gs]et_ext_vcpucontext.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Split off of hypervisor patch and address review comments:
    - check XEN_DOMCTL_get_ext_vcpucontext size output before looking
      at msr_count field
    - scrub xen_domctl_ext_vcpucontext_t's msrs field on the sending
      side
    - clear xen_domctl_ext_vcpucontext_t's msr_count field on the
      restore side if the size field doesn't cover the msrs one
    - make use of hypercall buffer bouncing interfaces on the restore
      side (on the save side this would seem to complicate the code
      rather than simplifying it)
    - add documenting note to tools/libxc/xg_save_restore.h
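
[Editorial note: for orientation, each vCPU's extended-context tail record
ends up laid out as sketched below (sizes taken from the restore-side parser
in this patch; the sketch itself is only illustrative):

    /* Per-vCPU extended-context tail, as walked by buffer_tail_pv():
     *   - 128 bytes of xen_domctl_ext_vcpucontext_t (as before),
     *   - msr_count * sizeof(xen_domctl_ext_vcpu_msr_t) bytes of MSR
     *     records (msr_count treated as 0 when the structure's size
     *     field doesn't cover the msrs handle),
     *   - vcpuextstate_size bytes of extended (XSAVE) state.           */
    size_t per_vcpu_len = 128
                        + evc->msr_count * sizeof(xen_domctl_ext_vcpu_msr_t)
                        + vcpuextstate_size;
]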

--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -38,6 +38,7 @@
 
 #include <stdlib.h>
 #include <unistd.h>
+#include <assert.h>
 
 #include "xg_private.h"
 #include "xg_save_restore.h"
@@ -590,8 +591,13 @@ static int buffer_tail_pv(xc_interface *
                           uint32_t vcpuextstate_size)
 {
     unsigned int i;
-    size_t pfnlen, vcpulen;
+    size_t pfnlen, vcpulen, total;
+    int alloc = 0;
     struct domain_info_context *dinfo = &ctx->dinfo;
+    union {
+        const unsigned char *raw;
+        xen_domctl_ext_vcpucontext_t *evc;
+    } ptr;
 
     /* TODO: handle changing pfntab and vcpu counts */
     /* PFN tab */
@@ -634,11 +640,42 @@ static int buffer_tail_pv(xc_interface *
             ERROR("Error allocating VCPU ctxt tail buffer");
             goto free_pfntab;
         }
+        alloc = 1;
     }
     // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen);
-    if ( RDEXACT(fd, buf->vcpubuf, vcpulen) ) {
-        PERROR("Error when reading ctxt");
-        goto free_vcpus;
+    for ( total = i = 0, ptr.raw = buf->vcpubuf; ext_vcpucontext; ) {
+        if ( RDEXACT(fd, buf->vcpubuf + total, vcpulen) ) {
+            PERROR("Error when reading ctxt");
+            goto free_vcpus;
+        }
+        total += vcpulen;
+        for ( vcpulen = 0; i < buf->vcpucount; ++i ) {
+            size_t msrlen;
+
+            if ( (const unsigned char *)(ptr.evc + 1) > buf->vcpubuf + total )
+                break;
+            if ( ptr.evc->size <
+                 (offsetof(xen_domctl_ext_vcpucontext_t, msrs) +
+                  sizeof(ptr.evc->msrs)) )
+                ptr.evc->msr_count = 0;
+            msrlen = ptr.evc->msr_count * sizeof(xen_domctl_ext_vcpu_msr_t);
+            vcpulen += msrlen;
+            ptr.raw += 128 + msrlen + vcpuextstate_size;
+        }
+        if ( !vcpulen ) {
+            assert(i == buf->vcpucount);
+            break;
+        }
+        if ( alloc ) {
+            void *nbuf = realloc(buf->vcpubuf, total + vcpulen);
+
+            if ( !nbuf ) {
+                ERROR("Error growing VCPU ctxt tail buffer");
+                goto free_vcpus;
+            }
+            ptr.raw = nbuf + (ptr.raw - buf->vcpubuf);
+            buf->vcpubuf = nbuf;
+        }
     }
 
     /* load shared_info_page */
@@ -2130,9 +2167,28 @@ int xc_domain_restore(xc_interface *xch,
             goto vcpu_ext_state_restore;
         memcpy(&domctl.u.ext_vcpucontext, vcpup, 128);
         vcpup += 128;
-        domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
-        domctl.domain = dom;
-        frc = xc_domctl(xch, &domctl);
+        if ( domctl.u.ext_vcpucontext.msr_count )
+        {
+            xen_domctl_ext_vcpu_msr_t *msrs = (void *)vcpup;
+            size_t sz = domctl.u.ext_vcpucontext.msr_count * sizeof(*msrs);
+            DECLARE_HYPERCALL_BOUNCE(msrs, sz, XC_HYPERCALL_BUFFER_BOUNCE_IN);
+
+            if ( xc_hypercall_bounce_pre(xch, msrs) )
+            {
+                PERROR("Can't bounce in vcpu%d MSRs", i);
+                goto out;
+            }
+            set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, msrs);
+            domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
+            domctl.domain = dom;
+            frc = xc_domctl(xch, &domctl);
+            xc_hypercall_bounce_post(xch, msrs);
+            vcpup += sz;
+        } else {
+            domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
+            domctl.domain = dom;
+            frc = xc_domctl(xch, &domctl);
+        }
         if ( frc != 0 )
         {
             PERROR("Couldn't set extended vcpu%d info", i);
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -836,6 +836,9 @@ int xc_domain_save(xc_interface *xch, in
     /* base of the region in which domain memory is mapped */
     unsigned char *region_base = NULL;
 
+    /* MSR extensions to xen_domctl_ext_vcpucontext_t */
+    DECLARE_HYPERCALL_BUFFER(xen_domctl_ext_vcpu_msr_t, msrs);
+
     /* A copy of the CPU eXtended States of the guest. */
     DECLARE_HYPERCALL_BUFFER(void, buffer);
 
@@ -1960,16 +1963,42 @@ int xc_domain_save(xc_interface *xch, in
         domctl.domain = dom;
         memset(&domctl.u, 0, sizeof(domctl.u));
         domctl.u.ext_vcpucontext.vcpu = i;
-        if ( xc_domctl(xch, &domctl) < 0 )
+        frc = xc_domctl(xch, &domctl);
+        if ( frc < 0 && errno == ENOBUFS &&
+             domctl.u.ext_vcpucontext.size >=
+             (offsetof(xen_domctl_ext_vcpucontext_t, msrs) +
+              sizeof(domctl.u.ext_vcpucontext.msrs)) &&
+             domctl.u.ext_vcpucontext.msr_count )
+        {
+            msrs = xc_hypercall_buffer_alloc(xch, msrs,
+                                             domctl.u.ext_vcpucontext.msr_count *
+                                             sizeof(*msrs));
+            set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, msrs);
+            frc = msrs ? xc_domctl(xch, &domctl) : -1;
+            /* Don't save the actual pointer. */
+            set_xen_guest_handle_raw(domctl.u.ext_vcpucontext.msrs, NULL);
+        }
+        if ( frc < 0 )
         {
             PERROR("No extended context for VCPU%d", i);
             goto out;
         }
         if ( wrexact(io_fd, &domctl.u.ext_vcpucontext, 128) )
         {
-            PERROR("Error when writing to state file (2)");
+            PERROR("Error when writing to state file (ext ctxt)");
             goto out;
         }
+        if ( msrs )
+        {
+            if ( wrexact(io_fd, msrs,
+                         domctl.u.ext_vcpucontext.msr_count * sizeof(*msrs)) )
+            {
+                PERROR("Error when writing to state file (MSRs)");
+                goto out;
+            }
+            xc_hypercall_buffer_free(xch, msrs);
+            msrs = NULL;
+        }
 
         /* Start to fetch CPU eXtended States */
         /* Get buffer size first */
@@ -2134,6 +2163,7 @@ int xc_domain_save(xc_interface *xch, in
 
     xc_hypercall_buffer_free_pages(xch, to_send, NRPAGES(bitmap_size(dinfo->p2m_size)));
     xc_hypercall_buffer_free_pages(xch, to_skip, NRPAGES(bitmap_size(dinfo->p2m_size)));
+    xc_hypercall_buffer_free(xch, msrs);
 
     free(pfn_type);
     free(pfn_batch);
--- a/tools/libxc/xg_save_restore.h
+++ b/tools/libxc/xg_save_restore.h
@@ -62,6 +62,9 @@
  *                        the block size.
  *     "extv"           : Presence indicates use of extended VCPU context in
  *                        tail, data size is 0.
+ *                        This block may be followed by an array of
+ *                        xen_domctl_ext_vcpu_msr_t as indicated by
+ *                        xen_domctl_ext_vcpucontext_t's msr_count field.
  *
  * p2m (PV-only):
  *

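To make the tail layout described in the xg_save_restore.h note above a
bit more concrete, here is a stand-alone sketch of the per-vCPU record
arithmetic.  The 16-byte MSR entry size and the helper name are
assumptions made for the example; this is illustration only, not libxc
code.

    /* Illustration only: the entry size and the names are assumptions,
     * not taken from the real libxc headers. */
    #include <stdio.h>

    #define EXT_VCPUCONTEXT_BYTES 128  /* fixed leading block, written/read as 128 bytes */
    #define MSR_ENTRY_BYTES        16  /* assumed sizeof(xen_domctl_ext_vcpu_msr_t) */

    static size_t tail_record_size(unsigned int msr_count, size_t vcpuextstate_size)
    {
        /* fixed context block + optional MSR array + extended state blob */
        return EXT_VCPUCONTEXT_BYTES
               + (size_t)msr_count * MSR_ENTRY_BYTES
               + vcpuextstate_size;
    }

    int main(void)
    {
        printf("no MSRs, 512-byte xstate:   %zu bytes\n", tail_record_size(0, 512));
        printf("four MSRs, 512-byte xstate: %zu bytes\n", tail_record_size(4, 512));
        return 0;
    }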


[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 1/3] SVM: support data breakpoint extension registers
  2014-04-07  9:38 ` [PATCH v3 1/3] SVM: " Jan Beulich
@ 2014-04-11 14:32   ` George Dunlap
  0 siblings, 0 replies; 11+ messages in thread
From: George Dunlap @ 2014-04-11 14:32 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Keir Fraser, Ian Jackson, Ian Campbell, Aravind Gopalakrishnan,
	Suravee Suthikulpanit, xen-devel

On Mon, Apr 7, 2014 at 10:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
> Leveraging the generic MSR save/restore logic introduced a little while
> ago.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>

> Tested-by: Aravind Gopalakrishnan<aravind.gopalakrishnan@amd.com>
>
> --- a/tools/libxc/xc_cpufeature.h
> +++ b/tools/libxc/xc_cpufeature.h
> @@ -125,6 +125,7 @@
>  #define X86_FEATURE_NODEID_MSR  19 /* NodeId MSR */
>  #define X86_FEATURE_TBM         21 /* trailing bit manipulations */
>  #define X86_FEATURE_TOPOEXT     22 /* topology extensions CPUID leafs */
> +#define X86_FEATURE_DBEXT       26 /* data breakpoint extension */
>
>  /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
>  #define X86_FEATURE_FSGSBASE     0 /* {RD,WR}{FS,GS}BASE instructions */
> --- a/tools/libxc/xc_cpuid_x86.c
> +++ b/tools/libxc/xc_cpuid_x86.c
> @@ -110,9 +110,10 @@ static void amd_xc_cpuid_policy(
>                      bitmaskof(X86_FEATURE_3DNOWPREFETCH) |
>                      bitmaskof(X86_FEATURE_OSVW) |
>                      bitmaskof(X86_FEATURE_XOP) |
> +                    bitmaskof(X86_FEATURE_LWP) |
>                      bitmaskof(X86_FEATURE_FMA4) |
>                      bitmaskof(X86_FEATURE_TBM) |
> -                    bitmaskof(X86_FEATURE_LWP));
> +                    bitmaskof(X86_FEATURE_DBEXT));
>          regs[3] &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
>                      (is_pae ? bitmaskof(X86_FEATURE_NX) : 0) |
>                      (is_64bit ? bitmaskof(X86_FEATURE_LM) : 0) |
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3082,6 +3082,9 @@ void hvm_cpuid(unsigned int input, unsig
>          /* Only provide PSE36 when guest runs in 32bit PAE or in long mode */
>          if ( !(hvm_pae_enabled(v) || hvm_long_mode_enabled(v)) )
>              *edx &= ~cpufeat_mask(X86_FEATURE_PSE36);
> +        /* Hide data breakpoint extensions if the hardware has not support. */
> +        if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
> +            *ecx &= ~cpufeat_mask(X86_FEATURE_DBEXT);
>          break;
>
>      case 0x80000008:
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -160,14 +160,28 @@ void svm_intercept_msr(struct vcpu *v, u
>  static void svm_save_dr(struct vcpu *v)
>  {
>      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
> +    unsigned int flag_dr_dirty = v->arch.hvm_vcpu.flag_dr_dirty;
>
> -    if ( !v->arch.hvm_vcpu.flag_dr_dirty )
> +    if ( !flag_dr_dirty )
>          return;
>
>      /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
>      v->arch.hvm_vcpu.flag_dr_dirty = 0;
>      vmcb_set_dr_intercepts(vmcb, ~0u);
>
> +    if ( flag_dr_dirty & 2 )
> +    {
> +        svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_RW);
> +        svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_RW);
> +        svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_RW);
> +        svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_RW);
> +
> +        rdmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[0]);
> +        rdmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[1]);
> +        rdmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[2]);
> +        rdmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[3]);
> +    }
> +
>      v->arch.debugreg[0] = read_debugreg(0);
>      v->arch.debugreg[1] = read_debugreg(1);
>      v->arch.debugreg[2] = read_debugreg(2);
> @@ -178,12 +192,32 @@ static void svm_save_dr(struct vcpu *v)
>
>  static void __restore_debug_registers(struct vmcb_struct *vmcb, struct vcpu *v)
>  {
> +    unsigned int ecx;
> +
>      if ( v->arch.hvm_vcpu.flag_dr_dirty )
>          return;
>
>      v->arch.hvm_vcpu.flag_dr_dirty = 1;
>      vmcb_set_dr_intercepts(vmcb, 0);
>
> +    ASSERT(v == current);
> +    hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
> +    if ( test_bit(X86_FEATURE_DBEXT & 31, &ecx) )
> +    {
> +        svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE);
> +        svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE);
> +        svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE);
> +        svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE);
> +
> +        wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[0]);
> +        wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[1]);
> +        wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[2]);
> +        wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.hvm_svm.dr_mask[3]);
> +
> +        /* Can't use hvm_cpuid() in svm_save_dr(): v != current. */
> +        v->arch.hvm_vcpu.flag_dr_dirty |= 2;
> +    }
> +
>      write_debugreg(0, v->arch.debugreg[0]);
>      write_debugreg(1, v->arch.debugreg[1]);
>      write_debugreg(2, v->arch.debugreg[2]);
> @@ -355,6 +389,72 @@ static int svm_load_vmcb_ctxt(struct vcp
>      return 0;
>  }
>
> +static unsigned int __init svm_init_msr(void)
> +{
> +    return boot_cpu_has(X86_FEATURE_DBEXT) ? 4 : 0;
> +}
> +
> +static void svm_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
> +{
> +    if ( boot_cpu_has(X86_FEATURE_DBEXT) )
> +    {
> +        ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[0];
> +        if ( ctxt->msr[ctxt->count].val )
> +            ctxt->msr[ctxt->count++].index = MSR_AMD64_DR0_ADDRESS_MASK;
> +
> +        ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[1];
> +        if ( ctxt->msr[ctxt->count].val )
> +            ctxt->msr[ctxt->count++].index = MSR_AMD64_DR1_ADDRESS_MASK;
> +
> +        ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[2];
> +        if ( ctxt->msr[ctxt->count].val )
> +            ctxt->msr[ctxt->count++].index = MSR_AMD64_DR2_ADDRESS_MASK;
> +
> +        ctxt->msr[ctxt->count].val = v->arch.hvm_svm.dr_mask[3];
> +        if ( ctxt->msr[ctxt->count].val )
> +            ctxt->msr[ctxt->count++].index = MSR_AMD64_DR3_ADDRESS_MASK;
> +    }
> +}
> +
> +static int svm_load_msr(struct vcpu *v, struct hvm_msr *ctxt)
> +{
> +    unsigned int i, idx;
> +    int err = 0;
> +
> +    for ( i = 0; i < ctxt->count; ++i )
> +    {
> +        switch ( idx = ctxt->msr[i].index )
> +        {
> +        case MSR_AMD64_DR0_ADDRESS_MASK:
> +            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
> +                err = -ENXIO;
> +            else if ( ctxt->msr[i].val >> 32 )
> +                err = -EDOM;
> +            else
> +                v->arch.hvm_svm.dr_mask[0] = ctxt->msr[i].val;
> +            break;
> +
> +        case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
> +            if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
> +                err = -ENXIO;
> +            else if ( ctxt->msr[i].val >> 32 )
> +                err = -EDOM;
> +            else
> +                v->arch.hvm_svm.dr_mask[idx - MSR_AMD64_DR1_ADDRESS_MASK + 1] =
> +                    ctxt->msr[i].val;
> +            break;
> +
> +        default:
> +            continue;
> +        }
> +        if ( err )
> +            break;
> +        ctxt->msr[i]._rsvd = 1;
> +    }
> +
> +    return err;
> +}
> +
>  static void svm_fpu_enter(struct vcpu *v)
>  {
>      struct vmcb_struct *n1vmcb = vcpu_nestedhvm(v).nv_n1vmcx;
> @@ -1451,6 +1551,8 @@ static int svm_msr_read_intercept(unsign
>
>      switch ( msr )
>      {
> +        unsigned int ecx;
> +
>      case MSR_IA32_SYSENTER_CS:
>          *msr_content = v->arch.hvm_svm.guest_sysenter_cs;
>          break;
> @@ -1526,6 +1628,21 @@ static int svm_msr_read_intercept(unsign
>          vpmu_do_rdmsr(msr, msr_content);
>          break;
>
> +    case MSR_AMD64_DR0_ADDRESS_MASK:
> +        hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
> +        if ( !test_bit(X86_FEATURE_DBEXT & 31, &ecx) )
> +            goto gpf;
> +        *msr_content = v->arch.hvm_svm.dr_mask[0];
> +        break;
> +
> +    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
> +        hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
> +        if ( !test_bit(X86_FEATURE_DBEXT & 31, &ecx) )
> +            goto gpf;
> +        *msr_content =
> +            v->arch.hvm_svm.dr_mask[msr - MSR_AMD64_DR1_ADDRESS_MASK + 1];
> +        break;
> +
>      case MSR_AMD_OSVW_ID_LENGTH:
>      case MSR_AMD_OSVW_STATUS:
>          ret = svm_handle_osvw(v, msr, msr_content, 1);
> @@ -1594,6 +1711,8 @@ static int svm_msr_write_intercept(unsig
>
>      switch ( msr )
>      {
> +        unsigned int ecx;
> +
>      case MSR_IA32_SYSENTER_CS:
>          vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = msr_content;
>          break;
> @@ -1669,6 +1788,21 @@ static int svm_msr_write_intercept(unsig
>           */
>          break;
>
> +    case MSR_AMD64_DR0_ADDRESS_MASK:
> +        hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
> +        if ( !test_bit(X86_FEATURE_DBEXT & 31, &ecx) || (msr_content >> 32) )
> +            goto gpf;
> +        v->arch.hvm_svm.dr_mask[0] = msr_content;
> +        break;
> +
> +    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
> +        hvm_cpuid(0x80000001, NULL, NULL, &ecx, NULL);
> +        if ( !test_bit(X86_FEATURE_DBEXT & 31, &ecx) || (msr_content >> 32) )
> +            goto gpf;
> +        v->arch.hvm_svm.dr_mask[msr - MSR_AMD64_DR1_ADDRESS_MASK + 1] =
> +            msr_content;
> +        break;
> +
>      case MSR_AMD_OSVW_ID_LENGTH:
>      case MSR_AMD_OSVW_STATUS:
>          ret = svm_handle_osvw(v, msr, &msr_content, 0);
> @@ -2022,6 +2156,9 @@ static struct hvm_function_table __initd
>      .vcpu_destroy         = svm_vcpu_destroy,
>      .save_cpu_ctxt        = svm_save_vmcb_ctxt,
>      .load_cpu_ctxt        = svm_load_vmcb_ctxt,
> +    .init_msr             = svm_init_msr,
> +    .save_msr             = svm_save_msr,
> +    .load_msr             = svm_load_msr,
>      .get_interrupt_shadow = svm_get_interrupt_shadow,
>      .set_interrupt_shadow = svm_set_interrupt_shadow,
>      .guest_x86_mode       = svm_guest_x86_mode,
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -134,6 +134,7 @@
>  #define X86_FEATURE_NODEID_MSR  (6*32+19) /* NodeId MSR */
>  #define X86_FEATURE_TBM         (6*32+21) /* trailing bit manipulations */
>  #define X86_FEATURE_TOPOEXT     (6*32+22) /* topology extensions CPUID leafs */
> +#define X86_FEATURE_DBEXT       (6*32+26) /* data breakpoint extension */
>
>  /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */
>  #define X86_FEATURE_FSGSBASE   (7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
> --- a/xen/include/asm-x86/hvm/svm/vmcb.h
> +++ b/xen/include/asm-x86/hvm/svm/vmcb.h
> @@ -515,6 +515,9 @@ struct arch_svm_struct {
>      uint64_t guest_lwp_cfg;      /* guest version */
>      uint64_t cpu_lwp_cfg;        /* CPU version */
>
> +    /* data breakpoint extension MSRs */
> +    uint32_t dr_mask[4];
> +
>      /* OSVW MSRs */
>      struct {
>          u64 length;
> --- a/xen/include/asm-x86/hvm/vcpu.h
> +++ b/xen/include/asm-x86/hvm/vcpu.h
> @@ -140,7 +140,7 @@ struct hvm_vcpu {
>
>      int                 xen_port;
>
> -    bool_t              flag_dr_dirty;
> +    u8                  flag_dr_dirty;
>      bool_t              debug_state_latch;
>      bool_t              single_step;
>
> --- a/xen/include/asm-x86/msr-index.h
> +++ b/xen/include/asm-x86/msr-index.h
> @@ -206,6 +206,11 @@
>  #define MSR_AMD64_DC_CFG               0xc0011022
>  #define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT        46
>
> +#define MSR_AMD64_DR0_ADDRESS_MASK     0xc0011027
> +#define MSR_AMD64_DR1_ADDRESS_MASK     0xc0011019
> +#define MSR_AMD64_DR2_ADDRESS_MASK     0xc001101a
> +#define MSR_AMD64_DR3_ADDRESS_MASK     0xc001101b
> +
>  /* AMD Family10h machine check MSRs */
>  #define MSR_F10_MC4_MISC1              0xc0000408
>  #define MSR_F10_MC4_MISC2              0xc0000409
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 2/3] x86/PV: support data breakpoint extension registers
  2014-04-07  9:38 ` [PATCH v3 2/3] x86/PV: " Jan Beulich
@ 2014-04-11 14:58   ` George Dunlap
  2014-04-11 15:32     ` Jan Beulich
  0 siblings, 1 reply; 11+ messages in thread
From: George Dunlap @ 2014-04-11 14:58 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Keir Fraser, Ian Jackson, Ian Campbell, Aravind Gopalakrishnan,
	Suravee Suthikulpanit, xen-devel

On Mon, Apr 7, 2014 at 10:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
> Introducing an extension to XEN_DOMCTL_[gs]et_ext_vcpucontext similar
> to the generic MSR save/restore logic recently added for HVM.
>
> This also moves some debug register related declarations/definition to
> the header intended for these.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Tested-by: Aravind Gopalakrishnan<aravind.gopalakrishnan@amd.com>
> ---
> v3: Split off save/restore code to a separate patch, allowing this one
>     to be reviewed/applied independently.
>
> --- a/xen/arch/x86/acpi/suspend.c
> +++ b/xen/arch/x86/acpi/suspend.c
> @@ -9,6 +9,7 @@
>  #include <xen/smp.h>
>  #include <asm/processor.h>
>  #include <asm/msr.h>
> +#include <asm/debugreg.h>
>  #include <asm/flushtlb.h>
>  #include <asm/hvm/hvm.h>
>  #include <asm/hvm/support.h>
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1341,14 +1341,7 @@ static void paravirt_ctxt_switch_to(stru
>          write_cr4(cr4);
>
>      if ( unlikely(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
> -    {
> -        write_debugreg(0, v->arch.debugreg[0]);
> -        write_debugreg(1, v->arch.debugreg[1]);
> -        write_debugreg(2, v->arch.debugreg[2]);
> -        write_debugreg(3, v->arch.debugreg[3]);
> -        write_debugreg(6, v->arch.debugreg[6]);
> -        write_debugreg(7, v->arch.debugreg[7]);
> -    }
> +        activate_debugregs(v);
>
>      if ( (v->domain->arch.tsc_mode ==  TSC_MODE_PVRDTSCP) &&
>           boot_cpu_has(X86_FEATURE_RDTSCP) )
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c

[snip]

> @@ -854,7 +854,42 @@ long arch_do_domctl(
>              evc->vmce.mci_ctl2_bank0 = v->arch.vmce.bank[0].mci_ctl2;
>              evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
>
> -            ret = 0;
> +            i = ret = 0;
> +            if ( boot_cpu_has(X86_FEATURE_DBEXT) )
> +            {
> +                unsigned int j;
> +
> +                if ( v->arch.pv_vcpu.dr_mask[0] )
> +                {
> +                    if ( i < evc->msr_count && !ret )
> +                    {
> +                        msr.index = MSR_AMD64_DR0_ADDRESS_MASK;
> +                        msr.reserved = 0;
> +                        msr.value = v->arch.pv_vcpu.dr_mask[0];
> +                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )

Sorry if I missed something, but is there any bounds-checking on this
buffer?  I don't see any specification in the header file about how
big the buffer needs to be, nor any conceptual limit to the number of
elements which might be copied into it -- at least in this path.

> @@ -3628,7 +3660,27 @@ long do_set_trap_table(XEN_GUEST_HANDLE_
>      return rc;
>  }
>
> -long set_debugreg(struct vcpu *v, int reg, unsigned long value)
> +void activate_debugregs(const struct vcpu *curr)
> +{
> +    ASSERT(curr == current);
> +
> +    write_debugreg(0, curr->arch.debugreg[0]);
> +    write_debugreg(1, curr->arch.debugreg[1]);
> +    write_debugreg(2, curr->arch.debugreg[2]);
> +    write_debugreg(3, curr->arch.debugreg[3]);
> +    write_debugreg(6, curr->arch.debugreg[6]);
> +    write_debugreg(7, curr->arch.debugreg[7]);
> +
> +    if ( boot_cpu_has(X86_FEATURE_DBEXT) )
> +    {
> +        wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[0]);
> +        wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[1]);
> +        wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[2]);
> +        wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[3]);
> +    }
> +}
> +
> +long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
>  {
>      int i;
>      struct vcpu *curr = current;
> @@ -3709,11 +3761,8 @@ long set_debugreg(struct vcpu *v, int re
>              if ( (v == curr) &&
>                   !(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
>              {
> -                write_debugreg(0, v->arch.debugreg[0]);
> -                write_debugreg(1, v->arch.debugreg[1]);
> -                write_debugreg(2, v->arch.debugreg[2]);
> -                write_debugreg(3, v->arch.debugreg[3]);
> -                write_debugreg(6, v->arch.debugreg[6]);
> +                activate_debugregs(curr);

So in the other place you replaced with activate_debugregs(), it
writes DR7; but here it doesn't -- and yet the function will write
DR7.  Is that a problem?

 -George

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 2/3] x86/PV: support data breakpoint extension registers
  2014-04-11 14:58   ` George Dunlap
@ 2014-04-11 15:32     ` Jan Beulich
  2014-04-11 15:37       ` George Dunlap
  0 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2014-04-11 15:32 UTC (permalink / raw)
  To: George Dunlap
  Cc: Keir Fraser, Ian Jackson, Ian Campbell, Aravind Gopalakrishnan,
	Suravee Suthikulpanit, xen-devel

>>> On 11.04.14 at 16:58, <George.Dunlap@eu.citrix.com> wrote:
> On Mon, Apr 7, 2014 at 10:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
>> @@ -854,7 +854,42 @@ long arch_do_domctl(
>>              evc->vmce.mci_ctl2_bank0 = v->arch.vmce.bank[0].mci_ctl2;
>>              evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
>>
>> -            ret = 0;
>> +            i = ret = 0;
>> +            if ( boot_cpu_has(X86_FEATURE_DBEXT) )
>> +            {
>> +                unsigned int j;
>> +
>> +                if ( v->arch.pv_vcpu.dr_mask[0] )
>> +                {
>> +                    if ( i < evc->msr_count && !ret )
>> +                    {
>> +                        msr.index = MSR_AMD64_DR0_ADDRESS_MASK;
>> +                        msr.reserved = 0;
>> +                        msr.value = v->arch.pv_vcpu.dr_mask[0];
>> +                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
> 
> Sorry if I missed something, but is there any bounds-checking on this
> buffer?  I don't see any specification in the header file about how
> big the buffer needs to be, nor any conceptual limit to the number of
> elements which might be copied into it -- at least in this path.

The conceptual limit is 64k (due to evc->msr_count being a 16 bit
quantity). The limit up to which we may write to the buffer is the
passed in evc->msr_count (which gets updated at the end to the
number we would have wanted to write).
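
As a stand-alone illustration of that protocol -- the callee fills at most
the caller-supplied number of entries, always reports back how many it
wanted to write, and signals ENOBUFS so the caller can retry with a larger
buffer -- here is a plain C mock-up.  The function name, the msr_entry
layout and the mask values are invented for the example; this is not
hypervisor or libxc code.

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct msr_entry { uint32_t index; uint32_t reserved; uint64_t value; };

    /* Mock "get" side: four potential entries, only non-zero masks are
     * reported.  Writes at most *count entries, updates *count to the
     * number it wanted to write, and returns -ENOBUFS if that is more
     * than the caller provided room for. */
    static int get_msrs(struct msr_entry *buf, uint16_t *count)
    {
        static const uint64_t masks[4] = { 0xfff, 0, 0x3f, 0 };
        uint16_t avail = *count, used = 0;
        unsigned int j;

        for ( j = 0; j < 4; j++ )
        {
            if ( !masks[j] )
                continue;
            if ( used < avail )
            {
                buf[used].index = j;        /* placeholder index, not a real MSR */
                buf[used].reserved = 0;
                buf[used].value = masks[j];
            }
            used++;
        }
        *count = used;
        return used > avail ? -ENOBUFS : 0;
    }

    int main(void)
    {
        uint16_t count = 0;
        struct msr_entry *buf = NULL;

        /* First call with no room, just to learn the required count. */
        if ( get_msrs(NULL, &count) == -ENOBUFS && count )
        {
            buf = calloc(count, sizeof(*buf));
            if ( buf && get_msrs(buf, &count) == 0 )
                printf("fetched %u MSR entries\n", count);
        }
        free(buf);
        return 0;
    }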

>> @@ -3628,7 +3660,27 @@ long do_set_trap_table(XEN_GUEST_HANDLE_
>>      return rc;
>>  }
>>
>> -long set_debugreg(struct vcpu *v, int reg, unsigned long value)
>> +void activate_debugregs(const struct vcpu *curr)
>> +{
>> +    ASSERT(curr == current);
>> +
>> +    write_debugreg(0, curr->arch.debugreg[0]);
>> +    write_debugreg(1, curr->arch.debugreg[1]);
>> +    write_debugreg(2, curr->arch.debugreg[2]);
>> +    write_debugreg(3, curr->arch.debugreg[3]);
>> +    write_debugreg(6, curr->arch.debugreg[6]);
>> +    write_debugreg(7, curr->arch.debugreg[7]);
>> +
>> +    if ( boot_cpu_has(X86_FEATURE_DBEXT) )
>> +    {
>> +        wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[0]);
>> +        wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[1]);
>> +        wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[2]);
>> +        wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[3]);
>> +    }
>> +}
>> +
>> +long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
>>  {
>>      int i;
>>      struct vcpu *curr = current;
>> @@ -3709,11 +3761,8 @@ long set_debugreg(struct vcpu *v, int re
>>              if ( (v == curr) &&
>>                   !(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
>>              {
>> -                write_debugreg(0, v->arch.debugreg[0]);
>> -                write_debugreg(1, v->arch.debugreg[1]);
>> -                write_debugreg(2, v->arch.debugreg[2]);
>> -                write_debugreg(3, v->arch.debugreg[3]);
>> -                write_debugreg(6, v->arch.debugreg[6]);
>> +                activate_debugregs(curr);
> 
> So in the other place you replaced with activate_debugregs(), it
> writes DR7; but here it doesn't -- and yet the function will write
> DR7.  Is that a problem?

It's not strictly a problem (because right above we checked that
no breakpoints are active, and right below we write it with the
intended value), but it shouldn't be done, both to stay on the safe
side and because the double write is needlessly expensive. And
I do recall that I made a mental note that the DR7 write needs to
remain separate - only to then forget it. I should clearly have
written it down. So thanks a lot for spotting this!

Jan
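
(One possible shape of the split being talked about, as a stand-alone
mock-up: the helper loads everything except DR7, and each caller writes
DR7 itself, last and exactly once.  The macro and struct below are
stand-ins for illustration, not the Xen definitions.)

    #include <stdio.h>

    static unsigned long dr[8];                 /* mock debug registers */
    #define write_debugreg(n, v) (dr[n] = (v))  /* stand-in for the real macro */

    struct vcpu_mock { unsigned long debugreg[8]; };

    static void activate_debugregs(const struct vcpu_mock *v)
    {
        write_debugreg(0, v->debugreg[0]);
        write_debugreg(1, v->debugreg[1]);
        write_debugreg(2, v->debugreg[2]);
        write_debugreg(3, v->debugreg[3]);
        write_debugreg(6, v->debugreg[6]);
        /* DR7 deliberately not written here: writing it arms the
         * breakpoints, so the caller does that separately, after all
         * other state is in place. */
    }

    int main(void)
    {
        struct vcpu_mock v = {
            .debugreg = { 0x1000, 0, 0, 0, 0, 0, 0xffff0ff0, 0x401 }
        };

        activate_debugregs(&v);
        write_debugreg(7, v.debugreg[7]);       /* arm breakpoints last */
        printf("DR7 = %#lx\n", dr[7]);
        return 0;
    }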

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 2/3] x86/PV: support data breakpoint extension registers
  2014-04-11 15:32     ` Jan Beulich
@ 2014-04-11 15:37       ` George Dunlap
  0 siblings, 0 replies; 11+ messages in thread
From: George Dunlap @ 2014-04-11 15:37 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Keir Fraser, Ian Jackson, Ian Campbell, Aravind Gopalakrishnan,
	Suravee Suthikulpanit, xen-devel

On 04/11/2014 04:32 PM, Jan Beulich wrote:
>>>> On 11.04.14 at 16:58, <George.Dunlap@eu.citrix.com> wrote:
>> On Mon, Apr 7, 2014 at 10:38 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>> @@ -854,7 +854,42 @@ long arch_do_domctl(
>>>               evc->vmce.mci_ctl2_bank0 = v->arch.vmce.bank[0].mci_ctl2;
>>>               evc->vmce.mci_ctl2_bank1 = v->arch.vmce.bank[1].mci_ctl2;
>>>
>>> -            ret = 0;
>>> +            i = ret = 0;
>>> +            if ( boot_cpu_has(X86_FEATURE_DBEXT) )
>>> +            {
>>> +                unsigned int j;
>>> +
>>> +                if ( v->arch.pv_vcpu.dr_mask[0] )
>>> +                {
>>> +                    if ( i < evc->msr_count && !ret )
>>> +                    {
>>> +                        msr.index = MSR_AMD64_DR0_ADDRESS_MASK;
>>> +                        msr.reserved = 0;
>>> +                        msr.value = v->arch.pv_vcpu.dr_mask[0];
>>> +                        if ( copy_to_guest_offset(evc->msrs, i, &msr, 1) )
>> Sorry if I missed something, but is there any bounds-checking on this
>> buffer?  I don't see any specification in the header file about how
>> big the buffer needs to be, nor any conceptual limit to the number of
>> elements which might be copied into it -- at least in this path.
> The conceptual limit is 64k (due to evc->msr_count being a 16 bit
> quantity). The limit up to which we may write to the buffer is the
> passed in evc->msr_count (which gets updated at the end to the
> number we would have wanted to write).

Dur -- sorry, the first time I went through this patch (last week) I 
noticed the "i < evc->msr_count", but somehow I missed it the second 
time through.

>
>>> @@ -3628,7 +3660,27 @@ long do_set_trap_table(XEN_GUEST_HANDLE_
>>>       return rc;
>>>   }
>>>
>>> -long set_debugreg(struct vcpu *v, int reg, unsigned long value)
>>> +void activate_debugregs(const struct vcpu *curr)
>>> +{
>>> +    ASSERT(curr == current);
>>> +
>>> +    write_debugreg(0, curr->arch.debugreg[0]);
>>> +    write_debugreg(1, curr->arch.debugreg[1]);
>>> +    write_debugreg(2, curr->arch.debugreg[2]);
>>> +    write_debugreg(3, curr->arch.debugreg[3]);
>>> +    write_debugreg(6, curr->arch.debugreg[6]);
>>> +    write_debugreg(7, curr->arch.debugreg[7]);
>>> +
>>> +    if ( boot_cpu_has(X86_FEATURE_DBEXT) )
>>> +    {
>>> +        wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[0]);
>>> +        wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[1]);
>>> +        wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[2]);
>>> +        wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, curr->arch.pv_vcpu.dr_mask[3]);
>>> +    }
>>> +}
>>> +
>>> +long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
>>>   {
>>>       int i;
>>>       struct vcpu *curr = current;
>>> @@ -3709,11 +3761,8 @@ long set_debugreg(struct vcpu *v, int re
>>>               if ( (v == curr) &&
>>>                    !(v->arch.debugreg[7] & DR7_ACTIVE_MASK) )
>>>               {
>>> -                write_debugreg(0, v->arch.debugreg[0]);
>>> -                write_debugreg(1, v->arch.debugreg[1]);
>>> -                write_debugreg(2, v->arch.debugreg[2]);
>>> -                write_debugreg(3, v->arch.debugreg[3]);
>>> -                write_debugreg(6, v->arch.debugreg[6]);
>>> +                activate_debugregs(curr);
>> So in the other place you replaced with activate_debugregs(), it
>> writes DR7; but here it doesn't -- and yet the function will write
>> DR7.  Is that a problem?
> It's not strictly a problem (because right above we checked that
> no breakpoints are active, and right below we write it with the
> intended value), but it shouldn't be done to be on the safe side
> as well as because the double write is needlessly expensive. And
> I do recall that I made a mental note that the DR7 write needs to
> remain separate - only to then forget it. I should clearly have
> written it down. So thanks a lot for spotting this!

Easy to do. :-)

  -George

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 3/3] libxc/PV: save/restore data breakpoint extension registers
  2014-04-07  9:39 ` [PATCH v3 3/3] libxc/PV: save/restore " Jan Beulich
@ 2014-04-11 16:37   ` George Dunlap
  2014-04-14  7:50     ` Jan Beulich
  0 siblings, 1 reply; 11+ messages in thread
From: George Dunlap @ 2014-04-11 16:37 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Keir Fraser, Ian Jackson, Ian Campbell, Aravind Gopalakrishnan,
	Suravee Suthikulpanit, xen-devel

On Mon, Apr 7, 2014 at 10:39 AM, Jan Beulich <JBeulich@suse.com> wrote:
> Requiring the handling of the generic MSR extension to
> XEN_DOMCTL_[gs]et_ext_vcpucontext.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v3: Split off of hypervisor patch and address review comments:
>     - check XEN_DOMCTL_get_ext_vcpucontext size output before looking
>       at msr_count field
>     - scrub xen_domctl_ext_vcpucontext_t's msrs field on the sending
>       side
>     - clear xen_domctl_ext_vcpucontext_t's msr_count field on the
>       restore side if the size field doesn't cover the msrs one
>     - make use of hypercall buffer bouncing interfaces on the restore
>       side (on the save side this would seem to complicate the code
>       rather than simplifying it)
>     - add documenting note to tools/libxc/xg_save_restore.h
>
> --- a/tools/libxc/xc_domain_restore.c
> +++ b/tools/libxc/xc_domain_restore.c
> @@ -38,6 +38,7 @@
>
>  #include <stdlib.h>
>  #include <unistd.h>
> +#include <assert.h>
>
>  #include "xg_private.h"
>  #include "xg_save_restore.h"
> @@ -590,8 +591,13 @@ static int buffer_tail_pv(xc_interface *
>                            uint32_t vcpuextstate_size)
>  {
>      unsigned int i;
> -    size_t pfnlen, vcpulen;
> +    size_t pfnlen, vcpulen, total;
> +    int alloc = 0;
>      struct domain_info_context *dinfo = &ctx->dinfo;
> +    union {
> +        const unsigned char *raw;
> +        xen_domctl_ext_vcpucontext_t *evc;
> +    } ptr;
>
>      /* TODO: handle changing pfntab and vcpu counts */
>      /* PFN tab */
> @@ -634,11 +640,42 @@ static int buffer_tail_pv(xc_interface *
>              ERROR("Error allocating VCPU ctxt tail buffer");
>              goto free_pfntab;
>          }
> +        alloc = 1;
>      }
>      // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen);
> -    if ( RDEXACT(fd, buf->vcpubuf, vcpulen) ) {
> -        PERROR("Error when reading ctxt");
> -        goto free_vcpus;
> +    for ( total = i = 0, ptr.raw = buf->vcpubuf; ext_vcpucontext; ) {

This isn't actually a for loop.  Would it really be so terrible to do
if(ext_vcpucontext) { while(1) {}}?

> +        if ( RDEXACT(fd, buf->vcpubuf + total, vcpulen) ) {
> +            PERROR("Error when reading ctxt");
> +            goto free_vcpus;
> +        }
> +        total += vcpulen;
> +        for ( vcpulen = 0; i < buf->vcpucount; ++i ) {
> +            size_t msrlen;
> +
> +            if ( (const unsigned char *)(ptr.evc + 1) > buf->vcpubuf + total )
> +                break;
> +            if ( ptr.evc->size <
> +                 (offsetof(xen_domctl_ext_vcpucontext_t, msrs) +
> +                  sizeof(ptr.evc->msrs)) )
> +                ptr.evc->msr_count = 0;
> +            msrlen = ptr.evc->msr_count * sizeof(xen_domctl_ext_vcpu_msr_t);
> +            vcpulen += msrlen;
> +            ptr.raw += 128 + msrlen + vcpuextstate_size;
> +        }
> +        if ( !vcpulen ) {
> +            assert(i == buf->vcpucount);
> +            break;
> +        }

If you're going to write such twisted logic, you absolutely must
comment it -- at the very least you need to give a high level description
of what it's doing.  I've been staring at this for an hour now, and I
only have a vague idea what it's supposed to be doing.  Seriously, is
this any better than spaghetti code?


> --- a/tools/libxc/xc_domain_save.c
> +++ b/tools/libxc/xc_domain_save.c
[snip]
> @@ -1960,16 +1963,42 @@ int xc_domain_save(xc_interface *xch, in
>          domctl.domain = dom;
>          memset(&domctl.u, 0, sizeof(domctl.u));
>          domctl.u.ext_vcpucontext.vcpu = i;
> -        if ( xc_domctl(xch, &domctl) < 0 )
> +        frc = xc_domctl(xch, &domctl);
> +        if ( frc < 0 && errno == ENOBUFS &&
> +             domctl.u.ext_vcpucontext.size >=
> +             (offsetof(xen_domctl_ext_vcpucontext_t, msrs) +
> +              sizeof(domctl.u.ext_vcpucontext.msrs)) &&
> +             domctl.u.ext_vcpucontext.msr_count )
> +        {
> +            msrs = xc_hypercall_buffer_alloc(xch, msrs,
> +                                             domctl.u.ext_vcpucontext.msr_count *
> +                                             sizeof(*msrs));
> +            set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, msrs);
> +            frc = msrs ? xc_domctl(xch, &domctl) : -1;
> +            /* Don't save the actual pointer. */
> +            set_xen_guest_handle_raw(domctl.u.ext_vcpucontext.msrs, NULL);
> +        }

I think this could use a comment as well.

 -George

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 3/3] libxc/PV: save/restore data breakpoint extension registers
  2014-04-11 16:37   ` George Dunlap
@ 2014-04-14  7:50     ` Jan Beulich
  2014-04-14 15:22       ` George Dunlap
  0 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2014-04-14  7:50 UTC (permalink / raw)
  To: George Dunlap
  Cc: Keir Fraser, Ian Jackson, Ian Campbell, Aravind Gopalakrishnan,
	Suravee Suthikulpanit, xen-devel

>>> On 11.04.14 at 18:37, <dunlapg@umich.edu> wrote:
> On Mon, Apr 7, 2014 at 10:39 AM, Jan Beulich <JBeulich@suse.com> wrote:
>> @@ -634,11 +640,42 @@ static int buffer_tail_pv(xc_interface *
>>              ERROR("Error allocating VCPU ctxt tail buffer");
>>              goto free_pfntab;
>>          }
>> +        alloc = 1;
>>      }
>>      // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen);
>> -    if ( RDEXACT(fd, buf->vcpubuf, vcpulen) ) {
>> -        PERROR("Error when reading ctxt");
>> -        goto free_vcpus;
>> +    for ( total = i = 0, ptr.raw = buf->vcpubuf; ext_vcpucontext; ) {
> 
> This isn't actually a for loop.  Would it really be so terrible to do
> if(ext_vcpucontext) { while(1) {}}?

Nothing really terrible, but it would increase indentation by one level
for no real reason.

>> +        if ( RDEXACT(fd, buf->vcpubuf + total, vcpulen) ) {
>> +            PERROR("Error when reading ctxt");
>> +            goto free_vcpus;
>> +        }
>> +        total += vcpulen;
>> +        for ( vcpulen = 0; i < buf->vcpucount; ++i ) {
>> +            size_t msrlen;
>> +
>> +            if ( (const unsigned char *)(ptr.evc + 1) > buf->vcpubuf + total )
>> +                break;
>> +            if ( ptr.evc->size <
>> +                 (offsetof(xen_domctl_ext_vcpucontext_t, msrs) +
>> +                  sizeof(ptr.evc->msrs)) )
>> +                ptr.evc->msr_count = 0;
>> +            msrlen = ptr.evc->msr_count * sizeof(xen_domctl_ext_vcpu_msr_t);
>> +            vcpulen += msrlen;
>> +            ptr.raw += 128 + msrlen + vcpuextstate_size;
>> +        }
>> +        if ( !vcpulen ) {
>> +            assert(i == buf->vcpucount);
>> +            break;
>> +        }
> 
> If you're going to write such twisted logic, you absolutely must
> comment it -- at the very least you need to give a high level description
> of what it's doing.  I've been staring at this for an hour now, and I
> only have a vague idea what it's supposed to be doing.  Seriously, is
> this any better than spaghetti code?

Now do you have any better suggestion? I was just trying to extend
what is there with as little churn on the code as possible. I'm not even
certain the model chosen is desirable - I could imagine that introducing
another "extended info block", e.g. tagged "msrs", might be preferable.

In any event, this isn't code I'm feeling comfortable changing, so of all
possible options I'd prefer to have this whole aspect of the intended
functionality done by someone else who is. And the patch here then
might serve as no more than a baseline reference, to be thrown away
entirely if so desired.

Perhaps the best thing really is to leave the save/restore aspect out
for now, and get it implemented once Andrew's rewrite is in place.

>> @@ -1960,16 +1963,42 @@ int xc_domain_save(xc_interface *xch, in
>>          domctl.domain = dom;
>>          memset(&domctl.u, 0, sizeof(domctl.u));
>>          domctl.u.ext_vcpucontext.vcpu = i;
>> -        if ( xc_domctl(xch, &domctl) < 0 )
>> +        frc = xc_domctl(xch, &domctl);
>> +        if ( frc < 0 && errno == ENOBUFS &&
>> +             domctl.u.ext_vcpucontext.size >=
>> +             (offsetof(xen_domctl_ext_vcpucontext_t, msrs) +
>> +              sizeof(domctl.u.ext_vcpucontext.msrs)) &&
>> +             domctl.u.ext_vcpucontext.msr_count )
>> +        {
>> +            msrs = xc_hypercall_buffer_alloc(xch, msrs,
>> +                                             domctl.u.ext_vcpucontext.msr_count *
>> +                                             sizeof(*msrs));
>> +            set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, msrs);
>> +            frc = msrs ? xc_domctl(xch, &domctl) : -1;
>> +            /* Don't save the actual pointer. */
>> +            set_xen_guest_handle_raw(domctl.u.ext_vcpucontext.msrs, NULL);
>> +        }
> 
> I think this could use a comment as well.

Not sure what you refer to here - the one non-obvious thing (clearing
out the handle) is being commented. Are you trying to tell me that this
isn't a sufficient explanation, or are you referring to other parts of
this code block?

Jan

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 3/3] libxc/PV: save/restore data breakpoint extension registers
  2014-04-14  7:50     ` Jan Beulich
@ 2014-04-14 15:22       ` George Dunlap
  0 siblings, 0 replies; 11+ messages in thread
From: George Dunlap @ 2014-04-14 15:22 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Keir Fraser, Ian Jackson, Ian Campbell, Aravind Gopalakrishnan,
	Suravee Suthikulpanit, xen-devel

On Mon, Apr 14, 2014 at 8:50 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>>> On 11.04.14 at 18:37, <dunlapg@umich.edu> wrote:
>> On Mon, Apr 7, 2014 at 10:39 AM, Jan Beulich <JBeulich@suse.com> wrote:
>>> @@ -634,11 +640,42 @@ static int buffer_tail_pv(xc_interface *
>>>              ERROR("Error allocating VCPU ctxt tail buffer");
>>>              goto free_pfntab;
>>>          }
>>> +        alloc = 1;
>>>      }
>>>      // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen);
>>> -    if ( RDEXACT(fd, buf->vcpubuf, vcpulen) ) {
>>> -        PERROR("Error when reading ctxt");
>>> -        goto free_vcpus;
>>> +    for ( total = i = 0, ptr.raw = buf->vcpubuf; ext_vcpucontext; ) {
>>
>> This isn't actually a for loop.  Would it really be so terrible to do
>> if(ext_vcpucontext) { while(1) {}}?
>
> Nothing really terrible, but it would increase indentation by one level
> for no real reason.

Making the intent of the code immediately understandable to anyone
looking at it is far from "no real reason".  If indentation makes the
code difficult to read, you can use a goto to skip the section.
Having this for-loop-that's-not-really-a-for-loop is *far* less
readable than code with an extra goto.

>
>>> +        if ( RDEXACT(fd, buf->vcpubuf + total, vcpulen) ) {
>>> +            PERROR("Error when reading ctxt");
>>> +            goto free_vcpus;
>>> +        }
>>> +        total += vcpulen;
>>> +        for ( vcpulen = 0; i < buf->vcpucount; ++i ) {
>>> +            size_t msrlen;
>>> +
>>> +            if ( (const unsigned char *)(ptr.evc + 1) > buf->vcpubuf + total )
>>> +                break;
>>> +            if ( ptr.evc->size <
>>> +                 (offsetof(xen_domctl_ext_vcpucontext_t, msrs) +
>>> +                  sizeof(ptr.evc->msrs)) )
>>> +                ptr.evc->msr_count = 0;
>>> +            msrlen = ptr.evc->msr_count * sizeof(xen_domctl_ext_vcpu_msr_t);
>>> +            vcpulen += msrlen;
>>> +            ptr.raw += 128 + msrlen + vcpuextstate_size;
>>> +        }
>>> +        if ( !vcpulen ) {
>>> +            assert(i == buf->vcpucount);
>>> +            break;
>>> +        }
>>
>> If you're going to write such twisted logic, you absolutely must
>> comment it -- at the very least you need to give a high level description
>> of what it's doing.  I've been staring at this for an hour now, and I
>> only have a vague idea what it's supposed to be doing.  Seriously, is
>> this any better than spaghetti code?
>
> Now do you have any better suggestion? I was just trying to extend
> what is there with as little churn on the code as possible. I'm not even
> certain the model chosen is desirable - I could imagine that introducing
> another "extended info block", e.g. tagged "msrs", might be preferable.

It's not so much about the data layout, as that I can't actually
follow what this code does.  Having "for(vcpulen=0;...", then inside
the loop, before doing anything, having a conditional break statement,
then immediately after the loop breaking out of the while(1) if
vcpulen == 0?  Wouldn't it have been better to check for the condition
*before* going into the inner for loop at all?  (Or perhaps even
before going into the outer loop?)  Not to mention the fact that
"vcpulen" doesn't actually mean the length of a vcpu anymore -- and
all without any comments to help someone figuring out what was going
on.

I started to try to re-write the loop in a way that would actually be
comprehensible, but at some point I realized that decoding your
uncommented post-optimized algorithm shouldn't be my job.  It should
at very least be your job to explain it to a reasonable degree.
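For what it's worth, a stand-alone sketch of the general pattern the loop
appears to implement: read the tail assuming fixed-size per-vCPU records,
walk the headers that are now buffered to discover each vCPU's
variable-length MSR block, grow the buffer, read the newly discovered
bytes, and repeat until every record is accounted for.  The sizes, the
field layout and the in-memory "stream" below are invented for the
example; this is not the libxc code.

    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define HDR   8u   /* stand-in for the 128-byte ext_vcpucontext block */
    #define MSRSZ 4u   /* stand-in for sizeof(xen_domctl_ext_vcpu_msr_t)  */
    #define XSTSZ 2u   /* stand-in for vcpuextstate_size                  */

    struct stream { const unsigned char *data; size_t len, pos; };

    static int rdexact(struct stream *s, unsigned char *dst, size_t n)
    {
        if ( s->pos + n > s->len )
            return -1;
        memcpy(dst, s->data + s->pos, n);
        s->pos += n;
        return 0;
    }

    int main(void)
    {
        /* Two records: vCPU0 has 2 MSR entries, vCPU1 has none.  Byte 0 of
         * each header holds the msr_count; all other bytes stay zero. */
        unsigned char src[2 * (HDR + XSTSZ) + 2 * MSRSZ] = { 0 };
        struct stream s = { src, sizeof(src), 0 };
        unsigned int vcpucount = 2, i = 0;
        size_t total = 0, want = vcpucount * (HDR + XSTSZ), recoff = 0;
        unsigned char *buf = NULL;

        src[0] = 2;                                /* vCPU0: two MSR entries */

        for ( ;; )
        {
            unsigned char *nbuf = realloc(buf, total + want);

            assert(nbuf);
            buf = nbuf;
            if ( rdexact(&s, buf + total, want) )
            {
                fprintf(stderr, "short read\n");
                free(buf);
                return 1;
            }
            total += want;
            /* Walk the records whose headers are now fully buffered and add
             * up the variable-length MSR bytes they still owe us. */
            for ( want = 0; i < vcpucount; ++i )
            {
                size_t msrlen;

                if ( recoff + HDR > total )
                    break;                         /* header not read yet */
                msrlen = buf[recoff] * MSRSZ;
                want += msrlen;
                recoff += HDR + msrlen + XSTSZ;
            }
            if ( !want )
                break;                             /* nothing left to read */
        }
        printf("parsed %u records, %zu bytes total\n", i, total);
        free(buf);
        return 0;
    }

The essential point is that each pass can only trust headers that are
already fully buffered; whatever a newly parsed header adds (its MSR
block) is fetched on the next pass, and the loop ends once a pass
discovers nothing new.
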

> In any event, this isn't code I'm feeling comfortable changing, so of all
> possible options I'd prefer to have this whole aspect of the intended
> functionality done by someone else who is. And the patch here then
> might serve as no more than a baseline reference, to be thrown away
> entirely if so desired.
>
> Perhaps the best thing really is to leave the save/restore aspect out
> for now, and get it implemented once Andrew's rewrite is in place.

That might make sense.

>
>>> @@ -1960,16 +1963,42 @@ int xc_domain_save(xc_interface *xch, in
>>>          domctl.domain = dom;
>>>          memset(&domctl.u, 0, sizeof(domctl.u));
>>>          domctl.u.ext_vcpucontext.vcpu = i;
>>> -        if ( xc_domctl(xch, &domctl) < 0 )
>>> +        frc = xc_domctl(xch, &domctl);
>>> +        if ( frc < 0 && errno == ENOBUFS &&
>>> +             domctl.u.ext_vcpucontext.size >=
>>> +             (offsetof(xen_domctl_ext_vcpucontext_t, msrs) +
>>> +              sizeof(domctl.u.ext_vcpucontext.msrs)) &&
>>> +             domctl.u.ext_vcpucontext.msr_count )
>>> +        {
>>> +            msrs = xc_hypercall_buffer_alloc(xch, msrs,
>>> +                                             domctl.u.ext_vcpucontext.msr_count *
>>> +                                             sizeof(*msrs));
>>> +            set_xen_guest_handle(domctl.u.ext_vcpucontext.msrs, msrs);
>>> +            frc = msrs ? xc_domctl(xch, &domctl) : -1;
>>> +            /* Don't save the actual pointer. */
>>> +            set_xen_guest_handle_raw(domctl.u.ext_vcpucontext.msrs, NULL);
>>> +        }
>>
>> I think this could use a comment as well.
>
> Not sure what you refer to here - the one non-obvious thing (clearing
> out the handle) is being commented. Are you trying to tell me that this
> isn't a sufficient explanation, or are you referring to other parts of
> this code block?

In this case, I meant a comment explaining, or pointing to, the size change
in 4.5.  Someone casually looking through the code shouldn't be
required to have the entire history of the migration protocol at the tips
of their fingers.

 -George

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2014-04-14 15:22 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-07  9:31 [PATCH v3 0/3] x86/AMD support data breakpoint extension registers Jan Beulich
2014-04-07  9:38 ` [PATCH v3 1/3] SVM: " Jan Beulich
2014-04-11 14:32   ` George Dunlap
2014-04-07  9:38 ` [PATCH v3 2/3] x86/PV: " Jan Beulich
2014-04-11 14:58   ` George Dunlap
2014-04-11 15:32     ` Jan Beulich
2014-04-11 15:37       ` George Dunlap
2014-04-07  9:39 ` [PATCH v3 3/3] libxc/PV: save/restore " Jan Beulich
2014-04-11 16:37   ` George Dunlap
2014-04-14  7:50     ` Jan Beulich
2014-04-14 15:22       ` George Dunlap
