All of lore.kernel.org
 help / color / mirror / Atom feed
* [Xen-devel] [PATCH v6 1/9] x86/p2m: Allow p2m_get_page_from_gfn to return shared entries
       [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
@ 2020-01-27 18:06 ` Tamas K Lengyel
  2020-01-28 16:37   ` Jan Beulich
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 2/9] x86/hvm: introduce hvm_copy_context_and_params Tamas K Lengyel
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-27 18:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Tamas K Lengyel

The owner domain of shared pages is dom_cow, use that for get_page
otherwise the function fails to return the correct page. Fixing the
error now and simplifying the checks since we can't have any shared
entries with dom_cow being NULL.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
v6: use simplified check for dom_cow in both slow and fast path
---
 xen/arch/x86/mm/p2m.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 49cc138362..f54360b43d 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -574,11 +574,12 @@ struct page_info *p2m_get_page_from_gfn(
                 if ( fdom == NULL )
                     page = NULL;
             }
-            else if ( !get_page(page, p2m->domain) &&
-                      /* Page could be shared */
-                      (!dom_cow || !p2m_is_shared(*t) ||
-                       !get_page(page, dom_cow)) )
-                page = NULL;
+            else
+            {
+                struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
+                if ( !get_page(page, d) )
+                    page = NULL;
+            }
         }
         p2m_read_unlock(p2m);
 
@@ -594,8 +595,9 @@ struct page_info *p2m_get_page_from_gfn(
     mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL);
     if ( p2m_is_ram(*t) && mfn_valid(mfn) )
     {
+        struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
         page = mfn_to_page(mfn);
-        if ( !get_page(page, p2m->domain) )
+        if ( !get_page(page, d) )
             page = NULL;
     }
     put_gfn(p2m->domain, gfn_x(gfn));
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v6 2/9] x86/hvm: introduce hvm_copy_context_and_params
       [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 1/9] x86/p2m: Allow p2m_get_page_from_gfn to return shared entries Tamas K Lengyel
@ 2020-01-27 18:06 ` Tamas K Lengyel
  2020-01-28 16:47   ` Jan Beulich
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 3/9] xen/x86: Make hap_get_allocation accessible Tamas K Lengyel
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-27 18:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Tamas K Lengyel

Currently the hvm parameters are only accessible via the HVMOP hypercalls. In
this patch we introduce a new function that can copy both the hvm context and
parameters directly into a target domain. No functional changes in existing
code.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
v6: finish addressing all of Jan's comments
---
 xen/arch/x86/hvm/hvm.c        | 236 +++++++++++++++++++++-------------
 xen/include/asm-x86/hvm/hvm.h |   2 +
 2 files changed, 148 insertions(+), 90 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 0b93609a82..0505c75661 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4077,16 +4077,17 @@ static int hvmop_set_evtchn_upcall_vector(
 }
 
 static int hvm_allow_set_param(struct domain *d,
-                               const struct xen_hvm_param *a)
+                               uint32_t index,
+                               uint64_t new_value)
 {
-    uint64_t value = d->arch.hvm.params[a->index];
+    uint64_t value = d->arch.hvm.params[index];
     int rc;
 
     rc = xsm_hvm_param(XSM_TARGET, d, HVMOP_set_param);
     if ( rc )
         return rc;
 
-    switch ( a->index )
+    switch ( index )
     {
     /* The following parameters can be set by the guest. */
     case HVM_PARAM_CALLBACK_IRQ:
@@ -4119,7 +4120,7 @@ static int hvm_allow_set_param(struct domain *d,
     if ( rc )
         return rc;
 
-    switch ( a->index )
+    switch ( index )
     {
     /* The following parameters should only be changed once. */
     case HVM_PARAM_VIRIDIAN:
@@ -4129,7 +4130,7 @@ static int hvm_allow_set_param(struct domain *d,
     case HVM_PARAM_NR_IOREQ_SERVER_PAGES:
     case HVM_PARAM_ALTP2M:
     case HVM_PARAM_MCA_CAP:
-        if ( value != 0 && a->value != value )
+        if ( value != 0 && new_value != value )
             rc = -EEXIST;
         break;
     default:
@@ -4139,49 +4140,32 @@ static int hvm_allow_set_param(struct domain *d,
     return rc;
 }
 
-static int hvmop_set_param(
-    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
+static int hvm_set_param(struct domain *d, uint32_t index, uint64_t value)
 {
     struct domain *curr_d = current->domain;
-    struct xen_hvm_param a;
-    struct domain *d;
     struct vcpu *v;
     int rc;
 
-    if ( copy_from_guest(&a, arg, 1) )
-        return -EFAULT;
-
-    if ( a.index >= HVM_NR_PARAMS )
+    if ( index >= HVM_NR_PARAMS )
         return -EINVAL;
 
-    /* Make sure the above bound check is not bypassed during speculation. */
-    block_speculation();
-
-    d = rcu_lock_domain_by_any_id(a.domid);
-    if ( d == NULL )
-        return -ESRCH;
-
-    rc = -EINVAL;
-    if ( !is_hvm_domain(d) )
-        goto out;
-
-    rc = hvm_allow_set_param(d, &a);
+    rc = hvm_allow_set_param(d, index, value);
     if ( rc )
         goto out;
 
-    switch ( a.index )
+    switch ( index )
     {
     case HVM_PARAM_CALLBACK_IRQ:
-        hvm_set_callback_via(d, a.value);
+        hvm_set_callback_via(d, value);
         hvm_latch_shinfo_size(d);
         break;
     case HVM_PARAM_TIMER_MODE:
-        if ( a.value > HVMPTM_one_missed_tick_pending )
+        if ( value > HVMPTM_one_missed_tick_pending )
             rc = -EINVAL;
         break;
     case HVM_PARAM_VIRIDIAN:
-        if ( (a.value & ~HVMPV_feature_mask) ||
-             !(a.value & HVMPV_base_freq) )
+        if ( (value & ~HVMPV_feature_mask) ||
+             !(value & HVMPV_base_freq) )
             rc = -EINVAL;
         break;
     case HVM_PARAM_IDENT_PT:
@@ -4191,7 +4175,7 @@ static int hvmop_set_param(
          */
         if ( !paging_mode_hap(d) || !cpu_has_vmx )
         {
-            d->arch.hvm.params[a.index] = a.value;
+            d->arch.hvm.params[index] = value;
             break;
         }
 
@@ -4206,7 +4190,7 @@ static int hvmop_set_param(
 
         rc = 0;
         domain_pause(d);
-        d->arch.hvm.params[a.index] = a.value;
+        d->arch.hvm.params[index] = value;
         for_each_vcpu ( d, v )
             paging_update_cr3(v, false);
         domain_unpause(d);
@@ -4215,23 +4199,23 @@ static int hvmop_set_param(
         break;
     case HVM_PARAM_DM_DOMAIN:
         /* The only value this should ever be set to is DOMID_SELF */
-        if ( a.value != DOMID_SELF )
+        if ( value != DOMID_SELF )
             rc = -EINVAL;
 
-        a.value = curr_d->domain_id;
+        value = curr_d->domain_id;
         break;
     case HVM_PARAM_ACPI_S_STATE:
         rc = 0;
-        if ( a.value == 3 )
+        if ( value == 3 )
             hvm_s3_suspend(d);
-        else if ( a.value == 0 )
+        else if ( value == 0 )
             hvm_s3_resume(d);
         else
             rc = -EINVAL;
 
         break;
     case HVM_PARAM_ACPI_IOPORTS_LOCATION:
-        rc = pmtimer_change_ioport(d, a.value);
+        rc = pmtimer_change_ioport(d, value);
         break;
     case HVM_PARAM_MEMORY_EVENT_CR0:
     case HVM_PARAM_MEMORY_EVENT_CR3:
@@ -4246,24 +4230,24 @@ static int hvmop_set_param(
         rc = xsm_hvm_param_nested(XSM_PRIV, d);
         if ( rc )
             break;
-        if ( a.value > 1 )
+        if ( value > 1 )
             rc = -EINVAL;
         /*
          * Remove the check below once we have
          * shadow-on-shadow.
          */
-        if ( !paging_mode_hap(d) && a.value )
+        if ( !paging_mode_hap(d) && value )
             rc = -EINVAL;
-        if ( a.value &&
+        if ( value &&
              d->arch.hvm.params[HVM_PARAM_ALTP2M] )
             rc = -EINVAL;
         /* Set up NHVM state for any vcpus that are already up. */
-        if ( a.value &&
+        if ( value &&
              !d->arch.hvm.params[HVM_PARAM_NESTEDHVM] )
             for_each_vcpu(d, v)
                 if ( rc == 0 )
                     rc = nestedhvm_vcpu_initialise(v);
-        if ( !a.value || rc )
+        if ( !value || rc )
             for_each_vcpu(d, v)
                 nestedhvm_vcpu_destroy(v);
         break;
@@ -4271,30 +4255,30 @@ static int hvmop_set_param(
         rc = xsm_hvm_param_altp2mhvm(XSM_PRIV, d);
         if ( rc )
             break;
-        if ( a.value > XEN_ALTP2M_limited )
+        if ( value > XEN_ALTP2M_limited )
             rc = -EINVAL;
-        if ( a.value &&
+        if ( value &&
              d->arch.hvm.params[HVM_PARAM_NESTEDHVM] )
             rc = -EINVAL;
         break;
     case HVM_PARAM_TRIPLE_FAULT_REASON:
-        if ( a.value > SHUTDOWN_MAX )
+        if ( value > SHUTDOWN_MAX )
             rc = -EINVAL;
         break;
     case HVM_PARAM_IOREQ_SERVER_PFN:
-        d->arch.hvm.ioreq_gfn.base = a.value;
+        d->arch.hvm.ioreq_gfn.base = value;
         break;
     case HVM_PARAM_NR_IOREQ_SERVER_PAGES:
     {
         unsigned int i;
 
-        if ( a.value == 0 ||
-             a.value > sizeof(d->arch.hvm.ioreq_gfn.mask) * 8 )
+        if ( value == 0 ||
+             value > sizeof(d->arch.hvm.ioreq_gfn.mask) * 8 )
         {
             rc = -EINVAL;
             break;
         }
-        for ( i = 0; i < a.value; i++ )
+        for ( i = 0; i < value; i++ )
             set_bit(i, &d->arch.hvm.ioreq_gfn.mask);
 
         break;
@@ -4306,35 +4290,35 @@ static int hvmop_set_param(
                      sizeof(d->arch.hvm.ioreq_gfn.legacy_mask) * 8);
         BUILD_BUG_ON(HVM_PARAM_BUFIOREQ_PFN >
                      sizeof(d->arch.hvm.ioreq_gfn.legacy_mask) * 8);
-        if ( a.value )
-            set_bit(a.index, &d->arch.hvm.ioreq_gfn.legacy_mask);
+        if ( value )
+            set_bit(index, &d->arch.hvm.ioreq_gfn.legacy_mask);
         break;
 
     case HVM_PARAM_X87_FIP_WIDTH:
-        if ( a.value != 0 && a.value != 4 && a.value != 8 )
+        if ( value != 0 && value != 4 && value != 8 )
         {
             rc = -EINVAL;
             break;
         }
-        d->arch.x87_fip_width = a.value;
+        d->arch.x87_fip_width = value;
         break;
 
     case HVM_PARAM_VM86_TSS:
         /* Hardware would silently truncate high bits. */
-        if ( a.value != (uint32_t)a.value )
+        if ( value != (uint32_t)value )
         {
             if ( d == curr_d )
                 domain_crash(d);
             rc = -EINVAL;
         }
         /* Old hvmloader binaries hardcode the size to 128 bytes. */
-        if ( a.value )
-            a.value |= (128ULL << 32) | VM86_TSS_UPDATED;
-        a.index = HVM_PARAM_VM86_TSS_SIZED;
+        if ( value )
+            value |= (128ULL << 32) | VM86_TSS_UPDATED;
+        index = HVM_PARAM_VM86_TSS_SIZED;
         break;
 
     case HVM_PARAM_VM86_TSS_SIZED:
-        if ( (a.value >> 32) < sizeof(struct tss32) )
+        if ( (value >> 32) < sizeof(struct tss32) )
         {
             if ( d == curr_d )
                 domain_crash(d);
@@ -4345,26 +4329,56 @@ static int hvmop_set_param(
          * 256 bits interrupt redirection bitmap + 64k bits I/O bitmap
          * plus one padding byte).
          */
-        if ( (a.value >> 32) > sizeof(struct tss32) +
+        if ( (value >> 32) > sizeof(struct tss32) +
                                (0x100 / 8) + (0x10000 / 8) + 1 )
-            a.value = (uint32_t)a.value |
+            value = (uint32_t)value |
                       ((sizeof(struct tss32) + (0x100 / 8) +
                                                (0x10000 / 8) + 1) << 32);
-        a.value |= VM86_TSS_UPDATED;
+        value |= VM86_TSS_UPDATED;
         break;
 
     case HVM_PARAM_MCA_CAP:
-        rc = vmce_enable_mca_cap(d, a.value);
+        rc = vmce_enable_mca_cap(d, value);
         break;
     }
 
     if ( rc != 0 )
         goto out;
 
-    d->arch.hvm.params[a.index] = a.value;
+    d->arch.hvm.params[index] = value;
 
     HVM_DBG_LOG(DBG_LEVEL_HCALL, "set param %u = %"PRIx64,
-                a.index, a.value);
+                index, value);
+
+ out:
+    return rc;
+}
+
+int hvmop_set_param(
+    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
+{
+    struct xen_hvm_param a;
+    struct domain *d;
+    int rc;
+
+    if ( copy_from_guest(&a, arg, 1) )
+        return -EFAULT;
+
+    if ( a.index >= HVM_NR_PARAMS )
+        return -EINVAL;
+
+    /* Make sure the above bound check is not bypassed during speculation. */
+    block_speculation();
+
+    d = rcu_lock_domain_by_any_id(a.domid);
+    if ( d == NULL )
+        return -ESRCH;
+
+    rc = -EINVAL;
+    if ( !is_hvm_domain(d) )
+        goto out;
+
+    rc = hvm_set_param(d, a.index, a.value);
 
  out:
     rcu_unlock_domain(d);
@@ -4372,7 +4386,7 @@ static int hvmop_set_param(
 }
 
 static int hvm_allow_get_param(struct domain *d,
-                               const struct xen_hvm_param *a)
+                               uint32_t index)
 {
     int rc;
 
@@ -4380,7 +4394,7 @@ static int hvm_allow_get_param(struct domain *d,
     if ( rc )
         return rc;
 
-    switch ( a->index )
+    switch ( index )
     {
     /* The following parameters can be read by the guest. */
     case HVM_PARAM_CALLBACK_IRQ:
@@ -4410,6 +4424,40 @@ static int hvm_allow_get_param(struct domain *d,
     return rc;
 }
 
+static int hvm_get_param(struct domain *d, uint32_t index, uint64_t *value)
+{
+    int rc;
+
+    rc = hvm_allow_get_param(d, index);
+    if ( rc )
+        return rc;
+
+    switch ( index )
+    {
+    case HVM_PARAM_ACPI_S_STATE:
+        *value = d->arch.hvm.is_s3_suspended ? 3 : 0;
+        break;
+
+    case HVM_PARAM_VM86_TSS:
+        *value = (uint32_t)d->arch.hvm.params[HVM_PARAM_VM86_TSS_SIZED];
+        break;
+
+    case HVM_PARAM_VM86_TSS_SIZED:
+        *value = d->arch.hvm.params[HVM_PARAM_VM86_TSS_SIZED] &
+                   ~VM86_TSS_UPDATED;
+        break;
+
+    case HVM_PARAM_X87_FIP_WIDTH:
+        *value = d->arch.x87_fip_width;
+        break;
+    default:
+        *value = d->arch.hvm.params[index];
+        break;
+    }
+
+    return 0;
+};
+
 static int hvmop_get_param(
     XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
 {
@@ -4434,33 +4482,10 @@ static int hvmop_get_param(
     if ( !is_hvm_domain(d) )
         goto out;
 
-    rc = hvm_allow_get_param(d, &a);
+    rc = hvm_get_param(d, a.index, &a.value);
     if ( rc )
         goto out;
 
-    switch ( a.index )
-    {
-    case HVM_PARAM_ACPI_S_STATE:
-        a.value = d->arch.hvm.is_s3_suspended ? 3 : 0;
-        break;
-
-    case HVM_PARAM_VM86_TSS:
-        a.value = (uint32_t)d->arch.hvm.params[HVM_PARAM_VM86_TSS_SIZED];
-        break;
-
-    case HVM_PARAM_VM86_TSS_SIZED:
-        a.value = d->arch.hvm.params[HVM_PARAM_VM86_TSS_SIZED] &
-                  ~VM86_TSS_UPDATED;
-        break;
-
-    case HVM_PARAM_X87_FIP_WIDTH:
-        a.value = d->arch.x87_fip_width;
-        break;
-    default:
-        a.value = d->arch.hvm.params[a.index];
-        break;
-    }
-
     rc = __copy_to_guest(arg, &a, 1) ? -EFAULT : 0;
 
     HVM_DBG_LOG(DBG_LEVEL_HCALL, "get param %u = %"PRIx64,
@@ -5297,6 +5322,37 @@ void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
     alternative_vcall(hvm_funcs.set_segment_register, v, seg, reg);
 }
 
+int hvm_copy_context_and_params(struct domain *dst, struct domain *src)
+{
+    int rc;
+    unsigned int i;
+    struct hvm_domain_context c = { };
+
+    for ( i = 0; i < HVM_NR_PARAMS; i++ )
+    {
+        uint64_t value = 0;
+
+        if ( hvm_get_param(src, i, &value) || !value )
+            continue;
+
+        if ( (rc = hvm_set_param(dst, i, value)) )
+            return rc;
+    }
+
+    c.size = hvm_save_size(src);
+    if ( (c.data = vmalloc(c.size)) == NULL )
+        return -ENOMEM;
+
+    if ( !(rc = hvm_save(src, &c)) )
+    {
+        c.cur = 0;
+        rc = hvm_load(dst, &c);
+    }
+
+    vfree(c.data);
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 9eab1d7493..24da824cbf 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -337,6 +337,8 @@ unsigned long hvm_cr4_guest_valid_bits(const struct domain *d, bool restore);
 bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
                         void *ctxt);
 
+int hvm_copy_context_and_params(struct domain *src, struct domain *dst);
+
 #ifdef CONFIG_HVM
 
 #define hvm_get_guest_tsc(v) hvm_get_guest_tsc_fixed(v, 0)
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v6 3/9] xen/x86: Make hap_get_allocation accessible
       [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 1/9] x86/p2m: Allow p2m_get_page_from_gfn to return shared entries Tamas K Lengyel
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 2/9] x86/hvm: introduce hvm_copy_context_and_params Tamas K Lengyel
@ 2020-01-27 18:06 ` Tamas K Lengyel
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 4/9] x86/mem_sharing: Replace MEM_SHARING_DEBUG with gdprintk Tamas K Lengyel
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-27 18:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Tamas K Lengyel

During VM forking we'll copy the parent domain's parameters to the client,
including the HAP shadow memory setting that is used for storing the domain's
EPT. We'll copy this in the hypervisor instead doing it during toolstack launch
to allow the domain to start executing and unsharing memory before (or
even completely without) the toolstack.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/mm/hap/hap.c | 3 +--
 xen/include/asm-x86/hap.h | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 3d93f3451c..c7c7ff6e99 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -321,8 +321,7 @@ static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
 }
 
 /* Return the size of the pool, rounded up to the nearest MB */
-static unsigned int
-hap_get_allocation(struct domain *d)
+unsigned int hap_get_allocation(struct domain *d)
 {
     unsigned int pg = d->arch.paging.hap.total_pages
         + d->arch.paging.hap.p2m_pages;
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index b94bfb4ed0..1bf07e49fe 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -45,6 +45,7 @@ int   hap_track_dirty_vram(struct domain *d,
 
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 int hap_set_allocation(struct domain *d, unsigned int pages, bool *preempted);
+unsigned int hap_get_allocation(struct domain *d);
 
 #endif /* XEN_HAP_H */
 
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v6 4/9] x86/mem_sharing: Replace MEM_SHARING_DEBUG with gdprintk
       [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
                   ` (2 preceding siblings ...)
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 3/9] xen/x86: Make hap_get_allocation accessible Tamas K Lengyel
@ 2020-01-27 18:06 ` Tamas K Lengyel
  2020-01-28 16:52   ` Jan Beulich
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap Tamas K Lengyel
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-27 18:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Tamas K Lengyel

Using XENLOG_ERR level since this is only used in debug paths (ie. it's
expected the user already has loglvl=all set). Also use %pd to print the domain
ids.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/mm/mem_sharing.c | 82 +++++++++++++++++------------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 5ce075d307..2b3be5b125 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -49,9 +49,6 @@ typedef struct pg_lock_data {
 
 static DEFINE_PER_CPU(pg_lock_data_t, __pld);
 
-#define MEM_SHARING_DEBUG(_f, _a...)                                  \
-    debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
-
 /* Reverse map defines */
 #define RMAP_HASHTAB_ORDER  0
 #define RMAP_HASHTAB_SIZE   \
@@ -482,9 +479,9 @@ static int audit(void)
         /* If we can't lock it, it's definitely not a shared page */
         if ( !mem_sharing_page_lock(pg) )
         {
-            MEM_SHARING_DEBUG(
-                "mfn %lx in audit list, but cannot be locked (%lx)!\n",
-                mfn_x(mfn), pg->u.inuse.type_info);
+            gdprintk(XENLOG_ERR,
+                     "mfn %lx in audit list, but cannot be locked (%lx)!\n",
+                     mfn_x(mfn), pg->u.inuse.type_info);
             errors++;
             continue;
         }
@@ -492,9 +489,9 @@ static int audit(void)
         /* Check if the MFN has correct type, owner and handle. */
         if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page )
         {
-            MEM_SHARING_DEBUG(
-                "mfn %lx in audit list, but not PGT_shared_page (%lx)!\n",
-                mfn_x(mfn), pg->u.inuse.type_info & PGT_type_mask);
+            gdprintk(XENLOG_ERR,
+                     "mfn %lx in audit list, but not PGT_shared_page (%lx)!\n",
+                     mfn_x(mfn), pg->u.inuse.type_info & PGT_type_mask);
             errors++;
             continue;
         }
@@ -502,24 +499,24 @@ static int audit(void)
         /* Check the page owner. */
         if ( page_get_owner(pg) != dom_cow )
         {
-            MEM_SHARING_DEBUG("mfn %lx shared, but wrong owner %pd!\n",
-                              mfn_x(mfn), page_get_owner(pg));
+            gdprintk(XENLOG_ERR, "mfn %lx shared, but wrong owner (%pd)!\n",
+                     mfn_x(mfn), page_get_owner(pg));
             errors++;
         }
 
         /* Check the m2p entry */
         if ( !SHARED_M2P(get_gpfn_from_mfn(mfn_x(mfn))) )
         {
-            MEM_SHARING_DEBUG("mfn %lx shared, but wrong m2p entry (%lx)!\n",
-                              mfn_x(mfn), get_gpfn_from_mfn(mfn_x(mfn)));
+            gdprintk(XENLOG_ERR, "mfn %lx shared, but wrong m2p entry (%lx)!\n",
+                     mfn_x(mfn), get_gpfn_from_mfn(mfn_x(mfn)));
             errors++;
         }
 
         /* Check we have a list */
         if ( (!pg->sharing) || rmap_count(pg) == 0 )
         {
-            MEM_SHARING_DEBUG("mfn %lx shared, but empty gfn list!\n",
-                              mfn_x(mfn));
+            gdprintk(XENLOG_ERR, "mfn %lx shared, but empty gfn list!\n",
+                     mfn_x(mfn));
             errors++;
             continue;
         }
@@ -538,24 +535,26 @@ static int audit(void)
             d = get_domain_by_id(g->domain);
             if ( d == NULL )
             {
-                MEM_SHARING_DEBUG("Unknown dom: %hu, for PFN=%lx, MFN=%lx\n",
-                                  g->domain, g->gfn, mfn_x(mfn));
+                gdprintk(XENLOG_ERR,
+                         "Unknown dom: %d, for PFN=%lx, MFN=%lx\n",
+                         g->domain, g->gfn, mfn_x(mfn));
                 errors++;
                 continue;
             }
             o_mfn = get_gfn_query_unlocked(d, g->gfn, &t);
             if ( !mfn_eq(o_mfn, mfn) )
             {
-                MEM_SHARING_DEBUG("Incorrect P2M for d=%hu, PFN=%lx."
-                                  "Expecting MFN=%lx, got %lx\n",
-                                  g->domain, g->gfn, mfn_x(mfn), mfn_x(o_mfn));
+                gdprintk(XENLOG_ERR, "Incorrect P2M for %pd, PFN=%lx."
+                         "Expecting MFN=%lx, got %lx\n",
+                         d, g->gfn, mfn_x(mfn), mfn_x(o_mfn));
                 errors++;
             }
             if ( t != p2m_ram_shared )
             {
-                MEM_SHARING_DEBUG("Incorrect P2M type for d=%hu, PFN=%lx MFN=%lx."
-                                  "Expecting t=%d, got %d\n",
-                                  g->domain, g->gfn, mfn_x(mfn), p2m_ram_shared, t);
+                gdprintk(XENLOG_ERR,
+                         "Incorrect P2M type for %pd, PFN=%lx MFN=%lx."
+                         "Expecting t=%d, got %d\n",
+                         d, g->gfn, mfn_x(mfn), p2m_ram_shared, t);
                 errors++;
             }
             put_domain(d);
@@ -564,10 +563,10 @@ static int audit(void)
         /* The type count has an extra ref because we have locked the page */
         if ( (nr_gfns + 1) != (pg->u.inuse.type_info & PGT_count_mask) )
         {
-            MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx."
-                              "nr_gfns in list %lu, in type_info %lx\n",
-                              mfn_x(mfn), nr_gfns,
-                              (pg->u.inuse.type_info & PGT_count_mask));
+            gdprintk(XENLOG_ERR, "Mismatched counts for MFN=%lx."
+                     "nr_gfns in list %lu, in type_info %lx\n",
+                     mfn_x(mfn), nr_gfns,
+                     (pg->u.inuse.type_info & PGT_count_mask));
             errors++;
         }
 
@@ -578,8 +577,8 @@ static int audit(void)
 
     if ( count_found != count_expected )
     {
-        MEM_SHARING_DEBUG("Expected %ld shared mfns, found %ld.",
-                          count_expected, count_found);
+        gdprintk(XENLOG_ERR, "Expected %ld shared mfns, found %ld.",
+                 count_expected, count_found);
         errors++;
     }
 
@@ -757,10 +756,10 @@ static int debug_mfn(mfn_t mfn)
         return -EINVAL;
     }
 
-    MEM_SHARING_DEBUG(
-        "Debug page: MFN=%lx is ci=%lx, ti=%lx, owner=%pd\n",
-        mfn_x(page_to_mfn(page)), page->count_info,
-        page->u.inuse.type_info, page_get_owner(page));
+    gdprintk(XENLOG_ERR,
+             "Debug page: MFN=%lx is ci=%lx, ti=%lx, owner_id=%pd\n",
+             mfn_x(page_to_mfn(page)), page->count_info,
+             page->u.inuse.type_info, page_get_owner(page));
 
     /* -1 because the page is locked and that's an additional type ref */
     num_refs = ((int) (page->u.inuse.type_info & PGT_count_mask)) - 1;
@@ -776,8 +775,9 @@ static int debug_gfn(struct domain *d, gfn_t gfn)
 
     mfn = get_gfn_query(d, gfn_x(gfn), &p2mt);
 
-    MEM_SHARING_DEBUG("Debug for dom%d, gfn=%" PRI_gfn "\n",
-                      d->domain_id, gfn_x(gfn));
+    gdprintk(XENLOG_ERR, "Debug for %pd, gfn=%" PRI_gfn "\n",
+             d, gfn_x(gfn));
+
     num_refs = debug_mfn(mfn);
     put_gfn(d, gfn_x(gfn));
 
@@ -793,13 +793,13 @@ static int debug_gref(struct domain *d, grant_ref_t ref)
     rc = mem_sharing_gref_to_gfn(d->grant_table, ref, &gfn, &status);
     if ( rc )
     {
-        MEM_SHARING_DEBUG("Asked to debug [dom=%d,gref=%u]: error %d.\n",
-                          d->domain_id, ref, rc);
+        gdprintk(XENLOG_ERR, "Asked to debug [%pd,gref=%u]: error %d.\n",
+                 d, ref, rc);
         return rc;
     }
 
-    MEM_SHARING_DEBUG("==> Grant [dom=%d,ref=%d], status=%x. ",
-                      d->domain_id, ref, status);
+    gdprintk(XENLOG_ERR, "==> Grant [%pd,ref=%d], status=%x. ",
+             d, ref, status);
 
     return debug_gfn(d, gfn);
 }
@@ -1278,8 +1278,8 @@ int __mem_sharing_unshare_page(struct domain *d,
  private_page_found:
     if ( p2m_change_type_one(d, gfn, p2m_ram_shared, p2m_ram_rw) )
     {
-        gdprintk(XENLOG_ERR, "Could not change p2m type d %hu gfn %lx.\n",
-                 d->domain_id, gfn);
+        gdprintk(XENLOG_ERR, "Could not change p2m type d %pd gfn %lx.\n",
+                 d, gfn);
         BUG();
     }
 
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
       [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
                   ` (3 preceding siblings ...)
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 4/9] x86/mem_sharing: Replace MEM_SHARING_DEBUG with gdprintk Tamas K Lengyel
@ 2020-01-27 18:06 ` Tamas K Lengyel
  2020-01-28 16:55   ` Jan Beulich
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 6/9] xen/mem_sharing: VM forking Tamas K Lengyel
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-27 18:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Tamas K Lengyel

When plugging a hole in the target physmap don't use the access permission
returned by __get_gfn_type_access as it can be non-sensical, leading to
spurious vm_events being sent out for access violations at unexpected
locations. Make use of p2m->default_access instead.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/mm/mem_sharing.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 2b3be5b125..52b139c1bb 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1071,11 +1071,10 @@ int add_to_physmap(struct domain *sd, unsigned long sgfn, shr_handle_t sh,
     p2m_type_t smfn_type, cmfn_type;
     struct gfn_info *gfn_info;
     struct p2m_domain *p2m = p2m_get_hostp2m(cd);
-    p2m_access_t a;
     struct two_gfns tg;
 
     get_two_gfns(sd, _gfn(sgfn), &smfn_type, NULL, &smfn,
-                 cd, _gfn(cgfn), &cmfn_type, &a, &cmfn, 0, &tg, lock);
+                 cd, _gfn(cgfn), &cmfn_type, NULL, &cmfn, 0, &tg, lock);
 
     /* Get the source shared page, check and lock */
     ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
@@ -1110,7 +1109,7 @@ int add_to_physmap(struct domain *sd, unsigned long sgfn, shr_handle_t sh,
     }
 
     ret = p2m_set_entry(p2m, _gfn(cgfn), smfn, PAGE_ORDER_4K,
-                        p2m_ram_shared, a);
+                        p2m_ram_shared, p2m->default_access);
 
     /* Tempted to turn this into an assert */
     if ( ret )
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v6 6/9] xen/mem_sharing: VM forking
       [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
                   ` (4 preceding siblings ...)
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap Tamas K Lengyel
@ 2020-01-27 18:06 ` Tamas K Lengyel
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 7/9] xen/mem_access: Use __get_gfn_type_access in set_mem_access Tamas K Lengyel
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-27 18:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Tamas K Lengyel

VM forking is the process of creating a domain with an empty memory space and a
parent domain specified from which to populate the memory when necessary. For
the new domain to be functional the VM state is copied over as part of the fork
operation (HVM params, hap allocation, etc).

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
v6: use get_domain/put_domain on parent to make sure it's not destroyed
---
 xen/arch/x86/domain.c             |  11 ++
 xen/arch/x86/hvm/hvm.c            |   2 +-
 xen/arch/x86/mm/mem_sharing.c     | 222 ++++++++++++++++++++++++++++++
 xen/arch/x86/mm/p2m.c             |  11 +-
 xen/include/asm-x86/mem_sharing.h |  20 ++-
 xen/include/public/memory.h       |   5 +
 xen/include/xen/sched.h           |   2 +
 7 files changed, 269 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 28fefa1f81..60d110c08f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2198,6 +2198,17 @@ int domain_relinquish_resources(struct domain *d)
             ret = relinquish_shared_pages(d);
             if ( ret )
                 return ret;
+
+            /*
+             * If the domain is forked, decrement the parent's pause count
+             * and release the domain.
+             */
+            if ( d->parent )
+            {
+                domain_unpause(d->parent);
+                put_domain(d->parent);
+                d->parent = NULL;
+            }
         }
 #endif
 
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 0505c75661..dffab7e899 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1911,7 +1911,7 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
     }
 #endif
 
-    /* Spurious fault? PoD and log-dirty also take this path. */
+    /* Spurious fault? PoD, log-dirty and VM forking also take this path. */
     if ( p2m_is_ram(p2mt) )
     {
         rc = 1;
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 52b139c1bb..b6be78486d 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -22,6 +22,7 @@
 
 #include <xen/types.h>
 #include <xen/domain_page.h>
+#include <xen/event.h>
 #include <xen/spinlock.h>
 #include <xen/rwlock.h>
 #include <xen/mm.h>
@@ -36,6 +37,9 @@
 #include <asm/altp2m.h>
 #include <asm/atomic.h>
 #include <asm/event.h>
+#include <asm/hap.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/save.h>
 #include <xsm/xsm.h>
 
 #include "mm-locks.h"
@@ -1421,6 +1425,194 @@ static inline int mem_sharing_control(struct domain *d, bool enable)
     return 0;
 }
 
+/*
+ * Forking a page only gets called when the VM faults due to no entry being
+ * in the EPT for the access. Depending on the type of access we either
+ * populate the physmap with a shared entry for read-only access or
+ * fork the page if its a write access.
+ *
+ * The client p2m is already locked so we only need to lock
+ * the parent's here.
+ */
+int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool unsharing)
+{
+    int rc = -ENOENT;
+    shr_handle_t handle;
+    struct domain *parent;
+    struct p2m_domain *p2m;
+    unsigned long gfn_l = gfn_x(gfn);
+    mfn_t mfn, new_mfn;
+    p2m_type_t p2mt;
+    struct page_info *page;
+
+    if ( !mem_sharing_is_fork(d) )
+        return -ENOENT;
+
+    parent = d->parent;
+
+    if ( !unsharing )
+    {
+        /* For read-only accesses we just add a shared entry to the physmap */
+        while ( parent )
+        {
+            if ( !(rc = nominate_page(parent, gfn, 0, &handle)) )
+                break;
+
+            parent = parent->parent;
+        }
+
+        if ( !rc )
+        {
+            /* The client's p2m is already locked */
+            struct p2m_domain *pp2m = p2m_get_hostp2m(parent);
+
+            p2m_lock(pp2m);
+            rc = add_to_physmap(parent, gfn_l, handle, d, gfn_l, false);
+            p2m_unlock(pp2m);
+
+            if ( !rc )
+                return 0;
+        }
+    }
+
+    /*
+     * If it's a write access (ie. unsharing) or if adding a shared entry to
+     * the physmap failed we'll fork the page directly.
+     */
+    p2m = p2m_get_hostp2m(d);
+    parent = d->parent;
+
+    while ( parent )
+    {
+        mfn = get_gfn_query(parent, gfn_l, &p2mt);
+
+        if ( mfn_valid(mfn) && p2m_is_any_ram(p2mt) )
+            break;
+
+        put_gfn(parent, gfn_l);
+        parent = parent->parent;
+    }
+
+    if ( !parent )
+        return -ENOENT;
+
+    if ( !(page = alloc_domheap_page(d, 0)) )
+    {
+        put_gfn(parent, gfn_l);
+        return -ENOMEM;
+    }
+
+    new_mfn = page_to_mfn(page);
+    copy_domain_page(new_mfn, mfn);
+    set_gpfn_from_mfn(mfn_x(new_mfn), gfn_l);
+
+    put_gfn(parent, gfn_l);
+
+    return p2m->set_entry(p2m, gfn, new_mfn, PAGE_ORDER_4K, p2m_ram_rw,
+                          p2m->default_access, -1);
+}
+
+static int bring_up_vcpus(struct domain *cd, struct cpupool *cpupool)
+{
+    int ret;
+    unsigned int i;
+
+    if ( (ret = cpupool_move_domain(cd, cpupool)) )
+        return ret;
+
+    for ( i = 0; i < cd->max_vcpus; i++ )
+    {
+        if ( cd->vcpu[i] )
+            continue;
+
+        if ( !vcpu_create(cd, i) )
+            return -EINVAL;
+    }
+
+    domain_update_node_affinity(cd);
+    return 0;
+}
+
+static int fork_hap_allocation(struct domain *d, struct domain *cd)
+{
+    int rc;
+    bool preempted;
+    unsigned long mb = hap_get_allocation(d);
+
+    if ( mb == hap_get_allocation(cd) )
+        return 0;
+
+    paging_lock(cd);
+    rc = hap_set_allocation(cd, mb << (20 - PAGE_SHIFT), &preempted);
+    paging_unlock(cd);
+
+    if ( rc )
+        return rc;
+
+    if ( preempted )
+        return -ERESTART;
+
+    return 0;
+}
+
+static void fork_tsc(struct domain *d, struct domain *cd)
+{
+    uint32_t tsc_mode;
+    uint32_t gtsc_khz;
+    uint32_t incarnation;
+    uint64_t elapsed_nsec;
+
+    tsc_get_info(d, &tsc_mode, &elapsed_nsec, &gtsc_khz, &incarnation);
+    tsc_set_info(cd, tsc_mode, elapsed_nsec, gtsc_khz, incarnation);
+}
+
+static int mem_sharing_fork(struct domain *d, struct domain *cd)
+{
+    int rc = -EINVAL;
+
+    if ( !cd->controller_pause_count )
+        return rc;
+
+    /*
+     * We only want to get and pause the parent once, not each time this
+     * operation is restarted due to preemption.
+     */
+    if ( !cd->parent_paused )
+    {
+        if ( get_domain(d) )
+            return rc;
+
+        domain_pause(d);
+        cd->parent_paused = true;
+    }
+
+    cd->max_pages = d->max_pages;
+    cd->max_vcpus = d->max_vcpus;
+
+    /* this is preemptible so it's the first to get done */
+    if ( (rc = fork_hap_allocation(d, cd)) )
+        goto done;
+
+    if ( (rc = bring_up_vcpus(cd, d->cpupool)) )
+        goto done;
+
+    if ( (rc = hvm_copy_context_and_params(cd, d)) )
+        goto done;
+
+    fork_tsc(d, cd);
+
+    cd->parent = d;
+
+ done:
+    if ( rc && rc != -ERESTART )
+    {
+        domain_unpause(d);
+        cd->parent_paused = false;
+    }
+
+    return rc;
+}
+
 int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
 {
     int rc;
@@ -1675,6 +1867,36 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
         rc = debug_gref(d, mso.u.debug.u.gref);
         break;
 
+    case XENMEM_sharing_op_fork:
+    {
+        struct domain *pd;
+
+        rc = -EINVAL;
+        if ( mso.u.fork._pad[0] || mso.u.fork._pad[1] ||
+             mso.u.fork._pad[2] )
+            goto out;
+
+        rc = rcu_lock_live_remote_domain_by_id(mso.u.fork.parent_domain,
+                                               &pd);
+        if ( rc )
+            goto out;
+
+        if ( !mem_sharing_enabled(pd) )
+        {
+            if ( (rc = mem_sharing_control(pd, true)) )
+                goto out;
+        }
+
+        rc = mem_sharing_fork(pd, d);
+
+        if ( rc == -ERESTART )
+            rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
+                                               "lh", XENMEM_sharing_op,
+                                               arg);
+        rcu_unlock_domain(pd);
+        break;
+    }
+
     default:
         rc = -ENOSYS;
         break;
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index f54360b43d..d128963036 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -509,6 +509,14 @@ mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
 
     mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
 
+    /* Check if we need to fork the page */
+    if ( (q & P2M_ALLOC) && p2m_is_hole(*t) &&
+         !mem_sharing_fork_page(p2m->domain, gfn, !!(q & P2M_UNSHARE)) )
+    {
+        mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
+    }
+
+    /* Check if we need to unshare the page */
     if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) )
     {
         ASSERT(p2m_is_hostp2m(p2m));
@@ -587,7 +595,8 @@ struct page_info *p2m_get_page_from_gfn(
             return page;
 
         /* Error path: not a suitable GFN at all */
-        if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) )
+        if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) &&
+             !mem_sharing_is_fork(p2m->domain) )
             return NULL;
     }
 
diff --git a/xen/include/asm-x86/mem_sharing.h b/xen/include/asm-x86/mem_sharing.h
index 53760a2896..812171284f 100644
--- a/xen/include/asm-x86/mem_sharing.h
+++ b/xen/include/asm-x86/mem_sharing.h
@@ -26,8 +26,7 @@
 
 #ifdef CONFIG_MEM_SHARING
 
-struct mem_sharing_domain
-{
+struct mem_sharing_domain {
     bool enabled;
 
     /*
@@ -39,6 +38,9 @@ struct mem_sharing_domain
 
 #define mem_sharing_enabled(d) ((d)->arch.hvm.mem_sharing.enabled)
 
+#define mem_sharing_is_fork(d) \
+    (mem_sharing_enabled(d) && !!((d)->parent))
+
 /* Auditing of memory sharing code? */
 #ifndef NDEBUG
 #define MEM_SHARING_AUDIT 1
@@ -88,6 +90,9 @@ static inline int mem_sharing_unshare_page(struct domain *d,
     return rc;
 }
 
+int mem_sharing_fork_page(struct domain *d, gfn_t gfn,
+                          bool unsharing);
+
 /*
  * If called by a foreign domain, possible errors are
  *   -EBUSY -> ring full
@@ -117,6 +122,7 @@ int relinquish_shared_pages(struct domain *d);
 #else
 
 #define mem_sharing_enabled(d) false
+#define mem_sharing_is_fork(p2m) false
 
 static inline unsigned int mem_sharing_get_nr_saved_mfns(void)
 {
@@ -141,6 +147,16 @@ static inline int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
     return -EOPNOTSUPP;
 }
 
+static inline int mem_sharing_fork(struct domain *d, struct domain *cd, bool vcpu)
+{
+    return -EOPNOTSUPP;
+}
+
+static inline int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool lock)
+{
+    return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* __MEM_SHARING_H__ */
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index cfdda6e2a8..90a3f4498e 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -482,6 +482,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
 #define XENMEM_sharing_op_add_physmap       6
 #define XENMEM_sharing_op_audit             7
 #define XENMEM_sharing_op_range_share       8
+#define XENMEM_sharing_op_fork              9
 
 #define XENMEM_SHARING_OP_S_HANDLE_INVALID  (-10)
 #define XENMEM_SHARING_OP_C_HANDLE_INVALID  (-9)
@@ -532,6 +533,10 @@ struct xen_mem_sharing_op {
                 uint32_t gref;     /* IN: gref to debug         */
             } u;
         } debug;
+        struct mem_sharing_op_fork {
+            domid_t parent_domain;
+            uint16_t _pad[3];                /* Must be set to 0 */
+        } fork;
     } u;
 };
 typedef struct xen_mem_sharing_op xen_mem_sharing_op_t;
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 7c5c437247..8ed727e10c 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -507,6 +507,8 @@ struct domain
     /* Memory sharing support */
 #ifdef CONFIG_MEM_SHARING
     struct vm_event_domain *vm_event_share;
+    struct domain *parent; /* VM fork parent */
+    bool parent_paused;
 #endif
     /* Memory paging support */
 #ifdef CONFIG_HAS_MEM_PAGING
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v6 7/9] xen/mem_access: Use __get_gfn_type_access in set_mem_access
       [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
                   ` (5 preceding siblings ...)
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 6/9] xen/mem_sharing: VM forking Tamas K Lengyel
@ 2020-01-27 18:06 ` Tamas K Lengyel
  2020-01-29 13:09   ` Jan Beulich
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 8/9] x86/mem_sharing: reset a fork Tamas K Lengyel
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 9/9] xen/tools: VM forking toolstack side Tamas K Lengyel
  8 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-27 18:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Tamas K Lengyel

Use __get_gfn_type_access instead of p2m->get_entry to trigger page-forking
when the mem_access permission is being set on a page that has not yet been
copied over from the parent.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/mm/mem_access.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
index d16540a9aa..ede774fb50 100644
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -303,11 +303,10 @@ static int set_mem_access(struct domain *d, struct p2m_domain *p2m,
     ASSERT(!ap2m);
 #endif
     {
-        mfn_t mfn;
         p2m_access_t _a;
         p2m_type_t t;
-
-        mfn = p2m->get_entry(p2m, gfn, &t, &_a, 0, NULL, NULL);
+        mfn_t mfn = __get_gfn_type_access(p2m, gfn_x(gfn), &t, &_a,
+                                          P2M_ALLOC, NULL, false);
         rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, t, a, -1);
     }
 
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v6 8/9] x86/mem_sharing: reset a fork
       [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
                   ` (6 preceding siblings ...)
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 7/9] xen/mem_access: Use __get_gfn_type_access in set_mem_access Tamas K Lengyel
@ 2020-01-27 18:06 ` Tamas K Lengyel
  2020-01-28 15:13   ` Tamas K Lengyel
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 9/9] xen/tools: VM forking toolstack side Tamas K Lengyel
  8 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-27 18:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Tamas K Lengyel

Implement hypercall that allows a fork to shed all memory that got allocated
for it during its execution and re-load its vCPU context from the parent VM.
This allows the forked VM to reset into the same state the parent VM is in a
faster way then creating a new fork would be. Measurements show about a 2x
speedup during normal fuzzing operations. Performance may vary depending how
much memory got allocated for the forked VM. If it has been completely
deduplicated from the parent VM then creating a new fork would likely be more
performant.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/mm/mem_sharing.c | 76 +++++++++++++++++++++++++++++++++++
 xen/include/public/memory.h   |  1 +
 2 files changed, 77 insertions(+)

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index b6be78486d..88549db7b2 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1613,6 +1613,59 @@ static int mem_sharing_fork(struct domain *d, struct domain *cd)
     return rc;
 }
 
+/*
+ * The fork reset operation is intended to be used on short-lived forks only.
+ * There is no hypercall continuation operation implemented for this reason.
+ * For forks that obtain a larger memory footprint it is likely going to be
+ * more performant to create a new fork instead of resetting an existing one.
+ *
+ * TODO: In case this hypercall would become useful on forks with larger memory
+ * footprints the hypercall continuation should be implemented.
+ */
+static int mem_sharing_fork_reset(struct domain *d, struct domain *cd)
+{
+    int rc;
+    struct p2m_domain* p2m = p2m_get_hostp2m(cd);
+    struct page_info *page, *tmp;
+
+    domain_pause(cd);
+
+    page_list_for_each_safe(page, tmp, &cd->page_list)
+    {
+        p2m_type_t p2mt;
+        p2m_access_t p2ma;
+        gfn_t gfn;
+        mfn_t mfn = page_to_mfn(page);
+
+        if ( !mfn_valid(mfn) )
+            continue;
+
+        gfn = mfn_to_gfn(cd, mfn);
+        mfn = __get_gfn_type_access(p2m, gfn_x(gfn), &p2mt, &p2ma,
+                                    0, NULL, false);
+
+        if ( !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) )
+            continue;
+
+        /* take an extra reference */
+        if ( !get_page(page, cd) )
+            continue;
+
+        rc = p2m->set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
+                            p2m_invalid, p2m_access_rwx, -1);
+        ASSERT(!rc);
+
+        put_page_alloc_ref(page);
+        put_page(page);
+    }
+
+    if ( !(rc = hvm_copy_context_and_params(d, cd)) )
+        fork_tsc(d, cd);
+
+    domain_unpause(cd);
+    return rc;
+}
+
 int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
 {
     int rc;
@@ -1897,6 +1950,29 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
         break;
     }
 
+    case XENMEM_sharing_op_fork_reset:
+    {
+        struct domain *pd;
+
+        rc = -EINVAL;
+        if ( mso.u.fork._pad[0] || mso.u.fork._pad[1] ||
+             mso.u.fork._pad[2] )
+            goto out;
+
+        rc = -ENOSYS;
+        if ( !d->parent )
+            goto out;
+
+        rc = rcu_lock_live_remote_domain_by_id(d->parent->domain_id, &pd);
+        if ( rc )
+            goto out;
+
+        rc = mem_sharing_fork_reset(pd, d);
+
+        rcu_unlock_domain(pd);
+        break;
+    }
+
     default:
         rc = -ENOSYS;
         break;
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 90a3f4498e..e3d063e22e 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -483,6 +483,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
 #define XENMEM_sharing_op_audit             7
 #define XENMEM_sharing_op_range_share       8
 #define XENMEM_sharing_op_fork              9
+#define XENMEM_sharing_op_fork_reset        10
 
 #define XENMEM_SHARING_OP_S_HANDLE_INVALID  (-10)
 #define XENMEM_SHARING_OP_C_HANDLE_INVALID  (-9)
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [Xen-devel] [PATCH v6 9/9] xen/tools: VM forking toolstack side
       [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
                   ` (7 preceding siblings ...)
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 8/9] x86/mem_sharing: reset a fork Tamas K Lengyel
@ 2020-01-27 18:06 ` Tamas K Lengyel
  8 siblings, 0 replies; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-27 18:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Tamas K Lengyel

Add necessary bits to implement "xl fork-vm" commands. The command allows the
user to specify how to launch the device model allowing for a late-launch model
in which the user can execute the fork without the device model and decide to
only later launch it.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 docs/man/xl.1.pod.in          |  36 ++++++
 tools/libxc/include/xenctrl.h |  13 ++
 tools/libxc/xc_memshr.c       |  22 ++++
 tools/libxl/libxl.h           |   7 +
 tools/libxl/libxl_create.c    | 237 +++++++++++++++++++++++-----------
 tools/libxl/libxl_dm.c        |   2 +-
 tools/libxl/libxl_dom.c       |  43 +++++-
 tools/libxl/libxl_internal.h  |   1 +
 tools/libxl/libxl_types.idl   |   1 +
 tools/xl/xl.h                 |   5 +
 tools/xl/xl_cmdtable.c        |  12 ++
 tools/xl/xl_saverestore.c     |  95 ++++++++++++++
 tools/xl/xl_vmcontrol.c       |   8 ++
 13 files changed, 398 insertions(+), 84 deletions(-)

diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
index d4b5e8e362..22cc4149b0 100644
--- a/docs/man/xl.1.pod.in
+++ b/docs/man/xl.1.pod.in
@@ -694,6 +694,42 @@ Leave the domain paused after creating the snapshot.
 
 =back
 
+=item B<fork-vm> [I<OPTIONS>] I<domain-id>
+
+Create a fork of a running VM. The domain will be paused after the operation
+and needs to remain paused while forks of it exist.
+
+B<OPTIONS>
+
+=over 4
+
+=item B<-p>
+
+Leave the fork paused after creating it.
+
+=item B<--launch-dm>
+
+Specify whether the device model (QEMU) should be launched for the fork. Late
+launch allows to start the device model for an already running fork.
+
+=item B<-C>
+
+The config file to use when launching the device model. Currently required when
+launching the device model.
+
+=item B<-Q>
+
+The qemu save file to use when launching the device model.  Currently required
+when launching the device model.
+
+=item B<--fork-reset>
+
+Perform a reset operation of an already running fork. Note that resetting may
+be less performant then creating a new fork depending on how much memory the
+fork has deduplicated during its runtime.
+
+=back
+
 =item B<sharing> [I<domain-id>]
 
 Display the number of shared pages for a specified domain. If no domain is
diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index cc4eb1e3d3..6f65888dd0 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2225,6 +2225,19 @@ int xc_memshr_range_share(xc_interface *xch,
                           uint64_t first_gfn,
                           uint64_t last_gfn);
 
+int xc_memshr_fork(xc_interface *xch,
+                   uint32_t source_domain,
+                   uint32_t client_domain);
+
+/*
+ * Note: this function is only intended to be used on short-lived forks that
+ * haven't yet aquired a lot of memory. In case the fork has a lot of memory
+ * it is likely more performant to create a new fork with xc_memshr_fork.
+ *
+ * With VMs that have a lot of memory this call may block for a long time.
+ */
+int xc_memshr_fork_reset(xc_interface *xch, uint32_t forked_domain);
+
 /* Debug calls: return the number of pages referencing the shared frame backing
  * the input argument. Should be one or greater.
  *
diff --git a/tools/libxc/xc_memshr.c b/tools/libxc/xc_memshr.c
index 97e2e6a8d9..d0e4ee225b 100644
--- a/tools/libxc/xc_memshr.c
+++ b/tools/libxc/xc_memshr.c
@@ -239,6 +239,28 @@ int xc_memshr_debug_gref(xc_interface *xch,
     return xc_memshr_memop(xch, domid, &mso);
 }
 
+int xc_memshr_fork(xc_interface *xch, uint32_t pdomid, uint32_t domid)
+{
+    xen_mem_sharing_op_t mso;
+
+    memset(&mso, 0, sizeof(mso));
+
+    mso.op = XENMEM_sharing_op_fork;
+    mso.u.fork.parent_domain = pdomid;
+
+    return xc_memshr_memop(xch, domid, &mso);
+}
+
+int xc_memshr_fork_reset(xc_interface *xch, uint32_t domid)
+{
+    xen_mem_sharing_op_t mso;
+
+    memset(&mso, 0, sizeof(mso));
+    mso.op = XENMEM_sharing_op_fork_reset;
+
+    return xc_memshr_memop(xch, domid, &mso);
+}
+
 int xc_memshr_audit(xc_interface *xch)
 {
     xen_mem_sharing_op_t mso;
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 54abb9db1f..75cb070587 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1536,6 +1536,13 @@ int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config,
                             const libxl_asyncop_how *ao_how,
                             const libxl_asyncprogress_how *aop_console_how)
                             LIBXL_EXTERNAL_CALLERS_ONLY;
+int libxl_domain_fork_vm(libxl_ctx *ctx, uint32_t pdomid, uint32_t *domid)
+                             LIBXL_EXTERNAL_CALLERS_ONLY;
+int libxl_domain_fork_launch_dm(libxl_ctx *ctx, libxl_domain_config *d_config,
+                                uint32_t domid,
+                                const libxl_asyncprogress_how *aop_console_how)
+                                LIBXL_EXTERNAL_CALLERS_ONLY;
+int libxl_domain_fork_reset(libxl_ctx *ctx, uint32_t domid);
 int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config,
                                 uint32_t *domid, int restore_fd,
                                 int send_back_fd,
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 32d45dcef0..e0d219596c 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -536,12 +536,12 @@ out:
     return ret;
 }
 
-int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
-                       libxl__domain_build_state *state,
-                       uint32_t *domid)
+static int libxl__domain_make_xs_entries(libxl__gc *gc, libxl_domain_config *d_config,
+                                         libxl__domain_build_state *state,
+                                         uint32_t domid)
 {
     libxl_ctx *ctx = libxl__gc_owner(gc);
-    int ret, rc, nb_vm;
+    int rc, nb_vm;
     const char *dom_type;
     char *uuid_string;
     char *dom_path, *vm_path, *libxl_path;
@@ -553,7 +553,6 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
 
     /* convenience aliases */
     libxl_domain_create_info *info = &d_config->c_info;
-    libxl_domain_build_info *b_info = &d_config->b_info;
 
     uuid_string = libxl__uuid2string(gc, info->uuid);
     if (!uuid_string) {
@@ -561,64 +560,7 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
         goto out;
     }
 
-    /* Valid domid here means we're soft resetting. */
-    if (!libxl_domid_valid_guest(*domid)) {
-        struct xen_domctl_createdomain create = {
-            .ssidref = info->ssidref,
-            .max_vcpus = b_info->max_vcpus,
-            .max_evtchn_port = b_info->event_channels,
-            .max_grant_frames = b_info->max_grant_frames,
-            .max_maptrack_frames = b_info->max_maptrack_frames,
-        };
-
-        if (info->type != LIBXL_DOMAIN_TYPE_PV) {
-            create.flags |= XEN_DOMCTL_CDF_hvm;
-            create.flags |=
-                libxl_defbool_val(info->hap) ? XEN_DOMCTL_CDF_hap : 0;
-            create.flags |=
-                libxl_defbool_val(info->oos) ? 0 : XEN_DOMCTL_CDF_oos_off;
-        }
-
-        assert(info->passthrough != LIBXL_PASSTHROUGH_DEFAULT);
-        LOG(DETAIL, "passthrough: %s",
-            libxl_passthrough_to_string(info->passthrough));
-
-        if (info->passthrough != LIBXL_PASSTHROUGH_DISABLED)
-            create.flags |= XEN_DOMCTL_CDF_iommu;
-
-        if (info->passthrough == LIBXL_PASSTHROUGH_SYNC_PT)
-            create.iommu_opts |= XEN_DOMCTL_IOMMU_no_sharept;
-
-        /* Ultimately, handle is an array of 16 uint8_t, same as uuid */
-        libxl_uuid_copy(ctx, (libxl_uuid *)&create.handle, &info->uuid);
-
-        ret = libxl__arch_domain_prepare_config(gc, d_config, &create);
-        if (ret < 0) {
-            LOGED(ERROR, *domid, "fail to get domain config");
-            rc = ERROR_FAIL;
-            goto out;
-        }
-
-        ret = xc_domain_create(ctx->xch, domid, &create);
-        if (ret < 0) {
-            LOGED(ERROR, *domid, "domain creation fail");
-            rc = ERROR_FAIL;
-            goto out;
-        }
-
-        rc = libxl__arch_domain_save_config(gc, d_config, state, &create);
-        if (rc < 0)
-            goto out;
-    }
-
-    ret = xc_cpupool_movedomain(ctx->xch, info->poolid, *domid);
-    if (ret < 0) {
-        LOGED(ERROR, *domid, "domain move fail");
-        rc = ERROR_FAIL;
-        goto out;
-    }
-
-    dom_path = libxl__xs_get_dompath(gc, *domid);
+    dom_path = libxl__xs_get_dompath(gc, domid);
     if (!dom_path) {
         rc = ERROR_FAIL;
         goto out;
@@ -626,12 +568,12 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
 
     vm_path = GCSPRINTF("/vm/%s", uuid_string);
     if (!vm_path) {
-        LOGD(ERROR, *domid, "cannot allocate create paths");
+        LOGD(ERROR, domid, "cannot allocate create paths");
         rc = ERROR_FAIL;
         goto out;
     }
 
-    libxl_path = libxl__xs_libxl_path(gc, *domid);
+    libxl_path = libxl__xs_libxl_path(gc, domid);
     if (!libxl_path) {
         rc = ERROR_FAIL;
         goto out;
@@ -642,10 +584,10 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
 
     roperm[0].id = 0;
     roperm[0].perms = XS_PERM_NONE;
-    roperm[1].id = *domid;
+    roperm[1].id = domid;
     roperm[1].perms = XS_PERM_READ;
 
-    rwperm[0].id = *domid;
+    rwperm[0].id = domid;
     rwperm[0].perms = XS_PERM_NONE;
 
 retry_transaction:
@@ -663,7 +605,7 @@ retry_transaction:
                     noperm, ARRAY_SIZE(noperm));
 
     xs_write(ctx->xsh, t, GCSPRINTF("%s/vm", dom_path), vm_path, strlen(vm_path));
-    rc = libxl__domain_rename(gc, *domid, 0, info->name, t);
+    rc = libxl__domain_rename(gc, domid, 0, info->name, t);
     if (rc)
         goto out;
 
@@ -740,7 +682,7 @@ retry_transaction:
 
     vm_list = libxl_list_vm(ctx, &nb_vm);
     if (!vm_list) {
-        LOGD(ERROR, *domid, "cannot get number of running guests");
+        LOGD(ERROR, domid, "cannot get number of running guests");
         rc = ERROR_FAIL;
         goto out;
     }
@@ -764,7 +706,7 @@ retry_transaction:
             t = 0;
             goto retry_transaction;
         }
-        LOGED(ERROR, *domid, "domain creation ""xenstore transaction commit failed");
+        LOGED(ERROR, domid, "domain creation ""xenstore transaction commit failed");
         rc = ERROR_FAIL;
         goto out;
     }
@@ -776,6 +718,80 @@ retry_transaction:
     return rc;
 }
 
+int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
+                       libxl__domain_build_state *state,
+                       uint32_t *domid)
+{
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+    int ret, rc;
+
+    /* convenience aliases */
+    libxl_domain_create_info *info = &d_config->c_info;
+    libxl_domain_build_info *b_info = &d_config->b_info;
+
+    /* Valid domid here means we're soft resetting. */
+    if (!libxl_domid_valid_guest(*domid)) {
+        struct xen_domctl_createdomain create = {
+            .ssidref = info->ssidref,
+            .max_vcpus = b_info->max_vcpus,
+            .max_evtchn_port = b_info->event_channels,
+            .max_grant_frames = b_info->max_grant_frames,
+            .max_maptrack_frames = b_info->max_maptrack_frames,
+        };
+
+        if (info->type != LIBXL_DOMAIN_TYPE_PV) {
+            create.flags |= XEN_DOMCTL_CDF_hvm;
+            create.flags |=
+                libxl_defbool_val(info->hap) ? XEN_DOMCTL_CDF_hap : 0;
+            create.flags |=
+                libxl_defbool_val(info->oos) ? 0 : XEN_DOMCTL_CDF_oos_off;
+        }
+
+        assert(info->passthrough != LIBXL_PASSTHROUGH_DEFAULT);
+        LOG(DETAIL, "passthrough: %s",
+            libxl_passthrough_to_string(info->passthrough));
+
+        if (info->passthrough != LIBXL_PASSTHROUGH_DISABLED)
+            create.flags |= XEN_DOMCTL_CDF_iommu;
+
+        if (info->passthrough == LIBXL_PASSTHROUGH_SYNC_PT)
+            create.iommu_opts |= XEN_DOMCTL_IOMMU_no_sharept;
+
+        /* Ultimately, handle is an array of 16 uint8_t, same as uuid */
+        libxl_uuid_copy(ctx, (libxl_uuid *)&create.handle, &info->uuid);
+
+        ret = libxl__arch_domain_prepare_config(gc, d_config, &create);
+        if (ret < 0) {
+            LOGED(ERROR, *domid, "fail to get domain config");
+            rc = ERROR_FAIL;
+            goto out;
+        }
+
+        ret = xc_domain_create(ctx->xch, domid, &create);
+        if (ret < 0) {
+            LOGED(ERROR, *domid, "domain creation fail");
+            rc = ERROR_FAIL;
+            goto out;
+        }
+
+        rc = libxl__arch_domain_save_config(gc, d_config, state, &create);
+        if (rc < 0)
+            goto out;
+    }
+
+    ret = xc_cpupool_movedomain(ctx->xch, info->poolid, *domid);
+    if (ret < 0) {
+        LOGED(ERROR, *domid, "domain move fail");
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    rc = libxl__domain_make_xs_entries(gc, d_config, state, *domid);
+
+out:
+    return rc;
+}
+
 static int store_libxl_entry(libxl__gc *gc, uint32_t domid,
                              libxl_domain_build_info *b_info)
 {
@@ -1097,15 +1113,31 @@ static void initiate_domain_create(libxl__egc *egc,
     ret = libxl__domain_config_setdefault(gc,d_config,domid);
     if (ret) goto error_out;
 
-    ret = libxl__domain_make(gc, d_config, &dcs->build_state, &domid);
-    if (ret) {
-        LOGD(ERROR, domid, "cannot make domain: %d", ret);
+    if ( !d_config->dm_restore_file )
+    {
+        ret = libxl__domain_make(gc, d_config, &dcs->build_state, &domid);
         dcs->guest_domid = domid;
+
+        if (ret) {
+            LOGD(ERROR, domid, "cannot make domain: %d", ret);
+            ret = ERROR_FAIL;
+            goto error_out;
+        }
+    } else if ( dcs->guest_domid != INVALID_DOMID ) {
+        domid = dcs->guest_domid;
+
+        ret = libxl__domain_make_xs_entries(gc, d_config, &dcs->build_state, domid);
+        if (ret) {
+            LOGD(ERROR, domid, "cannot make domain: %d", ret);
+            ret = ERROR_FAIL;
+            goto error_out;
+        }
+    } else {
+        LOGD(ERROR, domid, "cannot make domain");
         ret = ERROR_FAIL;
         goto error_out;
     }
 
-    dcs->guest_domid = domid;
     dcs->sdss.dm.guest_domid = 0; /* means we haven't spawned */
 
     /* post-4.13 todo: move these next bits of defaulting to
@@ -1141,7 +1173,7 @@ static void initiate_domain_create(libxl__egc *egc,
     if (ret)
         goto error_out;
 
-    if (restore_fd >= 0 || dcs->domid_soft_reset != INVALID_DOMID) {
+    if (restore_fd >= 0 || dcs->domid_soft_reset != INVALID_DOMID || d_config->dm_restore_file) {
         LOGD(DEBUG, domid, "restoring, not running bootloader");
         domcreate_bootloader_done(egc, &dcs->bl, 0);
     } else  {
@@ -1217,7 +1249,16 @@ static void domcreate_bootloader_done(libxl__egc *egc,
     dcs->sdss.dm.callback = domcreate_devmodel_started;
     dcs->sdss.callback = domcreate_devmodel_started;
 
-    if (restore_fd < 0 && dcs->domid_soft_reset == INVALID_DOMID) {
+    if (restore_fd < 0 && dcs->domid_soft_reset == INVALID_DOMID && !d_config->dm_restore_file) {
+        rc = libxl__domain_build(gc, d_config, domid, state);
+        domcreate_rebuild_done(egc, dcs, rc);
+        return;
+    }
+
+    if ( d_config->dm_restore_file ) {
+        dcs->srs.dcs = dcs;
+        dcs->srs.ao = ao;
+        state->forked_vm = true;
         rc = libxl__domain_build(gc, d_config, domid, state);
         domcreate_rebuild_done(egc, dcs, rc);
         return;
@@ -1415,6 +1456,7 @@ static void domcreate_rebuild_done(libxl__egc *egc,
     /* convenience aliases */
     const uint32_t domid = dcs->guest_domid;
     libxl_domain_config *const d_config = dcs->guest_config;
+    libxl__domain_build_state *const state = &dcs->build_state;
 
     if (ret) {
         LOGD(ERROR, domid, "cannot (re-)build domain: %d", ret);
@@ -1422,6 +1464,9 @@ static void domcreate_rebuild_done(libxl__egc *egc,
         goto error_out;
     }
 
+    if ( d_config->dm_restore_file )
+        state->saved_state = GCSPRINTF("%s", d_config->dm_restore_file);
+
     store_libxl_entry(gc, domid, &d_config->b_info);
 
     libxl__multidev_begin(ao, &dcs->multidev);
@@ -1823,10 +1868,13 @@ static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config,
     GCNEW(cdcs);
     cdcs->dcs.ao = ao;
     cdcs->dcs.guest_config = d_config;
+    cdcs->dcs.guest_domid = *domid;
+
     libxl_domain_config_init(&cdcs->dcs.guest_config_saved);
     libxl_domain_config_copy(ctx, &cdcs->dcs.guest_config_saved, d_config);
     cdcs->dcs.restore_fd = cdcs->dcs.libxc_fd = restore_fd;
     cdcs->dcs.send_back_fd = send_back_fd;
+
     if (restore_fd > -1) {
         cdcs->dcs.restore_params = *params;
         rc = libxl__fd_flags_modify_save(gc, cdcs->dcs.restore_fd,
@@ -2069,6 +2117,43 @@ int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config,
                             ao_how, aop_console_how);
 }
 
+int libxl_domain_fork_vm(libxl_ctx *ctx, uint32_t pdomid, uint32_t *domid)
+{
+    int rc;
+    struct xen_domctl_createdomain create = {0};
+    create.flags |= XEN_DOMCTL_CDF_hvm;
+    create.flags |= XEN_DOMCTL_CDF_hap;
+    create.flags |= XEN_DOMCTL_CDF_oos_off;
+    create.arch.emulation_flags = (XEN_X86_EMU_ALL & ~XEN_X86_EMU_VPCI);
+
+    create.ssidref = SECINITSID_DOMU;
+    create.max_vcpus = 1; // placeholder, will be cloned from pdomid
+    create.max_evtchn_port = 1023;
+    create.max_grant_frames = LIBXL_MAX_GRANT_FRAMES_DEFAULT;
+    create.max_maptrack_frames = LIBXL_MAX_MAPTRACK_FRAMES_DEFAULT;
+
+    if ( (rc = xc_domain_create(ctx->xch, domid, &create)) )
+        return rc;
+
+    if ( (rc = xc_memshr_fork(ctx->xch, pdomid, *domid)) )
+        xc_domain_destroy(ctx->xch, *domid);
+
+    return rc;
+}
+
+int libxl_domain_fork_launch_dm(libxl_ctx *ctx, libxl_domain_config *d_config,
+                                uint32_t domid,
+                                const libxl_asyncprogress_how *aop_console_how)
+{
+    unset_disk_colo_restore(d_config);
+    return do_domain_create(ctx, d_config, &domid, -1, -1, 0, 0, aop_console_how);
+}
+
+int libxl_domain_fork_reset(libxl_ctx *ctx, uint32_t domid)
+{
+    return xc_memshr_fork_reset(ctx->xch, domid);
+}
+
 int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config,
                                 uint32_t *domid, int restore_fd,
                                 int send_back_fd,
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index e92e412c1b..9d967e1d32 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -2787,7 +2787,7 @@ static void device_model_spawn_outcome(libxl__egc *egc,
 
     libxl__domain_build_state *state = dmss->build_state;
 
-    if (state->saved_state) {
+    if (state->saved_state && !state->forked_vm) {
         ret2 = unlink(state->saved_state);
         if (ret2) {
             LOGED(ERROR, dmss->guest_domid, "%s: failed to remove device-model state %s",
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 573c63692b..c867c86d32 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -249,9 +249,12 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
     libxl_domain_build_info *const info = &d_config->b_info;
     libxl_ctx *ctx = libxl__gc_owner(gc);
     char *xs_domid, *con_domid;
-    int rc;
+    int rc = 0;
     uint64_t size;
 
+    if ( state->forked_vm )
+        goto skip_fork;
+
     if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
         LOG(ERROR, "Couldn't set max vcpu count");
         return ERROR_FAIL;
@@ -362,7 +365,6 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
         }
     }
 
-
     rc = libxl__arch_extra_memory(gc, info, &size);
     if (rc < 0) {
         LOGE(ERROR, "Couldn't get arch extra constant memory size");
@@ -374,6 +376,11 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
         return ERROR_FAIL;
     }
 
+    rc = libxl__arch_domain_create(gc, d_config, domid);
+    if ( rc )
+        goto out;
+
+skip_fork:
     xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
     state->store_domid = xs_domid ? atoi(xs_domid) : 0;
     free(xs_domid);
@@ -385,8 +392,7 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
     state->store_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->store_domid);
     state->console_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->console_domid);
 
-    rc = libxl__arch_domain_create(gc, d_config, domid);
-
+out:
     return rc;
 }
 
@@ -444,6 +450,9 @@ int libxl__build_post(libxl__gc *gc, uint32_t domid,
     char **ents;
     int i, rc;
 
+    if ( state->forked_vm )
+        goto skip_fork;
+
     if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) {
         rc = set_vnuma_affinity(gc, domid, info);
         if (rc)
@@ -468,6 +477,7 @@ int libxl__build_post(libxl__gc *gc, uint32_t domid,
         }
     }
 
+skip_fork:
     ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
     ents[0] = "memory/static-max";
     ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
@@ -730,14 +740,16 @@ static int hvm_build_set_params(xc_interface *handle, uint32_t domid,
                                 libxl_domain_build_info *info,
                                 int store_evtchn, unsigned long *store_mfn,
                                 int console_evtchn, unsigned long *console_mfn,
-                                domid_t store_domid, domid_t console_domid)
+                                domid_t store_domid, domid_t console_domid,
+                                bool forked_vm)
 {
     struct hvm_info_table *va_hvm;
     uint8_t *va_map, sum;
     uint64_t str_mfn, cons_mfn;
     int i;
 
-    if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
+    if ( info->type == LIBXL_DOMAIN_TYPE_HVM && !forked_vm )
+    {
         va_map = xc_map_foreign_range(handle, domid,
                                       XC_PAGE_SIZE, PROT_READ | PROT_WRITE,
                                       HVM_INFO_PFN);
@@ -1053,6 +1065,23 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
     struct xc_dom_image *dom = NULL;
     bool device_model = info->type == LIBXL_DOMAIN_TYPE_HVM ? true : false;
 
+    if ( state->forked_vm )
+    {
+        rc = hvm_build_set_params(ctx->xch, domid, info, state->store_port,
+                                  &state->store_mfn, state->console_port,
+                                  &state->console_mfn, state->store_domid,
+                                  state->console_domid, state->forked_vm);
+
+        if ( rc )
+            return rc;
+
+        return xc_dom_gnttab_seed(ctx->xch, domid, true,
+                                  state->console_mfn,
+                                  state->store_mfn,
+                                  state->console_domid,
+                                  state->store_domid);
+    }
+
     xc_dom_loginit(ctx->xch);
 
     /*
@@ -1177,7 +1206,7 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
     rc = hvm_build_set_params(ctx->xch, domid, info, state->store_port,
                                &state->store_mfn, state->console_port,
                                &state->console_mfn, state->store_domid,
-                               state->console_domid);
+                               state->console_domid, false);
     if (rc != 0) {
         LOG(ERROR, "hvm build set params failed");
         goto out;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 2555aa4575..7c1fd72efb 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1375,6 +1375,7 @@ typedef struct {
 
     char *saved_state;
     int dm_monitor_fd;
+    bool forked_vm;
 
     libxl__file_reference pv_kernel;
     libxl__file_reference pv_ramdisk;
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 7921950f6a..7c4c4057a9 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -956,6 +956,7 @@ libxl_domain_config = Struct("domain_config", [
     ("on_watchdog", libxl_action_on_shutdown),
     ("on_crash", libxl_action_on_shutdown),
     ("on_soft_reset", libxl_action_on_shutdown),
+    ("dm_restore_file", string, {'const': True}),
     ], dir=DIR_IN)
 
 libxl_diskinfo = Struct("diskinfo", [
diff --git a/tools/xl/xl.h b/tools/xl/xl.h
index 60bdad8ffb..9bdad6526e 100644
--- a/tools/xl/xl.h
+++ b/tools/xl/xl.h
@@ -31,6 +31,7 @@ struct cmd_spec {
 };
 
 struct domain_create {
+    uint32_t ddomid; /* fork launch dm for this domid */
     int debug;
     int daemonize;
     int monitor; /* handle guest reboots etc */
@@ -45,6 +46,7 @@ struct domain_create {
     const char *config_file;
     char *extra_config; /* extra config string */
     const char *restore_file;
+    const char *dm_restore_file;
     char *colo_proxy_script;
     bool userspace_colo_proxy;
     int migrate_fd; /* -1 means none */
@@ -127,6 +129,9 @@ int main_pciassignable_remove(int argc, char **argv);
 int main_pciassignable_list(int argc, char **argv);
 #ifndef LIBXL_HAVE_NO_SUSPEND_RESUME
 int main_restore(int argc, char **argv);
+int main_fork_vm(int argc, char **argv);
+int main_fork_launch_dm(int argc, char **argv);
+int main_fork_reset(int argc, char **argv);
 int main_migrate_receive(int argc, char **argv);
 int main_save(int argc, char **argv);
 int main_migrate(int argc, char **argv);
diff --git a/tools/xl/xl_cmdtable.c b/tools/xl/xl_cmdtable.c
index 3b302b2f20..3a5d371057 100644
--- a/tools/xl/xl_cmdtable.c
+++ b/tools/xl/xl_cmdtable.c
@@ -185,6 +185,18 @@ struct cmd_spec cmd_table[] = {
       "Restore a domain from a saved state",
       "- for internal use only",
     },
+    { "fork-vm",
+      &main_fork_vm, 0, 1,
+      "Fork a domain from the running parent domid",
+      "[options] <Domid>",
+      "-h                           Print this help.\n"
+      "-C <config>                  Use config file for VM fork.\n"
+      "-Q <qemu-save-file>          Use qemu save file for VM fork.\n"
+      "--launch-dm <yes|no|late>    Launch device model (QEMU) for VM fork.\n"
+      "--fork-reset                 Reset VM fork.\n"
+      "-p                           Do not unpause fork VM after operation.\n"
+      "-d                           Enable debug messages.\n"
+    },
 #endif
     { "dump-core",
       &main_dump_core, 0, 1,
diff --git a/tools/xl/xl_saverestore.c b/tools/xl/xl_saverestore.c
index 9be033fe65..25c20d2f1b 100644
--- a/tools/xl/xl_saverestore.c
+++ b/tools/xl/xl_saverestore.c
@@ -229,6 +229,101 @@ int main_restore(int argc, char **argv)
     return EXIT_SUCCESS;
 }
 
+int main_fork_vm(int argc, char **argv)
+{
+    int rc, debug = 0;
+    uint32_t domid_in = INVALID_DOMID, domid_out = INVALID_DOMID;
+    int launch_dm = 1;
+    bool reset = 0;
+    bool pause = 0;
+    const char *config_file = NULL;
+    const char *dm_restore_file = NULL;
+
+    int opt;
+    static struct option opts[] = {
+        {"launch-dm", 1, 0, 'l'},
+        {"fork-reset", 0, 0, 'r'},
+        COMMON_LONG_OPTS
+    };
+
+    SWITCH_FOREACH_OPT(opt, "phdC:Q:l:rN:D:B:V:", opts, "fork-vm", 1) {
+    case 'd':
+        debug = 1;
+        break;
+    case 'p':
+        pause = 1;
+        break;
+    case 'C':
+        config_file = optarg;
+        break;
+    case 'Q':
+        dm_restore_file = optarg;
+        break;
+    case 'l':
+        if ( !strcmp(optarg, "no") )
+            launch_dm = 0;
+        if ( !strcmp(optarg, "yes") )
+            launch_dm = 1;
+        if ( !strcmp(optarg, "late") )
+            launch_dm = 2;
+        break;
+    case 'r':
+        reset = 1;
+        break;
+    case 'N': /* fall-through */
+    case 'D': /* fall-through */
+    case 'B': /* fall-through */
+    case 'V':
+        fprintf(stderr, "Unimplemented option(s)\n");
+        return EXIT_FAILURE;
+    }
+
+    if (argc-optind == 1) {
+        domid_in = atoi(argv[optind]);
+    } else {
+        help("fork-vm");
+        return EXIT_FAILURE;
+    }
+
+    if (launch_dm && (!config_file || !dm_restore_file)) {
+        fprintf(stderr, "Currently you must provide both -C and -Q options\n");
+        return EXIT_FAILURE;
+    }
+
+    if (reset) {
+        domid_out = domid_in;
+        if (libxl_domain_fork_reset(ctx, domid_in) == EXIT_FAILURE)
+            return EXIT_FAILURE;
+    }
+
+    if (launch_dm == 2 || reset) {
+        domid_out = domid_in;
+        rc = EXIT_SUCCESS;
+    } else
+        rc = libxl_domain_fork_vm(ctx, domid_in, &domid_out);
+
+    if (rc == EXIT_SUCCESS && launch_dm) {
+        struct domain_create dom_info;
+        memset(&dom_info, 0, sizeof(dom_info));
+        dom_info.ddomid = domid_out;
+        dom_info.dm_restore_file = dm_restore_file;
+        dom_info.debug = debug;
+        dom_info.paused = pause;
+        dom_info.config_file = config_file;
+        dom_info.migrate_fd = -1;
+        dom_info.send_back_fd = -1;
+        rc = create_domain(&dom_info) < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+    }
+
+    if (rc == EXIT_SUCCESS && !pause)
+        rc = libxl_domain_unpause(ctx, domid_out, NULL);
+
+    if (rc == EXIT_SUCCESS)
+        fprintf(stderr, "fork-vm command successfully returned domid: %u\n", domid_out);
+
+    return rc;
+}
+
 int main_save(int argc, char **argv)
 {
     uint32_t domid;
diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c
index e520b1da79..d9cb19c599 100644
--- a/tools/xl/xl_vmcontrol.c
+++ b/tools/xl/xl_vmcontrol.c
@@ -645,6 +645,7 @@ int create_domain(struct domain_create *dom_info)
 
     libxl_domain_config d_config;
 
+    uint32_t ddomid = dom_info->ddomid; // launch dm for this domain iff set
     int debug = dom_info->debug;
     int daemonize = dom_info->daemonize;
     int monitor = dom_info->monitor;
@@ -655,6 +656,7 @@ int create_domain(struct domain_create *dom_info)
     const char *restore_file = dom_info->restore_file;
     const char *config_source = NULL;
     const char *restore_source = NULL;
+    const char *dm_restore_file = dom_info->dm_restore_file;
     int migrate_fd = dom_info->migrate_fd;
     bool config_in_json;
 
@@ -923,6 +925,12 @@ start:
          * restore/migrate-receive it again.
          */
         restoring = 0;
+    } else if ( ddomid ) {
+        d_config.dm_restore_file = dm_restore_file;
+        ret = libxl_domain_fork_launch_dm(ctx, &d_config, ddomid,
+                                          autoconnect_console_how);
+        domid = ddomid;
+        ddomid = INVALID_DOMID;
     } else if (domid_soft_reset != INVALID_DOMID) {
         /* Do soft reset. */
         ret = libxl_domain_soft_reset(ctx, &d_config, domid_soft_reset,
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 8/9] x86/mem_sharing: reset a fork
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 8/9] x86/mem_sharing: reset a fork Tamas K Lengyel
@ 2020-01-28 15:13   ` Tamas K Lengyel
  0 siblings, 0 replies; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-28 15:13 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: Xen-devel

> +    if ( !(rc = hvm_copy_context_and_params(d, cd)) )

I just realized that I forgot to swap the order of the parameters here
after the requested change was made in the prereq patch introducing
the function.

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 1/9] x86/p2m: Allow p2m_get_page_from_gfn to return shared entries
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 1/9] x86/p2m: Allow p2m_get_page_from_gfn to return shared entries Tamas K Lengyel
@ 2020-01-28 16:37   ` Jan Beulich
  2020-01-28 16:42     ` Tamas K Lengyel
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-01-28 16:37 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: xen-devel

On 27.01.2020 19:06, Tamas K Lengyel wrote:
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -574,11 +574,12 @@ struct page_info *p2m_get_page_from_gfn(
>                  if ( fdom == NULL )
>                      page = NULL;
>              }
> -            else if ( !get_page(page, p2m->domain) &&
> -                      /* Page could be shared */
> -                      (!dom_cow || !p2m_is_shared(*t) ||
> -                       !get_page(page, dom_cow)) )
> -                page = NULL;
> +            else
> +            {
> +                struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
> +                if ( !get_page(page, d) )
> +                    page = NULL;
> +            }
>          }
>          p2m_read_unlock(p2m);
>  
> @@ -594,8 +595,9 @@ struct page_info *p2m_get_page_from_gfn(
>      mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL);
>      if ( p2m_is_ram(*t) && mfn_valid(mfn) )
>      {
> +        struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
>          page = mfn_to_page(mfn);
> -        if ( !get_page(page, p2m->domain) )
> +        if ( !get_page(page, d) )
>              page = NULL;
>      }
>      put_gfn(p2m->domain, gfn_x(gfn));

Personally I would have preferred to go without new local variables
here, but I'm not the maintainer of this code. However, at the very
least blank lines would need inserting between declaration and
statements. With at least this done (which could be done while
committing)

Reviewed-by: Jan Beulich <jbeulich@suse.com>

As an aside, I don't think the title accurately describes the
change, since there's just one out of two cases which shared
entries weren't taken care of. Similarly the description doesn't
reflect this at all.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 1/9] x86/p2m: Allow p2m_get_page_from_gfn to return shared entries
  2020-01-28 16:37   ` Jan Beulich
@ 2020-01-28 16:42     ` Tamas K Lengyel
  0 siblings, 0 replies; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-28 16:42 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Tue, Jan 28, 2020 at 9:38 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> > --- a/xen/arch/x86/mm/p2m.c
> > +++ b/xen/arch/x86/mm/p2m.c
> > @@ -574,11 +574,12 @@ struct page_info *p2m_get_page_from_gfn(
> >                  if ( fdom == NULL )
> >                      page = NULL;
> >              }
> > -            else if ( !get_page(page, p2m->domain) &&
> > -                      /* Page could be shared */
> > -                      (!dom_cow || !p2m_is_shared(*t) ||
> > -                       !get_page(page, dom_cow)) )
> > -                page = NULL;
> > +            else
> > +            {
> > +                struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
> > +                if ( !get_page(page, d) )
> > +                    page = NULL;
> > +            }
> >          }
> >          p2m_read_unlock(p2m);
> >
> > @@ -594,8 +595,9 @@ struct page_info *p2m_get_page_from_gfn(
> >      mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL);
> >      if ( p2m_is_ram(*t) && mfn_valid(mfn) )
> >      {
> > +        struct domain *d = !p2m_is_shared(*t) ? p2m->domain : dom_cow;
> >          page = mfn_to_page(mfn);
> > -        if ( !get_page(page, p2m->domain) )
> > +        if ( !get_page(page, d) )
> >              page = NULL;
> >      }
> >      put_gfn(p2m->domain, gfn_x(gfn));
>
> Personally I would have preferred to go without new local variables
> here, but I'm not the maintainer of this code. However, at the very
> least blank lines would need inserting between declaration and
> statements. With at least this done (which could be done while
> committing)
>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
> As an aside, I don't think the title accurately describes the
> change, since there's just one out of two cases which shared
> entries weren't taken care of. Similarly the description doesn't
> reflect this at all.

Well, for our use-case it is broken right now. So just because it's
not broken in another doesn't make the title/description incorrect.

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 2/9] x86/hvm: introduce hvm_copy_context_and_params
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 2/9] x86/hvm: introduce hvm_copy_context_and_params Tamas K Lengyel
@ 2020-01-28 16:47   ` Jan Beulich
  2020-01-28 16:54     ` Tamas K Lengyel
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-01-28 16:47 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: xen-devel

On 27.01.2020 19:06, Tamas K Lengyel wrote:
> @@ -4139,49 +4140,32 @@ static int hvm_allow_set_param(struct domain *d,
>      return rc;
>  }
>  
> -static int hvmop_set_param(
> -    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
> +static int hvm_set_param(struct domain *d, uint32_t index, uint64_t value)
>  {
>      struct domain *curr_d = current->domain;
> -    struct xen_hvm_param a;
> -    struct domain *d;
>      struct vcpu *v;
>      int rc;
>  
> -    if ( copy_from_guest(&a, arg, 1) )
> -        return -EFAULT;
> -
> -    if ( a.index >= HVM_NR_PARAMS )
> +    if ( index >= HVM_NR_PARAMS )
>          return -EINVAL;

The equivalent of this on the "get" path now seems to be gone. Is
there any reason the one here is still needed?

> +int hvmop_set_param(
> +    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
> +{
> +    struct xen_hvm_param a;
> +    struct domain *d;
> +    int rc;
> +
> +    if ( copy_from_guest(&a, arg, 1) )
> +        return -EFAULT;
> +
> +    if ( a.index >= HVM_NR_PARAMS )
> +        return -EINVAL;
> +
> +    /* Make sure the above bound check is not bypassed during speculation. */
> +    block_speculation();
> +
> +    d = rcu_lock_domain_by_any_id(a.domid);
> +    if ( d == NULL )
> +        return -ESRCH;
> +
> +    rc = -EINVAL;
> +    if ( !is_hvm_domain(d) )
> +        goto out;

Despite your claim to have addressed my remaining comment from v4,
you still use goto here when there's an easy alternative.

> @@ -5297,6 +5322,37 @@ void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
>      alternative_vcall(hvm_funcs.set_segment_register, v, seg, reg);
>  }
>  
> +int hvm_copy_context_and_params(struct domain *dst, struct domain *src)
> +{
> +    int rc;
> +    unsigned int i;
> +    struct hvm_domain_context c = { };
> +
> +    for ( i = 0; i < HVM_NR_PARAMS; i++ )
> +    {
> +        uint64_t value = 0;
> +
> +        if ( hvm_get_param(src, i, &value) || !value )
> +            continue;
> +
> +        if ( (rc = hvm_set_param(dst, i, value)) )
> +            return rc;
> +    }
> +
> +    c.size = hvm_save_size(src);
> +    if ( (c.data = vmalloc(c.size)) == NULL )
> +        return -ENOMEM;
> +
> +    if ( !(rc = hvm_save(src, &c)) )

Also contrary to your claim you still do allocation and save
after the loop, leaving dst in a partly modified state in more
cases than necessary. May I ask that you go back to the v4
comments one more time?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 4/9] x86/mem_sharing: Replace MEM_SHARING_DEBUG with gdprintk
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 4/9] x86/mem_sharing: Replace MEM_SHARING_DEBUG with gdprintk Tamas K Lengyel
@ 2020-01-28 16:52   ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-01-28 16:52 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: xen-devel

On 27.01.2020 19:06, Tamas K Lengyel wrote:
> Using XENLOG_ERR level since this is only used in debug paths (ie. it's
> expected the user already has loglvl=all set). Also use %pd to print the domain
> ids.
> 
> Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>

Acked-by: Jan Beulich <jbeulich@suse.com>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 2/9] x86/hvm: introduce hvm_copy_context_and_params
  2020-01-28 16:47   ` Jan Beulich
@ 2020-01-28 16:54     ` Tamas K Lengyel
  2020-01-28 16:59       ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-28 16:54 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Tue, Jan 28, 2020 at 9:48 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> > @@ -4139,49 +4140,32 @@ static int hvm_allow_set_param(struct domain *d,
> >      return rc;
> >  }
> >
> > -static int hvmop_set_param(
> > -    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
> > +static int hvm_set_param(struct domain *d, uint32_t index, uint64_t value)
> >  {
> >      struct domain *curr_d = current->domain;
> > -    struct xen_hvm_param a;
> > -    struct domain *d;
> >      struct vcpu *v;
> >      int rc;
> >
> > -    if ( copy_from_guest(&a, arg, 1) )
> > -        return -EFAULT;
> > -
> > -    if ( a.index >= HVM_NR_PARAMS )
> > +    if ( index >= HVM_NR_PARAMS )
> >          return -EINVAL;
>
> The equivalent of this on the "get" path now seems to be gone. Is
> there any reason the one here is still needed?
>
> > +int hvmop_set_param(
> > +    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
> > +{
> > +    struct xen_hvm_param a;
> > +    struct domain *d;
> > +    int rc;
> > +
> > +    if ( copy_from_guest(&a, arg, 1) )
> > +        return -EFAULT;
> > +
> > +    if ( a.index >= HVM_NR_PARAMS )
> > +        return -EINVAL;
> > +
> > +    /* Make sure the above bound check is not bypassed during speculation. */
> > +    block_speculation();
> > +
> > +    d = rcu_lock_domain_by_any_id(a.domid);
> > +    if ( d == NULL )
> > +        return -ESRCH;
> > +
> > +    rc = -EINVAL;
> > +    if ( !is_hvm_domain(d) )
> > +        goto out;
>
> Despite your claim to have addressed my remaining comment from v4,
> you still use goto here when there's an easy alternative.

I didn't write this code. This is preexisting code that I'm just
moving. I don't want to rewrite preexisting code here.

>
> > @@ -5297,6 +5322,37 @@ void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
> >      alternative_vcall(hvm_funcs.set_segment_register, v, seg, reg);
> >  }
> >
> > +int hvm_copy_context_and_params(struct domain *dst, struct domain *src)
> > +{
> > +    int rc;
> > +    unsigned int i;
> > +    struct hvm_domain_context c = { };
> > +
> > +    for ( i = 0; i < HVM_NR_PARAMS; i++ )
> > +    {
> > +        uint64_t value = 0;
> > +
> > +        if ( hvm_get_param(src, i, &value) || !value )
> > +            continue;
> > +
> > +        if ( (rc = hvm_set_param(dst, i, value)) )
> > +            return rc;
> > +    }
> > +
> > +    c.size = hvm_save_size(src);
> > +    if ( (c.data = vmalloc(c.size)) == NULL )
> > +        return -ENOMEM;
> > +
> > +    if ( !(rc = hvm_save(src, &c)) )
>
> Also contrary to your claim you still do allocation and save
> after the loop, leaving dst in a partly modified state in more
> cases than necessary. May I ask that you go back to the v4
> comments one more time?

I guess I'll do that cause I thought you asked for the allocation to
be moved at the end. It was the other way around before, so I guess I
don't know what you are asking for.

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap Tamas K Lengyel
@ 2020-01-28 16:55   ` Jan Beulich
  2020-01-28 17:02     ` Tamas K Lengyel
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-01-28 16:55 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: xen-devel

On 27.01.2020 19:06, Tamas K Lengyel wrote:
> When plugging a hole in the target physmap don't use the access permission
> returned by __get_gfn_type_access as it can be non-sensical, leading to
> spurious vm_events being sent out for access violations at unexpected
> locations. Make use of p2m->default_access instead.

As before, to me "can be non-sensical" is insufficient as a reason
here. If it can also be a "good" value, it still remains unclear
why in that case p2m->default_access is nevertheless the right
value to use.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 2/9] x86/hvm: introduce hvm_copy_context_and_params
  2020-01-28 16:54     ` Tamas K Lengyel
@ 2020-01-28 16:59       ` Jan Beulich
  2020-01-28 17:03         ` Tamas K Lengyel
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-01-28 16:59 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: Xen-devel, Tamas K Lengyel

On 28.01.2020 17:54, Tamas K Lengyel wrote:
> On Tue, Jan 28, 2020 at 9:48 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 27.01.2020 19:06, Tamas K Lengyel wrote:
>>> @@ -4139,49 +4140,32 @@ static int hvm_allow_set_param(struct domain *d,
>>>      return rc;
>>>  }
>>>
>>> -static int hvmop_set_param(
>>> -    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
>>> +static int hvm_set_param(struct domain *d, uint32_t index, uint64_t value)
>>>  {
>>>      struct domain *curr_d = current->domain;
>>> -    struct xen_hvm_param a;
>>> -    struct domain *d;
>>>      struct vcpu *v;
>>>      int rc;
>>>
>>> -    if ( copy_from_guest(&a, arg, 1) )
>>> -        return -EFAULT;
>>> -
>>> -    if ( a.index >= HVM_NR_PARAMS )
>>> +    if ( index >= HVM_NR_PARAMS )
>>>          return -EINVAL;
>>
>> The equivalent of this on the "get" path now seems to be gone. Is
>> there any reason the one here is still needed?
>>
>>> +int hvmop_set_param(
>>> +    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
>>> +{
>>> +    struct xen_hvm_param a;
>>> +    struct domain *d;
>>> +    int rc;
>>> +
>>> +    if ( copy_from_guest(&a, arg, 1) )
>>> +        return -EFAULT;
>>> +
>>> +    if ( a.index >= HVM_NR_PARAMS )
>>> +        return -EINVAL;
>>> +
>>> +    /* Make sure the above bound check is not bypassed during speculation. */
>>> +    block_speculation();
>>> +
>>> +    d = rcu_lock_domain_by_any_id(a.domid);
>>> +    if ( d == NULL )
>>> +        return -ESRCH;
>>> +
>>> +    rc = -EINVAL;
>>> +    if ( !is_hvm_domain(d) )
>>> +        goto out;
>>
>> Despite your claim to have addressed my remaining comment from v4,
>> you still use goto here when there's an easy alternative.
> 
> I didn't write this code. This is preexisting code that I'm just
> moving. I don't want to rewrite preexisting code here.

Well, with the code movement you could (and imo should) _move_
the "out" label instead of duplicating it.

>>> @@ -5297,6 +5322,37 @@ void hvm_set_segment_register(struct vcpu *v, enum x86_segment seg,
>>>      alternative_vcall(hvm_funcs.set_segment_register, v, seg, reg);
>>>  }
>>>
>>> +int hvm_copy_context_and_params(struct domain *dst, struct domain *src)
>>> +{
>>> +    int rc;
>>> +    unsigned int i;
>>> +    struct hvm_domain_context c = { };
>>> +
>>> +    for ( i = 0; i < HVM_NR_PARAMS; i++ )
>>> +    {
>>> +        uint64_t value = 0;
>>> +
>>> +        if ( hvm_get_param(src, i, &value) || !value )
>>> +            continue;
>>> +
>>> +        if ( (rc = hvm_set_param(dst, i, value)) )
>>> +            return rc;
>>> +    }
>>> +
>>> +    c.size = hvm_save_size(src);
>>> +    if ( (c.data = vmalloc(c.size)) == NULL )
>>> +        return -ENOMEM;
>>> +
>>> +    if ( !(rc = hvm_save(src, &c)) )
>>
>> Also contrary to your claim you still do allocation and save
>> after the loop, leaving dst in a partly modified state in more
>> cases than necessary. May I ask that you go back to the v4
>> comments one more time?
> 
> I guess I'll do that cause I thought you asked for the allocation to
> be moved at the end. It was the other way around before, so I guess I
> don't know what you are asking for.

Alloc, save, loop over params, load, free.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
  2020-01-28 16:55   ` Jan Beulich
@ 2020-01-28 17:02     ` Tamas K Lengyel
  2020-01-29 13:27       ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-28 17:02 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Tue, Jan 28, 2020 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> > When plugging a hole in the target physmap don't use the access permission
> > returned by __get_gfn_type_access as it can be non-sensical, leading to
> > spurious vm_events being sent out for access violations at unexpected
> > locations. Make use of p2m->default_access instead.
>
> As before, to me "can be non-sensical" is insufficient as a reason
> here. If it can also be a "good" value, it still remains unclear
> why in that case p2m->default_access is nevertheless the right
> value to use.

I have already explained in the previous version of the patch why I
said "can be". Forgot to change the commit message from "can be" to
"is".

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 2/9] x86/hvm: introduce hvm_copy_context_and_params
  2020-01-28 16:59       ` Jan Beulich
@ 2020-01-28 17:03         ` Tamas K Lengyel
  0 siblings, 0 replies; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-28 17:03 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Tue, Jan 28, 2020 at 9:59 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 28.01.2020 17:54, Tamas K Lengyel wrote:
> > On Tue, Jan 28, 2020 at 9:48 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> >>> @@ -4139,49 +4140,32 @@ static int hvm_allow_set_param(struct domain *d,
> >>>      return rc;
> >>>  }
> >>>
> >>> -static int hvmop_set_param(
> >>> -    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
> >>> +static int hvm_set_param(struct domain *d, uint32_t index, uint64_t value)
> >>>  {
> >>>      struct domain *curr_d = current->domain;
> >>> -    struct xen_hvm_param a;
> >>> -    struct domain *d;
> >>>      struct vcpu *v;
> >>>      int rc;
> >>>
> >>> -    if ( copy_from_guest(&a, arg, 1) )
> >>> -        return -EFAULT;
> >>> -
> >>> -    if ( a.index >= HVM_NR_PARAMS )
> >>> +    if ( index >= HVM_NR_PARAMS )
> >>>          return -EINVAL;
> >>
> >> The equivalent of this on the "get" path now seems to be gone. Is
> >> there any reason the one here is still needed?
> >>
> >>> +int hvmop_set_param(
> >>> +    XEN_GUEST_HANDLE_PARAM(xen_hvm_param_t) arg)
> >>> +{
> >>> +    struct xen_hvm_param a;
> >>> +    struct domain *d;
> >>> +    int rc;
> >>> +
> >>> +    if ( copy_from_guest(&a, arg, 1) )
> >>> +        return -EFAULT;
> >>> +
> >>> +    if ( a.index >= HVM_NR_PARAMS )
> >>> +        return -EINVAL;
> >>> +
> >>> +    /* Make sure the above bound check is not bypassed during speculation. */
> >>> +    block_speculation();
> >>> +
> >>> +    d = rcu_lock_domain_by_any_id(a.domid);
> >>> +    if ( d == NULL )
> >>> +        return -ESRCH;
> >>> +
> >>> +    rc = -EINVAL;
> >>> +    if ( !is_hvm_domain(d) )
> >>> +        goto out;
> >>
> >> Despite your claim to have addressed my remaining comment from v4,
> >> you still use goto here when there's an easy alternative.
> >
> > I didn't write this code. This is preexisting code that I'm just
> > moving. I don't want to rewrite preexisting code here.
>
> Well, with the code movement you could (and imo should) _move_
> the "out" label instead of duplicating it.

I really rather not change preexisting code when possible.

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 7/9] xen/mem_access: Use __get_gfn_type_access in set_mem_access
  2020-01-27 18:06 ` [Xen-devel] [PATCH v6 7/9] xen/mem_access: Use __get_gfn_type_access in set_mem_access Tamas K Lengyel
@ 2020-01-29 13:09   ` Jan Beulich
  2020-01-29 13:53     ` Tamas K Lengyel
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-01-29 13:09 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: xen-devel

On 27.01.2020 19:06, Tamas K Lengyel wrote:
> Use __get_gfn_type_access instead of p2m->get_entry to trigger page-forking
> when the mem_access permission is being set on a page that has not yet been
> copied over from the parent.

You talking of page-forking here, don't you mean ...

> --- a/xen/arch/x86/mm/mem_access.c
> +++ b/xen/arch/x86/mm/mem_access.c
> @@ -303,11 +303,10 @@ static int set_mem_access(struct domain *d, struct p2m_domain *p2m,
>      ASSERT(!ap2m);
>  #endif
>      {
> -        mfn_t mfn;
>          p2m_access_t _a;
>          p2m_type_t t;
> -
> -        mfn = p2m->get_entry(p2m, gfn, &t, &_a, 0, NULL, NULL);
> +        mfn_t mfn = __get_gfn_type_access(p2m, gfn_x(gfn), &t, &_a,
> +                                          P2M_ALLOC, NULL, false);

... P2M_UNSHARE here?

Also shouldn't you have Cc-ed Petre and Alexandru on this patch
(for their R: entries) and at least George (perhaps also Andrew
and me) to get an ack, seeing that you're the only maintainer
of the file?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
  2020-01-28 17:02     ` Tamas K Lengyel
@ 2020-01-29 13:27       ` Jan Beulich
  2020-01-29 14:05         ` Tamas K Lengyel
  0 siblings, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-01-29 13:27 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: Xen-devel, Tamas K Lengyel

On 28.01.2020 18:02, Tamas K Lengyel wrote:
> On Tue, Jan 28, 2020 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 27.01.2020 19:06, Tamas K Lengyel wrote:
>>> When plugging a hole in the target physmap don't use the access permission
>>> returned by __get_gfn_type_access as it can be non-sensical, leading to
>>> spurious vm_events being sent out for access violations at unexpected
>>> locations. Make use of p2m->default_access instead.
>>
>> As before, to me "can be non-sensical" is insufficient as a reason
>> here. If it can also be a "good" value, it still remains unclear
>> why in that case p2m->default_access is nevertheless the right
>> value to use.
> 
> I have already explained in the previous version of the patch why I
> said "can be". Forgot to change the commit message from "can be" to
> "is".

Changing just the commit message would be easy while committing.
But even with the change I would ask why this is. Looking at
ept_get_entry() (and assuming p2m_pt_get_entry() will work
similarly, minus the p2m_access_t which can't come out of the
PTE just yet), I see

    if ( is_epte_valid(ept_entry) )
    {
        *t = p2m_recalc_type(recalc || ept_entry->recalc,
                             ept_entry->sa_p2mt, p2m, gfn);
        *a = ept_entry->access;

near its end. Which means even a hole can have its access field
set. So it's still not clear to me from the description why
p2m->default_access is uniformly the value to use. Wouldn't you
rather want to override the original value only if it's
p2m_access_n together with p2m_invalid or p2m_mmio_dm (but not
paged-out pages)? Of course then the question is whether there
wouldn't be an ambiguity with p2m_access_n having got set
explicitly on the page. But maybe this is impossible for
p2m_invalid / p2m_mmio_dm?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 7/9] xen/mem_access: Use __get_gfn_type_access in set_mem_access
  2020-01-29 13:09   ` Jan Beulich
@ 2020-01-29 13:53     ` Tamas K Lengyel
  2020-01-29 14:04       ` Jan Beulich
  0 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-29 13:53 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Wed, Jan 29, 2020 at 6:10 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> > Use __get_gfn_type_access instead of p2m->get_entry to trigger page-forking
> > when the mem_access permission is being set on a page that has not yet been
> > copied over from the parent.
>
> You talking of page-forking here, don't you mean ...
>
> > --- a/xen/arch/x86/mm/mem_access.c
> > +++ b/xen/arch/x86/mm/mem_access.c
> > @@ -303,11 +303,10 @@ static int set_mem_access(struct domain *d, struct p2m_domain *p2m,
> >      ASSERT(!ap2m);
> >  #endif
> >      {
> > -        mfn_t mfn;
> >          p2m_access_t _a;
> >          p2m_type_t t;
> > -
> > -        mfn = p2m->get_entry(p2m, gfn, &t, &_a, 0, NULL, NULL);
> > +        mfn_t mfn = __get_gfn_type_access(p2m, gfn_x(gfn), &t, &_a,
> > +                                          P2M_ALLOC, NULL, false);
>
> ... P2M_UNSHARE here?

No, P2M_UNSHARE is only required if you are doing a memory write.
Setting memory access permissions is not a memory write, so it's
sufficient to just allocate the p2m entry. P2M_ALLOCATE also
encompasses forking the entry if there is a parent VM.

>
> Also shouldn't you have Cc-ed Petre and Alexandru on this patch
> (for their R: entries) and at least George (perhaps also Andrew
> and me) to get an ack, seeing that you're the only maintainer
> of the file?

I've ran ./add_maintainers.pl on the patches, not sure why noone else got CC-d.

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 7/9] xen/mem_access: Use __get_gfn_type_access in set_mem_access
  2020-01-29 13:53     ` Tamas K Lengyel
@ 2020-01-29 14:04       ` Jan Beulich
  0 siblings, 0 replies; 29+ messages in thread
From: Jan Beulich @ 2020-01-29 14:04 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: Xen-devel, Tamas K Lengyel

On 29.01.2020 14:53, Tamas K Lengyel wrote:
> On Wed, Jan 29, 2020 at 6:10 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 27.01.2020 19:06, Tamas K Lengyel wrote:
>>> Use __get_gfn_type_access instead of p2m->get_entry to trigger page-forking
>>> when the mem_access permission is being set on a page that has not yet been
>>> copied over from the parent.
>>
>> You talking of page-forking here, don't you mean ...
>>
>>> --- a/xen/arch/x86/mm/mem_access.c
>>> +++ b/xen/arch/x86/mm/mem_access.c
>>> @@ -303,11 +303,10 @@ static int set_mem_access(struct domain *d, struct p2m_domain *p2m,
>>>      ASSERT(!ap2m);
>>>  #endif
>>>      {
>>> -        mfn_t mfn;
>>>          p2m_access_t _a;
>>>          p2m_type_t t;
>>> -
>>> -        mfn = p2m->get_entry(p2m, gfn, &t, &_a, 0, NULL, NULL);
>>> +        mfn_t mfn = __get_gfn_type_access(p2m, gfn_x(gfn), &t, &_a,
>>> +                                          P2M_ALLOC, NULL, false);
>>
>> ... P2M_UNSHARE here?
> 
> No, P2M_UNSHARE is only required if you are doing a memory write.
> Setting memory access permissions is not a memory write, so it's
> sufficient to just allocate the p2m entry. P2M_ALLOCATE also
> encompasses forking the entry if there is a parent VM.

Ah, I see. And hence
Reviewed-by: Jan Beulich <jbeulich@suse.com>

>> Also shouldn't you have Cc-ed Petre and Alexandru on this patch
>> (for their R: entries) and at least George (perhaps also Andrew
>> and me) to get an ack, seeing that you're the only maintainer
>> of the file?
> 
> I've ran ./add_maintainers.pl on the patches, not sure why noone else got CC-d.

It not picking up R: entries would seem like a bug to me. It not
knowing of nesting of maintainership is entirely expected (or
else it would also need to know that it's - in this case - you
who is invoking it).

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
  2020-01-29 13:27       ` Jan Beulich
@ 2020-01-29 14:05         ` Tamas K Lengyel
  2020-01-29 14:29           ` Tamas K Lengyel
  2020-01-29 14:44           ` Jan Beulich
  0 siblings, 2 replies; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-29 14:05 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Wed, Jan 29, 2020 at 6:27 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 28.01.2020 18:02, Tamas K Lengyel wrote:
> > On Tue, Jan 28, 2020 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> >>> When plugging a hole in the target physmap don't use the access permission
> >>> returned by __get_gfn_type_access as it can be non-sensical, leading to
> >>> spurious vm_events being sent out for access violations at unexpected
> >>> locations. Make use of p2m->default_access instead.
> >>
> >> As before, to me "can be non-sensical" is insufficient as a reason
> >> here. If it can also be a "good" value, it still remains unclear
> >> why in that case p2m->default_access is nevertheless the right
> >> value to use.
> >
> > I have already explained in the previous version of the patch why I
> > said "can be". Forgot to change the commit message from "can be" to
> > "is".
>
> Changing just the commit message would be easy while committing.
> But even with the change I would ask why this is. Looking at
> ept_get_entry() (and assuming p2m_pt_get_entry() will work
> similarly, minus the p2m_access_t which can't come out of the
> PTE just yet), I see
>
>     if ( is_epte_valid(ept_entry) )
>     {
>         *t = p2m_recalc_type(recalc || ept_entry->recalc,
>                              ept_entry->sa_p2mt, p2m, gfn);
>         *a = ept_entry->access;
>
> near its end. Which means even a hole can have its access field
> set. So it's still not clear to me from the description why
> p2m->default_access is uniformly the value to use. Wouldn't you
> rather want to override the original value only if it's
> p2m_access_n together with p2m_invalid or p2m_mmio_dm (but not
> paged-out pages)?

At this point I would just rather state that add_to_physmap only works
on actual holes, not with paged-out pages. In fact, I would like to
see mem_paging being dropped from the codebase entirely since it's
been abandoned for years and noone expressing any interest in keeping
it. In the interim I would rather not spend unnecessary cycles on
speculating about potential corner-cases of mem_paging when noone
actually uses it.

> Of course then the question is whether there
> wouldn't be an ambiguity with p2m_access_n having got set
> explicitly on the page. But maybe this is impossible for
> p2m_invalid / p2m_mmio_dm?

As far as mem_access permissions go, I don't know of any usecase that
would set mem_access permission on a hole even if by looks of it it is
technically possible. At this point I would rather just put this
corner-case's description in a comment.

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
  2020-01-29 14:05         ` Tamas K Lengyel
@ 2020-01-29 14:29           ` Tamas K Lengyel
  2020-01-29 14:44           ` Jan Beulich
  1 sibling, 0 replies; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-29 14:29 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Wed, Jan 29, 2020 at 7:05 AM Tamas K Lengyel
<tamas.k.lengyel@gmail.com> wrote:
>
> On Wed, Jan 29, 2020 at 6:27 AM Jan Beulich <jbeulich@suse.com> wrote:
> >
> > On 28.01.2020 18:02, Tamas K Lengyel wrote:
> > > On Tue, Jan 28, 2020 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
> > >>
> > >> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> > >>> When plugging a hole in the target physmap don't use the access permission
> > >>> returned by __get_gfn_type_access as it can be non-sensical, leading to
> > >>> spurious vm_events being sent out for access violations at unexpected
> > >>> locations. Make use of p2m->default_access instead.
> > >>
> > >> As before, to me "can be non-sensical" is insufficient as a reason
> > >> here. If it can also be a "good" value, it still remains unclear
> > >> why in that case p2m->default_access is nevertheless the right
> > >> value to use.
> > >
> > > I have already explained in the previous version of the patch why I
> > > said "can be". Forgot to change the commit message from "can be" to
> > > "is".
> >
> > Changing just the commit message would be easy while committing.
> > But even with the change I would ask why this is. Looking at
> > ept_get_entry() (and assuming p2m_pt_get_entry() will work
> > similarly, minus the p2m_access_t which can't come out of the
> > PTE just yet), I see
> >
> >     if ( is_epte_valid(ept_entry) )
> >     {
> >         *t = p2m_recalc_type(recalc || ept_entry->recalc,
> >                              ept_entry->sa_p2mt, p2m, gfn);
> >         *a = ept_entry->access;
> >
> > near its end. Which means even a hole can have its access field
> > set. So it's still not clear to me from the description why
> > p2m->default_access is uniformly the value to use. Wouldn't you
> > rather want to override the original value only if it's
> > p2m_access_n together with p2m_invalid or p2m_mmio_dm (but not
> > paged-out pages)?
>
> At this point I would just rather state that add_to_physmap only works
> on actual holes, not with paged-out pages. In fact, I would like to
> see mem_paging being dropped from the codebase entirely since it's
> been abandoned for years and noone expressing any interest in keeping
> it. In the interim I would rather not spend unnecessary cycles on
> speculating about potential corner-cases of mem_paging when noone
> actually uses it.
>
> > Of course then the question is whether there
> > wouldn't be an ambiguity with p2m_access_n having got set
> > explicitly on the page. But maybe this is impossible for
> > p2m_invalid / p2m_mmio_dm?
>
> As far as mem_access permissions go, I don't know of any usecase that
> would set mem_access permission on a hole even if by looks of it it is
> technically possible. At this point I would rather just put this
> corner-case's description in a comment.

A potential solution for this - if one would need it in the future -
would be to add another VM event type that Xen can use to alert the
toolstack that a pre-existing mem_access permission is being
overwritten by forking. That would allow the toolstack to reset the
permission if it wants to. But honestly, I think it's a lot of code
for a situation that I don't expect anyone will ever run into. Let's
just document it and move on.

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
  2020-01-29 14:05         ` Tamas K Lengyel
  2020-01-29 14:29           ` Tamas K Lengyel
@ 2020-01-29 14:44           ` Jan Beulich
  2020-01-29 14:56             ` Tamas K Lengyel
  1 sibling, 1 reply; 29+ messages in thread
From: Jan Beulich @ 2020-01-29 14:44 UTC (permalink / raw)
  To: Tamas K Lengyel; +Cc: Xen-devel, Tamas K Lengyel

On 29.01.2020 15:05, Tamas K Lengyel wrote:
> On Wed, Jan 29, 2020 at 6:27 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 28.01.2020 18:02, Tamas K Lengyel wrote:
>>> On Tue, Jan 28, 2020 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>
>>>> On 27.01.2020 19:06, Tamas K Lengyel wrote:
>>>>> When plugging a hole in the target physmap don't use the access permission
>>>>> returned by __get_gfn_type_access as it can be non-sensical, leading to
>>>>> spurious vm_events being sent out for access violations at unexpected
>>>>> locations. Make use of p2m->default_access instead.
>>>>
>>>> As before, to me "can be non-sensical" is insufficient as a reason
>>>> here. If it can also be a "good" value, it still remains unclear
>>>> why in that case p2m->default_access is nevertheless the right
>>>> value to use.
>>>
>>> I have already explained in the previous version of the patch why I
>>> said "can be". Forgot to change the commit message from "can be" to
>>> "is".
>>
>> Changing just the commit message would be easy while committing.
>> But even with the change I would ask why this is. Looking at
>> ept_get_entry() (and assuming p2m_pt_get_entry() will work
>> similarly, minus the p2m_access_t which can't come out of the
>> PTE just yet), I see
>>
>>     if ( is_epte_valid(ept_entry) )
>>     {
>>         *t = p2m_recalc_type(recalc || ept_entry->recalc,
>>                              ept_entry->sa_p2mt, p2m, gfn);
>>         *a = ept_entry->access;
>>
>> near its end. Which means even a hole can have its access field
>> set. So it's still not clear to me from the description why
>> p2m->default_access is uniformly the value to use. Wouldn't you
>> rather want to override the original value only if it's
>> p2m_access_n together with p2m_invalid or p2m_mmio_dm (but not
>> paged-out pages)?
> 
> At this point I would just rather state that add_to_physmap only works
> on actual holes, not with paged-out pages. In fact, I would like to
> see mem_paging being dropped from the codebase entirely since it's
> been abandoned for years and noone expressing any interest in keeping
> it. In the interim I would rather not spend unnecessary cycles on
> speculating about potential corner-cases of mem_paging when noone
> actually uses it.
> 
>> Of course then the question is whether there
>> wouldn't be an ambiguity with p2m_access_n having got set
>> explicitly on the page. But maybe this is impossible for
>> p2m_invalid / p2m_mmio_dm?
> 
> As far as mem_access permissions go, I don't know of any usecase that
> would set mem_access permission on a hole even if by looks of it it is
> technically possible. At this point I would rather just put this
> corner-case's description in a comment.

I think I would ack a revised patch having this properly explained.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
  2020-01-29 14:44           ` Jan Beulich
@ 2020-01-29 14:56             ` Tamas K Lengyel
  2020-01-29 16:09               ` Tamas K Lengyel
  0 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-29 14:56 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Wed, Jan 29, 2020 at 7:44 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 29.01.2020 15:05, Tamas K Lengyel wrote:
> > On Wed, Jan 29, 2020 at 6:27 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 28.01.2020 18:02, Tamas K Lengyel wrote:
> >>> On Tue, Jan 28, 2020 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>
> >>>> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> >>>>> When plugging a hole in the target physmap don't use the access permission
> >>>>> returned by __get_gfn_type_access as it can be non-sensical, leading to
> >>>>> spurious vm_events being sent out for access violations at unexpected
> >>>>> locations. Make use of p2m->default_access instead.
> >>>>
> >>>> As before, to me "can be non-sensical" is insufficient as a reason
> >>>> here. If it can also be a "good" value, it still remains unclear
> >>>> why in that case p2m->default_access is nevertheless the right
> >>>> value to use.
> >>>
> >>> I have already explained in the previous version of the patch why I
> >>> said "can be". Forgot to change the commit message from "can be" to
> >>> "is".
> >>
> >> Changing just the commit message would be easy while committing.
> >> But even with the change I would ask why this is. Looking at
> >> ept_get_entry() (and assuming p2m_pt_get_entry() will work
> >> similarly, minus the p2m_access_t which can't come out of the
> >> PTE just yet), I see
> >>
> >>     if ( is_epte_valid(ept_entry) )
> >>     {
> >>         *t = p2m_recalc_type(recalc || ept_entry->recalc,
> >>                              ept_entry->sa_p2mt, p2m, gfn);
> >>         *a = ept_entry->access;
> >>
> >> near its end. Which means even a hole can have its access field
> >> set. So it's still not clear to me from the description why
> >> p2m->default_access is uniformly the value to use. Wouldn't you
> >> rather want to override the original value only if it's
> >> p2m_access_n together with p2m_invalid or p2m_mmio_dm (but not
> >> paged-out pages)?
> >
> > At this point I would just rather state that add_to_physmap only works
> > on actual holes, not with paged-out pages. In fact, I would like to
> > see mem_paging being dropped from the codebase entirely since it's
> > been abandoned for years and noone expressing any interest in keeping
> > it. In the interim I would rather not spend unnecessary cycles on
> > speculating about potential corner-cases of mem_paging when noone
> > actually uses it.
> >
> >> Of course then the question is whether there
> >> wouldn't be an ambiguity with p2m_access_n having got set
> >> explicitly on the page. But maybe this is impossible for
> >> p2m_invalid / p2m_mmio_dm?
> >
> > As far as mem_access permissions go, I don't know of any usecase that
> > would set mem_access permission on a hole even if by looks of it it is
> > technically possible. At this point I would rather just put this
> > corner-case's description in a comment.
>
> I think I would ack a revised patch having this properly explained.

That's fine, I'll add some comments to this effect and reword the
commit message.

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
  2020-01-29 14:56             ` Tamas K Lengyel
@ 2020-01-29 16:09               ` Tamas K Lengyel
  2020-01-29 16:16                 ` Tamas K Lengyel
  0 siblings, 1 reply; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-29 16:09 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Wed, Jan 29, 2020 at 7:56 AM Tamas K Lengyel
<tamas.k.lengyel@gmail.com> wrote:
>
> On Wed, Jan 29, 2020 at 7:44 AM Jan Beulich <jbeulich@suse.com> wrote:
> >
> > On 29.01.2020 15:05, Tamas K Lengyel wrote:
> > > On Wed, Jan 29, 2020 at 6:27 AM Jan Beulich <jbeulich@suse.com> wrote:
> > >>
> > >> On 28.01.2020 18:02, Tamas K Lengyel wrote:
> > >>> On Tue, Jan 28, 2020 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
> > >>>>
> > >>>> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> > >>>>> When plugging a hole in the target physmap don't use the access permission
> > >>>>> returned by __get_gfn_type_access as it can be non-sensical, leading to
> > >>>>> spurious vm_events being sent out for access violations at unexpected
> > >>>>> locations. Make use of p2m->default_access instead.
> > >>>>
> > >>>> As before, to me "can be non-sensical" is insufficient as a reason
> > >>>> here. If it can also be a "good" value, it still remains unclear
> > >>>> why in that case p2m->default_access is nevertheless the right
> > >>>> value to use.
> > >>>
> > >>> I have already explained in the previous version of the patch why I
> > >>> said "can be". Forgot to change the commit message from "can be" to
> > >>> "is".
> > >>
> > >> Changing just the commit message would be easy while committing.
> > >> But even with the change I would ask why this is. Looking at
> > >> ept_get_entry() (and assuming p2m_pt_get_entry() will work
> > >> similarly, minus the p2m_access_t which can't come out of the
> > >> PTE just yet), I see
> > >>
> > >>     if ( is_epte_valid(ept_entry) )
> > >>     {
> > >>         *t = p2m_recalc_type(recalc || ept_entry->recalc,
> > >>                              ept_entry->sa_p2mt, p2m, gfn);
> > >>         *a = ept_entry->access;
> > >>
> > >> near its end. Which means even a hole can have its access field
> > >> set. So it's still not clear to me from the description why
> > >> p2m->default_access is uniformly the value to use. Wouldn't you
> > >> rather want to override the original value only if it's
> > >> p2m_access_n together with p2m_invalid or p2m_mmio_dm (but not
> > >> paged-out pages)?
> > >
> > > At this point I would just rather state that add_to_physmap only works
> > > on actual holes, not with paged-out pages. In fact, I would like to
> > > see mem_paging being dropped from the codebase entirely since it's
> > > been abandoned for years and noone expressing any interest in keeping
> > > it. In the interim I would rather not spend unnecessary cycles on
> > > speculating about potential corner-cases of mem_paging when noone
> > > actually uses it.
> > >
> > >> Of course then the question is whether there
> > >> wouldn't be an ambiguity with p2m_access_n having got set
> > >> explicitly on the page. But maybe this is impossible for
> > >> p2m_invalid / p2m_mmio_dm?
> > >
> > > As far as mem_access permissions go, I don't know of any usecase that
> > > would set mem_access permission on a hole even if by looks of it it is
> > > technically possible. At this point I would rather just put this
> > > corner-case's description in a comment.
> >
> > I think I would ack a revised patch having this properly explained.
>
> That's fine, I'll add some comments to this effect and reword the
> commit message.
>

Actually, looking at the implementation of p2m_set_mem_access it's not
clear to me whether we can even have a hole with a mem_access
permission set. Can you have a "hole" type with a valid gfn? If not,
this is a non-issue since p2m_set_mem_access only sets mem_access
permissions that pass !gfn_eq(gfn, INVALID_GFN).

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap
  2020-01-29 16:09               ` Tamas K Lengyel
@ 2020-01-29 16:16                 ` Tamas K Lengyel
  0 siblings, 0 replies; 29+ messages in thread
From: Tamas K Lengyel @ 2020-01-29 16:16 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Tamas K Lengyel

On Wed, Jan 29, 2020 at 9:09 AM Tamas K Lengyel
<tamas.k.lengyel@gmail.com> wrote:
>
> On Wed, Jan 29, 2020 at 7:56 AM Tamas K Lengyel
> <tamas.k.lengyel@gmail.com> wrote:
> >
> > On Wed, Jan 29, 2020 at 7:44 AM Jan Beulich <jbeulich@suse.com> wrote:
> > >
> > > On 29.01.2020 15:05, Tamas K Lengyel wrote:
> > > > On Wed, Jan 29, 2020 at 6:27 AM Jan Beulich <jbeulich@suse.com> wrote:
> > > >>
> > > >> On 28.01.2020 18:02, Tamas K Lengyel wrote:
> > > >>> On Tue, Jan 28, 2020 at 9:56 AM Jan Beulich <jbeulich@suse.com> wrote:
> > > >>>>
> > > >>>> On 27.01.2020 19:06, Tamas K Lengyel wrote:
> > > >>>>> When plugging a hole in the target physmap don't use the access permission
> > > >>>>> returned by __get_gfn_type_access as it can be non-sensical, leading to
> > > >>>>> spurious vm_events being sent out for access violations at unexpected
> > > >>>>> locations. Make use of p2m->default_access instead.
> > > >>>>
> > > >>>> As before, to me "can be non-sensical" is insufficient as a reason
> > > >>>> here. If it can also be a "good" value, it still remains unclear
> > > >>>> why in that case p2m->default_access is nevertheless the right
> > > >>>> value to use.
> > > >>>
> > > >>> I have already explained in the previous version of the patch why I
> > > >>> said "can be". Forgot to change the commit message from "can be" to
> > > >>> "is".
> > > >>
> > > >> Changing just the commit message would be easy while committing.
> > > >> But even with the change I would ask why this is. Looking at
> > > >> ept_get_entry() (and assuming p2m_pt_get_entry() will work
> > > >> similarly, minus the p2m_access_t which can't come out of the
> > > >> PTE just yet), I see
> > > >>
> > > >>     if ( is_epte_valid(ept_entry) )
> > > >>     {
> > > >>         *t = p2m_recalc_type(recalc || ept_entry->recalc,
> > > >>                              ept_entry->sa_p2mt, p2m, gfn);
> > > >>         *a = ept_entry->access;
> > > >>
> > > >> near its end. Which means even a hole can have its access field
> > > >> set. So it's still not clear to me from the description why
> > > >> p2m->default_access is uniformly the value to use. Wouldn't you
> > > >> rather want to override the original value only if it's
> > > >> p2m_access_n together with p2m_invalid or p2m_mmio_dm (but not
> > > >> paged-out pages)?
> > > >
> > > > At this point I would just rather state that add_to_physmap only works
> > > > on actual holes, not with paged-out pages. In fact, I would like to
> > > > see mem_paging being dropped from the codebase entirely since it's
> > > > been abandoned for years and noone expressing any interest in keeping
> > > > it. In the interim I would rather not spend unnecessary cycles on
> > > > speculating about potential corner-cases of mem_paging when noone
> > > > actually uses it.
> > > >
> > > >> Of course then the question is whether there
> > > >> wouldn't be an ambiguity with p2m_access_n having got set
> > > >> explicitly on the page. But maybe this is impossible for
> > > >> p2m_invalid / p2m_mmio_dm?
> > > >
> > > > As far as mem_access permissions go, I don't know of any usecase that
> > > > would set mem_access permission on a hole even if by looks of it it is
> > > > technically possible. At this point I would rather just put this
> > > > corner-case's description in a comment.
> > >
> > > I think I would ack a revised patch having this properly explained.
> >
> > That's fine, I'll add some comments to this effect and reword the
> > commit message.
> >
>
> Actually, looking at the implementation of p2m_set_mem_access it's not
> clear to me whether we can even have a hole with a mem_access
> permission set. Can you have a "hole" type with a valid gfn? If not,
> this is a non-issue since p2m_set_mem_access only sets mem_access
> permissions that pass !gfn_eq(gfn, INVALID_GFN).

Never mind, of course gfn can be anything (ie valid) since it's not
tied to whether the entry actually exists or not.

Tamas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2020-01-29 16:17 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1580148227.git.tamas.lengyel@intel.com>
2020-01-27 18:06 ` [Xen-devel] [PATCH v6 1/9] x86/p2m: Allow p2m_get_page_from_gfn to return shared entries Tamas K Lengyel
2020-01-28 16:37   ` Jan Beulich
2020-01-28 16:42     ` Tamas K Lengyel
2020-01-27 18:06 ` [Xen-devel] [PATCH v6 2/9] x86/hvm: introduce hvm_copy_context_and_params Tamas K Lengyel
2020-01-28 16:47   ` Jan Beulich
2020-01-28 16:54     ` Tamas K Lengyel
2020-01-28 16:59       ` Jan Beulich
2020-01-28 17:03         ` Tamas K Lengyel
2020-01-27 18:06 ` [Xen-devel] [PATCH v6 3/9] xen/x86: Make hap_get_allocation accessible Tamas K Lengyel
2020-01-27 18:06 ` [Xen-devel] [PATCH v6 4/9] x86/mem_sharing: Replace MEM_SHARING_DEBUG with gdprintk Tamas K Lengyel
2020-01-28 16:52   ` Jan Beulich
2020-01-27 18:06 ` [Xen-devel] [PATCH v6 5/9] x86/mem_sharing: use default_access in add_to_physmap Tamas K Lengyel
2020-01-28 16:55   ` Jan Beulich
2020-01-28 17:02     ` Tamas K Lengyel
2020-01-29 13:27       ` Jan Beulich
2020-01-29 14:05         ` Tamas K Lengyel
2020-01-29 14:29           ` Tamas K Lengyel
2020-01-29 14:44           ` Jan Beulich
2020-01-29 14:56             ` Tamas K Lengyel
2020-01-29 16:09               ` Tamas K Lengyel
2020-01-29 16:16                 ` Tamas K Lengyel
2020-01-27 18:06 ` [Xen-devel] [PATCH v6 6/9] xen/mem_sharing: VM forking Tamas K Lengyel
2020-01-27 18:06 ` [Xen-devel] [PATCH v6 7/9] xen/mem_access: Use __get_gfn_type_access in set_mem_access Tamas K Lengyel
2020-01-29 13:09   ` Jan Beulich
2020-01-29 13:53     ` Tamas K Lengyel
2020-01-29 14:04       ` Jan Beulich
2020-01-27 18:06 ` [Xen-devel] [PATCH v6 8/9] x86/mem_sharing: reset a fork Tamas K Lengyel
2020-01-28 15:13   ` Tamas K Lengyel
2020-01-27 18:06 ` [Xen-devel] [PATCH v6 9/9] xen/tools: VM forking toolstack side Tamas K Lengyel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.