All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 5/5] X86/vMCE: guest broken page handling when migration
@ 2012-10-10 14:47 Liu, Jinsong
  2012-10-16 10:44 ` Liu, Jinsong
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Liu, Jinsong @ 2012-10-10 14:47 UTC (permalink / raw)
  To: Ian Campbell, xen-devel
  Cc: Ian Jackson, Christoph Egger, Keir (Xen.org), Jan Beulich

[-- Attachment #1: Type: text/plain, Size: 8753 bytes --]

X86/vMCE: guest broken page handling when migration

This patch handles broken guest pages during migration.

At the sender, a broken page is not mapped and its contents are not
copied to the target, since accessing it could trigger a more serious
error (e.g. an SRAR error). Its pfn_type and pfn number are still
transferred, so that the target can take appropriate action.

At the target, the p2m type of the broken page is set to p2m_ram_broken,
so that if the guest accesses the broken page again, the guest is killed
as expected.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r 090447c780db tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Thu Oct 11 05:12:48 2012 +0800
+++ b/tools/libxc/xc_domain.c	Thu Oct 11 05:49:39 2012 +0800
@@ -307,6 +307,22 @@
     return do_domctl(xch, &domctl);
 }
 
+/* set broken page p2m */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
+    domctl.domain = (domid_t)domid;
+    domctl.u.set_broken_page_p2m.pfn = pfn;
+    ret = do_domctl(xch, &domctl);
+
+    return ret ? -1 : 0;
+}
+
 /* get info from hvm guest for save */
 int xc_domain_hvm_getcontext(xc_interface *xch,
                              uint32_t domid,
diff -r 090447c780db tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Thu Oct 11 05:12:48 2012 +0800
+++ b/tools/libxc/xc_domain_restore.c	Thu Oct 11 05:49:39 2012 +0800
@@ -962,9 +962,15 @@
 
     countpages = count;
     for (i = oldcount; i < buf->nr_pages; ++i)
-        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB
-            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XALLOC)
+    {
+        unsigned long pagetype;
+
+        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
+             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
+             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
             --countpages;
+    }
 
     if (!countpages)
         return count;
@@ -1200,6 +1206,17 @@
             /* a bogus/unmapped/allocate-only page: skip it */
             continue;
 
+        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
+        {
+            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
+            {
+                ERROR("Set p2m for broken page failed, "
+                      "dom=%d, pfn=%lx\n", dom, pfn);
+                goto err_mapped;
+            }
+            continue;
+        }
+
         if (pfn_err[i])
         {
             ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn %lx",
diff -r 090447c780db tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c	Thu Oct 11 05:12:48 2012 +0800
+++ b/tools/libxc/xc_domain_save.c	Thu Oct 11 05:49:39 2012 +0800
@@ -1285,6 +1285,13 @@
                 if ( !hvm )
                     gmfn = pfn_to_mfn(gmfn);
 
+                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_BROKEN )
+                {
+                    pfn_type[j] |= pfn_batch[j];
+                    ++run;
+                    continue;
+                }
+
                 if ( pfn_err[j] )
                 {
                     if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB )
@@ -1379,8 +1386,12 @@
                     }
                 }
 
-                /* skip pages that aren't present or are alloc-only */
+                /*
+                 * skip pages that aren't present,
+                 * or are broken, or are alloc-only
+                 */
                 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
+                    || pagetype == XEN_DOMCTL_PFINFO_BROKEN
                     || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
                     continue;
 
diff -r 090447c780db tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Thu Oct 11 05:12:48 2012 +0800
+++ b/tools/libxc/xenctrl.h	Thu Oct 11 05:49:39 2012 +0800
@@ -595,6 +595,17 @@
                                uint32_t domid);
 
 /**
+ * This function sets the p2m type for a broken page
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the id of the domain the broken page belongs to
+ * @parm pfn the pfn number of the broken page
+ * @return 0 on success, -1 on failure
+ */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn);
+
+/**
  * This function returns information about the context of a hvm domain
  * @parm xch a handle to an open hypervisor interface
  * @parm domid the domain to get information from
diff -r 090447c780db xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Thu Oct 11 05:12:48 2012 +0800
+++ b/xen/arch/x86/domctl.c	Thu Oct 11 05:49:39 2012 +0800
@@ -209,12 +209,18 @@
                 for ( j = 0; j < k; j++ )
                 {
                     unsigned long type = 0;
+                    p2m_type_t t;
 
-                    page = get_page_from_gfn(d, arr[j], NULL, P2M_ALLOC);
+                    page = get_page_from_gfn(d, arr[j], &t, P2M_ALLOC);
 
                     if ( unlikely(!page) ||
                          unlikely(is_xen_heap_page(page)) )
-                        type = XEN_DOMCTL_PFINFO_XTAB;
+                    {
+                        if ( p2m_is_broken(t) )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
+                        else
+                            type = XEN_DOMCTL_PFINFO_XTAB;
+                    }
                     else
                     {
                         switch( page->u.inuse.type_info & PGT_type_mask )
@@ -235,6 +241,9 @@
 
                         if ( page->u.inuse.type_info & PGT_pinned )
                             type |= XEN_DOMCTL_PFINFO_LPINTAB;
+
+                        if ( page->count_info & PGC_broken )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
                     }
 
                     if ( page )
@@ -1609,6 +1618,28 @@
     }
     break;
 
+    case XEN_DOMCTL_set_broken_page_p2m:
+    {
+        struct domain *d;
+        p2m_type_t pt;
+        unsigned long pfn;
+
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            pfn = domctl->u.set_broken_page_p2m.pfn;
+
+            get_gfn_query(d, pfn, &pt);
+            p2m_change_type(d, pfn, pt, p2m_ram_broken);
+            put_gfn(d, pfn);
+
+            rcu_unlock_domain(d);
+        }
+        else
+            ret = -ESRCH;
+    }
+    break;
+
     default:
         ret = iommu_do_domctl(domctl, u_domctl);
         break;
diff -r 090447c780db xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Thu Oct 11 05:12:48 2012 +0800
+++ b/xen/include/public/domctl.h	Thu Oct 11 05:49:39 2012 +0800
@@ -136,6 +136,7 @@
 #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
 #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
 #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page */
+#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
 #define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
 #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
 
@@ -835,6 +836,12 @@
 typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
 
+struct xen_domctl_set_broken_page_p2m {
+    uint64_aligned_t pfn;
+};
+typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -902,6 +909,7 @@
 #define XEN_DOMCTL_set_virq_handler              66
 #define XEN_DOMCTL_vmce_monitor_start            67
 #define XEN_DOMCTL_vmce_monitor_end              68
+#define XEN_DOMCTL_set_broken_page_p2m           69
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -957,6 +965,7 @@
         struct xen_domctl_audit_p2m         audit_p2m;
         struct xen_domctl_set_virq_handler  set_virq_handler;
         struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
+        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         uint8_t                             pad[128];

[-- Attachment #2: 5_vmce_migration_pfntype_broken.patch --]
[-- Type: application/octet-stream, Size: 8521 bytes --]

X86/vMCE: guest broken page handling when migration

This patch handles broken guest pages during migration.

At the sender, a broken page is not mapped and its contents are not
copied to the target, since accessing it could trigger a more serious
error (e.g. an SRAR error). Its pfn_type and pfn number are still
transferred, so that the target can take appropriate action.

At the target, the p2m type of the broken page is set to p2m_ram_broken,
so that if the guest accesses the broken page again, the guest is killed
as expected.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r 090447c780db tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Thu Oct 11 05:12:48 2012 +0800
+++ b/tools/libxc/xc_domain.c	Thu Oct 11 05:49:39 2012 +0800
@@ -307,6 +307,22 @@
     return do_domctl(xch, &domctl);
 }
 
+/* set broken page p2m */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
+    domctl.domain = (domid_t)domid;
+    domctl.u.set_broken_page_p2m.pfn = pfn;
+    ret = do_domctl(xch, &domctl);
+
+    return ret ? -1 : 0;
+}
+
 /* get info from hvm guest for save */
 int xc_domain_hvm_getcontext(xc_interface *xch,
                              uint32_t domid,
diff -r 090447c780db tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Thu Oct 11 05:12:48 2012 +0800
+++ b/tools/libxc/xc_domain_restore.c	Thu Oct 11 05:49:39 2012 +0800
@@ -962,9 +962,15 @@
 
     countpages = count;
     for (i = oldcount; i < buf->nr_pages; ++i)
-        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB
-            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XALLOC)
+    {
+        unsigned long pagetype;
+
+        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
+             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
+             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
             --countpages;
+    }
 
     if (!countpages)
         return count;
@@ -1200,6 +1206,17 @@
             /* a bogus/unmapped/allocate-only page: skip it */
             continue;
 
+        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
+        {
+            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
+            {
+                ERROR("Set p2m for broken page failed, "
+                      "dom=%d, pfn=%lx\n", dom, pfn);
+                goto err_mapped;
+            }
+            continue;
+        }
+
         if (pfn_err[i])
         {
             ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn %lx",
diff -r 090447c780db tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c	Thu Oct 11 05:12:48 2012 +0800
+++ b/tools/libxc/xc_domain_save.c	Thu Oct 11 05:49:39 2012 +0800
@@ -1285,6 +1285,13 @@
                 if ( !hvm )
                     gmfn = pfn_to_mfn(gmfn);
 
+                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_BROKEN )
+                {
+                    pfn_type[j] |= pfn_batch[j];
+                    ++run;
+                    continue;
+                }
+
                 if ( pfn_err[j] )
                 {
                     if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB )
@@ -1379,8 +1386,12 @@
                     }
                 }
 
-                /* skip pages that aren't present or are alloc-only */
+                /*
+                 * skip pages that aren't present,
+                 * or are broken, or are alloc-only
+                 */
                 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
+                    || pagetype == XEN_DOMCTL_PFINFO_BROKEN
                     || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
                     continue;
 
diff -r 090447c780db tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Thu Oct 11 05:12:48 2012 +0800
+++ b/tools/libxc/xenctrl.h	Thu Oct 11 05:49:39 2012 +0800
@@ -595,6 +595,17 @@
                                uint32_t domid);
 
 /**
+ * This function sets the p2m type for a broken page
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the id of the domain the broken page belongs to
+ * @parm pfn the pfn number of the broken page
+ * @return 0 on success, -1 on failure
+ */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn);
+
+/**
  * This function returns information about the context of a hvm domain
  * @parm xch a handle to an open hypervisor interface
  * @parm domid the domain to get information from
diff -r 090447c780db xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Thu Oct 11 05:12:48 2012 +0800
+++ b/xen/arch/x86/domctl.c	Thu Oct 11 05:49:39 2012 +0800
@@ -209,12 +209,18 @@
                 for ( j = 0; j < k; j++ )
                 {
                     unsigned long type = 0;
+                    p2m_type_t t;
 
-                    page = get_page_from_gfn(d, arr[j], NULL, P2M_ALLOC);
+                    page = get_page_from_gfn(d, arr[j], &t, P2M_ALLOC);
 
                     if ( unlikely(!page) ||
                          unlikely(is_xen_heap_page(page)) )
-                        type = XEN_DOMCTL_PFINFO_XTAB;
+                    {
+                        if ( p2m_is_broken(t) )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
+                        else
+                            type = XEN_DOMCTL_PFINFO_XTAB;
+                    }
                     else
                     {
                         switch( page->u.inuse.type_info & PGT_type_mask )
@@ -235,6 +241,9 @@
 
                         if ( page->u.inuse.type_info & PGT_pinned )
                             type |= XEN_DOMCTL_PFINFO_LPINTAB;
+
+                        if ( page->count_info & PGC_broken )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
                     }
 
                     if ( page )
@@ -1609,6 +1618,28 @@
     }
     break;
 
+    case XEN_DOMCTL_set_broken_page_p2m:
+    {
+        struct domain *d;
+        p2m_type_t pt;
+        unsigned long pfn;
+
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            pfn = domctl->u.set_broken_page_p2m.pfn;
+
+            get_gfn_query(d, pfn, &pt);
+            p2m_change_type(d, pfn, pt, p2m_ram_broken);
+            put_gfn(d, pfn);
+
+            rcu_unlock_domain(d);
+        }
+        else
+            ret = -ESRCH;
+    }
+    break;
+
     default:
         ret = iommu_do_domctl(domctl, u_domctl);
         break;
diff -r 090447c780db xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Thu Oct 11 05:12:48 2012 +0800
+++ b/xen/include/public/domctl.h	Thu Oct 11 05:49:39 2012 +0800
@@ -136,6 +136,7 @@
 #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
 #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
 #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page */
+#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
 #define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
 #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
 
@@ -835,6 +836,12 @@
 typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
 
+struct xen_domctl_set_broken_page_p2m {
+    uint64_aligned_t pfn;
+};
+typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -902,6 +909,7 @@
 #define XEN_DOMCTL_set_virq_handler              66
 #define XEN_DOMCTL_vmce_monitor_start            67
 #define XEN_DOMCTL_vmce_monitor_end              68
+#define XEN_DOMCTL_set_broken_page_p2m           69
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -957,6 +965,7 @@
         struct xen_domctl_audit_p2m         audit_p2m;
         struct xen_domctl_set_virq_handler  set_virq_handler;
         struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
+        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         uint8_t                             pad[128];

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-10-10 14:47 [PATCH 5/5] X86/vMCE: guest broken page handling when migration Liu, Jinsong
@ 2012-10-16 10:44 ` Liu, Jinsong
  2012-10-19 15:14 ` Ian Jackson
  2012-10-19 16:54 ` George Dunlap
  2 siblings, 0 replies; 15+ messages in thread
From: Liu, Jinsong @ 2012-10-16 10:44 UTC (permalink / raw)
  To: Ian Jackson, Ian Campbell, xen-devel
  Cc: Christoph Egger, Keir (Xen.org), Jan Beulich

Ping?

Thanks,
Jinsong

Liu, Jinsong wrote:
> X86/vMCE: guest broken page handling when migration
> 
> This patch is used to handle guest broken page when migration.
> 
> At sender, the broken page would not be mapped, and the error page
> content would not be copied to target, otherwise it may trigger more
> serious error (i.e. SRAR error). While its pfn_type and pfn number
> would be transferred to target so that target take appropriate action.
> 
> At target, it would set p2m as p2m_ram_broken for broken page, so that
> if guest access the broken page again, it would kill guest as
> expected. 
> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> 
> diff -r 090447c780db tools/libxc/xc_domain.c
> --- a/tools/libxc/xc_domain.c	Thu Oct 11 05:12:48 2012 +0800
> +++ b/tools/libxc/xc_domain.c	Thu Oct 11 05:49:39 2012 +0800
> @@ -307,6 +307,22 @@
>      return do_domctl(xch, &domctl);
>  }
> 
> +/* set broken page p2m */
> +int xc_set_broken_page_p2m(xc_interface *xch,
> +                           uint32_t domid,
> +                           unsigned long pfn)
> +{
> +    int ret;
> +    DECLARE_DOMCTL;
> +
> +    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
> +    domctl.domain = (domid_t)domid;
> +    domctl.u.set_broken_page_p2m.pfn = pfn;
> +    ret = do_domctl(xch, &domctl);
> +
> +    return ret ? -1 : 0;
> +}
> +
>  /* get info from hvm guest for save */
>  int xc_domain_hvm_getcontext(xc_interface *xch,
>                               uint32_t domid,
> diff -r 090447c780db tools/libxc/xc_domain_restore.c
> --- a/tools/libxc/xc_domain_restore.c	Thu Oct 11 05:12:48 2012 +0800
> +++ b/tools/libxc/xc_domain_restore.c	Thu Oct 11 05:49:39 2012 +0800
> @@ -962,9 +962,15 @@
> 
>      countpages = count;
>      for (i = oldcount; i < buf->nr_pages; ++i)
> -        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
> XEN_DOMCTL_PFINFO_XTAB 
> -            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
> XEN_DOMCTL_PFINFO_XALLOC) +    {
> +        unsigned long pagetype;
> +
> +        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
> +        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
> +             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
> +             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
>              --countpages;
> +    }
> 
>      if (!countpages)
>          return count;
> @@ -1200,6 +1206,17 @@
>              /* a bogus/unmapped/allocate-only page: skip it */
>              continue;
> 
> +        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
> +        {
> +            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
> +            {
> +                ERROR("Set p2m for broken page failed, "
> +                      "dom=%d, pfn=%lx\n", dom, pfn);
> +                goto err_mapped;
> +            }
> +            continue;
> +        }
> +
>          if (pfn_err[i])
>          {
>              ERROR("unexpected PFN mapping failure pfn %lx map_mfn
> %lx p2m_mfn %lx", 
> diff -r 090447c780db tools/libxc/xc_domain_save.c
> --- a/tools/libxc/xc_domain_save.c	Thu Oct 11 05:12:48 2012 +0800
> +++ b/tools/libxc/xc_domain_save.c	Thu Oct 11 05:49:39 2012 +0800
> @@ -1285,6 +1285,13 @@
>                  if ( !hvm )
>                      gmfn = pfn_to_mfn(gmfn);
> 
> +                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_BROKEN )
> +                {
> +                    pfn_type[j] |= pfn_batch[j];
> +                    ++run;
> +                    continue;
> +                }
> +
>                  if ( pfn_err[j] )
>                  {
>                      if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB )
> @@ -1379,8 +1386,12 @@
>                      }
>                  }
> 
> -                /* skip pages that aren't present or are alloc-only
> */ +                /*
> +                 * skip pages that aren't present,
> +                 * or are broken, or are alloc-only
> +                 */
>                  if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
> +                    || pagetype == XEN_DOMCTL_PFINFO_BROKEN
>                      || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
>                      continue;
> 
> diff -r 090447c780db tools/libxc/xenctrl.h
> --- a/tools/libxc/xenctrl.h	Thu Oct 11 05:12:48 2012 +0800
> +++ b/tools/libxc/xenctrl.h	Thu Oct 11 05:49:39 2012 +0800
> @@ -595,6 +595,17 @@
>                                 uint32_t domid);
> 
>  /**
> + * This function set p2m for broken page
> + * &parm xch a handle to an open hypervisor interface
> + * @parm domid the domain id which broken page belong to
> + * @parm pfn the pfn number of the broken page
> + * @return 0 on success, -1 on failure
> + */
> +int xc_set_broken_page_p2m(xc_interface *xch,
> +                           uint32_t domid,
> +                           unsigned long pfn);
> +
> +/**
>   * This function returns information about the context of a hvm
> domain 
>   * @parm xch a handle to an open hypervisor interface
>   * @parm domid the domain to get information from
> diff -r 090447c780db xen/arch/x86/domctl.c
> --- a/xen/arch/x86/domctl.c	Thu Oct 11 05:12:48 2012 +0800
> +++ b/xen/arch/x86/domctl.c	Thu Oct 11 05:49:39 2012 +0800
> @@ -209,12 +209,18 @@
>                  for ( j = 0; j < k; j++ )
>                  {
>                      unsigned long type = 0;
> +                    p2m_type_t t;
> 
> -                    page = get_page_from_gfn(d, arr[j], NULL,
> P2M_ALLOC); +                    page = get_page_from_gfn(d, arr[j],
> &t, P2M_ALLOC); 
> 
>                      if ( unlikely(!page) ||
>                           unlikely(is_xen_heap_page(page)) )
> -                        type = XEN_DOMCTL_PFINFO_XTAB;
> +                    {
> +                        if ( p2m_is_broken(t) )
> +                            type = XEN_DOMCTL_PFINFO_BROKEN;
> +                        else
> +                            type = XEN_DOMCTL_PFINFO_XTAB;
> +                    }
>                      else
>                      {
>                          switch( page->u.inuse.type_info &
> PGT_type_mask ) @@ -235,6 +241,9 @@
> 
>                          if ( page->u.inuse.type_info & PGT_pinned )
>                              type |= XEN_DOMCTL_PFINFO_LPINTAB;
> +
> +                        if ( page->count_info & PGC_broken )
> +                            type = XEN_DOMCTL_PFINFO_BROKEN;
>                      }
> 
>                      if ( page )
> @@ -1609,6 +1618,28 @@
>      }
>      break;
> 
> +    case XEN_DOMCTL_set_broken_page_p2m:
> +    {
> +        struct domain *d;
> +        p2m_type_t pt;
> +        unsigned long pfn;
> +
> +        d = rcu_lock_domain_by_id(domctl->domain);
> +        if ( d != NULL )
> +        {
> +            pfn = domctl->u.set_broken_page_p2m.pfn;
> +
> +            get_gfn_query(d, pfn, &pt);
> +            p2m_change_type(d, pfn, pt, p2m_ram_broken);
> +            put_gfn(d, pfn);
> +
> +            rcu_unlock_domain(d);
> +        }
> +        else
> +            ret = -ESRCH;
> +    }
> +    break;
> +
>      default:
>          ret = iommu_do_domctl(domctl, u_domctl);
>          break;
> diff -r 090447c780db xen/include/public/domctl.h
> --- a/xen/include/public/domctl.h	Thu Oct 11 05:12:48 2012 +0800
> +++ b/xen/include/public/domctl.h	Thu Oct 11 05:49:39 2012 +0800
> @@ -136,6 +136,7 @@
>  #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
>  #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
>  #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page */
> +#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
>  #define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
>  #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
> 
> @@ -835,6 +836,12 @@
>  typedef struct xen_domctl_set_access_required
>  xen_domctl_set_access_required_t;
> DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t); 
> 
> +struct xen_domctl_set_broken_page_p2m {
> +    uint64_aligned_t pfn;
> +};
> +typedef struct xen_domctl_set_broken_page_p2m
> xen_domctl_set_broken_page_p2m_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t); +
>  struct xen_domctl {
>      uint32_t cmd;
>  #define XEN_DOMCTL_createdomain                   1
> @@ -902,6 +909,7 @@
>  #define XEN_DOMCTL_set_virq_handler              66
>  #define XEN_DOMCTL_vmce_monitor_start            67
>  #define XEN_DOMCTL_vmce_monitor_end              68
> +#define XEN_DOMCTL_set_broken_page_p2m           69
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -957,6 +965,7 @@
>          struct xen_domctl_audit_p2m         audit_p2m;
>          struct xen_domctl_set_virq_handler  set_virq_handler;
>          struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
> +        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
>          uint8_t                             pad[128];

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-10-10 14:47 [PATCH 5/5] X86/vMCE: guest broken page handling when migration Liu, Jinsong
  2012-10-16 10:44 ` Liu, Jinsong
@ 2012-10-19 15:14 ` Ian Jackson
  2012-10-19 17:09   ` George Dunlap
  2012-10-19 16:54 ` George Dunlap
  2 siblings, 1 reply; 15+ messages in thread
From: Ian Jackson @ 2012-10-19 15:14 UTC (permalink / raw)
  To: Liu, Jinsong
  Cc: Christoph Egger, xen-devel, Keir (Xen.org), Ian Campbell, Jan Beulich

Liu, Jinsong writes ("[Xen-devel] [PATCH 5/5] X86/vMCE: guest broken page handling when migration"):
> X86/vMCE: guest broken page handling when migration
> 
> This patch is used to handle guest broken page when migration.

This looks plausible to me, as far as the tools go.  Can you explain
how you have tested this ?  Did you manage to do any tests of the
remus codepaths ?

Thanks,
Ian.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-10-10 14:47 [PATCH 5/5] X86/vMCE: guest broken page handling when migration Liu, Jinsong
  2012-10-16 10:44 ` Liu, Jinsong
  2012-10-19 15:14 ` Ian Jackson
@ 2012-10-19 16:54 ` George Dunlap
  2 siblings, 0 replies; 15+ messages in thread
From: George Dunlap @ 2012-10-19 16:54 UTC (permalink / raw)
  To: Liu, Jinsong
  Cc: Christoph Egger, xen-devel, Keir (Xen.org),
	Ian Campbell, Ian Jackson, Jan Beulich

On Wed, Oct 10, 2012 at 3:47 PM, Liu, Jinsong <jinsong.liu@intel.com> wrote:
> X86/vMCE: guest broken page handling when migration
>
> This patch is used to handle guest broken page when migration.
>
> At sender, the broken page would not be mapped, and the error page
> content would not be copied to target, otherwise it may trigger more
> serious error (i.e. SRAR error). While its pfn_type and pfn number
> would be transferred to target so that target take appropriate action.
>
> At target, it would set p2m as p2m_ram_broken for broken page, so that
> if guest access the broken page again, it would kill guest as expected.
>
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

The changes to the save/restore code look correct to me; they
shouldn't break backwards-compatibility (i.e., migrating 4.2 -> 4.3).

(Save/restore algorithm) Acked-by: George Dunlap <george.dunlap@eu.citrix.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-10-19 15:14 ` Ian Jackson
@ 2012-10-19 17:09   ` George Dunlap
  2012-10-19 21:07     ` Liu, Jinsong
  2012-10-22 10:54     ` Ian Jackson
  0 siblings, 2 replies; 15+ messages in thread
From: George Dunlap @ 2012-10-19 17:09 UTC (permalink / raw)
  To: Ian Jackson
  Cc: Liu, Jinsong, Christoph Egger, xen-devel, Keir (Xen.org),
	Ian Campbell, Jan Beulich

On Fri, Oct 19, 2012 at 4:14 PM, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
> Liu, Jinsong writes ("[Xen-devel] [PATCH 5/5] X86/vMCE: guest broken page handling when migration"):
>> X86/vMCE: guest broken page handling when migration
>>
>> This patch is used to handle guest broken page when migration.
>
> This looks plausible to me, as far as the tools go.  Can you explain
> how you have tested this ?  Did you manage to do any tests of the
> remus codepaths ?

I'm pretty sure that this shouldn't cause any problems with Remus.  If
it's difficult for Jinsong to test Remus, I think probably OK to
commit it, and then revert it if the Remus guys have any problems.

 -George

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-10-19 17:09   ` George Dunlap
@ 2012-10-19 21:07     ` Liu, Jinsong
  2012-10-22 10:54     ` Ian Jackson
  1 sibling, 0 replies; 15+ messages in thread
From: Liu, Jinsong @ 2012-10-19 21:07 UTC (permalink / raw)
  To: George Dunlap, Ian Jackson
  Cc: Christoph Egger, xen-devel, Keir (Xen.org), Ian Campbell, Jan Beulich

[-- Attachment #1: Type: text/plain, Size: 6278 bytes --]

George Dunlap wrote:
> On Fri, Oct 19, 2012 at 4:14 PM, Ian Jackson
> <Ian.Jackson@eu.citrix.com> wrote: 
>> Liu, Jinsong writes ("[Xen-devel] [PATCH 5/5] X86/vMCE: guest broken
>> page handling when migration"): 
>>> X86/vMCE: guest broken page handling when migration
>>> 
>>> This patch is used to handle guest broken page when migration.
>> 
>> This looks plausible to me, as far as the tools go.  Can you explain
>> how you have tested this ?  Did you manage to do any tests of the
>> remus codepaths ?
> 
> I'm pretty sure that this shouldn't cause any problems with Remus.  If
> it's difficult for Jinsong to test Remus, I think probably OK to
> commit it, and then revert it if the Remus guys have any problems.
> 
>  -George

Attached are 2 test programs. My test steps are:
1. at sender, inject a vMCE into the guest, then scan for & record the broken page (pfn1);
2. do live migration, which succeeds;
3. at target, scan for the broken page (pfn2) and compare it with pfn1;
4. result: pfn1 = pfn2.

I tested live migration, but not remus.

Thanks,
Jinsong

===============
test program 1:

diff -r ad02805c77b8 xen/arch/x86/cpu/mcheck/vmce.c
--- a/xen/arch/x86/cpu/mcheck/vmce.c	Sat Jul 28 06:35:47 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/vmce.c	Sat Aug 25 04:01:19 2012 +0800
@@ -28,6 +28,7 @@
 #include <xen/smp.h>
 #include <xen/mm.h>
 #include <xen/hvm/save.h>
+#include <xen/keyhandler.h>
 #include <asm/processor.h>
 #include <public/sysctl.h>
 #include <asm/system.h>
@@ -449,3 +450,93 @@
     return rc;
 }
 
+
+/***************************************************/
+
+unsigned long broken_pfn = 0;
+
+static void sender_broken_page(unsigned char key)
+{
+    domid_t sender_domid = 1;
+    struct domain *d;
+    struct p2m_domain *p2m;
+    unsigned long pfn;
+    p2m_type_t pt;
+
+    d = rcu_lock_domain_by_id(sender_domid);
+    p2m = p2m_get_hostp2m(d);
+
+    for ( pfn = 0; pfn < p2m->max_mapped_pfn; pfn++ )
+    {
+        get_gfn_query(d, pfn, &pt);
+        if ( unlikely(p2m_is_broken(pt)) )
+        {
+            printk("!!!!!! before migration, find broken page, "
+                   "dom = %d, pfn = %lx, pt = %d\n",
+                    (unsigned int)d->domain_id, pfn, (unsigned int)pt);
+            broken_pfn = pfn;
+        }
+        put_gfn(d, pfn);
+    }
+
+    rcu_unlock_domain(d);
+}
+
+static struct keyhandler sender_broken_page_keyhandler = {
+    .diagnostic = 1,
+    .u.fn = sender_broken_page,
+};
+
+static __init int sender_broken_page_init(void)
+{
+    register_keyhandler('6', &sender_broken_page_keyhandler);
+    return 0;
+}
+__initcall(sender_broken_page_init);
+
+/****************************************************/
+
+
+static void target_broken_page(unsigned char key)
+{
+    domid_t target_domid = 2;
+    struct domain *d;
+    struct p2m_domain *p2m;
+    unsigned long pfn;
+    p2m_type_t pt;
+
+    d = rcu_lock_domain_by_id(target_domid);
+    p2m = p2m_get_hostp2m(d);
+
+    for ( pfn = 0; pfn < p2m->max_mapped_pfn; pfn++ )
+    {
+        get_gfn_query(d, pfn, &pt);
+        if ( unlikely(p2m_is_broken(pt)) )
+            printk("@@@@@@ after migration, find broken page, "
+                   "dom = %d, pfn = %lx, pt = %d\n",
+                    (unsigned int)d->domain_id, pfn, (unsigned int)pt);
+        put_gfn(d, pfn);
+    }
+
+    get_gfn_query(d, broken_pfn, &pt);
+    printk("@@@@@@ after migration, broken_pfn type is, "
+           "dom = %d, broken_pfn = %lx, pt = %d\n",
+           (unsigned int)d->domain_id, broken_pfn, (unsigned int)pt);
+    put_gfn(d, broken_pfn);
+
+    rcu_unlock_domain(d);
+}
+
+static struct keyhandler target_broken_page_keyhandler = {
+    .diagnostic = 1,
+    .u.fn = target_broken_page,
+};
+
+static __init int target_broken_page_init(void)
+{
+    register_keyhandler('7', &target_broken_page_keyhandler);
+    return 0;
+}
+__initcall(target_broken_page_init);
+
+/*****************************************************/

===============
test program 2:

diff -r de462f2f1db8 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Mon Aug 27 21:38:11 2012 +0800
+++ b/tools/libxc/xc_domain_restore.c	Mon Aug 27 21:56:53 2012 +0800
@@ -1207,8 +1207,11 @@
 
         if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
         {
+            fprintf(stderr, "@target, find broken page, "
+                      "dom=%d, pfn=0x%lx, pagetype=0x%lx\n", dom, pfn, pagetype);
             if ( xc_set_broken_page_p2m(xch, dom, pfn) )
             {
+                fprintf(stderr, "... ERROR WHEN SET P2M FOR BROKEN PAGE\n");
                 ERROR("Set p2m for broken page fail, "
                       "dom=%d, pfn=%lx\n", dom, pfn);
                 goto err_mapped;
diff -r de462f2f1db8 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c	Mon Aug 27 21:38:11 2012 +0800
+++ b/tools/libxc/xc_domain_save.c	Mon Aug 27 21:56:53 2012 +0800
@@ -1289,6 +1289,9 @@
                 {
                     pfn_type[j] |= pfn_batch[j];
                     ++run;
+                    fprintf(stderr, "@sender, find broken page, "
+                            "pfn_err[0x%x]=%d, pfn_type[0x%x] =%lx\n",
+                    (int)j, (int)pfn_err[j], (int)j, (unsigned long)pfn_type[j]);
                     continue;
                 }
 
diff -r de462f2f1db8 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Mon Aug 27 21:38:11 2012 +0800
+++ b/xen/arch/x86/domctl.c	Mon Aug 27 21:56:53 2012 +0800
@@ -1566,13 +1566,19 @@
         struct domain *d;
         p2m_type_t pt;
         unsigned long pfn;
+        mfn_t r_mfn;
 
         d = rcu_lock_domain_by_id(domctl->domain);
         if ( d != NULL )
         {
             pfn = domctl->u.set_broken_page_p2m.pfn;
 
-            get_gfn_query(d, pfn, &pt);
+            r_mfn = get_gfn_query(d, pfn, &pt);
+            printk("@XEN_DOMCTL_set_broken_page_p2m, before set p2m, "
+                   "pfn=%lx, pt=%d\n", pfn, (int)pt);
+            if (!mfn_valid(mfn_x(r_mfn)))
+                printk("r_mfn IS INVALID!!!\n");
+
             p2m_change_type(d, pfn, pt, p2m_ram_broken);
             put_gfn(d, pfn);
 =============

[-- Attachment #2: test1.patch --]
[-- Type: application/octet-stream, Size: 2861 bytes --]

diff -r ad02805c77b8 xen/arch/x86/cpu/mcheck/vmce.c
--- a/xen/arch/x86/cpu/mcheck/vmce.c	Sat Jul 28 06:35:47 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/vmce.c	Sat Aug 25 04:01:19 2012 +0800
@@ -28,6 +28,7 @@
 #include <xen/smp.h>
 #include <xen/mm.h>
 #include <xen/hvm/save.h>
+#include <xen/keyhandler.h>
 #include <asm/processor.h>
 #include <public/sysctl.h>
 #include <asm/system.h>
@@ -449,3 +450,93 @@
     return rc;
 }
 
+
+/***************************************************/
+
+unsigned long broken_pfn = 0;
+
+static void sender_broken_page(unsigned char key)
+{
+    domid_t sender_domid = 1;
+    struct domain *d;
+    struct p2m_domain *p2m;
+    unsigned long pfn;
+    p2m_type_t pt;
+
+    d = rcu_lock_domain_by_id(sender_domid);
+    p2m = p2m_get_hostp2m(d);
+
+    for ( pfn = 0; pfn < p2m->max_mapped_pfn; pfn++ )
+    {
+        get_gfn_query(d, pfn, &pt);
+        if ( unlikely(p2m_is_broken(pt)) )
+        {
+            printk("!!!!!! before migration, find broken page, "
+                   "dom = %d, pfn = %lx, pt = %d\n",
+                    (unsigned int)d->domain_id, pfn, (unsigned int)pt);
+            broken_pfn = pfn;
+        }
+        put_gfn(d, pfn);
+    }
+
+    rcu_unlock_domain(d);
+}
+
+static struct keyhandler sender_broken_page_keyhandler = {
+    .diagnostic = 1,
+    .u.fn = sender_broken_page,
+};
+
+static __init int sender_broken_page_init(void)
+{
+    register_keyhandler('6', &sender_broken_page_keyhandler);
+    return 0;
+}
+__initcall(sender_broken_page_init);
+
+/****************************************************/
+
+
+static void target_broken_page(unsigned char key)
+{
+    domid_t target_domid = 2;
+    struct domain *d;
+    struct p2m_domain *p2m;
+    unsigned long pfn;
+    p2m_type_t pt;
+
+    d = rcu_lock_domain_by_id(target_domid);
+    p2m = p2m_get_hostp2m(d);
+
+    for ( pfn = 0; pfn < p2m->max_mapped_pfn; pfn++ )
+    {
+        get_gfn_query(d, pfn, &pt);
+        if ( unlikely(p2m_is_broken(pt)) )
+            printk("@@@@@@ after migration, find broken page, "
+                   "dom = %d, pfn = %lx, pt = %d\n",
+                    (unsigned int)d->domain_id, pfn, (unsigned int)pt);
+        put_gfn(d, pfn);
+    }
+
+    get_gfn_query(d, broken_pfn, &pt);
+    printk("@@@@@@ after migration, broken_pfn type is, "
+           "dom = %d, broken_pfn = %lx, pt = %d\n",
+           (unsigned int)d->domain_id, broken_pfn, (unsigned int)pt);
+    put_gfn(d, broken_pfn);
+
+    rcu_unlock_domain(d);
+}
+
+static struct keyhandler target_broken_page_keyhandler = {
+    .diagnostic = 1,
+    .u.fn = target_broken_page,
+};
+
+static __init int target_broken_page_init(void)
+{
+    register_keyhandler('7', &target_broken_page_keyhandler);
+    return 0;
+}
+__initcall(target_broken_page_init);
+
+/*****************************************************/

[-- Attachment #3: test2.patch --]
[-- Type: application/octet-stream, Size: 2128 bytes --]

diff -r de462f2f1db8 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Mon Aug 27 21:38:11 2012 +0800
+++ b/tools/libxc/xc_domain_restore.c	Mon Aug 27 21:56:53 2012 +0800
@@ -1207,8 +1207,11 @@
 
         if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
         {
+            fprintf(stderr, "@target, find broken page, "
+                      "dom=%d, pfn=0x%lx, pagetype=0x%lx\n", dom, pfn, pagetype);
             if ( xc_set_broken_page_p2m(xch, dom, pfn) )
             {
+                fprintf(stderr, "... ERROR WHEN SET P2M FOR BROKEN PAGE\n");
                 ERROR("Set p2m for broken page fail, "
                       "dom=%d, pfn=%lx\n", dom, pfn);
                 goto err_mapped;
diff -r de462f2f1db8 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c	Mon Aug 27 21:38:11 2012 +0800
+++ b/tools/libxc/xc_domain_save.c	Mon Aug 27 21:56:53 2012 +0800
@@ -1289,6 +1289,9 @@
                 {
                     pfn_type[j] |= pfn_batch[j];
                     ++run;
+                    fprintf(stderr, "@sender, find broken page, "
+                            "pfn_err[0x%x]=%d, pfn_type[0x%x] =%lx\n",
+                    (int)j, (int)pfn_err[j], (int)j, (unsigned long)pfn_type[j]);
                     continue;
                 }
 
diff -r de462f2f1db8 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Mon Aug 27 21:38:11 2012 +0800
+++ b/xen/arch/x86/domctl.c	Mon Aug 27 21:56:53 2012 +0800
@@ -1566,13 +1566,19 @@
         struct domain *d;
         p2m_type_t pt;
         unsigned long pfn;
+        mfn_t r_mfn;
 
         d = rcu_lock_domain_by_id(domctl->domain);
         if ( d != NULL )
         {
             pfn = domctl->u.set_broken_page_p2m.pfn;
 
-            get_gfn_query(d, pfn, &pt);
+            r_mfn = get_gfn_query(d, pfn, &pt);
+            printk("@XEN_DOMCTL_set_broken_page_p2m, before set p2m, "
+                   "pfn=%lx, pt=%d\n", pfn, (int)pt);
+            if (!mfn_valid(mfn_x(r_mfn)))
+                printk("r_mfn IS INVALID!!!\n");
+
             p2m_change_type(d, pfn, pt, p2m_ram_broken);
             put_gfn(d, pfn);
 

[-- Attachment #4: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-10-19 17:09   ` George Dunlap
  2012-10-19 21:07     ` Liu, Jinsong
@ 2012-10-22 10:54     ` Ian Jackson
  2012-10-22 19:26       ` Shriram Rajagopalan
  1 sibling, 1 reply; 15+ messages in thread
From: Ian Jackson @ 2012-10-22 10:54 UTC (permalink / raw)
  To: George Dunlap
  Cc: Liu, Jinsong, Christoph Egger, xen-devel, Keir (Xen.org),
	Ian Campbell, Jan Beulich

George Dunlap writes ("Re: [Xen-devel] [PATCH 5/5] X86/vMCE: guest broken page handling when migration"):
> On Fri, Oct 19, 2012 at 4:14 PM, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
> > This looks plausible to me, as far as the tools go.  Can you explain
> > how you have tested this ?  Did you manage to do any tests of the
> > remus codepaths ?
> 
> I'm pretty sure that this shouldn't cause any problems with Remus.  If
> it's difficult for Jinsong to test Remus, I think probably OK to
> commit it, and then revert it if the Remus guys have any problems.

OK.

Ian.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-10-22 10:54     ` Ian Jackson
@ 2012-10-22 19:26       ` Shriram Rajagopalan
  2012-11-20 16:57         ` George Dunlap
  0 siblings, 1 reply; 15+ messages in thread
From: Shriram Rajagopalan @ 2012-10-22 19:26 UTC (permalink / raw)
  To: Ian Jackson, Liu, Jinsong
  Cc: Christoph Egger, xen-devel, Keir (Xen.org),
	Ian Campbell, George Dunlap, Jan Beulich


[-- Attachment #1.1: Type: text/plain, Size: 1032 bytes --]

On Mon, Oct 22, 2012 at 3:54 AM, Ian Jackson <Ian.Jackson@eu.citrix.com>wrote:

> George Dunlap writes ("Re: [Xen-devel] [PATCH 5/5] X86/vMCE: guest broken
> page handling when migration"):
> > On Fri, Oct 19, 2012 at 4:14 PM, Ian Jackson <Ian.Jackson@eu.citrix.com>
> wrote:
> > > This looks plausible to me, as far as the tools go.  Can you explain
> > > how you have tested this ?  Did you manage to do any tests of the
> > > remus codepaths ?
> >
> > I'm pretty sure that this shouldn't cause any problems with Remus.  If
> > it's difficult for Jinsong to test Remus, I think probably OK to
> > commit it, and then revert it if the Remus guys have any problems.
>
> OK.
>
>
You can easily test it with Remus. With xl, memory replication
functionality is already
in place. so xl remus command should work.

If you are running it with Xend, run Remus with --no-net option.

shriram


>  Ian.
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>

[-- Attachment #1.2: Type: text/html, Size: 2000 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-10-22 19:26       ` Shriram Rajagopalan
@ 2012-11-20 16:57         ` George Dunlap
  2012-11-26 14:49           ` Shriram Rajagopalan
  0 siblings, 1 reply; 15+ messages in thread
From: George Dunlap @ 2012-11-20 16:57 UTC (permalink / raw)
  To: rshriram; +Cc: xen-devel, Ian Jackson, Ian Campbell, Jan Beulich


[-- Attachment #1.1: Type: text/plain, Size: 1363 bytes --]

On 22/10/12 20:26, Shriram Rajagopalan wrote:
> On Mon, Oct 22, 2012 at 3:54 AM, Ian Jackson 
> <Ian.Jackson@eu.citrix.com <mailto:Ian.Jackson@eu.citrix.com>> wrote:
>
>     George Dunlap writes ("Re: [Xen-devel] [PATCH 5/5] X86/vMCE: guest
>     broken page handling when migration"):
>     > On Fri, Oct 19, 2012 at 4:14 PM, Ian Jackson
>     <Ian.Jackson@eu.citrix.com <mailto:Ian.Jackson@eu.citrix.com>> wrote:
>     > > This looks plausible to me, as far as the tools go.  Can you
>     explain
>     > > how you have tested this ?  Did you manage to do any tests of the
>     > > remus codepaths ?
>     >
>     > I'm pretty sure that this shouldn't cause any problems with
>     Remus.  If
>     > it's difficult for Jinsong to test Remus, I think probably OK to
>     > commit it, and then revert it if the Remus guys have any problems.
>
>     OK.
>
>
> You can easily test it with Remus. With xl, memory replication 
> functionality is already
> in place. so xl remus command should work.

Should "xl remus $domain localhost" work?  How would one test the 
fail-over mechanism?  Are there any other requirements for the guest, 
the kernel, &c?

I just ran the above command on xen-unstable, and after 10 minutes or so 
the guest crashed with some kind of a kernel double-fault.

Are we running any remus stuff in our testing infrastructure?

  -George


[-- Attachment #1.2: Type: text/html, Size: 2895 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-11-20 16:57         ` George Dunlap
@ 2012-11-26 14:49           ` Shriram Rajagopalan
  2013-01-15 15:05             ` George Dunlap
  0 siblings, 1 reply; 15+ messages in thread
From: Shriram Rajagopalan @ 2012-11-26 14:49 UTC (permalink / raw)
  To: George Dunlap; +Cc: xen-devel, Ian Jackson, Ian Campbell, Jan Beulich


[-- Attachment #1.1: Type: text/plain, Size: 2146 bytes --]

Sorry for the delayed response.

On Tue, Nov 20, 2012 at 11:57 AM, George Dunlap <george.dunlap@eu.citrix.com
> wrote:

>  On 22/10/12 20:26, Shriram Rajagopalan wrote:
>
>  On Mon, Oct 22, 2012 at 3:54 AM, Ian Jackson <Ian.Jackson@eu.citrix.com>wrote:
>
>> George Dunlap writes ("Re: [Xen-devel] [PATCH 5/5] X86/vMCE: guest broken
>> page handling when migration"):
>> > On Fri, Oct 19, 2012 at 4:14 PM, Ian Jackson <Ian.Jackson@eu.citrix.com>
>> wrote:
>>  > > This looks plausible to me, as far as the tools go.  Can you explain
>> > > how you have tested this ?  Did you manage to do any tests of the
>> > > remus codepaths ?
>> >
>> > I'm pretty sure that this shouldn't cause any problems with Remus.  If
>> > it's difficult for Jinsong to test Remus, I think probably OK to
>> > commit it, and then revert it if the Remus guys have any problems.
>>
>>  OK.
>>
>>
>  You can easily test it with Remus. With xl, memory replication
> functionality is already
> in place. so xl remus command should work.
>
>
> Should "xl remus $domain localhost" work?  How would one test the
> fail-over mechanism?  Are there any other requirements for the guest, the
> kernel, &c?
>
>
"xl remus $domain localhost" should work, and "xl remus $domain remoteHost"
will work too.
At least that was the case when the patches went in a few months ago.

If you are using a 3.0+ kernel for the guest, things should work.
There are no other requirements for the guest/kernel. HVM-wise it should
work, since it's basically doing "xl migrate -l $domain $host" continually
(just memory).


> I just ran the above command on xen-unstable, and after 10 minutes or so
> the guest crashed with some kind of a kernel double-fault.
>
>
As for the kernel double-fault, I have one possible candidate: the dirty
page compression could potentially be causing this, since IIRC there were
some proposals to re-introduce superpages, etc., which changed a lot of
the xc_domain_restore code.

So try "xl remus -u $domain localhost" [disables checkpoint compression].

 Are we running any remus stuff in our testing infrastructure?
>
>
Nope. Wish I could though :).

shriram

>   -George
>
>

[-- Attachment #1.2: Type: text/html, Size: 4939 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-11-26 14:49           ` Shriram Rajagopalan
@ 2013-01-15 15:05             ` George Dunlap
  0 siblings, 0 replies; 15+ messages in thread
From: George Dunlap @ 2013-01-15 15:05 UTC (permalink / raw)
  To: rshriram; +Cc: xen-devel, Ian Jackson


[-- Attachment #1.1: Type: text/plain, Size: 419 bytes --]

On 26/11/12 14:49, Shriram Rajagopalan wrote:
>
>     Are we running any remus stuff in our testing infrastructure?
>
>
> Nope. Wish I could though :).

IanJ posted a link to his testing suite back in October; it seems to 
have been successfully used by a handful of people now.  You might try 
developing some remus tests, at least some very basic ones:

http://marc.info/?l=xen-devel&m=135058142615440&w=2

  -George

[-- Attachment #1.2: Type: text/html, Size: 1521 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-10-10  9:21 ` Ian Campbell
@ 2012-10-10 14:42   ` Liu, Jinsong
  0 siblings, 0 replies; 15+ messages in thread
From: Liu, Jinsong @ 2012-10-10 14:42 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Christoph Egger, xen-devel, Keir (Xen.org), Jan Beulich

Updated, thanks! w/ some comments below, will send out later.

Ian Campbell wrote:
> On Wed, 2012-09-19 at 09:15 +0100, Liu, Jinsong wrote:
>> X86/vMCE: guest broken page handling when migration
>> 
>> This patch is used to handle guest broken page when migration.
>> 
>> At sender, the broken page would not be mapped, and the error page
>> content would not be copied to target, otherwise it may trigger more
>> serious error (i.e. SRAR error). While its pfn_type and pfn number
>> would be transferred to target so that target take appropriate
>> action. 
>> 
>> At target, it would set p2m as p2m_ram_broken for broken page, so
>> that 
>> if guest access the broken page again, it would kill guest as
>> expected. 
>> 
>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>> 
>> diff -r a1d106d1aec8 tools/libxc/xc_domain.c
>> --- a/tools/libxc/xc_domain.c	Wed Sep 19 03:31:31 2012 +0800
>> +++ b/tools/libxc/xc_domain.c	Wed Sep 19 04:22:26 2012 +0800 @@
>>      -314,6 +314,22 @@ return ret ? -1 : 0;
>>  }
>> 
>> +/* set broken page p2m */
>> +int xc_set_broken_page_p2m(xc_interface *xch,
>> +                           uint32_t domid,
>> +                           unsigned long pfn)
>> +{
>> +    int ret;
>> +    DECLARE_DOMCTL;
>> +
>> +    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
>> +    domctl.domain = (domid_t)domid;
>> +    domctl.u.set_broken_page_p2m.pfn = pfn;
>> +    ret = do_domctl(xch, &domctl);
>> +
>> +    return ret ? -1 : 0;
>> +}
>> +
>>  /* get info from hvm guest for save */
>>  int xc_domain_hvm_getcontext(xc_interface *xch,
>>                               uint32_t domid,
>> diff -r a1d106d1aec8 tools/libxc/xc_domain_restore.c
>> --- a/tools/libxc/xc_domain_restore.c	Wed Sep 19 03:31:31 2012 +0800
>> +++ b/tools/libxc/xc_domain_restore.c	Wed Sep 19 04:22:26 2012 +0800
>> @@ -962,9 +962,15 @@ 
>> 
>>      countpages = count;
>>      for (i = oldcount; i < buf->nr_pages; ++i)
>> -        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
>> XEN_DOMCTL_PFINFO_XTAB 
>> -            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
>> XEN_DOMCTL_PFINFO_XALLOC) +    { +        unsigned long pagetype;
>> +
>> +        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
>> +        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
>> +             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
>> +             pagetype == XEN_DOMCTL_PFINFO_XALLOC )             
>> --countpages; +    }
>> 
>>      if (!countpages)
>>          return count;
>> @@ -1200,6 +1206,17 @@
>>              /* a bogus/unmapped/allocate-only page: skip it */     
>> continue; 
>> 
>> +        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN ) +        {
>> +            if ( xc_set_broken_page_p2m(xch, dom, pfn) ) +         
>> { +                ERROR("Set p2m for broken page fail, "
> 
> "failed"
> 
>> +                      "dom=%d, pfn=%lx\n", dom, pfn);
>> +                goto err_mapped;
>> +            }
>> +            continue;
>> +        }
>> +
>>          if (pfn_err[i])
>>          {
>>              ERROR("unexpected PFN mapping failure pfn %lx map_mfn
>> %lx p2m_mfn %lx", 
>> diff -r a1d106d1aec8 xen/include/public/domctl.h
>> --- a/xen/include/public/domctl.h	Wed Sep 19 03:31:31 2012 +0800
>> +++ b/xen/include/public/domctl.h	Wed Sep 19 04:22:26 2012 +0800 @@
>>  -136,6 +136,7 @@ #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
>>  #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
>>  #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page
>> */ +#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
>>  #define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
>>  #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
>> 
>> @@ -834,6 +835,12 @@
>>  typedef struct xen_domctl_vmce_monitor xen_domctl_vmce_monitor_t;
>>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmce_monitor_t);
>> 
>> +struct xen_domctl_set_broken_page_p2m {
>> +    uint64_t pfn;
>> +};
> 
> why not xen_pfn_t? or uint64_aligned_t?
> 
> Is domctl the right interface for this? Seems like more of an add to
> physmap thing?
> 

Hmm, the broken page still belongs to the domain, so we don't need to touch the physmap (like what MCE does when handling a broken page at the sender side: just unmapping the p2m entry is OK). As for domctl, my thinking is that it's per-domain stuff (it belongs to the field of domain control/management): setting the p2m entry for a broken page of a domain.

Thanks,
Jinsong

>> +typedef struct xen_domctl_set_broken_page_p2m
>> xen_domctl_set_broken_page_p2m_t;
>>  +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t); +
>>      struct xen_domctl { uint32_t cmd;
>>  #define XEN_DOMCTL_createdomain                   1 @@ -901,6
>>  +908,7 @@ #define XEN_DOMCTL_set_virq_handler              66
>>  #define XEN_DOMCTL_vmce_monitor_start            67
>>  #define XEN_DOMCTL_vmce_monitor_end              68
>> +#define XEN_DOMCTL_set_broken_page_p2m           69
>>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002 @@ -957,6
>>          +965,7 @@ struct xen_domctl_set_virq_handler 
>>          set_virq_handler; struct xen_domctl_vmce_monitor     
>>          vmce_monitor; struct xen_domctl_gdbsx_memio      
>> gdbsx_guest_memio; +        struct xen_domctl_set_broken_page_p2m
>>          set_broken_page_p2m; struct xen_domctl_gdbsx_pauseunp_vcpu
>>          gdbsx_pauseunp_vcpu; struct xen_domctl_gdbsx_domstatus  
>>          gdbsx_domstatus; uint8_t                            
>> pad[128]; 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 5/5] X86/vMCE: guest broken page handling when migration
  2012-09-19  8:15 Liu, Jinsong
@ 2012-10-10  9:21 ` Ian Campbell
  2012-10-10 14:42   ` Liu, Jinsong
  0 siblings, 1 reply; 15+ messages in thread
From: Ian Campbell @ 2012-10-10  9:21 UTC (permalink / raw)
  To: Liu, Jinsong; +Cc: Christoph Egger, xen-devel, Keir (Xen.org), Jan Beulich

On Wed, 2012-09-19 at 09:15 +0100, Liu, Jinsong wrote:
> X86/vMCE: guest broken page handling when migration
> 
> This patch is used to handle guest broken page when migration.
> 
> At sender, the broken page would not be mapped, and the error page
> content would not be copied to target, otherwise it may trigger more
> serious error (i.e. SRAR error). While its pfn_type and pfn number
> would be transferred to target so that target take appropriate action.
> 
> At target, it would set p2m as p2m_ram_broken for broken page, so that
> if guest access the broken page again, it would kill guest as expected.
> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> 
> diff -r a1d106d1aec8 tools/libxc/xc_domain.c
> --- a/tools/libxc/xc_domain.c	Wed Sep 19 03:31:31 2012 +0800
> +++ b/tools/libxc/xc_domain.c	Wed Sep 19 04:22:26 2012 +0800
> @@ -314,6 +314,22 @@
>      return ret ? -1 : 0;
>  }
>  
> +/* set broken page p2m */
> +int xc_set_broken_page_p2m(xc_interface *xch,
> +                           uint32_t domid,
> +                           unsigned long pfn)
> +{
> +    int ret;
> +    DECLARE_DOMCTL;
> +
> +    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
> +    domctl.domain = (domid_t)domid;
> +    domctl.u.set_broken_page_p2m.pfn = pfn;
> +    ret = do_domctl(xch, &domctl);
> +
> +    return ret ? -1 : 0;
> +}
> +
>  /* get info from hvm guest for save */
>  int xc_domain_hvm_getcontext(xc_interface *xch,
>                               uint32_t domid,
> diff -r a1d106d1aec8 tools/libxc/xc_domain_restore.c
> --- a/tools/libxc/xc_domain_restore.c	Wed Sep 19 03:31:31 2012 +0800
> +++ b/tools/libxc/xc_domain_restore.c	Wed Sep 19 04:22:26 2012 +0800
> @@ -962,9 +962,15 @@
>  
>      countpages = count;
>      for (i = oldcount; i < buf->nr_pages; ++i)
> -        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB
> -            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XALLOC)
> +    {
> +        unsigned long pagetype;
> +
> +        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
> +        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
> +             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
> +             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
>              --countpages;
> +    }
>  
>      if (!countpages)
>          return count;
> @@ -1200,6 +1206,17 @@
>              /* a bogus/unmapped/allocate-only page: skip it */
>              continue;
>  
> +        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
> +        {
> +            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
> +            {
> +                ERROR("Set p2m for broken page fail, "

"failed"

> +                      "dom=%d, pfn=%lx\n", dom, pfn);
> +                goto err_mapped;
> +            }
> +            continue;
> +        }
> +
>          if (pfn_err[i])
>          {
>              ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn %lx",
> diff -r a1d106d1aec8 xen/include/public/domctl.h
> --- a/xen/include/public/domctl.h	Wed Sep 19 03:31:31 2012 +0800
> +++ b/xen/include/public/domctl.h	Wed Sep 19 04:22:26 2012 +0800
> @@ -136,6 +136,7 @@
>  #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
>  #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
>  #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page */
> +#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
>  #define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
>  #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
>  
> @@ -834,6 +835,12 @@
>  typedef struct xen_domctl_vmce_monitor xen_domctl_vmce_monitor_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmce_monitor_t);
>  
> +struct xen_domctl_set_broken_page_p2m {
> +    uint64_t pfn;
> +};

why not xen_pfn_t? or uint64_aligned_t?

Is domctl the right interface for this? Seems like more of an add to
physmap thing?

> +typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
> +
>  struct xen_domctl {
>      uint32_t cmd;
>  #define XEN_DOMCTL_createdomain                   1
> @@ -901,6 +908,7 @@
>  #define XEN_DOMCTL_set_virq_handler              66
>  #define XEN_DOMCTL_vmce_monitor_start            67
>  #define XEN_DOMCTL_vmce_monitor_end              68
> +#define XEN_DOMCTL_set_broken_page_p2m           69
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -957,6 +965,7 @@
>          struct xen_domctl_set_virq_handler  set_virq_handler;
>          struct xen_domctl_vmce_monitor      vmce_monitor;
>          struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
> +        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
>          uint8_t                             pad[128];

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 5/5] X86/vMCE: guest broken page handling when migration
@ 2012-09-19  8:15 Liu, Jinsong
  2012-10-10  9:21 ` Ian Campbell
  0 siblings, 1 reply; 15+ messages in thread
From: Liu, Jinsong @ 2012-09-19  8:15 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Christoph Egger, keir, Ian.Campbell

[-- Attachment #1: Type: text/plain, Size: 8731 bytes --]

X86/vMCE: guest broken page handling when migration

This patch handles guest broken pages during migration.

At the sender, a broken page is not mapped and its content is not
copied to the target, since accessing it may trigger a more serious
error (i.e. an SRAR error). Its pfn_type and pfn number are still
transferred so that the target can take appropriate action.

At the target, the p2m type of the broken page is set to p2m_ram_broken,
so that if the guest accesses the broken page again, it is killed as expected.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r a1d106d1aec8 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain.c	Wed Sep 19 04:22:26 2012 +0800
@@ -314,6 +314,22 @@
     return ret ? -1 : 0;
 }
 
+/* set broken page p2m */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
+    domctl.domain = (domid_t)domid;
+    domctl.u.set_broken_page_p2m.pfn = pfn;
+    ret = do_domctl(xch, &domctl);
+
+    return ret ? -1 : 0;
+}
+
 /* get info from hvm guest for save */
 int xc_domain_hvm_getcontext(xc_interface *xch,
                              uint32_t domid,
diff -r a1d106d1aec8 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain_restore.c	Wed Sep 19 04:22:26 2012 +0800
@@ -962,9 +962,15 @@
 
     countpages = count;
     for (i = oldcount; i < buf->nr_pages; ++i)
-        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB
-            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XALLOC)
+    {
+        unsigned long pagetype;
+
+        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
+             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
+             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
             --countpages;
+    }
 
     if (!countpages)
         return count;
@@ -1200,6 +1206,17 @@
             /* a bogus/unmapped/allocate-only page: skip it */
             continue;
 
+        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
+        {
+            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
+            {
+                ERROR("Set p2m for broken page fail, "
+                      "dom=%d, pfn=%lx\n", dom, pfn);
+                goto err_mapped;
+            }
+            continue;
+        }
+
         if (pfn_err[i])
         {
             ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn %lx",
diff -r a1d106d1aec8 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain_save.c	Wed Sep 19 04:22:26 2012 +0800
@@ -1285,6 +1285,13 @@
                 if ( !hvm )
                     gmfn = pfn_to_mfn(gmfn);
 
+                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_BROKEN )
+                {
+                    pfn_type[j] |= pfn_batch[j];
+                    ++run;
+                    continue;
+                }
+
                 if ( pfn_err[j] )
                 {
                     if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB )
@@ -1379,8 +1386,12 @@
                     }
                 }
 
-                /* skip pages that aren't present or are alloc-only */
+                /*
+                 * skip pages that aren't present,
+                 * or are broken, or are alloc-only
+                 */
                 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
+                    || pagetype == XEN_DOMCTL_PFINFO_BROKEN
                     || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
                     continue;
 
diff -r a1d106d1aec8 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xenctrl.h	Wed Sep 19 04:22:26 2012 +0800
@@ -591,6 +591,17 @@
                                signed char *vmce_while_monitor);
 
 /**
+ * This function set p2m for broken page
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the domain id which broken page belong to
+ * @parm pfn the pfn number of the broken page
+ * @return 0 on success, -1 on failure
+ */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn);
+
+/**
  * This function returns information about the context of a hvm domain
  * @parm xch a handle to an open hypervisor interface
  * @parm domid the domain to get information from
diff -r a1d106d1aec8 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/xen/arch/x86/domctl.c	Wed Sep 19 04:22:26 2012 +0800
@@ -209,12 +209,18 @@
                 for ( j = 0; j < k; j++ )
                 {
                     unsigned long type = 0;
+                    p2m_type_t t;
 
-                    page = get_page_from_gfn(d, arr[j], NULL, P2M_ALLOC);
+                    page = get_page_from_gfn(d, arr[j], &t, P2M_ALLOC);
 
                     if ( unlikely(!page) ||
                          unlikely(is_xen_heap_page(page)) )
-                        type = XEN_DOMCTL_PFINFO_XTAB;
+                    {
+                        if ( p2m_is_broken(t) )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
+                        else
+                            type = XEN_DOMCTL_PFINFO_XTAB;
+                    }
                     else
                     {
                         switch( page->u.inuse.type_info & PGT_type_mask )
@@ -235,6 +241,9 @@
 
                         if ( page->u.inuse.type_info & PGT_pinned )
                             type |= XEN_DOMCTL_PFINFO_LPINTAB;
+
+                        if ( page->count_info & PGC_broken )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
                     }
 
                     if ( page )
@@ -1548,6 +1557,28 @@
     }
     break;
 
+    case XEN_DOMCTL_set_broken_page_p2m:
+    {
+        struct domain *d;
+        p2m_type_t pt;
+        unsigned long pfn;
+
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            pfn = domctl->u.set_broken_page_p2m.pfn;
+
+            get_gfn_query(d, pfn, &pt);
+            p2m_change_type(d, pfn, pt, p2m_ram_broken);
+            put_gfn(d, pfn);
+
+            rcu_unlock_domain(d);
+        }
+        else
+            ret = -ESRCH;
+    }
+    break;
+
     default:
         ret = iommu_do_domctl(domctl, u_domctl);
         break;
diff -r a1d106d1aec8 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Wed Sep 19 03:31:31 2012 +0800
+++ b/xen/include/public/domctl.h	Wed Sep 19 04:22:26 2012 +0800
@@ -136,6 +136,7 @@
 #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
 #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
 #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page */
+#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
 #define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
 #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
 
@@ -834,6 +835,12 @@
 typedef struct xen_domctl_vmce_monitor xen_domctl_vmce_monitor_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmce_monitor_t);
 
+struct xen_domctl_set_broken_page_p2m {
+    uint64_t pfn;
+};
+typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -901,6 +908,7 @@
 #define XEN_DOMCTL_set_virq_handler              66
 #define XEN_DOMCTL_vmce_monitor_start            67
 #define XEN_DOMCTL_vmce_monitor_end              68
+#define XEN_DOMCTL_set_broken_page_p2m           69
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -957,6 +965,7 @@
         struct xen_domctl_set_virq_handler  set_virq_handler;
         struct xen_domctl_vmce_monitor      vmce_monitor;
         struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
+        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         uint8_t                             pad[128];

[-- Attachment #2: 5_vmce_migration_pfntype_broken.patch --]
[-- Type: application/octet-stream, Size: 8499 bytes --]

X86/vMCE: guest broken page handling when migration

This patch handles guest broken pages during migration.

At the sender, a broken page is not mapped and its content is not
copied to the target, since accessing it may trigger a more serious
error (i.e. an SRAR error). Its pfn_type and pfn number are still
transferred so that the target can take appropriate action.

At the target, the p2m type of the broken page is set to p2m_ram_broken,
so that if the guest accesses the broken page again, it is killed as expected.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r a1d106d1aec8 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain.c	Wed Sep 19 04:22:26 2012 +0800
@@ -314,6 +314,22 @@
     return ret ? -1 : 0;
 }
 
+/* set broken page p2m */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
+    domctl.domain = (domid_t)domid;
+    domctl.u.set_broken_page_p2m.pfn = pfn;
+    ret = do_domctl(xch, &domctl);
+
+    return ret ? -1 : 0;
+}
+
 /* get info from hvm guest for save */
 int xc_domain_hvm_getcontext(xc_interface *xch,
                              uint32_t domid,
diff -r a1d106d1aec8 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain_restore.c	Wed Sep 19 04:22:26 2012 +0800
@@ -962,9 +962,15 @@
 
     countpages = count;
     for (i = oldcount; i < buf->nr_pages; ++i)
-        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB
-            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XALLOC)
+    {
+        unsigned long pagetype;
+
+        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
+             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
+             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
             --countpages;
+    }
 
     if (!countpages)
         return count;
@@ -1200,6 +1206,17 @@
             /* a bogus/unmapped/allocate-only page: skip it */
             continue;
 
+        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
+        {
+            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
+            {
+                ERROR("Set p2m for broken page fail, "
+                      "dom=%d, pfn=%lx\n", dom, pfn);
+                goto err_mapped;
+            }
+            continue;
+        }
+
         if (pfn_err[i])
         {
             ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn %lx",
diff -r a1d106d1aec8 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain_save.c	Wed Sep 19 04:22:26 2012 +0800
@@ -1285,6 +1285,13 @@
                 if ( !hvm )
                     gmfn = pfn_to_mfn(gmfn);
 
+                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_BROKEN )
+                {
+                    pfn_type[j] |= pfn_batch[j];
+                    ++run;
+                    continue;
+                }
+
                 if ( pfn_err[j] )
                 {
                     if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB )
@@ -1379,8 +1386,12 @@
                     }
                 }
 
-                /* skip pages that aren't present or are alloc-only */
+                /*
+                 * skip pages that aren't present,
+                 * or are broken, or are alloc-only
+                 */
                 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
+                    || pagetype == XEN_DOMCTL_PFINFO_BROKEN
                     || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
                     continue;
 
diff -r a1d106d1aec8 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xenctrl.h	Wed Sep 19 04:22:26 2012 +0800
@@ -591,6 +591,17 @@
                                signed char *vmce_while_monitor);
 
 /**
+ * This function set p2m for broken page
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the domain id which broken page belong to
+ * @parm pfn the pfn number of the broken page
+ * @return 0 on success, -1 on failure
+ */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn);
+
+/**
  * This function returns information about the context of a hvm domain
  * @parm xch a handle to an open hypervisor interface
  * @parm domid the domain to get information from
diff -r a1d106d1aec8 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/xen/arch/x86/domctl.c	Wed Sep 19 04:22:26 2012 +0800
@@ -209,12 +209,18 @@
                 for ( j = 0; j < k; j++ )
                 {
                     unsigned long type = 0;
+                    p2m_type_t t;
 
-                    page = get_page_from_gfn(d, arr[j], NULL, P2M_ALLOC);
+                    page = get_page_from_gfn(d, arr[j], &t, P2M_ALLOC);
 
                     if ( unlikely(!page) ||
                          unlikely(is_xen_heap_page(page)) )
-                        type = XEN_DOMCTL_PFINFO_XTAB;
+                    {
+                        if ( p2m_is_broken(t) )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
+                        else
+                            type = XEN_DOMCTL_PFINFO_XTAB;
+                    }
                     else
                     {
                         switch( page->u.inuse.type_info & PGT_type_mask )
@@ -235,6 +241,9 @@
 
                         if ( page->u.inuse.type_info & PGT_pinned )
                             type |= XEN_DOMCTL_PFINFO_LPINTAB;
+
+                        if ( page->count_info & PGC_broken )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
                     }
 
                     if ( page )
@@ -1548,6 +1557,28 @@
     }
     break;
 
+    case XEN_DOMCTL_set_broken_page_p2m:
+    {
+        struct domain *d;
+        p2m_type_t pt;
+        unsigned long pfn;
+
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            pfn = domctl->u.set_broken_page_p2m.pfn;
+
+            get_gfn_query(d, pfn, &pt);
+            p2m_change_type(d, pfn, pt, p2m_ram_broken);
+            put_gfn(d, pfn);
+
+            rcu_unlock_domain(d);
+        }
+        else
+            ret = -ESRCH;
+    }
+    break;
+
     default:
         ret = iommu_do_domctl(domctl, u_domctl);
         break;
diff -r a1d106d1aec8 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Wed Sep 19 03:31:31 2012 +0800
+++ b/xen/include/public/domctl.h	Wed Sep 19 04:22:26 2012 +0800
@@ -136,6 +136,7 @@
 #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
 #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
 #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page */
+#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
 #define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
 #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
 
@@ -834,6 +835,12 @@
 typedef struct xen_domctl_vmce_monitor xen_domctl_vmce_monitor_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmce_monitor_t);
 
+struct xen_domctl_set_broken_page_p2m {
+    uint64_t pfn;
+};
+typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -901,6 +908,7 @@
 #define XEN_DOMCTL_set_virq_handler              66
 #define XEN_DOMCTL_vmce_monitor_start            67
 #define XEN_DOMCTL_vmce_monitor_end              68
+#define XEN_DOMCTL_set_broken_page_p2m           69
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -957,6 +965,7 @@
         struct xen_domctl_set_virq_handler  set_virq_handler;
         struct xen_domctl_vmce_monitor      vmce_monitor;
         struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
+        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         uint8_t                             pad[128];

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 5/5] X86/vMCE: guest broken page handling when migration
@ 2012-09-18 13:18 Liu, Jinsong
  0 siblings, 0 replies; 15+ messages in thread
From: Liu, Jinsong @ 2012-09-18 13:18 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: keir, Ian.Campbell

[-- Attachment #1: Type: text/plain, Size: 8731 bytes --]

X86/vMCE: guest broken page handling when migration

This patch handles guest broken pages during migration.

At the sender, a broken page is not mapped and its content is not
copied to the target, since accessing it may trigger a more serious
error (i.e. an SRAR error). Its pfn_type and pfn number are still
transferred so that the target can take appropriate action.

At the target, the p2m type of the broken page is set to p2m_ram_broken,
so that if the guest accesses the broken page again, it is killed as expected.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r a1d106d1aec8 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain.c	Wed Sep 19 04:22:26 2012 +0800
@@ -314,6 +314,22 @@
     return ret ? -1 : 0;
 }
 
+/* set broken page p2m */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
+    domctl.domain = (domid_t)domid;
+    domctl.u.set_broken_page_p2m.pfn = pfn;
+    ret = do_domctl(xch, &domctl);
+
+    return ret ? -1 : 0;
+}
+
 /* get info from hvm guest for save */
 int xc_domain_hvm_getcontext(xc_interface *xch,
                              uint32_t domid,
diff -r a1d106d1aec8 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain_restore.c	Wed Sep 19 04:22:26 2012 +0800
@@ -962,9 +962,15 @@
 
     countpages = count;
     for (i = oldcount; i < buf->nr_pages; ++i)
-        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB
-            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XALLOC)
+    {
+        unsigned long pagetype;
+
+        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
+             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
+             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
             --countpages;
+    }
 
     if (!countpages)
         return count;
@@ -1200,6 +1206,17 @@
             /* a bogus/unmapped/allocate-only page: skip it */
             continue;
 
+        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
+        {
+            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
+            {
+                ERROR("Set p2m for broken page fail, "
+                      "dom=%d, pfn=%lx\n", dom, pfn);
+                goto err_mapped;
+            }
+            continue;
+        }
+
         if (pfn_err[i])
         {
             ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn %lx",
diff -r a1d106d1aec8 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain_save.c	Wed Sep 19 04:22:26 2012 +0800
@@ -1285,6 +1285,13 @@
                 if ( !hvm )
                     gmfn = pfn_to_mfn(gmfn);
 
+                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_BROKEN )
+                {
+                    pfn_type[j] |= pfn_batch[j];
+                    ++run;
+                    continue;
+                }
+
                 if ( pfn_err[j] )
                 {
                     if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB )
@@ -1379,8 +1386,12 @@
                     }
                 }
 
-                /* skip pages that aren't present or are alloc-only */
+                /*
+                 * skip pages that aren't present,
+                 * or are broken, or are alloc-only
+                 */
                 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
+                    || pagetype == XEN_DOMCTL_PFINFO_BROKEN
                     || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
                     continue;
 
diff -r a1d106d1aec8 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xenctrl.h	Wed Sep 19 04:22:26 2012 +0800
@@ -591,6 +591,17 @@
                                signed char *vmce_while_monitor);
 
 /**
+ * This function set p2m for broken page
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the domain id which broken page belong to
+ * @parm pfn the pfn number of the broken page
+ * @return 0 on success, -1 on failure
+ */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn);
+
+/**
  * This function returns information about the context of a hvm domain
  * @parm xch a handle to an open hypervisor interface
  * @parm domid the domain to get information from
diff -r a1d106d1aec8 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/xen/arch/x86/domctl.c	Wed Sep 19 04:22:26 2012 +0800
@@ -209,12 +209,18 @@
                 for ( j = 0; j < k; j++ )
                 {
                     unsigned long type = 0;
+                    p2m_type_t t;
 
-                    page = get_page_from_gfn(d, arr[j], NULL, P2M_ALLOC);
+                    page = get_page_from_gfn(d, arr[j], &t, P2M_ALLOC);
 
                     if ( unlikely(!page) ||
                          unlikely(is_xen_heap_page(page)) )
-                        type = XEN_DOMCTL_PFINFO_XTAB;
+                    {
+                        if ( p2m_is_broken(t) )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
+                        else
+                            type = XEN_DOMCTL_PFINFO_XTAB;
+                    }
                     else
                     {
                         switch( page->u.inuse.type_info & PGT_type_mask )
@@ -235,6 +241,9 @@
 
                         if ( page->u.inuse.type_info & PGT_pinned )
                             type |= XEN_DOMCTL_PFINFO_LPINTAB;
+
+                        if ( page->count_info & PGC_broken )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
                     }
 
                     if ( page )
@@ -1548,6 +1557,28 @@
     }
     break;
 
+    case XEN_DOMCTL_set_broken_page_p2m:
+    {
+        struct domain *d;
+        p2m_type_t pt;
+        unsigned long pfn;
+
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            pfn = domctl->u.set_broken_page_p2m.pfn;
+
+            get_gfn_query(d, pfn, &pt);
+            p2m_change_type(d, pfn, pt, p2m_ram_broken);
+            put_gfn(d, pfn);
+
+            rcu_unlock_domain(d);
+        }
+        else
+            ret = -ESRCH;
+    }
+    break;
+
     default:
         ret = iommu_do_domctl(domctl, u_domctl);
         break;
diff -r a1d106d1aec8 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Wed Sep 19 03:31:31 2012 +0800
+++ b/xen/include/public/domctl.h	Wed Sep 19 04:22:26 2012 +0800
@@ -136,6 +136,7 @@
 #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
 #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
 #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page */
+#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
 #define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
 #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
 
@@ -834,6 +835,12 @@
 typedef struct xen_domctl_vmce_monitor xen_domctl_vmce_monitor_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmce_monitor_t);
 
+struct xen_domctl_set_broken_page_p2m {
+    uint64_t pfn;
+};
+typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -901,6 +908,7 @@
 #define XEN_DOMCTL_set_virq_handler              66
 #define XEN_DOMCTL_vmce_monitor_start            67
 #define XEN_DOMCTL_vmce_monitor_end              68
+#define XEN_DOMCTL_set_broken_page_p2m           69
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -957,6 +965,7 @@
         struct xen_domctl_set_virq_handler  set_virq_handler;
         struct xen_domctl_vmce_monitor      vmce_monitor;
         struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
+        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         uint8_t                             pad[128];

[-- Attachment #2: 5_vmce_migration_pfntype_broken.patch --]
[-- Type: application/octet-stream, Size: 8499 bytes --]

X86/vMCE: guest broken page handling when migration

This patch handles guest broken pages during migration.

At the sender, a broken page is not mapped and its content is not
copied to the target, since accessing it may trigger a more serious
error (i.e. an SRAR error). Its pfn_type and pfn number are still
transferred so that the target can take appropriate action.

At the target, the p2m type of the broken page is set to p2m_ram_broken,
so that if the guest accesses the broken page again, it is killed as expected.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r a1d106d1aec8 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain.c	Wed Sep 19 04:22:26 2012 +0800
@@ -314,6 +314,22 @@
     return ret ? -1 : 0;
 }
 
+/* set broken page p2m */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn)
+{
+    int ret;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_set_broken_page_p2m;
+    domctl.domain = (domid_t)domid;
+    domctl.u.set_broken_page_p2m.pfn = pfn;
+    ret = do_domctl(xch, &domctl);
+
+    return ret ? -1 : 0;
+}
+
 /* get info from hvm guest for save */
 int xc_domain_hvm_getcontext(xc_interface *xch,
                              uint32_t domid,
diff -r a1d106d1aec8 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain_restore.c	Wed Sep 19 04:22:26 2012 +0800
@@ -962,9 +962,15 @@
 
     countpages = count;
     for (i = oldcount; i < buf->nr_pages; ++i)
-        if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB
-            ||(buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XALLOC)
+    {
+        unsigned long pagetype;
+
+        pagetype = buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+        if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ||
+             pagetype == XEN_DOMCTL_PFINFO_BROKEN ||
+             pagetype == XEN_DOMCTL_PFINFO_XALLOC )
             --countpages;
+    }
 
     if (!countpages)
         return count;
@@ -1200,6 +1206,17 @@
             /* a bogus/unmapped/allocate-only page: skip it */
             continue;
 
+        if ( pagetype == XEN_DOMCTL_PFINFO_BROKEN )
+        {
+            if ( xc_set_broken_page_p2m(xch, dom, pfn) )
+            {
+                ERROR("Set p2m for broken page fail, "
+                      "dom=%d, pfn=%lx\n", dom, pfn);
+                goto err_mapped;
+            }
+            continue;
+        }
+
         if (pfn_err[i])
         {
             ERROR("unexpected PFN mapping failure pfn %lx map_mfn %lx p2m_mfn %lx",
diff -r a1d106d1aec8 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xc_domain_save.c	Wed Sep 19 04:22:26 2012 +0800
@@ -1285,6 +1285,13 @@
                 if ( !hvm )
                     gmfn = pfn_to_mfn(gmfn);
 
+                if ( pfn_type[j] == XEN_DOMCTL_PFINFO_BROKEN )
+                {
+                    pfn_type[j] |= pfn_batch[j];
+                    ++run;
+                    continue;
+                }
+
                 if ( pfn_err[j] )
                 {
                     if ( pfn_type[j] == XEN_DOMCTL_PFINFO_XTAB )
@@ -1379,8 +1386,12 @@
                     }
                 }
 
-                /* skip pages that aren't present or are alloc-only */
+                /*
+                 * skip pages that aren't present,
+                 * or are broken, or are alloc-only
+                 */
                 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB
+                    || pagetype == XEN_DOMCTL_PFINFO_BROKEN
                     || pagetype == XEN_DOMCTL_PFINFO_XALLOC )
                     continue;
 
diff -r a1d106d1aec8 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Wed Sep 19 03:31:31 2012 +0800
+++ b/tools/libxc/xenctrl.h	Wed Sep 19 04:22:26 2012 +0800
@@ -591,6 +591,17 @@
                                signed char *vmce_while_monitor);
 
 /**
+ * This function set p2m for broken page
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the domain id which broken page belong to
+ * @parm pfn the pfn number of the broken page
+ * @return 0 on success, -1 on failure
+ */
+int xc_set_broken_page_p2m(xc_interface *xch,
+                           uint32_t domid,
+                           unsigned long pfn);
+
+/**
  * This function returns information about the context of a hvm domain
  * @parm xch a handle to an open hypervisor interface
  * @parm domid the domain to get information from
diff -r a1d106d1aec8 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Wed Sep 19 03:31:31 2012 +0800
+++ b/xen/arch/x86/domctl.c	Wed Sep 19 04:22:26 2012 +0800
@@ -209,12 +209,18 @@
                 for ( j = 0; j < k; j++ )
                 {
                     unsigned long type = 0;
+                    p2m_type_t t;
 
-                    page = get_page_from_gfn(d, arr[j], NULL, P2M_ALLOC);
+                    page = get_page_from_gfn(d, arr[j], &t, P2M_ALLOC);
 
                     if ( unlikely(!page) ||
                          unlikely(is_xen_heap_page(page)) )
-                        type = XEN_DOMCTL_PFINFO_XTAB;
+                    {
+                        if ( p2m_is_broken(t) )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
+                        else
+                            type = XEN_DOMCTL_PFINFO_XTAB;
+                    }
                     else
                     {
                         switch( page->u.inuse.type_info & PGT_type_mask )
@@ -235,6 +241,9 @@
 
                         if ( page->u.inuse.type_info & PGT_pinned )
                             type |= XEN_DOMCTL_PFINFO_LPINTAB;
+
+                        if ( page->count_info & PGC_broken )
+                            type = XEN_DOMCTL_PFINFO_BROKEN;
                     }
 
                     if ( page )
@@ -1548,6 +1557,28 @@
     }
     break;
 
+    case XEN_DOMCTL_set_broken_page_p2m:
+    {
+        struct domain *d;
+        p2m_type_t pt;
+        unsigned long pfn;
+
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d != NULL )
+        {
+            pfn = domctl->u.set_broken_page_p2m.pfn;
+
+            get_gfn_query(d, pfn, &pt);
+            p2m_change_type(d, pfn, pt, p2m_ram_broken);
+            put_gfn(d, pfn);
+
+            rcu_unlock_domain(d);
+        }
+        else
+            ret = -ESRCH;
+    }
+    break;
+
     default:
         ret = iommu_do_domctl(domctl, u_domctl);
         break;
diff -r a1d106d1aec8 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Wed Sep 19 03:31:31 2012 +0800
+++ b/xen/include/public/domctl.h	Wed Sep 19 04:22:26 2012 +0800
@@ -136,6 +136,7 @@
 #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
 #define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
 #define XEN_DOMCTL_PFINFO_XALLOC  (0xeU<<28) /* allocate-only page */
+#define XEN_DOMCTL_PFINFO_BROKEN  (0xdU<<28) /* broken page */
 #define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28)
 #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
 
@@ -834,6 +835,12 @@
 typedef struct xen_domctl_vmce_monitor xen_domctl_vmce_monitor_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmce_monitor_t);
 
+struct xen_domctl_set_broken_page_p2m {
+    uint64_t pfn;
+};
+typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -901,6 +908,7 @@
 #define XEN_DOMCTL_set_virq_handler              66
 #define XEN_DOMCTL_vmce_monitor_start            67
 #define XEN_DOMCTL_vmce_monitor_end              68
+#define XEN_DOMCTL_set_broken_page_p2m           69
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -957,6 +965,7 @@
         struct xen_domctl_set_virq_handler  set_virq_handler;
         struct xen_domctl_vmce_monitor      vmce_monitor;
         struct xen_domctl_gdbsx_memio       gdbsx_guest_memio;
+        struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
         uint8_t                             pad[128];

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2013-01-15 15:05 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-10 14:47 [PATCH 5/5] X86/vMCE: guest broken page handling when migration Liu, Jinsong
2012-10-16 10:44 ` Liu, Jinsong
2012-10-19 15:14 ` Ian Jackson
2012-10-19 17:09   ` George Dunlap
2012-10-19 21:07     ` Liu, Jinsong
2012-10-22 10:54     ` Ian Jackson
2012-10-22 19:26       ` Shriram Rajagopalan
2012-11-20 16:57         ` George Dunlap
2012-11-26 14:49           ` Shriram Rajagopalan
2013-01-15 15:05             ` George Dunlap
2012-10-19 16:54 ` George Dunlap
  -- strict thread matches above, loose matches on Subject: below --
2012-09-19  8:15 Liu, Jinsong
2012-10-10  9:21 ` Ian Campbell
2012-10-10 14:42   ` Liu, Jinsong
2012-09-18 13:18 Liu, Jinsong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.