All of lore.kernel.org
 help / color / mirror / Atom feed
* RFC: p2m type change draft
@ 2016-12-16  5:05 George Dunlap
  0 siblings, 0 replies; only message in thread
From: George Dunlap @ 2016-12-16  5:05 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Zhang, Yu C

[-- Attachment #1: Type: text/plain, Size: 893 bytes --]

Yu Zhang,

As we discussed, attached is the draft of the change to the p2m_change_entry_type changes I was working on.

Currently, Xen keeps track of what certain types “should” be, via a list of logdirty ranges as well as mmio ranges. 

This patch series attempts to do the opposite: to make “p2m_change_entry_type” simply change all entries of type ot to type nt.

It has a couple of issues at the moment.  It’s not yet complete — the actual number of entries still to be recalculated isn’t counted.

At the moment both migration and vram logdirty are used by the same type, and disambiguated by the “global_logdirty” bit.  To move to the new model, we’d have to have two different types.

And some more listed in the patches.

As I said, I’m not sure this is the right way forward or not — but I think it’s worth looking at.

Peace,

 -George


[-- Attachment #2: 0001-xen-ept-Rename-ept_invalidate_emt-to-ept_misconfigur.patch --]
[-- Type: application/octet-stream, Size: 9354 bytes --]

From 5d3214c72771858f121d6e75b6448438fb741c16 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 15 Dec 2016 16:47:55 +0800
Subject: [PATCH 1/2] xen/ept: Rename ept_invalidate_emt* to ept_misconfigure_*

The callers don't care how the entries are misconfigured; they just
care that the entry gets misconfigured.

Also rename the functions so that their action is clear:
emt_misconfigure_table() for the function which misconfigures an
entire EPT table, ept_misconfigure_range() for the function which
misconfigures the range.

Finally, include helper functions to detect a misconfigured epte,
restricting the knowledge of the "magic" of the emt field to functions
designed to deal with misconfiguration.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
 xen/arch/x86/mm/p2m-ept.c | 73 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 19 deletions(-)

diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 13cab24..aba80d5 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -384,7 +384,17 @@ static int ept_next_level(struct p2m_domain *p2m, bool_t read_only,
  * present entries in the given page table, optionally marking the entries
  * also for their subtrees needing P2M type re-calculation.
  */
-static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc, int level)
+static inline bool_t is_epte_misconfigured(ept_entry_t *e)
+{
+    return (e->emt == MTRR_NUM_TYPES);
+}
+
+static inline void misconfigure_epte(ept_entry_t *e)
+{
+    e->emt = MTRR_NUM_TYPES;
+}
+
+static bool_t ept_misconfigure_table(mfn_t mfn, bool_t recalc, int level)
 {
     int rc;
     ept_entry_t *epte = map_domain_page(mfn);
@@ -396,10 +406,10 @@ static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc, int level)
         ept_entry_t e = atomic_read_ept_entry(&epte[i]);
 
         if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
-             (e.emt == MTRR_NUM_TYPES && (e.recalc || !recalc)) )
+             (is_epte_misconfigured(&e) && (e.recalc || !recalc)) )
             continue;
 
-        e.emt = MTRR_NUM_TYPES;
+        misconfigure_epte(&e);
         if ( recalc )
             e.recalc = 1;
         rc = atomic_write_ept_entry(&epte[i], e, level);
@@ -413,16 +423,16 @@ static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc, int level)
 }
 
 /*
- * Just like ept_invalidate_emt() except that
+ * Just like ept_misconfigure_table() except that
  * - not all entries at the targeted level may need processing,
  * - the re-calculation flag gets always set.
  * The passed in range is guaranteed to not cross a page (table)
  * boundary at the targeted level.
  */
-static int ept_invalidate_emt_range(struct p2m_domain *p2m,
-                                    unsigned int target,
-                                    unsigned long first_gfn,
-                                    unsigned long last_gfn)
+static int ept_misconfigure_range(struct p2m_domain *p2m,
+                                  unsigned int target,
+                                  unsigned long first_gfn,
+                                  unsigned long last_gfn)
 {
     ept_entry_t *table;
     unsigned long gfn_remainder = first_gfn;
@@ -469,9 +479,9 @@ static int ept_invalidate_emt_range(struct p2m_domain *p2m,
         ept_entry_t e = atomic_read_ept_entry(&table[index]);
 
         if ( is_epte_valid(&e) && is_epte_present(&e) &&
-             (e.emt != MTRR_NUM_TYPES || !e.recalc) )
+             (!is_epte_misconfigured(&e) || !e.recalc) )
         {
-            e.emt = MTRR_NUM_TYPES;
+            misconfigure_epte(&e);
             e.recalc = 1;
             wrc = atomic_write_ept_entry(&table[index], e, target);
             ASSERT(wrc == 0);
@@ -517,11 +527,12 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
         i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
         e = atomic_read_ept_entry(&epte[i]);
 
+        /* If we've reached a leaf node, handle the reconfigure */
         if ( level == 0 || is_epte_superpage(&e) )
         {
             uint8_t ipat = 0;
 
-            if ( e.emt != MTRR_NUM_TYPES )
+            if ( !is_epte_misconfigured(&e) )
                 break;
 
             if ( level == 0 )
@@ -529,14 +540,19 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                 for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
                 {
                     e = atomic_read_ept_entry(&epte[i]);
-                    if ( e.emt == MTRR_NUM_TYPES )
+                    if ( is_epte_misconfigured(&e) )
                         e.emt = 0;
                     if ( !is_epte_valid(&e) || !is_epte_present(&e) )
                         continue;
+                    /* Recalculate emt and ipat values */
                     e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
                                                _mfn(e.mfn), 0, &ipat,
                                                e.sa_p2mt == p2m_mmio_direct);
                     e.ipat = ipat;
+                    /* 
+                     * Figure out what the type should be by looking at the current
+                     * logdirty range
+                     */
                     if ( e.recalc && p2m_is_changeable(e.sa_p2mt) )
                     {
                          e.sa_p2mt = p2m_is_logdirty_range(p2m, gfn + i, gfn + i)
@@ -555,6 +571,11 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                                              e.sa_p2mt == p2m_mmio_direct);
                 bool_t recalc = e.recalc;
 
+                /* 
+                 * Figure out what the type should be by looking at the current
+                 * logdirty range; if the whole range isn't one or the other, we
+                 * have to split up the superpage and try again
+                 */
                 if ( recalc && p2m_is_changeable(e.sa_p2mt) )
                 {
                      unsigned long mask = ~0UL << (level * EPT_TABLE_ORDER);
@@ -575,6 +596,12 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                           break;
                      }
                 }
+
+                /* 
+                 * This can happen either due to the logdirty range
+                 * above, or because epte_get_entry_empt() returned -1
+                 * due to the MMIO ranges overlap
+                 */
                 if ( unlikely(emt < 0) )
                 {
                     if ( ept_split_super_page(p2m, &e, level, level - 1) )
@@ -602,10 +629,18 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
             break;
         }
 
-        if ( e.emt == MTRR_NUM_TYPES )
+        /* 
+         * Otherwise:
+         * - If it is misconfigured, propagate the misconfiguration
+         * down
+         * - If it's not misconfigured but it's present and has no emt
+         * setting, continue walking
+         * - If it's not misconfigured, and isn't present, just stop.
+         */
+        if ( is_epte_misconfigured(&e) )
         {
             ASSERT(is_epte_present(&e));
-            ept_invalidate_emt(_mfn(e.mfn), e.recalc, level);
+            ept_misconfigure_table(_mfn(e.mfn), e.recalc, level);
             smp_wmb();
             e.emt = 0;
             e.recalc = 0;
@@ -1048,7 +1083,7 @@ static void ept_change_entry_type_global(struct p2m_domain *p2m,
     if ( !mfn )
         return;
 
-    if ( ept_invalidate_emt(_mfn(mfn), 1, ept_get_wl(&p2m->ept)) )
+    if ( ept_misconfigure_table(_mfn(mfn), 1, ept_get_wl(&p2m->ept)) )
         ept_sync_domain(p2m);
 }
 
@@ -1070,7 +1105,7 @@ static int ept_change_entry_type_range(struct p2m_domain *p2m,
         {
             unsigned long end_gfn = min(first_gfn | mask, last_gfn);
 
-            rc = ept_invalidate_emt_range(p2m, i, first_gfn, end_gfn);
+            rc = ept_misconfigure_range(p2m, i, first_gfn, end_gfn);
             sync |= rc;
             if ( rc < 0 || end_gfn >= last_gfn )
                 break;
@@ -1080,7 +1115,7 @@ static int ept_change_entry_type_range(struct p2m_domain *p2m,
         {
             unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);
 
-            rc = ept_invalidate_emt_range(p2m, i, start_gfn, last_gfn);
+            rc = ept_misconfigure_range(p2m, i, start_gfn, last_gfn);
             sync |= rc;
             if ( rc < 0 || start_gfn <= first_gfn )
                 break;
@@ -1106,7 +1141,7 @@ static void ept_memory_type_changed(struct p2m_domain *p2m)
     if ( !mfn )
         return;
 
-    if ( ept_invalidate_emt(_mfn(mfn), 0, ept_get_wl(&p2m->ept)) )
+    if ( ept_misconfigure_table(_mfn(mfn), 0, ept_get_wl(&p2m->ept)) )
         ept_sync_domain(p2m);
 }
 
@@ -1292,7 +1327,7 @@ static void ept_dump_p2m_table(unsigned char key)
             for ( i = ept_get_wl(ept); i > 0; i-- )
             {
                 ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
-                if ( ept_entry->emt == MTRR_NUM_TYPES )
+                if ( is_epte_misconfigured(ept_entry) )
                     c = '?';
                 ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
-- 
2.10.0


[-- Attachment #3: 0002-xen-p2m-Make-p2m_change_entry_type-generic.patch --]
[-- Type: application/octet-stream, Size: 8549 bytes --]

From 1687432df0374d7dfdf95a70e577e63d17caed60 Mon Sep 17 00:00:00 2001
From: George Dunlap <george.dunlap@citrix.com>
Date: Thu, 15 Dec 2016 16:47:55 +0800
Subject: [PATCH 2/2] xen/p2m: Make p2m_change_entry_type generic

At the moment, p2m_change_type works by keeping a description of what
the various ranges in the p2m table "should" be.

Instead, make it generic: Keep a list of what the old type you were
trying to change was, and change all entries of that type to the new
type.

If there is already a type change in progress, refuse to initiate a
new type change unless its effects can "stack" properly.  Otherwise
return -EBUSY.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>

---

TODO:
- Split out vram logdirty into a separate p2m type
- Actually count the number of entries
- Modify ept_change_entry_type_global() to return -EBUSY if necessary
- Implement a "p2m_finish_type_change" function / hypercall to clear
syncronously out the old type change
---
 xen/arch/x86/mm/p2m-ept.c | 92 ++++++++++++++++++++++++++---------------------
 xen/include/asm-x86/p2m.h |  5 +++
 2 files changed, 56 insertions(+), 41 deletions(-)

diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index aba80d5..2a503de 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -394,7 +394,8 @@ static inline void misconfigure_epte(ept_entry_t *e)
     e->emt = MTRR_NUM_TYPES;
 }
 
-static bool_t ept_misconfigure_table(mfn_t mfn, bool_t recalc, int level)
+static bool_t ept_misconfigure_table(struct p2m_domain *p2m, mfn_t mfn,
+                                     bool_t recalc, int level)
 {
     int rc;
     ept_entry_t *epte = map_domain_page(mfn);
@@ -553,11 +554,10 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                      * Figure out what the type should be by looking at the current
                      * logdirty range
                      */
-                    if ( e.recalc && p2m_is_changeable(e.sa_p2mt) )
+                    if ( e.recalc && e.sa_p2mt == p2m->recalc.ot )
                     {
-                         e.sa_p2mt = p2m_is_logdirty_range(p2m, gfn + i, gfn + i)
-                                     ? p2m_ram_logdirty : p2m_ram_rw;
-                         ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access);
+                        e.sa_p2mt = p2m->recalc.nt;
+                        ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access);
                     }
                     e.recalc = 0;
                     wrc = atomic_write_ept_entry(&epte[i], e, level);
@@ -569,38 +569,11 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                 int emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
                                              level * EPT_TABLE_ORDER, &ipat,
                                              e.sa_p2mt == p2m_mmio_direct);
-                bool_t recalc = e.recalc;
 
-                /* 
-                 * Figure out what the type should be by looking at the current
-                 * logdirty range; if the whole range isn't one or the other, we
-                 * have to split up the superpage and try again
-                 */
-                if ( recalc && p2m_is_changeable(e.sa_p2mt) )
-                {
-                     unsigned long mask = ~0UL << (level * EPT_TABLE_ORDER);
-
-                     switch ( p2m_is_logdirty_range(p2m, gfn & mask,
-                                                    gfn | ~mask) )
-                     {
-                     case 0:
-                          e.sa_p2mt = p2m_ram_rw;
-                          e.recalc = 0;
-                          break;
-                     case 1:
-                          e.sa_p2mt = p2m_ram_logdirty;
-                          e.recalc = 0;
-                          break;
-                     default: /* Force split. */
-                          emt = -1;
-                          break;
-                     }
-                }
 
                 /* 
-                 * This can happen either due to the logdirty range
-                 * above, or because epte_get_entry_empt() returned -1
-                 * due to the MMIO ranges overlap
+                 * This can happen because epte_get_entry_empt()
+                 * returned -1 due to the MMIO ranges overlap
                  */
                 if ( unlikely(emt < 0) )
                 {
@@ -618,9 +591,12 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
                 }
                 e.emt = emt;
                 e.ipat = ipat;
-                e.recalc = 0;
-                if ( recalc && p2m_is_changeable(e.sa_p2mt) )
+                if ( e.recalc && e.sa_p2mt == p2m->recalc.ot )
+                {
+                    e.sa_p2mt = p2m->recalc.nt;
                     ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access);
+                }
+                e.recalc = 0;
                 wrc = atomic_write_ept_entry(&epte[i], e, level);
                 ASSERT(wrc == 0);
             }
@@ -640,7 +616,7 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
         if ( is_epte_misconfigured(&e) )
         {
             ASSERT(is_epte_present(&e));
-            ept_misconfigure_table(_mfn(e.mfn), e.recalc, level);
+            ept_misconfigure_table(p2m, _mfn(e.mfn), e.recalc, level);
             smp_wmb();
             e.emt = 0;
             e.recalc = 0;
@@ -995,8 +971,7 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
     {
         if ( (recalc || ept_entry->recalc) &&
              p2m_is_changeable(ept_entry->sa_p2mt) )
-            *t = p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty
-                                                      : p2m_ram_rw;
+            *t = p2m->recalc.nt;
         else
             *t = ept_entry->sa_p2mt;
         *a = ept_entry->access;
@@ -1075,6 +1050,27 @@ out:
     return;
 }
 
+static int check_change_entry(struct p2m_domain *p2m,
+                              p2m_type_t ot, p2m_type_t nt)
+{
+    /* 
+     * Using misconfigure to changentry types is safe under the
+     * following conditions:
+     * 1. There are no outstanding 'recalc' entries, or
+     * 2. We're re-iterating the change done before (i.e., 
+     *    we're in the middle of a->b, and we do a->b again)
+     * 3. We're reversing a change done before (i.e.,
+     *    we did a->b and now we want to do b->a).
+     */
+    if ( p2m->recalc.count == 0 ||
+         (p2m->recalc.ot == ot && p2m->recalc.nt == nt) ||
+         (p2m->recalc.ot == nt && p2m->recalc.nt == ot) )
+        return 0;
+
+    return -EBUSY;
+         
+}
+
 static void ept_change_entry_type_global(struct p2m_domain *p2m,
                                          p2m_type_t ot, p2m_type_t nt)
 {
@@ -1083,7 +1079,13 @@ static void ept_change_entry_type_global(struct p2m_domain *p2m,
     if ( !mfn )
         return;
 
-    if ( ept_misconfigure_table(_mfn(mfn), 1, ept_get_wl(&p2m->ept)) )
+    /* FIXME: Change to return -EBUSY */
+    ASSERT(check_change_entry(p2m, ot, nt) == 0);
+
+    p2m->recalc.nt = nt;
+    p2m->recalc.ot = ot;
+
+    if ( ept_misconfigure_table(p2m, _mfn(mfn), 1, ept_get_wl(&p2m->ept)) )
         ept_sync_domain(p2m);
 }
 
@@ -1099,8 +1101,16 @@ static int ept_change_entry_type_range(struct p2m_domain *p2m,
     if ( !ept_get_asr(&p2m->ept) )
         return -EINVAL;
 
+    rc = check_change_entry(p2m, ot, nt);
+    if ( rc != 0 )
+        return rc;
+    
+    p2m->recalc.nt = nt;
+    p2m->recalc.ot = ot;
+
     for ( i = 0; i <= wl; )
     {
+        // FIXME: Change only the necessary entries
         if ( first_gfn & mask )
         {
             unsigned long end_gfn = min(first_gfn | mask, last_gfn);
@@ -1141,7 +1151,7 @@ static void ept_memory_type_changed(struct p2m_domain *p2m)
     if ( !mfn )
         return;
 
-    if ( ept_misconfigure_table(_mfn(mfn), 0, ept_get_wl(&p2m->ept)) )
+    if ( ept_misconfigure_table(p2m, _mfn(mfn), 0, ept_get_wl(&p2m->ept)) )
         ept_sync_domain(p2m);
 }
 
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 7035860..7890263 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -336,6 +336,11 @@ struct p2m_domain {
         struct ept_data ept;
         /* NPT-equivalent structure could be added here. */
     };
+
+    struct {
+        p2m_type_t ot, nt;
+        unsigned long count;
+    } recalc;
 };
 
 /* get host p2m table */
-- 
2.10.0


[-- Attachment #4: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2016-12-16  5:05 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-16  5:05 RFC: p2m type change draft George Dunlap

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.