From: Tim Deegan <tim@xen.org>
To: Jan Beulich <JBeulich@suse.com>
Cc: xen-devel <xen-devel@lists.xenproject.org>
Subject: Re: open-coded page list manipulation in shadow code
Date: Thu, 29 Jan 2015 18:30:46 +0100
Message-ID: <20150129173046.GG30353@deinos.phlegethon.org>
In-Reply-To: <20150127105026.GA38811@deinos.phlegethon.org>

Hi, 

At 11:50 +0100 on 27 Jan (1422355826), Tim Deegan wrote:
> Let me have a think about it on Thursday (assuming I don't have too
> many patch series to review by then!)

OK, here's what I have.  It keeps the mechanism the same, threading on
the main page list entry, but tidies it up to use the page_list operations
rather than open-coding.  I've tested it (lightly - basically just
boot testing) with 32bit, 32pae and 64bit Windows VMs (all SMP), with
bigmem=y and bigmem=n.
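
To show the shape of the change: where the old code chased pdx links
by hand, the fragment walks now go through the page_list accessors.
A minimal sketch (illustrative only -- walk_shadow_fragment is a
made-up name, not something in the patch):

    /* Illustrative only: step along the tail pages of a multi-page
     * shadow via the page_list interface rather than dereferencing
     * sp->list.next directly.  'pin_list' is only consulted by the
     * compact-pdx form to recognise the ends of the list. */
    static void walk_shadow_fragment(struct page_info *sp,
                                     struct page_list_head *pin_list,
                                     unsigned int pages)
    {
        unsigned int i;

        for ( i = 1; i < pages; i++ )
        {
            sp = page_list_next(sp, pin_list);
            ASSERT(sp && !sp->u.sh.head);
        }
    }

The same source then compiles whether 'list' is the compact pdx form
or a plain list_entry.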

It was developed on top of your bigmem series, but it should apply OK
before patch 3/4, removing the need to nobble shadow-paging in that
patch.

Cheers,

Tim.

From 57d211e47ef42012791252b4897231b392b7aff7 Mon Sep 17 00:00:00 2001
From: Tim Deegan <tim@xen.org>
Date: Thu, 29 Jan 2015 17:09:58 +0000
Subject: [PATCH] x86/shadow: Tidy up fragmentary page lists in multi-page
 shadows.

Multi-page shadows are linked together using the 'list' field.  When
those shadows are in the pinned list, the list fragments are spliced
into the pinned list; otherwise they have no associated list head.

Rework the code that handles these fragments to use the page_list
interface rather than manipulating the fields directly.  This makes
the code cleaner, and allows the 'list' field to be either the
compact pdx form or a normal list_entry.
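
For example, in prev_pinned_shadow() the open-coded step

    p = pdx_to_page(page->list.prev);

becomes

    p = page_list_prev(page, pin_list);

which reads correctly for both list representations.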

Signed-off-by: Tim Deegan <tim@xen.org>
---
 xen/arch/x86/mm/shadow/common.c  |  42 +++--------
 xen/arch/x86/mm/shadow/multi.c   |  14 ++--
 xen/arch/x86/mm/shadow/private.h | 151 ++++++++++++++++++++++-----------------
 xen/include/xen/mm.h             |   9 +++
 4 files changed, 114 insertions(+), 102 deletions(-)

diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 61110bb..12a86c2 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1220,33 +1220,6 @@ static unsigned int shadow_min_acceptable_pages(struct domain *d)
     return (vcpu_count * 128);
 } 
 
-/* Figure out the size (in pages) of a given shadow type */
-static inline u32
-shadow_size(unsigned int shadow_type) 
-{
-    static const u32 type_to_size[SH_type_unused] = {
-        1, /* SH_type_none           */
-        2, /* SH_type_l1_32_shadow   */
-        2, /* SH_type_fl1_32_shadow  */
-        4, /* SH_type_l2_32_shadow   */
-        1, /* SH_type_l1_pae_shadow  */
-        1, /* SH_type_fl1_pae_shadow */
-        1, /* SH_type_l2_pae_shadow  */
-        1, /* SH_type_l2h_pae_shadow */
-        1, /* SH_type_l1_64_shadow   */
-        1, /* SH_type_fl1_64_shadow  */
-        1, /* SH_type_l2_64_shadow   */
-        1, /* SH_type_l2h_64_shadow  */
-        1, /* SH_type_l3_64_shadow   */
-        1, /* SH_type_l4_64_shadow   */
-        1, /* SH_type_p2m_table      */
-        1, /* SH_type_monitor_table  */
-        1  /* SH_type_oos_snapshot   */
-        };
-    ASSERT(shadow_type < SH_type_unused);
-    return type_to_size[shadow_type];
-}
-
 /* Dispatcher function: call the per-mode function that will unhook the
  * non-Xen mappings in this top-level shadow mfn.  With user_only == 1,
  * unhooks only the user-mode mappings. */
@@ -1489,9 +1462,6 @@ mfn_t shadow_alloc(struct domain *d,
         break;
     }
 
-    /* Page lists don't have pointers back to the head structure, so
-     * it's safe to use a head structure on the stack to link the pages
-     * together. */
     INIT_PAGE_LIST_HEAD(&tmp_list);
 
     /* Init page info fields and clear the pages */
@@ -1525,6 +1495,14 @@ mfn_t shadow_alloc(struct domain *d,
     if ( shadow_type >= SH_type_min_shadow 
          && shadow_type <= SH_type_max_shadow )
         sp->u.sh.head = 1;
+
+#ifndef PAGE_LIST_NULL
+    /* The temporary list-head is on our stack.  Blank out the
+     * pointers to it in the shadows, just to get a clean failure if
+     * we accidentally follow them. */
+    tmp_list.next->prev = tmp_list.prev->next = NULL;
+#endif
+
     return page_to_mfn(sp);
 }
 
@@ -1533,6 +1511,7 @@ mfn_t shadow_alloc(struct domain *d,
 void shadow_free(struct domain *d, mfn_t smfn)
 {
     struct page_info *next = NULL, *sp = mfn_to_page(smfn); 
+    struct page_list_head *pin_list;
     unsigned int pages;
     u32 shadow_type;
     int i;
@@ -1544,6 +1523,7 @@ void shadow_free(struct domain *d, mfn_t smfn)
     ASSERT(shadow_type != SH_type_none);
     ASSERT(sp->u.sh.head || (shadow_type > SH_type_max_shadow));
     pages = shadow_size(shadow_type);
+    pin_list = &d->arch.paging.shadow.pinned_shadows;
 
     for ( i = 0; i < pages; i++ ) 
     {
@@ -1564,7 +1544,7 @@ void shadow_free(struct domain *d, mfn_t smfn)
 #endif
         /* Get the next page before we overwrite the list header */
         if ( i < pages - 1 )
-            next = pdx_to_page(sp->list.next);
+            next = page_list_next(sp, pin_list);
         /* Strip out the type: this is now a free shadow page */
         sp->u.sh.type = sp->u.sh.head = 0;
         /* Remember the TLB timestamp so we will know whether to flush 
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 65815bb..5fc10c9 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -428,20 +428,20 @@ sh_guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
 /* From one page of a multi-page shadow, find the next one */
 static inline mfn_t sh_next_page(mfn_t smfn)
 {
-    mfn_t next;
-    struct page_info *pg = mfn_to_page(smfn);
+    struct page_info *pg = mfn_to_page(smfn), *next;
+    struct page_list_head h = PAGE_LIST_HEAD_INIT(h);
 
     ASSERT(pg->u.sh.type == SH_type_l1_32_shadow
            || pg->u.sh.type == SH_type_fl1_32_shadow
            || pg->u.sh.type == SH_type_l2_32_shadow);
     ASSERT(pg->u.sh.type == SH_type_l2_32_shadow || pg->u.sh.head);
-    ASSERT(pg->list.next != PAGE_LIST_NULL);
 
-    next = _mfn(pdx_to_pfn(pg->list.next));
+    next = page_list_next(pg, &h);
 
-    ASSERT(mfn_to_page(next)->u.sh.type == pg->u.sh.type);
-    ASSERT(!mfn_to_page(next)->u.sh.head);
-    return next;
+    ASSERT(next);
+    ASSERT(next->u.sh.type == pg->u.sh.type);
+    ASSERT(!next->u.sh.head);
+    return page_to_mfn(next);
 }
 
 static inline u32
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index b778fcf..b145cee 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -318,6 +318,33 @@ static inline int mfn_oos_may_write(mfn_t gmfn)
 }
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
 
+/* Figure out the size (in pages) of a given shadow type */
+static inline u32
+shadow_size(unsigned int shadow_type) 
+{
+    static const u32 type_to_size[SH_type_unused] = {
+        1, /* SH_type_none           */
+        2, /* SH_type_l1_32_shadow   */
+        2, /* SH_type_fl1_32_shadow  */
+        4, /* SH_type_l2_32_shadow   */
+        1, /* SH_type_l1_pae_shadow  */
+        1, /* SH_type_fl1_pae_shadow */
+        1, /* SH_type_l2_pae_shadow  */
+        1, /* SH_type_l2h_pae_shadow */
+        1, /* SH_type_l1_64_shadow   */
+        1, /* SH_type_fl1_64_shadow  */
+        1, /* SH_type_l2_64_shadow   */
+        1, /* SH_type_l2h_64_shadow  */
+        1, /* SH_type_l3_64_shadow   */
+        1, /* SH_type_l4_64_shadow   */
+        1, /* SH_type_p2m_table      */
+        1, /* SH_type_monitor_table  */
+        1  /* SH_type_oos_snapshot   */
+        };
+    ASSERT(shadow_type < SH_type_unused);
+    return type_to_size[shadow_type];
+}
+
 /******************************************************************************
  * Various function declarations 
  */
@@ -586,22 +613,25 @@ prev_pinned_shadow(const struct page_info *page,
                    const struct domain *d)
 {
     struct page_info *p;
+    const struct page_list_head *pin_list;
+
+    pin_list = &d->arch.paging.shadow.pinned_shadows;
 
-    if ( page == d->arch.paging.shadow.pinned_shadows.next ) 
+    if ( page_list_empty(pin_list) || page == page_list_first(pin_list) )
         return NULL;
-    
+
     if ( page == NULL ) /* If no current place, start at the tail */
-        p = d->arch.paging.shadow.pinned_shadows.tail;
+        p = page_list_last(pin_list);
     else
-        p = pdx_to_page(page->list.prev);
+        p = page_list_prev(page, pin_list);
     /* Skip over the non-tail parts of multi-page shadows */
     if ( p && p->u.sh.type == SH_type_l2_32_shadow )
     {
-        p = pdx_to_page(p->list.prev);
+        p = page_list_prev(p, pin_list);
         ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
-        p = pdx_to_page(p->list.prev);
+        p = page_list_prev(p, pin_list);
         ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
-        p = pdx_to_page(p->list.prev);
+        p = page_list_prev(p, pin_list);
         ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
     }
     ASSERT(!p || p->u.sh.head);
@@ -618,49 +648,48 @@ prev_pinned_shadow(const struct page_info *page,
  * Returns 0 for failure, 1 for success. */
 static inline int sh_pin(struct vcpu *v, mfn_t smfn)
 {
-    struct page_info *sp;
-    struct page_list_head h, *pin_list;
-    
+    struct page_info *sp[4];
+    struct page_list_head *pin_list;
+    unsigned int i, pages;
+    bool_t already_pinned;
+
     ASSERT(mfn_valid(smfn));
-    sp = mfn_to_page(smfn);
-    ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
-    ASSERT(sp->u.sh.head);
+    sp[0] = mfn_to_page(smfn);
+    pages = shadow_size(sp[0]->u.sh.type);
+    already_pinned = sp[0]->u.sh.pinned;
+    ASSERT(sh_type_is_pinnable(v, sp[0]->u.sh.type));
+    ASSERT(sp[0]->u.sh.head);
+
+    pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+    if ( already_pinned && sp[0] == page_list_first(pin_list) )
+        return 1;
 
     /* Treat the up-to-four pages of the shadow as a unit in the list ops */
-    h.next = h.tail = sp; 
-    if ( sp->u.sh.type == SH_type_l2_32_shadow ) 
+    for ( i = 1; i < pages; i++ )
     {
-        h.tail = pdx_to_page(h.tail->list.next);
-        h.tail = pdx_to_page(h.tail->list.next);
-        h.tail = pdx_to_page(h.tail->list.next);
-        ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow); 
+        sp[i] = page_list_next(sp[i - 1], pin_list);
+        ASSERT(sp[i]->u.sh.type == sp[0]->u.sh.type);
+        ASSERT(!sp[i]->u.sh.head);
     }
-    pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
 
-    if ( sp->u.sh.pinned )
+    if ( already_pinned )
     {
-        /* Already pinned: take it out of the pinned-list so it can go 
-         * at the front */
-        if ( pin_list->next == h.next )
-            return 1;
-        page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
-        if ( pin_list->tail == h.tail )
-            pin_list->tail = page_list_prev(h.next, pin_list);
-        else
-            page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
-        h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
+        /* Take it out of the pinned-list so it can go at the front */
+        for ( i = 0; i < pages; i++ )
+            page_list_del(sp[i], pin_list);
     }
     else
     {
         /* Not pinned: pin it! */
         if ( !sh_get_ref(v, smfn, 0) )
             return 0;
-        sp->u.sh.pinned = 1;
-        ASSERT(h.next->list.prev == PAGE_LIST_NULL);
-        ASSERT(h.tail->list.next == PAGE_LIST_NULL);
+        sp[0]->u.sh.pinned = 1;
     }
+
     /* Put it at the head of the list of pinned shadows */
-    page_list_splice(&h, pin_list);
+    for ( i = pages; i > 0; i-- )
+        page_list_add(sp[i - 1], pin_list);
+
     return 1;
 }
 
@@ -668,46 +697,40 @@ static inline int sh_pin(struct vcpu *v, mfn_t smfn)
  * of pinned shadows, and release the extra ref. */
 static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
 {
-    struct page_list_head h, *pin_list;
-    struct page_info *sp;
-    
+    struct page_list_head tmp_list, *pin_list;
+    struct page_info *sp, *next;
+    unsigned int i, head_type;
+
     ASSERT(mfn_valid(smfn));
     sp = mfn_to_page(smfn);
+    head_type = sp->u.sh.type;
     ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
     ASSERT(sp->u.sh.head);
 
-    /* Treat the up-to-four pages of the shadow as a unit in the list ops */
-    h.next = h.tail = sp; 
-    if ( sp->u.sh.type == SH_type_l2_32_shadow ) 
-    {
-        h.tail = pdx_to_page(h.tail->list.next);
-        h.tail = pdx_to_page(h.tail->list.next);
-        h.tail = pdx_to_page(h.tail->list.next);
-        ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow); 
-    }
-    pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
-
     if ( !sp->u.sh.pinned )
         return;
-
     sp->u.sh.pinned = 0;
 
-    /* Cut the sub-list out of the list of pinned shadows */
-    if ( pin_list->next == h.next && pin_list->tail == h.tail )
-        pin_list->next = pin_list->tail = NULL;
-    else 
+    /* Cut the sub-list out of the list of pinned shadows,
+     * stitching it back into a list fragment of its own. */
+    pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+    INIT_PAGE_LIST_HEAD(&tmp_list);
+    for ( i = 0; i < shadow_size(head_type); i++ )
     {
-        if ( pin_list->next == h.next )
-            pin_list->next = page_list_next(h.tail, pin_list);
-        else
-            page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
-        if ( pin_list->tail == h.tail )
-            pin_list->tail = page_list_prev(h.next, pin_list);
-        else
-            page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
+        ASSERT(sp->u.sh.type == head_type);
+        ASSERT(!i || !sp->u.sh.head);
+        next = page_list_next(sp, pin_list);
+        page_list_del(sp, pin_list);
+        page_list_add_tail(sp, &tmp_list);
+        sp = next;
     }
-    h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
-    
+#ifndef PAGE_LIST_NULL
+    /* The temporary list-head is on our stack.  Blank out the
+     * pointers to it in the shadows, just to get a clean failure if
+     * we accidentally follow them. */
+    tmp_list.next->prev = tmp_list.prev->next = NULL;
+#endif
+
     sh_put_ref(v, smfn, 0);
 }
 
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 74a65a6..a62ee1e 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -172,6 +172,11 @@ page_list_first(const struct page_list_head *head)
     return head->next;
 }
 static inline struct page_info *
+page_list_last(const struct page_list_head *head)
+{
+    return head->tail;
+}
+static inline struct page_info *
 page_list_next(const struct page_info *page,
                const struct page_list_head *head)
 {
@@ -331,8 +336,12 @@ page_list_splice(struct page_list_head *list, struct page_list_head *head)
 # define page_list_empty                 list_empty
 # define page_list_first(hd)             list_entry((hd)->next, \
                                                     struct page_info, list)
+# define page_list_last(hd)              list_entry((hd)->prev, \
+                                                    struct page_info, list)
 # define page_list_next(pg, hd)          list_entry((pg)->list.next, \
                                                     struct page_info, list)
+# define page_list_prev(pg, hd)          list_entry((pg)->list.prev, \
+                                                    struct page_info, list)
 # define page_list_add(pg, hd)           list_add(&(pg)->list, hd)
 # define page_list_add_tail(pg, hd)      list_add_tail(&(pg)->list, hd)
 # define page_list_del(pg, hd)           list_del(&(pg)->list)
-- 
2.1.4
