From: Andres Lagar-Cavilla <andres@lagarcavilla.org>
To: xen-devel@lists.xen.org
Cc: andres@gridcentric.ca, keir.xen@gmail.com, tim@xen.org,
	adin@gridcentric.ca
Subject: [PATCH 3 of 3] x86/mem_sharing: For shared pages with many references, use a hash table instead of a list
Date: Thu, 12 Apr 2012 10:16:14 -0400	[thread overview]
Message-ID: <7b606c043208d98d218b.1334240174@xdev.gridcentric.ca> (raw)
In-Reply-To: <patchbomb.1334240171@xdev.gridcentric.ca>

 xen/arch/x86/mm/mem_sharing.c     |  170 +++++++++++++++++++++++++++++++++++--
 xen/include/asm-x86/mem_sharing.h |   13 ++-
 2 files changed, 169 insertions(+), 14 deletions(-)


For shared frames that have many references, the doubly-linked list used to
store the rmap results in costly scans during unshare operations. To alleviate
this overhead, replace the linked list with a hash table. However, hash tables
are space-intensive, so only use them for pages that have "many" (an arbitrary
threshold) references.

Unsharing is heavily exercised during domain destruction. In experimental
testing, for a domain in which over 100,000 pages point to the same shared
frame, domain destruction dropped from over 7 minutes(!) to less than two seconds.
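
For a sense of scale, the following standalone sketch (not part of the patch)
recomputes the thresholds, assuming a 4096-byte page and a 16-byte
struct list_head (two pointers, as on x86-64), plus the average bucket length
for the 100,000-reference case above:

/* Standalone sketch: recomputes the rmap thresholds defined in the patch,
 * assuming PAGE_SIZE = 4096 and a two-pointer (16-byte) list head. */
#include <stdio.h>

#define PAGE_SIZE               4096UL
#define LIST_HEAD_SIZE          (2 * sizeof(void *))       /* next + prev */
#define RMAP_HASHTAB_ORDER      0
#define RMAP_HASHTAB_SIZE       \
        ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / LIST_HEAD_SIZE)
#define RMAP_HEAVY_SHARED_PAGE  RMAP_HASHTAB_SIZE              /* list -> hash */
#define RMAP_LIGHT_SHARED_PAGE  (RMAP_HEAVY_SHARED_PAGE >> 2)  /* hash -> list */

int main(void)
{
    unsigned long refs = 100000;   /* illustrative, from the test above */

    printf("buckets:           %lu\n", RMAP_HASHTAB_SIZE);        /* 256 */
    printf("heavy threshold:   %lu\n", RMAP_HEAVY_SHARED_PAGE);   /* 256 */
    printf("light threshold:   %lu\n", RMAP_LIGHT_SHARED_PAGE);   /*  64 */
    /* Average entries scanned per lookup drops from O(refs) to refs/buckets. */
    printf("avg bucket length: %lu\n", refs / RMAP_HASHTAB_SIZE); /* ~390 */
    return 0;
}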

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r 1730bff8fccf -r 7b606c043208 xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -49,6 +49,17 @@ DEFINE_PER_CPU(pg_lock_data_t, __pld);
 #define MEM_SHARING_DEBUG(_f, _a...)                                  \
     debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
 
+/* Reverse map defines */
+#define RMAP_HASHTAB_ORDER  0
+#define RMAP_HASHTAB_SIZE   \
+        ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / sizeof(struct list_head))
+#define RMAP_USES_HASHTAB(page) \
+        ((page)->sharing->hash_table.flag == NULL)
+#define RMAP_HEAVY_SHARED_PAGE   RMAP_HASHTAB_SIZE
+/* A bit of hysteresis. We don't want to be mutating between list and hash
+ * table constantly. */
+#define RMAP_LIGHT_SHARED_PAGE   (RMAP_HEAVY_SHARED_PAGE >> 2)
+
 #if MEM_SHARING_AUDIT
 
 static struct list_head shr_audit_list;
@@ -72,6 +83,11 @@ static inline void audit_add_list(struct
 /* Removes from the audit list and cleans up the page sharing metadata. */
 static inline void page_sharing_dispose(struct page_info *page)
 {
+    /* Unlikely given our thresholds, but we should be careful. */
+    if ( unlikely(RMAP_USES_HASHTAB(page)) )
+        free_xenheap_pages(page->sharing->hash_table.bucket, 
+                            RMAP_HASHTAB_ORDER);
+
     spin_lock(&shr_audit_lock);
     list_del_rcu(&page->sharing->entry);
     spin_unlock(&shr_audit_lock);
@@ -89,6 +105,10 @@ int mem_sharing_audit(void)
 #define audit_add_list(p)  ((void)0)
 static inline void page_sharing_dispose(struct page_info *page)
 {
+    /* Unlikely given our thresholds, but we should be careful. */
+    if ( unlikely(RMAP_USES_HASHTAB(page)) )
+        free_xenheap_pages(page->sharing->hash_table.bucket, 
+                            RMAP_HASHTAB_ORDER);
     xfree(page->sharing);
 }
 
@@ -145,6 +165,11 @@ static atomic_t nr_saved_mfns   = ATOMIC
 static atomic_t nr_shared_mfns  = ATOMIC_INIT(0);
 
 /** Reverse map **/
+/* Every shared frame keeps a reverse map (rmap) of <domain, gfn> tuples that
+ * this shared frame backs. For pages with a low degree of sharing, an O(n)
+ * search linked list is good enough. For pages with higher degree of sharing,
+ * we use a hash table instead. */
+
 typedef struct gfn_info
 {
     unsigned long gfn;
@@ -155,20 +180,109 @@ typedef struct gfn_info
 static inline void
 rmap_init(struct page_info *page)
 {
+    /* We always start off as a doubly linked list. */
     INIT_LIST_HEAD(&page->sharing->gfns);
 }
 
+/* Exceedingly simple "hash function" */
+#define HASH(domain, gfn)       \
+    (((gfn) + (domain)) % RMAP_HASHTAB_SIZE)
+
+/* Conversions. Tuned by the thresholds. Should only happen twice 
+ * (once each) during the lifetime of a shared page */
+static inline int
+rmap_list_to_hash_table(struct page_info *page)
+{
+    unsigned int i;
+    struct list_head *pos, *tmp, *b =
+        alloc_xenheap_pages(RMAP_HASHTAB_ORDER, 0);
+
+    if ( b == NULL )
+        return -ENOMEM;
+
+    for (i = 0; i < RMAP_HASHTAB_SIZE; i++)
+        INIT_LIST_HEAD(b + i);
+
+    list_for_each_safe(pos, tmp, &page->sharing->gfns)
+    {
+        gfn_info_t *gfn_info = list_entry(pos, gfn_info_t, list);
+        struct list_head *bucket = b + HASH(gfn_info->domain, gfn_info->gfn);
+        list_del(pos);
+        list_add(pos, bucket);
+    }
+
+    page->sharing->hash_table.bucket = b;
+    page->sharing->hash_table.flag   = NULL;
+
+    return 0;
+}
+
 static inline void
-rmap_del(gfn_info_t *gfn_info, struct page_info *page)
+rmap_hash_table_to_list(struct page_info *page)
 {
+    unsigned int i;
+    struct list_head *bucket = page->sharing->hash_table.bucket;
+
+    INIT_LIST_HEAD(&page->sharing->gfns);
+
+    for (i = 0; i < RMAP_HASHTAB_SIZE; i++)
+    {
+        struct list_head *pos, *tmp, *head = bucket + i;
+        list_for_each_safe(pos, tmp, head)
+        {
+            list_del(pos);
+            list_add(pos, &page->sharing->gfns);
+        }
+    }
+
+    free_xenheap_pages(bucket, RMAP_HASHTAB_ORDER);
+}
+
+/* Generic accessors to the rmap */
+static inline unsigned long
+rmap_count(struct page_info *pg)
+{
+    unsigned long count;
+    unsigned long t = read_atomic(&pg->u.inuse.type_info);
+    count = t & PGT_count_mask;
+    if ( t & PGT_locked )
+        count--;
+    return count;
+}
+
+/* The page type count is always decreased after removing from the rmap.
+ * Use a convert flag to avoid mutating the rmap if in the middle of an 
+ * iterator, or if the page will be soon destroyed anyways. */
+static inline void
+rmap_del(gfn_info_t *gfn_info, struct page_info *page, int convert)
+{
+    if ( RMAP_USES_HASHTAB(page) && convert &&
+         (rmap_count(page) <= RMAP_LIGHT_SHARED_PAGE) )
+        rmap_hash_table_to_list(page);
+
+    /* Regardless of rmap type, same removal operation */
     list_del(&gfn_info->list);
 }
 
+/* The page type count is always increased before adding to the rmap. */
 static inline void
 rmap_add(gfn_info_t *gfn_info, struct page_info *page)
 {
+    struct list_head *head;
+
+    if ( !RMAP_USES_HASHTAB(page) &&
+         (rmap_count(page) >= RMAP_HEAVY_SHARED_PAGE) )
+        /* The conversion may fail with ENOMEM. We'll be less efficient,
+         * but no reason to panic. */
+        (void)rmap_list_to_hash_table(page);
+
+    head = (RMAP_USES_HASHTAB(page)) ?
+        page->sharing->hash_table.bucket + 
+                            HASH(gfn_info->domain, gfn_info->gfn) :
+        &page->sharing->gfns;
+
     INIT_LIST_HEAD(&gfn_info->list);
-    list_add(&gfn_info->list, &page->sharing->gfns);
+    list_add(&gfn_info->list, head);
 }
 
 static inline gfn_info_t *
@@ -176,27 +290,33 @@ rmap_retrieve(uint16_t domain_id, unsign
                             struct page_info *page)
 {
     gfn_info_t *gfn_info;
-    struct list_head *le;
-    list_for_each(le, &page->sharing->gfns)
+    struct list_head *le, *head;
+
+    head = (RMAP_USES_HASHTAB(page)) ?
+        page->sharing->hash_table.bucket + HASH(domain_id, gfn) :
+        &page->sharing->gfns;
+
+    list_for_each(le, head)
     {
         gfn_info = list_entry(le, gfn_info_t, list);
         if ( (gfn_info->gfn == gfn) && (gfn_info->domain == domain_id) )
             return gfn_info;
     }
+
+    /* Nothing was found */
     return NULL;
 }
 
 /* Returns true if the rmap has only one entry. O(1) complexity. */
 static inline int rmap_has_one_entry(struct page_info *page)
 {
-    struct list_head *head = &page->sharing->gfns;
-    return (head->next != head) && (head->next->next == head);
+    return (rmap_count(page) == 1);
 }
 
 /* Returns true if the rmap has any entries. O(1) complexity. */
 static inline int rmap_has_entries(struct page_info *page)
 {
-    return (!list_empty(&page->sharing->gfns));
+    return (rmap_count(page) != 0);
 }
 
 /* The iterator hides the details of how the rmap is implemented. This
@@ -204,20 +324,43 @@ static inline int rmap_has_entries(struc
 struct rmap_iterator {
     struct list_head *curr;
     struct list_head *next;
+    unsigned int bucket;
 };
 
 static inline void
 rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri)
 {
-    ri->curr = &page->sharing->gfns;
+    ri->curr = (RMAP_USES_HASHTAB(page)) ?
+                page->sharing->hash_table.bucket :
+                &page->sharing->gfns;
     ri->next = ri->curr->next; 
+    ri->bucket = 0;
 }
 
 static inline gfn_info_t *
 rmap_iterate(struct page_info *page, struct rmap_iterator *ri)
 {
-    if ( ri->next == &page->sharing->gfns)
-        return NULL;
+    struct list_head *head = (RMAP_USES_HASHTAB(page)) ?
+                page->sharing->hash_table.bucket + ri->bucket :
+                &page->sharing->gfns;
+
+retry:
+    if ( ri->next == head)
+    {
+        if ( RMAP_USES_HASHTAB(page) )
+        {
+            ri->bucket++;
+            if ( ri->bucket >= RMAP_HASHTAB_SIZE )
+                /* No more hash table buckets */
+                return NULL;
+            head = page->sharing->hash_table.bucket + ri->bucket;
+            ri->curr = head;
+            ri->next = ri->curr->next;
+            goto retry;
+        } else
+            /* List exhausted */
+            return NULL;
+    }
 
     ri->curr = ri->next;
     ri->next = ri->curr->next;
@@ -253,7 +396,7 @@ static inline void mem_sharing_gfn_destr
     atomic_dec(&d->shr_pages);
 
     /* Free the gfn_info structure. */
-    rmap_del(gfn_info, page);
+    rmap_del(gfn_info, page, 1);
     xfree(gfn_info);
 }
 
@@ -870,8 +1013,9 @@ int mem_sharing_share_pages(struct domai
         /* Get the source page and type, this should never fail: 
          * we are under shr lock, and got a successful lookup */
         BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
-        /* Move the gfn_info from client list to source list */
-        rmap_del(gfn, cpage);
+        /* Move the gfn_info from client list to source list.
+         * Don't change the type of rmap for the client page. */
+        rmap_del(gfn, cpage, 0);
         rmap_add(gfn, spage);
         put_page_and_type(cpage);
         d = get_domain_by_id(gfn->domain);
diff -r 1730bff8fccf -r 7b606c043208 xen/include/asm-x86/mem_sharing.h
--- a/xen/include/asm-x86/mem_sharing.h
+++ b/xen/include/asm-x86/mem_sharing.h
@@ -30,6 +30,13 @@
 
 typedef uint64_t shr_handle_t; 
 
+typedef struct rmap_hashtab {
+    struct list_head *bucket;
+    /* Overlaps with prev pointer of list_head in union below.
+     * Unlike the prev pointer, this can be NULL. */
+    void *flag;
+} rmap_hashtab_t;
+
 struct page_sharing_info
 {
     struct page_info *pg;   /* Back pointer to the page. */
@@ -38,7 +45,11 @@ struct page_sharing_info
     struct list_head entry; /* List of all shared pages (entry). */
     struct rcu_head rcu_head; /* List of all shared pages (entry). */
 #endif
-    struct list_head gfns;  /* List of domains and gfns for this page (head). */
+    /* Reverse map of <domain,gfn> tuples for this shared frame. */
+    union {
+        struct list_head    gfns;
+        rmap_hashtab_t      hash_table;
+    };
 };
 
 #ifdef __x86_64__
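
The union above relies on the flag field overlapping the prev pointer of the
embedded list head: in list mode prev always points at a real node and is never
NULL, so a NULL flag unambiguously means the hash table is in use. A minimal
standalone sketch of that discrimination trick, with hypothetical names and a
simplified node type, might look like:

/* Sketch of the discriminating-union trick; names (demo_rmap, demo_uses_hashtab)
 * are hypothetical, the real code uses page->sharing and RMAP_USES_HASHTAB(). */
#include <stdio.h>

struct demo_list_head { struct demo_list_head *next, *prev; };

struct demo_hashtab {
    struct demo_list_head *bucket;  /* overlaps 'next' */
    void *flag;                     /* overlaps 'prev'; NULL only in hash mode */
};

union demo_rmap {
    struct demo_list_head gfns;     /* list mode: prev is never NULL */
    struct demo_hashtab   hash;     /* hash mode: flag is set to NULL */
};

static int demo_uses_hashtab(const union demo_rmap *r)
{
    return r->hash.flag == NULL;
}

int main(void)
{
    union demo_rmap r;
    struct demo_list_head buckets[4];

    /* List mode: an empty list points back at itself, so prev != NULL. */
    r.gfns.next = r.gfns.prev = &r.gfns;
    printf("list mode -> hashtab? %d\n", demo_uses_hashtab(&r));   /* 0 */

    /* Hash mode: bucket points at an external array, flag is cleared. */
    r.hash.bucket = buckets;
    r.hash.flag = NULL;
    printf("hash mode -> hashtab? %d\n", demo_uses_hashtab(&r));   /* 1 */
    return 0;
}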


Thread overview: 12+ messages
2012-04-12 14:16 [PATCH 0 of 3] RFC: x86 memory sharing performance improvements Andres Lagar-Cavilla
2012-04-12 14:16 ` [PATCH 1 of 3] x86/mm/sharing: Clean ups for relinquishing shared pages on destroy Andres Lagar-Cavilla
2012-04-18 12:42   ` Tim Deegan
2012-04-18 13:06     ` Andres Lagar-Cavilla
2012-04-12 14:16 ` [PATCH 2 of 3] x86/mem_sharing: modularize reverse map for shared frames Andres Lagar-Cavilla
2012-04-18 14:05   ` Tim Deegan
2012-04-18 14:19     ` Andres Lagar-Cavilla
2012-04-12 14:16 ` Andres Lagar-Cavilla [this message]
2012-04-18 15:35   ` [PATCH 3 of 3] x86/mem_sharing: For shared pages with many references, use a hash table instead of a list Tim Deegan
2012-04-18 16:18     ` Andres Lagar-Cavilla
2012-04-24 19:33       ` Andres Lagar-Cavilla
2012-04-24 19:48 [PATCH 0 of 3] x86/mem_sharing: Improve performance of rmap, fix cascading bugs Andres Lagar-Cavilla
2012-04-24 19:48 ` [PATCH 3 of 3] x86/mem_sharing: For shared pages with many references, use a hash table instead of a list Andres Lagar-Cavilla
