From: Bob Liu <lliubbo@gmail.com>
To: xen-devel@lists.xenproject.org
Cc: keir@xen.org, ian.campbell@citrix.com,
	George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
	jbeulich@suse.com
Subject: [PATCH 2/4] xen: introduce a "scrub" free page list
Date: Tue, 17 Jun 2014 19:49:41 +0800
Message-ID: <1403005783-30746-2-git-send-email-bob.liu@oracle.com>
In-Reply-To: <1403005783-30746-1-git-send-email-bob.liu@oracle.com>

Because of page scrubbing, destroying a domain with a large amount of memory
is very slow; it takes around 10 minutes to destroy a guest with nearly 1 TB
of memory.

This patch introduces a "scrub" free page list; pages on this list must be
scrubbed before use. During domain destruction, pages are merely marked
PGC_need_scrub and added to this list, so that xl can return quickly.

In alloc_domheap_pages():
  - If there are pages on the normal "heap" free list, allocate them.
  - Otherwise, try to allocate from the "scrub" free list at the same order
    and scrub the pages synchronously (a minimal sketch of this fall-back
    follows).
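
A minimal, self-contained sketch of this fall-back (illustration only: the
struct and list helpers below are stand-ins, not Xen's real page_info or
heap()/scrub() interfaces; the actual change is in alloc_heap_pages() in the
diff):

  #include <stdbool.h>
  #include <stddef.h>
  #include <string.h>

  /* Stand-ins for the relevant bits of a page and a per-order free list. */
  struct page { bool need_scrub; unsigned char data[4096]; };
  struct free_list { struct page *pages[64]; size_t count; };

  static struct page *list_pop(struct free_list *l)
  {
      return l->count ? l->pages[--l->count] : NULL;
  }

  static void scrub_one_page(struct page *pg)
  {
      memset(pg->data, 0, sizeof(pg->data)); /* erase stale guest data */
      pg->need_scrub = false;
  }

  /* Prefer the clean "heap" list; fall back to "scrub" and scrub now. */
  static struct page *alloc_page_sketch(struct free_list *heap,
                                        struct free_list *scrub)
  {
      struct page *pg = list_pop(heap);

      if ( pg == NULL && (pg = list_pop(scrub)) != NULL )
          scrub_one_page(pg); /* synchronous scrub at allocation time */

      return pg;
  }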

In order not to fail high-order allocation requests, chunks of PGC_need_scrub
pages are merged as well.
Note: PGC_need_scrub pages and normal pages are not mergeable.
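
That merge constraint boils down to a one-line predicate (again a sketch with
a stand-in type, not the real page_info or its count_info flags):

  #include <stdbool.h>

  struct page { bool need_scrub; };

  /*
   * A freed chunk may only merge with a buddy in the same scrub state,
   * so dirty (PGC_need_scrub) and clean pages never share one chunk.
   */
  static bool can_merge(const struct page *chunk, const struct page *buddy)
  {
      return chunk->need_scrub == buddy->need_scrub;
  }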

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 xen/common/page_alloc.c |  108 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 91 insertions(+), 17 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 601319c..723d273 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -267,6 +267,9 @@ typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 #define heap(node, zone, order) ((*_heap[node])[zone][order])
 
+static heap_by_zone_and_order_t _scrub[MAX_NUMNODES];
+#define scrub(node, zone, order) (_scrub[node][zone][order])
+
 static unsigned long *avail[MAX_NUMNODES];
 static long total_avail_pages;
 
@@ -629,8 +632,24 @@ static struct page_info *alloc_heap_pages(
 
             /* Find smallest order which can satisfy the request. */
             for ( j = order; j <= MAX_ORDER; j++ )
+            {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                     goto found;
+
+                /* Try to scrub a page directly */
+                if ( (pg = page_list_remove_head(&scrub(node, zone, j))) )
+                {
+                    for ( i = 0; i < (1 << j); i++ )
+                    {
+                        if ( test_bit(_PGC_need_scrub, &(pg[i].count_info)) )
+                        {
+                            scrub_one_page(&pg[i]);
+                            pg[i].count_info &= ~(PGC_need_scrub);
+                        }
+                    }
+                    goto found;
+                }
+            }
         } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
 
         if ( memflags & MEMF_exact_node )
@@ -810,7 +829,7 @@ static int reserve_offlined_page(struct page_info *head)
 
 /* Free 2^@order set of pages. */
 static void free_heap_pages(
-    struct page_info *pg, unsigned int order)
+    struct page_info *pg, unsigned int order, bool_t need_scrub)
 {
     unsigned long mask, mfn = page_to_mfn(pg);
     unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
@@ -859,6 +878,22 @@ static void free_heap_pages(
         midsize_alloc_zone_pages = max(
             midsize_alloc_zone_pages, total_avail_pages / MIDSIZE_ALLOC_FRAC);
 
+    if ( need_scrub )
+    {
+        /* Special case for tainted pages: scrub them immediately */
+        if ( tainted )
+        {
+            for ( i = 0; i < (1 << order); i++ )
+                scrub_one_page(&pg[i]);
+            need_scrub = 0;
+        }
+        else
+        {
+            for ( i = 0; i < (1 << order); i++ )
+                pg[i].count_info |= PGC_need_scrub;
+        }
+    }
+
     /* Merge chunks as far as possible. */
     while ( order < MAX_ORDER )
     {
@@ -872,8 +907,22 @@ static void free_heap_pages(
                  (PFN_ORDER(pg-mask) != order) ||
                  (phys_to_nid(page_to_maddr(pg-mask)) != node) )
                 break;
-            pg -= mask;
-            page_list_del(pg, &heap(node, zone, order));
+
+            /* If we need scrub, only merge with PGC_need_scrub pages */
+            if ( need_scrub )
+            {
+                if ( !test_bit(_PGC_need_scrub, &((pg-mask)->count_info)) )
+                    break;
+                pg -= mask;
+                page_list_del(pg, &scrub(node, zone, order));
+            }
+            else
+            {
+                if ( test_bit(_PGC_need_scrub, &((pg-mask)->count_info)) )
+                    break;
+                pg -= mask;
+                page_list_del(pg, &heap(node, zone, order));
+            }
         }
         else
         {
@@ -883,14 +932,35 @@ static void free_heap_pages(
                  (PFN_ORDER(pg+mask) != order) ||
                  (phys_to_nid(page_to_maddr(pg+mask)) != node) )
                 break;
-            page_list_del(pg + mask, &heap(node, zone, order));
+
+            /* If we need scrub, only merge with PGC_need_scrub pages */
+            if ( need_scrub )
+            {
+                if ( !test_bit(_PGC_need_scrub, &((pg+mask)->count_info)) )
+                    break;
+                page_list_del(pg + mask, &scrub(node, zone, order));
+            }
+            else
+            {
+                if ( test_bit(_PGC_need_scrub, &((pg+mask)->count_info)) )
+                    break;
+                page_list_del(pg + mask, &heap(node, zone, order));
+            }
         }
 
         order++;
     }
 
     PFN_ORDER(pg) = order;
-    page_list_add_tail(pg, &heap(node, zone, order));
+    if ( need_scrub )
+    {
+        ASSERT( test_bit(_PGC_need_scrub, &(pg->count_info)) );
+        page_list_add_tail(pg, &scrub(node, zone, order));
+    }
+    else
+    {
+        page_list_add_tail(pg, &heap(node, zone, order));
+    }
 
     if ( tainted )
         reserve_offlined_page(pg);
@@ -1115,7 +1185,7 @@ unsigned int online_page(unsigned long mfn, uint32_t *status)
     spin_unlock(&heap_lock);
 
     if ( (y & PGC_state) == PGC_state_offlined )
-        free_heap_pages(pg, 0);
+        free_heap_pages(pg, 0, 0);
 
     return ret;
 }
@@ -1184,7 +1254,7 @@ static void init_heap_pages(
             nr_pages -= n;
         }
 
-        free_heap_pages(pg+i, 0);
+        free_heap_pages(pg+i, 0, 0);
     }
 }
 
@@ -1216,7 +1286,7 @@ unsigned long total_free_pages(void)
 
 void __init end_boot_allocator(void)
 {
-    unsigned int i;
+    unsigned int i, j, order;
 
     /* Pages that are free now go to the domain sub-allocator. */
     for ( i = 0; i < nr_bootmem_regions; i++ )
@@ -1250,6 +1320,11 @@ void __init end_boot_allocator(void)
 #endif
     }
 
+    for ( i = 0; i < MAX_NUMNODES; i++ )
+        for ( j = 0; j < NR_ZONES; j++ )
+            for ( order = 0; order <= MAX_ORDER; order++ )
+                INIT_PAGE_LIST_HEAD(&scrub(i, j, order));
+
     printk("Domain heap initialised");
     if ( dma_bitsize )
         printk(" DMA width %u bits", dma_bitsize);
@@ -1357,7 +1432,7 @@ void free_xenheap_pages(void *v, unsigned int order)
 
     memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
 
-    free_heap_pages(virt_to_page(v), order);
+    free_heap_pages(virt_to_page(v), order, 0);
 }
 
 #else
@@ -1411,7 +1486,7 @@ void free_xenheap_pages(void *v, unsigned int order)
     for ( i = 0; i < (1u << order); i++ )
         pg[i].count_info &= ~PGC_xen_heap;
 
-    free_heap_pages(pg, order);
+    free_heap_pages(pg, order, 0);
 }
 
 #endif
@@ -1515,7 +1590,7 @@ struct page_info *alloc_domheap_pages(
 
     if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
     {
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, 0);
         return NULL;
     }
     
@@ -1564,22 +1639,21 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
          * domain has died we assume responsibility for erasure.
          */
         if ( unlikely(d->is_dying) )
-            for ( i = 0; i < (1 << order); i++ )
-                scrub_one_page(&pg[i]);
-
-        free_heap_pages(pg, order);
+            free_heap_pages(pg, order, 1);
+        else
+            free_heap_pages(pg, order, 0);
     }
     else if ( unlikely(d == dom_cow) )
     {
         ASSERT(order == 0); 
         scrub_one_page(pg);
-        free_heap_pages(pg, 0);
+        free_heap_pages(pg, 0, 0);
         drop_dom_ref = 0;
     }
     else
     {
         /* Freeing anonymous domain-heap pages. */
-        free_heap_pages(pg, order);
+        free_heap_pages(pg, order, 0);
         drop_dom_ref = 0;
     }
 
-- 
1.7.10.4

Thread overview: 14+ messages
2014-06-17 11:49 [PATCH 1/4] xen: asm-x86: introduce PGC_need_scrub page flag Bob Liu
2014-06-17 11:49 ` Bob Liu [this message]
2014-06-17 12:36   ` [PATCH 2/4] xen: introduce a "scrub" free page list Jan Beulich
2014-06-18  0:54     ` Bob Liu
2014-06-18 13:18       ` Konrad Rzeszutek Wilk
2014-06-17 11:49 ` [PATCH 3/4] xen: separate a function merge_free_trunks from Bob Liu
2014-06-17 12:39   ` Jan Beulich
2014-06-17 11:49 ` [PATCH 4/4] xen: use idle vcpus to scrub pages Bob Liu
2014-06-17 13:01   ` Jan Beulich
2014-06-18  1:18     ` Bob Liu
2014-06-18 10:42       ` Jan Beulich
2014-06-17 12:35 ` [PATCH 1/4] xen: asm-x86: introduce PGC_need_scrub page flag Jan Beulich
2014-06-17 12:46 ` Julien Grall
2014-06-18  0:55   ` Bob Liu
