All of lore.kernel.org
 help / color / mirror / Atom feed
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: sstabellini@kernel.org, wei.liu2@citrix.com,
	George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
	Dario Faggioli <dario.faggioli@citrix.com>,
	ian.jackson@eu.citrix.com, tim@xen.org, julien.grall@arm.com,
	jbeulich@suse.com, Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v7 5/9] mm: Scrub memory from idle loop
Date: Tue,  8 Aug 2017 17:45:03 -0400	[thread overview]
Message-ID: <1502228707-31883-6-git-send-email-boris.ostrovsky@oracle.com> (raw)
In-Reply-To: <1502228707-31883-1-git-send-email-boris.ostrovsky@oracle.com>

Instead of scrubbing pages during guest destruction (from
free_heap_pages()) do this opportunistically, from the idle loop.

We might come to scrub_free_pages()from idle loop while another CPU
uses mapcache override, resulting in a fault while trying to do
__map_domain_page() in scrub_one_page(). To avoid this, make mapcache
vcpu override a per-cpu variable.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>
---
CC: Dario Faggioli <dario.faggioli@citrix.com>
---
 xen/arch/arm/domain.c      |   8 ++-
 xen/arch/x86/domain.c      |   8 ++-
 xen/arch/x86/domain_page.c |   6 +--
 xen/common/page_alloc.c    | 119 ++++++++++++++++++++++++++++++++++++++++-----
 xen/include/xen/mm.h       |   1 +
 5 files changed, 124 insertions(+), 18 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 2dc8b0a..d7961bb 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -51,7 +51,13 @@ void idle_loop(void)
         /* Are we here for running vcpu context tasklets, or for idling? */
         if ( unlikely(tasklet_work_to_do(cpu)) )
             do_tasklet();
-        else
+        /*
+         * Test softirqs twice --- first to see if should even try scrubbing
+         * and then, after it is done, whether softirqs became pending
+         * while we were scrubbing.
+         */
+        else if ( !softirq_pending(cpu) && !scrub_free_pages() &&
+                    !softirq_pending(cpu) )
         {
             local_irq_disable();
             if ( cpu_is_haltable(cpu) )
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index baaf815..9b4b959 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -122,7 +122,13 @@ static void idle_loop(void)
         /* Are we here for running vcpu context tasklets, or for idling? */
         if ( unlikely(tasklet_work_to_do(cpu)) )
             do_tasklet();
-        else
+        /*
+         * Test softirqs twice --- first to see if should even try scrubbing
+         * and then, after it is done, whether softirqs became pending
+         * while we were scrubbing.
+         */
+        else if ( !softirq_pending(cpu) && !scrub_free_pages()  &&
+                    !softirq_pending(cpu) )
             pm_idle();
         do_softirq();
         /*
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index 71baede..0783c1e 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -18,12 +18,12 @@
 #include <asm/hardirq.h>
 #include <asm/setup.h>
 
-static struct vcpu *__read_mostly override;
+static DEFINE_PER_CPU(struct vcpu *, override);
 
 static inline struct vcpu *mapcache_current_vcpu(void)
 {
     /* In the common case we use the mapcache of the running VCPU. */
-    struct vcpu *v = override ?: current;
+    struct vcpu *v = this_cpu(override) ?: current;
 
     /*
      * When current isn't properly set up yet, this is equivalent to
@@ -59,7 +59,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
 
 void __init mapcache_override_current(struct vcpu *v)
 {
-    override = v;
+    this_cpu(override) = v;
 }
 
 #define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 7fa8896..b886983 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1013,15 +1013,86 @@ static int reserve_offlined_page(struct page_info *head)
     return count;
 }
 
-static void scrub_free_pages(unsigned int node)
+static nodemask_t node_scrubbing;
+
+/*
+ * If get_node is true this will return closest node that needs to be scrubbed,
+ * with appropriate bit in node_scrubbing set.
+ * If get_node is not set, this will return *a* node that needs to be scrubbed.
+ * node_scrubbing bitmask will no be updated.
+ * If no node needs scrubbing then NUMA_NO_NODE is returned.
+ */
+static unsigned int node_to_scrub(bool get_node)
 {
-    struct page_info *pg;
-    unsigned int zone;
+    nodeid_t node = cpu_to_node(smp_processor_id()), local_node;
+    nodeid_t closest = NUMA_NO_NODE;
+    u8 dist, shortest = 0xff;
 
-    ASSERT(spin_is_locked(&heap_lock));
+    if ( node == NUMA_NO_NODE )
+        node = 0;
 
-    if ( !node_need_scrub[node] )
-        return;
+    if ( node_need_scrub[node] &&
+         (!get_node || !node_test_and_set(node, node_scrubbing)) )
+        return node;
+
+    /*
+     * See if there are memory-only nodes that need scrubbing and choose
+     * the closest one.
+     */
+    local_node = node;
+    for ( ; ; )
+    {
+        do {
+            node = cycle_node(node, node_online_map);
+        } while ( !cpumask_empty(&node_to_cpumask(node)) &&
+                  (node != local_node) );
+
+        if ( node == local_node )
+            break;
+
+        if ( node_need_scrub[node] )
+        {
+            if ( !get_node )
+                return node;
+
+            dist = __node_distance(local_node, node);
+
+            /*
+             * Grab the node right away. If we find a closer node later we will
+             * release this one. While there is a chance that another CPU will
+             * not be able to scrub that node when it is searching for scrub work
+             * at the same time it will be able to do so next time it wakes up.
+             * The alternative would be to perform this search under a lock but
+             * then we'd need to take this lock every time we come in here.
+             */
+            if ( (dist < shortest || closest == NUMA_NO_NODE) &&
+                 !node_test_and_set(node, node_scrubbing) )
+            {
+                if ( closest != NUMA_NO_NODE )
+                    node_clear(closest, node_scrubbing);
+                shortest = dist;
+                closest = node;
+            }
+        }
+    }
+
+    return closest;
+}
+
+bool scrub_free_pages(void)
+{
+    struct page_info *pg;
+    unsigned int zone;
+    unsigned int cpu = smp_processor_id();
+    bool preempt = false;
+    nodeid_t node;
+    unsigned int cnt = 0;
+  
+    node = node_to_scrub(true);
+    if ( node == NUMA_NO_NODE )
+        return false;
+ 
+    spin_lock(&heap_lock);
 
     for ( zone = 0; zone < NR_ZONES; zone++ )
     {
@@ -1044,17 +1115,42 @@ static void scrub_free_pages(unsigned int node)
                         scrub_one_page(&pg[i]);
                         pg[i].count_info &= ~PGC_need_scrub;
                         node_need_scrub[node]--;
+                        cnt += 100; /* scrubbed pages add heavier weight. */
+                    }
+                    else
+                        cnt++;
+
+                    /*
+                     * Scrub a few (8) pages before becoming eligible for
+                     * preemption. But also count non-scrubbing loop iterations
+                     * so that we don't get stuck here with an almost clean
+                     * heap.
+                     */
+                    if ( cnt > 800 && softirq_pending(cpu) )
+                    {
+                        preempt = true;
+                        break;
                     }
                 }
 
-                page_list_del(pg, &heap(node, zone, order));
-                page_list_add_scrub(pg, node, zone, order, INVALID_DIRTY_IDX);
+                if ( i >= (1U << order) - 1 )
+                {
+                    page_list_del(pg, &heap(node, zone, order));
+                    page_list_add_scrub(pg, node, zone, order, INVALID_DIRTY_IDX);
+                }
+                else
+                    pg->u.free.first_dirty = i + 1;
 
-                if ( node_need_scrub[node] == 0 )
-                    return;
+                if ( preempt || (node_need_scrub[node] == 0) )
+                    goto out;
             }
         } while ( order-- != 0 );
     }
+
+ out:
+    spin_unlock(&heap_lock);
+    node_clear(node, node_scrubbing);
+    return node_to_scrub(false) != NUMA_NO_NODE;
 }
 
 /* Free 2^@order set of pages. */
@@ -1166,9 +1262,6 @@ static void free_heap_pages(
     if ( tainted )
         reserve_offlined_page(pg);
 
-    if ( need_scrub )
-        scrub_free_pages(node);
-
     spin_unlock(&heap_lock);
 }
 
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index e1f9c42..ddc3fb3 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -160,6 +160,7 @@ void init_xenheap_pages(paddr_t ps, paddr_t pe);
 void xenheap_max_mfn(unsigned long mfn);
 void *alloc_xenheap_pages(unsigned int order, unsigned int memflags);
 void free_xenheap_pages(void *v, unsigned int order);
+bool scrub_free_pages(void);
 #define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
 #define free_xenheap_page(v) (free_xenheap_pages(v,0))
 /* Map machine page range in Xen virtual address space. */
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  parent reply	other threads:[~2017-08-08 21:45 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-08 21:44 [PATCH v7 0/9] Memory scrubbing from idle loop Boris Ostrovsky
2017-08-08 21:44 ` [PATCH v7 1/9] mm: Clean up free_heap_pages() Boris Ostrovsky
2017-08-10 12:21   ` Wei Liu
2017-08-14 10:30   ` Jan Beulich
2017-08-08 21:45 ` [PATCH v7 2/9] mm: Place unscrubbed pages at the end of pagelist Boris Ostrovsky
2017-08-14 10:37   ` Jan Beulich
2017-08-14 14:29     ` Boris Ostrovsky
2017-08-15  8:18       ` Jan Beulich
2017-08-15 14:41         ` Boris Ostrovsky
2017-08-15 14:51           ` Jan Beulich
2017-08-15 14:52             ` Julien Grall
2017-08-15 15:03               ` Boris Ostrovsky
2017-08-15 15:08                 ` Jan Beulich
2017-08-14 11:16   ` Julien Grall
2017-08-08 21:45 ` [PATCH v7 3/9] mm: Extract allocation loop from alloc_heap_pages() Boris Ostrovsky
2017-08-08 21:45 ` [PATCH v7 4/9] mm: Scrub pages in alloc_heap_pages() if needed Boris Ostrovsky
2017-08-08 21:45 ` Boris Ostrovsky [this message]
2017-08-08 21:45 ` [PATCH v7 6/9] spinlock: Introduce spin_lock_cb() Boris Ostrovsky
2017-08-14 11:22   ` Julien Grall
2017-08-14 14:39     ` Boris Ostrovsky
2017-08-14 14:42       ` Julien Grall
2017-08-14 14:57         ` Boris Ostrovsky
2017-08-08 21:45 ` [PATCH v7 7/9] mm: Keep heap accessible to others while scrubbing Boris Ostrovsky
2017-08-14 10:38   ` Jan Beulich
2017-08-08 21:45 ` [PATCH v7 8/9] mm: Print number of unscrubbed pages in 'H' debug handler Boris Ostrovsky
2017-08-08 21:45 ` [PATCH v7 9/9] mm: Make sure pages are scrubbed Boris Ostrovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1502228707-31883-6-git-send-email-boris.ostrovsky@oracle.com \
    --to=boris.ostrovsky@oracle.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dario.faggioli@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=jbeulich@suse.com \
    --cc=julien.grall@arm.com \
    --cc=sstabellini@kernel.org \
    --cc=tim@xen.org \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.