From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: sstabellini@kernel.org, wei.liu2@citrix.com,
	George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
	ian.jackson@eu.citrix.com, tim@xen.org, julien.grall@arm.com,
	jbeulich@suse.com, Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v7 7/9] mm: Keep heap accessible to others while scrubbing
Date: Tue,  8 Aug 2017 17:45:05 -0400
Message-ID: <1502228707-31883-8-git-send-email-boris.ostrovsky@oracle.com>
In-Reply-To: <1502228707-31883-1-git-send-email-boris.ostrovsky@oracle.com>

Instead of scrubbing pages while holding the heap lock, we can mark
the buddy's head as being scrubbed and drop the lock temporarily.
If someone (most likely alloc_heap_pages()) tries to access
this chunk, it signals the scrubber to abort the scrub by setting
the head's scrub_state to BUDDY_SCRUB_ABORT. The scrubber checks
this state after processing each page and stops its work as soon as
it sees the abort request.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v7:
* Replaced page_info with typeof(head->u.free) in check_and_stop_scrub()
* Replaced 1UL with 1U in scrub_free_pages()
* Fixed formatting in asm-*/mm.h

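For reference, the handshake described above can be modelled in
userspace. The sketch below is illustrative only, not Xen code: a
pthread mutex stands in for the heap lock, C11 atomics for the
ACCESS_ONCE()/smp_wmb() scheme, and scrubbing is simulated by an
empty loop over 2^24 "pages".

/* gcc -pthread scrub_model.c -o scrub_model */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

enum { BUDDY_NOT_SCRUBBING, BUDDY_SCRUBBING, BUDDY_SCRUB_ABORT };

static _Atomic int scrub_state = BUDDY_NOT_SCRUBBING;
static pthread_mutex_t heap_lock = PTHREAD_MUTEX_INITIALIZER;

/* Idle-loop scrubber: mark the buddy, drop the lock, poll for aborts. */
static void *scrubber(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&heap_lock);
    atomic_store(&scrub_state, BUDDY_SCRUBBING);
    pthread_mutex_unlock(&heap_lock);        /* heap stays accessible */

    for ( unsigned int i = 0; i < (1u << 24); i++ )
    {
        /* scrub_one_page(&pg[i]) would run here */
        if ( atomic_load(&scrub_state) == BUDDY_SCRUB_ABORT )
        {
            printf("scrubber: abort request seen at page %u\n", i);
            break;
        }
    }
    /* Clearing the state releases the waiter in the allocator. */
    atomic_store(&scrub_state, BUDDY_NOT_SCRUBBING);
    return NULL;
}

/* Allocator side, i.e. the check_and_stop_scrub() analogue. */
static void *allocator(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&heap_lock);
    if ( atomic_load(&scrub_state) == BUDDY_SCRUBBING )
    {
        atomic_store(&scrub_state, BUDDY_SCRUB_ABORT);
        while ( atomic_load(&scrub_state) == BUDDY_SCRUB_ABORT )
            ;                                /* cpu_relax() in Xen */
    }
    printf("allocator: buddy handed over\n");
    pthread_mutex_unlock(&heap_lock);
    return NULL;
}

int main(void)
{
    pthread_t s, a;

    pthread_create(&s, NULL, scrubber, NULL);
    usleep(1000);            /* give the scrubber a head start */
    pthread_create(&a, NULL, allocator, NULL);
    pthread_join(s, NULL);
    pthread_join(a, NULL);
    return 0;
}

The point the model demonstrates is that the waiter may spin while
still holding the lock: setting BUDDY_SCRUBBING made the scrubber the
buddy's effective owner, so it can clear scrub_state without retaking
the lock and the waiter is never blocked on the scrubber acquiring it.
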
 xen/common/page_alloc.c  | 110 +++++++++++++++++++++++++++++++++++++++++++++--
 xen/include/asm-arm/mm.h |  28 +++++++-----
 xen/include/asm-x86/mm.h |  30 ++++++++-----
 3 files changed, 143 insertions(+), 25 deletions(-)
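
The asm-*/mm.h changes below wrap the free-page bitfields in a union
with an unsigned long val so the whole word can be snapshotted at
once: Xen's ACCESS_ONCE() takes the address of its argument, which is
impossible for a bitfield. A standalone sketch of the trick (the
MAX_ORDER value is assumed here, and READ_ONCE_UL stands in for
ACCESS_ONCE()):

/* gcc -std=c11 val_snapshot.c */
#include <stdio.h>

#define MAX_ORDER 20   /* assumed here; Xen's actual value is config-dependent */

#define BUDDY_NOT_SCRUBBING    0
#define BUDDY_SCRUBBING        1
#define BUDDY_SCRUB_ABORT      2

struct free_info {
    union {
        struct {
            unsigned long need_tlbflush:1;
            unsigned long first_dirty:MAX_ORDER + 1;
            unsigned long scrub_state:2;
        };
        unsigned long val;
    };
};

/* Stand-in for Xen's ACCESS_ONCE(): one forced whole-word read. */
#define READ_ONCE_UL(x) (*(const volatile unsigned long *)&(x))

int main(void)
{
    struct free_info head = { .scrub_state = BUDDY_SCRUB_ABORT };
    struct free_info snap;

    /*
     * ACCESS_ONCE(head.scrub_state) would not compile: a bitfield has
     * no address.  Snapshot the containing word instead, then pick the
     * field out of the local copy.
     */
    snap.val = READ_ONCE_UL(head.val);
    printf("scrub_state = %lu\n", (unsigned long)snap.scrub_state);
    return 0;
}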

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index b886983..726f857 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -687,6 +687,7 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
 {
     PFN_ORDER(pg) = order;
     pg->u.free.first_dirty = first_dirty;
+    pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
 
     if ( first_dirty != INVALID_DIRTY_IDX )
     {
@@ -697,6 +698,25 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
         page_list_add(pg, &heap(node, zone, order));
 }
 
+static void check_and_stop_scrub(struct page_info *head)
+{
+    if ( head->u.free.scrub_state == BUDDY_SCRUBBING )
+    {
+        typeof(head->u.free) pgfree;
+
+        head->u.free.scrub_state = BUDDY_SCRUB_ABORT;
+        spin_lock_kick();
+        for ( ; ; )
+        {
+            /* Can't ACCESS_ONCE() a bitfield. */
+            pgfree.val = ACCESS_ONCE(head->u.free.val);
+            if ( pgfree.scrub_state != BUDDY_SCRUB_ABORT )
+                break;
+            cpu_relax();
+        }
+    }
+}
+
 static struct page_info *get_free_buddy(unsigned int zone_lo,
                                         unsigned int zone_hi,
                                         unsigned int order, unsigned int memflags,
@@ -741,14 +761,19 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
             {
                 if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                 {
+                    if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
+                        return pg;
                     /*
                      * We grab single pages (order=0) even if they are
                      * unscrubbed. Given that scrubbing one page is fairly quick
                      * it is not worth breaking higher orders.
                      */
-                    if ( (order == 0) || use_unscrubbed ||
-                         pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+                    if ( (order == 0) || use_unscrubbed )
+                    {
+                        check_and_stop_scrub(pg);
                         return pg;
+                    }
+
                     page_list_add_tail(pg, &heap(node, zone, j));
                 }
             }
@@ -929,6 +954,7 @@ static int reserve_offlined_page(struct page_info *head)
 
     cur_head = head;
 
+    check_and_stop_scrub(head);
     /*
      * We may break the buddy so let's mark the head as clean. Then, when
      * merging chunks back into the heap, we will see whether the chunk has
@@ -1079,6 +1105,29 @@ static unsigned int node_to_scrub(bool get_node)
     return closest;
 }
 
+struct scrub_wait_state {
+    struct page_info *pg;
+    unsigned int first_dirty;
+    bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+    struct scrub_wait_state *st = data;
+
+    if ( st->drop )
+        return;
+
+    if ( st->pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+    {
+        /* There is a waiter for this buddy. Release it. */
+        st->drop = true;
+        st->pg->u.free.first_dirty = st->first_dirty;
+        smp_wmb();
+        st->pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+    }
+}
+
 bool scrub_free_pages(void)
 {
     struct page_info *pg;
@@ -1101,25 +1150,53 @@ bool scrub_free_pages(void)
         do {
             while ( !page_list_empty(&heap(node, zone, order)) )
             {
-                unsigned int i;
+                unsigned int i, dirty_cnt;
+                struct scrub_wait_state st;
 
                 /* Unscrubbed pages are always at the end of the list. */
                 pg = page_list_last(&heap(node, zone, order));
                 if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
                     break;
 
+                ASSERT(pg->u.free.scrub_state == BUDDY_NOT_SCRUBBING);
+                pg->u.free.scrub_state = BUDDY_SCRUBBING;
+
+                spin_unlock(&heap_lock);
+
+                dirty_cnt = 0;
+
                 for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
                 {
                     if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
                     {
                         scrub_one_page(&pg[i]);
+                        /*
+                         * We can modify count_info without holding heap
+                         * lock since we effectively locked this buddy by
+                         * setting its scrub_state.
+                         */
                         pg[i].count_info &= ~PGC_need_scrub;
-                        node_need_scrub[node]--;
+                        dirty_cnt++;
                         cnt += 100; /* scrubbed pages add heavier weight. */
                     }
                     else
                         cnt++;
 
+                    if ( pg->u.free.scrub_state == BUDDY_SCRUB_ABORT )
+                    {
+                        /* Someone wants this chunk. Drop everything. */
+
+                        pg->u.free.first_dirty = (i == (1U << order) - 1) ?
+                            INVALID_DIRTY_IDX : i + 1;
+                        smp_wmb();
+                        pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
+                        spin_lock(&heap_lock);
+                        node_need_scrub[node] -= dirty_cnt;
+                        spin_unlock(&heap_lock);
+                        goto out_nolock;
+                    }
+
                     /*
                      * Scrub a few (8) pages before becoming eligible for
                      * preemption. But also count non-scrubbing loop iterations
@@ -1133,6 +1210,23 @@ bool scrub_free_pages(void)
                     }
                 }
 
+                st.pg = pg;
+                /*
+                 * get_free_buddy() grabs a buddy with first_dirty set to
+                 * INVALID_DIRTY_IDX so we can't set pg's first_dirty here.
+                 * It will be set either below or in the lock callback (in
+                 * scrub_continue()).
+                 */
+                st.first_dirty = (i >= (1U << order) - 1) ?
+                    INVALID_DIRTY_IDX : i + 1;
+                st.drop = false;
+                spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+                node_need_scrub[node] -= dirty_cnt;
+
+                if ( st.drop )
+                    goto out;
+
                 if ( i >= (1U << order) - 1 )
                 {
                     page_list_del(pg, &heap(node, zone, order));
@@ -1141,6 +1235,8 @@ bool scrub_free_pages(void)
                 else
                     pg->u.free.first_dirty = i + 1;
 
+                pg->u.free.scrub_state = BUDDY_NOT_SCRUBBING;
+
                 if ( preempt || (node_need_scrub[node] == 0) )
                     goto out;
             }
@@ -1149,6 +1245,8 @@ bool scrub_free_pages(void)
 
  out:
     spin_unlock(&heap_lock);
+
+ out_nolock:
     node_clear(node, node_scrubbing);
     return node_to_scrub(false) != NUMA_NO_NODE;
 }
@@ -1230,6 +1328,8 @@ static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(predecessor)) != node) )
                 break;
 
+            check_and_stop_scrub(predecessor);
+
             page_list_del(predecessor, &heap(node, zone, order));
 
             /* Keep predecessor's first_dirty if it is already set. */
@@ -1251,6 +1351,8 @@ static void free_heap_pages(
                  (phys_to_nid(page_to_maddr(successor)) != node) )
                 break;
 
+            check_and_stop_scrub(successor);
+
             page_list_del(successor, &heap(node, zone, order));
         }
 
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 572337c..d42b070 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -42,18 +42,26 @@ struct page_info
             unsigned long type_info;
         } inuse;
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
-        struct {
-            /* Do TLBs need flushing for safety before next page use? */
-            bool need_tlbflush:1;
-
-            /*
-             * Index of the first *possibly* unscrubbed page in the buddy.
-             * One more bit than maximum possible order to accommodate
-             * INVALID_DIRTY_IDX.
-             */
+        union {
+            struct {
+                /* Do TLBs need flushing for safety before next page use? */
+                bool need_tlbflush:1;
+
+                /*
+                 * Index of the first *possibly* unscrubbed page in the buddy.
+                 * One more bit than maximum possible order to accommodate
+                 * INVALID_DIRTY_IDX.
+                 */
 #define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1)
-            unsigned long first_dirty:MAX_ORDER + 1;
+                unsigned long first_dirty:MAX_ORDER + 1;
+
+#define BUDDY_NOT_SCRUBBING    0
+#define BUDDY_SCRUBBING        1
+#define BUDDY_SCRUB_ABORT      2
+                unsigned long scrub_state:2;
+            };
 
+            unsigned long val;
         } free;
 
     } u;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 07dc963..c9cf6c3 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -86,19 +86,27 @@ struct page_info
         } sh;
 
         /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
-        struct {
-            /* Do TLBs need flushing for safety before next page use? */
-            bool need_tlbflush:1;
-
-            /*
-             * Index of the first *possibly* unscrubbed page in the buddy.
-             * One more bit than maximum possible order to accommodate
-             * INVALID_DIRTY_IDX.
-             */
+        union {
+            struct {
+                /* Do TLBs need flushing for safety before next page use? */
+                bool need_tlbflush:1;
+
+                /*
+                 * Index of the first *possibly* unscrubbed page in the buddy.
+                 * One more bit than maximum possible order to accommodate
+                 * INVALID_DIRTY_IDX.
+                 */
 #define INVALID_DIRTY_IDX ((1UL << (MAX_ORDER + 1)) - 1)
-            unsigned long first_dirty:MAX_ORDER + 1;
-        } free;
+                unsigned long first_dirty:MAX_ORDER + 1;
 
+#define BUDDY_NOT_SCRUBBING    0
+#define BUDDY_SCRUBBING        1
+#define BUDDY_SCRUB_ABORT      2
+                unsigned long scrub_state:2;
+            };
+
+            unsigned long val;
+        } free;
     } u;
 
     union {
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
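
A note on the locking primitive this patch relies on: spin_lock_cb()
is introduced in patch 6/9 of this series. As used in
scrub_free_pages() above, it behaves like spin_lock() except that it
invokes the supplied callback while waiting for the lock, which is
what lets scrub_continue() release an aborting waiter (who is
spinning with the lock held) before the scrubber itself acquires it.
A minimal userspace analogue built on a test-and-set lock is sketched
below; the real primitive is a ticket lock with architecture-specific
relax/kick, so this approximates the semantics only:

/* gcc -pthread spin_cb_model.c -o spin_cb_model */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

typedef struct { atomic_flag locked; } myspinlock_t;

static void my_spin_lock_cb(myspinlock_t *l, void (*cb)(void *), void *data)
{
    while ( atomic_flag_test_and_set_explicit(&l->locked,
                                              memory_order_acquire) )
        if ( cb )
            cb(data);   /* run the callback on every failed attempt */
}

static void my_spin_unlock(myspinlock_t *l)
{
    atomic_flag_clear_explicit(&l->locked, memory_order_release);
}

static myspinlock_t heap_lock = { ATOMIC_FLAG_INIT };

static void count_spin(void *data)
{
    ++*(unsigned long *)data;   /* scrub_continue() would check state here */
}

static void *holder(void *arg)
{
    (void)arg;
    my_spin_lock_cb(&heap_lock, NULL, NULL);
    usleep(10000);              /* keep the lock busy for a while */
    my_spin_unlock(&heap_lock);
    return NULL;
}

int main(void)
{
    pthread_t t;
    unsigned long spins = 0;

    pthread_create(&t, NULL, holder, NULL);
    usleep(1000);               /* let the holder take the lock first */
    my_spin_lock_cb(&heap_lock, count_spin, &spins);
    my_spin_unlock(&heap_lock);
    pthread_join(t, NULL);
    printf("callback ran %lu times while waiting\n", spins);
    return 0;
}

Without the callback the two sides could deadlock: the allocator
spins on scrub_state while holding the heap lock, and the scrubber
would spin on the heap lock, never clearing scrub_state.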
