From: daniel.m.jordan@oracle.com
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: aaron.lu@intel.com, ak@linux.intel.com,
	akpm@linux-foundation.org, Dave.Dice@oracle.com,
	dave@stgolabs.net, khandual@linux.vnet.ibm.com,
	ldufour@linux.vnet.ibm.com, mgorman@suse.de, mhocko@kernel.org,
	pasha.tatashin@oracle.com, steven.sistare@oracle.com,
	yossi.lev@oracle.com
Subject: [RFC PATCH v1 05/13] mm: add batching logic to add/delete/move API's
Date: Wed, 31 Jan 2018 18:04:05 -0500
Message-ID: <20180131230413.27653-6-daniel.m.jordan@oracle.com>
In-Reply-To: <20180131230413.27653-1-daniel.m.jordan@oracle.com>

Change the add/delete/move LRU APIs in mm_inline.h to account for LRU
batching.  Now when a page is added to the front of the LRU, it's
assigned a batch number that's used to decide which spinlock in the
lru_batch_lock array to take when removing that page from the LRU.  Each
newly-added page is also unconditionally made a sentinel page.
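
For reference, this is roughly the per-LRU bookkeeping the helpers below
lean on.  The struct itself comes from an earlier patch in this series;
the layout here is only a sketch inferred from how the fields are used
in this patch, and the list field name and types are assumptions:

  struct lru_list_head {
          struct list_head list;       /* the LRU list itself (assumed name) */
          int first_batch_tag;         /* tag handed to pages added at the head */
          int first_batch_npages;      /* pages accumulated under that tag */
          int last_batch_tag;          /* same bookkeeping for tail additions */
          int last_batch_npages;
  };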

As more pages are added to the front of an LRU, the same batch number is
used for each until a threshold is reached, at which point a batch is
ready and the sentinel bits are unset in all but the first and last pages
of the batch.  This allows those inner pages to be removed with a batch
lock rather than the heavier lru_lock.
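
To make the payoff concrete, here is a hypothetical sketch of how a
removal path could pick its lock once batches exist.  The real
batch-lock API only appears later in this series, and the
lru_batch_locks field name on pgdat is an assumption, not something
this patch defines:

  static spinlock_t *lock_for_lru_page(struct pglist_data *pgdat,
                                       struct page *page)
  {
          /* Inner (non-sentinel) pages only need their batch's spinlock. */
          if (!page->lru_sentinel)
                  return &pgdat->lru_batch_locks[page->lru_batch].lock;

          /* Sentinel pages border another batch; fall back to lru_lock. */
          return &pgdat->lru_lock;
  }

With LRU_BATCH_MAX set to PAGEVEC_SIZE, each fully-formed batch lets its
LRU_BATCH_MAX - 2 inner pages come off the list without taking lru_lock.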

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
---
 include/linux/mm_inline.h | 119 ++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/mmzone.h    |   3 ++
 mm/swap.c                 |   2 +-
 mm/vmscan.c               |   4 +-
 4 files changed, 122 insertions(+), 6 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index d7fc46ebc33b..ec8b966a1c76 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -3,6 +3,7 @@
 #define LINUX_MM_INLINE_H
 
 #include <linux/huge_mm.h>
+#include <linux/random.h>
 #include <linux/swap.h>
 
 /**
@@ -44,27 +45,139 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
 #endif
 }
 
+static __always_inline void __add_page_to_lru_list(struct page *page,
+				struct lruvec *lruvec, enum lru_list lru)
+{
+	int tag;
+	struct page *cur, *next, *second_page;
+	struct lru_list_head *head = &lruvec->lists[lru];
+
+	list_add(&page->lru, lru_head(head));
+	/* Set sentinel unconditionally until batch is full. */
+	page->lru_sentinel = true;
+
+	second_page = container_of(page->lru.next, struct page, lru);
+	VM_BUG_ON_PAGE(!second_page->lru_sentinel, second_page);
+
+	page->lru_batch = head->first_batch_tag;
+	++head->first_batch_npages;
+
+	if (head->first_batch_npages < LRU_BATCH_MAX)
+		return;
+
+	tag = head->first_batch_tag;
+	if (likely(second_page->lru_batch == tag)) {
+		/* Unset sentinel bit in all non-sentinel nodes. */
+		cur = second_page;
+		list_for_each_entry_from(cur, lru_head(head), lru) {
+			next = list_next_entry(cur, lru);
+			if (next->lru_batch != tag)
+				break;
+			cur->lru_sentinel = false;
+		}
+	}
+
+	tag = prandom_u32_max(NUM_LRU_BATCH_LOCKS);
+	if (unlikely(tag == head->first_batch_tag))
+		tag = (tag + 1) % NUM_LRU_BATCH_LOCKS;
+	head->first_batch_tag = tag;
+	head->first_batch_npages = 0;
+}
+
 static __always_inline void add_page_to_lru_list(struct page *page,
 				struct lruvec *lruvec, enum lru_list lru)
 {
 	update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
-	list_add(&page->lru, lru_head(&lruvec->lists[lru]));
+	__add_page_to_lru_list(page, lruvec, lru);
+}
+
+static __always_inline void __add_page_to_lru_list_tail(struct page *page,
+				struct lruvec *lruvec, enum lru_list lru)
+{
+	int tag;
+	struct page *cur, *prev, *second_page;
+	struct lru_list_head *head = &lruvec->lists[lru];
+
+	list_add_tail(&page->lru, lru_head(head));
+	/* Set sentinel unconditionally until batch is full. */
+	page->lru_sentinel = true;
+
+	second_page = container_of(page->lru.prev, struct page, lru);
+	VM_BUG_ON_PAGE(!second_page->lru_sentinel, second_page);
+
+	page->lru_batch = head->last_batch_tag;
+	++head->last_batch_npages;
+
+	if (head->last_batch_npages < LRU_BATCH_MAX)
+		return;
+
+	tag = head->last_batch_tag;
+	if (likely(second_page->lru_batch == tag)) {
+		/* Unset sentinel bit in all non-sentinel nodes. */
+		cur = second_page;
+		list_for_each_entry_from_reverse(cur, lru_head(head), lru) {
+			prev = list_prev_entry(cur, lru);
+			if (prev->lru_batch != tag)
+				break;
+			cur->lru_sentinel = false;
+		}
+	}
+
+	tag = prandom_u32_max(NUM_LRU_BATCH_LOCKS);
+	if (unlikely(tag == head->last_batch_tag))
+		tag = (tag + 1) % NUM_LRU_BATCH_LOCKS;
+	head->last_batch_tag = tag;
+	head->last_batch_npages = 0;
 }
 
 static __always_inline void add_page_to_lru_list_tail(struct page *page,
 				struct lruvec *lruvec, enum lru_list lru)
 {
+
 	update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
-	list_add_tail(&page->lru, lru_head(&lruvec->lists[lru]));
+	__add_page_to_lru_list_tail(page, lruvec, lru);
 }
 
-static __always_inline void del_page_from_lru_list(struct page *page,
+static __always_inline void __del_page_from_lru_list(struct page *page,
 				struct lruvec *lruvec, enum lru_list lru)
 {
+	struct page *left, *right;
+
+	left  = container_of(page->lru.prev, struct page, lru);
+	right = container_of(page->lru.next, struct page, lru);
+
+	if (page->lru_sentinel) {
+		VM_BUG_ON(!left->lru_sentinel && !right->lru_sentinel);
+		left->lru_sentinel = true;
+		right->lru_sentinel = true;
+	}
+
 	list_del(&page->lru);
+}
+
+static __always_inline void del_page_from_lru_list(struct page *page,
+				struct lruvec *lruvec, enum lru_list lru)
+{
+	__del_page_from_lru_list(page, lruvec, lru);
 	update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
 }
 
+static __always_inline void move_page_to_lru_list(struct page *page,
+						  struct lruvec *lruvec,
+						  enum lru_list lru)
+{
+	__del_page_from_lru_list(page, lruvec, lru);
+	__add_page_to_lru_list(page, lruvec, lru);
+}
+
+static __always_inline void move_page_to_lru_list_tail(struct page *page,
+						       struct lruvec *lruvec,
+						       enum lru_list lru)
+{
+	__del_page_from_lru_list(page, lruvec, lru);
+	__add_page_to_lru_list_tail(page, lruvec, lru);
+}
+
 /**
  * page_lru_base_type - which LRU list type should a page be on?
  * @page: the page to test
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index feca75b8f492..492f86cdb346 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -19,6 +19,7 @@
 #include <linux/page-flags-layout.h>
 #include <linux/atomic.h>
 #include <linux/mm_types.h>
+#include <linux/pagevec.h>
 #include <asm/page.h>
 
 /* Free memory management - zoned buddy allocator.  */
@@ -260,6 +261,8 @@ struct lruvec {
 #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
 #define LRU_ALL	     ((1 << NR_LRU_LISTS) - 1)
 
+#define LRU_BATCH_MAX PAGEVEC_SIZE
+
 #define NUM_LRU_BATCH_LOCKS 32
 struct lru_batch_lock {
 	spinlock_t lock;
diff --git a/mm/swap.c b/mm/swap.c
index 286636bb6a4f..67eb89fc9435 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -561,7 +561,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
 		 * The page's writeback ends up during pagevec
 		 * We moves tha page into tail of inactive.
 		 */
-		list_move_tail(&page->lru, lru_head(&lruvec->lists[lru]));
+		move_page_to_lru_list_tail(page, lruvec, lru);
 		__count_vm_event(PGROTATED);
 	}
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index aa629c4720dd..b4c32a65a40f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1553,7 +1553,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		case -EBUSY:
 			/* else it is being freed elsewhere */
-			list_move(&page->lru, src);
+			move_page_to_lru_list(page, lruvec, lru);
 			continue;
 
 		default:
@@ -1943,7 +1943,7 @@ static unsigned move_active_pages_to_lru(struct lruvec *lruvec,
 
 		nr_pages = hpage_nr_pages(page);
 		update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
-		list_move(&page->lru, lru_head(&lruvec->lists[lru]));
+		move_page_to_lru_list(page, lruvec, lru);
 
 		if (put_page_testzero(page)) {
 			__ClearPageLRU(page);
-- 
2.16.1
