From: "Tobin C. Harding" <tobin@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>,
Matthew Wilcox <willy@infradead.org>
Cc: "Tobin C. Harding" <tobin@kernel.org>,
Roman Gushchin <guro@fb.com>,
Alexander Viro <viro@ftp.linux.org.uk>,
Christoph Hellwig <hch@infradead.org>,
Pekka Enberg <penberg@cs.helsinki.fi>,
David Rientjes <rientjes@google.com>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Christopher Lameter <cl@linux.com>,
Miklos Szeredi <mszeredi@redhat.com>,
Andreas Dilger <adilger@dilger.ca>,
Waiman Long <longman@redhat.com>, Tycho Andersen <tycho@tycho.ws>,
Theodore Ts'o <tytso@mit.edu>, Andi Kleen <ak@linux.intel.com>,
David Chinner <david@fromorbit.com>,
Nick Piggin <npiggin@gmail.com>, Rik van Riel <riel@redhat.com>,
Hugh Dickins <hughd@google.com>, Jonathan Corbet <corbet@lwn.net>,
linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: [RFC PATCH v5 12/16] slub: Enable moving objects to/from specific nodes
Date: Mon, 20 May 2019 15:40:13 +1000
Message-ID: <20190520054017.32299-13-tobin@kernel.org>
In-Reply-To: <20190520054017.32299-1-tobin@kernel.org>

We have just implemented Slab Movable Objects (object migration).
Currently object migration is only used to defragment a cache. On NUMA
systems it would be nice to be able to control the source and
destination nodes when moving objects.

Add CONFIG_SMO_NODE to guard this feature. CONFIG_SMO_NODE depends on
CONFIG_SLUB_DEBUG because we use the full slab list.

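For reference, exercising this interface needs a NUMA kernel with the
new option enabled. A sketch of the relevant .config fragment
(CONFIG_SLUB and CONFIG_SYSFS are the stated dependencies;
CONFIG_SLUB_DEBUG is pulled in automatically via select):

	CONFIG_NUMA=y
	CONFIG_SLUB=y
	CONFIG_SYSFS=y
	CONFIG_SLUB_DEBUG=y
	CONFIG_SMO_NODE=y
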
Implement moving all objects (including those in full slabs) to a
specific node. Expose this functionality to userspace via a sysfs entry.

Add sysfs entry:

   /sys/kernel/slab/<cache>/move

With this users get access to the following functionality:

 - Move all objects to a specified node:

	echo "N1" > move

 - Move all objects from one specified node to another specified
   node (from N1 to N2):

	echo "N1 N2" > move

This also enables shrinking slabs on a specific node:

	echo "N1 N1" > move
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
---
 mm/Kconfig |   7 ++
 mm/slub.c  | 249 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 256 insertions(+)

diff --git a/mm/Kconfig b/mm/Kconfig
index ee8d1f311858..aa8d60e69a01 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -258,6 +258,13 @@ config ARCH_ENABLE_THP_MIGRATION
 config CONTIG_ALLOC
 	def_bool (MEMORY_ISOLATION && COMPACTION) || CMA
 
+config SMO_NODE
+	bool "Enable per node control of Slab Movable Objects"
+	depends on SLUB && SYSFS
+	select SLUB_DEBUG
+	help
+	  On NUMA systems enable moving objects to and from a specified node.
+
 config PHYS_ADDR_T_64BIT
 	def_bool 64BIT
 
diff --git a/mm/slub.c b/mm/slub.c
index 2157205df7ba..9582f2fc97d2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4336,6 +4336,106 @@ static void move_slab_page(struct page *page, void *scratch, int node)
 	s->migrate(s, vector, count, node, private);
 }
 
+#ifdef CONFIG_SMO_NODE
+/*
+ * kmem_cache_move() - Attempt to move all slab objects.
+ * @s: The cache we are working on.
+ * @node: The node to move objects away from.
+ * @target_node: The node to move objects onto.
+ *
+ * Attempts to move all objects (partial slabs and full slabs) to the
+ * target node.
+ *
+ * Context: Takes the list_lock.
+ * Return: The number of slabs remaining on @node.
+ */
+static unsigned long kmem_cache_move(struct kmem_cache *s,
+				     int node, int target_node)
+{
+	struct kmem_cache_node *n = get_node(s, node);
+	LIST_HEAD(move_list);
+	struct page *page, *page2;
+	unsigned long flags;
+	void **scratch;
+
+	if (!s->migrate) {
+		pr_warn("%s SMO not enabled, cannot move objects\n", s->name);
+		goto out;
+	}
+
+	scratch = alloc_scratch(s);
+	if (!scratch)
+		goto out;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+
+	list_for_each_entry_safe(page, page2, &n->partial, lru) {
+		if (!slab_trylock(page))
+			/* Busy slab. Get out of the way */
+			continue;
+
+		if (page->inuse) {
+			list_move(&page->lru, &move_list);
+			/* Stop page being considered for allocations */
+			n->nr_partial--;
+			page->frozen = 1;
+
+			slab_unlock(page);
+		} else { /* Empty slab page */
+			list_del(&page->lru);
+			n->nr_partial--;
+			slab_unlock(page);
+			discard_slab(s, page);
+		}
+	}
+	list_for_each_entry_safe(page, page2, &n->full, lru) {
+		if (!slab_trylock(page))
+			continue;
+
+		list_move(&page->lru, &move_list);
+		page->frozen = 1;
+		slab_unlock(page);
+	}
+
+	spin_unlock_irqrestore(&n->list_lock, flags);
+
+	list_for_each_entry(page, &move_list, lru) {
+		if (page->inuse)
+			move_slab_page(page, scratch, target_node);
+	}
+	kfree(scratch);
+
+	/* Bail here to save taking the list_lock */
+	if (list_empty(&move_list))
+		goto out;
+
+	/* Inspect results and dispose of pages */
+	spin_lock_irqsave(&n->list_lock, flags);
+	list_for_each_entry_safe(page, page2, &move_list, lru) {
+		list_del(&page->lru);
+		slab_lock(page);
+		page->frozen = 0;
+
+		if (page->inuse) {
+			if (page->inuse == page->objects) {
+				list_add(&page->lru, &n->full);
+				slab_unlock(page);
+			} else {
+				n->nr_partial++;
+				list_add_tail(&page->lru, &n->partial);
+				slab_unlock(page);
+			}
+		} else {
+			slab_unlock(page);
+			discard_slab(s, page);
+		}
+	}
+	spin_unlock_irqrestore(&n->list_lock, flags);
+out:
+	return atomic_long_read(&n->nr_slabs);
+}
+#endif /* CONFIG_SMO_NODE */
+
 /*
  * kmem_cache_defrag() - Defragment node.
  * @s: cache we are working on.
@@ -4450,6 +4550,32 @@ static unsigned long kmem_cache_defrag(struct kmem_cache *s,
 	return n->nr_partial;
 }
 
+#ifdef CONFIG_SMO_NODE
+/*
+ * kmem_cache_move_to_node() - Move all slab objects to node.
+ * @s: The cache we are working on.
+ * @node: The target node to move objects to.
+ *
+ * Attempt to move all slab objects from all nodes to @node.
+ *
+ * Return: The total number of slabs left on emptied nodes.
+ */
+static unsigned long kmem_cache_move_to_node(struct kmem_cache *s, int node)
+{
+	unsigned long left = 0;
+	int nid;
+
+	for_each_node_state(nid, N_NORMAL_MEMORY) {
+		if (nid == node)
+			continue;
+
+		left += kmem_cache_move(s, nid, node);
+	}
+
+	return left;
+}
+#endif
+
 /**
  * kmem_defrag_slabs() - Defrag slab caches.
  * @node: The node to defrag or -1 for all nodes.
@@ -5594,6 +5720,126 @@ static ssize_t shrink_store(struct kmem_cache *s,
 }
 SLAB_ATTR(shrink);
 
+#ifdef CONFIG_SMO_NODE
+static ssize_t move_show(struct kmem_cache *s, char *buf)
+{
+	/* Writing is the only supported operation; there is nothing to show. */
+	return 0;
+}
+
+/*
+ * parse_move_store_input() - Parse @buf, getting integer arguments.
+ * @buf: Buffer to parse.
+ * @length: Length of @buf.
+ * @arg0: Return parameter, first argument.
+ * @arg1: Return parameter, second argument.
+ *
+ * Parses the input from a user write to the sysfs file 'move'. The
+ * input string should contain either one or two node specifiers of the
+ * form Nx, where x is an integer specifying the NUMA node ID. The 'N'
+ * may be upper or lower case, or omitted entirely.
+ *
+ * e.g.
+ *	echo 'N1' > /sys/kernel/slab/<cache>/move
+ * or
+ *	echo 'N0 N2' > /sys/kernel/slab/<cache>/move
+ *
+ * Regex matching accepted forms: '[nN]?[0-9]+( [nN]?[0-9]+)?'
+ *
+ * FIXME: This is really fragile; input must be exactly correct, since
+ * spurious whitespace causes parse errors.
+ *
+ * Return: 0 if an argument was successfully converted, or an error code.
+ */
+static ssize_t parse_move_store_input(const char *buf, size_t length,
+				      long *arg0, long *arg1)
+{
+	char *s, *save, *ptr;
+	int ret = 0;
+
+	if (!buf)
+		return -EINVAL;
+
+	s = kstrdup(buf, GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+	save = s;
+
+	if (length && s[length - 1] == '\n') {
+		s[length - 1] = '\0';
+		length--;
+	}
+
+	ptr = strsep(&s, " ");
+	if (!ptr || strcmp(ptr, "") == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	if (*ptr == 'N' || *ptr == 'n')
+		ptr++;
+	ret = kstrtol(ptr, 10, arg0);
+	if (ret < 0)
+		goto out;
+
+	if (s) {
+		if (*s == 'N' || *s == 'n')
+			s++;
+		ret = kstrtol(s, 10, arg1);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = 0;
+out:
+	kfree(save);
+	return ret;
+}
+
+static bool is_valid_node(int node)
+{
+	int nid;
+
+	for_each_node_state(nid, N_NORMAL_MEMORY) {
+		if (nid == node)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * move_store() - Move objects between nodes.
+ * @s: The cache we are working on.
+ * @buf: String received.
+ * @length: Length of @buf.
+ *
+ * Writes to /sys/kernel/slab/<cache>/move are interpreted as follows:
+ *
+ *	echo "N1" > move    : Move all objects (from all nodes) to node 1.
+ *	echo "N0 N1" > move : Move all objects from node 0 to node 1.
+ *
+ * The 'N' prefix may be omitted.
+ */
+static ssize_t move_store(struct kmem_cache *s, const char *buf, size_t length)
+{
+	long arg0 = -1;
+	long arg1 = -1;
+	int ret;
+
+	ret = parse_move_store_input(buf, length, &arg0, &arg1);
+	if (ret < 0)
+		return -EINVAL;
+
+	if (is_valid_node(arg0) && is_valid_node(arg1))
+		(void)kmem_cache_move(s, arg0, arg1);
+	else if (is_valid_node(arg0))
+		(void)kmem_cache_move_to_node(s, arg0);
+
+	/* FIXME: What should we be returning here? */
+	return length;
+}
+SLAB_ATTR(move);
+#endif /* CONFIG_SMO_NODE */
+
 #ifdef CONFIG_NUMA
 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
 {
@@ -5718,6 +5964,9 @@ static struct attribute *slab_attrs[] = {
 	&reclaim_account_attr.attr,
 	&destroy_by_rcu_attr.attr,
 	&shrink_attr.attr,
+#ifdef CONFIG_SMO_NODE
+	&move_attr.attr,
+#endif
 	&slabs_cpu_partial_attr.attr,
 #ifdef CONFIG_SLUB_DEBUG
 	&total_objects_attr.attr,
--
2.21.0