linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] [RFC] KSM: numa awareness sysfs knob
@ 2011-11-30 10:37 Petr Holasek
  2011-11-30 23:47 ` Andrew Morton
  0 siblings, 1 reply; 5+ messages in thread
From: Petr Holasek @ 2011-11-30 10:37 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Andrea Arcangeli, Andrew Morton, linux-kernel, linux-mm,
	Anton Arapov, Petr Holasek

Introduce a new sysfs knob /sys/kernel/mm/ksm/max_node_dist, whose
value will be used as the limitation for node distance of merged pages.

Signed-off-by: Petr Holasek <pholasek@redhat.com>
---
 Documentation/vm/ksm.txt |    4 ++
 mm/ksm.c                 |  122 +++++++++++++++++++++++++++++++++++----------
 2 files changed, 99 insertions(+), 27 deletions(-)

diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
index b392e49..b882140 100644
--- a/Documentation/vm/ksm.txt
+++ b/Documentation/vm/ksm.txt
@@ -58,6 +58,10 @@ sleep_millisecs  - how many milliseconds ksmd should sleep before next scan
                    e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
                    Default: 20 (chosen for demonstration purposes)
 
+max_node_dist    - maximum node distance between two pages which could be
+                   merged.
+                   Default: 255 (without any limitations)
+
 run              - set 0 to stop ksmd from running but keep merged pages,
                    set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
                    set 2 to stop ksmd and unmerge all pages currently merged,
diff --git a/mm/ksm.c b/mm/ksm.c
index 310544a..ea33040 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -36,6 +36,7 @@
 #include <linux/hash.h>
 #include <linux/freezer.h>
 #include <linux/oom.h>
+#include <linux/numa.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -120,6 +121,7 @@ struct stable_node {
 	struct rb_node node;
 	struct hlist_head hlist;
 	unsigned long kpfn;
+	int nid;
 };
 
 /**
@@ -153,8 +155,8 @@ struct rmap_item {
 #define STABLE_FLAG	0x200	/* is listed from the stable tree */
 
 /* The stable and unstable tree heads */
-static struct rb_root root_stable_tree = RB_ROOT;
 static struct rb_root root_unstable_tree = RB_ROOT;
+static struct rb_root root_stable_tree[MAX_NUMNODES] = { RB_ROOT, };
 
 #define MM_SLOTS_HASH_SHIFT 10
 #define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
@@ -189,6 +191,9 @@ static unsigned int ksm_thread_pages_to_scan = 100;
 /* Milliseconds ksmd should sleep between batches */
 static unsigned int ksm_thread_sleep_millisecs = 20;
 
+/* Maximum distance of nodes in which pages are merged */
+static unsigned int ksm_node_distance = 255;
+
 #define KSM_RUN_STOP	0
 #define KSM_RUN_MERGE	1
 #define KSM_RUN_UNMERGE	2
@@ -302,6 +307,25 @@ static inline int in_stable_tree(struct rmap_item *rmap_item)
 	return rmap_item->address & STABLE_FLAG;
 }
 
+#ifdef CONFIG_NUMA
+static inline int node_dist(int from, int to)
+{
+	int dist = node_distance(from, to);
+
+	return dist == -1 ? 0 : dist;
+}
+#else
+static inline int node_dist(int from, int to)
+{
+	return 0;
+}
+#endif
+
+static inline int page_distance(struct page *from, struct page *to)
+{
+	return node_dist(page_to_nid(from), page_to_nid(to));
+}
+
 /*
  * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
  * page tables after it has passed through ksm_exit() - which, if necessary,
@@ -458,7 +482,8 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
 		cond_resched();
 	}
 
-	rb_erase(&stable_node->node, &root_stable_tree);
+	rb_erase(&stable_node->node, &root_stable_tree[stable_node->nid]);
+	printk(KERN_DEBUG "Node erased from tree %d\n", stable_node->nid);
 	free_stable_node(stable_node);
 }
 
@@ -960,6 +985,9 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
 {
 	int err;
 
+	if (page_distance(page, tree_page) > ksm_node_distance)
+		return NULL;
+
 	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
 	if (!err) {
 		err = try_to_merge_with_ksm_page(tree_rmap_item,
@@ -983,10 +1011,11 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
  * This function returns the stable tree node of identical content if found,
  * NULL otherwise.
  */
-static struct page *stable_tree_search(struct page *page)
+static struct page *stable_tree_search(struct page *page, int tree_nid)
 {
-	struct rb_node *node = root_stable_tree.rb_node;
+	struct rb_node *node = root_stable_tree[tree_nid].rb_node;
 	struct stable_node *stable_node;
+	int page_nid = page_to_nid(page);
 
 	stable_node = page_stable_node(page);
 	if (stable_node) {			/* ksm page forked */
@@ -994,6 +1023,10 @@ static struct page *stable_tree_search(struct page *page)
 		return page;
 	}
 
+	/* Pages are too far for merge */
+	if (node_dist(tree_nid, page_nid) > ksm_node_distance)
+		return NULL;
+
 	while (node) {
 		struct page *tree_page;
 		int ret;
@@ -1028,7 +1061,8 @@ static struct page *stable_tree_search(struct page *page)
  */
 static struct stable_node *stable_tree_insert(struct page *kpage)
 {
-	struct rb_node **new = &root_stable_tree.rb_node;
+	int nid = page_to_nid(kpage);
+	struct rb_node **new = &root_stable_tree[nid].rb_node;
 	struct rb_node *parent = NULL;
 	struct stable_node *stable_node;
 
@@ -1065,12 +1099,14 @@ static struct stable_node *stable_tree_insert(struct page *kpage)
 		return NULL;
 
 	rb_link_node(&stable_node->node, parent, new);
-	rb_insert_color(&stable_node->node, &root_stable_tree);
+	rb_insert_color(&stable_node->node, &root_stable_tree[nid]);
 
 	INIT_HLIST_HEAD(&stable_node->hlist);
 
 	stable_node->kpfn = page_to_pfn(kpage);
+	stable_node->nid = nid;
 	set_page_stable_node(kpage, stable_node);
+	printk(KERN_DEBUG "Stable node was inserted into tree %d\n", nid);
 
 	return stable_node;
 }
@@ -1173,27 +1209,32 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	struct rmap_item *tree_rmap_item;
 	struct page *tree_page = NULL;
 	struct stable_node *stable_node;
-	struct page *kpage;
+	struct page *kpage = NULL;
 	unsigned int checksum;
 	int err;
+	int i;
+	int nid = page_to_nid(page);
 
 	remove_rmap_item_from_tree(rmap_item);
 
 	/* We first start with searching the page inside the stable tree */
-	kpage = stable_tree_search(page);
-	if (kpage) {
-		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
-		if (!err) {
-			/*
-			 * The page was successfully merged:
-			 * add its rmap_item to the stable tree.
-			 */
-			lock_page(kpage);
-			stable_tree_append(rmap_item, page_stable_node(kpage));
-			unlock_page(kpage);
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		if (node_distance(i, nid) <= ksm_node_distance)
+			kpage = stable_tree_search(page, nid);
+		if (kpage) {
+			err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
+			if (!err) {
+				/*
+				 * The page was successfully merged:
+				 * add its rmap_item to the stable tree.
+				 */
+				lock_page(kpage);
+				stable_tree_append(rmap_item, page_stable_node(kpage));
+				unlock_page(kpage);
+			}
+			put_page(kpage);
+			return;
 		}
-		put_page(kpage);
-		return;
 	}
 
 	/*
@@ -1764,15 +1805,18 @@ static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
 						 unsigned long end_pfn)
 {
 	struct rb_node *node;
+	int i;
 
-	for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
-		struct stable_node *stable_node;
+	for (i = 0; i < MAX_NUMNODES; i++)
+		for (node = rb_first(&root_stable_tree[i]); node; node = rb_next(node)) {
+			struct stable_node *stable_node;
+
+			stable_node = rb_entry(node, struct stable_node, node);
+			if (stable_node->kpfn >= start_pfn &&
+			    stable_node->kpfn < end_pfn)
+				return stable_node;
+		}
 
-		stable_node = rb_entry(node, struct stable_node, node);
-		if (stable_node->kpfn >= start_pfn &&
-		    stable_node->kpfn < end_pfn)
-			return stable_node;
-	}
 	return NULL;
 }
 
@@ -1922,6 +1966,29 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 }
 KSM_ATTR(run);
 
+static ssize_t max_node_dist_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", ksm_node_distance);
+}
+
+static ssize_t max_node_dist_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t count)
+{
+	int err;
+	unsigned long node_dist;
+
+	err = kstrtoul(buf, 10, &node_dist);
+	if (err || node_dist > 255)
+		return -EINVAL;
+
+	ksm_node_distance = node_dist;
+
+	return count;
+}
+KSM_ATTR(max_node_dist);
+
 static ssize_t pages_shared_show(struct kobject *kobj,
 				 struct kobj_attribute *attr, char *buf)
 {
@@ -1976,6 +2043,7 @@ static struct attribute *ksm_attrs[] = {
 	&pages_unshared_attr.attr,
 	&pages_volatile_attr.attr,
 	&full_scans_attr.attr,
+	&max_node_dist_attr.attr,
 	NULL,
 };
 
-- 
1.7.6.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] [RFC] KSM: numa awareness sysfs knob
  2011-11-30 10:37 [PATCH] [RFC] KSM: numa awareness sysfs knob Petr Holasek
@ 2011-11-30 23:47 ` Andrew Morton
  2011-12-01 10:16   ` Petr Holasek
  0 siblings, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2011-11-30 23:47 UTC (permalink / raw)
  To: Petr Holasek
  Cc: Hugh Dickins, Andrea Arcangeli, linux-kernel, linux-mm, Anton Arapov

On Wed, 30 Nov 2011 11:37:26 +0100
Petr Holasek <pholasek@redhat.com> wrote:

> Introduce a new sysfs knob /sys/kernel/mm/ksm/max_node_dist, whose
> value will be used as the limitation for node distance of merged pages.
> 

The changelog doesn't really describe why you think Linux needs this
feature?  What's the reasoning?  Use cases?  What value does it provide?

> index b392e49..b882140 100644
> --- a/Documentation/vm/ksm.txt
> +++ b/Documentation/vm/ksm.txt
> @@ -58,6 +58,10 @@ sleep_millisecs  - how many milliseconds ksmd should sleep before next scan
>                     e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
>                     Default: 20 (chosen for demonstration purposes)
>  
> +max_node_dist    - maximum node distance between two pages which could be
> +                   merged.
> +                   Default: 255 (without any limitations)

And this doesn't explain to our users why they might want to alter it,
and what effects they would see from doing so.  Maybe that's obvious to
them...

>  run              - set 0 to stop ksmd from running but keep merged pages,
>                     set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
>                     set 2 to stop ksmd and unmerge all pages currently merged,
>
> ...
>
> +#ifdef CONFIG_NUMA
> +static inline int node_dist(int from, int to)
> +{
> +	int dist = node_distance(from, to);
> +
> +	return dist == -1 ? 0 : dist;
> +}

So I spent some time grubbing around trying to work out what a return
value of -1 from node_distance() means, and wasn't successful.  Perhaps
an explanatory comment here would have helped.

> +#else
> +static inline int node_dist(int from, int to)
> +{
> +	return 0;
> +}
> +#endif
>
> ...
>
> +static ssize_t max_node_dist_store(struct kobject *kobj,
> +				   struct kobj_attribute *attr,
> +				   const char *buf, size_t count)
> +{
> +	int err;
> +	unsigned long node_dist;
> +
> +	err = kstrtoul(buf, 10, &node_dist);
> +	if (err || node_dist > 255)
> +		return -EINVAL;

If kstrtoul() returned an errno we should propagate that back rather than
overwriting it with -EINVAL.

> +	ksm_node_distance = node_dist;
> +
> +	return count;
> +}
>
> ...
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: KSM: numa awareness sysfs knob
  2011-11-30 23:47 ` Andrew Morton
@ 2011-12-01 10:16   ` Petr Holasek
       [not found]     ` <201112011940.19022.nai.xia@gmail.com>
  0 siblings, 1 reply; 5+ messages in thread
From: Petr Holasek @ 2011-12-01 10:16 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Andrea Arcangeli, linux-kernel, linux-mm, Anton Arapov

On Wed, 30 Nov 2011, Andrew Morton wrote:

> Date: Wed, 30 Nov 2011 15:47:19 -0800
> From: Andrew Morton <akpm@linux-foundation.org>
> To: Petr Holasek <pholasek@redhat.com>
> Cc: Hugh Dickins <hughd@google.com>, Andrea Arcangeli
>  <aarcange@redhat.com>, linux-kernel@vger.kernel.org, linux-mm@kvack.org,
>  Anton Arapov <anton@redhat.com>
> Subject: Re: [PATCH] [RFC] KSM: numa awareness sysfs knob
> 
> On Wed, 30 Nov 2011 11:37:26 +0100
> Petr Holasek <pholasek@redhat.com> wrote:
> 
> > Introduce a new sysfs knob /sys/kernel/mm/ksm/max_node_dist, whose
> > value will be used as the limitation for node distance of merged pages.
> > 
> 
> The changelog doesn't really describe why you think Linux needs this
> feature?  What's the reasoning?  Use cases?  What value does it provide?

Typical use-case could be a lot of KVM guests on NUMA machine and cpus from
more distant nodes would have significant increase of access latency to the
merged ksm page. I chose sysfs knob for higher scalability.

> 
> > index b392e49..b882140 100644
> > --- a/Documentation/vm/ksm.txt
> > +++ b/Documentation/vm/ksm.txt
> > @@ -58,6 +58,10 @@ sleep_millisecs  - how many milliseconds ksmd should sleep before next scan
> >                     e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
> >                     Default: 20 (chosen for demonstration purposes)
> >  
> > +max_node_dist    - maximum node distance between two pages which could be
> > +                   merged.
> > +                   Default: 255 (without any limitations)
> 
> And this doesn't explain to our users why they might want to alter it,
> and what effects they would see from doing so.  Maybe that's obvious to
> them...

Now I can't figure out more extensive description of this feature, but we
could explain it deeply, of course.

> 
> >  run              - set 0 to stop ksmd from running but keep merged pages,
> >                     set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
> >                     set 2 to stop ksmd and unmerge all pages currently merged,
> >
> > ...
> >
> > +#ifdef CONFIG_NUMA
> > +static inline int node_dist(int from, int to)
> > +{
> > +	int dist = node_distance(from, to);
> > +
> > +	return dist == -1 ? 0 : dist;
> > +}
> 
> So I spent some time grubbing around trying to work out what a return
> value of -1 from node_distance() means, and wasn't successful.  Perhaps
> an explanatory comment here would have helped.

Yes, apologize, my mistake. I've overlooked that it should never return value
lower than LOCAL_DISTANCE even with CONFIG_NUMA unset. So those wrappers are
pointless and I'll remove them.

> 
> > +#else
> > +static inline int node_dist(int from, int to)
> > +{
> > +	return 0;
> > +}
> > +#endif
> >
> > ...
> >
> > +static ssize_t max_node_dist_store(struct kobject *kobj,
> > +				   struct kobj_attribute *attr,
> > +				   const char *buf, size_t count)
> > +{
> > +	int err;
> > +	unsigned long node_dist;
> > +
> > +	err = kstrtoul(buf, 10, &node_dist);
> > +	if (err || node_dist > 255)
> > +		return -EINVAL;
> 
> If kstrtoul() returned an errno we should propagate that back rather than
> overwriting it with -EINVAL.

Ok, I'll fix it.

> 
> > +	ksm_node_distance = node_dist;
> > +
> > +	return count;
> > +}
> >
> > ...
> >
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: KSM: numa awareness sysfs knob
       [not found]     ` <201112011940.19022.nai.xia@gmail.com>
@ 2011-12-01 13:04       ` Petr Holasek
  0 siblings, 0 replies; 5+ messages in thread
From: Petr Holasek @ 2011-12-01 13:04 UTC (permalink / raw)
  To: Nai Xia
  Cc: Andrew Morton, Hugh Dickins, Andrea Arcangeli, linux-kernel,
	linux-mm, Anton Arapov

On Thu, 01 Dec 2011, Nai Xia wrote:

> Date: Thu, 1 Dec 2011 19:40:18 +0800
> From: Nai Xia <nai.xia@gmail.com>
> To: Petr Holasek <pholasek@redhat.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>, Hugh Dickins
>  <hughd@google.com>, Andrea Arcangeli <aarcange@redhat.com>,
>  linux-kernel@vger.kernel.org, linux-mm@kvack.org, Anton Arapov
>  <anton@redhat.com>
> Subject: Re: KSM: numa awareness sysfs knob
> Reply-To: nai.xia@gmail.com
> 
> On Thursday 01 December 2011 18:16:40 Petr Holasek wrote:
> > On Wed, 30 Nov 2011, Andrew Morton wrote:
> > 
> > > Date: Wed, 30 Nov 2011 15:47:19 -0800
> > > From: Andrew Morton <akpm@linux-foundation.org>
> > > To: Petr Holasek <pholasek@redhat.com>
> > > Cc: Hugh Dickins <hughd@google.com>, Andrea Arcangeli
> > >  <aarcange@redhat.com>, linux-kernel@vger.kernel.org, linux-mm@kvack.org,
> > >  Anton Arapov <anton@redhat.com>
> > > Subject: Re: [PATCH] [RFC] KSM: numa awareness sysfs knob
> > > 
> > > On Wed, 30 Nov 2011 11:37:26 +0100
> > > Petr Holasek <pholasek@redhat.com> wrote:
> > > 
> > > > Introduce a new sysfs knob /sys/kernel/mm/ksm/max_node_dist, whose
> > > > value will be used as the limitation for node distance of merged pages.
> > > > 
> > > 
> > > The changelog doesn't really describe why you think Linux needs this
> > > feature?  What's the reasoning?  Use cases?  What value does it provide?
> > 
> > Typical use-case could be a lot of KVM guests on NUMA machine and cpus from
> > more distant nodes would have significant increase of access latency to the
> > merged ksm page. I chose sysfs knob for higher scalability.
> 
> Seems this consideration for NUMA is sound. 
> 
> > 
> > > 
> > > > index b392e49..b882140 100644
> > > > --- a/Documentation/vm/ksm.txt
> > > > +++ b/Documentation/vm/ksm.txt
> > > > @@ -58,6 +58,10 @@ sleep_millisecs  - how many milliseconds ksmd should sleep before next scan
> > > >                     e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
> > > >                     Default: 20 (chosen for demonstration purposes)
> > > >  
> > > > +max_node_dist    - maximum node distance between two pages which could be
> > > > +                   merged.
> > > > +                   Default: 255 (without any limitations)
> > > 
> > > And this doesn't explain to our users why they might want to alter it,
> > > and what effects they would see from doing so.  Maybe that's obvious to
> > > them...
> > 
> > Now I can't figure out more extensive description of this feature, but we
> > could explain it deeply, of course.
> 
> However, if we don't know what the number fed into this knob really means, 
> seems nobody would think of using this knob...
> 
> Then why not make this NUMA feature automatically adjusted by some algorithm
> instread of dropping it to userland?
> 
> BTW, the algrothim you already include in this patch seems unstable itself:
> 
> Suppose we have three duplicated pages in order: Page_a, Page_b, Page_c with 
> distance(Page_a, Page_b) == distance(Page_b, Page_c) == 3, 
> but distance(Page_a, Page_c) == 6 and if max_node_dist == 3, 
> a stable algorithm should result in Page_a and Page_c being merged to Page_b,
> independent of the order these pages get scanned. 
> 
> But with your patch, if ksmd goes Page_b --> Page_c --> Page_a, will it 
> result in Page_b being merged to Page_c but Page_a not merged since its 
> distance to Page_c is 6?

Yes, you're right. With this patch, merge order depends only on the order of
scanning. Use of some algorithm (maybe from graph-theory field?) is a really 
good point. Although the complexity of code will rise a lot, it maybe the
best solution for most of usecases when this algorithm would be able to do 
some heuristics and determine max_distance for merging on its own without 
any userspace inputs.

> 
> It may easy to further deduce that maybe a worst case(or even many cases?) 
> for your patch will get many many could-be-merged pages not merged simply 
> because of the sequence they are scanned.
> 
> The problem you plan to solve maybe worthwhile, but it may also be much more
> complex than you expected ;-)

That's the reason why it is only RFC, I mainly wanted to gather your opinions:)

>   
> 
> BR,
> 
> Nai

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: KSM: numa awareness sysfs knob
  2012-01-25  0:03 ` Andrew Morton
@ 2012-01-25 16:40   ` Petr Holasek
  0 siblings, 0 replies; 5+ messages in thread
From: Petr Holasek @ 2012-01-25 16:40 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Andrea Arcangeli, Chris Wright, Izik Eidus,
	linux-kernel, linux-mm, Anton Arapov

On Tue, 24 Jan 2012, Andrew Morton wrote:
> On Mon, 23 Jan 2012 11:29:28 +0100
> Petr Holasek <pholasek@redhat.com> wrote:
> 
> > This patch is based on RFC
> > 
> > https://lkml.org/lkml/2011/11/30/91
> > 
> > Introduces new sysfs binary knob /sys/kernel/mm/ksm/merge_nodes
> 
> It's not binary - it's ascii text!  "boolean" is a better term here ;)
> 

Of course, I'll fix it :)

> > which control merging pages across different numa nodes.
> > When it is set to zero only pages from the same node are merged,
> > otherwise pages from all nodes can be merged together (default behavior).
> > 
> > Typical use-case could be a lot of KVM guests on NUMA machine
> > where cpus from more distant nodes would have significant increase
> > of access latency to the merged ksm page. Switching merge_nodes
> > to 1 will result into these steps:
> > 
> > 	1) unmerging all ksm pages
> > 	2) re-merging all pages from VM_MERGEABLE vmas only within
> > 		their NUMA nodes.
> > 	3) lower average access latency to merged pages at the
> > 	   expense of higher memory usage.
> > 
> > Every numa node has its own stable & unstable trees because
> > of faster searching and inserting. Changing of merge_nodes
> > value breaks COW on all current ksm pages.
> > 
> 
> How useful is this code?  Do you have any performance testing results
> to help make the case for merging it?

I didn't any no performance testing, but number of nodes is still the same, 
the only difference is that they are distributed among more trees, 
so searching is faster within specified numa node. Every node includes pointer
to the tree's root, but I assume it is quite small payload for faster searching.
Or not?

> 
> Should the unmerged case be made permanent and not configurable?  IOW,
> what is the argument for continuing to permit the user to merge across
> nodes?
> 
> Should the code bother doing this unmerge when
> /sys/kernel/mm/ksm/merge_nodes is written to?  It would be simpler to
> expect the user to configure /sys/kernel/mm/ksm/merge_nodes prior to
> using KSM at all?

The only reason for this feature is being more user-friendly. But if 
we find some issue in doing merging/unmerging interactively, forcing user
to set merge_nodes value before first ksm run will be more safe.

> 
> > @@ -58,6 +58,9 @@ sleep_millisecs  - how many milliseconds ksmd should sleep before next scan
> >                     e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
> >                     Default: 20 (chosen for demonstration purposes)
> >  
> > +merge_nodes      - specifies if pages from different numa nodes can be merged
> > +                   Default: 1
> 
> This documentation would be better if it informed the user about how to
> use merge_nodes.  What are the effects of altering it and why might
> they wish to do this?
> 
> >
> > ...
> >
> > +static ssize_t merge_nodes_store(struct kobject *kobj,
> > +				   struct kobj_attribute *attr,
> > +				   const char *buf, size_t count)
> > +{
> > +	int err;
> > +	long unsigned int knob;
> 
> Plain old "unsigned long" is more usual.
> 
> Better would be to make this "unsigned", to match ksm_merge_nodes.  Use
> kstrtouint() below.
> 
> >
> > ...
> >
> > @@ -1987,6 +2070,9 @@ static struct attribute *ksm_attrs[] = {
> >  	&pages_unshared_attr.attr,
> >  	&pages_volatile_attr.attr,
> >  	&full_scans_attr.attr,
> > +#ifdef CONFIG_NUMA
> > +	&merge_nodes_attr.attr,
> > +#endif
> 
> One might think that with CONFIG_NUMA=n, we just added a pile of
> useless codebloat to vmlinux.  But gcc is sneaky and removes the
> unreferenced functions.
> 
> However while doing so, gcc shows that it reads
> Documentation/SubmitChecklist, section 2:
> 
> mm/ksm.c:2017: warning: 'merge_nodes_attr' defined but not used
> 
> So...
> 
> diff -puN mm/ksm.c~ksm-numa-awareness-sysfs-knob-fix mm/ksm.c
> --- a/mm/ksm.c~ksm-numa-awareness-sysfs-knob-fix
> +++ a/mm/ksm.c
> @@ -1973,6 +1973,7 @@ static ssize_t run_store(struct kobject 
>  }
>  KSM_ATTR(run);
>  
> +#ifdef CONFIG_NUMA
>  static ssize_t merge_nodes_show(struct kobject *kobj,
>  				struct kobj_attribute *attr, char *buf)
>  {
> @@ -2015,6 +2016,7 @@ static ssize_t merge_nodes_store(struct 
>  	return count;
>  }
>  KSM_ATTR(merge_nodes);
> +#endif
>  
>  static ssize_t pages_shared_show(struct kobject *kobj,
>  				 struct kobj_attribute *attr, char *buf)
> _
> 

Apologize, I overlooked that. I'll fix it and other issues you pointed out
above in next version. 

Many thanks for reviewing!

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-01-25 16:41 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-30 10:37 [PATCH] [RFC] KSM: numa awareness sysfs knob Petr Holasek
2011-11-30 23:47 ` Andrew Morton
2011-12-01 10:16   ` Petr Holasek
     [not found]     ` <201112011940.19022.nai.xia@gmail.com>
2011-12-01 13:04       ` Petr Holasek
2012-01-23 10:29 [PATCH] " Petr Holasek
2012-01-25  0:03 ` Andrew Morton
2012-01-25 16:40   ` Petr Holasek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).