* [PATCH v2] z3fold: add shrinker
@ 2016-10-11 22:18 Vitaly Wool
  2016-10-11 22:52 ` Dave Chinner
From: Vitaly Wool @ 2016-10-11 22:18 UTC (permalink / raw)
  To: Linux-MM, linux-kernel; +Cc: Seth Jennings, Dan Streetman, Andrew Morton


Here comes the correct shrinker patch for z3fold. This shrinker
implementation does not free up any pages directly, but it allows
for a denser placement of compressed objects, which results in
fewer actual pages consumed and therefore a higher compression ratio.

This patch has been checked against the latest Linus tree.

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
---
 mm/z3fold.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 127 insertions(+), 24 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 8f9e89c..4841972 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/zpool.h>
+#include <linux/shrinker.h>
 
 /*****************
  * Structures
@@ -69,8 +70,11 @@ struct z3fold_ops {
  * @lru:	list tracking the z3fold pages in LRU order by most recently
  *		added buddy.
  * @pages_nr:	number of z3fold pages in the pool.
+ * @unbuddied_nr:	number of unbuddied z3fold pages in the pool.
  * @ops:	pointer to a structure of user defined operations specified at
  *		pool creation time.
+ * @shrinker:	shrinker structure to optimize page layout in background
+ * @no_shrinker:	flag showing if we run with shrinker or not
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular z3fold pool.
@@ -81,9 +85,12 @@ struct z3fold_pool {
 	struct list_head buddied;
 	struct list_head lru;
 	u64 pages_nr;
+	u64 unbuddied_nr;
 	const struct z3fold_ops *ops;
 	struct zpool *zpool;
 	const struct zpool_ops *zpool_ops;
+	struct shrinker shrinker;
+	bool no_shrinker;
 };
 
 enum buddy {
@@ -134,6 +141,9 @@ static int size_to_chunks(size_t size)
 #define for_each_unbuddied_list(_iter, _begin) \
 	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
 
+#define for_each_unbuddied_list_down(_iter, _end) \
+	for ((_iter) = (_end); (_iter) > 0; (_iter)--)
+
 /* Initializes the z3fold header of a newly allocated z3fold page */
 static struct z3fold_header *init_z3fold_page(struct page *page)
 {
@@ -209,6 +219,97 @@ static int num_free_chunks(struct z3fold_header *zhdr)
 	return nfree;
 }
 
+/* Has to be called with lock held */
+static int z3fold_compact_page(struct z3fold_header *zhdr, bool sync)
+{
+	struct page *page = virt_to_page(zhdr);
+	void *beg = zhdr;
+
+
+	if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private)) {
+		if (zhdr->middle_chunks != 0 &&
+		    zhdr->first_chunks == 0 &&
+		    zhdr->last_chunks == 0) {
+			memmove(beg + ZHDR_SIZE_ALIGNED,
+				beg + (zhdr->start_middle << CHUNK_SHIFT),
+				zhdr->middle_chunks << CHUNK_SHIFT);
+			zhdr->first_chunks = zhdr->middle_chunks;
+			zhdr->middle_chunks = 0;
+			zhdr->start_middle = 0;
+			zhdr->first_num++;
+			return 1;
+		}
+		if (sync)
+			goto out;
+
+		/* moving data is expensive, so let's only do that if
+		 * there's substantial gain (2+ chunks)
+		 */
+		if (zhdr->middle_chunks != 0 && zhdr->first_chunks != 0 &&
+		    zhdr->last_chunks == 0 &&
+		    zhdr->start_middle > zhdr->first_chunks + 2) {
+			unsigned short new_start = zhdr->first_chunks + 1;
+			memmove(beg + (new_start << CHUNK_SHIFT),
+				beg + (zhdr->start_middle << CHUNK_SHIFT),
+				zhdr->middle_chunks << CHUNK_SHIFT);
+			zhdr->start_middle = new_start;
+			return 1;
+		}
+		if (zhdr->middle_chunks != 0 && zhdr->last_chunks != 0 &&
+		    zhdr->first_chunks == 0 &&
+		    zhdr->middle_chunks + zhdr->last_chunks <=
+		    NCHUNKS - zhdr->start_middle - 2) {
+			unsigned short new_start = NCHUNKS - zhdr->last_chunks -
+				zhdr->middle_chunks;
+			memmove(beg + (new_start << CHUNK_SHIFT),
+				beg + (zhdr->start_middle << CHUNK_SHIFT),
+				zhdr->middle_chunks << CHUNK_SHIFT);
+			zhdr->start_middle = new_start;
+			return 1;
+		}
+	}
+out:
+	return 0;
+}
+
+static unsigned long z3fold_shrink_count(struct shrinker *shrink,
+				struct shrink_control *sc)
+{
+	struct z3fold_pool *pool = container_of(shrink, struct z3fold_pool,
+						shrinker);
+
+	return pool->unbuddied_nr;
+}
+
+static unsigned long z3fold_shrink_scan(struct shrinker *shrink,
+				struct shrink_control *sc)
+{
+	struct z3fold_pool *pool = container_of(shrink, struct z3fold_pool,
+						shrinker);
+	struct z3fold_header *zhdr;
+	int i, nr_to_scan = sc->nr_to_scan;
+
+	spin_lock(&pool->lock);
+
+	for_each_unbuddied_list_down(i, NCHUNKS - 3) {
+		if (!list_empty(&pool->unbuddied[i])) {
+			zhdr = list_first_entry(&pool->unbuddied[i],
+						struct z3fold_header, buddy);
+			if (z3fold_compact_page(zhdr, false)) {
+				int nchunks = num_free_chunks(zhdr);
+				list_del(&zhdr->buddy);
+				list_add(&zhdr->buddy,
+					&pool->unbuddied[nchunks]);
+			}
+			if (!--nr_to_scan)
+				break;
+		}
+	}
+	spin_unlock(&pool->lock);
+	return 0;
+}
+
+
 /*****************
  * API Functions
 *****************/
@@ -234,6 +335,13 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
 		INIT_LIST_HEAD(&pool->unbuddied[i]);
 	INIT_LIST_HEAD(&pool->buddied);
 	INIT_LIST_HEAD(&pool->lru);
+	pool->shrinker.count_objects = z3fold_shrink_count;
+	pool->shrinker.scan_objects = z3fold_shrink_scan;
+	pool->shrinker.seeks = DEFAULT_SEEKS;
+	if (register_shrinker(&pool->shrinker)) {
+		pr_warn("z3fold: could not register shrinker\n");
+		pool->no_shrinker = true;
+	}
 	pool->pages_nr = 0;
 	pool->ops = ops;
 	return pool;
@@ -247,31 +355,11 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
  */
 static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
+	if (!pool->no_shrinker)
+		unregister_shrinker(&pool->shrinker);
 	kfree(pool);
 }
 
-/* Has to be called with lock held */
-static int z3fold_compact_page(struct z3fold_header *zhdr)
-{
-	struct page *page = virt_to_page(zhdr);
-	void *beg = zhdr;
-
-
-	if (!test_bit(MIDDLE_CHUNK_MAPPED, &page->private) &&
-	    zhdr->middle_chunks != 0 &&
-	    zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
-		memmove(beg + ZHDR_SIZE_ALIGNED,
-			beg + (zhdr->start_middle << CHUNK_SHIFT),
-			zhdr->middle_chunks << CHUNK_SHIFT);
-		zhdr->first_chunks = zhdr->middle_chunks;
-		zhdr->middle_chunks = 0;
-		zhdr->start_middle = 0;
-		zhdr->first_num++;
-		return 1;
-	}
-	return 0;
-}
-
 /**
  * z3fold_alloc() - allocates a region of a given size
  * @pool:	z3fold pool from which to allocate
@@ -334,6 +422,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 					continue;
 				}
 				list_del(&zhdr->buddy);
+				pool->unbuddied_nr--;
 				goto found;
 			}
 		}
@@ -369,6 +458,7 @@ found:
 		/* Add to unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		pool->unbuddied_nr++;
 	} else {
 		/* Add to buddied list */
 		list_add(&zhdr->buddy, &pool->buddied);
@@ -412,6 +502,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		/* HEADLESS page stored */
 		bud = HEADLESS;
 	} else {
+		if (zhdr->first_chunks == 0 ||
+		    zhdr->middle_chunks == 0 ||
+		    zhdr->last_chunks == 0)
+			pool->unbuddied_nr--;
+
 		bud = handle_to_buddy(handle);
 
 		switch (bud) {
@@ -428,6 +523,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		default:
 			pr_err("%s: unknown bud %d\n", __func__, bud);
 			WARN_ON(1);
+			pool->unbuddied_nr++;
 			spin_unlock(&pool->lock);
 			return;
 		}
@@ -453,10 +549,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		free_z3fold_page(zhdr);
 		pool->pages_nr--;
 	} else {
-		z3fold_compact_page(zhdr);
+		z3fold_compact_page(zhdr, true);
 		/* Add to the unbuddied list */
 		freechunks = num_free_chunks(zhdr);
 		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		pool->unbuddied_nr++;
 	}
 
 	spin_unlock(&pool->lock);
@@ -520,6 +617,11 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		zhdr = page_address(page);
 		if (!test_bit(PAGE_HEADLESS, &page->private)) {
 			list_del(&zhdr->buddy);
+			if (zhdr->first_chunks == 0 ||
+			    zhdr->middle_chunks == 0 ||
+			    zhdr->last_chunks == 0)
+				pool->unbuddied_nr--;
+
 			/*
 			 * We need encode the handles before unlocking, since
 			 * we can race with free that will set
@@ -579,11 +681,12 @@ next:
 				/* Full, add to buddied list */
 				list_add(&zhdr->buddy, &pool->buddied);
 			} else {
-				z3fold_compact_page(zhdr);
+				z3fold_compact_page(zhdr, true);
 				/* add to unbuddied list */
 				freechunks = num_free_chunks(zhdr);
 				list_add(&zhdr->buddy,
 					 &pool->unbuddied[freechunks]);
+				pool->unbuddied_nr++;
 			}
 		}
 
-- 
2.4.2


* Re: [PATCH v2] z3fold: add shrinker
  2016-10-11 22:18 [PATCH v2] z3fold: add shrinker Vitaly Wool
@ 2016-10-11 22:52 ` Dave Chinner
  2016-10-12  8:26   ` Vitaly Wool
From: Dave Chinner @ 2016-10-11 22:52 UTC (permalink / raw)
  To: Vitaly Wool
  Cc: Linux-MM, linux-kernel, Seth Jennings, Dan Streetman, Andrew Morton

On Wed, Oct 12, 2016 at 12:18:27AM +0200, Vitaly Wool wrote:
> 
> Here comes the correct shrinker patch for z3fold. This shrinker
> implementation does not free up any pages directly, but it allows
> for a denser placement of compressed objects, which results in
> fewer actual pages consumed and therefore a higher compression ratio.
> 
> This patch has been checked against the latest Linus tree.
> 
> Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
> ---
>  mm/z3fold.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 127 insertions(+), 24 deletions(-)
> 
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index 8f9e89c..4841972 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -30,6 +30,7 @@
>  #include <linux/slab.h>
>  #include <linux/spinlock.h>
>  #include <linux/zpool.h>
> +#include <linux/shrinker.h>
>  
>  /*****************
>   * Structures
> @@ -69,8 +70,11 @@ struct z3fold_ops {
>   * @lru:	list tracking the z3fold pages in LRU order by most recently
>   *		added buddy.
>   * @pages_nr:	number of z3fold pages in the pool.
> + * @unbuddied_nr:	number of unbuddied z3fold pages in the pool.
>   * @ops:	pointer to a structure of user defined operations specified at
>   *		pool creation time.
> + * @shrinker:	shrinker structure to optimize page layout in background
> + * @no_shrinker:	flag showing if we run with shrinker or not

Ugh.

>  
> +/* Has to be called with lock held */
> +static int z3fold_compact_page(struct z3fold_header *zhdr, bool sync)
> +{
> +	struct page *page = virt_to_page(zhdr);
> +	void *beg = zhdr;
> +

[snip using memmove() to shift chunks around]

> +static unsigned long z3fold_shrink_scan(struct shrinker *shrink,
> +				struct shrink_control *sc)
> +{
> +	struct z3fold_pool *pool = container_of(shrink, struct z3fold_pool,
> +						shrinker);
> +	struct z3fold_header *zhdr;
> +	int i, nr_to_scan = sc->nr_to_scan;
> +
> +	spin_lock(&pool->lock);

Do not do this. Shrinkers should not run entirely under a spin lock
like this - it causes scheduling latency problems and when the
shrinker is run concurrently on different CPUs it will simply burn
CPU doing no useful work. Especially, in this case, as each call to
z3fold_compact_page() may be copying a significant amount of data
around and so there is potentially a /lot/ of work being done on
each call to the shrinker.

If you need compaction exclusion for the shrinker invocation, then
please use a sleeping lock to protect the compaction work.
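
Roughly, the shape could be (the mutex here is purely illustrative, it
is not something the patch adds):

	/* hypothetical: struct mutex compact_mutex; in struct z3fold_pool */

	if (!mutex_trylock(&pool->compact_mutex))
		return SHRINK_STOP;	/* another CPU is already compacting */

	/*
	 * Take pool->lock only around the unbuddied-list manipulation;
	 * the memmove()s inside z3fold_compact_page() then run with only
	 * compact_mutex held, so the shrinker can be scheduled out.
	 */

	mutex_unlock(&pool->compact_mutex);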

>  *****************/
> @@ -234,6 +335,13 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
>  		INIT_LIST_HEAD(&pool->unbuddied[i]);
>  	INIT_LIST_HEAD(&pool->buddied);
>  	INIT_LIST_HEAD(&pool->lru);
> +	pool->shrinker.count_objects = z3fold_shrink_count;
> +	pool->shrinker.scan_objects = z3fold_shrink_scan;
> +	pool->shrinker.seeks = DEFAULT_SEEKS;
> +	if (register_shrinker(&pool->shrinker)) {
> +		pr_warn("z3fold: could not register shrinker\n");
> +		pool->no_shrinker = true;
> +	}

Just fail creation of the pool. If you can't register a shrinker,
then much bigger problems are about to happen to your system, and
running a new memory consumer that /can't be shrunk/ is not going to
help anyone.
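
I.e. something like this, sketched against the function as posted:

	if (register_shrinker(&pool->shrinker)) {
		pr_err("z3fold: could not register shrinker\n");
		kfree(pool);
		return NULL;
	}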

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH v2] z3fold: add shrinker
  2016-10-11 22:52 ` Dave Chinner
@ 2016-10-12  8:26   ` Vitaly Wool
  2016-10-13  0:20     ` Dave Chinner
From: Vitaly Wool @ 2016-10-12  8:26 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Linux-MM, linux-kernel, Seth Jennings, Dan Streetman, Andrew Morton

On Wed, 12 Oct 2016 09:52:06 +1100
Dave Chinner <david@fromorbit.com> wrote:

<snip>
> 
> > +static unsigned long z3fold_shrink_scan(struct shrinker *shrink,
> > +				struct shrink_control *sc)
> > +{
> > +	struct z3fold_pool *pool = container_of(shrink, struct z3fold_pool,
> > +						shrinker);
> > +	struct z3fold_header *zhdr;
> > +	int i, nr_to_scan = sc->nr_to_scan;
> > +
> > +	spin_lock(&pool->lock);
> 
> Do not do this. Shrinkers should not run entirely under a spin lock
> like this - it causes scheduling latency problems and when the
> shrinker is run concurrently on different CPUs it will simply burn
> CPU doing no useful work. Especially, in this case, as each call to
> z3fold_compact_page() may be copying a significant amount of data
> around and so there is potentially a /lot/ of work being done on
> each call to the shrinker.
> 
> If you need compaction exclusion for the shrinker invocation, then
> please use a sleeping lock to protect the compaction work.

Well, as far as I recall, spin_lock() will resolve to a sleeping lock
for PREEMPT_RT, so it is not that much of a problem for configurations
that care much about latencies. Please also note that the time
spent in the loop is deterministic since we take not more than one entry
from every unbuddied list.

What I could do though is add the following piece of code at the end of
the loop, right after the /break/:
		spin_unlock(&pool->lock);
		cond_resched();
		spin_lock(&pool->lock);

Would that make sense for you?
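
In context, the loop body would then read roughly as follows (sketch
only):

	for_each_unbuddied_list_down(i, NCHUNKS - 3) {
		if (!list_empty(&pool->unbuddied[i])) {
			zhdr = list_first_entry(&pool->unbuddied[i],
						struct z3fold_header, buddy);
			if (z3fold_compact_page(zhdr, false)) {
				int nchunks = num_free_chunks(zhdr);
				list_del(&zhdr->buddy);
				list_add(&zhdr->buddy,
					&pool->unbuddied[nchunks]);
			}
			if (!--nr_to_scan)
				break;
			/* let lock waiters in and give the scheduler a chance */
			spin_unlock(&pool->lock);
			cond_resched();
			spin_lock(&pool->lock);
		}
	}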

> 
> >  *****************/
> > @@ -234,6 +335,13 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
> >  		INIT_LIST_HEAD(&pool->unbuddied[i]);
> >  	INIT_LIST_HEAD(&pool->buddied);
> >  	INIT_LIST_HEAD(&pool->lru);
> > +	pool->shrinker.count_objects = z3fold_shrink_count;
> > +	pool->shrinker.scan_objects = z3fold_shrink_scan;
> > +	pool->shrinker.seeks = DEFAULT_SEEKS;
> > +	if (register_shrinker(&pool->shrinker)) {
> > +		pr_warn("z3fold: could not register shrinker\n");
> > +		pool->no_shrinker = true;
> > +	} 
> 
> Just fail creation of the pool. If you can't register a shrinker,
> then much bigger problems are about to happen to your system, and
> running a new memory consumer that /can't be shrunk/ is not going to
> help anyone.

I don't have a strong opinion on this but it doesn't look fatal to me
in _this_ particular case (z3fold) since even without the shrinker, the
compression ratio will never be lower than the one of zbud, which
doesn't have a shrinker at all.

Best regards,
   Vitaly


* Re: [PATCH v2] z3fold: add shrinker
  2016-10-12  8:26   ` Vitaly Wool
@ 2016-10-13  0:20     ` Dave Chinner
  2016-10-13 13:15       ` Vitaly Wool
From: Dave Chinner @ 2016-10-13  0:20 UTC (permalink / raw)
  To: Vitaly Wool
  Cc: Linux-MM, linux-kernel, Seth Jennings, Dan Streetman, Andrew Morton

On Wed, Oct 12, 2016 at 10:26:34AM +0200, Vitaly Wool wrote:
> On Wed, 12 Oct 2016 09:52:06 +1100
> Dave Chinner <david@fromorbit.com> wrote:
> 
> <snip>
> > 
> > > +static unsigned long z3fold_shrink_scan(struct shrinker *shrink,
> > > +				struct shrink_control *sc)
> > > +{
> > > +	struct z3fold_pool *pool = container_of(shrink, struct z3fold_pool,
> > > +						shrinker);
> > > +	struct z3fold_header *zhdr;
> > > +	int i, nr_to_scan = sc->nr_to_scan;
> > > +
> > > +	spin_lock(&pool->lock);
> > 
> > Do not do this. Shrinkers should not run entirely under a spin lock
> > like this - it causes scheduling latency problems and when the
> > shrinker is run concurrently on different CPUs it will simply burn
> > CPU doing no useful work. Especially, in this case, as each call to
> > z3fold_compact_page() may be copying a significant amount of data
> > around and so there is potentially a /lot/ of work being done on
> > each call to the shrinker.
> > 
> > If you need compaction exclusion for the shrinker invocation, then
> > please use a sleeping lock to protect the compaction work.
> 
> Well, as far as I recall, spin_lock() will resolve to a sleeping lock
> for PREEMPT_RT,

Irrelevant for mainline kernels....

> so it is not that much of a problem for configurations
> that care much about latencies.

That's an incorrect assumption. Long spinlock holds prevent
scheduling on that CPU, and so we still get latency problems.

> Please also note that the time
> spent in the loop is deterministic since we take not more than one entry
> from every unbuddied list.

So the loop is:

	for_each_unbuddied_list_down(i, NCHUNKS - 3) {

NCHUNKS = (PAGE_SIZE - (1 << (PAGE_SHIFT - 6))) >> (PAGE_SHIFT - 6)

So for 4k page, NCHUNKS = (4096 - (1<<6)) >> 6, which is 63. So,
potentially 60 memmoves under a single spinlock on a 4k page
machine. That's a lot of work, especially as some of those memmoves
are going to move a large amount of data in the page.

And if we consider 64k pages, we've now got NCHUNKS = 1023, which
means your shrinker is not, by default, going to scan all your
unbuddied lists because it will expire nr_to_scan (usually
SHRINK_BATCH = 128) before it's got through all of them. So not only
will the shrinker do too much under a spinlock, it won't even do
what you want it to do correctly on such setups.

Further, the way nr_to_scan is decremented and the shrinker return
value are incorrect. nr_to_scan is not the /number of objects to
free/, but the number of objects to /check for reclaim/. The
shrinker is then supposed to return the number it frees (or
compacts) to give feedback to the shrinker infrastructure about how
much reclaim work is being done (i.e. scanned vs freed ratio). This
code always returns 0, which tells the shrinker infrastructure that
it's not making progress...
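
To sketch the accounting side only (the locking problems above
notwithstanding), the scan callback would need to look more like:

	static unsigned long z3fold_shrink_scan(struct shrinker *shrink,
					struct shrink_control *sc)
	{
		struct z3fold_pool *pool = container_of(shrink, struct z3fold_pool,
							shrinker);
		struct z3fold_header *zhdr;
		unsigned long compacted = 0;
		int i, nr_to_scan = sc->nr_to_scan;

		spin_lock(&pool->lock);
		for_each_unbuddied_list_down(i, NCHUNKS - 3) {
			if (list_empty(&pool->unbuddied[i]))
				continue;
			/* every object we look at counts against nr_to_scan */
			if (nr_to_scan-- <= 0)
				break;
			zhdr = list_first_entry(&pool->unbuddied[i],
						struct z3fold_header, buddy);
			if (z3fold_compact_page(zhdr, false)) {
				list_move(&zhdr->buddy,
					  &pool->unbuddied[num_free_chunks(zhdr)]);
				compacted++;
			}
		}
		spin_unlock(&pool->lock);

		/* tell the shrinker core how much work was actually done */
		return compacted;
	}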

> What I could do though is add the following piece of code at the end of
> the loop, right after the /break/:
> 		spin_unlock(&pool->lock);
> 		cond_resched();
> 		spin_lock(&pool->lock);
> 
> Would that make sense for you?

Not really, because it ignores the fact that shrinkers can (and
often do) run concurrently on multiple CPUs, and so serialising them
all on a spinlock just causes contention, even if you do this.

Memory reclaim is only as good as the worst shrinker it runs. I
don't care what your subsystem does, but if you're implementing a
shrinker then it needs to play by memory reclaim and shrinker
context rules.....

> > >  *****************/
> > > @@ -234,6 +335,13 @@ static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
> > >  		INIT_LIST_HEAD(&pool->unbuddied[i]);
> > >  	INIT_LIST_HEAD(&pool->buddied);
> > >  	INIT_LIST_HEAD(&pool->lru);
> > > +	pool->shrinker.count_objects = z3fold_shrink_count;
> > > +	pool->shrinker.scan_objects = z3fold_shrink_scan;
> > > +	pool->shrinker.seeks = DEFAULT_SEEKS;
> > > +	if (register_shrinker(&pool->shrinker)) {
> > > +		pr_warn("z3fold: could not register shrinker\n");
> > > +		pool->no_shrinker = true;
> > > +	} 
> > 
> > Just fail creation of the pool. If you can't register a shrinker,
> > then much bigger problems are about to happen to your system, and
> > running a new memory consumer that /can't be shrunk/ is not going to
> > help anyone.
> 
> I don't have a strong opinion on this but it doesn't look fatal to me
> in _this_ particular case (z3fold) since even without the shrinker, the
> compression ratio will never be lower than the one of zbud, which
> doesn't have a shrinker at all.

Either your subsystem needs a shrinker or it doesn't. If it needs
one, then it should follow the accepted norm of failing
initialisation because a shrinker register failure is indicative of
a serious memory allocation problem already occurring in the
machine. We can only make it worse by continuing without a
shrinker....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH v2] z3fold: add shrinker
  2016-10-13  0:20     ` Dave Chinner
@ 2016-10-13 13:15       ` Vitaly Wool
From: Vitaly Wool @ 2016-10-13 13:15 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Linux-MM, linux-kernel, Seth Jennings, Dan Streetman, Andrew Morton

On Thu, 13 Oct 2016 11:20:06 +1100
Dave Chinner <david@fromorbit.com> wrote:

<snip>
> 
> That's an incorrect assumption. Long spinlock holds prevent
> scheduling on that CPU, and so we still get latency problems.

Fair enough. The problem is, some of the z3fold code that needs mutual
exclusion runs with preemption disabled, so a spinlock is still the way
to go. I'll try to avoid taking it while actual shrinking is in progress,
though.
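
Something along these lines, perhaps (a sketch only, not the final
patch; a page pulled off its list this way would still need protection
against a concurrent z3fold_free() of one of its buddies):

	spin_lock(&pool->lock);
	zhdr = list_first_entry(&pool->unbuddied[i],
				struct z3fold_header, buddy);
	list_del(&zhdr->buddy);		/* hide the page from other lookups */
	spin_unlock(&pool->lock);

	/* the expensive memmove()s now run without pool->lock held */
	if (z3fold_compact_page(zhdr, false))
		compacted++;		/* counted for the shrinker return value */

	spin_lock(&pool->lock);
	list_add(&zhdr->buddy, &pool->unbuddied[num_free_chunks(zhdr)]);
	spin_unlock(&pool->lock);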
 
> > Please also note that the time
> > spent in the loop is deterministic since we take not more than one entry
> > from every unbuddied list.
> 
> So the loop is:
> 
> 	for_each_unbuddied_list_down(i, NCHUNKS - 3) {
> 
> NCHUNKS = (PAGE_SIZE - (1 << (PAGE_SHIFT - 6))) >> (PAGE_SHIFT - 6)
> 
> So for 4k page, NCHUNKS = (4096 - (1<<6)) >> 6, which is 63. So,
> potentially 60 memmoves under a single spinlock on a 4k page
> machine. That's a lot of work, especially as some of those memmoves
> are going to move a large amount of data in the page.
> 
> And if we consider 64k pages, we've now got NCHUNKS = 1023, which
> means your shrinker is not, by default, going to scan all your
> unbuddied lists because it will expire nr_to_scan (usually
> SHRINK_BATCH = 128) before it's got through all of them. So not only
> will the shrinker do too much under a spinlock, it won't even do
> what you want it to do correctly on such setups.

Thanks for the pointer, I'll address that in the new patch.

> Further, the way nr_to_scan is decremented and the shrinker return
> value are incorrect. nr_to_scan is not the /number of objects to
> free/, but the number of objects to /check for reclaim/. The
> shrinker is then supposed to return the number it frees (or
> compacts) to give feedback to the shrinker infrastructure about how
> much reclaim work is being done (i.e. scanned vs freed ratio). This
> code always returns 0, which tells the shrinker infrastructure that
> it's not making progress...

Will fix.
> 
> > What I could do though is add the following piece of code at the end of
> > the loop, right after the /break/:
> > 		spin_unlock(&pool->lock);
> > 		cond_resched();
> > 		spin_lock(&pool->lock);
> > 
> > Would that make sense for you?
> 
> Not really, because it ignores the fact that shrinkers can (and
> often do) run concurrently on multiple CPUs, and so serialising them
> all on a spinlock just causes contention, even if you do this.
> 
> Memory reclaim is only as good as the worst shrinker it runs. I
> don't care what your subsystem does, but if you're implementing a
> shrinker then it needs to play by memory reclaim and shrinker
> context rules.....
> 

Ok, see above.

Best regards,
   Vitaly
