All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] zswap: add zswap shrinker
@ 2013-05-21  6:26 Bob Liu
  2013-05-21 18:57 ` Seth Jennings
  0 siblings, 1 reply; 6+ messages in thread
From: Bob Liu @ 2013-05-21  6:26 UTC (permalink / raw)
  To: linux-mm
  Cc: akpm, sjenning, ngupta, minchan, konrad.wilk, dan.magenheimer,
	rcj, mgorman, riel, dave, hughd, Bob Liu

In my understanding, currenlty zswap have a few problems.
1. The zswap pool size is 20% of total memory that's too random and once it
gets full the performance may even worse because everytime pageout() an anon
page two disk-io write ops may happend instead of one.

2. The reclaim hook will only be triggered in frontswap_store().
It may be result that the zswap pool size can't be adjusted in time which may
caused 20% memory lose for other users.

This patch introduce a zswap shrinker, it make the balance that the zswap
pool size will be the same as anon pages in use.
It's more flexiable and the size of zswap pool can be dynamically changed
during different memory situation.

This patch was based on Seth's zswap v12. It's very draft and only compile
tested now.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 include/linux/zbud.h |    2 +-
 mm/zbud.c            |   17 ++++++++--
 mm/zswap.c           |   84 +++++++++++++++++++++++++++++++++++---------------
 3 files changed, 74 insertions(+), 29 deletions(-)

diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 2571a5c..afd2eb2 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -14,7 +14,7 @@ void zbud_destroy_pool(struct zbud_pool *pool);
 int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
 	unsigned long *handle);
 void zbud_free(struct zbud_pool *pool, unsigned long handle);
-int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
+int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries, struct page *page);
 void *zbud_map(struct zbud_pool *pool, unsigned long handle);
 void zbud_unmap(struct zbud_pool *pool, unsigned long handle);
 u64 zbud_get_pool_size(struct zbud_pool *pool);
diff --git a/mm/zbud.c b/mm/zbud.c
index b10a1f1..3045bfb 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -294,8 +294,15 @@ int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
 	/* Couldn't find unbuddied zbpage, create new one */
 	spin_unlock(&pool->lock);
 	page = alloc_page(gfp);
+	if (!page) {
+		/* Couldn't alloc new page, try to direct reclaim */
+		if (zbud_reclaim_page(pool, 16, page))
+			return -ENOMEM;
+	}
+
 	if (!page)
 		return -ENOMEM;
+
 	spin_lock(&pool->lock);
 	pool->pages_nr++;
 	zbpage = init_zbud_page(page);
@@ -412,7 +419,7 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
  * no pages to evict or an eviction handler is not registered, -EAGAIN if
  * the retry limit was hit.
  */
-int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
+int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries, struct page *page)
 {
 	int i, ret, freechunks;
 	struct zbud_page *zbpage;
@@ -461,8 +468,12 @@ next:
 			 * Both buddies are now free, free the zbpage and
 			 * return success.
 			 */
-			free_zbud_page(zbpage);
-			pool->pages_nr--;
+			if (page)
+				page = &zbpage->page;
+			else {
+				free_zbud_page(zbpage);
+				pool->pages_nr--;
+			}
 			spin_unlock(&pool->lock);
 			return 0;
 		} else if (zbpage->first_chunks == 0 ||
diff --git a/mm/zswap.c b/mm/zswap.c
index 22cc034..9703bb5 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -84,11 +84,6 @@ module_param_named(enabled, zswap_enabled, bool, 0);
 static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
 module_param_named(compressor, zswap_compressor, charp, 0);
 
-/* The maximum percentage of memory that the compressed pool can occupy */
-static unsigned int zswap_max_pool_percent = 20;
-module_param_named(max_pool_percent,
-			zswap_max_pool_percent, uint, 0644);
-
 /*********************************
 * compression functions
 **********************************/
@@ -362,15 +357,6 @@ cleanup:
 	return -ENOMEM;
 }
 
-/*********************************
-* helpers
-**********************************/
-static inline bool zswap_is_full(void)
-{
-	return (totalram_pages * zswap_max_pool_percent / 100 <
-		zswap_pool_pages);
-}
-
 /*
  * Carries out the common pattern of freeing and entry's zsmalloc allocation,
  * freeing the entry itself, and decrementing the number of stored pages.
@@ -430,6 +416,9 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
 		 * Get a new page to read into from swap.
 		 */
 		if (!new_page) {
+			/* Need more agressive here to alloc memory so that pages in
+			 * zswap pool can be written out to disk and finally can shrink
+			 * zswap pool size.*/
 			new_page = alloc_page(GFP_KERNEL);
 			if (!new_page)
 				break; /* Out of memory */
@@ -620,16 +609,6 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 		goto reject;
 	}
 
-	/* reclaim space if needed */
-	if (zswap_is_full()) {
-		zswap_pool_limit_hit++;
-		if (zbud_reclaim_page(tree->pool, 8)) {
-			zswap_reject_reclaim_fail++;
-			ret = -ENOMEM;
-			goto reject;
-		}
-	}
-
 	/* allocate entry */
 	entry = zswap_entry_cache_alloc(GFP_KERNEL);
 	if (!entry) {
@@ -650,7 +629,9 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
 	/* store */
 	len = dlen + sizeof(struct zswap_header);
-	ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,
+	/* Don't use reserve memory so that system won't enter very bad memory
+	 * situation becasue of zswap*/
+	ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC,
 		&handle);
 	if (ret == -E2BIG) {
 		zswap_reject_compress_poor++;
@@ -907,11 +888,60 @@ static inline int __init zswap_debugfs_init(void)
 static inline void __exit zswap_debugfs_exit(void) { }
 #endif
 
+/*
+ * This zswap shrinker interface reduces the number of pageframes
+ * used by zswap to approximately the same as the total number of LRU_ANON
+ * pageframes in use which means 1:1
+ * The policy can be changed if there is better scale proved in future.
+ */
+static int shrink_zswap_memory(struct shrinker *shrink,
+				struct shrink_control *sc)
+{
+	static bool in_progress;
+	int nr_evict = 0;
+	int nr_reclaim = 0;
+	int  global_anon_pages_inuse;
+	struct zswap_tree *tree;
+	int tree_type;
+
+	if (!sc->nr_to_scan)
+		goto skip_evict;
+	/* don't allow more than one eviction thread at a time */
+	if (in_progress)
+		goto skip_evict;
+	in_progress = true;
+
+	global_anon_pages_inuse = global_page_state(NR_LRU_BASE + LRU_ACTIVE_ANON) +
+		global_page_state(NR_LRU_BASE + LRU_INACTIVE_ANON);
+
+	if (zswap_pool_pages > global_anon_pages_inuse)
+		nr_reclaim = zswap_pool_pages - global_anon_pages_inuse;
+	else
+		nr_reclaim = 0;
+
+	while (nr_reclaim > 0)
+		for (tree_type = 0; tree_type < MAX_SWAPFILES; tree_type++) {
+			tree = zswap_trees[tree_type];
+			if (tree) {
+				if (zbud_reclaim_page(tree->pool, 8, NULL))
+					zswap_reject_reclaim_fail++;
+				else {
+					nr_evict++;
+					nr_reclaim--;
+				}
+			}
+		}
+	in_progress = false;
+skip_evict:
+	return nr_evict;
+}
+
 /*********************************
 * module init and exit
 **********************************/
 static int __init init_zswap(void)
 {
+	struct shrinker zswap_shrinker;
 	if (!zswap_enabled)
 		return 0;
 
@@ -931,6 +961,10 @@ static int __init init_zswap(void)
 	frontswap_register_ops(&zswap_frontswap_ops);
 	if (zswap_debugfs_init())
 		pr_warn("debugfs initialization failed\n");
+
+	zswap_shrinker.shrink = shrink_zswap_memory;
+	zswap_shrinker.seeks = DEFAULT_SEEKS;
+	register_shrinker(&zswap_shrinker);
 	return 0;
 pcpufail:
 	zswap_comp_exit();
-- 
1.7.10.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] zswap: add zswap shrinker
  2013-05-21  6:26 [RFC PATCH] zswap: add zswap shrinker Bob Liu
@ 2013-05-21 18:57 ` Seth Jennings
  2013-05-22  4:03   ` Bob Liu
  0 siblings, 1 reply; 6+ messages in thread
From: Seth Jennings @ 2013-05-21 18:57 UTC (permalink / raw)
  To: Bob Liu
  Cc: linux-mm, akpm, ngupta, minchan, konrad.wilk, dan.magenheimer,
	rcj, mgorman, riel, dave, hughd, Bob Liu

On Tue, May 21, 2013 at 02:26:07PM +0800, Bob Liu wrote:
> In my understanding, currenlty zswap have a few problems.
> 1. The zswap pool size is 20% of total memory that's too random and once it
> gets full the performance may even worse because everytime pageout() an anon
> page two disk-io write ops may happend instead of one.

Just to clarify, 20% is a default maximum amount that zswap can occupy.

Also, in the steady over-the-limit state, the average number of writebacks is
equal to the number of pages coming into zswap.  The description above makes it
sound like there is a reclaim amplification effect (two writebacks per zswap
store) when, on average, there is none. The 2:1 effect only happens on one or
two store operations right after the pool becomes full.

This is unclear though, mostly because the pool limit is enforced in
zswap.  A situation exists where there might be an unbuddied zbud page with
room for the upcoming allocation but, because we are over the pool limit,
reclaim is done during that store anyway. I'm working on a clean way to fix
that up, probably by moving the limit enforcement into zbud as suggested by
Mel.

> 2. The reclaim hook will only be triggered in frontswap_store().
> It may be result that the zswap pool size can't be adjusted in time which may
> caused 20% memory lose for other users.
> 
> This patch introduce a zswap shrinker, it make the balance that the zswap
> pool size will be the same as anon pages in use.

Using zbud, with 2 zpages per zbud page, that would mean that up to 2/3 of anon
pages could be compressed while 1/3 remain uncompressed.

How did you conclude that this is the right balance?

If nr_reclaim in the shrinker became very large due to global_anon_pages_inuse
suddenly dropping, we could be writing back a LOT of pages all at once.

Having already looked at the patch, I can say that this isn't going to be the
way to do this.  I agree that there should be some sort of dynamic sizing, but
IMHO using a shrinker isn't the way.  Dave Chinner would not be happy about
this since it is based on the zcache shrinker logic and he didn't have many
kind words to say about it: https://lkml.org/lkml/2012/11/27/552

Seth

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] zswap: add zswap shrinker
  2013-05-21 18:57 ` Seth Jennings
@ 2013-05-22  4:03   ` Bob Liu
  2013-05-22  6:05     ` Bob Liu
  2013-05-22 14:08     ` Seth Jennings
  0 siblings, 2 replies; 6+ messages in thread
From: Bob Liu @ 2013-05-22  4:03 UTC (permalink / raw)
  To: Seth Jennings
  Cc: Bob Liu, linux-mm, akpm, ngupta, minchan, konrad.wilk,
	dan.magenheimer, rcj, mgorman, riel, dave, hughd


On 05/22/2013 02:57 AM, Seth Jennings wrote:
> On Tue, May 21, 2013 at 02:26:07PM +0800, Bob Liu wrote:
>> In my understanding, currenlty zswap have a few problems.
>> 1. The zswap pool size is 20% of total memory that's too random and once it
>> gets full the performance may even worse because everytime pageout() an anon
>> page two disk-io write ops may happend instead of one.
> 
> Just to clarify, 20% is a default maximum amount that zswap can occupy.
> 
> Also, in the steady over-the-limit state, the average number of writebacks is
> equal to the number of pages coming into zswap.  The description above makes it
> sound like there is a reclaim amplification effect (two writebacks per zswap
> store) when, on average, there is none. The 2:1 effect only happens on one or
> two store operations right after the pool becomes full.

I don't think it only happens on one or two store operations.

When the system enter a situation or run a workload which have many anon
pages, the zswap pool will be full easily and most of the time.

But after it's full there are still many anon pages need to be reclaimed
and frontswap_store() will be entered and call zbud_reclaim_page() to
writeout two pages every time.

The effect to the user will be after the zswap is full, the disk IO is
always twice than disable it.

> 
> This is unclear though, mostly because the pool limit is enforced in
> zswap.  A situation exists where there might be an unbuddied zbud page with
> room for the upcoming allocation but, because we are over the pool limit,
> reclaim is done during that store anyway. I'm working on a clean way to fix

Yes, but always reclaim by writing out two pages.
So after the pool is full, there will be always more disk IO than normal
which can cause performance drop and make the user surprise.

> that up, probably by moving the limit enforcement into zbud as suggested by
> Mel.

Nice :)

> 
>> 2. The reclaim hook will only be triggered in frontswap_store().
>> It may be result that the zswap pool size can't be adjusted in time which may
>> caused 20% memory lose for other users.
>>
>> This patch introduce a zswap shrinker, it make the balance that the zswap
>> pool size will be the same as anon pages in use.
> 
> Using zbud, with 2 zpages per zbud page, that would mean that up to 2/3 of anon
> pages could be compressed while 1/3 remain uncompressed.
> 
> How did you conclude that this is the right balance?
> 

It may not, but at least it can be changed dynamically for different
workloads. It can be higher than 20%*total_mem if there are too many
anon pages and can be shrinked easily.

> If nr_reclaim in the shrinker became very large due to global_anon_pages_inuse
> suddenly dropping, we could be writing back a LOT of pages all at once.
> 

Hmm, that's a problem.

> Having already looked at the patch, I can say that this isn't going to be the
> way to do this.  I agree that there should be some sort of dynamic sizing, but

Yes, that's what I'm looking forward to see. A policy to manage/balance
the size of zswap pool dynamically.
Maybe you have better idea to implement it.

> IMHO using a shrinker isn't the way.  Dave Chinner would not be happy about
> this since it is based on the zcache shrinker logic and he didn't have many
> kind words to say about it: https://lkml.org/lkml/2012/11/27/552
> 
> Seth
> 

Thanks!

-- 
Regards,
-Bob

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] zswap: add zswap shrinker
  2013-05-22  4:03   ` Bob Liu
@ 2013-05-22  6:05     ` Bob Liu
  2013-05-22 14:08     ` Seth Jennings
  1 sibling, 0 replies; 6+ messages in thread
From: Bob Liu @ 2013-05-22  6:05 UTC (permalink / raw)
  To: Seth Jennings
  Cc: Bob Liu, linux-mm, akpm, ngupta, minchan, konrad.wilk,
	dan.magenheimer, rcj, mgorman, riel, dave, hughd

On 05/22/2013 12:03 PM, Bob Liu wrote:
> 
> On 05/22/2013 02:57 AM, Seth Jennings wrote:
>> On Tue, May 21, 2013 at 02:26:07PM +0800, Bob Liu wrote:
>>> In my understanding, currenlty zswap have a few problems.
>>> 1. The zswap pool size is 20% of total memory that's too random and once it
>>> gets full the performance may even worse because everytime pageout() an anon
>>> page two disk-io write ops may happend instead of one.
>>
>> Just to clarify, 20% is a default maximum amount that zswap can occupy.
>>
>> Also, in the steady over-the-limit state, the average number of writebacks is
>> equal to the number of pages coming into zswap.  The description above makes it
>> sound like there is a reclaim amplification effect (two writebacks per zswap
>> store) when, on average, there is none. The 2:1 effect only happens on one or
>> two store operations right after the pool becomes full.
> 
> I don't think it only happens on one or two store operations.
> 
> When the system enter a situation or run a workload which have many anon
> pages, the zswap pool will be full easily and most of the time.
> 
> But after it's full there are still many anon pages need to be reclaimed
> and frontswap_store() will be entered and call zbud_reclaim_page() to
> writeout two pages every time.
> 
> The effect to the user will be after the zswap is full, the disk IO is
> always twice than disable it.
> 
>>
>> This is unclear though, mostly because the pool limit is enforced in
>> zswap.  A situation exists where there might be an unbuddied zbud page with
>> room for the upcoming allocation but, because we are over the pool limit,
>> reclaim is done during that store anyway. I'm working on a clean way to fix
> 
> Yes, but always reclaim by writing out two pages.
> So after the pool is full, there will be always more disk IO than normal
> which can cause performance drop and make the user surprise.
> 

I found my concern is the same as Mel mentioned about nchunks.
Setting nchunks=2 is a way to workaround this issue.

But what I'm trying to do is dynamically change the pool size hoping to
reduce zswap_is_full() happen. And do the writeback in background in a
shrinker instead of the direct path pageout() > frontswap_store().

-- 
Regards,
-Bob

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] zswap: add zswap shrinker
  2013-05-22  4:03   ` Bob Liu
  2013-05-22  6:05     ` Bob Liu
@ 2013-05-22 14:08     ` Seth Jennings
  2013-05-23  1:56       ` Bob Liu
  1 sibling, 1 reply; 6+ messages in thread
From: Seth Jennings @ 2013-05-22 14:08 UTC (permalink / raw)
  To: Bob Liu
  Cc: Bob Liu, linux-mm, akpm, ngupta, minchan, konrad.wilk,
	dan.magenheimer, rcj, mgorman, riel, dave, hughd

On Wed, May 22, 2013 at 12:03:03PM +0800, Bob Liu wrote:
> 
> On 05/22/2013 02:57 AM, Seth Jennings wrote:
> > On Tue, May 21, 2013 at 02:26:07PM +0800, Bob Liu wrote:
> >> In my understanding, currenlty zswap have a few problems.
> >> 1. The zswap pool size is 20% of total memory that's too random and once it
> >> gets full the performance may even worse because everytime pageout() an anon
> >> page two disk-io write ops may happend instead of one.
> > 
> > Just to clarify, 20% is a default maximum amount that zswap can occupy.
> > 
> > Also, in the steady over-the-limit state, the average number of writebacks is
> > equal to the number of pages coming into zswap.  The description above makes it
> > sound like there is a reclaim amplification effect (two writebacks per zswap
> > store) when, on average, there is none. The 2:1 effect only happens on one or
> > two store operations right after the pool becomes full.
> 
> I don't think it only happens on one or two store operations.
> 
> When the system enter a situation or run a workload which have many anon
> pages, the zswap pool will be full easily and most of the time.

I think the part missing here is the just because a page is reclaimed on a
particular store because we are over the zswap limit doesn't necessarily mean
that page will be reallocated to the pool on the next zbud_alloc().  The
reclaimed page is only reallocated if there is no unbuddied page in the pool
with enough free space to hold the requested allocation.

In the case that the reclaimed page is not reallocated to the pool, we will be
under the pool limit on the next zswap store and not do reclaim.

> 
> But after it's full there are still many anon pages need to be reclaimed
> and frontswap_store() will be entered and call zbud_reclaim_page() to
> writeout two pages every time.
> 
> The effect to the user will be after the zswap is full, the disk IO is
> always twice than disable it.
> 
> > 
> > This is unclear though, mostly because the pool limit is enforced in
> > zswap.  A situation exists where there might be an unbuddied zbud page with
> > room for the upcoming allocation but, because we are over the pool limit,
> > reclaim is done during that store anyway. I'm working on a clean way to fix
> 
> Yes, but always reclaim by writing out two pages.
> So after the pool is full, there will be always more disk IO than normal
> which can cause performance drop and make the user surprise.
> 
> > that up, probably by moving the limit enforcement into zbud as suggested by
> > Mel.
> 
> Nice :)
> 
> > 
> >> 2. The reclaim hook will only be triggered in frontswap_store().
> >> It may be result that the zswap pool size can't be adjusted in time which may
> >> caused 20% memory lose for other users.
> >>
> >> This patch introduce a zswap shrinker, it make the balance that the zswap
> >> pool size will be the same as anon pages in use.
> > 
> > Using zbud, with 2 zpages per zbud page, that would mean that up to 2/3 of anon
> > pages could be compressed while 1/3 remain uncompressed.
> > 
> > How did you conclude that this is the right balance?
> > 
> 
> It may not, but at least it can be changed dynamically for different
> workloads. It can be higher than 20%*total_mem if there are too many
> anon pages and can be shrinked easily.
> 
> > If nr_reclaim in the shrinker became very large due to global_anon_pages_inuse
> > suddenly dropping, we could be writing back a LOT of pages all at once.
> > 
> 
> Hmm, that's a problem.
> 
> > Having already looked at the patch, I can say that this isn't going to be the
> > way to do this.  I agree that there should be some sort of dynamic sizing, but
> 
> Yes, that's what I'm looking forward to see. A policy to manage/balance
> the size of zswap pool dynamically.
> Maybe you have better idea to implement it.

I think we first have to figure out "what the policy should be" for a dynamic
approach, then figure out how to implement it, the latter being the
easier part of the exercise.  I haven't come up with a good answer to the
policy question yet.  Otherwise I would have implemented it already :)

Thanks,
Seth

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] zswap: add zswap shrinker
  2013-05-22 14:08     ` Seth Jennings
@ 2013-05-23  1:56       ` Bob Liu
  0 siblings, 0 replies; 6+ messages in thread
From: Bob Liu @ 2013-05-23  1:56 UTC (permalink / raw)
  To: Seth Jennings
  Cc: Bob Liu, linux-mm, akpm, ngupta, minchan, konrad.wilk,
	dan.magenheimer, rcj, mgorman, riel, dave, hughd


On 05/22/2013 10:08 PM, Seth Jennings wrote:
> On Wed, May 22, 2013 at 12:03:03PM +0800, Bob Liu wrote:
>>
>> On 05/22/2013 02:57 AM, Seth Jennings wrote:
>>> On Tue, May 21, 2013 at 02:26:07PM +0800, Bob Liu wrote:
>>>> In my understanding, currenlty zswap have a few problems.
>>>> 1. The zswap pool size is 20% of total memory that's too random and once it
>>>> gets full the performance may even worse because everytime pageout() an anon
>>>> page two disk-io write ops may happend instead of one.
>>>
>>> Just to clarify, 20% is a default maximum amount that zswap can occupy.
>>>
>>> Also, in the steady over-the-limit state, the average number of writebacks is
>>> equal to the number of pages coming into zswap.  The description above makes it
>>> sound like there is a reclaim amplification effect (two writebacks per zswap
>>> store) when, on average, there is none. The 2:1 effect only happens on one or
>>> two store operations right after the pool becomes full.
>>
>> I don't think it only happens on one or two store operations.
>>
>> When the system enter a situation or run a workload which have many anon
>> pages, the zswap pool will be full easily and most of the time.
> 
> I think the part missing here is the just because a page is reclaimed on a
> particular store because we are over the zswap limit doesn't necessarily mean
> that page will be reallocated to the pool on the next zbud_alloc().  The
> reclaimed page is only reallocated if there is no unbuddied page in the pool
> with enough free space to hold the requested allocation.
> 
> In the case that the reclaimed page is not reallocated to the pool, we will be
> under the pool limit on the next zswap store and not do reclaim.
> 

That's true, I see your idea here.
But it's probably that there will be no suitable unbuddied pages in the
pool.
Mel gave a very good and detail example about nchunks in thread:
Re: [PATCHv11 2/4] zbud: add to mm/

-- 
Regards,
-Bob

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-05-23  1:56 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-05-21  6:26 [RFC PATCH] zswap: add zswap shrinker Bob Liu
2013-05-21 18:57 ` Seth Jennings
2013-05-22  4:03   ` Bob Liu
2013-05-22  6:05     ` Bob Liu
2013-05-22 14:08     ` Seth Jennings
2013-05-23  1:56       ` Bob Liu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.