All of lore.kernel.org
 help / color / mirror / Atom feed
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
	"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>,
	"hugh.dickins@tiscali.co.uk" <hugh.dickins@tiscali.co.uk>,
	"hannes@cmpxchg.org" <hannes@cmpxchg.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: [RFC][PATCH 5/5] (experimental) chase and free cache only swap
Date: Tue, 26 May 2009 12:18:34 +0900	[thread overview]
Message-ID: <20090526121834.dd9a4193.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20090526121259.b91b3e9d.kamezawa.hiroyu@jp.fujitsu.com>


From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

Just a trial/example patch.
I'd like to consider more. Better implementation idea is welcome.

When the system does swap-in/swap-out repeatedly, there are 
cache-only swaps in general.
Typically,
 - swapped out in past but on memory now while vm_swap_full() returns true
pages are cache-only swaps. (swap_map has no references.)

These cache-only swap entries can be an obstacle to smooth page reclaiming.
The current implementation is very naive: just scan & free.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 include/linux/swap.h |    1 
 mm/swapfile.c        |   88 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

Index: new-trial-swapcount/mm/swapfile.c
===================================================================
--- new-trial-swapcount.orig/mm/swapfile.c
+++ new-trial-swapcount/mm/swapfile.c
@@ -74,6 +74,8 @@ static inline unsigned short make_swap_c
 	return ret;
 }
 
+static void call_gc_cache_only_swap(void);
+
 /*
  * We need this because the bdev->unplug_fn can sleep and we cannot
  * hold swap_lock while calling the unplug_fn. And swap_lock
@@ -432,6 +434,8 @@ swp_entry_t get_swap_page(void)
 		offset = scan_swap_map(si, 1);
 		if (offset) {
 			spin_unlock(&swap_lock);
+			/* reclaim cache-only swaps if vm_swap_full() */
+			call_gc_cache_only_swap();
 			return swp_entry(type, offset);
 		}
 		next = swap_list.next;
@@ -2147,3 +2151,87 @@ int valid_swaphandles(swp_entry_t entry,
 	*offset = ++toff;
 	return nr_pages? ++nr_pages: 0;
 }
+
+/*
+ * Following code is for freeing Cache-only swap entries. These are calle in
+ * vm_swap_full() situation, and freeing cache-only swap and make some swap
+ * entries usable.
+ */
+
+static int find_free_cache_only_swap(int type)
+{
+	unsigned long buf[SWAP_CLUSTER_MAX];
+	int nr, offset, i;
+	unsigned short count;
+	struct swap_info_struct *si = swap_info + type;
+	int ret = 0;
+
+	spin_lock(&swap_lock);
+	nr = 0;
+	if (!(si->flags & SWP_WRITEOK) || si->cache_only == 0) {
+		ret = 1;
+		goto unlock;
+	}
+	offset = si->garbage_scan_offset;
+	/* Scan 2048 entries at most and free up to 32 entries per scan.*/
+	for (i = 2048; i > 0 && nr < 32; i--, offset++) {
+		if (offset >= si->max) {
+			offset = 0;
+			ret = 1;
+			break;
+		}
+		count = si->swap_map[offset];
+		if (count == SWAP_HAS_CACHE)
+			buf[nr++] = offset;
+	}
+	si->garbage_scan_offset = offset;
+unlock:
+	spin_unlock(&swap_lock);
+
+	for (i = 0; i < nr; i++) {
+		swp_entry_t ent;
+		struct page *page;
+
+		ent = swp_entry(type, buf[i]);
+
+		page = find_get_page(&swapper_space, ent.val);
+		if (page) {
+			lock_page(page);
+			try_to_free_swap(page);
+			unlock_page(page);
+		}
+	}
+	return ret;
+}
+
+#define SWAP_GC_THRESH	(4096)
+static void scan_and_free_cache_only_swap_work(struct work_struct *work);
+DECLARE_DELAYED_WORK(swap_gc_work, scan_and_free_cache_only_swap_work);
+static int swap_gc_last_scan;
+
+static void scan_and_free_cache_only_swap_work(struct work_struct *work)
+{
+	int type = swap_gc_last_scan;
+	int i;
+
+	spin_lock(&swap_lock);
+	for (i = type; i < MAX_SWAPFILES; i++) {
+		if (swap_info[i].flags & SWP_WRITEOK)
+			break;
+	}
+	if (i >= MAX_SWAPFILES)
+		i = 0;
+	spin_unlock(&swap_lock);
+	if (find_free_cache_only_swap(i))
+		swap_gc_last_scan = i + 1;
+
+	if (vm_swap_full() && (nr_cache_only_swaps > SWAP_GC_THRESH))
+		schedule_delayed_work(&swap_gc_work, HZ/10);
+}
+
+static void call_gc_cache_only_swap(void)
+{
+	if (vm_swap_full()  && (nr_cache_only_swaps > SWAP_GC_THRESH))
+		schedule_delayed_work(&swap_gc_work, HZ/10);
+}
+
Index: new-trial-swapcount/include/linux/swap.h
===================================================================
--- new-trial-swapcount.orig/include/linux/swap.h
+++ new-trial-swapcount/include/linux/swap.h
@@ -156,6 +156,7 @@ struct swap_info_struct {
 	unsigned int inuse_pages;
 	unsigned int old_block_size;
 	unsigned int cache_only;
+	unsigned int garbage_scan_offset;
 };
 
 struct swap_list_t {


WARNING: multiple messages have this Message-ID (diff)
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
	"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>,
	"hugh.dickins@tiscali.co.uk" <hugh.dickins@tiscali.co.uk>,
	"hannes@cmpxchg.org" <hannes@cmpxchg.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: [RFC][PATCH 5/5] (experimental) chase and free cache only swap
Date: Tue, 26 May 2009 12:18:34 +0900	[thread overview]
Message-ID: <20090526121834.dd9a4193.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20090526121259.b91b3e9d.kamezawa.hiroyu@jp.fujitsu.com>


From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

Just a trial/example patch.
I'd like to consider more. Better implementation idea is welcome.

When the system does swap-in/swap-out repeatedly, there are 
cache-only swaps in general.
Typically,
 - swapped out in past but on memory now while vm_swap_full() returns true
pages are cache-only swaps. (swap_map has no references.)

These cache-only swap entries can be an obstacle to smooth page reclaiming.
The current implementation is very naive: just scan & free.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 include/linux/swap.h |    1 
 mm/swapfile.c        |   88 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

Index: new-trial-swapcount/mm/swapfile.c
===================================================================
--- new-trial-swapcount.orig/mm/swapfile.c
+++ new-trial-swapcount/mm/swapfile.c
@@ -74,6 +74,8 @@ static inline unsigned short make_swap_c
 	return ret;
 }
 
+static void call_gc_cache_only_swap(void);
+
 /*
  * We need this because the bdev->unplug_fn can sleep and we cannot
  * hold swap_lock while calling the unplug_fn. And swap_lock
@@ -432,6 +434,8 @@ swp_entry_t get_swap_page(void)
 		offset = scan_swap_map(si, 1);
 		if (offset) {
 			spin_unlock(&swap_lock);
+			/* reclaim cache-only swaps if vm_swap_full() */
+			call_gc_cache_only_swap();
 			return swp_entry(type, offset);
 		}
 		next = swap_list.next;
@@ -2147,3 +2151,87 @@ int valid_swaphandles(swp_entry_t entry,
 	*offset = ++toff;
 	return nr_pages? ++nr_pages: 0;
 }
+
+/*
+ * Following code is for freeing Cache-only swap entries. These are calle in
+ * vm_swap_full() situation, and freeing cache-only swap and make some swap
+ * entries usable.
+ */
+
+static int find_free_cache_only_swap(int type)
+{
+	unsigned long buf[SWAP_CLUSTER_MAX];
+	int nr, offset, i;
+	unsigned short count;
+	struct swap_info_struct *si = swap_info + type;
+	int ret = 0;
+
+	spin_lock(&swap_lock);
+	nr = 0;
+	if (!(si->flags & SWP_WRITEOK) || si->cache_only == 0) {
+		ret = 1;
+		goto unlock;
+	}
+	offset = si->garbage_scan_offset;
+	/* Scan 2048 entries at most and free up to 32 entries per scan.*/
+	for (i = 2048; i > 0 && nr < 32; i--, offset++) {
+		if (offset >= si->max) {
+			offset = 0;
+			ret = 1;
+			break;
+		}
+		count = si->swap_map[offset];
+		if (count == SWAP_HAS_CACHE)
+			buf[nr++] = offset;
+	}
+	si->garbage_scan_offset = offset;
+unlock:
+	spin_unlock(&swap_lock);
+
+	for (i = 0; i < nr; i++) {
+		swp_entry_t ent;
+		struct page *page;
+
+		ent = swp_entry(type, buf[i]);
+
+		page = find_get_page(&swapper_space, ent.val);
+		if (page) {
+			lock_page(page);
+			try_to_free_swap(page);
+			unlock_page(page);
+		}
+	}
+	return ret;
+}
+
+#define SWAP_GC_THRESH	(4096)
+static void scan_and_free_cache_only_swap_work(struct work_struct *work);
+DECLARE_DELAYED_WORK(swap_gc_work, scan_and_free_cache_only_swap_work);
+static int swap_gc_last_scan;
+
+static void scan_and_free_cache_only_swap_work(struct work_struct *work)
+{
+	int type = swap_gc_last_scan;
+	int i;
+
+	spin_lock(&swap_lock);
+	for (i = type; i < MAX_SWAPFILES; i++) {
+		if (swap_info[i].flags & SWP_WRITEOK)
+			break;
+	}
+	if (i >= MAX_SWAPFILES)
+		i = 0;
+	spin_unlock(&swap_lock);
+	if (find_free_cache_only_swap(i))
+		swap_gc_last_scan = i + 1;
+
+	if (vm_swap_full() && (nr_cache_only_swaps > SWAP_GC_THRESH))
+		schedule_delayed_work(&swap_gc_work, HZ/10);
+}
+
+static void call_gc_cache_only_swap(void)
+{
+	if (vm_swap_full()  && (nr_cache_only_swaps > SWAP_GC_THRESH))
+		schedule_delayed_work(&swap_gc_work, HZ/10);
+}
+
Index: new-trial-swapcount/include/linux/swap.h
===================================================================
--- new-trial-swapcount.orig/include/linux/swap.h
+++ new-trial-swapcount/include/linux/swap.h
@@ -156,6 +156,7 @@ struct swap_info_struct {
 	unsigned int inuse_pages;
 	unsigned int old_block_size;
 	unsigned int cache_only;
+	unsigned int garbage_scan_offset;
 };
 
 struct swap_list_t {

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">dont@kvack.org</a>

  parent reply	other threads:[~2009-05-26  3:20 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-26  3:12 [RFC][PATCH] memcg: fix swap account (26/May)[0/5] KAMEZAWA Hiroyuki
2009-05-26  3:12 ` KAMEZAWA Hiroyuki
2009-05-26  3:14 ` [RFC][PATCH 1/5] change swap cache interfaces KAMEZAWA Hiroyuki
2009-05-26  3:14   ` KAMEZAWA Hiroyuki
2009-05-26  3:15 ` [RFC][PATCH 2/5] add SWAP_HAS_CACHE flag to swap_map KAMEZAWA Hiroyuki
2009-05-26  3:15   ` KAMEZAWA Hiroyuki
2009-05-27  4:02   ` Daisuke Nishimura
2009-05-27  4:02     ` Daisuke Nishimura
2009-05-27  4:36     ` KAMEZAWA Hiroyuki
2009-05-27  4:36       ` KAMEZAWA Hiroyuki
2009-05-27  5:00       ` Daisuke Nishimura
2009-05-27  5:00         ` Daisuke Nishimura
2009-05-28  0:41   ` Daisuke Nishimura
2009-05-28  0:41     ` Daisuke Nishimura
2009-05-28  1:05     ` KAMEZAWA Hiroyuki
2009-05-28  1:05       ` KAMEZAWA Hiroyuki
2009-05-28  1:40       ` Daisuke Nishimura
2009-05-28  1:40         ` Daisuke Nishimura
2009-05-28  1:44         ` KAMEZAWA Hiroyuki
2009-05-28  1:44           ` KAMEZAWA Hiroyuki
2009-05-26  3:16 ` [RFC][PATCH 3/5] count cache-only swaps KAMEZAWA Hiroyuki
2009-05-26  3:16   ` KAMEZAWA Hiroyuki
2009-05-26 17:37   ` Johannes Weiner
2009-05-26 17:37     ` Johannes Weiner
2009-05-26 23:49     ` KAMEZAWA Hiroyuki
2009-05-26 23:49       ` KAMEZAWA Hiroyuki
2009-05-26  3:17 ` [RFC][PATCH 4/5] memcg: fix swap account KAMEZAWA Hiroyuki
2009-05-26  3:17   ` KAMEZAWA Hiroyuki
2009-05-26  3:18 ` KAMEZAWA Hiroyuki [this message]
2009-05-26  3:18   ` [RFC][PATCH 5/5] (experimental) chase and free cache only swap KAMEZAWA Hiroyuki
2009-05-26 18:14   ` Johannes Weiner
2009-05-26 18:14     ` Johannes Weiner
2009-05-27  0:08     ` KAMEZAWA Hiroyuki
2009-05-27  0:08       ` KAMEZAWA Hiroyuki
2009-05-27  1:26       ` Johannes Weiner
2009-05-27  1:26         ` Johannes Weiner
2009-05-27  1:31         ` KAMEZAWA Hiroyuki
2009-05-27  1:31           ` KAMEZAWA Hiroyuki
2009-05-27  2:06           ` Johannes Weiner
2009-05-27  2:06             ` Johannes Weiner
2009-05-27  5:14   ` KAMEZAWA Hiroyuki
2009-05-27  5:14     ` KAMEZAWA Hiroyuki
2009-05-27  6:30     ` Daisuke Nishimura
2009-05-27  6:30       ` Daisuke Nishimura
2009-05-27  6:50       ` KAMEZAWA Hiroyuki
2009-05-27  6:50         ` KAMEZAWA Hiroyuki
2009-05-27  6:43 ` [RFC][PATCH] memcg: fix swap account (26/May)[0/5] KAMEZAWA Hiroyuki
2009-05-27  6:43   ` KAMEZAWA Hiroyuki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090526121834.dd9a4193.kamezawa.hiroyu@jp.fujitsu.com \
    --to=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=hannes@cmpxchg.org \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nishimura@mxp.nes.nec.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.