linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dan Streetman <ddstreet@ieee.org>
To: Hugh Dickins <hughd@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Mel Gorman <mgorman@suse.de>
Cc: Dan Streetman <ddstreet@ieee.org>, Michal Hocko <mhocko@suse.cz>,
	Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>,
	Weijie Yang <weijieut@gmail.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: [PATCH 2/2] swap: use separate priority list for available swap_infos
Date: Sat, 12 Apr 2014 17:00:54 -0400	[thread overview]
Message-ID: <1397336454-13855-3-git-send-email-ddstreet@ieee.org> (raw)
In-Reply-To: <1397336454-13855-1-git-send-email-ddstreet@ieee.org>

Originally get_swap_page() started iterating through the singly-linked
list of swap_info_structs using swap_list.next or highest_priority_index,
which both were intended to point to the highest priority active swap
target that was not full.  The previous patch in this series changed the
singly-linked list to a doubly-linked list, and removed the logic to start
at the highest priority non-full entry; it starts scanning at the highest
priority entry each time, even if the entry is full.

Add a new list, also priority ordered, to track only swap_info_structs
that are available, i.e. active and not full.  Use a new spinlock so that
entries can be added/removed outside of get_swap_page; that wasn't possible
previously because the main list is protected by swap_lock, which can't be
taken when holding a swap_info_struct->lock because of locking order.
The get_swap_page() logic now does not need to hold the swap_lock, and it
iterates only through swap_info_structs that are available.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>


---
 include/linux/swap.h |   1 +
 mm/swapfile.c        | 128 ++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 87 insertions(+), 42 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 96662d8..d9263db 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,6 +214,7 @@ struct percpu_cluster {
 struct swap_info_struct {
 	unsigned long	flags;		/* SWP_USED etc: see above */
 	signed short	prio;		/* swap priority of this type */
+	struct list_head prio_list;	/* entry in priority list */
 	struct list_head list;		/* entry in swap list */
 	signed char	type;		/* strange name for an index */
 	unsigned int	max;		/* extent of the swap_map */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b958645..3c38461 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -57,9 +57,13 @@ static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
 static const char Unused_offset[] = "Unused swap offset entry ";
 
-/* all active swap_info */
+/* all active swap_info; protected with swap_lock */
 LIST_HEAD(swap_list_head);
 
+/* all available (active, not full) swap_info, priority ordered */
+static LIST_HEAD(prio_head);
+static DEFINE_SPINLOCK(prio_lock);
+
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
 static DEFINE_MUTEX(swapon_mutex);
@@ -73,6 +77,27 @@ static inline unsigned char swap_count(unsigned char ent)
 	return ent & ~SWAP_HAS_CACHE;	/* may include SWAP_HAS_CONT flag */
 }
 
+/*
+ * add, in priority order, swap_info (p)->(le) list_head to list (lh)
+ * this list-generic function is needed because both swap_list_head
+ * and prio_head need to be priority ordered:
+ * swap_list_head in swapoff to adjust lower negative prio swap_infos
+ * prio_list in get_swap_page to scan highest prio swap_info first
+ */
+#define swap_info_list_add(p, lh, le) do {			\
+	struct swap_info_struct *_si;				\
+	BUG_ON(!list_empty(&(p)->le));				\
+	list_for_each_entry(_si, (lh), le) {			\
+		if ((p)->prio >= _si->prio) {			\
+			list_add_tail(&(p)->le, &_si->le);	\
+			break;					\
+		}						\
+	}							\
+	/* lh empty, or p lowest prio */			\
+	if (list_empty(&(p)->le))				\
+		list_add_tail(&(p)->le, (lh));			\
+} while (0)
+
 /* returns 1 if swap entry is freed */
 static int
 __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
@@ -591,6 +616,9 @@ checks:
 	if (si->inuse_pages == si->pages) {
 		si->lowest_bit = si->max;
 		si->highest_bit = 0;
+		spin_lock(&prio_lock);
+		list_del_init(&si->prio_list);
+		spin_unlock(&prio_lock);
 	}
 	si->swap_map[offset] = usage;
 	inc_cluster_info_page(si, si->cluster_info, offset);
@@ -642,53 +670,68 @@ swp_entry_t get_swap_page(void)
 {
 	struct swap_info_struct *si, *next;
 	pgoff_t offset;
-	struct list_head *tmp;
 
-	spin_lock(&swap_lock);
 	if (atomic_long_read(&nr_swap_pages) <= 0)
 		goto noswap;
 	atomic_long_dec(&nr_swap_pages);
 
-	list_for_each(tmp, &swap_list_head) {
-		si = list_entry(tmp, typeof(*si), list);
-		spin_lock(&si->lock);
-		if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
-			spin_unlock(&si->lock);
-			continue;
-		}
-
+	spin_lock(&prio_lock);
+start_over:
+	list_for_each_entry_safe(si, next, &prio_head, prio_list) {
 		/*
-		 * rotate the current swap_info that we're going to use
+		 * rotate the current swap_info that we're checking
 		 * to after any other swap_info that have the same prio,
 		 * so that all equal-priority swap_info get used equally
 		 */
-		next = si;
-		list_for_each_entry_continue(next, &swap_list_head, list) {
-			if (si->prio != next->prio)
+		struct swap_info_struct *eq_prio = si;
+		list_for_each_entry_continue(eq_prio, &prio_head, prio_list) {
+			if (si->prio != eq_prio->prio)
 				break;
-			list_rotate_left(&si->list);
-			next = si;
+			list_rotate_left(&si->prio_list);
+			eq_prio = si;
+		}
+		spin_unlock(&prio_lock);
+		spin_lock(&si->lock);
+		if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
+			spin_lock(&prio_lock);
+			if (list_empty(&si->prio_list)) {
+				spin_unlock(&si->lock);
+				goto nextsi;
+			}
+			WARN(!si->highest_bit,
+			     "swap_info %d in list but !highest_bit\n",
+			     si->type);
+			WARN(!(si->flags & SWP_WRITEOK),
+			     "swap_info %d in list but !SWP_WRITEOK\n",
+			     si->type);
+			list_del_init(&si->prio_list);
+			spin_unlock(&si->lock);
+			goto nextsi;
 		}
 
-		spin_unlock(&swap_lock);
 		/* This is called for allocating swap entry for cache */
 		offset = scan_swap_map(si, SWAP_HAS_CACHE);
 		spin_unlock(&si->lock);
 		if (offset)
 			return swp_entry(si->type, offset);
-		spin_lock(&swap_lock);
+		printk(KERN_DEBUG "scan_swap_map of si %d failed to find offset\n",
+		       si->type);
+		spin_lock(&prio_lock);
+nextsi:
 		/*
-		 * shouldn't really have got here, but for some reason the
-		 * scan_swap_map came back empty for this swap_info.
-		 * Since we dropped the swap_lock, there may now be
-		 * non-full higher prio swap_infos; let's start over.
+		 * shouldn't really have got here.  either si was
+		 * in the prio_head list but was full or !writeok, or
+		 * scan_swap_map came back empty.  Since we dropped
+		 * the prio_lock, the prio_head list may have been
+		 * modified; so if next is still in the prio_head
+		 * list then try it, otherwise start over.
 		 */
-		tmp = &swap_list_head;
+		if (list_empty(&next->prio_list))
+			goto start_over;
 	}
 
 	atomic_long_inc(&nr_swap_pages);
 noswap:
-	spin_unlock(&swap_lock);
 	return (swp_entry_t) {0};
 }
 
@@ -791,8 +834,17 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 		dec_cluster_info_page(p, p->cluster_info, offset);
 		if (offset < p->lowest_bit)
 			p->lowest_bit = offset;
-		if (offset > p->highest_bit)
+		if (offset > p->highest_bit) {
+			bool was_full = !p->highest_bit;
 			p->highest_bit = offset;
+			if (was_full && (p->flags & SWP_WRITEOK)) {
+				spin_lock(&prio_lock);
+				if (list_empty(&p->prio_list))
+					swap_info_list_add(p, &prio_head,
+							   prio_list);
+				spin_unlock(&prio_lock);
+			}
+		}
 		atomic_long_inc(&nr_swap_pages);
 		p->inuse_pages--;
 		frontswap_invalidate_page(p->type, offset);
@@ -1727,8 +1779,6 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 				unsigned char *swap_map,
 				struct swap_cluster_info *cluster_info)
 {
-	struct swap_info_struct *si;
-
 	if (prio >= 0)
 		p->prio = prio;
 	else
@@ -1740,20 +1790,10 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 	total_swap_pages += p->pages;
 
 	assert_spin_locked(&swap_lock);
-	BUG_ON(!list_empty(&p->list));
-	/* insert into swap list: */
-	list_for_each_entry(si, &swap_list_head, list) {
-		if (p->prio >= si->prio) {
-			list_add_tail(&p->list, &si->list);
-			return;
-		}
-	}
-	/*
-	 * this covers two cases:
-	 * 1) p->prio is less than all existing prio
-	 * 2) the swap list is empty
-	 */
-	list_add_tail(&p->list, &swap_list_head);
+	swap_info_list_add(p, &swap_list_head, list);
+	spin_lock(&prio_lock);
+	swap_info_list_add(p, &prio_head, prio_list);
+	spin_unlock(&prio_lock);
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
@@ -1827,6 +1867,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		spin_unlock(&swap_lock);
 		goto out_dput;
 	}
+	spin_lock(&prio_lock);
+	list_del_init(&p->prio_list);
+	spin_unlock(&prio_lock);
 	spin_lock(&p->lock);
 	if (p->prio < 0) {
 		struct swap_info_struct *si = p;
@@ -2101,6 +2144,7 @@ static struct swap_info_struct *alloc_swap_info(void)
 	}
 	INIT_LIST_HEAD(&p->first_swap_extent.list);
 	INIT_LIST_HEAD(&p->list);
+	INIT_LIST_HEAD(&p->prio_list);
 	p->flags = SWP_USED;
 	spin_unlock(&swap_lock);
 	spin_lock_init(&p->lock);
-- 
1.8.3.1


  parent reply	other threads:[~2014-04-12 21:04 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-02-13 10:42 [PATCH] mm: swap: Use swapfiles in priority order Mel Gorman
2014-02-13 15:58 ` Weijie Yang
2014-02-14 10:17   ` Mel Gorman
2014-02-14 13:33     ` Weijie Yang
     [not found]   ` <loom.20140214T135753-812@post.gmane.org>
     [not found]     ` <CABdxLJHS5kw0rpD=+77iQtc6PMeRXoWnh-nh5VzjjfGHJ5wLGQ@mail.gmail.com>
2014-02-24  8:28       ` Hugh Dickins
2014-04-12 21:00         ` [PATCH 0/2] swap: simplify/fix swap_list handling and iteration Dan Streetman
2014-04-12 21:00           ` [PATCH 1/2] swap: change swap_info singly-linked list to list_head Dan Streetman
2014-04-23 10:34             ` Mel Gorman
2014-04-24  0:17               ` Shaohua Li
2014-04-24  8:30                 ` Mel Gorman
2014-04-24 18:48               ` Dan Streetman
2014-04-25  4:15                 ` Weijie Yang
2014-05-02 20:00                   ` Dan Streetman
2014-05-04  9:39                     ` Bob Liu
2014-05-04 20:16                       ` Dan Streetman
2014-04-25  8:38                 ` Mel Gorman
2014-04-12 21:00           ` Dan Streetman [this message]
2014-04-23 13:14             ` [PATCH 2/2] swap: use separate priority list for available swap_infos Mel Gorman
2014-04-24 17:52               ` Dan Streetman
2014-04-25  8:49                 ` Mel Gorman
2014-05-02 19:02           ` [PATCHv2 0/4] swap: simplify/fix swap_list handling and iteration Dan Streetman
2014-05-02 19:02             ` [PATCHv2 1/4] swap: change swap_info singly-linked list to list_head Dan Streetman
2014-05-02 19:02             ` [PATCH 2/4] plist: add helper functions Dan Streetman
2014-05-12 10:35               ` Mel Gorman
2014-05-02 19:02             ` [PATCH 3/4] plist: add plist_rotate Dan Streetman
2014-05-06  2:18               ` Steven Rostedt
2014-05-06 20:12                 ` Dan Streetman
2014-05-06 20:39                   ` Steven Rostedt
2014-05-06 21:47                     ` Dan Streetman
2014-05-06 22:43                       ` Steven Rostedt
2014-05-02 19:02             ` [PATCH 4/4] swap: change swap_list_head to plist, add swap_avail_head Dan Streetman
2014-05-05 15:51               ` Dan Streetman
2014-05-05 19:13               ` Steven Rostedt
2014-05-05 19:38                 ` Peter Zijlstra
2014-05-09 20:42                 ` [PATCH] plist: make CONFIG_DEBUG_PI_LIST selectable Dan Streetman
2014-05-09 21:17                   ` Steven Rostedt
2014-05-12 11:11               ` [PATCH 4/4] swap: change swap_list_head to plist, add swap_avail_head Mel Gorman
2014-05-12 13:00                 ` Dan Streetman
2014-05-12 16:38             ` [PATCHv3 0/4] swap: simplify/fix swap_list handling and iteration Dan Streetman
2014-05-12 16:38               ` [PATCHv2 1/4] swap: change swap_info singly-linked list to list_head Dan Streetman
2014-05-12 16:38               ` [PATCH 2/4] plist: add helper functions Dan Streetman
2014-05-12 16:38               ` [PATCHv2 3/4] plist: add plist_requeue Dan Streetman
2014-05-13 10:33                 ` Mel Gorman
2014-05-12 16:38               ` [PATCHv2 4/4] swap: change swap_list_head to plist, add swap_avail_head Dan Streetman
2014-05-13 10:34                 ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1397336454-13855-3-git-send-email-ddstreet@ieee.org \
    --to=ddstreet@ieee.org \
    --cc=akpm@linux-foundation.org \
    --cc=ehrhardt@linux.vnet.ibm.com \
    --cc=hughd@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.cz \
    --cc=weijieut@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).