From: Linus Torvalds <torvalds@linux-foundation.org>
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>,
Stephen Rothwell <sfr@canb.auug.org.au>,
linux-next <linux-next@vger.kernel.org>,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
Dave Chinner <dchinner@redhat.com>,
Glauber Costa <glommer@openvz.org>
Subject: Re: linux-next: manual merge of the akpm tree with Linus' tree
Date: Fri, 13 Sep 2013 16:25:48 -0400 [thread overview]
Message-ID: <CA+55aFxizbVmQE2Y94BRj3kDFdf6hzNHuy-O0qBFw3jnT_mMLw@mail.gmail.com> (raw)
In-Reply-To: <20130913200000.GT13318@ZenIV.linux.org.uk>
[-- Attachment #1: Type: text/plain, Size: 853 bytes --]
On Fri, Sep 13, 2013 at 4:00 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> It is right - for one thing, we are holding the lock on that LRU list,
> so list_lru_del() would deadlock right there. For another, the same
> list_lru_walk (OK, list_lru_walk_node()) will do ->nr_items decrement
> when we return LRU_REMOVED to it, so we don't want to do it twice.
> Plain list_del_init() is correct here.
Yes. And I found the opposite bug in one place: when we are collecting
dentries by walking the parents etc, we do *not* hold the global LRU
lock, so we cannot use the "d_lru_shrink_move()" thing after all. It's
correct as far as the internal logic of fs/dcache.c goes, but it
violates the global LRU list rules. So I replaced that with a
d_lru_del() followed by a d_shrink_add() instead.
Updated patch attached.
Linus
[-- Attachment #2: patch.diff --]
[-- Type: application/octet-stream, Size: 6636 bytes --]
fs/dcache.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 102 insertions(+), 27 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 1bd4614ce93b..435b97560674 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -357,15 +357,80 @@ static void dentry_unlink_inode(struct dentry * dentry)
}
/*
+ * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
+ * is in use - which includes both the "real" per-superblock
+ * LRU list _and_ the DCACHE_SHRINK_LIST use.
+ *
+ * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
+ * on the shrink list (ie not on the superblock LRU list).
+ *
+ * The per-cpu "nr_dentry_unused" counters are updated with
+ * the DCACHE_LRU_LIST bit.
+ *
+ * These helper functions make sure we always follow the
+ * rules. d_lock must be held by the caller.
+ */
+#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))
+static void d_lru_add(struct dentry *dentry)
+{
+ D_FLAG_VERIFY(dentry, 0);
+ dentry->d_flags |= DCACHE_LRU_LIST;
+ this_cpu_inc(nr_dentry_unused);
+ WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
+}
+
+static void d_lru_del(struct dentry *dentry)
+{
+ D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
+ dentry->d_flags &= ~DCACHE_LRU_LIST;
+ this_cpu_dec(nr_dentry_unused);
+ WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
+}
+
+static void d_shrink_del(struct dentry *dentry)
+{
+ D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
+ list_del_init(&dentry->d_lru);
+ dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
+ this_cpu_dec(nr_dentry_unused);
+}
+
+static void d_shrink_add(struct dentry *dentry, struct list_head *list)
+{
+ D_FLAG_VERIFY(dentry, 0);
+ list_add(&dentry->d_lru, list);
+ dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
+ this_cpu_inc(nr_dentry_unused);
+}
+
+/*
+ * These can only be called under the global LRU lock, ie during the
+ * callback for freeing the LRU list. "isolate" removes it from the
+ * LRU lists entirely, while shrink_move moves it to the indicated
+ * private list.
+ */
+static void d_lru_isolate(struct dentry *dentry)
+{
+ D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
+ dentry->d_flags &= ~DCACHE_LRU_LIST;
+ this_cpu_dec(nr_dentry_unused);
+ list_del_init(&dentry->d_lru);
+}
+
+static void d_lru_shrink_move(struct dentry *dentry, struct list_head *list)
+{
+ D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
+ dentry->d_flags |= DCACHE_SHRINK_LIST;
+ list_move_tail(&dentry->d_lru, list);
+}
+
+/*
* dentry_lru_(add|del)_list) must be called with d_lock held.
*/
static void dentry_lru_add(struct dentry *dentry)
{
- if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) {
- if (list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru))
- this_cpu_inc(nr_dentry_unused);
- dentry->d_flags |= DCACHE_LRU_LIST;
- }
+ if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
+ d_lru_add(dentry);
}
/*
@@ -377,15 +442,11 @@ static void dentry_lru_add(struct dentry *dentry)
*/
static void dentry_lru_del(struct dentry *dentry)
{
- if (dentry->d_flags & DCACHE_SHRINK_LIST) {
- list_del_init(&dentry->d_lru);
- dentry->d_flags &= ~DCACHE_SHRINK_LIST;
- return;
+ if (dentry->d_flags & DCACHE_LRU_LIST) {
+ if (dentry->d_flags & DCACHE_SHRINK_LIST)
+ return d_shrink_del(dentry);
+ d_lru_del(dentry);
}
-
- if (list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru))
- this_cpu_dec(nr_dentry_unused);
- dentry->d_flags &= ~DCACHE_LRU_LIST;
}
/**
@@ -837,6 +898,13 @@ static void shrink_dentry_list(struct list_head *list)
dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
if (&dentry->d_lru == list)
break; /* empty */
+
+
+ /*
+ * Get the dentry lock, and re-verify that the dentry is
+ * still on the shrinking list. If it is, we know that
+ * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set.
+ */
spin_lock(&dentry->d_lock);
if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
spin_unlock(&dentry->d_lock);
@@ -848,8 +916,7 @@ static void shrink_dentry_list(struct list_head *list)
* to the LRU here, so we can simply remove it from the list
* here regardless of whether it is referenced or not.
*/
- list_del_init(&dentry->d_lru);
- dentry->d_flags &= ~DCACHE_SHRINK_LIST;
+ d_shrink_del(dentry);
/*
* We found an inuse dentry which was not removed from
@@ -861,12 +928,20 @@ static void shrink_dentry_list(struct list_head *list)
}
rcu_read_unlock();
+ /*
+ * If 'try_prune_one_dentry()' returns a dentry, it will
+ * be the same one we passed in, and d_lock will
+ * have been held the whole time, so it will not
+ * have been added to any other lists. We failed
+ * to get the inode lock.
+ *
+ * We just add it back to the shrink list.
+ */
dentry = try_prune_one_dentry(dentry);
rcu_read_lock();
if (dentry) {
- dentry->d_flags |= DCACHE_SHRINK_LIST;
- list_add(&dentry->d_lru, list);
+ d_shrink_add(dentry, list);
spin_unlock(&dentry->d_lock);
}
}
@@ -894,7 +969,7 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
* another pass through the LRU.
*/
if (dentry->d_lockref.count) {
- list_del_init(&dentry->d_lru);
+ d_lru_isolate(dentry);
spin_unlock(&dentry->d_lock);
return LRU_REMOVED;
}
@@ -925,9 +1000,7 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
return LRU_ROTATE;
}
- dentry->d_flags |= DCACHE_SHRINK_LIST;
- list_move_tail(&dentry->d_lru, freeable);
- this_cpu_dec(nr_dentry_unused);
+ d_lru_shrink_move(dentry, freeable);
spin_unlock(&dentry->d_lock);
return LRU_REMOVED;
@@ -972,9 +1045,7 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
if (!spin_trylock(&dentry->d_lock))
return LRU_SKIP;
- dentry->d_flags |= DCACHE_SHRINK_LIST;
- list_move_tail(&dentry->d_lru, freeable);
- this_cpu_dec(nr_dentry_unused);
+ d_lru_shrink_move(dentry, freeable);
spin_unlock(&dentry->d_lock);
return LRU_REMOVED;
@@ -1362,9 +1433,13 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
if (dentry->d_lockref.count) {
dentry_lru_del(dentry);
} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
- dentry_lru_del(dentry);
- list_add_tail(&dentry->d_lru, &data->dispose);
- dentry->d_flags |= DCACHE_SHRINK_LIST;
+ /*
+ * We can't use d_lru_shrink_move() because we
+ * need to get the global LRU lock and do the
+ * LRU accounting.
+ */
+ d_lru_del(dentry);
+ d_shrink_add(dentry, &data->dispose);
data->found++;
ret = D_WALK_NORETRY;
}
next prev parent reply other threads:[~2013-09-13 20:25 UTC|newest]
Thread overview: 106+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-09-10 4:38 linux-next: manual merge of the akpm tree with Linus' tree Stephen Rothwell
2013-09-10 22:27 ` Andrew Morton
2013-09-10 22:29 ` Al Viro
2013-09-10 22:35 ` Andrew Morton
2013-09-10 22:36 ` Al Viro
2013-09-10 22:39 ` Al Viro
2013-09-10 22:41 ` Andrew Morton
2013-09-10 22:48 ` Al Viro
2013-09-10 22:59 ` Al Viro
2013-09-10 23:13 ` Andrew Morton
2013-09-10 23:55 ` Al Viro
2013-09-11 4:30 ` Stephen Rothwell
2013-09-10 23:37 ` Linus Torvalds
2013-09-10 23:53 ` Al Viro
2013-09-11 0:01 ` Linus Torvalds
2013-09-11 0:39 ` Dave Chinner
2013-09-13 0:56 ` Linus Torvalds
2013-09-13 1:12 ` Linus Torvalds
2013-09-13 1:35 ` Al Viro
2013-09-13 19:12 ` Linus Torvalds
2013-09-13 19:28 ` Linus Torvalds
2013-09-13 19:54 ` Linus Torvalds
2013-09-13 20:00 ` Al Viro
2013-09-13 20:18 ` Al Viro
2013-09-13 20:23 ` Al Viro
2013-09-13 20:25 ` Linus Torvalds [this message]
2013-09-13 20:31 ` Linus Torvalds
2013-09-13 20:31 ` Al Viro
2013-09-13 20:34 ` Linus Torvalds
2013-09-10 22:35 ` Linus Torvalds
2013-09-10 22:44 ` Andrew Morton
2013-09-11 0:30 ` Stephen Rothwell
2013-09-11 0:41 ` Linus Torvalds
-- strict thread matches above, loose matches on Subject: below --
2019-12-02 2:17 Stephen Rothwell
2019-12-02 2:08 Stephen Rothwell
2018-10-15 7:22 Stephen Rothwell
2018-10-15 7:04 Stephen Rothwell
2018-03-26 8:56 Stephen Rothwell
2017-01-09 2:51 Stephen Rothwell
2016-12-12 5:49 Stephen Rothwell
2016-01-19 2:42 Stephen Rothwell
2016-01-19 2:36 Stephen Rothwell
2015-07-27 5:26 Stephen Rothwell
2013-09-10 4:21 Stephen Rothwell
2013-09-10 4:09 Stephen Rothwell
2013-09-10 4:12 ` Stephen Rothwell
2013-09-09 5:38 Stephen Rothwell
2013-09-09 5:22 Stephen Rothwell
2013-09-09 5:16 Stephen Rothwell
2013-08-30 8:44 Stephen Rothwell
2013-05-27 6:20 Stephen Rothwell
2013-05-20 4:04 Stephen Rothwell
2013-05-20 12:19 ` Chris Mason
2013-05-13 5:20 Eric Paris
2013-05-13 2:07 Stephen Rothwell
2013-05-13 2:11 ` Eric Paris
2013-05-13 4:16 ` Stephen Rothwell
2013-05-13 4:49 ` Kees Cook
2013-05-13 5:14 ` Eric Paris
2013-05-02 6:01 Stephen Rothwell
2013-04-29 8:38 Stephen Rothwell
2013-04-19 7:40 Stephen Rothwell
2013-04-16 7:25 Stephen Rothwell
2013-04-03 6:10 Stephen Rothwell
2013-03-25 4:22 Stephen Rothwell
2013-03-04 2:21 Stephen Rothwell
2013-01-04 3:27 Stephen Rothwell
2012-12-11 5:25 Stephen Rothwell
2012-12-11 5:22 Stephen Rothwell
2012-12-11 7:58 ` Glauber Costa
2012-12-07 6:39 Stephen Rothwell
2012-11-30 6:24 Stephen Rothwell
2012-11-26 12:52 Stephen Rothwell
2012-11-26 12:48 Stephen Rothwell
2012-11-26 13:25 ` Xiaotian Feng
2012-11-26 12:34 Stephen Rothwell
2012-10-15 2:07 Stephen Rothwell
2012-10-15 22:14 ` Catalin Marinas
2012-10-01 14:15 Stephen Rothwell
2012-08-22 5:59 Stephen Rothwell
2012-08-22 8:58 ` Mel Gorman
2012-07-27 3:57 Stephen Rothwell
2012-07-02 6:39 Stephen Rothwell
2012-06-04 4:58 Stephen Rothwell
2012-05-31 4:24 Stephen Rothwell
2012-05-31 4:13 Stephen Rothwell
2012-05-31 7:25 ` Johannes Weiner
2012-05-31 8:24 ` Stephen Rothwell
2012-05-31 8:27 ` Stephen Rothwell
2012-05-21 8:23 Stephen Rothwell
2012-05-21 8:13 Stephen Rothwell
2012-05-21 8:16 ` Cyrill Gorcunov
2012-03-08 6:53 Stephen Rothwell
2012-03-08 7:32 ` Andrew Morton
2012-03-08 7:41 ` Stephen Rothwell
2012-03-08 7:50 ` Andrew Morton
2012-03-08 7:50 ` Xiao Guangrong
2012-03-08 9:59 ` Xiao Guangrong
2012-03-08 21:24 ` Andrew Morton
2012-03-08 23:42 ` Linus Torvalds
2011-12-28 7:54 Stephen Rothwell
2011-11-08 3:24 Stephen Rothwell
2011-11-01 8:16 Stephen Rothwell
2011-11-01 10:47 ` Tao Ma
2011-09-16 6:09 Stephen Rothwell
2011-08-15 4:52 Stephen Rothwell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CA+55aFxizbVmQE2Y94BRj3kDFdf6hzNHuy-O0qBFw3jnT_mMLw@mail.gmail.com \
--to=torvalds@linux-foundation.org \
--cc=akpm@linux-foundation.org \
--cc=dchinner@redhat.com \
--cc=glommer@openvz.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-next@vger.kernel.org \
--cc=sfr@canb.auug.org.au \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).