From: Kirill Korotaev <dev@sw.ru>
To: Neil Brown <neilb@suse.de>
Cc: Kirill Korotaev <dev@openvz.org>,
linux-kernel@vger.kernel.org, Andrew Morton <akpm@osdl.org>,
Olaf Hering <olh@suse.de>, Jan Blunck <jblunck@suse.de>,
Al Viro <viro@ftp.linux.org.uk>
Subject: Re: [PATCH] Busy inodes after unmount, be more verbose in generic_shutdown_super
Date: Thu, 09 Mar 2006 15:03:41 +0300 [thread overview]
Message-ID: <4410199D.1050505@sw.ru> (raw)
In-Reply-To: <17422.5456.875119.579068@cse.unsw.edu.au>
[-- Attachment #1: Type: text/plain, Size: 969 bytes --]
>>>>In general your patch is still does what mine do, so I will be happy if
>>>>any of this is commited mainstream. In future, please, keep the
>>>>reference to original authors, this will also make sure that I'm on CC
>>>>if something goes wrong.
>>>
>>>
>>>Sorry: which 'original author' did I miss ?
>>
>>I mean, it is better to mention original author
>>(http://marc.theaimsgroup.com/?l=linux-kernel&m=114123870406751&w=2) in
>>patch description, as it makes sure that he will be on CC if this patch
>>will be discussed later again. My patch fixing this race was in -mm tree
>>for half a year already.
>
>
> Which patch is that? The race still seems to be present in -mm.
I attached you my latest patch for 2.6.16-rc5
in -mm tree these patch was until the discussion arose again recently:
ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.14/2.6.14-mm2/broken-out/fix-of-dcache-race-leading-to-busy-inodes-on-umount.patch
Thanks,
Kirill
[-- Attachment #2: diff-ms-dcache-race-20060303 --]
[-- Type: text/plain, Size: 10441 bytes --]
--- linux-2.6.15.orig/fs/dcache.c 2006-01-03 06:21:10.000000000 +0300
+++ linux-2.6.15-025stab015-dcache-race/fs/dcache.c 2006-03-03 16:22:38.862706448 +0300
@@ -114,6 +114,75 @@ static inline void dentry_iput(struct de
}
}
+struct dcache_shrinker {
+ struct list_head list;
+ struct dentry *dentry;
+};
+
+DECLARE_WAIT_QUEUE_HEAD(dcache_shrinker_wq);
+
+/* called under dcache_lock */
+static void dcache_shrinker_add(struct dcache_shrinker *ds,
+ struct dentry *parent, struct dentry *dentry)
+{
+ struct super_block *sb;
+
+ sb = parent->d_sb;
+ ds->dentry = parent;
+ list_add(&ds->list, &sb->s_dshrinkers);
+}
+
+/* called under dcache_lock */
+static void dcache_shrinker_del(struct dcache_shrinker *ds)
+{
+ if (ds == NULL || list_empty(&ds->list))
+ return;
+
+ list_del_init(&ds->list);
+ wake_up_all(&dcache_shrinker_wq);
+}
+
+/* called under dcache_lock, drops inside */
+static void dcache_shrinker_wait(struct super_block *sb)
+{
+ DECLARE_WAITQUEUE(wq, current);
+
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&dcache_shrinker_wq, &wq);
+ spin_unlock(&dcache_lock);
+
+ schedule();
+ remove_wait_queue(&dcache_shrinker_wq, &wq);
+ __set_current_state(TASK_RUNNING);
+}
+
+void dcache_shrinker_wait_sb(struct super_block *sb)
+{
+ /* the root dentry can be held in dput_recursive */
+ spin_lock(&dcache_lock);
+ while (!list_empty(&sb->s_dshrinkers)) {
+ dcache_shrinker_wait(sb);
+ spin_lock(&dcache_lock);
+ }
+ spin_unlock(&dcache_lock);
+}
+
+/* dcache_lock protects shrinker's list */
+static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck)
+{
+ struct super_block *sb;
+ struct dcache_shrinker *ds;
+
+ sb = parent->d_sb;
+ list_for_each_entry(ds, &sb->s_dshrinkers, list) {
+ /* is one of dcache shrinkers working on the dentry? */
+ if (ds->dentry == parent) {
+ *racecheck = 1;
+ break;
+ }
+ }
+}
+
/*
* This is dput
*
@@ -132,8 +201,9 @@ static inline void dentry_iput(struct de
*/
/*
- * dput - release a dentry
- * @dentry: dentry to release
+ * dput_recursive - go upward through the dentry tree and release dentries
+ * @dentry: starting dentry
+ * @ds: shrinker to be added to active list (see shrink_dcache_parent)
*
* Release a dentry. This will drop the usage count and if appropriate
* call the dentry unlink method as well as removing it from the queues and
@@ -142,18 +212,15 @@ static inline void dentry_iput(struct de
*
* no dcache lock, please.
*/
-
-void dput(struct dentry *dentry)
+static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds)
{
- if (!dentry)
- return;
-
-repeat:
if (atomic_read(&dentry->d_count) == 1)
might_sleep();
if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
return;
+ dcache_shrinker_del(ds);
+repeat:
spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count)) {
spin_unlock(&dentry->d_lock);
@@ -185,6 +252,7 @@ unhash_it:
kill_it: {
struct dentry *parent;
+ struct dcache_shrinker lds;
/* If dentry was on d_lru list
* delete it from there
@@ -194,18 +262,47 @@ kill_it: {
dentry_stat.nr_unused--;
}
list_del(&dentry->d_u.d_child);
+ parent = dentry->d_parent;
+ dcache_shrinker_add(&lds, parent, dentry);
dentry_stat.nr_dentry--; /* For d_free, below */
/*drops the locks, at that point nobody can reach this dentry */
dentry_iput(dentry);
- parent = dentry->d_parent;
d_free(dentry);
- if (dentry == parent)
+ if (unlikely(dentry == parent)) {
+ spin_lock(&dcache_lock);
+ dcache_shrinker_del(&lds);
+ spin_unlock(&dcache_lock);
return;
+ }
dentry = parent;
- goto repeat;
+ spin_lock(&dcache_lock);
+ dcache_shrinker_del(&lds);
+ if (atomic_dec_and_test(&dentry->d_count))
+ goto repeat;
+ spin_unlock(&dcache_lock);
}
}
+/*
+ * dput - release a dentry
+ * @dentry: dentry to release
+ *
+ * Release a dentry. This will drop the usage count and if appropriate
+ * call the dentry unlink method as well as removing it from the queues and
+ * releasing its resources. If the parent dentries were scheduled for release
+ * they too may now get deleted.
+ *
+ * no dcache lock, please.
+ */
+
+void dput(struct dentry *dentry)
+{
+ if (!dentry)
+ return;
+
+ dput_recursive(dentry, NULL);
+}
+
/**
* d_invalidate - invalidate a dentry
* @dentry: dentry to invalidate
@@ -362,19 +459,23 @@ restart:
* removed.
* Called with dcache_lock, drops it and then regains.
*/
-static inline void prune_one_dentry(struct dentry * dentry)
+static void prune_one_dentry(struct dentry * dentry)
{
struct dentry * parent;
+ struct dcache_shrinker ds;
__d_drop(dentry);
list_del(&dentry->d_u.d_child);
+ parent = dentry->d_parent;
+ dcache_shrinker_add(&ds, parent, dentry);
dentry_stat.nr_dentry--; /* For d_free, below */
dentry_iput(dentry);
parent = dentry->d_parent;
d_free(dentry);
if (parent != dentry)
- dput(parent);
+ dput_recursive(parent, &ds);
spin_lock(&dcache_lock);
+ dcache_shrinker_del(&ds);
}
/**
@@ -557,13 +658,12 @@ positive:
* drop the lock and return early due to latency
* constraints.
*/
-static int select_parent(struct dentry * parent)
+static int select_parent(struct dentry * parent, int * racecheck)
{
struct dentry *this_parent = parent;
struct list_head *next;
int found = 0;
- spin_lock(&dcache_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -605,6 +705,9 @@ dentry->d_parent->d_name.name, dentry->d
#endif
goto repeat;
}
+
+ if (!found && racecheck != NULL)
+ shrink_dcache_racecheck(dentry, racecheck);
}
/*
* All done at this level ... ascend and resume the search.
@@ -619,7 +722,6 @@ this_parent->d_parent->d_name.name, this
goto resume;
}
out:
- spin_unlock(&dcache_lock);
return found;
}
@@ -632,10 +734,66 @@ out:
void shrink_dcache_parent(struct dentry * parent)
{
- int found;
+ int found, r;
- while ((found = select_parent(parent)) != 0)
+ while (1) {
+ spin_lock(&dcache_lock);
+ found = select_parent(parent, NULL);
+ if (found)
+ goto found;
+
+ /*
+ * try again with a dput_recursive() race check.
+ * it returns quickly if everything was really shrinked
+ */
+ r = 0;
+ found = select_parent(parent, &r);
+ if (found)
+ goto found;
+ if (!r)
+ break;
+
+ /* drops the lock inside */
+ dcache_shrinker_wait(parent->d_sb);
+ continue;
+
+found:
+ spin_unlock(&dcache_lock);
prune_dcache(found);
+ }
+ spin_unlock(&dcache_lock);
+}
+
+/*
+ * Move any unused anon dentries to the end of the unused list.
+ * called under dcache_lock
+ */
+static int select_anon(struct hlist_head *head, int *racecheck)
+{
+ struct hlist_node *lp;
+ int found = 0;
+
+ hlist_for_each(lp, head) {
+ struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
+ if (!list_empty(&this->d_lru)) {
+ dentry_stat.nr_unused--;
+ list_del_init(&this->d_lru);
+ }
+
+ /*
+ * move only zero ref count dentries to the end
+ * of the unused list for prune_dcache
+ */
+ if (!atomic_read(&this->d_count)) {
+ list_add_tail(&this->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ found++;
+ }
+
+ if (!found && racecheck != NULL)
+ shrink_dcache_racecheck(this, racecheck);
+ }
+ return found;
}
/**
@@ -648,33 +806,36 @@ void shrink_dcache_parent(struct dentry
* done under dcache_lock.
*
*/
-void shrink_dcache_anon(struct hlist_head *head)
+void shrink_dcache_anon(struct super_block *sb)
{
- struct hlist_node *lp;
- int found;
- do {
- found = 0;
+ int found, r;
+
+ while (1) {
spin_lock(&dcache_lock);
- hlist_for_each(lp, head) {
- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
- if (!list_empty(&this->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&this->d_lru);
- }
+ found = select_anon(&sb->s_anon, NULL);
+ if (found)
+ goto found;
- /*
- * move only zero ref count dentries to the end
- * of the unused list for prune_dcache
- */
- if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- found++;
- }
- }
+ /*
+ * try again with a dput_recursive() race check.
+ * it returns quickly if everything was really shrinked
+ */
+ r = 0;
+ found = select_anon(&sb->s_anon, &r);
+ if (found)
+ goto found;
+ if (!r)
+ break;
+
+ /* drops the lock inside */
+ dcache_shrinker_wait(sb);
+ continue;
+
+found:
spin_unlock(&dcache_lock);
prune_dcache(found);
- } while(found);
+ }
+ spin_unlock(&dcache_lock);
}
/*
--- linux-2.6.15.orig/fs/super.c 2006-01-03 06:21:10.000000000 +0300
+++ linux-2.6.15-025stab015-dcache-race/fs/super.c 2006-03-03 16:22:38.841709640 +0300
@@ -69,6 +69,7 @@ static struct super_block *alloc_super(v
INIT_LIST_HEAD(&s->s_io);
INIT_LIST_HEAD(&s->s_files);
INIT_LIST_HEAD(&s->s_instances);
+ INIT_LIST_HEAD(&s->s_dshrinkers);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
init_rwsem(&s->s_umount);
@@ -231,8 +232,9 @@ void generic_shutdown_super(struct super
if (root) {
sb->s_root = NULL;
shrink_dcache_parent(root);
- shrink_dcache_anon(&sb->s_anon);
+ shrink_dcache_anon(sb);
dput(root);
+ dcache_shrinker_wait_sb(sb);
fsync_super(sb);
lock_super(sb);
sb->s_flags &= ~MS_ACTIVE;
--- linux-2.6.15.orig/include/linux/dcache.h 2006-01-03 06:21:10.000000000 +0300
+++ linux-2.6.15-025stab015-dcache-race/include/linux/dcache.h 2006-03-03 16:22:38.843709336 +0300
@@ -209,7 +209,8 @@ extern struct dentry * d_alloc_anon(stru
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern void shrink_dcache_sb(struct super_block *);
extern void shrink_dcache_parent(struct dentry *);
-extern void shrink_dcache_anon(struct hlist_head *);
+extern void shrink_dcache_anon(struct super_block *);
+extern void dcache_shrinker_wait_sb(struct super_block *sb);
extern int d_invalidate(struct dentry *);
/* only used at mount-time */
--- linux-2.6.15.orig/include/linux/fs.h 2006-01-03 06:21:10.000000000 +0300
+++ linux-2.6.15-025stab015-dcache-race/include/linux/fs.h 2006-03-03 16:22:38.821712680 +0300
@@ -803,6 +803,7 @@ struct super_block {
struct list_head s_io; /* parked for writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_files;
+ struct list_head s_dshrinkers; /* active dcache shrinkers */
struct block_device *s_bdev;
struct list_head s_instances;
next prev parent reply other threads:[~2006-03-09 12:00 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-03-02 6:57 [PATCH] Busy inodes after unmount, be more verbose in generic_shutdown_super Neil Brown
2006-03-02 10:48 ` Jan Blunck
2006-03-03 11:42 ` Jan Blunck
2006-03-06 6:09 ` Neil Brown
2006-03-06 7:32 ` Balbir Singh
2006-03-07 1:58 ` Neil Brown
2006-03-07 2:49 ` Balbir Singh
2006-03-07 6:22 ` Kirill Korotaev
2006-03-07 6:16 ` Kirill Korotaev
2006-03-07 7:03 ` Balbir Singh
2006-03-07 7:21 ` Kirill Korotaev
2006-03-07 11:05 ` Balbir Singh
2006-03-08 0:29 ` Neil Brown
2006-03-08 2:17 ` Balbir Singh
2006-03-08 2:39 ` Neil Brown
2006-03-08 3:05 ` Balbir Singh
2006-03-08 11:01 ` Jan Blunck
2006-03-06 11:56 ` Jan Blunck
2006-03-07 2:15 ` Neil Brown
2006-03-06 11:56 ` Kirill Korotaev
2006-03-07 2:01 ` Neil Brown
2006-03-07 6:20 ` Kirill Korotaev
2006-03-07 23:20 ` Neil Brown
2006-03-09 12:03 ` Kirill Korotaev [this message]
-- strict thread matches above, loose matches on Subject: below --
2006-01-16 22:34 Olaf Hering
2006-01-16 23:23 ` Kirill Korotaev
2006-01-16 23:29 ` Olaf Hering
2006-01-17 2:05 ` Andrew Morton
2006-01-17 7:03 ` Kirill Korotaev
2006-01-18 22:49 ` Jan Blunck
2006-01-18 23:10 ` Andrew Morton
2006-01-19 10:08 ` Kirill Korotaev
2006-01-19 9:52 ` Kirill Korotaev
2006-01-19 10:04 ` Jan Blunck
2006-01-19 10:26 ` Kirill Korotaev
2006-01-20 19:06 ` Jan Blunck
2006-01-23 8:14 ` Kirill Korotaev
2006-01-30 11:54 ` Jan Blunck
2006-01-30 14:05 ` Kirill Korotaev
2006-01-30 14:21 ` Jan Blunck
2006-01-30 14:34 ` Kirill Korotaev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4410199D.1050505@sw.ru \
--to=dev@sw.ru \
--cc=akpm@osdl.org \
--cc=dev@openvz.org \
--cc=jblunck@suse.de \
--cc=linux-kernel@vger.kernel.org \
--cc=neilb@suse.de \
--cc=olh@suse.de \
--cc=viro@ftp.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).