linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Kirill Korotaev <dev@sw.ru>
To: Neil Brown <neilb@suse.de>
Cc: Kirill Korotaev <dev@openvz.org>,
	linux-kernel@vger.kernel.org, Andrew Morton <akpm@osdl.org>,
	Olaf Hering <olh@suse.de>, Jan Blunck <jblunck@suse.de>,
	Al Viro <viro@ftp.linux.org.uk>
Subject: Re: [PATCH] Busy inodes after unmount, be more verbose in generic_shutdown_super
Date: Thu, 09 Mar 2006 15:03:41 +0300	[thread overview]
Message-ID: <4410199D.1050505@sw.ru> (raw)
In-Reply-To: <17422.5456.875119.579068@cse.unsw.edu.au>

[-- Attachment #1: Type: text/plain, Size: 969 bytes --]

>>>>In general your patch is still does what mine do, so I will be happy if 
>>>>any of this is commited mainstream. In future, please, keep the 
>>>>reference to original authors, this will also make sure that I'm on CC 
>>>>if something goes wrong.
>>>
>>>
>>>Sorry: which 'original author' did I miss ?
>>
>>I mean, it is better to mention original author 
>>(http://marc.theaimsgroup.com/?l=linux-kernel&m=114123870406751&w=2) in 
>>patch description, as it makes sure that he will be on CC if this patch 
>>will be discussed later again. My patch fixing this race was in -mm tree 
>>for half a year already.
> 
> 
> Which patch is that?  The race still seems to be present in -mm.
I attached you my latest patch for 2.6.16-rc5

in -mm tree these patch was until the discussion arose again recently:
ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.14/2.6.14-mm2/broken-out/fix-of-dcache-race-leading-to-busy-inodes-on-umount.patch

Thanks,
Kirill

[-- Attachment #2: diff-ms-dcache-race-20060303 --]
[-- Type: text/plain, Size: 10441 bytes --]

--- linux-2.6.15.orig/fs/dcache.c	2006-01-03 06:21:10.000000000 +0300
+++ linux-2.6.15-025stab015-dcache-race/fs/dcache.c	2006-03-03 16:22:38.862706448 +0300
@@ -114,6 +114,75 @@ static inline void dentry_iput(struct de
 	}
 }
 
+struct dcache_shrinker {
+	struct list_head list;
+	struct dentry *dentry;
+};
+
+DECLARE_WAIT_QUEUE_HEAD(dcache_shrinker_wq);
+
+/* called under dcache_lock */
+static void dcache_shrinker_add(struct dcache_shrinker *ds,
+		struct dentry *parent, struct dentry *dentry)
+{
+	struct super_block *sb;
+
+	sb = parent->d_sb;
+	ds->dentry = parent;
+	list_add(&ds->list, &sb->s_dshrinkers);
+}
+
+/* called under dcache_lock */
+static void dcache_shrinker_del(struct dcache_shrinker *ds)
+{
+	if (ds == NULL || list_empty(&ds->list))
+		return;
+
+	list_del_init(&ds->list);
+	wake_up_all(&dcache_shrinker_wq);
+}
+
+/* called under dcache_lock, drops inside */
+static void dcache_shrinker_wait(struct super_block *sb)
+{
+	DECLARE_WAITQUEUE(wq, current);
+
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	add_wait_queue(&dcache_shrinker_wq, &wq);
+	spin_unlock(&dcache_lock);
+
+	schedule();
+	remove_wait_queue(&dcache_shrinker_wq, &wq);
+	__set_current_state(TASK_RUNNING);
+}
+
+void dcache_shrinker_wait_sb(struct super_block *sb)
+{
+	/* the root dentry can be held in dput_recursive */
+	spin_lock(&dcache_lock);
+	while (!list_empty(&sb->s_dshrinkers)) {
+		dcache_shrinker_wait(sb);
+		spin_lock(&dcache_lock);
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/* dcache_lock protects shrinker's list */
+static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck)
+{
+	struct super_block *sb;
+	struct dcache_shrinker *ds;
+
+	sb = parent->d_sb;
+	list_for_each_entry(ds, &sb->s_dshrinkers, list) {
+		/* is one of dcache shrinkers working on the dentry? */
+		if (ds->dentry == parent) {
+			*racecheck = 1;
+			break;
+		}
+	}
+}
+
 /* 
  * This is dput
  *
@@ -132,8 +201,9 @@ static inline void dentry_iput(struct de
  */
 
 /*
- * dput - release a dentry
- * @dentry: dentry to release 
+ * dput_recursive - go upward through the dentry tree and release dentries
+ * @dentry: starting dentry
+ * @ds: shrinker to be added to active list (see shrink_dcache_parent)
  *
  * Release a dentry. This will drop the usage count and if appropriate
  * call the dentry unlink method as well as removing it from the queues and
@@ -142,18 +212,15 @@ static inline void dentry_iput(struct de
  *
  * no dcache lock, please.
  */
-
-void dput(struct dentry *dentry)
+static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds)
 {
-	if (!dentry)
-		return;
-
-repeat:
 	if (atomic_read(&dentry->d_count) == 1)
 		might_sleep();
 	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
 		return;
+	dcache_shrinker_del(ds);
 
+repeat:
 	spin_lock(&dentry->d_lock);
 	if (atomic_read(&dentry->d_count)) {
 		spin_unlock(&dentry->d_lock);
@@ -185,6 +252,7 @@ unhash_it:
 
 kill_it: {
 		struct dentry *parent;
+		struct dcache_shrinker lds;
 
 		/* If dentry was on d_lru list
 		 * delete it from there
@@ -194,18 +262,47 @@ kill_it: {
   			dentry_stat.nr_unused--;
   		}
   		list_del(&dentry->d_u.d_child);
+		parent = dentry->d_parent;
+		dcache_shrinker_add(&lds, parent, dentry);
 		dentry_stat.nr_dentry--;	/* For d_free, below */
 		/*drops the locks, at that point nobody can reach this dentry */
 		dentry_iput(dentry);
-		parent = dentry->d_parent;
 		d_free(dentry);
-		if (dentry == parent)
+		if (unlikely(dentry == parent)) {
+			spin_lock(&dcache_lock);
+			dcache_shrinker_del(&lds);
+			spin_unlock(&dcache_lock);
 			return;
+		}
 		dentry = parent;
-		goto repeat;
+		spin_lock(&dcache_lock);
+		dcache_shrinker_del(&lds);
+		if (atomic_dec_and_test(&dentry->d_count))
+			goto repeat;
+		spin_unlock(&dcache_lock);
 	}
 }
 
+/*
+ * dput - release a dentry
+ * @dentry: dentry to release 
+ *
+ * Release a dentry. This will drop the usage count and if appropriate
+ * call the dentry unlink method as well as removing it from the queues and
+ * releasing its resources. If the parent dentries were scheduled for release
+ * they too may now get deleted.
+ *
+ * no dcache lock, please.
+ */
+
+void dput(struct dentry *dentry)
+{
+	if (!dentry)
+		return;
+
+	dput_recursive(dentry, NULL);
+}
+
 /**
  * d_invalidate - invalidate a dentry
  * @dentry: dentry to invalidate
@@ -362,19 +459,23 @@ restart:
  * removed.
  * Called with dcache_lock, drops it and then regains.
  */
-static inline void prune_one_dentry(struct dentry * dentry)
+static void prune_one_dentry(struct dentry * dentry)
 {
 	struct dentry * parent;
+	struct dcache_shrinker ds;
 
 	__d_drop(dentry);
 	list_del(&dentry->d_u.d_child);
+	parent = dentry->d_parent;
+	dcache_shrinker_add(&ds, parent, dentry);
 	dentry_stat.nr_dentry--;	/* For d_free, below */
 	dentry_iput(dentry);
 	parent = dentry->d_parent;
 	d_free(dentry);
 	if (parent != dentry)
-		dput(parent);
+		dput_recursive(parent, &ds);
 	spin_lock(&dcache_lock);
+	dcache_shrinker_del(&ds);
 }
 
 /**
@@ -557,13 +658,12 @@ positive:
  * drop the lock and return early due to latency
  * constraints.
  */
-static int select_parent(struct dentry * parent)
+static int select_parent(struct dentry * parent, int * racecheck)
 {
 	struct dentry *this_parent = parent;
 	struct list_head *next;
 	int found = 0;
 
-	spin_lock(&dcache_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
 resume:
@@ -605,6 +705,9 @@ dentry->d_parent->d_name.name, dentry->d
 #endif
 			goto repeat;
 		}
+
+		if (!found && racecheck != NULL)
+			shrink_dcache_racecheck(dentry, racecheck);
 	}
 	/*
 	 * All done at this level ... ascend and resume the search.
@@ -619,7 +722,6 @@ this_parent->d_parent->d_name.name, this
 		goto resume;
 	}
 out:
-	spin_unlock(&dcache_lock);
 	return found;
 }
 
@@ -632,10 +734,66 @@ out:
  
 void shrink_dcache_parent(struct dentry * parent)
 {
-	int found;
+	int found, r;
 
-	while ((found = select_parent(parent)) != 0)
+	while (1) {
+		spin_lock(&dcache_lock);
+		found = select_parent(parent, NULL);
+		if (found)
+			goto found;
+
+		/*
+		 * try again with a dput_recursive() race check.
+		 * it returns quickly if everything was really shrinked
+		 */
+		r = 0;
+		found = select_parent(parent, &r);
+		if (found)
+			goto found;
+		if (!r)
+			break;
+
+		/* drops the lock inside */
+		dcache_shrinker_wait(parent->d_sb);
+		continue;
+
+found:
+		spin_unlock(&dcache_lock);
 		prune_dcache(found);
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/*
+ * Move any unused anon dentries to the end of the unused list.
+ * called under dcache_lock
+ */
+static int select_anon(struct hlist_head *head, int *racecheck)
+{
+	struct hlist_node *lp;
+	int found = 0;
+
+	hlist_for_each(lp, head) {
+		struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
+		if (!list_empty(&this->d_lru)) {
+			dentry_stat.nr_unused--;
+			list_del_init(&this->d_lru);
+		}
+
+		/* 
+		 * move only zero ref count dentries to the end 
+		 * of the unused list for prune_dcache
+		 */
+		if (!atomic_read(&this->d_count)) {
+			list_add_tail(&this->d_lru, &dentry_unused);
+			dentry_stat.nr_unused++;
+			found++;
+		}
+
+		if (!found && racecheck != NULL)
+			shrink_dcache_racecheck(this, racecheck);
+	}
+	return found;
 }
 
 /**
@@ -648,33 +806,36 @@ void shrink_dcache_parent(struct dentry 
  * done under dcache_lock.
  *
  */
-void shrink_dcache_anon(struct hlist_head *head)
+void shrink_dcache_anon(struct super_block *sb)
 {
-	struct hlist_node *lp;
-	int found;
-	do {
-		found = 0;
+	int found, r;
+
+	while (1) {
 		spin_lock(&dcache_lock);
-		hlist_for_each(lp, head) {
-			struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
-			if (!list_empty(&this->d_lru)) {
-				dentry_stat.nr_unused--;
-				list_del_init(&this->d_lru);
-			}
+		found = select_anon(&sb->s_anon, NULL);
+		if (found)
+			goto found;
 
-			/* 
-			 * move only zero ref count dentries to the end 
-			 * of the unused list for prune_dcache
-			 */
-			if (!atomic_read(&this->d_count)) {
-				list_add_tail(&this->d_lru, &dentry_unused);
-				dentry_stat.nr_unused++;
-				found++;
-			}
-		}
+		/*
+		 * try again with a dput_recursive() race check.
+		 * it returns quickly if everything was really shrinked
+		 */
+		r = 0;
+		found = select_anon(&sb->s_anon, &r);
+		if (found)
+			goto found;
+		if (!r)
+			break;
+
+		/* drops the lock inside */
+		dcache_shrinker_wait(sb);
+		continue;
+
+found:
 		spin_unlock(&dcache_lock);
 		prune_dcache(found);
-	} while(found);
+	}
+	spin_unlock(&dcache_lock);
 }
 
 /*
--- linux-2.6.15.orig/fs/super.c	2006-01-03 06:21:10.000000000 +0300
+++ linux-2.6.15-025stab015-dcache-race/fs/super.c	2006-03-03 16:22:38.841709640 +0300
@@ -69,6 +69,7 @@ static struct super_block *alloc_super(v
 		INIT_LIST_HEAD(&s->s_io);
 		INIT_LIST_HEAD(&s->s_files);
 		INIT_LIST_HEAD(&s->s_instances);
+		INIT_LIST_HEAD(&s->s_dshrinkers);
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		init_rwsem(&s->s_umount);
@@ -231,8 +232,9 @@ void generic_shutdown_super(struct super
 	if (root) {
 		sb->s_root = NULL;
 		shrink_dcache_parent(root);
-		shrink_dcache_anon(&sb->s_anon);
+		shrink_dcache_anon(sb);
 		dput(root);
+		dcache_shrinker_wait_sb(sb);
 		fsync_super(sb);
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
--- linux-2.6.15.orig/include/linux/dcache.h	2006-01-03 06:21:10.000000000 +0300
+++ linux-2.6.15-025stab015-dcache-race/include/linux/dcache.h	2006-03-03 16:22:38.843709336 +0300
@@ -209,7 +209,8 @@ extern struct dentry * d_alloc_anon(stru
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
-extern void shrink_dcache_anon(struct hlist_head *);
+extern void shrink_dcache_anon(struct super_block *);
+extern void dcache_shrinker_wait_sb(struct super_block *sb);
 extern int d_invalidate(struct dentry *);
 
 /* only used at mount-time */
--- linux-2.6.15.orig/include/linux/fs.h	2006-01-03 06:21:10.000000000 +0300
+++ linux-2.6.15-025stab015-dcache-race/include/linux/fs.h	2006-03-03 16:22:38.821712680 +0300
@@ -803,6 +803,7 @@ struct super_block {
 	struct list_head	s_io;		/* parked for writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_files;
+	struct list_head	s_dshrinkers;	/* active dcache shrinkers */
 
 	struct block_device	*s_bdev;
 	struct list_head	s_instances;

  reply	other threads:[~2006-03-09 12:00 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-03-02  6:57 [PATCH] Busy inodes after unmount, be more verbose in generic_shutdown_super Neil Brown
2006-03-02 10:48 ` Jan Blunck
2006-03-03 11:42 ` Jan Blunck
2006-03-06  6:09 ` Neil Brown
2006-03-06  7:32   ` Balbir Singh
2006-03-07  1:58     ` Neil Brown
2006-03-07  2:49       ` Balbir Singh
2006-03-07  6:22         ` Kirill Korotaev
2006-03-07  6:16       ` Kirill Korotaev
2006-03-07  7:03         ` Balbir Singh
2006-03-07  7:21           ` Kirill Korotaev
2006-03-07 11:05             ` Balbir Singh
2006-03-08  0:29         ` Neil Brown
2006-03-08  2:17           ` Balbir Singh
2006-03-08  2:39             ` Neil Brown
2006-03-08  3:05               ` Balbir Singh
2006-03-08 11:01                 ` Jan Blunck
2006-03-06 11:56   ` Jan Blunck
2006-03-07  2:15     ` Neil Brown
2006-03-06 11:56   ` Kirill Korotaev
2006-03-07  2:01     ` Neil Brown
2006-03-07  6:20       ` Kirill Korotaev
2006-03-07 23:20         ` Neil Brown
2006-03-09 12:03           ` Kirill Korotaev [this message]
  -- strict thread matches above, loose matches on Subject: below --
2006-01-16 22:34 Olaf Hering
2006-01-16 23:23 ` Kirill Korotaev
2006-01-16 23:29   ` Olaf Hering
2006-01-17  2:05     ` Andrew Morton
2006-01-17  7:03       ` Kirill Korotaev
2006-01-18 22:49   ` Jan Blunck
2006-01-18 23:10     ` Andrew Morton
2006-01-19 10:08       ` Kirill Korotaev
2006-01-19  9:52     ` Kirill Korotaev
2006-01-19 10:04       ` Jan Blunck
2006-01-19 10:26         ` Kirill Korotaev
2006-01-20 19:06           ` Jan Blunck
2006-01-23  8:14             ` Kirill Korotaev
2006-01-30 11:54               ` Jan Blunck
2006-01-30 14:05                 ` Kirill Korotaev
2006-01-30 14:21                   ` Jan Blunck
2006-01-30 14:34                     ` Kirill Korotaev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4410199D.1050505@sw.ru \
    --to=dev@sw.ru \
    --cc=akpm@osdl.org \
    --cc=dev@openvz.org \
    --cc=jblunck@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=olh@suse.de \
    --cc=viro@ftp.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).