All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] stop taking the iolock in the reclaim path
@ 2009-08-25 18:41 Christoph Hellwig
  2009-08-25 18:41 ` [PATCH 1/3] xfs: do not take the iolock in xfs_inactive Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Christoph Hellwig @ 2009-08-25 18:41 UTC (permalink / raw)
  To: xfs

Currently we take the iolock in the reclaim path in various places.
Taking it in the inode reclaim path means however that we can't take
it while doing memory allocations that recurse back into the filesystem,
and recently lockdep has been enhanced to find these cases and noticed
quite a few of these in XFS.

We had similar issues with the ilock, but we could get away with just
stopping to do filesystem-recursing allocation under the ilock as there
were just a few.  The iolock is however taken over larger critical sections
protection actual I/O and it's almost impossible to switch all these to
NOFS allocations.  Based on what the iolock is used for we don't actually
need it in the reclaim path, though - at this point the inode is dead
and no one has any other reference to it.  See the listing in the
first patch for a more detailed list of our current iolock holders and
why they can't contend with the reclaim path.

I would greatly appreciate some indepth-review of this to make sure I
haven't missed a big loophole..

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] xfs: do not take the iolock in xfs_inactive
  2009-08-25 18:41 [PATCH 0/3] stop taking the iolock in the reclaim path Christoph Hellwig
@ 2009-08-25 18:41 ` Christoph Hellwig
  2009-08-25 18:41 ` [PATCH 2/3] xfs: do not take the iolock in xfs_ireclaim Christoph Hellwig
  2009-08-25 18:41 ` [PATCH 3/3] xfs: do not wait for the iolock in xfs_free_eofblocks Christoph Hellwig
  2 siblings, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2009-08-25 18:41 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-lockdep-inactive --]
[-- Type: text/plain, Size: 4927 bytes --]

When xfs_inactive is called we already have the inode torn down and no
one else can have a reference to it.  That means the iolock here is
superflous as all other users require a proper reference to it:

 - xfs_vm_vmap, xfs_vn_fallocate, xfs_read, xfs_write, xfs_splice_read,
   xfs_splice_write and xfs_setattr are all implementations of VFS methods
   that require such a life inode
 - xfs_getbmap and xfs_swap_extents are ioctl sub command for which the
   same is true
 - xfs_truncate_file is only called on quota inodes just return from
   xfs_iget
 - xfs_sync_inode_data does the lock just after an igrab()
 - xfs_filestream_associate and xfs_filestream_new_ag take the iolock
   on the parent inode of an inode which by VFS rules must be referenced

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: xfs/fs/xfs/xfs_vnodeops.c
===================================================================
--- xfs.orig/fs/xfs/xfs_vnodeops.c	2009-08-20 13:51:15.699188904 -0300
+++ xfs/fs/xfs/xfs_vnodeops.c	2009-08-24 11:23:28.798259252 -0300
@@ -869,10 +869,10 @@ xfs_inactive_symlink_rmt(
 	 * the second transaction.  In the error paths we need it
 	 * held so the cancel won't rele it, see below.
 	 */
-	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	size = (int)ip->i_d.di_size;
 	ip->i_d.di_size = 0;
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 	xfs_trans_ihold(tp, ip);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	/*
@@ -917,7 +917,7 @@ xfs_inactive_symlink_rmt(
 	 * Mark it dirty so it will be logged and moved forward in the log as
 	 * part of every commit.
 	 */
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 	xfs_trans_ihold(tp, ip);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	/*
@@ -977,7 +977,7 @@ xfs_inactive_symlink_rmt(
 	 * joined to the transaction.
 	 */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	*tpp = NULL;
 	return error;
 
@@ -1006,7 +1006,7 @@ xfs_inactive_symlink_local(
 		*tpp = NULL;
 		return error;
 	}
-	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 
 	/*
 	 * Zero length symlinks _can_ exist.
@@ -1029,7 +1029,6 @@ xfs_inactive_attrs(
 	int		error;
 	xfs_mount_t	*mp;
 
-	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	tp = *tpp;
 	mp = ip->i_mount;
 	ASSERT(ip->i_d.di_forkoff != 0);
@@ -1051,7 +1050,7 @@ xfs_inactive_attrs(
 		goto error_cancel;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 	xfs_trans_ihold(tp, ip);
 	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
 
@@ -1065,7 +1064,6 @@ error_cancel:
 	xfs_trans_cancel(tp, 0);
 error_unlock:
 	*tpp = NULL;
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 	return error;
 }
 
@@ -1210,12 +1208,10 @@ xfs_inactive(
 		 * will call into the buffer cache and we can't
 		 * do that within a transaction.
 		 */
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
 
 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
 		if (error) {
 			xfs_trans_cancel(tp, 0);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return VN_INACTIVE_CACHE;
 		}
 
@@ -1227,12 +1223,11 @@ xfs_inactive(
 			/* Don't call itruncate_cleanup */
 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
 			xfs_trans_cancel(tp, 0);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return VN_INACTIVE_CACHE;
 		}
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 		xfs_trans_ihold(tp, ip);
 
 		/*
@@ -1248,7 +1243,7 @@ xfs_inactive(
 		if (error) {
 			xfs_trans_cancel(tp,
 				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
 			return VN_INACTIVE_CACHE;
 		}
 	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
@@ -1266,7 +1261,7 @@ xfs_inactive(
 			return VN_INACTIVE_CACHE;
 		}
 
-		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 		xfs_trans_ihold(tp, ip);
 	} else {
 		error = xfs_trans_reserve(tp, 0,
@@ -1279,8 +1274,8 @@ xfs_inactive(
 			return VN_INACTIVE_CACHE;
 		}
 
-		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 		xfs_trans_ihold(tp, ip);
 	}
 
@@ -1346,7 +1341,7 @@ xfs_inactive(
 	 * Release the dquots held by inode, if any.
 	 */
 	xfs_qm_dqdetach(ip);
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
  out:
 	return VN_INACTIVE_CACHE;

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/3] xfs: do not take the iolock in xfs_ireclaim
  2009-08-25 18:41 [PATCH 0/3] stop taking the iolock in the reclaim path Christoph Hellwig
  2009-08-25 18:41 ` [PATCH 1/3] xfs: do not take the iolock in xfs_inactive Christoph Hellwig
@ 2009-08-25 18:41 ` Christoph Hellwig
  2009-08-25 18:41 ` [PATCH 3/3] xfs: do not wait for the iolock in xfs_free_eofblocks Christoph Hellwig
  2 siblings, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2009-08-25 18:41 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-lockdep-ireclaim --]
[-- Type: text/plain, Size: 1345 bytes --]

While the comment claims we need it to synchronize with inode cache lookups
that is not the case.  Today we make sure to always do an igrab in all
sync variants which do the proper checks for a deleted inode, and we also
have the proper checks in xfs_iget.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: xfs/fs/xfs/xfs_iget.c
===================================================================
--- xfs.orig/fs/xfs/xfs_iget.c	2009-08-20 13:51:15.695189094 -0300
+++ xfs/fs/xfs/xfs_iget.c	2009-08-24 11:23:32.793789269 -0300
@@ -500,13 +500,11 @@ xfs_ireclaim(
 	 * can reference the inodes in the cache without taking references.
 	 *
 	 * We make that OK here by ensuring that we wait until the inode is
-	 * unlocked after the lookup before we go ahead and free it.  We get
-	 * both the ilock and the iolock because the code may need to drop the
-	 * ilock one but will still hold the iolock.
+	 * unlocked after the lookup before we go ahead and free it.
 	 */
-	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_qm_dqdetach(ip);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
 	switch (ip->i_d.di_mode & S_IFMT) {
 	case S_IFREG:

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 3/3] xfs: do not wait for the iolock in xfs_free_eofblocks
  2009-08-25 18:41 [PATCH 0/3] stop taking the iolock in the reclaim path Christoph Hellwig
  2009-08-25 18:41 ` [PATCH 1/3] xfs: do not take the iolock in xfs_inactive Christoph Hellwig
  2009-08-25 18:41 ` [PATCH 2/3] xfs: do not take the iolock in xfs_ireclaim Christoph Hellwig
@ 2009-08-25 18:41 ` Christoph Hellwig
  2 siblings, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2009-08-25 18:41 UTC (permalink / raw)
  To: xfs

[-- Attachment #1: xfs-lockdep-free_eofblocks --]
[-- Type: text/plain, Size: 3834 bytes --]

For both callers of xfs_free_eofblocks taking the iolock in blocking
fashion causes problems:

 - in xfs_release is causes a lock order reversal with the mmap lock
 - due to xfs_inactive beeing called from reclaim context it would
   forbid memory allocation under the iolock.

As mention in the previous platch there is no need to acquire iolock in
the inactive path as the inode is dead enough at that point.  For release
we can just acquire the lock in a non-blocking fashion.  This will leave
the pre-allocation on the file in case of lock contention, but we will
still take care of it on the final iput.


Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: xfs/fs/xfs/xfs_vnodeops.c
===================================================================
--- xfs.orig/fs/xfs/xfs_vnodeops.c	2009-08-24 11:23:28.798259252 -0300
+++ xfs/fs/xfs/xfs_vnodeops.c	2009-08-24 11:23:34.886280913 -0300
@@ -714,9 +714,14 @@ xfs_fsync(
 }
 
 /*
- * This is called by xfs_inactive to free any blocks beyond eof
- * when the link count isn't zero and by xfs_dm_punch_hole() when
- * punching a hole to EOF.
+ * We either call this from xfs_release when dropping the last reference to
+ * a file structure, or from xfs_inactive when dropping an inode out of the
+ * cache that has not been deleted.  In the first case we must take the
+ * iolock to protect against concurrent truncate calls, but we can't
+ * acquire it in the normal blocking way as that would created a lock order
+ * reversal against mmap_sem.  In the latter case we don't need the iolock
+ * at all because we're called on inode that is dead enough to not see any
+ * I/O at all.
  */
 STATIC int
 xfs_free_eofblocks(
@@ -766,6 +771,11 @@ xfs_free_eofblocks(
 		 */
 		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
 
+		if (use_iolock && !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+			xfs_trans_cancel(tp, 0);
+			return EAGAIN;
+		}
+
 		/*
 		 * Do the xfs_itruncate_start() call before
 		 * reserving any log space because
@@ -773,16 +783,10 @@ xfs_free_eofblocks(
 		 * cache and we can't
 		 * do that within a transaction.
 		 */
-		if (use_iolock)
-			xfs_ilock(ip, XFS_IOLOCK_EXCL);
 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
 				    ip->i_size);
-		if (error) {
-			xfs_trans_cancel(tp, 0);
-			if (use_iolock)
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-			return error;
-		}
+		if (error)
+			goto out_cancel;
 
 		error = xfs_trans_reserve(tp, 0,
 					  XFS_ITRUNCATE_LOG_RES(mp),
@@ -790,14 +794,12 @@ xfs_free_eofblocks(
 					  XFS_ITRUNCATE_LOG_COUNT);
 		if (error) {
 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
-			xfs_trans_cancel(tp, 0);
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-			return error;
+			goto out_cancel;
 		}
 
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_trans_ijoin(tp, ip,
-				XFS_IOLOCK_EXCL |
+		xfs_trans_ijoin(tp, ip, use_iolock ?
+				XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL :
 				XFS_ILOCK_EXCL);
 		xfs_trans_ihold(tp, ip);
 
@@ -821,6 +823,12 @@ xfs_free_eofblocks(
 					    : XFS_ILOCK_EXCL));
 	}
 	return error;
+
+out_cancel:
+	xfs_trans_cancel(tp, 0);
+	if (use_iolock)
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	return error;
 }
 
 /*
@@ -1117,7 +1125,7 @@ xfs_release(
 		    (!(ip->i_d.di_flags &
 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
 			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
-			if (error)
+			if (error && error != EAGAIN)
 				return error;
 		}
 	}
@@ -1187,7 +1195,7 @@ xfs_inactive(
 		     (!(ip->i_d.di_flags &
 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
 		      (ip->i_delayed_blks != 0)))) {
-			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_NOLOCK);
 			if (error)
 				return VN_INACTIVE_CACHE;
 		}

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2009-08-25 18:42 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-08-25 18:41 [PATCH 0/3] stop taking the iolock in the reclaim path Christoph Hellwig
2009-08-25 18:41 ` [PATCH 1/3] xfs: do not take the iolock in xfs_inactive Christoph Hellwig
2009-08-25 18:41 ` [PATCH 2/3] xfs: do not take the iolock in xfs_ireclaim Christoph Hellwig
2009-08-25 18:41 ` [PATCH 3/3] xfs: do not wait for the iolock in xfs_free_eofblocks Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.