All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] xfs: fix mmap_sem vs iolock lock order inversion in xfs_free_eofblocks
@ 2009-10-19  4:03 Christoph Hellwig
  2009-10-30  8:55 ` Christoph Hellwig
  2009-11-02 21:10 ` [PATCH] xfs: fix mmap_sem vs iolock lock order inversion inxfs_free_eofblocks Alex Elder
  0 siblings, 2 replies; 4+ messages in thread
From: Christoph Hellwig @ 2009-10-19  4:03 UTC (permalink / raw)
  To: xfs

When xfs_free_eofblocks is called from ->release the VM might already
hold the mmap_sem, but in the write path we take the iolock before
taking the mmap_sem in the generic write code.

Switch xfs_free_eofblocks to only trylock the iolock if called from ->release
and skip trimming the prellocated blocks in that case.  We'll still free
them later on the final iput.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: xfs/fs/xfs/xfs_rw.h
===================================================================
--- xfs.orig/fs/xfs/xfs_rw.h	2009-10-14 17:30:02.396028076 +0200
+++ xfs/fs/xfs/xfs_rw.h	2009-10-14 17:30:20.472287472 +0200
@@ -37,13 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_
 }
 
 /*
- * Flags for xfs_free_eofblocks
- */
-#define XFS_FREE_EOF_LOCK	(1<<0)
-#define XFS_FREE_EOF_NOLOCK	(1<<1)
-
-
-/*
  * helper function to extract extent size hint from inode
  */
 STATIC_INLINE xfs_extlen_t
Index: xfs/fs/xfs/xfs_vnodeops.c
===================================================================
--- xfs.orig/fs/xfs/xfs_vnodeops.c	2009-10-14 17:30:13.363024201 +0200
+++ xfs/fs/xfs/xfs_vnodeops.c	2009-10-14 17:35:39.314256388 +0200
@@ -709,6 +709,11 @@ xfs_fsync(
 }
 
 /*
+ * Flags for xfs_free_eofblocks
+ */
+#define XFS_FREE_EOF_TRYLOCK	(1<<0)
+
+/*
  * This is called by xfs_inactive to free any blocks beyond eof
  * when the link count isn't zero and by xfs_dm_punch_hole() when
  * punching a hole to EOF.
@@ -726,7 +731,6 @@ xfs_free_eofblocks(
 	xfs_filblks_t	map_len;
 	int		nimaps;
 	xfs_bmbt_irec_t	imap;
-	int		use_iolock = (flags & XFS_FREE_EOF_LOCK);
 
 	/*
 	 * Figure out if there are any blocks beyond the end
@@ -768,14 +772,19 @@ xfs_free_eofblocks(
 		 * cache and we can't
 		 * do that within a transaction.
 		 */
-		if (use_iolock)
+		if (flags & XFS_FREE_EOF_TRYLOCK) {
+			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+				xfs_trans_cancel(tp, 0);
+				return 0;
+			}
+		} else {
 			xfs_ilock(ip, XFS_IOLOCK_EXCL);
+		}
 		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
 				    ip->i_size);
 		if (error) {
 			xfs_trans_cancel(tp, 0);
-			if (use_iolock)
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return error;
 		}
 
@@ -812,8 +821,7 @@ xfs_free_eofblocks(
 			error = xfs_trans_commit(tp,
 						XFS_TRANS_RELEASE_LOG_RES);
 		}
-		xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
-					    : XFS_ILOCK_EXCL));
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
 	}
 	return error;
 }
@@ -1113,7 +1121,17 @@ xfs_release(
 		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
 		    (!(ip->i_d.di_flags &
 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
-			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+
+			/*
+			 * If we can't get the iolock just skip truncating
+			 * the blocks past EOF because we could deadlock
+			 * with the mmap_sem otherwise.  We'll get another
+			 * chance to drop them once the last reference to
+			 * the inode is dropped, so we'll never leak blocks
+			 * permanently.
+			 */
+			error = xfs_free_eofblocks(mp, ip,
+						   XFS_FREE_EOF_TRYLOCK);
 			if (error)
 				return error;
 		}
@@ -1184,7 +1202,7 @@ xfs_inactive(
 		     (!(ip->i_d.di_flags &
 				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
 		      (ip->i_delayed_blks != 0)))) {
-			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+			error = xfs_free_eofblocks(mp, ip, 0);
 			if (error)
 				return VN_INACTIVE_CACHE;
 		}

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] xfs: fix mmap_sem vs iolock lock order inversion in xfs_free_eofblocks
  2009-10-19  4:03 [PATCH] xfs: fix mmap_sem vs iolock lock order inversion in xfs_free_eofblocks Christoph Hellwig
@ 2009-10-30  8:55 ` Christoph Hellwig
  2009-11-02 21:10 ` [PATCH] xfs: fix mmap_sem vs iolock lock order inversion inxfs_free_eofblocks Alex Elder
  1 sibling, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2009-10-30  8:55 UTC (permalink / raw)
  To: xfs

ping?

On Mon, Oct 19, 2009 at 12:03:46AM -0400, Christoph Hellwig wrote:
> When xfs_free_eofblocks is called from ->release the VM might already
> hold the mmap_sem, but in the write path we take the iolock before
> taking the mmap_sem in the generic write code.
> 
> Switch xfs_free_eofblocks to only trylock the iolock if called from ->release
> and skip trimming the prellocated blocks in that case.  We'll still free
> them later on the final iput.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: xfs/fs/xfs/xfs_rw.h
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_rw.h	2009-10-14 17:30:02.396028076 +0200
> +++ xfs/fs/xfs/xfs_rw.h	2009-10-14 17:30:20.472287472 +0200
> @@ -37,13 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_
>  }
>  
>  /*
> - * Flags for xfs_free_eofblocks
> - */
> -#define XFS_FREE_EOF_LOCK	(1<<0)
> -#define XFS_FREE_EOF_NOLOCK	(1<<1)
> -
> -
> -/*
>   * helper function to extract extent size hint from inode
>   */
>  STATIC_INLINE xfs_extlen_t
> Index: xfs/fs/xfs/xfs_vnodeops.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_vnodeops.c	2009-10-14 17:30:13.363024201 +0200
> +++ xfs/fs/xfs/xfs_vnodeops.c	2009-10-14 17:35:39.314256388 +0200
> @@ -709,6 +709,11 @@ xfs_fsync(
>  }
>  
>  /*
> + * Flags for xfs_free_eofblocks
> + */
> +#define XFS_FREE_EOF_TRYLOCK	(1<<0)
> +
> +/*
>   * This is called by xfs_inactive to free any blocks beyond eof
>   * when the link count isn't zero and by xfs_dm_punch_hole() when
>   * punching a hole to EOF.
> @@ -726,7 +731,6 @@ xfs_free_eofblocks(
>  	xfs_filblks_t	map_len;
>  	int		nimaps;
>  	xfs_bmbt_irec_t	imap;
> -	int		use_iolock = (flags & XFS_FREE_EOF_LOCK);
>  
>  	/*
>  	 * Figure out if there are any blocks beyond the end
> @@ -768,14 +772,19 @@ xfs_free_eofblocks(
>  		 * cache and we can't
>  		 * do that within a transaction.
>  		 */
> -		if (use_iolock)
> +		if (flags & XFS_FREE_EOF_TRYLOCK) {
> +			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
> +				xfs_trans_cancel(tp, 0);
> +				return 0;
> +			}
> +		} else {
>  			xfs_ilock(ip, XFS_IOLOCK_EXCL);
> +		}
>  		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
>  				    ip->i_size);
>  		if (error) {
>  			xfs_trans_cancel(tp, 0);
> -			if (use_iolock)
> -				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
> +			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
>  			return error;
>  		}
>  
> @@ -812,8 +821,7 @@ xfs_free_eofblocks(
>  			error = xfs_trans_commit(tp,
>  						XFS_TRANS_RELEASE_LOG_RES);
>  		}
> -		xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
> -					    : XFS_ILOCK_EXCL));
> +		xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
>  	}
>  	return error;
>  }
> @@ -1113,7 +1121,17 @@ xfs_release(
>  		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
>  		    (!(ip->i_d.di_flags &
>  				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
> -			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
> +
> +			/*
> +			 * If we can't get the iolock just skip truncating
> +			 * the blocks past EOF because we could deadlock
> +			 * with the mmap_sem otherwise.  We'll get another
> +			 * chance to drop them once the last reference to
> +			 * the inode is dropped, so we'll never leak blocks
> +			 * permanently.
> +			 */
> +			error = xfs_free_eofblocks(mp, ip,
> +						   XFS_FREE_EOF_TRYLOCK);
>  			if (error)
>  				return error;
>  		}
> @@ -1184,7 +1202,7 @@ xfs_inactive(
>  		     (!(ip->i_d.di_flags &
>  				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
>  		      (ip->i_delayed_blks != 0)))) {
> -			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
> +			error = xfs_free_eofblocks(mp, ip, 0);
>  			if (error)
>  				return VN_INACTIVE_CACHE;
>  		}
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
---end quoted text---

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] xfs: fix mmap_sem vs iolock lock order inversion inxfs_free_eofblocks
  2009-10-19  4:03 [PATCH] xfs: fix mmap_sem vs iolock lock order inversion in xfs_free_eofblocks Christoph Hellwig
  2009-10-30  8:55 ` Christoph Hellwig
@ 2009-11-02 21:10 ` Alex Elder
  2009-11-03 14:51   ` Christoph Hellwig
  1 sibling, 1 reply; 4+ messages in thread
From: Alex Elder @ 2009-11-02 21:10 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs

Christoph Hellwig wrote:
> When xfs_free_eofblocks is called from ->release the VM might already
> hold the mmap_sem, but in the write path we take the iolock before
> taking the mmap_sem in the generic write code.
> 
> Switch xfs_free_eofblocks to only trylock the iolock if called from ->release
> and skip trimming the prellocated blocks in that case.  We'll still free
> them later on the final iput.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

I have a minor suggestion below, but it looks correct to me.
I tried to get a better idea of what the conditions were
where mmap_sem would be held by VM when ->release gets
called, but didn't get to the bottom of that.  If it is
easily characterized you could mention it in comments.

Reviewed-by: Alex Elder <aelder@sgi.com>


> Index: xfs/fs/xfs/xfs_rw.h
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_rw.h	2009-10-14 17:30:02.396028076 +0200
> +++ xfs/fs/xfs/xfs_rw.h	2009-10-14 17:30:20.472287472 +0200
> @@ -37,13 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_
>  }
> 
>  /*
> - * Flags for xfs_free_eofblocks
> - */
> -#define XFS_FREE_EOF_LOCK	(1<<0)
> -#define XFS_FREE_EOF_NOLOCK	(1<<1)
> -
> -
> -/*
>   * helper function to extract extent size hint from inode
>   */
>  STATIC_INLINE xfs_extlen_t
> Index: xfs/fs/xfs/xfs_vnodeops.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_vnodeops.c	2009-10-14 17:30:13.363024201 +0200
> +++ xfs/fs/xfs/xfs_vnodeops.c	2009-10-14 17:35:39.314256388 +0200
> @@ -709,6 +709,11 @@ xfs_fsync(
>  }
> 
>  /*
> + * Flags for xfs_free_eofblocks
> + */
> +#define XFS_FREE_EOF_TRYLOCK	(1<<0)

This is really a Boolean in the place it's used, rather
than a flags mask.  Unless you have plans to add other
flags, maybe just don't bother defining this, and pass
a zero/non-zero for a renamed argument to xfs_free_eofblocks().
(I mention this again below, in that context.)

> +/*
>   * This is called by xfs_inactive to free any blocks beyond eof
>   * when the link count isn't zero and by xfs_dm_punch_hole() when
>   * punching a hole to EOF.
> @@ -726,7 +731,6 @@ xfs_free_eofblocks(
>  	xfs_filblks_t	map_len;
>  	int		nimaps;
>  	xfs_bmbt_irec_t	imap;
> -	int		use_iolock = (flags & XFS_FREE_EOF_LOCK);
> 
>  	/*
>  	 * Figure out if there are any blocks beyond the end
> @@ -768,14 +772,19 @@ xfs_free_eofblocks(
>  		 * cache and we can't
>  		 * do that within a transaction.
>  		 */
> -		if (use_iolock)
> +		if (flags & XFS_FREE_EOF_TRYLOCK) {

If you rename "flag" to "skip_ok" (or maybe "try_lock":

            if (skip_ok && ! xfs_ilock_nowait(...)) {
                      xfs_trans_cancel();
                      return 0;
            } else
                      xfs_ilock(...);

             if (try_lock)
> +			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
> +				xfs_trans_cancel(tp, 0);
> +				return 0;
> +			}
> +		} else {
>  			xfs_ilock(ip, XFS_IOLOCK_EXCL);
> +		}
>  		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
>  				    ip->i_size);
>  		if (error) {
>  			xfs_trans_cancel(tp, 0);
> -			if (use_iolock)
> -				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
> +			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
>  			return error;
>  		}
> 
> @@ -812,8 +821,7 @@ xfs_free_eofblocks(
>  			error = xfs_trans_commit(tp,
>  						XFS_TRANS_RELEASE_LOG_RES);
>  		}
> -		xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
> -					    : XFS_ILOCK_EXCL));
> +		xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
>  	}
>  	return error;
>  }
> @@ -1113,7 +1121,17 @@ xfs_release(
>  		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
>  		    (!(ip->i_d.di_flags &
>  				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
> -			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
> +
> +			/*
> +			 * If we can't get the iolock just skip truncating
> +			 * the blocks past EOF because we could deadlock
> +			 * with the mmap_sem otherwise.  We'll get another
> +			 * chance to drop them once the last reference to
> +			 * the inode is dropped, so we'll never leak blocks
> +			 * permanently.
> +			 */
> +			error = xfs_free_eofblocks(mp, ip,
> +						   XFS_FREE_EOF_TRYLOCK);
>  			if (error)
>  				return error;
>  		}
> @@ -1184,7 +1202,7 @@ xfs_inactive(
>  		     (!(ip->i_d.di_flags &
>  				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
>  		      (ip->i_delayed_blks != 0)))) {
> -			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
> +			error = xfs_free_eofblocks(mp, ip, 0);
>  			if (error)
>  				return VN_INACTIVE_CACHE;
>  		}
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] xfs: fix mmap_sem vs iolock lock order inversion inxfs_free_eofblocks
  2009-11-02 21:10 ` [PATCH] xfs: fix mmap_sem vs iolock lock order inversion inxfs_free_eofblocks Alex Elder
@ 2009-11-03 14:51   ` Christoph Hellwig
  0 siblings, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2009-11-03 14:51 UTC (permalink / raw)
  To: Alex Elder; +Cc: Christoph Hellwig, xfs

On Mon, Nov 02, 2009 at 03:10:33PM -0600, Alex Elder wrote:
> I have a minor suggestion below, but it looks correct to me.
> I tried to get a better idea of what the conditions were
> where mmap_sem would be held by VM when ->release gets
> called, but didn't get to the bottom of that.  If it is
> easily characterized you could mention it in comments.

It's the VMA merging code.  But IMHO that's too much of an
implementation detail to put into the comment here.  Commit message
might be fine.

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2009-11-03 14:51 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-19  4:03 [PATCH] xfs: fix mmap_sem vs iolock lock order inversion in xfs_free_eofblocks Christoph Hellwig
2009-10-30  8:55 ` Christoph Hellwig
2009-11-02 21:10 ` [PATCH] xfs: fix mmap_sem vs iolock lock order inversion inxfs_free_eofblocks Alex Elder
2009-11-03 14:51   ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.