All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2 1/2] xfs: Remove "committed" argument of xfs_dir_ialloc
@ 2018-02-21  2:28 Chandan Rajendra
  2018-02-21  2:28 ` [PATCH V2 2/2] xfs: Fix "use after free" of intent items Chandan Rajendra
  2018-04-02 22:50 ` [PATCH V2 1/2] xfs: Remove "committed" argument of xfs_dir_ialloc Darrick J. Wong
  0 siblings, 2 replies; 7+ messages in thread
From: Chandan Rajendra @ 2018-02-21  2:28 UTC (permalink / raw)
  To: linux-xfs; +Cc: Chandan Rajendra, hch, darrick.wong, bfoster

xfs_dir_ialloc() rolls the current transaction when allocation of a new
inode required the space manager to perform an allocation and replinish
the Inode btree.

None of the callers of xfs_dir_ialloc() need to know if the
transaction was committed. Hence this commit removes the "committed"
argument of xfs_dir_ialloc.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
Changelog:
v1 -> v2:
1. Add a patch to remove the "commited" argument of xfs_dir_ialloc.
   Hence the second patch now has xfs_dir_ialloc() invoking
   xfs_trans_roll() by passing NULL as the second argument.
   
 fs/xfs/xfs_inode.c   | 14 +++-----------
 fs/xfs/xfs_inode.h   |  2 +-
 fs/xfs/xfs_qm.c      |  4 +---
 fs/xfs/xfs_symlink.c |  2 +-
 4 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 604ee38..37d9426 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -972,10 +972,8 @@ xfs_dir_ialloc(
 	xfs_nlink_t	nlink,
 	dev_t		rdev,
 	prid_t		prid,		/* project id */
-	xfs_inode_t	**ipp,		/* pointer to inode; it will be
+	xfs_inode_t	**ipp)		/* pointer to inode; it will be
 					   locked. */
-	int		*committed)
-
 {
 	xfs_trans_t	*tp;
 	xfs_inode_t	*ip;
@@ -1050,8 +1048,6 @@ xfs_dir_ialloc(
 		}
 
 		code = xfs_trans_roll(&tp);
-		if (committed != NULL)
-			*committed = 1;
 
 		/*
 		 * Re-attach the quota info that we detached from prev trx.
@@ -1088,9 +1084,6 @@ xfs_dir_ialloc(
 		}
 		ASSERT(!ialloc_context && ip);
 
-	} else {
-		if (committed != NULL)
-			*committed = 0;
 	}
 
 	*ipp = ip;
@@ -1217,8 +1210,7 @@ xfs_create(
 	 * entry pointing to them, but a directory also the "." entry
 	 * pointing to itself.
 	 */
-	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
-			NULL);
+	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip);
 	if (error)
 		goto out_trans_cancel;
 
@@ -1351,7 +1343,7 @@ xfs_create_tmpfile(
 	if (error)
 		goto out_trans_cancel;
 
-	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
+	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip);
 	if (error)
 		goto out_trans_cancel;
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 3e8dc99..50a2a84 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -431,7 +431,7 @@ xfs_extlen_t	xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
 int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
 			       xfs_nlink_t, dev_t, prid_t,
-			       struct xfs_inode **, int *);
+			       struct xfs_inode **);
 
 /* from xfs_file.c */
 enum xfs_prealloc_flags {
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5b848f4..ec39ae2 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -748,7 +748,6 @@ xfs_qm_qino_alloc(
 {
 	xfs_trans_t	*tp;
 	int		error;
-	int		committed;
 	bool		need_alloc = true;
 
 	*ip = NULL;
@@ -788,8 +787,7 @@ xfs_qm_qino_alloc(
 		return error;
 
 	if (need_alloc) {
-		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
-				&committed);
+		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip);
 		if (error) {
 			xfs_trans_cancel(tp);
 			return error;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 2e9e793..5b66ac1 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -264,7 +264,7 @@ xfs_symlink(
 	 * Allocate an inode for the symlink.
 	 */
 	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
-			       prid, &ip, NULL);
+			       prid, &ip);
 	if (error)
 		goto out_trans_cancel;
 
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH V2 2/2] xfs: Fix "use after free" of intent items
  2018-02-21  2:28 [PATCH V2 1/2] xfs: Remove "committed" argument of xfs_dir_ialloc Chandan Rajendra
@ 2018-02-21  2:28 ` Chandan Rajendra
  2018-02-21  4:01   ` Dave Chinner
  2018-04-02 22:50 ` [PATCH V2 1/2] xfs: Remove "committed" argument of xfs_dir_ialloc Darrick J. Wong
  1 sibling, 1 reply; 7+ messages in thread
From: Chandan Rajendra @ 2018-02-21  2:28 UTC (permalink / raw)
  To: linux-xfs; +Cc: Chandan Rajendra, hch, darrick.wong, bfoster

generic/388 can cause the following "use after free" error to occur,

 =============================================================================
 BUG xfs_efi_item (Not tainted): Poison overwritten
 -----------------------------------------------------------------------------

 Disabling lock debugging due to kernel taint
 INFO: 0x00000000292c4bd4-0x00000000292c4bd4. First byte 0x6a instead of 0x6b
 INFO: Allocated in .kmem_zone_alloc+0xcc/0x190 age=79 cpu=0 pid=12436
        .__slab_alloc+0x54/0x80
        .kmem_cache_alloc+0x124/0x350
        .kmem_zone_alloc+0xcc/0x190
        .xfs_efi_init+0x48/0xf0
        .xfs_extent_free_create_intent+0x40/0x130
        .xfs_defer_intake_work+0x74/0x1e0
        .xfs_defer_finish+0xac/0x5c0
        .xfs_itruncate_extents+0x170/0x590
        .xfs_inactive_truncate+0xcc/0x170
        .xfs_inactive+0x1d8/0x2f0
        .xfs_fs_destroy_inode+0xe4/0x3d0
        .destroy_inode+0x68/0xb0
        .do_unlinkat+0x1e8/0x390
        system_call+0x58/0x6c
 INFO: Freed in .xfs_efi_item_free+0x44/0x80 age=79 cpu=0 pid=12436
        .kmem_cache_free+0x120/0x2b0
        .xfs_efi_item_free+0x44/0x80
        .xfs_trans_free_items+0xd4/0x130
        .__xfs_trans_commit+0xd0/0x350
        .xfs_trans_roll+0x4c/0x90
        .xfs_defer_trans_roll+0xa4/0x2b0
        .xfs_defer_finish+0xb8/0x5c0
        .xfs_itruncate_extents+0x170/0x590
        .xfs_inactive_truncate+0xcc/0x170
        .xfs_inactive+0x1d8/0x2f0
        .xfs_fs_destroy_inode+0xe4/0x3d0
        .destroy_inode+0x68/0xb0
        .do_unlinkat+0x1e8/0x390
        system_call+0x58/0x6c

This happens due to the following interaction,
1. xfs_defer_finish() creates "extent free" intent item and adds it to the
   per-transction list of log items.
2. xfs_defer_trans_roll() invokes __xfs_trans_commit(). Here, if the
   XFS_MOUNT_FS_SHUTDOWN flag is set, we invoke io_unlock() operation
   for each of the log items in the per-transction list. For "extent
   free" log items xfs_efi_item_unlock() gets invoked which then frees
   the xfs_efi_log_item.
3. xfs_defer_trans_roll() then invokes xfs_defer_trans_abort(). Since the
   xfs_defer_pending->dfp_intent is still set to the "extent free" intent
   item, we invoke xfs_extent_free_abort_intent(). This accesses the
   previously freed xfs_efi_log_item to decrement the ref count.

This commit fixes the bug by invoking xfs_defer_trans_abort() only when
the log items in the per-transaction list have been committed to the
CIL. The log item "committed" status is being tracked by
xfs_defer_ops->dop_committed. This was the behaviour prior to commit
3ab78df2a59a485f479d26852a060acfd8c4ecd7 (xfs: rework xfs_bmap_free
callers to use xfs_defer_ops).

Reported-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
 fs/xfs/libxfs/xfs_defer.c | 6 +++---
 fs/xfs/xfs_bmap_util.c    | 2 +-
 fs/xfs/xfs_inode.c        | 2 +-
 fs/xfs/xfs_trans.c        | 8 +++++++-
 fs/xfs/xfs_trans.h        | 2 +-
 fs/xfs/xfs_trans_inode.c  | 2 +-
 6 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 087fea0..eb879a0 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -256,13 +256,13 @@ xfs_defer_trans_roll(
 	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
 
 	/* Roll the transaction. */
-	error = xfs_trans_roll(tp);
+	error = xfs_trans_roll(tp, &dop->dop_committed);
 	if (error) {
 		trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error);
-		xfs_defer_trans_abort(*tp, dop, error);
+		if (dop->dop_committed == true)
+			xfs_defer_trans_abort(*tp, dop, error);
 		return error;
 	}
-	dop->dop_committed = true;
 
 	/* Rejoin the joined inodes. */
 	for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c83f549..9d84d36 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1830,7 +1830,7 @@ xfs_swap_change_owner(
 		if (error != -EAGAIN)
 			break;
 
-		error = xfs_trans_roll(tpp);
+		error = xfs_trans_roll(tpp, NULL);
 		if (error)
 			break;
 		tp = *tpp;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 37d9426..9b74539 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1047,7 +1047,7 @@ xfs_dir_ialloc(
 			tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
 		}
 
-		code = xfs_trans_roll(&tp);
+		code = xfs_trans_roll(&tp, NULL);
 
 		/*
 		 * Re-attach the quota info that we detached from prev trx.
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 86f92df..1bf2505 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1057,12 +1057,15 @@ xfs_trans_cancel(
  */
 int
 xfs_trans_roll(
-	struct xfs_trans	**tpp)
+	struct xfs_trans	**tpp,
+	bool 			*committed)
 {
 	struct xfs_trans	*trans = *tpp;
 	struct xfs_trans_res	tres;
 	int			error;
 
+	if (committed)
+		*committed = false;
 	/*
 	 * Copy the critical parameters from one trans to the next.
 	 */
@@ -1082,6 +1085,9 @@ xfs_trans_roll(
 	if (error)
 		return error;
 
+	if (committed)
+		*committed = true;
+
 	/*
 	 * Reserve space in the log for the next transaction.
 	 * This also pushes items in the "AIL", the list of logged items,
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 9d542df..d4deb49 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -230,7 +230,7 @@ int		xfs_trans_free_extent(struct xfs_trans *,
 				      struct xfs_efd_log_item *, xfs_fsblock_t,
 				      xfs_extlen_t, struct xfs_owner_info *);
 int		xfs_trans_commit(struct xfs_trans *);
-int		xfs_trans_roll(struct xfs_trans **);
+int		xfs_trans_roll(struct xfs_trans **, bool *);
 int		xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *);
 void		xfs_trans_cancel(xfs_trans_t *);
 int		xfs_trans_ail_init(struct xfs_mount *);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4a89da4..bedd5fd 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -147,7 +147,7 @@ xfs_trans_roll_inode(
 	int			error;
 
 	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
-	error = xfs_trans_roll(tpp);
+	error = xfs_trans_roll(tpp, NULL);
 	if (!error)
 		xfs_trans_ijoin(*tpp, ip, 0);
 	return error;
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH V2 2/2] xfs: Fix "use after free" of intent items
  2018-02-21  2:28 ` [PATCH V2 2/2] xfs: Fix "use after free" of intent items Chandan Rajendra
@ 2018-02-21  4:01   ` Dave Chinner
  2018-02-21  5:45     ` Chandan Rajendra
  0 siblings, 1 reply; 7+ messages in thread
From: Dave Chinner @ 2018-02-21  4:01 UTC (permalink / raw)
  To: Chandan Rajendra; +Cc: linux-xfs, hch, darrick.wong, bfoster

On Wed, Feb 21, 2018 at 07:58:26AM +0530, Chandan Rajendra wrote:
> generic/388 can cause the following "use after free" error to occur,
> 
>  =============================================================================
>  BUG xfs_efi_item (Not tainted): Poison overwritten
>  -----------------------------------------------------------------------------
> 
>  Disabling lock debugging due to kernel taint
>  INFO: 0x00000000292c4bd4-0x00000000292c4bd4. First byte 0x6a instead of 0x6b
>  INFO: Allocated in .kmem_zone_alloc+0xcc/0x190 age=79 cpu=0 pid=12436
>         .__slab_alloc+0x54/0x80
>         .kmem_cache_alloc+0x124/0x350
>         .kmem_zone_alloc+0xcc/0x190
>         .xfs_efi_init+0x48/0xf0
>         .xfs_extent_free_create_intent+0x40/0x130
>         .xfs_defer_intake_work+0x74/0x1e0
>         .xfs_defer_finish+0xac/0x5c0
>         .xfs_itruncate_extents+0x170/0x590
>         .xfs_inactive_truncate+0xcc/0x170
>         .xfs_inactive+0x1d8/0x2f0
>         .xfs_fs_destroy_inode+0xe4/0x3d0
>         .destroy_inode+0x68/0xb0
>         .do_unlinkat+0x1e8/0x390
>         system_call+0x58/0x6c
>  INFO: Freed in .xfs_efi_item_free+0x44/0x80 age=79 cpu=0 pid=12436
>         .kmem_cache_free+0x120/0x2b0
>         .xfs_efi_item_free+0x44/0x80
>         .xfs_trans_free_items+0xd4/0x130
>         .__xfs_trans_commit+0xd0/0x350
>         .xfs_trans_roll+0x4c/0x90
>         .xfs_defer_trans_roll+0xa4/0x2b0
>         .xfs_defer_finish+0xb8/0x5c0
>         .xfs_itruncate_extents+0x170/0x590
>         .xfs_inactive_truncate+0xcc/0x170
>         .xfs_inactive+0x1d8/0x2f0
>         .xfs_fs_destroy_inode+0xe4/0x3d0
>         .destroy_inode+0x68/0xb0
>         .do_unlinkat+0x1e8/0x390
>         system_call+0x58/0x6c
> 
> This happens due to the following interaction,
> 1. xfs_defer_finish() creates "extent free" intent item and adds it to the
>    per-transction list of log items.
> 2. xfs_defer_trans_roll() invokes __xfs_trans_commit(). Here, if the
>    XFS_MOUNT_FS_SHUTDOWN flag is set, we invoke io_unlock() operation
>    for each of the log items in the per-transction list. For "extent
>    free" log items xfs_efi_item_unlock() gets invoked which then frees
>    the xfs_efi_log_item.

Isn't this the problem? i.e. it's freeing the EFI item because the
abort flag is set, but there are still other references to it? Isn't
this wrong now that we have a cleanup function that will free the
EFI is the commit fails? i.e: this ....

> 3. xfs_defer_trans_roll() then invokes xfs_defer_trans_abort(). Since the
>    xfs_defer_pending->dfp_intent is still set to the "extent free" intent
>    item, we invoke xfs_extent_free_abort_intent(). This accesses the
>    previously freed xfs_efi_log_item to decrement the ref count.

... will free the EFI correctly on transaction commit failure and so
we should be able to remove the "free immediately" hack from the
iop_unlock() code that we used to need because there was no external
reference that could free the EFI on commit failure....

> This commit fixes the bug by invoking xfs_defer_trans_abort() only when
> the log items in the per-transaction list have been committed to the
> CIL. The log item "committed" status is being tracked by
> xfs_defer_ops->dop_committed. This was the behaviour prior to commit
> 3ab78df2a59a485f479d26852a060acfd8c4ecd7 (xfs: rework xfs_bmap_free
> callers to use xfs_defer_ops).

Freeing in ->iop_unlock was always troublesome. We should get
rid of it, not go back to it.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2 2/2] xfs: Fix "use after free" of intent items
  2018-02-21  4:01   ` Dave Chinner
@ 2018-02-21  5:45     ` Chandan Rajendra
  2018-02-21 21:04       ` Dave Chinner
  0 siblings, 1 reply; 7+ messages in thread
From: Chandan Rajendra @ 2018-02-21  5:45 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs, hch, darrick.wong, bfoster

On Wednesday, February 21, 2018 9:31:09 AM IST Dave Chinner wrote:
> On Wed, Feb 21, 2018 at 07:58:26AM +0530, Chandan Rajendra wrote:
> > generic/388 can cause the following "use after free" error to occur,
> > 
> >  =============================================================================
> >  BUG xfs_efi_item (Not tainted): Poison overwritten
> >  -----------------------------------------------------------------------------
> > 
> >  Disabling lock debugging due to kernel taint
> >  INFO: 0x00000000292c4bd4-0x00000000292c4bd4. First byte 0x6a instead of 0x6b
> >  INFO: Allocated in .kmem_zone_alloc+0xcc/0x190 age=79 cpu=0 pid=12436
> >         .__slab_alloc+0x54/0x80
> >         .kmem_cache_alloc+0x124/0x350
> >         .kmem_zone_alloc+0xcc/0x190
> >         .xfs_efi_init+0x48/0xf0
> >         .xfs_extent_free_create_intent+0x40/0x130
> >         .xfs_defer_intake_work+0x74/0x1e0
> >         .xfs_defer_finish+0xac/0x5c0
> >         .xfs_itruncate_extents+0x170/0x590
> >         .xfs_inactive_truncate+0xcc/0x170
> >         .xfs_inactive+0x1d8/0x2f0
> >         .xfs_fs_destroy_inode+0xe4/0x3d0
> >         .destroy_inode+0x68/0xb0
> >         .do_unlinkat+0x1e8/0x390
> >         system_call+0x58/0x6c
> >  INFO: Freed in .xfs_efi_item_free+0x44/0x80 age=79 cpu=0 pid=12436
> >         .kmem_cache_free+0x120/0x2b0
> >         .xfs_efi_item_free+0x44/0x80
> >         .xfs_trans_free_items+0xd4/0x130
> >         .__xfs_trans_commit+0xd0/0x350
> >         .xfs_trans_roll+0x4c/0x90
> >         .xfs_defer_trans_roll+0xa4/0x2b0
> >         .xfs_defer_finish+0xb8/0x5c0
> >         .xfs_itruncate_extents+0x170/0x590
> >         .xfs_inactive_truncate+0xcc/0x170
> >         .xfs_inactive+0x1d8/0x2f0
> >         .xfs_fs_destroy_inode+0xe4/0x3d0
> >         .destroy_inode+0x68/0xb0
> >         .do_unlinkat+0x1e8/0x390
> >         system_call+0x58/0x6c
> > 
> > This happens due to the following interaction,
> > 1. xfs_defer_finish() creates "extent free" intent item and adds it to the
> >    per-transction list of log items.
> > 2. xfs_defer_trans_roll() invokes __xfs_trans_commit(). Here, if the
> >    XFS_MOUNT_FS_SHUTDOWN flag is set, we invoke io_unlock() operation
> >    for each of the log items in the per-transction list. For "extent
> >    free" log items xfs_efi_item_unlock() gets invoked which then frees
> >    the xfs_efi_log_item.
> 
> Isn't this the problem? i.e. it's freeing the EFI item because the
> abort flag is set, but there are still other references to it? Isn't
> this wrong now that we have a cleanup function that will free the
> EFI is the commit fails? i.e: this ....
> 
> > 3. xfs_defer_trans_roll() then invokes xfs_defer_trans_abort(). Since the
> >    xfs_defer_pending->dfp_intent is still set to the "extent free" intent
> >    item, we invoke xfs_extent_free_abort_intent(). This accesses the
> >    previously freed xfs_efi_log_item to decrement the ref count.
> 
> ... will free the EFI correctly on transaction commit failure and so
> we should be able to remove the "free immediately" hack from the
> iop_unlock() code that we used to need because there was no external
> reference that could free the EFI on commit failure....

The EFI log item would have its ref count initialized to 2 (One for the
CIL/AIL and one for EFD).  In the "use after free" case the EFI was not
committed to the CIL. At this point, we don't have any entity owning a
reference to the log item since the EFI was not committed to the CIL nor was
the EFD created. Hence IMHO, freeing the log item is the right approach.

Also, the cleanup function (i.e. xfs_extent_free_abort_intent()) would
decrement the ref count by 1 i.e. Its refcount would go from the initial value
of 2 to 1. Hence the cleanup function won't free the EFI log item causing a
memory leak.

> 
> > This commit fixes the bug by invoking xfs_defer_trans_abort() only when
> > the log items in the per-transaction list have been committed to the
> > CIL. The log item "committed" status is being tracked by
> > xfs_defer_ops->dop_committed. This was the behaviour prior to commit
> > 3ab78df2a59a485f479d26852a060acfd8c4ecd7 (xfs: rework xfs_bmap_free
> > callers to use xfs_defer_ops).
> 
> Freeing in ->iop_unlock was always troublesome. We should get
> rid of it, not go back to it.
> 
> Cheers,
> 
> Dave.
> 


-- 
chandan


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2 2/2] xfs: Fix "use after free" of intent items
  2018-02-21  5:45     ` Chandan Rajendra
@ 2018-02-21 21:04       ` Dave Chinner
  2018-02-22  2:41         ` Chandan Rajendra
  0 siblings, 1 reply; 7+ messages in thread
From: Dave Chinner @ 2018-02-21 21:04 UTC (permalink / raw)
  To: Chandan Rajendra; +Cc: linux-xfs, hch, darrick.wong, bfoster

On Wed, Feb 21, 2018 at 11:15:22AM +0530, Chandan Rajendra wrote:
> On Wednesday, February 21, 2018 9:31:09 AM IST Dave Chinner wrote:
> > On Wed, Feb 21, 2018 at 07:58:26AM +0530, Chandan Rajendra wrote:
> > > generic/388 can cause the following "use after free" error to occur,
> > > 
> > >  =============================================================================
> > >  BUG xfs_efi_item (Not tainted): Poison overwritten
> > >  -----------------------------------------------------------------------------
> > > 
> > >  Disabling lock debugging due to kernel taint
> > >  INFO: 0x00000000292c4bd4-0x00000000292c4bd4. First byte 0x6a instead of 0x6b
> > >  INFO: Allocated in .kmem_zone_alloc+0xcc/0x190 age=79 cpu=0 pid=12436
> > >         .__slab_alloc+0x54/0x80
> > >         .kmem_cache_alloc+0x124/0x350
> > >         .kmem_zone_alloc+0xcc/0x190
> > >         .xfs_efi_init+0x48/0xf0
> > >         .xfs_extent_free_create_intent+0x40/0x130
> > >         .xfs_defer_intake_work+0x74/0x1e0
> > >         .xfs_defer_finish+0xac/0x5c0
> > >         .xfs_itruncate_extents+0x170/0x590
> > >         .xfs_inactive_truncate+0xcc/0x170
> > >         .xfs_inactive+0x1d8/0x2f0
> > >         .xfs_fs_destroy_inode+0xe4/0x3d0
> > >         .destroy_inode+0x68/0xb0
> > >         .do_unlinkat+0x1e8/0x390
> > >         system_call+0x58/0x6c
> > >  INFO: Freed in .xfs_efi_item_free+0x44/0x80 age=79 cpu=0 pid=12436
> > >         .kmem_cache_free+0x120/0x2b0
> > >         .xfs_efi_item_free+0x44/0x80
> > >         .xfs_trans_free_items+0xd4/0x130
> > >         .__xfs_trans_commit+0xd0/0x350
> > >         .xfs_trans_roll+0x4c/0x90
> > >         .xfs_defer_trans_roll+0xa4/0x2b0
> > >         .xfs_defer_finish+0xb8/0x5c0
> > >         .xfs_itruncate_extents+0x170/0x590
> > >         .xfs_inactive_truncate+0xcc/0x170
> > >         .xfs_inactive+0x1d8/0x2f0
> > >         .xfs_fs_destroy_inode+0xe4/0x3d0
> > >         .destroy_inode+0x68/0xb0
> > >         .do_unlinkat+0x1e8/0x390
> > >         system_call+0x58/0x6c
> > > 
> > > This happens due to the following interaction,
> > > 1. xfs_defer_finish() creates "extent free" intent item and adds it to the
> > >    per-transction list of log items.
> > > 2. xfs_defer_trans_roll() invokes __xfs_trans_commit(). Here, if the
> > >    XFS_MOUNT_FS_SHUTDOWN flag is set, we invoke io_unlock() operation
> > >    for each of the log items in the per-transction list. For "extent
> > >    free" log items xfs_efi_item_unlock() gets invoked which then frees
> > >    the xfs_efi_log_item.
> > 
> > Isn't this the problem? i.e. it's freeing the EFI item because the
> > abort flag is set, but there are still other references to it? Isn't
> > this wrong now that we have a cleanup function that will free the
> > EFI is the commit fails? i.e: this ....
> > 
> > > 3. xfs_defer_trans_roll() then invokes xfs_defer_trans_abort(). Since the
> > >    xfs_defer_pending->dfp_intent is still set to the "extent free" intent
> > >    item, we invoke xfs_extent_free_abort_intent(). This accesses the
> > >    previously freed xfs_efi_log_item to decrement the ref count.
> > 
> > ... will free the EFI correctly on transaction commit failure and so
> > we should be able to remove the "free immediately" hack from the
> > iop_unlock() code that we used to need because there was no external
> > reference that could free the EFI on commit failure....
> 
> The EFI log item would have its ref count initialized to 2 (One for the
> CIL/AIL and one for EFD).

And that's part of what has always been wrong with this code - it
takes references for things that don't really have references yet,
instead of taking them when the EFI is actually referenced. Hence we
have to hack around that with things like ignoring the reference
count when we get an abort signal, because we haven't reference
counted the object properly.

> In the "use after free" case the EFI was not
> committed to the CIL. At this point, we don't have any entity owning a
> reference to the log item since the EFI was not committed to the CIL nor was
> the EFD created. Hence IMHO, freeing the log item is the right approach.

No, we clearly have a reference - the deferops structure has a
reference to it. If it didn't have a reference, then we wouldn't
have a use after free situation....

> Also, the cleanup function (i.e. xfs_extent_free_abort_intent()) would
> decrement the ref count by 1 i.e. Its refcount would go from the initial value
> of 2 to 1. Hence the cleanup function won't free the EFI log item causing a
> memory leak.

No, then the xfs_extent_free_abort_intent() call releases it,
dropping the remaining reference that was intended for the EFD
which, because we aborted, will never take over control of that
reference.

i.e. the two references that are created at initialisation are for
the EFI itself as it passes through the CIL and journal commit, and
the second reference is used by the defer_ops it is attached to and
then handed to the EFD for it's pass through the CIL and journal
commit.

If we never commit the EFI, then we need to release that reference
(on abort), and then we need to clean up the extra reference that is
destined for the EFD which is held by the deferops. That's done by
xfs_extent_free_abort_intent().

And I'm guessing that we need to make sure the same fix goes through
all the other items that are used as deferops intents that were
modeled on the EFI/EFD infrastructure, too.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2 2/2] xfs: Fix "use after free" of intent items
  2018-02-21 21:04       ` Dave Chinner
@ 2018-02-22  2:41         ` Chandan Rajendra
  0 siblings, 0 replies; 7+ messages in thread
From: Chandan Rajendra @ 2018-02-22  2:41 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs, hch, darrick.wong, bfoster

On Thursday, February 22, 2018 2:34:04 AM IST Dave Chinner wrote:
> On Wed, Feb 21, 2018 at 11:15:22AM +0530, Chandan Rajendra wrote:
> > On Wednesday, February 21, 2018 9:31:09 AM IST Dave Chinner wrote:
> > > On Wed, Feb 21, 2018 at 07:58:26AM +0530, Chandan Rajendra wrote:
> > > > generic/388 can cause the following "use after free" error to occur,
> > > > 
> > > >  =============================================================================
> > > >  BUG xfs_efi_item (Not tainted): Poison overwritten
> > > >  -----------------------------------------------------------------------------
> > > > 
> > > >  Disabling lock debugging due to kernel taint
> > > >  INFO: 0x00000000292c4bd4-0x00000000292c4bd4. First byte 0x6a instead of 0x6b
> > > >  INFO: Allocated in .kmem_zone_alloc+0xcc/0x190 age=79 cpu=0 pid=12436
> > > >         .__slab_alloc+0x54/0x80
> > > >         .kmem_cache_alloc+0x124/0x350
> > > >         .kmem_zone_alloc+0xcc/0x190
> > > >         .xfs_efi_init+0x48/0xf0
> > > >         .xfs_extent_free_create_intent+0x40/0x130
> > > >         .xfs_defer_intake_work+0x74/0x1e0
> > > >         .xfs_defer_finish+0xac/0x5c0
> > > >         .xfs_itruncate_extents+0x170/0x590
> > > >         .xfs_inactive_truncate+0xcc/0x170
> > > >         .xfs_inactive+0x1d8/0x2f0
> > > >         .xfs_fs_destroy_inode+0xe4/0x3d0
> > > >         .destroy_inode+0x68/0xb0
> > > >         .do_unlinkat+0x1e8/0x390
> > > >         system_call+0x58/0x6c
> > > >  INFO: Freed in .xfs_efi_item_free+0x44/0x80 age=79 cpu=0 pid=12436
> > > >         .kmem_cache_free+0x120/0x2b0
> > > >         .xfs_efi_item_free+0x44/0x80
> > > >         .xfs_trans_free_items+0xd4/0x130
> > > >         .__xfs_trans_commit+0xd0/0x350
> > > >         .xfs_trans_roll+0x4c/0x90
> > > >         .xfs_defer_trans_roll+0xa4/0x2b0
> > > >         .xfs_defer_finish+0xb8/0x5c0
> > > >         .xfs_itruncate_extents+0x170/0x590
> > > >         .xfs_inactive_truncate+0xcc/0x170
> > > >         .xfs_inactive+0x1d8/0x2f0
> > > >         .xfs_fs_destroy_inode+0xe4/0x3d0
> > > >         .destroy_inode+0x68/0xb0
> > > >         .do_unlinkat+0x1e8/0x390
> > > >         system_call+0x58/0x6c
> > > > 
> > > > This happens due to the following interaction,
> > > > 1. xfs_defer_finish() creates "extent free" intent item and adds it to the
> > > >    per-transction list of log items.
> > > > 2. xfs_defer_trans_roll() invokes __xfs_trans_commit(). Here, if the
> > > >    XFS_MOUNT_FS_SHUTDOWN flag is set, we invoke io_unlock() operation
> > > >    for each of the log items in the per-transction list. For "extent
> > > >    free" log items xfs_efi_item_unlock() gets invoked which then frees
> > > >    the xfs_efi_log_item.
> > > 
> > > Isn't this the problem? i.e. it's freeing the EFI item because the
> > > abort flag is set, but there are still other references to it? Isn't
> > > this wrong now that we have a cleanup function that will free the
> > > EFI is the commit fails? i.e: this ....
> > > 
> > > > 3. xfs_defer_trans_roll() then invokes xfs_defer_trans_abort(). Since the
> > > >    xfs_defer_pending->dfp_intent is still set to the "extent free" intent
> > > >    item, we invoke xfs_extent_free_abort_intent(). This accesses the
> > > >    previously freed xfs_efi_log_item to decrement the ref count.
> > > 
> > > ... will free the EFI correctly on transaction commit failure and so
> > > we should be able to remove the "free immediately" hack from the
> > > iop_unlock() code that we used to need because there was no external
> > > reference that could free the EFI on commit failure....
> > 
> > The EFI log item would have its ref count initialized to 2 (One for the
> > CIL/AIL and one for EFD).
> 
> And that's part of what has always been wrong with this code - it
> takes references for things that don't really have references yet,
> instead of taking them when the EFI is actually referenced. Hence we
> have to hack around that with things like ignoring the reference
> count when we get an abort signal, because we haven't reference
> counted the object properly.
> 
> > In the "use after free" case the EFI was not
> > committed to the CIL. At this point, we don't have any entity owning a
> > reference to the log item since the EFI was not committed to the CIL nor was
> > the EFD created. Hence IMHO, freeing the log item is the right approach.
> 
> No, we clearly have a reference - the deferops structure has a
> reference to it. If it didn't have a reference, then we wouldn't
> have a use after free situation....
> 
> > Also, the cleanup function (i.e. xfs_extent_free_abort_intent()) would
> > decrement the ref count by 1 i.e. Its refcount would go from the initial value
> > of 2 to 1. Hence the cleanup function won't free the EFI log item causing a
> > memory leak.
> 
> No, then the xfs_extent_free_abort_intent() call releases it,
> dropping the remaining reference that was intended for the EFD
> which, because we aborted, will never take over control of that
> reference.
> 
> i.e. the two references that are created at initialisation are for
> the EFI itself as it passes through the CIL and journal commit, and
> the second reference is used by the defer_ops it is attached to and
> then handed to the EFD for it's pass through the CIL and journal
> commit.
> 
> If we never commit the EFI, then we need to release that reference
> (on abort), and then we need to clean up the extra reference that is
> destined for the EFD which is held by the deferops. That's done by
> xfs_extent_free_abort_intent().
> 
> And I'm guessing that we need to make sure the same fix goes through
> all the other items that are used as deferops intents that were
> modeled on the EFI/EFD infrastructure, too.
> 

I agree. Thanks for the explaination. I will work on writing up a 
patch to fix this issue for all the log items which have 
corresponding intent and done items.

-- 
chandan


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH V2 1/2] xfs: Remove "committed" argument of xfs_dir_ialloc
  2018-02-21  2:28 [PATCH V2 1/2] xfs: Remove "committed" argument of xfs_dir_ialloc Chandan Rajendra
  2018-02-21  2:28 ` [PATCH V2 2/2] xfs: Fix "use after free" of intent items Chandan Rajendra
@ 2018-04-02 22:50 ` Darrick J. Wong
  1 sibling, 0 replies; 7+ messages in thread
From: Darrick J. Wong @ 2018-04-02 22:50 UTC (permalink / raw)
  To: Chandan Rajendra; +Cc: linux-xfs, hch, bfoster

On Wed, Feb 21, 2018 at 07:58:25AM +0530, Chandan Rajendra wrote:
> xfs_dir_ialloc() rolls the current transaction when allocation of a new
> inode required the space manager to perform an allocation and replinish
> the Inode btree.
> 
> None of the callers of xfs_dir_ialloc() need to know if the
> transaction was committed. Hence this commit removes the "committed"
> argument of xfs_dir_ialloc.
> 
> Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>

This first patch looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
> Changelog:
> v1 -> v2:
> 1. Add a patch to remove the "commited" argument of xfs_dir_ialloc.
>    Hence the second patch now has xfs_dir_ialloc() invoking
>    xfs_trans_roll() by passing NULL as the second argument.
>    
>  fs/xfs/xfs_inode.c   | 14 +++-----------
>  fs/xfs/xfs_inode.h   |  2 +-
>  fs/xfs/xfs_qm.c      |  4 +---
>  fs/xfs/xfs_symlink.c |  2 +-
>  4 files changed, 6 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 604ee38..37d9426 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -972,10 +972,8 @@ xfs_dir_ialloc(
>  	xfs_nlink_t	nlink,
>  	dev_t		rdev,
>  	prid_t		prid,		/* project id */
> -	xfs_inode_t	**ipp,		/* pointer to inode; it will be
> +	xfs_inode_t	**ipp)		/* pointer to inode; it will be
>  					   locked. */
> -	int		*committed)
> -
>  {
>  	xfs_trans_t	*tp;
>  	xfs_inode_t	*ip;
> @@ -1050,8 +1048,6 @@ xfs_dir_ialloc(
>  		}
>  
>  		code = xfs_trans_roll(&tp);
> -		if (committed != NULL)
> -			*committed = 1;
>  
>  		/*
>  		 * Re-attach the quota info that we detached from prev trx.
> @@ -1088,9 +1084,6 @@ xfs_dir_ialloc(
>  		}
>  		ASSERT(!ialloc_context && ip);
>  
> -	} else {
> -		if (committed != NULL)
> -			*committed = 0;
>  	}
>  
>  	*ipp = ip;
> @@ -1217,8 +1210,7 @@ xfs_create(
>  	 * entry pointing to them, but a directory also the "." entry
>  	 * pointing to itself.
>  	 */
> -	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
> -			NULL);
> +	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip);
>  	if (error)
>  		goto out_trans_cancel;
>  
> @@ -1351,7 +1343,7 @@ xfs_create_tmpfile(
>  	if (error)
>  		goto out_trans_cancel;
>  
> -	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
> +	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip);
>  	if (error)
>  		goto out_trans_cancel;
>  
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 3e8dc99..50a2a84 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -431,7 +431,7 @@ xfs_extlen_t	xfs_get_cowextsz_hint(struct xfs_inode *ip);
>  
>  int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
>  			       xfs_nlink_t, dev_t, prid_t,
> -			       struct xfs_inode **, int *);
> +			       struct xfs_inode **);
>  
>  /* from xfs_file.c */
>  enum xfs_prealloc_flags {
> diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
> index 5b848f4..ec39ae2 100644
> --- a/fs/xfs/xfs_qm.c
> +++ b/fs/xfs/xfs_qm.c
> @@ -748,7 +748,6 @@ xfs_qm_qino_alloc(
>  {
>  	xfs_trans_t	*tp;
>  	int		error;
> -	int		committed;
>  	bool		need_alloc = true;
>  
>  	*ip = NULL;
> @@ -788,8 +787,7 @@ xfs_qm_qino_alloc(
>  		return error;
>  
>  	if (need_alloc) {
> -		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
> -				&committed);
> +		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip);
>  		if (error) {
>  			xfs_trans_cancel(tp);
>  			return error;
> diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
> index 2e9e793..5b66ac1 100644
> --- a/fs/xfs/xfs_symlink.c
> +++ b/fs/xfs/xfs_symlink.c
> @@ -264,7 +264,7 @@ xfs_symlink(
>  	 * Allocate an inode for the symlink.
>  	 */
>  	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
> -			       prid, &ip, NULL);
> +			       prid, &ip);
>  	if (error)
>  		goto out_trans_cancel;
>  
> -- 
> 2.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-04-02 22:50 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-21  2:28 [PATCH V2 1/2] xfs: Remove "committed" argument of xfs_dir_ialloc Chandan Rajendra
2018-02-21  2:28 ` [PATCH V2 2/2] xfs: Fix "use after free" of intent items Chandan Rajendra
2018-02-21  4:01   ` Dave Chinner
2018-02-21  5:45     ` Chandan Rajendra
2018-02-21 21:04       ` Dave Chinner
2018-02-22  2:41         ` Chandan Rajendra
2018-04-02 22:50 ` [PATCH V2 1/2] xfs: Remove "committed" argument of xfs_dir_ialloc Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.