All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: sandeen@sandeen.net, darrick.wong@oracle.com
Cc: Brian Foster <bfoster@redhat.com>, Christoph Hellwig <hch@lst.de>,
	linux-xfs@vger.kernel.org
Subject: [PATCH 16/21] xfs: fix an incore inode UAF in xfs_bui_recover
Date: Mon, 26 Oct 2020 16:38:38 -0700	[thread overview]
Message-ID: <160375551822.882906.6397999012355771666.stgit@magnolia> (raw)
In-Reply-To: <160375541713.882906.11902959014062334120.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

Source kernel commit: ff4ab5e02a0447dd1e290883eb6cd7d94848e590

In xfs_bui_item_recover, there exists a use-after-free bug with regard
to the inode that is involved in the bmap replay operation.  If the
mapping operation does not complete, we call xfs_bmap_unmap_extent to
create a deferred op to finish the unmapping work, and we retain a
pointer to the incore inode.

Unfortunately, the very next thing we do is commit the transaction and
drop the inode.  If reclaim tears down the inode before we try to finish
the defer ops, we dereference garbage and blow up.  Therefore, create a
way to join inodes to the defer ops freezer so that we can maintain the
xfs_inode reference until we're done with the inode.

Note: This imposes the requirement that there be enough memory to keep
every incore inode resident throughout recovery.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/xfs_inode.h      |    6 ++++++
 libxfs/libxfs_api_defs.h |    1 +
 libxfs/rdwr.c            |   11 ++++++++---
 libxfs/xfs_defer.c       |   42 +++++++++++++++++++++++++++++++++++++-----
 libxfs/xfs_defer.h       |   11 +++++++++--
 5 files changed, 61 insertions(+), 10 deletions(-)


diff --git a/include/xfs_inode.h b/include/xfs_inode.h
index 40310df6a785..742aebc8c3e3 100644
--- a/include/xfs_inode.h
+++ b/include/xfs_inode.h
@@ -36,6 +36,7 @@ struct inode {
 	uint32_t		i_gid;
 	uint32_t		i_nlink;
 	xfs_dev_t		i_rdev;	 /* This actually holds xfs_dev_t */
+	unsigned int		i_count;
 	unsigned long		i_state; /* Not actually used in userspace */
 	uint32_t		i_generation;
 	uint64_t		i_version;
@@ -61,6 +62,11 @@ static inline void i_gid_write(struct inode *inode, uint32_t gid)
 	inode->i_gid = gid;
 }
 
+static inline void ihold(struct inode *inode)
+{
+	inode->i_count++;
+}
+
 typedef struct xfs_inode {
 	struct cache_node	i_node;
 	struct xfs_mount	*i_mount;	/* fs mount struct ptr */
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 419e6d9888cf..9a00ce6609b3 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -123,6 +123,7 @@
 #define xfs_inode_validate_extsize	libxfs_inode_validate_extsize
 
 #define xfs_iread_extents		libxfs_iread_extents
+#define xfs_irele			libxfs_irele
 #define xfs_log_calc_minimum_size	libxfs_log_calc_minimum_size
 #define xfs_log_get_max_trans_res	libxfs_log_get_max_trans_res
 #define xfs_log_sb			libxfs_log_sb
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 79c1029b1109..0001a459aa64 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -1254,6 +1254,7 @@ libxfs_iget(
 	if (!ip)
 		return -ENOMEM;
 
+	VFS_I(ip)->i_count = 1;
 	ip->i_ino = ino;
 	ip->i_mount = mp;
 	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, 0);
@@ -1305,9 +1306,13 @@ void
 libxfs_irele(
 	struct xfs_inode	*ip)
 {
-	ASSERT(ip->i_itemp == NULL);
-	libxfs_idestroy(ip);
-	kmem_cache_free(xfs_inode_zone, ip);
+	VFS_I(ip)->i_count--;
+
+	if (VFS_I(ip)->i_count == 0) {
+		ASSERT(ip->i_itemp == NULL);
+		libxfs_idestroy(ip);
+		kmem_cache_free(xfs_inode_zone, ip);
+	}
 }
 
 /*
diff --git a/libxfs/xfs_defer.c b/libxfs/xfs_defer.c
index 8e660f1a6cfc..efcb9e008275 100644
--- a/libxfs/xfs_defer.c
+++ b/libxfs/xfs_defer.c
@@ -551,10 +551,14 @@ xfs_defer_move(
  * deferred ops state is transferred to the capture structure and the
  * transaction is then ready for the caller to commit it.  If there are no
  * intent items to capture, this function returns NULL.
+ *
+ * If capture_ip is not NULL, the capture structure will obtain an extra
+ * reference to the inode.
  */
 static struct xfs_defer_capture *
 xfs_defer_ops_capture(
-	struct xfs_trans		*tp)
+	struct xfs_trans		*tp,
+	struct xfs_inode		*capture_ip)
 {
 	struct xfs_defer_capture	*dfc;
 
@@ -580,6 +584,15 @@ xfs_defer_ops_capture(
 	/* Preserve the log reservation size. */
 	dfc->dfc_logres = tp->t_log_res;
 
+	/*
+	 * Grab an extra reference to this inode and attach it to the capture
+	 * structure.
+	 */
+	if (capture_ip) {
+		ihold(VFS_I(capture_ip));
+		dfc->dfc_capture_ip = capture_ip;
+	}
+
 	return dfc;
 }
 
@@ -590,24 +603,33 @@ xfs_defer_ops_release(
 	struct xfs_defer_capture	*dfc)
 {
 	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+	if (dfc->dfc_capture_ip)
+		xfs_irele(dfc->dfc_capture_ip);
 	kmem_free(dfc);
 }
 
 /*
  * Capture any deferred ops and commit the transaction.  This is the last step
- * needed to finish a log intent item that we recovered from the log.
+ * needed to finish a log intent item that we recovered from the log.  If any
+ * of the deferred ops operate on an inode, the caller must pass in that inode
+ * so that the reference can be transferred to the capture structure.  The
+ * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
+ * xfs_defer_ops_continue.
  */
 int
 xfs_defer_ops_capture_and_commit(
 	struct xfs_trans		*tp,
+	struct xfs_inode		*capture_ip,
 	struct list_head		*capture_list)
 {
 	struct xfs_mount		*mp = tp->t_mountp;
 	struct xfs_defer_capture	*dfc;
 	int				error;
 
+	ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL));
+
 	/* If we don't capture anything, commit transaction and exit. */
-	dfc = xfs_defer_ops_capture(tp);
+	dfc = xfs_defer_ops_capture(tp, capture_ip);
 	if (!dfc)
 		return xfs_trans_commit(tp);
 
@@ -624,16 +646,26 @@ xfs_defer_ops_capture_and_commit(
 
 /*
  * Attach a chain of captured deferred ops to a new transaction and free the
- * capture structure.
+ * capture structure.  If an inode was captured, it will be passed back to the
+ * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
+ * The caller now owns the inode reference.
  */
 void
 xfs_defer_ops_continue(
 	struct xfs_defer_capture	*dfc,
-	struct xfs_trans		*tp)
+	struct xfs_trans		*tp,
+	struct xfs_inode		**captured_ipp)
 {
 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
 	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
 
+	/* Lock and join the captured inode to the new transaction. */
+	if (dfc->dfc_capture_ip) {
+		xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0);
+	}
+	*captured_ipp = dfc->dfc_capture_ip;
+
 	/* Move captured dfops chain and state to the transaction. */
 	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
 	tp->t_flags |= dfc->dfc_tpflags;
diff --git a/libxfs/xfs_defer.h b/libxfs/xfs_defer.h
index 6cde6f0713f7..05472f71fffe 100644
--- a/libxfs/xfs_defer.h
+++ b/libxfs/xfs_defer.h
@@ -82,6 +82,12 @@ struct xfs_defer_capture {
 
 	/* Log reservation saved from the transaction. */
 	unsigned int		dfc_logres;
+
+	/*
+	 * An inode reference that must be maintained to complete the deferred
+	 * work.
+	 */
+	struct xfs_inode	*dfc_capture_ip;
 };
 
 /*
@@ -89,8 +95,9 @@ struct xfs_defer_capture {
  * This doesn't normally happen except log recovery.
  */
 int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
-		struct list_head *capture_list);
-void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp);
+		struct xfs_inode *capture_ip, struct list_head *capture_list);
+void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
+		struct xfs_inode **captured_ipp);
 void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d);
 
 #endif /* __XFS_DEFER_H__ */


  parent reply	other threads:[~2020-10-26 23:41 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-26 23:36 [PATCH 00/21] xfsprogs: sync with 5.10, part 2 Darrick J. Wong
2020-10-26 23:37 ` [PATCH 01/21] xfs: remove typedef xfs_attr_sf_entry_t Darrick J. Wong
2020-10-26 23:37 ` [PATCH 02/21] xfs: Remove typedef xfs_attr_shortform_t Darrick J. Wong
2020-10-26 23:37 ` [PATCH 03/21] xfs: Use variable-size array for nameval in xfs_attr_sf_entry Darrick J. Wong
2020-10-26 23:37 ` [PATCH 04/21] xfs: Convert xfs_attr_sf macros to inline functions Darrick J. Wong
2020-10-26 23:37 ` [PATCH 05/21] xfs: don't free rt blocks when we're doing a REMAP bunmapi call Darrick J. Wong
2020-10-26 23:37 ` [PATCH 06/21] xfs: log new intent items created as part of finishing recovered intent items Darrick J. Wong
2020-10-26 23:37 ` [PATCH 07/21] xfs: use the existing type definition for di_projid Darrick J. Wong
2020-10-26 23:37 ` [PATCH 08/21] xfs: fix some comments Darrick J. Wong
2020-10-26 23:37 ` [PATCH 09/21] xfs: remove the redundant crc feature check in xfs_attr3_rmt_verify Darrick J. Wong
2020-10-26 23:38 ` [PATCH 10/21] xfs: code cleanup in xfs_attr_leaf_entsize_{remote,local} Darrick J. Wong
2020-10-26 23:38 ` [PATCH 11/21] xfs: avoid shared rmap operations for attr fork extents Darrick J. Wong
2020-10-26 23:38 ` [PATCH 12/21] xfs: remove xfs_defer_reset Darrick J. Wong
2020-10-26 23:38 ` [PATCH 13/21] xfs: proper replay of deferred ops queued during log recovery Darrick J. Wong
2020-10-26 23:38 ` [PATCH 14/21] xfs: xfs_defer_capture should absorb remaining block reservations Darrick J. Wong
2020-10-26 23:38 ` [PATCH 15/21] xfs: xfs_defer_capture should absorb remaining transaction reservation Darrick J. Wong
2020-10-26 23:38 ` Darrick J. Wong [this message]
2020-10-26 23:38 ` [PATCH 17/21] xfs: change the order in which child and parent defer ops are finished Darrick J. Wong
2020-10-26 23:38 ` [PATCH 18/21] xfs: periodically relog deferred intent items Darrick J. Wong
2020-10-26 23:38 ` [PATCH 19/21] xfs: only relog deferred intent items if free space in the log gets low Darrick J. Wong
2020-10-26 23:39 ` [PATCH 20/21] xfs: fix high key handling in the rt allocator's query_range function Darrick J. Wong
2020-10-26 23:39 ` [PATCH 21/21] xfs: set xefi_discard when creating a deferred agfl free log intent item Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=160375551822.882906.6397999012355771666.stgit@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=bfoster@redhat.com \
    --cc=hch@lst.de \
    --cc=linux-xfs@vger.kernel.org \
    --cc=sandeen@sandeen.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.