All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] xfs: take the ILOCK when accessing the inode core
@ 2022-01-05 19:52 Darrick J. Wong
  2022-01-06  1:47 ` Dave Chinner
  2022-01-06  2:32 ` [PATCH v3] " Darrick J. Wong
  0 siblings, 2 replies; 5+ messages in thread
From: Darrick J. Wong @ 2022-01-05 19:52 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

I was poking around in the directory code while diagnosing online fsck
bugs, and noticed that xfs_readdir doesn't actually take the directory
ILOCK when it calls xfs_dir2_isblock.  xfs_dir_open most probably loaded
the data fork mappings and the VFS took i_rwsem (aka IOLOCK_SHARED) so
we're protected against writer threads, but we really need to follow the
locking model like we do in other places.

To avoid unnecessarily cycling the ILOCK for fairly small directories,
change the block/leaf _getdents functions to consume the ILOCK hold that
the parent readdir function took to decide on a _getdents implementation.

It is ok to cycle the ILOCK in readdir because the VFS takes the IOLOCK
in the appropriate mode during lookups and writes, and we don't want to
be holding the ILOCK when we copy directory entries to userspace in case
there's a page fault.  We really only need it to protect against data
fork lookups, like we do for other files.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
v2: reduce the scope of the locked region, and reduce lock cycling
---
 fs/xfs/xfs_dir2_readdir.c |   52 +++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 8310005af00f..74844edd86a7 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -138,7 +138,8 @@ xfs_dir2_sf_getdents(
 STATIC int
 xfs_dir2_block_getdents(
 	struct xfs_da_args	*args,
-	struct dir_context	*ctx)
+	struct dir_context	*ctx,
+	unsigned int		*lock_mode)
 {
 	struct xfs_inode	*dp = args->dp;	/* incore directory inode */
 	struct xfs_buf		*bp;		/* buffer for block */
@@ -146,7 +147,6 @@ xfs_dir2_block_getdents(
 	int			wantoff;	/* starting block offset */
 	xfs_off_t		cook;
 	struct xfs_da_geometry	*geo = args->geo;
-	int			lock_mode;
 	unsigned int		offset, next_offset;
 	unsigned int		end;
 
@@ -156,12 +156,13 @@ xfs_dir2_block_getdents(
 	if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
 		return 0;
 
-	lock_mode = xfs_ilock_data_map_shared(dp);
 	error = xfs_dir3_block_read(args->trans, dp, &bp);
-	xfs_iunlock(dp, lock_mode);
 	if (error)
 		return error;
 
+	xfs_iunlock(dp, *lock_mode);
+	*lock_mode = 0;
+
 	/*
 	 * Extract the byte offset we start at from the seek pointer.
 	 * We'll skip entries before this.
@@ -344,7 +345,8 @@ STATIC int
 xfs_dir2_leaf_getdents(
 	struct xfs_da_args	*args,
 	struct dir_context	*ctx,
-	size_t			bufsize)
+	size_t			bufsize,
+	unsigned int		*lock_mode)
 {
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_mount	*mp = dp->i_mount;
@@ -356,7 +358,6 @@ xfs_dir2_leaf_getdents(
 	xfs_dir2_off_t		curoff;		/* current overall offset */
 	int			length;		/* temporary length value */
 	int			byteoff;	/* offset in current block */
-	int			lock_mode;
 	unsigned int		offset = 0;
 	int			error = 0;	/* error return value */
 
@@ -390,13 +391,16 @@ xfs_dir2_leaf_getdents(
 				bp = NULL;
 			}
 
-			lock_mode = xfs_ilock_data_map_shared(dp);
+			if (*lock_mode == 0)
+				*lock_mode = xfs_ilock_data_map_shared(dp);
 			error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff,
 					&rablk, &bp);
-			xfs_iunlock(dp, lock_mode);
 			if (error || !bp)
 				break;
 
+			xfs_iunlock(dp, *lock_mode);
+			*lock_mode = 0;
+
 			xfs_dir3_data_check(dp, bp);
 			/*
 			 * Find our position in the block.
@@ -507,8 +511,9 @@ xfs_readdir(
 	size_t			bufsize)
 {
 	struct xfs_da_args	args = { NULL };
-	int			rval;
-	int			v;
+	unsigned int		lock_mode;
+	int			isblock;
+	int			error;
 
 	trace_xfs_readdir(dp);
 
@@ -523,13 +528,22 @@ xfs_readdir(
 	args.trans = tp;
 
 	if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
-		rval = xfs_dir2_sf_getdents(&args, ctx);
-	else if ((rval = xfs_dir2_isblock(&args, &v)))
-		;
-	else if (v)
-		rval = xfs_dir2_block_getdents(&args, ctx);
-	else
-		rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
-
-	return rval;
+		return xfs_dir2_sf_getdents(&args, ctx);
+
+	lock_mode = xfs_ilock_data_map_shared(dp);
+	error = xfs_dir2_isblock(&args, &isblock);
+	if (error)
+		goto out_unlock;
+
+	if (isblock) {
+		error = xfs_dir2_block_getdents(&args, ctx, &lock_mode);
+		goto out_unlock;
+	}
+
+	error = xfs_dir2_leaf_getdents(&args, ctx, bufsize, &lock_mode);
+
+out_unlock:
+	if (lock_mode)
+		xfs_iunlock(dp, lock_mode);
+	return error;
 }

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] xfs: take the ILOCK when accessing the inode core
  2022-01-05 19:52 [PATCH v2] xfs: take the ILOCK when accessing the inode core Darrick J. Wong
@ 2022-01-06  1:47 ` Dave Chinner
  2022-01-06  2:13   ` Darrick J. Wong
  2022-01-06  2:32 ` [PATCH v3] " Darrick J. Wong
  1 sibling, 1 reply; 5+ messages in thread
From: Dave Chinner @ 2022-01-06  1:47 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Wed, Jan 05, 2022 at 11:52:26AM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> I was poking around in the directory code while diagnosing online fsck
> bugs, and noticed that xfs_readdir doesn't actually take the directory
> ILOCK when it calls xfs_dir2_isblock.  xfs_dir_open most probably loaded
> the data fork mappings and the VFS took i_rwsem (aka IOLOCK_SHARED) so
> we're protected against writer threads, but we really need to follow the
> locking model like we do in other places.
> 
> To avoid unnecessarily cycling the ILOCK for fairly small directories,
> change the block/leaf _getdents functions to consume the ILOCK hold that
> the parent readdir function took to decide on a _getdents implementation.
> 
> It is ok to cycle the ILOCK in readdir because the VFS takes the IOLOCK
> in the appropriate mode during lookups and writes, and we don't want to
> be holding the ILOCK when we copy directory entries to userspace in case
> there's a page fault.  We really only need it to protect against data
> fork lookups, like we do for other files.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> v2: reduce the scope of the locked region, and reduce lock cycling

Looks good, one minor thing: can you add a comment to xfs_readdir()
that callers/VFS needs to hold the i_rwsem to ensure that the
directory is not being concurrently modified? Maybe even add an
ASSERT(rwsem_is_locked(VFS_I(ip)->i_rwsem)) to catch cases where
this gets broken?

Other than that it looks good.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] xfs: take the ILOCK when accessing the inode core
  2022-01-06  1:47 ` Dave Chinner
@ 2022-01-06  2:13   ` Darrick J. Wong
  0 siblings, 0 replies; 5+ messages in thread
From: Darrick J. Wong @ 2022-01-06  2:13 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

On Thu, Jan 06, 2022 at 12:47:12PM +1100, Dave Chinner wrote:
> On Wed, Jan 05, 2022 at 11:52:26AM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > I was poking around in the directory code while diagnosing online fsck
> > bugs, and noticed that xfs_readdir doesn't actually take the directory
> > ILOCK when it calls xfs_dir2_isblock.  xfs_dir_open most probably loaded
> > the data fork mappings and the VFS took i_rwsem (aka IOLOCK_SHARED) so
> > we're protected against writer threads, but we really need to follow the
> > locking model like we do in other places.
> > 
> > To avoid unnecessarily cycling the ILOCK for fairly small directories,
> > change the block/leaf _getdents functions to consume the ILOCK hold that
> > the parent readdir function took to decide on a _getdents implementation.
> > 
> > It is ok to cycle the ILOCK in readdir because the VFS takes the IOLOCK
> > in the appropriate mode during lookups and writes, and we don't want to
> > be holding the ILOCK when we copy directory entries to userspace in case
> > there's a page fault.  We really only need it to protect against data
> > fork lookups, like we do for other files.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> > v2: reduce the scope of the locked region, and reduce lock cycling
> 
> Looks good, one minor thing: can you add a comment to xfs_readdir()
> that callers/VFS needs to hold the i_rwsem to ensure that the
> directory is not being concurrently modified? Maybe even add an
> ASSERT(rwsem_is_locked(VFS_I(ip)->i_rwsem)) to catch cases where
> this gets broken?

The documentation already says the caller has to hold the inode lock,
but I will change it to say the IOLOCK specifically.  And add the
ASSERT.

--D

> 
> Other than that it looks good.
> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v3] xfs: take the ILOCK when accessing the inode core
  2022-01-05 19:52 [PATCH v2] xfs: take the ILOCK when accessing the inode core Darrick J. Wong
  2022-01-06  1:47 ` Dave Chinner
@ 2022-01-06  2:32 ` Darrick J. Wong
  2022-01-06  4:10   ` Dave Chinner
  1 sibling, 1 reply; 5+ messages in thread
From: Darrick J. Wong @ 2022-01-06  2:32 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

I was poking around in the directory code while diagnosing online fsck
bugs, and noticed that xfs_readdir doesn't actually take the directory
ILOCK when it calls xfs_dir2_isblock.  xfs_dir_open most probably loaded
the data fork mappings and the VFS took i_rwsem (aka IOLOCK_SHARED) so
we're protected against writer threads, but we really need to follow the
locking model like we do in other places.

To avoid unnecessarily cycling the ILOCK for fairly small directories,
change the block/leaf _getdents functions to consume the ILOCK hold that
the parent readdir function took to decide on a _getdents implementation.

It is ok to cycle the ILOCK in readdir because the VFS takes the IOLOCK
in the appropriate mode during lookups and writes, and we don't want to
be holding the ILOCK when we copy directory entries to userspace in case
there's a page fault.  We really only need it to protect against data
fork lookups, like we do for other files.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
v3: better documentation and assertions around the IOLOCK
v2: reduce the scope of the locked region, and reduce lock cycling
---
 fs/xfs/xfs_dir2_readdir.c |   55 +++++++++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 8310005af00f..a7174a5b3203 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -138,7 +138,8 @@ xfs_dir2_sf_getdents(
 STATIC int
 xfs_dir2_block_getdents(
 	struct xfs_da_args	*args,
-	struct dir_context	*ctx)
+	struct dir_context	*ctx,
+	unsigned int		*lock_mode)
 {
 	struct xfs_inode	*dp = args->dp;	/* incore directory inode */
 	struct xfs_buf		*bp;		/* buffer for block */
@@ -146,7 +147,6 @@ xfs_dir2_block_getdents(
 	int			wantoff;	/* starting block offset */
 	xfs_off_t		cook;
 	struct xfs_da_geometry	*geo = args->geo;
-	int			lock_mode;
 	unsigned int		offset, next_offset;
 	unsigned int		end;
 
@@ -156,12 +156,13 @@ xfs_dir2_block_getdents(
 	if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
 		return 0;
 
-	lock_mode = xfs_ilock_data_map_shared(dp);
 	error = xfs_dir3_block_read(args->trans, dp, &bp);
-	xfs_iunlock(dp, lock_mode);
 	if (error)
 		return error;
 
+	xfs_iunlock(dp, *lock_mode);
+	*lock_mode = 0;
+
 	/*
 	 * Extract the byte offset we start at from the seek pointer.
 	 * We'll skip entries before this.
@@ -344,7 +345,8 @@ STATIC int
 xfs_dir2_leaf_getdents(
 	struct xfs_da_args	*args,
 	struct dir_context	*ctx,
-	size_t			bufsize)
+	size_t			bufsize,
+	unsigned int		*lock_mode)
 {
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_mount	*mp = dp->i_mount;
@@ -356,7 +358,6 @@ xfs_dir2_leaf_getdents(
 	xfs_dir2_off_t		curoff;		/* current overall offset */
 	int			length;		/* temporary length value */
 	int			byteoff;	/* offset in current block */
-	int			lock_mode;
 	unsigned int		offset = 0;
 	int			error = 0;	/* error return value */
 
@@ -390,13 +391,16 @@ xfs_dir2_leaf_getdents(
 				bp = NULL;
 			}
 
-			lock_mode = xfs_ilock_data_map_shared(dp);
+			if (*lock_mode == 0)
+				*lock_mode = xfs_ilock_data_map_shared(dp);
 			error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff,
 					&rablk, &bp);
-			xfs_iunlock(dp, lock_mode);
 			if (error || !bp)
 				break;
 
+			xfs_iunlock(dp, *lock_mode);
+			*lock_mode = 0;
+
 			xfs_dir3_data_check(dp, bp);
 			/*
 			 * Find our position in the block.
@@ -496,7 +500,7 @@ xfs_dir2_leaf_getdents(
  *
  * If supplied, the transaction collects locked dir buffers to avoid
  * nested buffer deadlocks.  This function does not dirty the
- * transaction.  The caller should ensure that the inode is locked
+ * transaction.  The caller must hold the IOLOCK (shared or exclusive)
  * before calling this function.
  */
 int
@@ -507,8 +511,9 @@ xfs_readdir(
 	size_t			bufsize)
 {
 	struct xfs_da_args	args = { NULL };
-	int			rval;
-	int			v;
+	unsigned int		lock_mode;
+	int			isblock;
+	int			error;
 
 	trace_xfs_readdir(dp);
 
@@ -516,6 +521,7 @@ xfs_readdir(
 		return -EIO;
 
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
+	ASSERT(xfs_isilocked(dp, XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
 	XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
 
 	args.dp = dp;
@@ -523,13 +529,22 @@ xfs_readdir(
 	args.trans = tp;
 
 	if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
-		rval = xfs_dir2_sf_getdents(&args, ctx);
-	else if ((rval = xfs_dir2_isblock(&args, &v)))
-		;
-	else if (v)
-		rval = xfs_dir2_block_getdents(&args, ctx);
-	else
-		rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
-
-	return rval;
+		return xfs_dir2_sf_getdents(&args, ctx);
+
+	lock_mode = xfs_ilock_data_map_shared(dp);
+	error = xfs_dir2_isblock(&args, &isblock);
+	if (error)
+		goto out_unlock;
+
+	if (isblock) {
+		error = xfs_dir2_block_getdents(&args, ctx, &lock_mode);
+		goto out_unlock;
+	}
+
+	error = xfs_dir2_leaf_getdents(&args, ctx, bufsize, &lock_mode);
+
+out_unlock:
+	if (lock_mode)
+		xfs_iunlock(dp, lock_mode);
+	return error;
 }

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] xfs: take the ILOCK when accessing the inode core
  2022-01-06  2:32 ` [PATCH v3] " Darrick J. Wong
@ 2022-01-06  4:10   ` Dave Chinner
  0 siblings, 0 replies; 5+ messages in thread
From: Dave Chinner @ 2022-01-06  4:10 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Wed, Jan 05, 2022 at 06:32:35PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> I was poking around in the directory code while diagnosing online fsck
> bugs, and noticed that xfs_readdir doesn't actually take the directory
> ILOCK when it calls xfs_dir2_isblock.  xfs_dir_open most probably loaded
> the data fork mappings and the VFS took i_rwsem (aka IOLOCK_SHARED) so
> we're protected against writer threads, but we really need to follow the
> locking model like we do in other places.
> 
> To avoid unnecessarily cycling the ILOCK for fairly small directories,
> change the block/leaf _getdents functions to consume the ILOCK hold that
> the parent readdir function took to decide on a _getdents implementation.
> 
> It is ok to cycle the ILOCK in readdir because the VFS takes the IOLOCK
> in the appropriate mode during lookups and writes, and we don't want to
> be holding the ILOCK when we copy directory entries to userspace in case
> there's a page fault.  We really only need it to protect against data
> fork lookups, like we do for other files.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> v3: better documentation and assertions around the IOLOCK
> v2: reduce the scope of the locked region, and reduce lock cycling
> ---
>  fs/xfs/xfs_dir2_readdir.c |   55 +++++++++++++++++++++++++++++----------------
>  1 file changed, 35 insertions(+), 20 deletions(-)

Looks good now.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-01-06  4:10 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-05 19:52 [PATCH v2] xfs: take the ILOCK when accessing the inode core Darrick J. Wong
2022-01-06  1:47 ` Dave Chinner
2022-01-06  2:13   ` Darrick J. Wong
2022-01-06  2:32 ` [PATCH v3] " Darrick J. Wong
2022-01-06  4:10   ` Dave Chinner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.