* [PATCH 2/4] xfs: simplify xfs_rmap_lookup_le call sites
2022-04-27 0:51 [PATCHSET v2 0/4] xfs: fix rmap inefficiencies Darrick J. Wong
2022-04-27 0:51 ` [PATCH 1/4] xfs: capture buffer ops in the xfs_buf tracepoints Darrick J. Wong
@ 2022-04-27 0:51 ` Darrick J. Wong
2022-04-27 0:51 ` [PATCH 3/4] xfs: speed up rmap lookups by using non-overlapped lookups when possible Darrick J. Wong
2022-04-27 0:51 ` [PATCH 4/4] xfs: speed up write operations " Darrick J. Wong
3 siblings, 0 replies; 9+ messages in thread
From: Darrick J. Wong @ 2022-04-27 0:51 UTC (permalink / raw)
To: djwong, david; +Cc: Dave Chinner, Christoph Hellwig, linux-xfs
From: Darrick J. Wong <djwong@kernel.org>
Most callers of xfs_rmap_lookup_le will retrieve the btree record
immediately if the lookup succeeds. The overlapped version of this
function (xfs_rmap_lookup_le_range) will return the record if the lookup
succeeds, so make the regular version do it too. Get rid of the useless
len argument, since it's not part of the lookup key.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_rmap.c | 59 +++++++++++++++++-----------------------------
fs/xfs/libxfs/xfs_rmap.h | 4 ++-
fs/xfs/scrub/bmap.c | 24 +++----------------
3 files changed, 28 insertions(+), 59 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index cd322174dbff..3eea8056e7bc 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -34,18 +34,32 @@ int
xfs_rmap_lookup_le(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
- xfs_extlen_t len,
uint64_t owner,
uint64_t offset,
unsigned int flags,
+ struct xfs_rmap_irec *irec,
int *stat)
{
+ int get_stat = 0;
+ int error;
+
cur->bc_rec.r.rm_startblock = bno;
- cur->bc_rec.r.rm_blockcount = len;
+ cur->bc_rec.r.rm_blockcount = 0;
cur->bc_rec.r.rm_owner = owner;
cur->bc_rec.r.rm_offset = offset;
cur->bc_rec.r.rm_flags = flags;
- return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+
+ error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+ if (error || !(*stat) || !irec)
+ return error;
+
+ error = xfs_rmap_get_rec(cur, irec, &get_stat);
+ if (error)
+ return error;
+ if (!get_stat)
+ return -EFSCORRUPTED;
+
+ return 0;
}
/*
@@ -510,7 +524,7 @@ xfs_rmap_unmap(
* for the AG headers at rm_startblock == 0 created by mkfs/growfs that
* will not ever be removed from the tree.
*/
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i);
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, <rec, &i);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -518,13 +532,6 @@ xfs_rmap_unmap(
goto out_error;
}
- error = xfs_rmap_get_rec(cur, <rec, &i);
- if (error)
- goto out_error;
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto out_error;
- }
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
ltrec.rm_blockcount, ltrec.rm_owner,
@@ -786,18 +793,11 @@ xfs_rmap_map(
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, <rec,
&have_lt);
if (error)
goto out_error;
if (have_lt) {
- error = xfs_rmap_get_rec(cur, <rec, &have_lt);
- if (error)
- goto out_error;
- if (XFS_IS_CORRUPT(mp, have_lt != 1)) {
- error = -EFSCORRUPTED;
- goto out_error;
- }
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
ltrec.rm_blockcount, ltrec.rm_owner,
@@ -1022,7 +1022,7 @@ xfs_rmap_convert(
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, &PREV, &i);
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -1030,13 +1030,6 @@ xfs_rmap_convert(
goto done;
}
- error = xfs_rmap_get_rec(cur, &PREV, &i);
- if (error)
- goto done;
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto done;
- }
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
cur->bc_ag.pag->pag_agno, PREV.rm_startblock,
PREV.rm_blockcount, PREV.rm_owner,
@@ -1140,7 +1133,7 @@ xfs_rmap_convert(
_RET_IP_);
/* reset the cursor back to PREV */
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i);
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -2677,7 +2670,7 @@ xfs_rmap_record_exists(
ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
(flags & XFS_RMAP_BMBT_BLOCK));
- error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &irec,
&has_record);
if (error)
return error;
@@ -2686,14 +2679,6 @@ xfs_rmap_record_exists(
return 0;
}
- error = xfs_rmap_get_rec(cur, &irec, &has_record);
- if (error)
- return error;
- if (!has_record) {
- *has_rmap = false;
- return 0;
- }
-
*has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
irec.rm_startblock + irec.rm_blockcount >= bno + len);
return 0;
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index b718ebeda372..11ec9406a0ea 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -122,8 +122,8 @@ int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
const struct xfs_owner_info *oinfo);
int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- xfs_extlen_t len, uint64_t owner, uint64_t offset,
- unsigned int flags, int *stat);
+ uint64_t owner, uint64_t offset, unsigned int flags,
+ struct xfs_rmap_irec *irec, int *stat);
int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
xfs_extlen_t len, uint64_t owner, uint64_t offset,
unsigned int flags, int *stat);
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index c357593e0a02..285995ba3947 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -133,29 +133,13 @@ xchk_bmap_get_rmap(
if (info->is_shared) {
error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
owner, offset, rflags, rmap, &has_rmap);
- if (!xchk_should_check_xref(info->sc, &error,
- &info->sc->sa.rmap_cur))
- return false;
- goto out;
+ } else {
+ error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
+ owner, offset, rflags, rmap, &has_rmap);
}
-
- /*
- * Otherwise, use the (faster) regular lookup.
- */
- error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
- offset, rflags, &has_rmap);
- if (!xchk_should_check_xref(info->sc, &error,
- &info->sc->sa.rmap_cur))
- return false;
- if (!has_rmap)
- goto out;
-
- error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
- if (!xchk_should_check_xref(info->sc, &error,
- &info->sc->sa.rmap_cur))
+ if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
return false;
-out:
if (!has_rmap)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/4] xfs: speed up rmap lookups by using non-overlapped lookups when possible
2022-04-27 0:51 [PATCHSET v2 0/4] xfs: fix rmap inefficiencies Darrick J. Wong
2022-04-27 0:51 ` [PATCH 1/4] xfs: capture buffer ops in the xfs_buf tracepoints Darrick J. Wong
2022-04-27 0:51 ` [PATCH 2/4] xfs: simplify xfs_rmap_lookup_le call sites Darrick J. Wong
@ 2022-04-27 0:51 ` Darrick J. Wong
2022-04-27 4:19 ` Dave Chinner
2022-04-28 12:44 ` Christoph Hellwig
2022-04-27 0:51 ` [PATCH 4/4] xfs: speed up write operations " Darrick J. Wong
3 siblings, 2 replies; 9+ messages in thread
From: Darrick J. Wong @ 2022-04-27 0:51 UTC (permalink / raw)
To: djwong, david; +Cc: linux-xfs
From: Darrick J. Wong <djwong@kernel.org>
Reverse mapping on a reflink-capable filesystem has some pretty high
overhead when performing file operations. This is because the rmap
records for logically and physically adjacent extents might not be
adjacent in the rmap index due to data block sharing. As a result, we
use expensive overlapped-interval btree search, which walks every record
that overlaps with the supplied key in the hopes of finding the record.
However, profiling data shows that when the index contains a record that
is an exact match for a query key, the non-overlapped btree search
function can find the record much faster than the overlapped version.
Try the non-overlapped lookup first, which will make scrub run much
faster.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_rmap.c | 52 ++++++++++++++++++++++++++++++++++------------
1 file changed, 38 insertions(+), 14 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 3eea8056e7bc..6f74dcda44b5 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -367,7 +367,6 @@ xfs_rmap_lookup_le_range_helper(
return 0;
*info->irec = *rec;
- *info->stat = 1;
return -ECANCELED;
}
@@ -388,6 +387,7 @@ xfs_rmap_lookup_le_range(
int *stat)
{
struct xfs_find_left_neighbor_info info;
+ int found = 0;
int error;
info.high.rm_startblock = bno;
@@ -400,20 +400,44 @@ xfs_rmap_lookup_le_range(
info.high.rm_blockcount = 0;
*stat = 0;
info.irec = irec;
- info.stat = stat;
- trace_xfs_rmap_lookup_le_range(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
- error = xfs_rmap_query_range(cur, &info.high, &info.high,
- xfs_rmap_lookup_le_range_helper, &info);
- if (error == -ECANCELED)
- error = 0;
- if (*stat)
- trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, irec->rm_startblock,
- irec->rm_blockcount, irec->rm_owner,
- irec->rm_offset, irec->rm_flags);
- return error;
+ trace_xfs_rmap_lookup_le_range(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ bno, 0, owner, offset, flags);
+
+ /*
+ * Historically, we always used the range query to walk every reverse
+ * mapping that could possibly overlap the key that the caller asked
+ * for, and filter out the ones that don't. That is very slow when
+ * there are a lot of records.
+ *
+ * However, there are two scenarios where the classic btree search can
+ * produce correct results -- if the index contains a record that is an
+ * exact match for the lookup key; and if there are no other records
+ * between the record we want and the key we supplied.
+ *
+ * As an optimization, try a non-overlapped lookup first. This makes
+ * scrub run much faster on most filesystems because bmbt records are
+ * usually an exact match for rmap records. If we don't find what we
+ * want, we fall back to the overlapped query.
+ */
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
+ &found);
+ if (error)
+ return error;
+ if (found)
+ error = xfs_rmap_lookup_le_range_helper(cur, irec, &info);
+ if (!error)
+ error = xfs_rmap_query_range(cur, &info.high, &info.high,
+ xfs_rmap_lookup_le_range_helper, &info);
+ if (error != -ECANCELED)
+ return error;
+
+ *stat = 1;
+ trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
+ cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+ irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
+ irec->rm_flags);
+ return 0;
}
/*
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4/4] xfs: speed up write operations by using non-overlapped lookups when possible
2022-04-27 0:51 [PATCHSET v2 0/4] xfs: fix rmap inefficiencies Darrick J. Wong
` (2 preceding siblings ...)
2022-04-27 0:51 ` [PATCH 3/4] xfs: speed up rmap lookups by using non-overlapped lookups when possible Darrick J. Wong
@ 2022-04-27 0:51 ` Darrick J. Wong
2022-04-27 4:22 ` Dave Chinner
2022-04-28 12:45 ` Christoph Hellwig
3 siblings, 2 replies; 9+ messages in thread
From: Darrick J. Wong @ 2022-04-27 0:51 UTC (permalink / raw)
To: djwong, david; +Cc: linux-xfs
From: Darrick J. Wong <djwong@kernel.org>
Reverse mapping on a reflink-capable filesystem has some pretty high
overhead when performing file operations. This is because the rmap
records for logically and physically adjacent extents might not be
adjacent in the rmap index due to data block sharing. As a result, we
use expensive overlapped-interval btree search, which walks every record
that overlaps with the supplied key in the hopes of finding the record.
However, profiling data shows that when the index contains a record that
is an exact match for a query key, the non-overlapped btree search
function can find the record much faster than the overlapped version.
Try the non-overlapped lookup first when we're trying to find the left
neighbor rmap record for a given file mapping, which makes unwritten
extent conversion and remap operations run faster if data block sharing
is minimal in this part of the filesystem.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_rmap.c | 50 +++++++++++++++++++++++++++++++++-------------
fs/xfs/libxfs/xfs_rmap.h | 3 ---
2 files changed, 36 insertions(+), 17 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 6f74dcda44b5..2845019d31da 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -265,7 +265,6 @@ xfs_rmap_get_rec(
struct xfs_find_left_neighbor_info {
struct xfs_rmap_irec high;
struct xfs_rmap_irec *irec;
- int *stat;
};
/* For each rmap given, figure out if it matches the key we want. */
@@ -290,7 +289,6 @@ xfs_rmap_find_left_neighbor_helper(
return 0;
*info->irec = *rec;
- *info->stat = 1;
return -ECANCELED;
}
@@ -299,7 +297,7 @@ xfs_rmap_find_left_neighbor_helper(
* return a match with the same owner and adjacent physical and logical
* block ranges.
*/
-int
+STATIC int
xfs_rmap_find_left_neighbor(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
@@ -310,6 +308,7 @@ xfs_rmap_find_left_neighbor(
int *stat)
{
struct xfs_find_left_neighbor_info info;
+ int found = 0;
int error;
*stat = 0;
@@ -327,21 +326,44 @@ xfs_rmap_find_left_neighbor(
info.high.rm_flags = flags;
info.high.rm_blockcount = 0;
info.irec = irec;
- info.stat = stat;
trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp,
cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
- error = xfs_rmap_query_range(cur, &info.high, &info.high,
- xfs_rmap_find_left_neighbor_helper, &info);
- if (error == -ECANCELED)
- error = 0;
- if (*stat)
- trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
- cur->bc_ag.pag->pag_agno, irec->rm_startblock,
- irec->rm_blockcount, irec->rm_owner,
- irec->rm_offset, irec->rm_flags);
- return error;
+ /*
+ * Historically, we always used the range query to walk every reverse
+ * mapping that could possibly overlap the key that the caller asked
+ * for, and filter out the ones that don't. That is very slow when
+ * there are a lot of records.
+ *
+ * However, there are two scenarios where the classic btree search can
+ * produce correct results -- if the index contains a record that is an
+ * exact match for the lookup key; and if there are no other records
+ * between the record we want and the key we supplied.
+ *
+ * As an optimization, try a non-overlapped lookup first. This makes
+ * extent conversion and remap operations run a bit faster if the
+ * physical extents aren't being shared. If we don't find what we
+ * want, we fall back to the overlapped query.
+ */
+ error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
+ &found);
+ if (error)
+ return error;
+ if (found)
+ error = xfs_rmap_find_left_neighbor_helper(cur, irec, &info);
+ if (!error)
+ error = xfs_rmap_query_range(cur, &info.high, &info.high,
+ xfs_rmap_find_left_neighbor_helper, &info);
+ if (error != -ECANCELED)
+ return error;
+
+ *stat = 1;
+ trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
+ cur->bc_ag.pag->pag_agno, irec->rm_startblock,
+ irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
+ irec->rm_flags);
+ return 0;
}
/* For each rmap given, figure out if it matches the key we want. */
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 11ec9406a0ea..54741a591a17 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -184,9 +184,6 @@ int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
xfs_fsblock_t startblock, xfs_filblks_t blockcount,
xfs_exntst_t state, struct xfs_btree_cur **pcur);
-int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno,
- uint64_t owner, uint64_t offset, unsigned int flags,
- struct xfs_rmap_irec *irec, int *stat);
int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
uint64_t owner, uint64_t offset, unsigned int flags,
struct xfs_rmap_irec *irec, int *stat);
^ permalink raw reply related [flat|nested] 9+ messages in thread