* [PATCH 01/47] xfs: plumb in needed functions for range querying of the freespace btrees
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
@ 2017-01-07 0:35 ` Darrick J. Wong
2017-01-07 0:35 ` [PATCH 02/47] xfs: provide a query_range function for " Darrick J. Wong
` (46 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:35 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Plumb in the pieces (init_high_key, diff_two_keys) necessary to call
query_range on the free space btrees. Remove the debugging asserts
so that we can make queries starting from block 0.
While we're at it, merge the redundant "if (btnum ==" hunks.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_alloc_btree.c | 162 +++++++++++++++++++++++++++++----------
1 file changed, 119 insertions(+), 43 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index efb467b..ba3ec9c 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -205,19 +205,28 @@ xfs_allocbt_init_key_from_rec(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
- ASSERT(rec->alloc.ar_startblock != 0);
-
key->alloc.ar_startblock = rec->alloc.ar_startblock;
key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
}
+/*
+ * Build the high key for a bnobt record: the start block field is set to
+ * the last block covered by the extent (startblock + blockcount - 1) and
+ * the block count field is zeroed.
+ */
STATIC void
+xfs_bnobt_init_high_key_from_rec(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	__u32			last_block;
+
+	last_block = be32_to_cpu(rec->alloc.ar_startblock) +
+		     be32_to_cpu(rec->alloc.ar_blockcount) - 1;
+
+	key->alloc.ar_blockcount = 0;
+	key->alloc.ar_startblock = cpu_to_be32(last_block);
+}
+
+STATIC void
xfs_allocbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
- ASSERT(cur->bc_rec.a.ar_startblock != 0);
-
rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}
@@ -236,18 +245,24 @@ xfs_allocbt_init_ptr_from_cur(
}
STATIC __int64_t
-xfs_allocbt_key_diff(
+xfs_bnobt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
xfs_alloc_key_t *kp = &key->alloc;
- __int64_t diff;
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return (__int64_t)be32_to_cpu(kp->ar_startblock) -
- rec->ar_startblock;
- }
+ return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+}
+
+STATIC __int64_t
+xfs_cntbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
+ xfs_alloc_key_t *kp = &key->alloc;
+ __int64_t diff;
diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
if (diff)
@@ -256,6 +271,33 @@ xfs_allocbt_key_diff(
return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
+/*
+ * Signed difference between the start blocks of two bnobt keys: negative
+ * when k1 starts before k2, zero when equal, positive otherwise.
+ */
+STATIC __int64_t
+xfs_bnobt_diff_two_keys(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	__int64_t		start1 = be32_to_cpu(k1->alloc.ar_startblock);
+	__int64_t		start2 = be32_to_cpu(k2->alloc.ar_startblock);
+
+	return start1 - start2;
+}
+
+/*
+ * Compare two cntbt keys, first by block count and then by start block.
+ *
+ * Returns a negative value when k1 sorts before k2, zero when the keys
+ * are equal, and a positive value when k1 sorts after k2.
+ */
+STATIC __int64_t
+xfs_cntbt_diff_two_keys(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	__int64_t		diff;
+
+	/*
+	 * Widen to 64 bits before subtracting.  Subtracting the two 32-bit
+	 * unsigned values directly wraps around, so the result could never
+	 * be negative once stored in diff.
+	 */
+	diff = (__int64_t)be32_to_cpu(k1->alloc.ar_blockcount) -
+			  be32_to_cpu(k2->alloc.ar_blockcount);
+	if (diff)
+		return diff;
+
+	/* Same length: fall back to ordering by start block. */
+	return (__int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
+			  be32_to_cpu(k2->alloc.ar_startblock);
+}
+
static bool
xfs_allocbt_verify(
struct xfs_buf *bp)
@@ -346,44 +388,78 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
-xfs_allocbt_keys_inorder(
+xfs_bnobt_keys_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return be32_to_cpu(k1->alloc.ar_startblock) <
- be32_to_cpu(k2->alloc.ar_startblock);
- } else {
- return be32_to_cpu(k1->alloc.ar_blockcount) <
- be32_to_cpu(k2->alloc.ar_blockcount) ||
- (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
- be32_to_cpu(k1->alloc.ar_startblock) <
- be32_to_cpu(k2->alloc.ar_startblock));
- }
+ return be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock);
}
STATIC int
-xfs_allocbt_recs_inorder(
+xfs_bnobt_recs_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return be32_to_cpu(r1->alloc.ar_startblock) +
- be32_to_cpu(r1->alloc.ar_blockcount) <=
- be32_to_cpu(r2->alloc.ar_startblock);
- } else {
- return be32_to_cpu(r1->alloc.ar_blockcount) <
- be32_to_cpu(r2->alloc.ar_blockcount) ||
- (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
- be32_to_cpu(r1->alloc.ar_startblock) <
- be32_to_cpu(r2->alloc.ar_startblock));
- }
+ return be32_to_cpu(r1->alloc.ar_startblock) +
+ be32_to_cpu(r1->alloc.ar_blockcount) <=
+ be32_to_cpu(r2->alloc.ar_startblock);
+}
+
+/*
+ * Ordering check for cntbt keys: returns nonzero when k1 sorts strictly
+ * before k2 by block count, with the start block breaking ties.
+ */
+STATIC int
+xfs_cntbt_keys_inorder(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	__u32			len1 = be32_to_cpu(k1->alloc.ar_blockcount);
+	__u32			len2 = be32_to_cpu(k2->alloc.ar_blockcount);
+
+	if (len1 != len2)
+		return len1 < len2;
+
+	return be32_to_cpu(k1->alloc.ar_startblock) <
+	       be32_to_cpu(k2->alloc.ar_startblock);
+}
+
+/*
+ * Ordering check for cntbt records: returns nonzero when r1 sorts strictly
+ * before r2 by block count, with the start block breaking ties.
+ */
+STATIC int
+xfs_cntbt_recs_inorder(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_rec	*r1,
+	union xfs_btree_rec	*r2)
+{
+	__u32			len1 = be32_to_cpu(r1->alloc.ar_blockcount);
+	__u32			len2 = be32_to_cpu(r2->alloc.ar_blockcount);
+
+	if (len1 != len2)
+		return len1 < len2;
+
+	return be32_to_cpu(r1->alloc.ar_startblock) <
+	       be32_to_cpu(r2->alloc.ar_startblock);
}
-#endif /* DEBUG */
+#endif /* DEBUG */
+
+/*
+ * Operations for the free space btree keyed by start block (bnobt).
+ * Unlike the cntbt table, it supplies init_high_key_from_rec, which range
+ * queries need in addition to diff_two_keys.
+ */
+static const struct xfs_btree_ops xfs_bnobt_ops = {
+	.rec_len		= sizeof(xfs_alloc_rec_t),
+	.key_len		= sizeof(xfs_alloc_key_t),
+	.buf_ops		= &xfs_allocbt_buf_ops,
+
+	/* Cursor and block management, shared by both free space btrees. */
+	.dup_cursor		= xfs_allocbt_dup_cursor,
+	.set_root		= xfs_allocbt_set_root,
+	.alloc_block		= xfs_allocbt_alloc_block,
+	.free_block		= xfs_allocbt_free_block,
+	.update_lastrec		= xfs_allocbt_update_lastrec,
+	.get_minrecs		= xfs_allocbt_get_minrecs,
+	.get_maxrecs		= xfs_allocbt_get_maxrecs,
+
+	/* Key, record and pointer construction. */
+	.init_key_from_rec	= xfs_allocbt_init_key_from_rec,
+	.init_high_key_from_rec	= xfs_bnobt_init_high_key_from_rec,
+	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
+	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
+
+	/* Key comparison. */
+	.key_diff		= xfs_bnobt_key_diff,
+	.diff_two_keys		= xfs_bnobt_diff_two_keys,
+#if defined(DEBUG) || defined(XFS_WARN)
+	.keys_inorder		= xfs_bnobt_keys_inorder,
+	.recs_inorder		= xfs_bnobt_recs_inorder,
+#endif
+};
-static const struct xfs_btree_ops xfs_allocbt_ops = {
+static const struct xfs_btree_ops xfs_cntbt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -397,11 +473,12 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
- .key_diff = xfs_allocbt_key_diff,
+ .key_diff = xfs_cntbt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
+ .diff_two_keys = xfs_cntbt_diff_two_keys,
#if defined(DEBUG) || defined(XFS_WARN)
- .keys_inorder = xfs_allocbt_keys_inorder,
- .recs_inorder = xfs_allocbt_recs_inorder,
+ .keys_inorder = xfs_cntbt_keys_inorder,
+ .recs_inorder = xfs_cntbt_recs_inorder,
#endif
};
@@ -427,16 +504,15 @@ xfs_allocbt_init_cursor(
cur->bc_mp = mp;
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
- cur->bc_ops = &xfs_allocbt_ops;
- if (btnum == XFS_BTNUM_BNO)
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
- else
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
if (btnum == XFS_BTNUM_CNT) {
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
+ cur->bc_ops = &xfs_cntbt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
} else {
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
+ cur->bc_ops = &xfs_bnobt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 02/47] xfs: provide a query_range function for freespace btrees
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
2017-01-07 0:35 ` [PATCH 01/47] xfs: plumb in needed functions for range querying of the freespace btrees Darrick J. Wong
@ 2017-01-07 0:35 ` Darrick J. Wong
2017-01-07 0:36 ` [PATCH 03/47] xfs: create a function to query all records in a btree Darrick J. Wong
` (45 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:35 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Implement a query_range function for the bnobt and cntbt. This will
be used for getfsmap fallback if there is no rmapbt and by the online
scrub and repair code.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_alloc.c | 42 ++++++++++++++++++++++++++++++++++++++++++
fs/xfs/libxfs/xfs_alloc.h | 10 ++++++++++
2 files changed, 52 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 5050056..860e297 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2934,3 +2934,45 @@ xfs_free_extent(
xfs_trans_brelse(tp, agbp);
return error;
}
+
+struct xfs_alloc_query_range_info {	/* context handed to xfs_alloc_query_range_helper */
+ xfs_alloc_query_range_fn fn;	/* caller's per-record callback */
+ void *priv;	/* opaque argument passed through to fn */
+};
+
+/*
+ * Range query callback: convert the on-disk free space record to its
+ * in-core (CPU-endian) form and hand it to the caller's function, whose
+ * return value is passed straight back.
+ */
+STATIC int
+xfs_alloc_query_range_helper(
+	struct xfs_btree_cur		*cur,
+	union xfs_btree_rec		*rec,
+	void				*priv)
+{
+	struct xfs_alloc_query_range_info	*query = priv;
+	struct xfs_alloc_rec_incore		irec = {
+		.ar_startblock	= be32_to_cpu(rec->alloc.ar_startblock),
+		.ar_blockcount	= be32_to_cpu(rec->alloc.ar_blockcount),
+	};
+
+	return query->fn(cur, &irec, query->priv);
+}
+
+/*
+ * Find all free space within a given range of blocks.
+ *
+ * Calls fn(cur, rec, priv) for the records that xfs_btree_query_range()
+ * finds between low_rec and high_rec.  The cursor must be a bnobt cursor.
+ * Returns whatever xfs_btree_query_range() returns.
+ */
+int
+xfs_alloc_query_range(
+	struct xfs_btree_cur			*cur,
+	struct xfs_alloc_rec_incore		*low_rec,
+	struct xfs_alloc_rec_incore		*high_rec,
+	xfs_alloc_query_range_fn		fn,
+	void					*priv)
+{
+	struct xfs_alloc_query_range_info	query = {
+		.fn	= fn,
+		.priv	= priv,
+	};
+	union xfs_btree_irec			high_brec;
+	union xfs_btree_irec			low_brec;
+
+	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
+
+	/* Wrap the caller's bounds in the generic in-core record union. */
+	high_brec.a = *high_rec;
+	low_brec.a = *low_rec;
+
+	return xfs_btree_query_range(cur, &low_brec, &high_brec,
+			xfs_alloc_query_range_helper, &query);
+}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 7c404a6..f9f8b81 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -223,4 +223,14 @@ int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
+typedef int (*xfs_alloc_query_range_fn)(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *rec,
+ void *priv);
+
+int xfs_alloc_query_range(struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *low_rec,
+ struct xfs_alloc_rec_incore *high_rec,
+ xfs_alloc_query_range_fn fn, void *priv);
+
#endif /* __XFS_ALLOC_H__ */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 03/47] xfs: create a function to query all records in a btree
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
2017-01-07 0:35 ` [PATCH 01/47] xfs: plumb in needed functions for range querying of the freespace btrees Darrick J. Wong
2017-01-07 0:35 ` [PATCH 02/47] xfs: provide a query_range function for " Darrick J. Wong
@ 2017-01-07 0:36 ` Darrick J. Wong
2017-01-07 0:36 ` [PATCH 04/47] xfs: introduce the XFS_IOC_GETFSMAP ioctl Darrick J. Wong
` (44 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:36 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Create a helper function that will query all records in a btree.
This will be used by the online repair functions to examine every
record in a btree to rebuild a second btree.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_alloc.c | 15 +++++++++++++++
fs/xfs/libxfs/xfs_alloc.h | 2 ++
fs/xfs/libxfs/xfs_btree.c | 14 ++++++++++++++
fs/xfs/libxfs/xfs_btree.h | 2 ++
fs/xfs/libxfs/xfs_rmap.c | 28 +++++++++++++++++++++-------
fs/xfs/libxfs/xfs_rmap.h | 2 ++
6 files changed, 56 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 860e297..e496447 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2976,3 +2976,18 @@ xfs_alloc_query_range(
return xfs_btree_query_range(cur, &low_brec, &high_brec,
xfs_alloc_query_range_helper, &query);
}
+
+/*
+ * Find all free space records.
+ *
+ * Calls fn(cur, rec, priv) via xfs_btree_query_all() and returns its
+ * result.  The cursor must be a bnobt cursor.
+ */
+int
+xfs_alloc_query_all(
+	struct xfs_btree_cur			*cur,
+	xfs_alloc_query_range_fn		fn,
+	void					*priv)
+{
+	struct xfs_alloc_query_range_info	query = {
+		.fn	= fn,
+		.priv	= priv,
+	};
+
+	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
+
+	return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
+}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index f9f8b81..0dc34bf 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -232,5 +232,7 @@ int xfs_alloc_query_range(struct xfs_btree_cur *cur,
struct xfs_alloc_rec_incore *low_rec,
struct xfs_alloc_rec_incore *high_rec,
xfs_alloc_query_range_fn fn, void *priv);
+int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
+ void *priv);
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 21e6a6a..1e68fd8 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4822,6 +4822,20 @@ xfs_btree_query_range(
fn, priv);
}
+/*
+ * Query a btree for all records.
+ *
+ * Runs xfs_btree_query_range() with an all-zeroes low bound and an
+ * all-ones high bound, calling fn(cur, rec, priv) on the records found.
+ * Returns whatever xfs_btree_query_range() returns.
+ */
+int
+xfs_btree_query_all(
+	struct xfs_btree_cur		*cur,
+	xfs_btree_query_range_fn	fn,
+	void				*priv)
+{
+	union xfs_btree_irec		low_rec;
+	union xfs_btree_irec		high_rec;
+
+	/*
+	 * Clear every byte of the low bound explicitly.  A "{0}" initializer
+	 * on a union is only required to initialize its first member, which
+	 * can leave the remaining bytes of larger members indeterminate.
+	 */
+	memset(&low_rec, 0, sizeof(low_rec));
+	memset(&high_rec, 0xFF, sizeof(high_rec));
+	return xfs_btree_query_range(cur, &low_rec, &high_rec, fn, priv);
+}
+
/*
* Calculate the number of blocks needed to store a given number of records
* in a short-format (per-AG metadata) btree.
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index b69b947..4feea6d 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -494,6 +494,8 @@ typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
int xfs_btree_query_range(struct xfs_btree_cur *cur,
union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
xfs_btree_query_range_fn fn, void *priv);
+int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn,
+ void *priv);
typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
void *data);
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 3a8cc71..3840556 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2001,14 +2001,14 @@ xfs_rmap_query_range_helper(
/* Find all rmaps between two keys. */
int
xfs_rmap_query_range(
- struct xfs_btree_cur *cur,
- struct xfs_rmap_irec *low_rec,
- struct xfs_rmap_irec *high_rec,
- xfs_rmap_query_range_fn fn,
- void *priv)
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *low_rec,
+ struct xfs_rmap_irec *high_rec,
+ xfs_rmap_query_range_fn fn,
+ void *priv)
{
- union xfs_btree_irec low_brec;
- union xfs_btree_irec high_brec;
+ union xfs_btree_irec low_brec;
+ union xfs_btree_irec high_brec;
struct xfs_rmap_query_range_info query;
low_brec.r = *low_rec;
@@ -2019,6 +2019,20 @@ xfs_rmap_query_range(
xfs_rmap_query_range_helper, &query);
}
+/*
+ * Find all rmaps.
+ *
+ * Calls fn(cur, rec, priv) via xfs_btree_query_all() and returns its
+ * result.
+ */
+int
+xfs_rmap_query_all(
+	struct xfs_btree_cur			*cur,
+	xfs_rmap_query_range_fn			fn,
+	void					*priv)
+{
+	struct xfs_rmap_query_range_info	query = {
+		.fn	= fn,
+		.priv	= priv,
+	};
+
+	return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query);
+}
+
/* Clean up after calling xfs_rmap_finish_one. */
void
xfs_rmap_finish_one_cleanup(
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 7899305..faf2c1a 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -162,6 +162,8 @@ typedef int (*xfs_rmap_query_range_fn)(
int xfs_rmap_query_range(struct xfs_btree_cur *cur,
struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec,
xfs_rmap_query_range_fn fn, void *priv);
+int xfs_rmap_query_all(struct xfs_btree_cur *cur, xfs_rmap_query_range_fn fn,
+ void *priv);
enum xfs_rmap_intent_type {
XFS_RMAP_MAP,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 04/47] xfs: introduce the XFS_IOC_GETFSMAP ioctl
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (2 preceding siblings ...)
2017-01-07 0:36 ` [PATCH 03/47] xfs: create a function to query all records in a btree Darrick J. Wong
@ 2017-01-07 0:36 ` Darrick J. Wong
2017-01-07 0:36 ` [PATCH 05/47] xfs: report shared extents in getfsmapx Darrick J. Wong
` (43 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:36 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Introduce a new ioctl that uses the reverse mapping btree to return
information about the physical layout of the filesystem.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_fs.h | 95 +++++++
fs/xfs/xfs_fsmap.c | 698 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_fsmap.h | 51 ++++
fs/xfs/xfs_ioctl.c | 104 +++++++
fs/xfs/xfs_ioctl32.c | 1
fs/xfs/xfs_trace.h | 85 ++++++
fs/xfs/xfs_trans.c | 22 ++
fs/xfs/xfs_trans.h | 2
9 files changed, 1059 insertions(+)
create mode 100644 fs/xfs/xfs_fsmap.c
create mode 100644 fs/xfs/xfs_fsmap.h
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 26ef195..5c90f82 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -79,6 +79,7 @@ xfs-y += xfs_aops.o \
xfs_extent_busy.o \
xfs_file.o \
xfs_filestream.o \
+ xfs_fsmap.o \
xfs_fsops.o \
xfs_globals.o \
xfs_icache.o \
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b72dc82..e62996f 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -93,6 +93,100 @@ struct getbmapx {
#define BMV_OF_SHARED 0x8 /* segment shared with another file */
/*
+ * Structure for XFS_IOC_GETFSMAP.
+ *
+ * The memory layout for this call is the scalar values defined in
+ * struct fsmap_head, followed by two struct fsmap that describe
+ * the lower and upper bound of mappings to return, followed by an
+ * array of struct fsmap mappings.
+ *
+ * fmh_iflags control the output of the call, whereas fmh_oflags report
+ * on the overall record output. fmh_count should be set to the
+ * length of the fmh_recs array, and fmh_entries will be set to the
+ * number of entries filled out during each call. If fmh_count is
+ * zero, the number of reverse mappings will be returned in
+ * fmh_entries, though no mappings will be returned. fmh_reserved
+ * must be set to zero.
+ *
+ * The two elements in the fmh_keys array are used to constrain the
+ * output. The first element in the array should represent the
+ * lowest disk mapping ("low key") that the user wants to learn
+ * about. If this value is all zeroes, the filesystem will return
+ * the first entry it knows about. For a subsequent call, the
+ * contents of fsmap_head.fmh_recs[fsmap_head.fmh_entries - 1] should be
+ * copied into fmh_keys[0] to have the kernel start where it left off.
+ *
+ * The second element in the fmh_keys array should represent the
+ * highest disk mapping ("high key") that the user wants to learn
+ * about. If this value is all ones, the filesystem will not stop
+ * until it runs out of mappings to return or runs out of space in
+ * fmh_recs.
+ *
+ * fmr_device can be either a 32-bit cookie representing a device, or
+ * a 32-bit dev_t if the FMH_OF_DEV_T flag is set. fmr_physical,
+ * fmr_offset, and fmr_length are expressed in units of bytes.
+ * fmr_owner is either an inode number, or a special value if
+ * FMR_OF_SPECIAL_OWNER is set in fmr_flags.
+ */
+#ifndef HAVE_GETFSMAP
+struct fsmap {
+ __u32 fmr_device; /* device id: a cookie, or a dev_t if FMH_OF_DEV_T is set */
+ __u32 fmr_flags; /* mapping flags (FMR_OF_*) */
+ __u64 fmr_physical; /* device offset of segment, in bytes */
+ __u64 fmr_owner; /* owner id: inode number, or FMR_OWN_* if FMR_OF_SPECIAL_OWNER */
+ __u64 fmr_offset; /* file offset of segment, in bytes */
+ __u64 fmr_length; /* length of segment, in bytes */
+ __u64 fmr_reserved[3]; /* must be zero */
+};
+
+struct fsmap_head {
+ __u32 fmh_iflags; /* control flags (FMH_IF_*) */
+ __u32 fmh_oflags; /* output flags (FMH_OF_*) */
+ __u32 fmh_count; /* length of the fmh_recs array (input); 0 = only count mappings */
+ __u32 fmh_entries; /* # of entries filled in (output). */
+ __u64 fmh_reserved[6]; /* must be zero */
+
+ struct fsmap fmh_keys[2]; /* low and high keys for the mapping search */
+ struct fsmap fmh_recs[]; /* returned records */
+};
+
+/*
+ * Number of bytes occupied by a struct fsmap_head followed by nr entries
+ * in its trailing fmh_recs array.
+ */
+static inline size_t
+fsmap_sizeof(
+	unsigned int	nr)
+{
+	size_t		recs_bytes = nr * sizeof(struct fsmap);
+
+	return sizeof(struct fsmap_head) + recs_bytes;
+}
+#endif
+
+/* fmh_iflags values - set by XFS_IOC_GETFSMAP caller in the header. */
+/* no flags defined yet */
+#define FMH_IF_VALID 0
+
+/* fmh_oflags values - returned in the header segment only. */
+#define FMH_OF_DEV_T 0x1 /* fmr_device values will be dev_t */
+
+/* fmr_flags values - returned for each non-header segment */
+#define FMR_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
+#define FMR_OF_ATTR_FORK 0x2 /* segment = attribute fork */
+#define FMR_OF_EXTENT_MAP 0x4 /* segment = extent map */
+#define FMR_OF_SHARED 0x8 /* segment = shared with another file */
+#define FMR_OF_SPECIAL_OWNER 0x10 /* owner is a special value */
+#define FMR_OF_LAST 0x20 /* segment is the last in the FS */
+
+/* fmr_owner special values */
+#define FMR_OWN_FREE (-1ULL) /* free space */
+#define FMR_OWN_UNKNOWN (-2ULL) /* unknown owner */
+#define FMR_OWN_FS (-3ULL) /* static fs metadata */
+#define FMR_OWN_LOG (-4ULL) /* journalling log */
+#define FMR_OWN_AG (-5ULL) /* per-AG metadata */
+#define FMR_OWN_INOBT (-6ULL) /* inode btree blocks */
+#define FMR_OWN_INODES (-7ULL) /* inodes */
+#define FMR_OWN_REFC (-8ULL) /* refcount tree */
+#define FMR_OWN_COW (-9ULL) /* cow staging */
+#define FMR_OWN_DEFECTIVE (-10ULL) /* bad blocks */
+
+/*
* Structure for XFS_IOC_FSSETDM.
* For use by backup and restore programs to set the XFS on-disk inode
* fields di_dmevmask and di_dmstate. These must be set to exactly and
@@ -502,6 +596,7 @@ typedef struct xfs_swapext
#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
+#define XFS_IOC_GETFSMAP _IOWR('X', 59, struct fsmap_head)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
new file mode 100644
index 0000000..985415b
--- /dev/null
+++ b/fs/xfs/xfs_fsmap.c
@@ -0,0 +1,698 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_error.h"
+#include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trace.h"
+#include "xfs_log.h"
+#include "xfs_rmap.h"
+#include "xfs_alloc.h"
+#include "xfs_bit.h"
+#include "xfs_fsmap.h"
+
+/*
+ * Fill out a struct fsmap from a struct xfs_fsmap.  The physical, offset
+ * and length fields go through BBTOB(); the reserved words are zeroed.
+ */
+void
+xfs_fsmap_from_internal(
+	struct fsmap		*dest,
+	struct xfs_fsmap	*src)
+{
+	int			i;
+
+	/* Fields copied as-is. */
+	dest->fmr_device = src->fmr_device;
+	dest->fmr_flags = src->fmr_flags;
+	dest->fmr_owner = src->fmr_owner;
+
+	/* Fields that need a unit conversion. */
+	dest->fmr_physical = BBTOB(src->fmr_physical);
+	dest->fmr_offset = BBTOB(src->fmr_offset);
+	dest->fmr_length = BBTOB(src->fmr_length);
+
+	for (i = 0; i < 3; i++)
+		dest->fmr_reserved[i] = 0;
+}
+
+/*
+ * Fill out a struct xfs_fsmap from a struct fsmap.  The physical, offset
+ * and length fields go through BTOBBT(); the reserved words of the source
+ * are not looked at.
+ */
+void
+xfs_fsmap_to_internal(
+	struct xfs_fsmap	*dest,
+	struct fsmap		*src)
+{
+	/* Fields copied as-is. */
+	dest->fmr_device = src->fmr_device;
+	dest->fmr_flags = src->fmr_flags;
+	dest->fmr_owner = src->fmr_owner;
+
+	/* Fields that need a unit conversion. */
+	dest->fmr_physical = BTOBBT(src->fmr_physical);
+	dest->fmr_offset = BTOBBT(src->fmr_offset);
+	dest->fmr_length = BTOBBT(src->fmr_length);
+}
+
+/*
+ * Convert an fsmap owner into an rmapbt owner.
+ *
+ * Without FMR_OF_SPECIAL_OWNER the owner must not be one that
+ * XFS_RMAP_NON_INODE_OWNER() matches.  With the flag set, only the special
+ * values listed below are accepted.  On success the owner is copied into
+ * rm->rm_owner and 0 is returned; otherwise -EINVAL is returned and rm is
+ * left untouched.
+ */
+static int
+xfs_fsmap_owner_to_rmap(
+	struct xfs_fsmap	*fmr,
+	struct xfs_rmap_irec	*rm)
+{
+	if (fmr->fmr_flags & FMR_OF_SPECIAL_OWNER) {
+		switch (fmr->fmr_owner) {
+		case 0:			/* "lowest owner id possible" */
+		case FMR_OWN_FREE:
+		case FMR_OWN_UNKNOWN:
+		case FMR_OWN_FS:
+		case FMR_OWN_LOG:
+		case FMR_OWN_AG:
+		case FMR_OWN_INOBT:
+		case FMR_OWN_INODES:
+		case FMR_OWN_REFC:
+		case FMR_OWN_COW:
+			break;
+		default:		/* includes FMR_OWN_DEFECTIVE */
+			return -EINVAL;
+		}
+	} else if (XFS_RMAP_NON_INODE_OWNER(fmr->fmr_owner)) {
+		return -EINVAL;
+	}
+
+	rm->rm_owner = fmr->fmr_owner;
+	return 0;
+}
+
+/*
+ * Convert an rmapbt owner into an fsmap owner.
+ *
+ * fmr_flags is reset to 0 first.  For owners matched by
+ * XFS_RMAP_NON_INODE_OWNER(), FMR_OF_SPECIAL_OWNER is set and the owner
+ * must be one of the values listed below, else -EFSCORRUPTED is returned
+ * without setting fmr_owner.  Returns 0 on success.
+ */
+static int
+xfs_fsmap_owner_from_rmap(
+	struct xfs_rmap_irec	*rm,
+	struct xfs_fsmap	*fmr)
+{
+	fmr->fmr_flags = 0;
+
+	if (XFS_RMAP_NON_INODE_OWNER(rm->rm_owner)) {
+		fmr->fmr_flags |= FMR_OF_SPECIAL_OWNER;
+
+		switch (rm->rm_owner) {
+		case XFS_RMAP_OWN_FS:
+		case XFS_RMAP_OWN_LOG:
+		case XFS_RMAP_OWN_AG:
+		case XFS_RMAP_OWN_INOBT:
+		case XFS_RMAP_OWN_INODES:
+		case XFS_RMAP_OWN_REFC:
+		case XFS_RMAP_OWN_COW:
+			break;
+		default:
+			return -EFSCORRUPTED;
+		}
+	}
+
+	fmr->fmr_owner = rm->rm_owner;
+	return 0;
+}
+
+/*
+ * getfsmap query state
+ *
+ * Passed to the per-device handlers (struct xfs_getfsmap_dev) and on to
+ * the record helpers, which read the keys and update next_daddr and the
+ * entry count in head as mappings are processed.
+ */
+struct xfs_getfsmap_info {
+ struct xfs_fsmap_head *head;
+ struct xfs_fsmap *rkey_low; /* lowest key */
+ xfs_fsmap_format_t formatter; /* formatting fn */
+ void *format_arg; /* format buffer */
+ bool last; /* last extent? */
+ xfs_daddr_t next_daddr; /* next daddr we expect */
+ u32 dev; /* device id */
+ u64 missing_owner; /* owner of holes */
+
+ xfs_agnumber_t agno; /* AG number, if applicable */
+ struct xfs_buf *agf_bp; /* AGF, for refcount queries */
+ struct xfs_rmap_irec low; /* low rmap key */
+ struct xfs_rmap_irec high; /* high rmap key */
+};
+
+/* Associate a device with a getfsmap handler. */
+struct xfs_getfsmap_dev {
+ u32 dev;	/* device id; the key compared by xfs_getfsmap_dev_compare */
+ int (*fn)(struct xfs_trans *tp,	/* handler for this device */
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info);
+};
+
+/*
+ * Compare two getfsmap device handlers by device id.
+ *
+ * p1 and p2 point to struct xfs_getfsmap_dev.  Returns -1, 0 or 1 when
+ * p1's device id is lower than, equal to, or higher than p2's.
+ */
+static int
+xfs_getfsmap_dev_compare(
+	const void			*p1,
+	const void			*p2)
+{
+	const struct xfs_getfsmap_dev	*d1 = p1;
+	const struct xfs_getfsmap_dev	*d2 = p2;
+
+	/*
+	 * Compare rather than subtract: d1->dev - d2->dev is a u32 that
+	 * wraps around, and converting it to int reports the wrong order
+	 * whenever the two ids are more than INT_MAX apart.
+	 */
+	if (d1->dev < d2->dev)
+		return -1;
+	return d1->dev > d2->dev;
+}
+
+/*
+ * Does this record sort before the low key of the query?
+ *
+ * The comparison is lexicographic: start block first, then owner, then
+ * the packed offset.  A record equal to the low key is not "before" it.
+ */
+static bool
+xfs_getfsmap_rec_before_low_key(
+	struct xfs_getfsmap_info	*info,
+	struct xfs_rmap_irec		*rec)
+{
+	uint64_t			rec_off;
+	uint64_t			low_off;
+
+	if (rec->rm_startblock != info->low.rm_startblock)
+		return rec->rm_startblock < info->low.rm_startblock;
+
+	if (rec->rm_owner != info->low.rm_owner)
+		return rec->rm_owner < info->low.rm_owner;
+
+	rec_off = xfs_rmap_irec_offset_pack(rec);
+	low_off = xfs_rmap_irec_offset_pack(&info->low);
+	return rec_off < low_off;
+}
+
+/*
+ * Format a reverse mapping for getfsmap, having translated rm_startblock
+ * into the appropriate daddr units.
+ *
+ * @mp: mount, used for unit conversions and tracing
+ * @info: query state; next_daddr and head->fmh_entries are updated here
+ * @rec: the reverse mapping to report
+ * @rec_daddr: start of @rec in daddr units, as computed by the caller
+ *
+ * Any gap between info->next_daddr and @rec_daddr is reported first, as a
+ * mapping owned by info->missing_owner.  When info->last is set only that
+ * gap is handled; @rec itself is neither formatted nor counted.  When
+ * head->fmh_count is zero nothing is formatted and mappings are merely
+ * counted in head->fmh_entries.
+ *
+ * Returns XFS_BTREE_QUERY_RANGE_CONTINUE to keep iterating,
+ * XFS_BTREE_QUERY_RANGE_ABORT once fmh_entries has reached fmh_count, or
+ * the error returned by the formatter or by xfs_fsmap_owner_from_rmap().
+ */
+STATIC int
+xfs_getfsmap_helper(
+ struct xfs_mount *mp,
+ struct xfs_getfsmap_info *info,
+ struct xfs_rmap_irec *rec,
+ xfs_daddr_t rec_daddr)
+{
+ struct xfs_fsmap fmr;
+ xfs_daddr_t key_end;
+ int error;
+
+ /*
+ * Filter out records that start before our startpoint, if the
+ * caller requested that.
+ */
+ if (xfs_getfsmap_rec_before_low_key(info, rec)) {
+ rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (info->next_daddr < rec_daddr)
+ info->next_daddr = rec_daddr;
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ }
+
+ /*
+ * If the caller passed in a length with the low record and
+ * the record represents a file data extent, we incremented
+ * the offset in the low key by the length in the hopes of
+ * finding reverse mappings for the physical blocks we just
+ * saw. We did /not/ increment next_daddr by the length
+ * because the range query would not be able to find shared
+ * extents within the same physical block range.
+ *
+ * However, the extent we've been fed could have a startblock
+ * past the passed-in low record. If this is the case,
+ * advance next_daddr to the end of the passed-in low record
+ * so we don't report the extent prior to this extent as
+ * free.
+ */
+ key_end = info->rkey_low->fmr_physical + info->rkey_low->fmr_length;
+ if (info->dev == info->rkey_low->fmr_device &&
+ info->next_daddr < key_end && rec_daddr >= key_end)
+ info->next_daddr = key_end;
+
+ /* Are we just counting mappings? */
+ if (info->head->fmh_count == 0) {
+ if (rec_daddr > info->next_daddr)
+ info->head->fmh_entries++;
+
+ if (info->last)
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+
+ info->head->fmh_entries++;
+
+ rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (info->next_daddr < rec_daddr)
+ info->next_daddr = rec_daddr;
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ }
+
+ /*
+ * If the record starts past the last physical block we saw,
+ * then we've found some free space. Report that too.
+ */
+ if (rec_daddr > info->next_daddr) {
+ if (info->head->fmh_entries >= info->head->fmh_count)
+ return XFS_BTREE_QUERY_RANGE_ABORT;
+
+ trace_xfs_fsmap_mapping(mp, info->dev, info->agno,
+ XFS_DADDR_TO_FSB(mp, info->next_daddr),
+ XFS_DADDR_TO_FSB(mp, rec_daddr -
+ info->next_daddr),
+ info->missing_owner, 0);
+
+ fmr.fmr_device = info->dev;
+ fmr.fmr_physical = info->next_daddr;
+ fmr.fmr_owner = info->missing_owner;
+ fmr.fmr_offset = 0;
+ fmr.fmr_length = rec_daddr - info->next_daddr;
+ fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
+ error = info->formatter(&fmr, info->format_arg);
+ if (error)
+ return error;
+ info->head->fmh_entries++;
+ }
+
+ if (info->last)
+ goto out;
+
+ /* Fill out the extent we found */
+ if (info->head->fmh_entries >= info->head->fmh_count)
+ return XFS_BTREE_QUERY_RANGE_ABORT;
+
+ trace_xfs_fsmap_mapping(mp, info->dev, info->agno,
+ rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
+ rec->rm_offset);
+
+ fmr.fmr_device = info->dev;
+ fmr.fmr_physical = rec_daddr;
+ error = xfs_fsmap_owner_from_rmap(rec, &fmr);
+ if (error)
+ return error;
+ fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
+ fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
+ fmr.fmr_flags |= FMR_OF_PREALLOC;
+ if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
+ fmr.fmr_flags |= FMR_OF_ATTR_FORK;
+ if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
+ fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
+ error = info->formatter(&fmr, info->format_arg);
+ if (error)
+ return error;
+ info->head->fmh_entries++;
+
+out:
+ rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (info->next_daddr < rec_daddr)
+ info->next_daddr = rec_daddr;
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+}
+
+/*
+ * Transform an rmapbt irec into an fsmap: convert the AG-relative start
+ * block into a daddr using the cursor's AG number, then let
+ * xfs_getfsmap_helper() do the rest.
+ */
+STATIC int
+xfs_getfsmap_datadev_helper(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_getfsmap_info	*info = priv;
+	struct xfs_mount		*mp = cur->bc_mp;
+	xfs_fsblock_t			fsb;
+
+	fsb = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, rec->rm_startblock);
+
+	return xfs_getfsmap_helper(mp, info, rec, XFS_FSB_TO_DADDR(mp, fsb));
+}
+
+/*
+ * Query callback for records whose start block is already device-absolute
+ * (rtdev, logdev): convert rm_startblock with XFS_FSB_TO_BB() and pass the
+ * record on to xfs_getfsmap_helper().  Only cur->bc_mp is read from the
+ * cursor.
+ */
+STATIC int
+xfs_getfsmap_rtdev_helper(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_getfsmap_info	*info = priv;
+	struct xfs_mount		*mp = cur->bc_mp;
+	xfs_daddr_t			daddr = XFS_FSB_TO_BB(mp,
+							rec->rm_startblock);
+
+	return xfs_getfsmap_helper(mp, info, rec, daddr);
+}
+
+/*
+ * Translate the FMR_OF_* flags of a getfsmap key into the matching
+ * XFS_RMAP_* flags.  irec->rm_flags is rebuilt from scratch; whatever it
+ * held before is discarded.
+ */
+static void
+xfs_getfsmap_set_irec_flags(
+	struct xfs_rmap_irec	*irec,
+	struct xfs_fsmap	*fmr)
+{
+	const uint32_t		oflags = fmr->fmr_flags;
+
+	irec->rm_flags = 0;
+
+	if (oflags & FMR_OF_PREALLOC)
+		irec->rm_flags |= XFS_RMAP_UNWRITTEN;
+	if (oflags & FMR_OF_EXTENT_MAP)
+		irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
+	if (oflags & FMR_OF_ATTR_FORK)
+		irec->rm_flags |= XFS_RMAP_ATTR_FORK;
+}
+
+/*
+ * Execute a getfsmap query against the log device.
+ *
+ * No btree is consulted: the log is reported as one synthetic
+ * XFS_RMAP_OWN_LOG record covering blocks [0, sb_logblocks), fed through
+ * xfs_getfsmap_rtdev_helper().  Returns 0, an error from
+ * xfs_fsmap_owner_to_rmap(), or whatever the helper returns.
+ */
+STATIC int
+xfs_getfsmap_logdev(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_fsmap *dkey_low = keys;
+ struct xfs_btree_cur cur;
+ struct xfs_rmap_irec rmap;
+ int error;
+
+ /* Set up search keys */
+ info->low.rm_startblock = XFS_BB_TO_FSBT(mp, dkey_low->fmr_physical);
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, dkey_low->fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(keys, &info->low);
+ if (error)
+ return error;
+ info->low.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->low, dkey_low);
+
+ /*
+ * NOTE(review): the rm_owner stored by this call is overwritten with
+ * ULLONG_MAX just below, so the call appears to serve only to validate
+ * the high key's owner -- confirm.
+ */
+ error = xfs_fsmap_owner_to_rmap(keys + 1, &info->high);
+ if (error)
+ return error;
+ info->high.rm_startblock = -1U;
+ info->high.rm_owner = ULLONG_MAX;
+ info->high.rm_offset = ULLONG_MAX;
+ info->high.rm_blockcount = 0;
+ info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
+ info->missing_owner = FMR_OWN_FREE;
+
+ trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
+ info->low.rm_startblock,
+ info->low.rm_blockcount,
+ info->low.rm_owner,
+ info->low.rm_offset);
+
+ trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
+ info->high.rm_startblock,
+ info->high.rm_blockcount,
+ info->high.rm_owner,
+ info->high.rm_offset);
+
+
+ /*
+ * The only record reported starts at block 0, so a low key beyond the
+ * start of the device leaves nothing to return.
+ */
+ if (dkey_low->fmr_physical > 0)
+ return 0;
+
+ /* Synthesize the single record describing the whole log. */
+ rmap.rm_startblock = 0;
+ rmap.rm_blockcount = mp->m_sb.sb_logblocks;
+ rmap.rm_owner = XFS_RMAP_OWN_LOG;
+ rmap.rm_offset = 0;
+ rmap.rm_flags = 0;
+
+ /*
+ * Fake on-stack cursor: only bc_mp is initialized, which is the only
+ * field xfs_getfsmap_rtdev_helper() reads.
+ */
+ cur.bc_mp = mp;
+ return xfs_getfsmap_rtdev_helper(&cur, &rmap, info);
+}
+
+/*
+ * Execute a getfsmap query against the regular data device.
+ *
+ * keys[0] and keys[1] are the low and high keys for this device; the high
+ * key's fmr_physical is clamped in place to the last basic block of the
+ * device.  Every AG from the one containing the low key through the one
+ * containing the high key has its rmapbt range-queried, with records fed to
+ * xfs_getfsmap_datadev_helper().  After the last AG the helper is called once
+ * more with info->last set so that it can report any trailing gap.
+ *
+ * Each AGF buffer is released from the transaction as soon as its AG has
+ * been processed, so at most one AGF is held at a time rather than every
+ * AGF in the scanned range staying locked until the transaction is
+ * cancelled.
+ *
+ * Returns 0, a negative errno, or the (positive) value the helper returned
+ * to stop the query early.
+ */
+STATIC int
+xfs_getfsmap_datadev(
+	struct xfs_trans		*tp,
+	struct xfs_fsmap		*keys,
+	struct xfs_getfsmap_info	*info)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_btree_cur		*bt_cur = NULL;
+	struct xfs_fsmap		*dkey_low;
+	struct xfs_fsmap		*dkey_high;
+	xfs_fsblock_t			start_fsb;
+	xfs_fsblock_t			end_fsb;
+	xfs_agnumber_t			start_ag;
+	xfs_agnumber_t			end_ag;
+	xfs_daddr_t			eofs;
+	int				error = 0;
+
+	dkey_low = keys;
+	dkey_high = keys + 1;
+	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+	if (dkey_low->fmr_physical >= eofs)
+		return 0;
+	if (dkey_high->fmr_physical >= eofs)
+		dkey_high->fmr_physical = eofs - 1;
+	start_fsb = XFS_DADDR_TO_FSB(mp, dkey_low->fmr_physical);
+	end_fsb = XFS_DADDR_TO_FSB(mp, dkey_high->fmr_physical);
+
+	/* Set up search keys */
+	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
+	info->low.rm_offset = XFS_BB_TO_FSBT(mp, dkey_low->fmr_offset);
+	error = xfs_fsmap_owner_to_rmap(dkey_low, &info->low);
+	if (error)
+		return error;
+	info->low.rm_blockcount = 0;
+	xfs_getfsmap_set_irec_flags(&info->low, dkey_low);
+
+	/* Until the last AG is reached, the high key is "everything". */
+	info->high.rm_startblock = -1U;
+	info->high.rm_owner = ULLONG_MAX;
+	info->high.rm_offset = ULLONG_MAX;
+	info->high.rm_blockcount = 0;
+	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
+	info->missing_owner = FMR_OWN_FREE;
+
+	start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
+	end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
+
+	/* Query each AG */
+	for (info->agno = start_ag; info->agno <= end_ag; info->agno++) {
+		/* The caller's high key only applies within the last AG. */
+		if (info->agno == end_ag) {
+			info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
+					end_fsb);
+			info->high.rm_offset = XFS_BB_TO_FSBT(mp,
+					dkey_high->fmr_offset);
+			error = xfs_fsmap_owner_to_rmap(dkey_high, &info->high);
+			if (error)
+				goto err;
+			xfs_getfsmap_set_irec_flags(&info->high, dkey_high);
+		}
+
+		/* Done with the previous AG: drop its cursor and its AGF. */
+		if (bt_cur) {
+			xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+			bt_cur = NULL;
+			xfs_trans_brelse(tp, info->agf_bp);
+			info->agf_bp = NULL;
+		}
+
+		error = xfs_alloc_read_agf(mp, tp, info->agno, 0,
+				&info->agf_bp);
+		if (error)
+			goto err;
+
+		trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
+				info->low.rm_startblock,
+				info->low.rm_blockcount,
+				info->low.rm_owner,
+				info->low.rm_offset);
+
+		trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
+				info->high.rm_startblock,
+				info->high.rm_blockcount,
+				info->high.rm_owner,
+				info->high.rm_offset);
+
+		bt_cur = xfs_rmapbt_init_cursor(mp, tp, info->agf_bp,
+				info->agno);
+		error = xfs_rmap_query_range(bt_cur, &info->low, &info->high,
+				xfs_getfsmap_datadev_helper, info);
+		if (error)
+			goto err;
+
+		/* Later AGs are scanned from their very first record. */
+		if (info->agno == start_ag) {
+			info->low.rm_startblock = 0;
+			info->low.rm_owner = 0;
+			info->low.rm_offset = 0;
+			info->low.rm_flags = 0;
+		}
+	}
+
+	/* Report any free space at the end of the AG */
+	info->last = true;
+	error = xfs_getfsmap_datadev_helper(bt_cur, &info->high, info);
+
+err:
+	/* A positive "error" is an early stop, not a btree failure. */
+	if (bt_cur)
+		xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
+							 XFS_BTREE_NOERROR);
+	if (info->agf_bp) {
+		xfs_trans_brelse(tp, info->agf_bp);
+		info->agf_bp = NULL;
+	}
+
+	return error;
+}
+
+/*
+ * Do we recognize the device named in this key?
+ *
+ * Accepts 0, UINT_MAX, the encoded data device number, and (when a log
+ * device target exists) the encoded log device number.
+ * NOTE(review): 0 and UINT_MAX presumably act as "lowest"/"highest" device
+ * wildcards in keys -- confirm against the ioctl documentation.
+ */
+STATIC bool
+xfs_getfsmap_is_valid_device(
+	struct xfs_mount	*mp,
+	struct xfs_fsmap	*fm)
+{
+	const dev_t		dev = fm->fmr_device;
+
+	if (dev == 0 || dev == UINT_MAX)
+		return true;
+	if (dev == new_encode_dev(mp->m_ddev_targp->bt_dev))
+		return true;
+
+	return mp->m_logdev_targp &&
+	       dev == new_encode_dev(mp->m_logdev_targp->bt_dev);
+}
+
+/*
+ * Ensure that the low key is strictly less than the high key.
+ *
+ * Keys are compared field by field in the order device, physical, owner,
+ * offset; the first field that differs decides.  Equal keys return false.
+ */
+STATIC bool
+xfs_getfsmap_check_keys(
+	struct xfs_fsmap	*low_key,
+	struct xfs_fsmap	*high_key)
+{
+	if (low_key->fmr_device != high_key->fmr_device)
+		return low_key->fmr_device < high_key->fmr_device;
+
+	if (low_key->fmr_physical != high_key->fmr_physical)
+		return low_key->fmr_physical < high_key->fmr_physical;
+
+	if (low_key->fmr_owner != high_key->fmr_owner)
+		return low_key->fmr_owner < high_key->fmr_owner;
+
+	return low_key->fmr_offset < high_key->fmr_offset;
+}
+
+/* Number of slots in the per-device handler table built by xfs_getfsmap(). */
+#define XFS_GETFSMAP_DEVS 3
+/*
+ * Get filesystem's extents as described in head, and format for
+ * output. Calls formatter to fill the user's buffer until all
+ * extents are mapped, until the passed-in head->fmh_count slots have
+ * been filled, or until the formatter short-circuits the loop, if it
+ * is tracking filled-in extents on its own.
+ *
+ * Returns -EOPNOTSUPP if the filesystem has no rmapbt, -EINVAL for unknown
+ * input flags, unrecognized key devices or a low key that is not below the
+ * high key, and otherwise 0 or whatever xfs_trans_alloc_empty() or a device
+ * handler returned.  head->fmh_entries is reset to 0 before querying, and
+ * head->fmh_oflags is set to FMH_OF_DEV_T once the device loop has run.
+ */
+int
+xfs_getfsmap(
+ struct xfs_mount *mp,
+ struct xfs_fsmap_head *head,
+ xfs_fsmap_format_t formatter,
+ void *arg)
+{
+ struct xfs_trans *tp = NULL;
+ struct xfs_fsmap *rkey_low; /* request keys */
+ struct xfs_fsmap *rkey_high;
+ struct xfs_fsmap dkeys[2]; /* per-dev keys */
+ struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS];
+ struct xfs_getfsmap_info info = {0};
+ int i;
+ int error = 0;
+
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return -EOPNOTSUPP;
+ if (head->fmh_iflags & ~FMH_IF_VALID)
+ return -EINVAL;
+ rkey_low = head->fmh_keys;
+ rkey_high = rkey_low + 1;
+ if (!xfs_getfsmap_is_valid_device(mp, rkey_low) ||
+ !xfs_getfsmap_is_valid_device(mp, rkey_high))
+ return -EINVAL;
+
+ head->fmh_entries = 0;
+
+ /* Set up our device handlers. */
+ memset(handlers, 0, sizeof(handlers));
+ handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
+ handlers[0].fn = xfs_getfsmap_datadev;
+ if (mp->m_logdev_targp != mp->m_ddev_targp) {
+ handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
+ handlers[1].fn = xfs_getfsmap_logdev;
+ }
+
+ /*
+ * All XFS_GETFSMAP_DEVS slots are sorted, including the ones left
+ * zeroed by the memset above; those have a NULL fn and are skipped in
+ * the loop below.  NOTE(review): xfs_getfsmap_dev_compare presumably
+ * orders by device number, which the early "break" in the loop relies
+ * on -- confirm.
+ */
+ xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
+ xfs_getfsmap_dev_compare);
+
+ /*
+ * Since we allow the user to copy the last mapping from a previous
+ * call into the low key slot, we have to advance the low key by
+ * whatever the reported length is. If the offset field doesn't apply,
+ * move up the start block to the next extent and start over with the
+ * lowest owner/offset possible; otherwise it's file data, so move up
+ * the offset only.
+ */
+ dkeys[0] = *rkey_low;
+ if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
+ dkeys[0].fmr_physical += dkeys[0].fmr_length;
+ dkeys[0].fmr_owner = 0;
+ dkeys[0].fmr_offset = 0;
+ } else
+ dkeys[0].fmr_offset += dkeys[0].fmr_length;
+ /* Default per-device high key: every byte 0xFF, i.e. all fields maxed. */
+ memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
+
+ /* The advanced low key must still sort before the requested high key. */
+ if (!xfs_getfsmap_check_keys(dkeys, rkey_high))
+ return -EINVAL;
+
+ info.rkey_low = rkey_low;
+ info.formatter = formatter;
+ info.format_arg = arg;
+ info.head = head;
+
+ /* For each device we support... */
+ for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
+ /* Is this device within the range the user asked for? */
+ if (!handlers[i].fn)
+ continue;
+ if (rkey_low->fmr_device > handlers[i].dev)
+ continue;
+ if (rkey_high->fmr_device < handlers[i].dev)
+ break;
+
+ /*
+ * If this device number matches the high key, we have
+ * to pass the high key to the handler to limit the
+ * query results. If the device number exceeds the
+ * low key, zero out the low key so that we get
+ * everything from the beginning.
+ */
+ if (handlers[i].dev == rkey_high->fmr_device)
+ dkeys[1] = *rkey_high;
+ if (handlers[i].dev > rkey_low->fmr_device)
+ memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
+
+ /* Each device is queried under its own empty transaction. */
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ break;
+
+ /* Reset the per-device query state before calling the handler. */
+ info.next_daddr = dkeys[0].fmr_physical;
+ info.dev = handlers[i].dev;
+ info.last = false;
+ info.agno = NULLAGNUMBER;
+ error = handlers[i].fn(tp, dkeys, &info);
+ /* On a nonzero result tp is still set and is cancelled below. */
+ if (error)
+ break;
+ xfs_trans_cancel(tp);
+ tp = NULL;
+ }
+
+ if (tp)
+ xfs_trans_cancel(tp);
+ head->fmh_oflags = FMH_OF_DEV_T;
+ return error;
+}
diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h
new file mode 100644
index 0000000..1943047
--- /dev/null
+++ b/fs/xfs/xfs_fsmap.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_FSMAP_H__
+#define __XFS_FSMAP_H__
+
+/*
+ * internal fsmap representation
+ *
+ * As filled in by the getfsmap code, fmr_device carries a new_encode_dev()
+ * value, fmr_physical a disk address, and fmr_offset/fmr_length values
+ * converted with XFS_FSB_TO_BB() (basic-block units rather than filesystem
+ * blocks).
+ */
+struct xfs_fsmap {
+ dev_t fmr_device; /* device id */
+ uint32_t fmr_flags; /* mapping flags */
+ uint64_t fmr_physical; /* device offset of segment */
+ uint64_t fmr_owner; /* owner id */
+ xfs_fileoff_t fmr_offset; /* file offset of segment */
+ xfs_filblks_t fmr_length; /* length of segment, blocks */
+};
+
+/*
+ * Internal form of a getfsmap request header.  fmh_keys[0] is the low key
+ * and fmh_keys[1] the high key of the query; the query stops emitting
+ * records once fmh_entries reaches fmh_count.
+ */
+struct xfs_fsmap_head {
+ uint32_t fmh_iflags; /* control flags */
+ uint32_t fmh_oflags; /* output flags */
+ unsigned int fmh_count; /* # of entries in array incl. input */
+ unsigned int fmh_entries; /* # of entries filled in (output). */
+
+ struct xfs_fsmap fmh_keys[2]; /* low and high keys */
+};
+
+void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
+void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
+
+/* fsmap to userspace formatter - copy to user & advance pointer */
+typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
+
+int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
+ xfs_fsmap_format_t formatter, void *arg);
+
+#endif /* __XFS_FSMAP_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c67cfb4..d39da5d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -41,6 +41,8 @@
#include "xfs_trans.h"
#include "xfs_pnfs.h"
#include "xfs_acl.h"
+#include "xfs_btree.h"
+#include "xfs_fsmap.h"
#include <linux/capability.h>
#include <linux/dcache.h>
@@ -1607,6 +1609,103 @@ xfs_ioc_getbmapx(
return 0;
}
+/* State shared between xfs_ioc_getfsmap() and xfs_getfsmap_format(). */
+struct getfsmap_info {
+ struct xfs_mount *mp; /* mount, passed to the tracepoint */
+ struct fsmap __user *data; /* next user record slot to fill */
+ __u32 last_flags; /* fmr_flags of the last record written */
+};
+
+/*
+ * xfs_getfsmap() formatter: convert one internal record to a struct fsmap,
+ * copy it to the current slot of the user's record array and advance to the
+ * next slot.  Returns 0, or -EFAULT if the copy to userspace fails.
+ */
+STATIC int
+xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
+{
+ struct getfsmap_info *info = priv;
+ struct fsmap fm;
+
+ trace_xfs_getfsmap_mapping(info->mp, xfm->fmr_device, xfm->fmr_physical,
+ xfm->fmr_length, xfm->fmr_owner, xfm->fmr_offset,
+ xfm->fmr_flags);
+
+ /* Remembered so the caller can rewrite them with FMR_OF_LAST set. */
+ info->last_flags = xfm->fmr_flags;
+ /*
+ * NOTE(review): fm is copied to userspace in full, so this relies on
+ * xfs_fsmap_from_internal() initializing every byte of it -- confirm.
+ */
+ xfs_fsmap_from_internal(&fm, xfm);
+ if (copy_to_user(info->data, &fm, sizeof(struct fsmap)))
+ return -EFAULT;
+
+ info->data++;
+ return 0;
+}
+
+/*
+ * Handle XFS_IOC_GETFSMAP: copy the fsmap_head in from userspace, run
+ * xfs_getfsmap() with xfs_getfsmap_format() writing records directly into
+ * the user's fmh_recs array, then copy the updated header back out.
+ *
+ * Returns 0 on success, -EFAULT if a user copy fails, -EINVAL if any
+ * reserved field is nonzero, or the error returned by xfs_getfsmap().
+ */
+STATIC int
+xfs_ioc_getfsmap(
+ struct xfs_inode *ip,
+ void __user *arg)
+{
+ struct getfsmap_info info;
+ struct xfs_fsmap_head xhead = {0};
+ struct fsmap_head head;
+ bool aborted = false;
+ int error;
+
+ if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
+ return -EFAULT;
+ /* Every reserved field in the header and in both keys must be zero. */
+ if (head.fmh_reserved[0] || head.fmh_reserved[1] ||
+ head.fmh_reserved[2] || head.fmh_reserved[3] ||
+ head.fmh_reserved[4] || head.fmh_reserved[5] ||
+ head.fmh_keys[0].fmr_reserved[0] ||
+ head.fmh_keys[0].fmr_reserved[1] ||
+ head.fmh_keys[0].fmr_reserved[2] ||
+ head.fmh_keys[1].fmr_reserved[0] ||
+ head.fmh_keys[1].fmr_reserved[1] ||
+ head.fmh_keys[1].fmr_reserved[2])
+ return -EINVAL;
+
+ /* Build the internal request header from the user-visible one. */
+ xhead.fmh_iflags = head.fmh_iflags;
+ xhead.fmh_count = head.fmh_count;
+ xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
+ xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
+
+ trace_xfs_getfsmap_low_key(ip->i_mount,
+ xhead.fmh_keys[0].fmr_device,
+ xhead.fmh_keys[0].fmr_physical,
+ xhead.fmh_keys[0].fmr_length,
+ xhead.fmh_keys[0].fmr_owner,
+ xhead.fmh_keys[0].fmr_offset,
+ xhead.fmh_keys[0].fmr_flags);
+
+ trace_xfs_getfsmap_high_key(ip->i_mount,
+ xhead.fmh_keys[1].fmr_device,
+ xhead.fmh_keys[1].fmr_physical,
+ xhead.fmh_keys[1].fmr_length,
+ xhead.fmh_keys[1].fmr_owner,
+ xhead.fmh_keys[1].fmr_offset,
+ xhead.fmh_keys[1].fmr_flags);
+
+ /*
+ * info.last_flags is left unset here; it is written by the formatter
+ * and only read below when at least one record was emitted.
+ */
+ info.mp = ip->i_mount;
+ info.data = ((__force struct fsmap_head *)arg)->fmh_recs;
+ error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
+ /* An aborted query is an early stop, not a failure. */
+ if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+ error = 0;
+ aborted = true;
+ } else if (error)
+ return error;
+
+ /* If we didn't abort, set the "last" flag in the last fmx */
+ if (!aborted && xhead.fmh_entries) {
+ /* info.data is one past the last record written; step back. */
+ info.data--;
+ info.last_flags |= FMR_OF_LAST;
+ if (copy_to_user(&info.data->fmr_flags, &info.last_flags,
+ sizeof(info.last_flags)))
+ return -EFAULT;
+ }
+
+ /* copy back header */
+ head.fmh_entries = xhead.fmh_entries;
+ head.fmh_oflags = xhead.fmh_oflags;
+ if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
+ return -EFAULT;
+
+ return 0;
+}
+
int
xfs_ioc_swapext(
xfs_swapext_t *sxp)
@@ -1787,6 +1886,11 @@ xfs_file_ioctl(
case XFS_IOC_GETBMAPX:
return xfs_ioc_getbmapx(ip, arg);
+ case XFS_IOC_GETFSMAP:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ return xfs_ioc_getfsmap(ip, arg);
+
case XFS_IOC_FD_TO_HANDLE:
case XFS_IOC_PATH_TO_HANDLE:
case XFS_IOC_PATH_TO_FSHANDLE: {
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 7c49938..5ba41b7 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -554,6 +554,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_GOINGDOWN:
case XFS_IOC_ERROR_INJECTION:
case XFS_IOC_ERROR_CLEARALL:
+ case XFS_IOC_GETFSMAP:
return xfs_file_ioctl(filp, cmd, p);
#ifndef BROKEN_X86_ALIGNMENT
/* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 69c5bcd..09f4755 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3266,6 +3266,91 @@ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
+/*
+ * fsmap traces
+ *
+ * Event class for the per-device getfsmap keys and mappings, expressed as an
+ * AG number, start block, length, owner and offset.  keydev is passed in
+ * new_encode_dev() form and decoded for display.  The format string has no
+ * trailing newline: the trace output adds its own, so an explicit "\n" would
+ * leave a blank line after every event.
+ */
+DECLARE_EVENT_CLASS(xfs_fsmap_class,
+	TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
+		 xfs_fsblock_t bno, xfs_filblks_t len, __uint64_t owner,
+		 __uint64_t offset),
+	TP_ARGS(mp, keydev, agno, bno, len, owner, offset),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(dev_t, keydev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_fsblock_t, bno)
+		__field(xfs_filblks_t, len)
+		__field(__uint64_t, owner)
+		__field(__uint64_t, offset)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->keydev = new_decode_dev(keydev);
+		__entry->agno = agno;
+		__entry->bno = bno;
+		__entry->len = len;
+		__entry->owner = owner;
+		__entry->offset = offset;
+	),
+	TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  MAJOR(__entry->keydev), MINOR(__entry->keydev),
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->len,
+		  __entry->owner,
+		  __entry->offset)
+)
+#define DEFINE_FSMAP_EVENT(name) \
+DEFINE_EVENT(xfs_fsmap_class, name, \
+	TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \
+		 xfs_fsblock_t bno, xfs_filblks_t len, __uint64_t owner, \
+		 __uint64_t offset), \
+	TP_ARGS(mp, keydev, agno, bno, len, owner, offset))
+DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
+DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
+DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
+
+/*
+ * Event class for the ioctl-level getfsmap keys and mappings, expressed as a
+ * device block address, length, owner, offset and flags.  keydev is passed in
+ * new_encode_dev() form and decoded for display.  The format string has no
+ * trailing newline: the trace output adds its own, so an explicit "\n" would
+ * leave a blank line after every event.
+ */
+DECLARE_EVENT_CLASS(xfs_getfsmap_class,
+	TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_daddr_t block,
+		 xfs_daddr_t len, __uint64_t owner, __uint64_t offset,
+		 __uint64_t flags),
+	TP_ARGS(mp, keydev, block, len, owner, offset, flags),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(dev_t, keydev)
+		__field(xfs_daddr_t, block)
+		__field(xfs_daddr_t, len)
+		__field(__uint64_t, owner)
+		__field(__uint64_t, offset)
+		__field(__uint64_t, flags)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->keydev = new_decode_dev(keydev);
+		__entry->block = block;
+		__entry->len = len;
+		__entry->owner = owner;
+		__entry->offset = offset;
+		__entry->flags = flags;
+	),
+	TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  MAJOR(__entry->keydev), MINOR(__entry->keydev),
+		  __entry->block,
+		  __entry->len,
+		  __entry->owner,
+		  __entry->offset,
+		  __entry->flags)
+)
+#define DEFINE_GETFSMAP_EVENT(name) \
+DEFINE_EVENT(xfs_getfsmap_class, name, \
+	TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_daddr_t block, \
+		 xfs_daddr_t len, __uint64_t owner, __uint64_t offset, \
+		 __uint64_t flags), \
+	TP_ARGS(mp, keydev, block, len, owner, offset, flags))
+DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
+DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
+DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 70f42ea..a280e12 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -263,6 +263,28 @@ xfs_trans_alloc(
}
/*
+ * Create an empty transaction with no reservation. This is a defensive
+ * mechanism for routines that query metadata without actually modifying
+ * them -- if the metadata being queried is somehow cross-linked (think a
+ * btree block pointer that points higher in the tree), we risk deadlock.
+ * However, blocks grabbed as part of a transaction can be re-grabbed.
+ * The verifiers will notice the corrupt block and the operation will fail
+ * back to userspace without deadlocking.
+ *
+ * Note the zero-length reservation; this transaction MUST be cancelled
+ * without any dirty data.
+ */
+int
+xfs_trans_alloc_empty(
+	struct xfs_mount		*mp,
+	struct xfs_trans		**tpp)
+{
+	/* All-zero reservation, with zero blocks and zero rt extents below. */
+	struct xfs_trans_res		zero_resv = {0};
+
+	return xfs_trans_alloc(mp, &zero_resv, 0, 0, XFS_TRANS_NO_WRITECOUNT,
+			tpp);
+}
+
+/*
* Record the indicated change to the given field for application
* to the file system's superblock when the transaction commits.
* For now, just store the change in the transaction structure.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 61b7fbd..98024cb 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -159,6 +159,8 @@ typedef struct xfs_trans {
int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
uint blocks, uint rtextents, uint flags,
struct xfs_trans **tpp);
+int xfs_trans_alloc_empty(struct xfs_mount *mp,
+ struct xfs_trans **tpp);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 05/47] xfs: report shared extents in getfsmapx
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (3 preceding siblings ...)
2017-01-07 0:36 ` [PATCH 04/47] xfs: introduce the XFS_IOC_GETFSMAP ioctl Darrick J. Wong
@ 2017-01-07 0:36 ` Darrick J. Wong
2017-01-07 0:36 ` [PATCH 06/47] xfs: have getfsmap fall back to the freesp btrees when rmap is not present Darrick J. Wong
` (42 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:36 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Cross-reference the reverse mapping data with the refcount btree to find
out which extents are shared.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_fsmap.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 985415b..d729581 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -37,6 +37,8 @@
#include "xfs_alloc.h"
#include "xfs_bit.h"
#include "xfs_fsmap.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
/* Convert an xfs_fsmap to an fsmap. */
void
@@ -192,6 +194,42 @@ xfs_getfsmap_rec_before_low_key(
return false;
}
+/*
+ * Decide if this mapping is shared.
+ *
+ * Sets *stat to true when xfs_refcount_find_shared() reports a nonzero
+ * length of shared blocks within the record's extent, and to false otherwise
+ * (including when the filesystem has no reflink support or the query is not
+ * AG-based).  Returns 0 or the error from the refcount lookup; on error
+ * *stat is left false.
+ */
+STATIC int
+xfs_getfsmap_is_shared(
+ struct xfs_mount *mp,
+ struct xfs_getfsmap_info *info,
+ struct xfs_rmap_irec *rec,
+ bool *stat)
+{
+ struct xfs_btree_cur *cur;
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ int error;
+
+ *stat = false;
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return 0;
+ /* rt files will have agno set to NULLAGNUMBER */
+ if (info->agno == NULLAGNUMBER)
+ return 0;
+
+ /* Are there any shared blocks here? */
+ flen = 0;
+ /* The cursor is built with a NULL transaction on the AGF in info. */
+ cur = xfs_refcountbt_init_cursor(mp, NULL, info->agf_bp,
+ info->agno, NULL);
+
+ error = xfs_refcount_find_shared(cur, rec->rm_startblock,
+ rec->rm_blockcount, &fbno, &flen, false);
+
+ /* The cursor is torn down whether or not the lookup succeeded. */
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ if (error)
+ return error;
+
+ *stat = flen > 0;
+ return 0;
+}
+
/*
* Format a reverse mapping for getfsmap, having translated rm_startblock
* into the appropriate daddr units.
@@ -205,6 +243,7 @@ xfs_getfsmap_helper(
{
struct xfs_fsmap fmr;
xfs_daddr_t key_end;
+ bool shared;
int error;
/*
@@ -304,6 +343,13 @@ xfs_getfsmap_helper(
fmr.fmr_flags |= FMR_OF_ATTR_FORK;
if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
+ if (fmr.fmr_flags == 0) {
+ error = xfs_getfsmap_is_shared(mp, info, rec, &shared);
+ if (error)
+ return error;
+ if (shared)
+ fmr.fmr_flags |= FMR_OF_SHARED;
+ }
error = info->formatter(&fmr, info->format_arg);
if (error)
return error;
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 06/47] xfs: have getfsmap fall back to the freesp btrees when rmap is not present
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (4 preceding siblings ...)
2017-01-07 0:36 ` [PATCH 05/47] xfs: report shared extents in getfsmapx Darrick J. Wong
@ 2017-01-07 0:36 ` Darrick J. Wong
2017-01-07 0:36 ` [PATCH 07/47] xfs: getfsmap should fall back to rtbitmap when rtrmapbt " Darrick J. Wong
` (41 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:36 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
If the reverse-mapping btree isn't available, fall back to the
free space btrees to provide partial reverse mapping information.
The online scrub tool can make use of even partial information to
speed up the data block scan.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_fsmap.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 150 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index d729581..4e364f8 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -39,6 +39,7 @@
#include "xfs_fsmap.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
+#include "xfs_alloc_btree.h"
/* Convert an xfs_fsmap to an fsmap. */
void
@@ -125,6 +126,7 @@ xfs_fsmap_owner_from_rmap(
case XFS_RMAP_OWN_INODES:
case XFS_RMAP_OWN_REFC:
case XFS_RMAP_OWN_COW:
+ case XFS_RMAP_OWN_NULL: /* "free" */
fmr->fmr_owner = rm->rm_owner;
return 0;
default:
@@ -396,6 +398,31 @@ xfs_getfsmap_rtdev_helper(
return xfs_getfsmap_helper(mp, info, rec, rec_daddr);
}
+/*
+ * bnobt query callback: present a free-space record as an rmap record owned
+ * by XFS_RMAP_OWN_NULL ("free"), translate its AG-relative start block to a
+ * disk address and pass it on to xfs_getfsmap_helper().
+ */
+STATIC int
+xfs_getfsmap_datadev_bnobt_helper(
+	struct xfs_btree_cur		*cur,
+	struct xfs_alloc_rec_incore	*rec,
+	void				*priv)
+{
+	struct xfs_getfsmap_info	*info = priv;
+	struct xfs_mount		*mp = cur->bc_mp;
+	struct xfs_rmap_irec		irec = {
+		.rm_startblock	= rec->ar_startblock,
+		.rm_blockcount	= rec->ar_blockcount,
+		.rm_owner	= XFS_RMAP_OWN_NULL,	/* "free" */
+		.rm_offset	= 0,
+		.rm_flags	= 0,
+	};
+	xfs_daddr_t			daddr;
+
+	/* AG block -> filesystem block -> disk address */
+	daddr = XFS_FSB_TO_DADDR(mp,
+			XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno,
+				       rec->ar_startblock));
+
+	return xfs_getfsmap_helper(mp, info, &irec, daddr);
+}
+
/* Set rmap flags based on the getfsmap flags */
static void
xfs_getfsmap_set_irec_flags(
@@ -583,6 +610,125 @@ xfs_getfsmap_datadev(
return error;
}
+/*
+ * Execute a getfsmap query against the regular data device's bnobt.
+ *
+ * Used when no rmapbt is available: only free space is known, so gaps
+ * between free extents are reported with owner FMR_OWN_UNKNOWN.  keys[0] and
+ * keys[1] are the low and high keys for this device; the high key's
+ * fmr_physical is clamped in place to the last basic block of the device.
+ * Every AG from the one containing the low key through the one containing
+ * the high key has its bnobt range-queried, with records fed to
+ * xfs_getfsmap_datadev_bnobt_helper().  After the last AG the helper is
+ * called once more with info->last set so that it can report any trailing
+ * gap.
+ *
+ * The bnobt query keys are zero-initialized so that ar_blockcount, which is
+ * never assigned here but is read by the final helper call, has a defined
+ * value.  Each AGF buffer is released from the transaction as soon as its AG
+ * has been processed.
+ *
+ * Returns 0, a negative errno, or the (positive) value the helper returned
+ * to stop the query early.
+ */
+STATIC int
+xfs_getfsmap_datadev_bnobt(
+	struct xfs_trans		*tp,
+	struct xfs_fsmap		*keys,
+	struct xfs_getfsmap_info	*info)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_btree_cur		*bt_cur = NULL;
+	struct xfs_fsmap		*dkey_low;
+	struct xfs_fsmap		*dkey_high;
+	struct xfs_alloc_rec_incore	alow = {0};
+	struct xfs_alloc_rec_incore	ahigh = {0};
+	xfs_fsblock_t			start_fsb;
+	xfs_fsblock_t			end_fsb;
+	xfs_agnumber_t			start_ag;
+	xfs_agnumber_t			end_ag;
+	xfs_daddr_t			eofs;
+	int				error = 0;
+
+	dkey_low = keys;
+	dkey_high = keys + 1;
+	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+	if (dkey_low->fmr_physical >= eofs)
+		return 0;
+	if (dkey_high->fmr_physical >= eofs)
+		dkey_high->fmr_physical = eofs - 1;
+	start_fsb = XFS_DADDR_TO_FSB(mp, dkey_low->fmr_physical);
+	end_fsb = XFS_DADDR_TO_FSB(mp, dkey_high->fmr_physical);
+
+	/* Set up search keys */
+	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
+	info->low.rm_offset = XFS_BB_TO_FSBT(mp, dkey_low->fmr_offset);
+	error = xfs_fsmap_owner_to_rmap(dkey_low, &info->low);
+	if (error)
+		return error;
+	info->low.rm_blockcount = 0;
+	xfs_getfsmap_set_irec_flags(&info->low, dkey_low);
+
+	/* Until the last AG is reached, the high key is "everything". */
+	info->high.rm_startblock = -1U;
+	info->high.rm_owner = ULLONG_MAX;
+	info->high.rm_offset = ULLONG_MAX;
+	info->high.rm_blockcount = 0;
+	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
+
+	info->missing_owner = FMR_OWN_UNKNOWN;
+
+	start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
+	end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
+
+	/* Query each AG */
+	for (info->agno = start_ag; info->agno <= end_ag; info->agno++) {
+		/* The caller's high key only applies within the last AG. */
+		if (info->agno == end_ag) {
+			info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
+					end_fsb);
+			info->high.rm_offset = XFS_BB_TO_FSBT(mp,
+					dkey_high->fmr_offset);
+			error = xfs_fsmap_owner_to_rmap(dkey_high, &info->high);
+			if (error)
+				goto err;
+			xfs_getfsmap_set_irec_flags(&info->high, dkey_high);
+		}
+
+		/* Done with the previous AG: drop its cursor and its AGF. */
+		if (bt_cur) {
+			xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+			bt_cur = NULL;
+			xfs_trans_brelse(tp, info->agf_bp);
+			info->agf_bp = NULL;
+		}
+
+		error = xfs_alloc_read_agf(mp, tp, info->agno, 0,
+				&info->agf_bp);
+		if (error)
+			goto err;
+
+		trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
+				info->low.rm_startblock,
+				info->low.rm_blockcount,
+				info->low.rm_owner,
+				info->low.rm_offset);
+
+		trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
+				info->high.rm_startblock,
+				info->high.rm_blockcount,
+				info->high.rm_owner,
+				info->high.rm_offset);
+
+		bt_cur = xfs_allocbt_init_cursor(mp, tp, info->agf_bp,
+				info->agno, XFS_BTNUM_BNO);
+		/* Only the start blocks are set; ar_blockcount stays 0. */
+		alow.ar_startblock = info->low.rm_startblock;
+		ahigh.ar_startblock = info->high.rm_startblock;
+		error = xfs_alloc_query_range(bt_cur, &alow, &ahigh,
+				xfs_getfsmap_datadev_bnobt_helper, info);
+		if (error)
+			goto err;
+
+		/* Later AGs are scanned from their very first record. */
+		if (info->agno == start_ag) {
+			info->low.rm_startblock = 0;
+			info->low.rm_owner = 0;
+			info->low.rm_offset = 0;
+			info->low.rm_flags = 0;
+		}
+	}
+
+	/* Report any free space at the end of the AG */
+	info->last = true;
+	error = xfs_getfsmap_datadev_bnobt_helper(bt_cur, &ahigh, info);
+
+err:
+	/* A positive "error" is an early stop, not a btree failure. */
+	if (bt_cur)
+		xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
+							 XFS_BTREE_NOERROR);
+	if (info->agf_bp) {
+		xfs_trans_brelse(tp, info->agf_bp);
+		info->agf_bp = NULL;
+	}
+
+	return error;
+}
+
/* Do we recognize the device? */
STATIC bool
xfs_getfsmap_is_valid_device(
@@ -651,8 +797,6 @@ xfs_getfsmap(
int i;
int error = 0;
- if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
- return -EOPNOTSUPP;
if (head->fmh_iflags & ~FMH_IF_VALID)
return -EINVAL;
rkey_low = head->fmh_keys;
@@ -666,7 +810,10 @@ xfs_getfsmap(
/* Set up our device handlers. */
memset(handlers, 0, sizeof(handlers));
handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
- handlers[0].fn = xfs_getfsmap_datadev;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ handlers[0].fn = xfs_getfsmap_datadev;
+ else
+ handlers[0].fn = xfs_getfsmap_datadev_bnobt;
if (mp->m_logdev_targp != mp->m_ddev_targp) {
handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
handlers[1].fn = xfs_getfsmap_logdev;
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 07/47] xfs: getfsmap should fall back to rtbitmap when rtrmapbt not present
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (5 preceding siblings ...)
2017-01-07 0:36 ` [PATCH 06/47] xfs: have getfsmap fall back to the freesp btrees when rmap is not present Darrick J. Wong
@ 2017-01-07 0:36 ` Darrick J. Wong
2017-01-07 0:36 ` [PATCH 08/47] xfs: add scrub tracepoints Darrick J. Wong
` (40 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:36 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Use the realtime bitmap to return freespace information when the
rtrmapbt isn't present. Note that the rtrmapbt fsmap implementation
will show up later with the rtrmapbt patchset.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_fsmap.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_rtalloc.h | 2 +
2 files changed, 135 insertions(+)
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 4e364f8..ba5544b 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -40,6 +40,7 @@
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc_btree.h"
+#include "xfs_rtalloc.h"
/* Convert an xfs_fsmap to an fsmap. */
void
@@ -398,6 +399,29 @@ xfs_getfsmap_rtdev_helper(
return xfs_getfsmap_helper(mp, info, rec, rec_daddr);
}
+/*
+ * Turn one free extent of the realtime bitmap into an fsmap record.
+ *
+ * The extent spans rt blocks @start through @end inclusive.  It is
+ * described by a synthetic rmap record whose owner is XFS_RMAP_OWN_NULL
+ * ("free") and handed to xfs_getfsmap_helper() together with the disk
+ * address of @start.  @priv is the struct xfs_getfsmap_info of the query.
+ * Returns whatever xfs_getfsmap_helper() returns.
+ */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap_helper(
+	struct xfs_mount		*mp,
+	xfs_rtblock_t			start,
+	xfs_rtblock_t			end,
+	void				*priv)
+{
+	struct xfs_rmap_irec		free_rec = {
+		.rm_startblock		= start,
+		.rm_blockcount		= end - start + 1,
+		.rm_owner		= XFS_RMAP_OWN_NULL,	/* "free" */
+		.rm_offset		= 0,
+		.rm_flags		= 0,
+	};
+
+	return xfs_getfsmap_helper(mp, priv, &free_rec,
+			XFS_FSB_TO_BB(mp, start));
+}
+
/* Transform a bnobt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_bnobt_helper(
@@ -496,6 +520,108 @@ xfs_getfsmap_logdev(
return xfs_getfsmap_rtdev_helper(&cur, &rmap, info);
}
+/*
+ * Execute a getfsmap query against the realtime data device (rtbitmap).
+ *
+ * @keys[0] and @keys[1] are the low and high keys of the query.  They
+ * are converted into the low/high rmap search keys kept in @info, then
+ * the realtime bitmap is walked from block 0 under the rtbitmap ILOCK
+ * and every free extent found is passed to
+ * xfs_getfsmap_rtdev_rtbitmap_helper().  A final call with info->last
+ * set reports whatever remains at the end of the device.
+ *
+ * Returns 0 if the low key lies beyond the end of the rt device,
+ * otherwise the first error encountered or the result of the final
+ * helper call.
+ */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap(
+	struct xfs_trans		*tp,
+	struct xfs_fsmap		*keys,
+	struct xfs_getfsmap_info	*info)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_fsmap		*dkey_low;
+	struct xfs_fsmap		*dkey_high;
+	xfs_fsblock_t			start_fsb;
+	xfs_fsblock_t			end_fsb;
+	xfs_rtblock_t			rtstart;
+	xfs_rtblock_t			rtend;
+	xfs_rtblock_t			rem;
+	xfs_daddr_t			eofs;
+	int				is_free;
+	int				error = 0;
+
+	dkey_low = keys;
+	dkey_high = keys + 1;
+
+	/* Nothing to do past the end of the rt device; clamp the high key. */
+	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+	if (dkey_low->fmr_physical >= eofs)
+		return 0;
+	if (dkey_high->fmr_physical >= eofs)
+		dkey_high->fmr_physical = eofs - 1;
+	start_fsb = XFS_BB_TO_FSBT(mp, dkey_low->fmr_physical);
+	end_fsb = XFS_BB_TO_FSB(mp, dkey_high->fmr_physical);
+
+	/* Set up search keys */
+	info->low.rm_startblock = start_fsb;
+	error = xfs_fsmap_owner_to_rmap(dkey_low, &info->low);
+	if (error)
+		return error;
+	info->low.rm_offset = XFS_BB_TO_FSBT(mp, dkey_low->fmr_offset);
+	info->low.rm_blockcount = 0;
+	xfs_getfsmap_set_irec_flags(&info->low, dkey_low);
+
+	info->high.rm_startblock = end_fsb;
+	error = xfs_fsmap_owner_to_rmap(dkey_high, &info->high);
+	if (error)
+		return error;
+	info->high.rm_offset = XFS_BB_TO_FSBT(mp, dkey_high->fmr_offset);
+	info->high.rm_blockcount = 0;
+	xfs_getfsmap_set_irec_flags(&info->high, dkey_high);
+
+	info->missing_owner = FMR_OWN_UNKNOWN;
+
+	trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
+			info->low.rm_startblock,
+			info->low.rm_blockcount,
+			info->low.rm_owner,
+			info->low.rm_offset);
+
+	trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
+			info->high.rm_startblock,
+			info->high.rm_blockcount,
+			info->high.rm_owner,
+			info->high.rm_offset);
+
+	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+
+	/* Walk the whole bitmap, reporting each free extent we find. */
+	rtstart = 0;
+	rem = mp->m_sb.sb_rblocks;
+	while (rem) {
+		/* Is the first block free? */
+		error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
+				&is_free);
+		if (error)
+			goto out_unlock;
+
+		/* How long does the extent go for? */
+		error = xfs_rtfind_forw(mp, tp, rtstart,
+				mp->m_sb.sb_rblocks - 1, &rtend);
+		if (error)
+			goto out_unlock;
+
+		if (is_free) {
+			error = xfs_getfsmap_rtdev_rtbitmap_helper(mp,
+					rtstart, rtend, info);
+			if (error)
+				goto out_unlock;
+		}
+
+		rem -= rtend - rtstart + 1;
+		rtstart = rtend + 1;
+	}
+
+out_unlock:
+	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+
+	/*
+	 * A failure during the walk must reach the caller; don't let the
+	 * trailing report below overwrite it.
+	 */
+	if (error)
+		return error;
+
+	/* Report any free space at the end of the rtdev */
+	info->last = true;
+	return xfs_getfsmap_rtdev_rtbitmap_helper(mp, end_fsb, 0, info);
+}
+
/* Execute a getfsmap query against the regular data device. */
STATIC int
xfs_getfsmap_datadev(
@@ -741,6 +867,9 @@ xfs_getfsmap_is_valid_device(
if (mp->m_logdev_targp &&
fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
return true;
+ if (mp->m_rtdev_targp &&
+ fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
+ return true;
return false;
}
@@ -818,6 +947,10 @@ xfs_getfsmap(
handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
handlers[1].fn = xfs_getfsmap_logdev;
}
+ if (mp->m_rtdev_targp) {
+ handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
+ handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
+ }
xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
xfs_getfsmap_dev_compare);
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 355dd9e..f798a3e 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -126,6 +126,8 @@ int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
# define xfs_growfs_rt(mp,in) (ENOSYS)
+# define xfs_rtcheck_range(...) (ENOSYS)
+# define xfs_rtfind_forw(...) (ENOSYS)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 08/47] xfs: add scrub tracepoints
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (6 preceding siblings ...)
2017-01-07 0:36 ` [PATCH 07/47] xfs: getfsmap should fall back to rtbitmap when rtrmapbt " Darrick J. Wong
@ 2017-01-07 0:36 ` Darrick J. Wong
2017-01-07 0:36 ` [PATCH 09/47] xfs: create an ioctl to scrub AG metadata Darrick J. Wong
` (39 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:36 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_types.h | 5 +
fs/xfs/xfs_trace.h | 372 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 377 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 717909f..04145e5 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -94,6 +94,11 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
#define XFS_ATTR_FORK 1
#define XFS_COW_FORK 2
+#define XFS_FORK_DESC \
+ { XFS_DATA_FORK, "data" }, \
+ { XFS_ATTR_FORK, "attr" }, \
+ { XFS_COW_FORK, "CoW" } /* fork id -> name pairs for __print_symbolic() */
+
/*
* Min numbers of data/attr fork btree root pointers.
*/
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 09f4755..6427c70 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3351,6 +3351,378 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
+/* scrub */
+
+/*
+ * Symbolic names for scrub types, consumed by __print_symbolic() below.
+ * This is only a placeholder table; no scrub types are named yet.
+ */
+#define XFS_SCRUB_TYPE_DESC \
+	{ 0, NULL }
+
+/*
+ * Event class for the start and end of a scrub request.  @ip is the
+ * inode passed to xfs_scrub_metadata(); type/agno/inum/gen/flags are the
+ * fields of the scrub request and @error is the status at that point.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+DECLARE_EVENT_CLASS(xfs_scrub_class,
+	TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
+		 xfs_ino_t inum, unsigned int gen, unsigned int flags,
+		 int error),
+	TP_ARGS(ip, type, agno, inum, gen, flags, error),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, type)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_ino_t, inum)
+		__field(unsigned int, gen)
+		__field(unsigned int, flags)
+		__field(int, error)
+	),
+	TP_fast_assign(
+		__entry->dev = ip->i_mount->m_super->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->type = type;
+		__entry->agno = agno;
+		__entry->inum = inum;
+		__entry->gen = gen;
+		__entry->flags = flags;
+		__entry->error = error;
+	),
+	TP_printk("dev %d:%d ino %llu type %s agno %u inum %llu gen %u flags 0x%x error %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_DESC),
+		  __entry->agno,
+		  __entry->inum,
+		  __entry->gen,
+		  __entry->flags,
+		  __entry->error)
+)
+/* Instantiate a tracepoint of xfs_scrub_class. */
+#define DEFINE_SCRUB_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_class, name, \
+	TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno, \
+		 xfs_ino_t inum, unsigned int gen, unsigned int flags, \
+		 int error), \
+	TP_ARGS(ip, type, agno, inum, gen, flags, error))
+
+/*
+ * Event class describing a position in a short-form (per-AG) btree:
+ * the AG and block, the btree type, the level being visited, the total
+ * number of levels and the index within the block.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+DECLARE_EVENT_CLASS(xfs_scrub_sbtree_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t bno,
+		 xfs_btnum_t btnum, int level, int nlevels, int ptr),
+	TP_ARGS(mp, agno, bno, btnum, level, nlevels, ptr),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_btnum_t, btnum)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__field(int, level)
+		__field(int, nlevels)
+		__field(int, ptr)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->btnum = btnum;
+		__entry->bno = bno;
+		__entry->level = level;
+		__entry->nlevels = nlevels;
+		__entry->ptr = ptr;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u btnum %d level %d nlevels %d ptr %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->bno,
+		  __entry->btnum,
+		  __entry->level,
+		  __entry->nlevels,
+		  __entry->ptr)
+)
+/* Instantiate a tracepoint of xfs_scrub_sbtree_class. */
+#define DEFINE_SCRUB_SBTREE_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_sbtree_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t bno, \
+		 xfs_btnum_t btnum, int level, int nlevels, int ptr), \
+	TP_ARGS(mp, agno, bno, btnum, level, nlevels, ptr))
+
+DEFINE_SCRUB_EVENT(xfs_scrub);
+DEFINE_SCRUB_EVENT(xfs_scrub_done);
+DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec);
+DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key);
+
+/*
+ * An operation failed while scrubbing AG metadata.  Records the AG and
+ * block, the kind of metadata being examined, the error code and the
+ * function/line that noticed the failure.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+TRACE_EVENT(xfs_scrub_op_error,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t bno,
+		 const char *type, int error, const char *func,
+		 int line),
+	TP_ARGS(mp, agno, bno, type, error, func, line),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__string(type, type)
+		__field(int, error)
+		__string(func, func)
+		__field(int, line)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->bno = bno;
+		__assign_str(type, type);
+		__entry->error = error;
+		__assign_str(func, func);
+		__entry->line = line;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u type '%s' error %d fn %s:%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->bno,
+		  __get_str(type),
+		  __entry->error,
+		  __get_str(func),
+		  __entry->line)
+);
+
+/*
+ * An operation failed while scrubbing file metadata.  Records the inode,
+ * the fork and file offset involved, the kind of metadata being
+ * examined, the error code and the function/line that noticed it.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+TRACE_EVENT(xfs_scrub_file_op_error,
+	TP_PROTO(struct xfs_inode *ip, int whichfork, xfs_fileoff_t offset,
+		 const char *type, int error, const char *func,
+		 int line),
+	TP_ARGS(ip, whichfork, offset, type, error, func, line),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(xfs_fileoff_t, offset)
+		__string(type, type)
+		__field(int, error)
+		__string(func, func)
+		__field(int, line)
+	),
+	TP_fast_assign(
+		__entry->dev = ip->i_mount->m_super->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->whichfork = whichfork;
+		__entry->offset = offset;
+		__assign_str(type, type);
+		__entry->error = error;
+		__assign_str(func, func);
+		__entry->line = line;
+	),
+	TP_printk("dev %d:%d ino %llu %s offset %llu type '%s' error %d fn %s:%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_symbolic(__entry->whichfork, XFS_FORK_DESC),
+		  __entry->offset,
+		  __get_str(type),
+		  __entry->error,
+		  __get_str(func),
+		  __entry->line)
+);
+
+/*
+ * Event class for a failed check on a metadata block.  Records the AG
+ * and block, the kind of metadata, the text of the check that failed
+ * and the function/line that performed it.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+DECLARE_EVENT_CLASS(xfs_scrub_block_error_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t bno,
+		 const char *type, const char *check, const char *func,
+		 int line),
+	TP_ARGS(mp, agno, bno, type, check, func, line),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__string(type, type)
+		__string(check, check)
+		__string(func, func)
+		__field(int, line)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->bno = bno;
+		__assign_str(type, type);
+		__assign_str(check, check);
+		__assign_str(func, func);
+		__entry->line = line;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u type '%s' check '%s' fn %s:%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->bno,
+		  __get_str(type),
+		  __get_str(check),
+		  __get_str(func),
+		  __entry->line)
+)
+
+/* Instantiate a tracepoint of xfs_scrub_block_error_class. */
+#define DEFINE_SCRUB_BLOCK_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_block_error_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t bno, \
+		 const char *type, const char *check, const char *func, \
+		 int line), \
+	TP_ARGS(mp, agno, bno, type, check, func, line))
+
+DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error);
+DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen);
+
+/*
+ * Event class for a failed check on inode metadata.  Records the inode
+ * number, an AG number/block locating the problem, the kind of
+ * metadata, the text of the check that failed and the function/line
+ * that performed it.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno,
+		 xfs_agblock_t bno, const char *type, const char *check,
+		 const char *func, int line),
+	TP_ARGS(mp, ino, agno, bno, type, check, func, line),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__string(type, type)
+		__string(check, check)
+		__string(func, func)
+		__field(int, line)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->ino = ino;
+		__entry->agno = agno;
+		__entry->bno = bno;
+		__assign_str(type, type);
+		__assign_str(check, check);
+		__assign_str(func, func);
+		__entry->line = line;
+	),
+	TP_printk("dev %d:%d ino %llu agno %u agbno %u type '%s' check '%s' fn %s:%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->agno,
+		  __entry->bno,
+		  __get_str(type),
+		  __get_str(check),
+		  __get_str(func),
+		  __entry->line)
+)
+
+/* Instantiate a tracepoint of xfs_scrub_ino_error_class. */
+#define DEFINE_SCRUB_INO_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_ino_error_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno, \
+		 xfs_agblock_t bno, const char *type, const char *check, \
+		 const char *func, int line), \
+	TP_ARGS(mp, ino, agno, bno, type, check, func, line))
+
+DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error);
+DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen);
+
+/*
+ * A check on file data failed.  Records the inode, the fork and file
+ * offset involved, the kind of metadata, the text of the check that
+ * failed and the function/line that performed it.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+TRACE_EVENT(xfs_scrub_data_error,
+	TP_PROTO(struct xfs_inode *ip, int whichfork, xfs_fileoff_t offset,
+		 const char *type, const char *check, const char *func,
+		 int line),
+	TP_ARGS(ip, whichfork, offset, type, check, func, line),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, whichfork)
+		__field(xfs_fileoff_t, offset)
+		__string(type, type)
+		__string(check, check)
+		__string(func, func)
+		__field(int, line)
+	),
+	TP_fast_assign(
+		__entry->dev = ip->i_mount->m_super->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->whichfork = whichfork;
+		__entry->offset = offset;
+		__assign_str(type, type);
+		__assign_str(check, check);
+		__assign_str(func, func);
+		__entry->line = line;
+	),
+	TP_printk("dev %d:%d ino %llu %s fork offset %llu type '%s' check '%s' fn %s:%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_symbolic(__entry->whichfork, XFS_FORK_DESC),
+		  __entry->offset,
+		  __get_str(type),
+		  __get_str(check),
+		  __get_str(func),
+		  __entry->line)
+);
+
+/*
+ * An error occurred while cross-referencing against another btree.
+ * Records the btree description, the error code and the function/line
+ * that noticed it.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+TRACE_EVENT(xfs_scrub_xref_error,
+	TP_PROTO(struct xfs_mount *mp, const char *type, int error,
+		 const char *func, int line),
+	TP_ARGS(mp, type, error, func, line),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__string(type, type)
+		__field(int, error)
+		__string(func, func)
+		__field(int, line)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__assign_str(type, type);
+		__entry->error = error;
+		__assign_str(func, func);
+		__entry->line = line;
+	),
+	TP_printk("dev %d:%d btree %s xref error %d fn %s:%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __get_str(type),
+		  __entry->error,
+		  __get_str(func),
+		  __entry->line)
+);
+
+/*
+ * A check on a btree block failed.  Records two strings describing the
+ * btree and the pointer within it, the AG and block, the text of the
+ * check that failed and the function/line that performed it.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+TRACE_EVENT(xfs_scrub_btree_error,
+	TP_PROTO(struct xfs_mount *mp, const char *bt_type, const char *bt_ptr,
+		 xfs_agnumber_t agno, xfs_agblock_t bno, const char *check,
+		 const char *func, int line),
+	TP_ARGS(mp, bt_type, bt_ptr, agno, bno, check, func, line),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__string(bt_type, bt_type)
+		__string(bt_ptr, bt_ptr)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bno)
+		__string(check, check)
+		__string(func, func)
+		__field(int, line)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__assign_str(bt_type, bt_type);
+		__assign_str(bt_ptr, bt_ptr);
+		__entry->agno = agno;
+		__entry->bno = bno;
+		__assign_str(check, check);
+		__assign_str(func, func);
+		__entry->line = line;
+	),
+	TP_printk("dev %d:%d %s %s agno %u agbno %u check '%s' fn %s:%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __get_str(bt_type),
+		  __get_str(bt_ptr),
+		  __entry->agno,
+		  __entry->bno,
+		  __get_str(check),
+		  __get_str(func),
+		  __entry->line)
+);
+
+/*
+ * Event class for AG header locking decisions made by scrub.  Records
+ * the max_ag value tracked by the caller and the AG being considered.
+ *
+ * The format string carries no trailing newline because the trace core
+ * terminates each record itself.
+ */
+DECLARE_EVENT_CLASS(xfs_scrub_ag_lock_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t max_ag,
+		 xfs_agnumber_t agno),
+	TP_ARGS(mp, max_ag, agno),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, max_ag)
+		__field(xfs_agnumber_t, agno)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->max_ag = max_ag;
+		__entry->agno = agno;
+	),
+	TP_printk("dev %d:%d max_ag %u agno %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->max_ag,
+		  __entry->agno)
+)
+/* Instantiate a tracepoint of xfs_scrub_ag_lock_class. */
+#define DEFINE_SCRUB_AG_LOCK_EVENT(name) \
+DEFINE_EVENT(xfs_scrub_ag_lock_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t max_ag, \
+		 xfs_agnumber_t agno), \
+	TP_ARGS(mp, max_ag, agno))
+
+DEFINE_SCRUB_AG_LOCK_EVENT(xfs_scrub_ag_can_lock);
+DEFINE_SCRUB_AG_LOCK_EVENT(xfs_scrub_ag_may_deadlock);
+DEFINE_SCRUB_AG_LOCK_EVENT(xfs_scrub_ag_lock_all);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 09/47] xfs: create an ioctl to scrub AG metadata
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (7 preceding siblings ...)
2017-01-07 0:36 ` [PATCH 08/47] xfs: add scrub tracepoints Darrick J. Wong
@ 2017-01-07 0:36 ` Darrick J. Wong
2017-01-07 0:36 ` [PATCH 10/47] xfs: generic functions to scrub metadata and btrees Darrick J. Wong
` (38 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:36 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Create an ioctl that can be used to scrub internal filesystem metadata.
The new ioctl takes the metadata type, an (optional) AG number, an
(optional) inode number and generation, and a flags argument. This will
be used by the upcoming XFS online scrub tool.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 5 +
fs/xfs/libxfs/xfs_fs.h | 35 ++++
fs/xfs/repair/common.c | 441 +++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.h | 147 +++++++++++++++
fs/xfs/repair/xfs_scrub.h | 29 +++
fs/xfs/xfs_ioctl.c | 28 +++
fs/xfs/xfs_ioctl32.c | 1
fs/xfs/xfs_trace.h | 2
8 files changed, 687 insertions(+), 1 deletion(-)
create mode 100644 fs/xfs/repair/common.c
create mode 100644 fs/xfs/repair/common.h
create mode 100644 fs/xfs/repair/xfs_scrub.h
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 5c90f82..4c2199a 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -101,6 +101,11 @@ xfs-y += xfs_aops.o \
kmem.o \
uuid.o
+# online scrub/repair
+xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
+ common.o \
+ )
+
# low-level transaction/log code
xfs-y += xfs_log.o \
xfs_log_cil.o \
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index e62996f..d8ceaf8 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -554,6 +554,40 @@ typedef struct xfs_swapext
#define XFS_FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
+/*
+ * metadata scrubbing
+ *
+ * Argument structure for XFS_IOC_SCRUB_METADATA.  The caller names the
+ * metadata to check in sm_type and, where relevant, supplies an AG
+ * number or an inode number and generation through the union.  The
+ * union is padded so that the whole structure occupies 64 bytes.
+ */
+struct xfs_scrub_metadata {
+ __u32 sm_type; /* What to check? */
+ __u32 sm_flags; /* flags; see below. */
+ union {
+ __u32 __agno; /* AG number; use sm_agno */
+ struct {
+ __u64 __ino; /* inode number; use sm_ino */
+ __u32 __gen; /* inode generation; use sm_gen */
+ } i;
+ __u64 __reserved[7]; /* pad to 64 bytes */
+ } p;
+};
+#define sm_agno p.__agno
+#define sm_ino p.i.__ino
+#define sm_gen p.i.__gen
+
+/*
+ * Metadata types and flags for scrub operation.
+ *
+ * In the flag comments below, "i:" marks a flag supplied by the caller
+ * (collected in XFS_SCRUB_FLAGS_IN) and "o:" marks a flag reported back
+ * by the scrub (collected in XFS_SCRUB_FLAGS_OUT).
+ */
+#define XFS_SCRUB_TYPE_TEST 0 /* dummy to test ioctl */
+#define XFS_SCRUB_TYPE_MAX 0
+
+#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
+#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
+#define XFS_SCRUB_FLAG_PREEN 0x4 /* o: could be optimized */
+#define XFS_SCRUB_FLAG_XREF_FAIL 0x8 /* o: errors during cross-referencing */
+
+#define XFS_SCRUB_FLAGS_IN (XFS_SCRUB_FLAG_REPAIR)
+#define XFS_SCRUB_FLAGS_OUT (XFS_SCRUB_FLAG_CORRUPT | \
+ XFS_SCRUB_FLAG_PREEN | \
+ XFS_SCRUB_FLAG_XREF_FAIL)
+#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
+
/*
* ioctl limits
*/
@@ -597,6 +631,7 @@ typedef struct xfs_swapext
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
#define XFS_IOC_GETFSMAP _IOWR('X', 59, struct fsmap_head)
+#define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
new file mode 100644
index 0000000..e43e3cc
--- /dev/null
+++ b/fs/xfs/repair/common.c
@@ -0,0 +1,441 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "repair/xfs_scrub.h"
+#include "repair/common.h"
+
+/*
+ * Online Scrub and Repair
+ *
+ * Traditionally, XFS (the kernel driver) did not know how to check or
+ * repair on-disk data structures. That task was left to the xfs_check
+ * and xfs_repair tools, both of which require taking the filesystem
+ * offline for a thorough but time consuming examination. Online
+ * scrub & repair, on the other hand, enables us to check the metadata
+ * for obvious errors while carefully stepping around the filesystem's
+ * ongoing operations, locking rules, etc.
+ *
+ * Given that most XFS metadata consist of records stored in a btree,
+ * most of the checking functions iterate the btree blocks themselves
+ * looking for irregularities. When a record block is encountered, each
+ * record can be checked for obviously bad values. Record values can
+ * also be cross-referenced against other btrees to look for potential
+ * misunderstandings between pieces of metadata.
+ *
+ * It is expected that the checkers responsible for per-AG metadata
+ * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
+ * metadata structure, and perform any relevant cross-referencing before
+ * unlocking the AG and returning the results to userspace. These
+ * scrubbers must not keep an AG locked for too long to avoid tying up
+ * the block and inode allocators.
+ *
+ * Block maps and b-trees rooted in an inode present a special challenge
+ * because they can involve extents from any AG. The general scrubber
+ * structure of lock -> check -> xref -> unlock still holds, but AG
+ * locking order rules /must/ be obeyed to avoid deadlocks. The
+ * ordering rule, of course, is that we must lock in increasing AG
+ * order. Helper functions are provided to track which AG headers we've
+ * already locked. If we detect an imminent locking order violation, we
+ * can signal a potential deadlock, in which case the scrubber can jump
+ * out to the top level, lock all the AGs in order, and retry the scrub.
+ *
+ * For file data (directories, extended attributes, symlinks) scrub, we
+ * can simply lock the inode and walk the data. For btree data
+ * (directories and attributes) we follow the same btree-scrubbing
+ * strategy outlined previously to check the records.
+ *
+ * We use a bit of trickery with transactions to avoid buffer deadlocks
+ * if there is a cycle in the metadata. The basic problem is that
+ * travelling down a btree involves locking the current buffer at each
+ * tree level. If a pointer should somehow point back to a buffer that
+ * we've already examined, we will deadlock due to the second buffer
+ * locking attempt. Note however that grabbing a buffer in transaction
+ * context links the locked buffer to the transaction. If we try to
+ * re-grab the buffer in the context of the same transaction, we avoid
+ * the second lock attempt and continue. Between the verifier and the
+ * scrubber, something will notice that something is amiss and report
+ * the corruption. Therefore, each scrubber will allocate an empty
+ * transaction, attach buffers to it, and cancel the transaction at the
+ * end of the scrub run. Cancelling a non-dirty transaction simply
+ * unlocks the buffers.
+ *
+ * There are four pieces of data that scrub can communicate to
+ * userspace. The first is the error code (errno), which can be used to
+ * communicate operational errors in performing the scrub. There are
+ * also three flags that can be set in the scrub context. If the data
+ * structure itself is corrupt, the "corrupt" flag should be set. If
+ * the metadata is correct but otherwise suboptimal, there's a "preen"
+ * flag to signal that. Finally, if we were unable to access a data
+ * structure to perform cross-referencing, we can signal that as well.
+ */
+
+/*
+ * Check for operational errors.
+ *
+ * Returns true when *error is zero.  Otherwise the failure is traced
+ * and false is returned.  A CRC or corruption error is converted into
+ * the "corrupt" output flag and *error is reset to zero; any other
+ * error is left in *error for the caller.
+ */
+bool
+xfs_scrub_op_ok(
+	struct xfs_scrub_context	*sc,
+	xfs_agnumber_t			agno,
+	xfs_agblock_t			bno,
+	const char			*type,
+	int				*error,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	int				err = *error;
+
+	if (!err)
+		return true;
+
+	trace_xfs_scrub_op_error(mp, agno, bno, type, err, func, line);
+
+	/* Bad metadata is a scrub finding, not an operational failure. */
+	if (err == -EFSCORRUPTED || err == -EFSBADCRC) {
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		*error = 0;
+	}
+
+	return false;
+}
+
+/*
+ * Check for operational errors for a file offset.
+ *
+ * Returns true when *error is zero.  Otherwise the failure is traced
+ * against sc->ip, @whichfork and @offset, and false is returned.  A CRC
+ * or corruption error is converted into the "corrupt" output flag and
+ * *error is reset to zero; any other error is left for the caller.
+ */
+bool
+xfs_scrub_file_op_ok(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset,
+	const char			*type,
+	int				*error,
+	const char			*func,
+	int				line)
+{
+	int				err = *error;
+
+	if (!err)
+		return true;
+
+	trace_xfs_scrub_file_op_error(sc->ip, whichfork, offset, type, err,
+			func, line);
+
+	/* Bad metadata is a scrub finding, not an operational failure. */
+	if (err == -EFSCORRUPTED || err == -EFSBADCRC) {
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		*error = 0;
+	}
+
+	return false;
+}
+
+/*
+ * Check for metadata block optimization possibilities.
+ *
+ * When @fs_ok is false, set the "preen" output flag and trace the AG
+ * number and AG block of @bp along with the check that failed.  Returns
+ * @fs_ok unchanged.
+ */
+bool
+xfs_scrub_block_preen(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+
+	if (!fs_ok) {
+		xfs_fsblock_t		fsb = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+		xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, fsb);
+		xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
+		trace_xfs_scrub_block_preen(mp, agno, agbno, type, check,
+				func, line);
+	}
+
+	return fs_ok;
+}
+
+/*
+ * Check for metadata block corruption.
+ *
+ * When @fs_ok is false, set the "corrupt" output flag and trace the AG
+ * number and AG block of @bp along with the check that failed.  Returns
+ * @fs_ok unchanged.
+ */
+bool
+xfs_scrub_block_ok(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+
+	if (!fs_ok) {
+		xfs_fsblock_t		fsb = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+		xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, fsb);
+		xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		trace_xfs_scrub_block_error(mp, agno, agbno, type, check,
+				func, line);
+	}
+
+	return fs_ok;
+}
+
+/*
+ * Check for inode metadata corruption.
+ *
+ * When @fs_ok is false, set the "corrupt" output flag and emit a
+ * tracepoint locating the problem: at the AG block backing @bp when a
+ * buffer is supplied, otherwise at the AG number and AG inode number
+ * derived from @ino.  Returns @fs_ok unchanged.
+ *
+ * The location is always derived from the @ino argument rather than
+ * from sc->ip, so the function is safe to call when the scrub context
+ * has no inode attached.
+ */
+bool
+xfs_scrub_ino_ok(
+	struct xfs_scrub_context	*sc,
+	xfs_ino_t			ino,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	xfs_fsblock_t			fsbno;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			bno;
+
+	if (fs_ok)
+		return fs_ok;
+
+	if (bp) {
+		fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+		agno = XFS_FSB_TO_AGNO(mp, fsbno);
+		bno = XFS_FSB_TO_AGBNO(mp, fsbno);
+	} else {
+		/* No buffer: fall back to the inode we were asked about. */
+		agno = XFS_INO_TO_AGNO(mp, ino);
+		bno = XFS_INO_TO_AGINO(mp, ino);
+	}
+
+	sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+	trace_xfs_scrub_ino_error(mp, ino, agno, bno, type, check, func, line);
+	return fs_ok;
+}
+
+/*
+ * Check for inode metadata optimization possibilities.
+ *
+ * When @fs_ok is false, set the "preen" output flag and emit a
+ * tracepoint for sc->ip, located at the AG block backing @bp when a
+ * buffer is supplied and otherwise at the AG number and AG inode number
+ * of sc->ip.  Returns @fs_ok unchanged.
+ */
+bool
+xfs_scrub_ino_preen(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_inode		*ip = sc->ip;
+	struct xfs_mount		*mp = ip->i_mount;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+
+	if (!fs_ok) {
+		if (!bp) {
+			agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+			agbno = XFS_INO_TO_AGINO(mp, ip->i_ino);
+		} else {
+			xfs_fsblock_t	fsb = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+
+			agno = XFS_FSB_TO_AGNO(mp, fsb);
+			agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+		}
+
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
+		trace_xfs_scrub_ino_preen(mp, ip->i_ino, agno, agbno, type,
+				check, func, line);
+	}
+
+	return fs_ok;
+}
+
+/*
+ * Check for file data block corruption.
+ *
+ * When @fs_ok is false, set the "corrupt" output flag and trace the
+ * failed check against sc->ip, @whichfork and @offset.  Returns @fs_ok
+ * unchanged.
+ */
+bool
+xfs_scrub_data_ok(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	if (!fs_ok) {
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		trace_xfs_scrub_data_error(sc->ip, whichfork, offset, type,
+				check, func, line);
+	}
+
+	return fs_ok;
+}
+
+/* Dummy scrubber */
+
+/*
+ * Test scrubber: treat sm_gen as a set of output flags to echo back.
+ * Every XFS_SCRUB_FLAGS_OUT bit set in sm_gen is set in sm_flags.
+ * Returns -ENOENT if sm_gen carries any bit outside
+ * XFS_SCRUB_FLAGS_OUT, 0 otherwise.
+ */
+STATIC int
+xfs_scrub_dummy(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_scrub_metadata	*sm = sc->sm;
+	__u32				wanted = sm->sm_gen;
+
+	sm->sm_flags |= wanted & XFS_SCRUB_FLAGS_OUT;
+
+	return (wanted & ~XFS_SCRUB_FLAGS_OUT) ? -ENOENT : 0;
+}
+
+/* Scrub setup and teardown. */
+
+/*
+ * Free all the resources and finish the transactions.
+ *
+ * Cancels the scrub transaction, clears sc->tp and passes @error back
+ * to the caller untouched.
+ */
+int
+xfs_scrub_teardown(
+	struct xfs_scrub_context	*sc,
+	int				error)
+{
+	struct xfs_trans		*tp = sc->tp;
+
+	sc->tp = NULL;
+	xfs_trans_cancel(tp);
+
+	return error;
+}
+
+/*
+ * Set us up with a transaction and an empty context.
+ *
+ * Zeroes @sc, records the request @sm in it and allocates the scrub
+ * transaction into sc->tp.  @ip is only used to find the mount, and
+ * @retry_deadlocked is not consulted here.  Returns the result of
+ * xfs_scrub_trans_alloc().
+ */
+int
+xfs_scrub_setup(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ memset(sc, 0, sizeof(*sc));
+ sc->sm = sm;
+ return xfs_scrub_trans_alloc(sm, mp, &M_RES(mp)->tr_itruncate,
+ 0, 0, 0, &sc->tp);
+}
+
+/*
+ * Scrubbing dispatch.
+ *
+ * One xfs_scrub_meta_fns entry exists per scrub type; the table below
+ * is indexed by sm_type in xfs_scrub_metadata().
+ */
+
+struct xfs_scrub_meta_fns {
+ int (*setup)(struct xfs_scrub_context *, struct xfs_inode *,
+ struct xfs_scrub_metadata *, bool); /* prepare the context; the bool is true on a retry after -EDEADLOCK */
+ int (*scrub)(struct xfs_scrub_context *); /* check the metadata; a NULL entry makes the type -ENOENT */
+ int (*repair)(struct xfs_scrub_context *); /* not called by the dispatcher in this file yet */
+ bool (*has)(struct xfs_sb *); /* optional: does this fs have the metadata at all? */
+};
+
+static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
+ {xfs_scrub_setup, xfs_scrub_dummy, NULL, NULL}, /* type 0 (XFS_SCRUB_TYPE_TEST) */
+};
+
+/*
+ * Dispatch metadata scrubbing.
+ *
+ * Validates the request in @sm, looks up the scrubber for sm_type, then
+ * runs setup -> scrub -> teardown.  If the first scrub attempt returns
+ * -EDEADLOCK the context is torn down and the whole sequence is retried
+ * once, with setup told that this is a retry.
+ *
+ * Returns -ESHUTDOWN on a shut down filesystem, -EINVAL for any input
+ * flag (unknown flags, and repair which is not supported here), -ENOENT
+ * for an unknown or unavailable scrub type, or the result of the
+ * setup/scrub functions.  Findings are reported through sm_flags.
+ */
+int
+xfs_scrub_metadata(
+	struct xfs_inode		*ip,
+	struct xfs_scrub_metadata	*sm)
+{
+	static bool			warned;
+	struct xfs_scrub_context	sc;
+	struct xfs_mount		*mp = ip->i_mount;
+	const struct xfs_scrub_meta_fns	*ops;
+	bool				retried = false;
+	int				error = 0;
+
+	trace_xfs_scrub(ip, sm->sm_type, sm->sm_agno, sm->sm_ino, sm->sm_gen,
+			sm->sm_flags, error);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -ESHUTDOWN;
+
+	/* Output flags are ours to report; drop whatever was passed in. */
+	sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+
+	/* Reject unknown input flags, and repair requests. */
+	if ((sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) ||
+	    (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR)) {
+		error = -EINVAL;
+		goto out;
+	}
+
+	/* Do we know this scrub type? */
+	if (sm->sm_type > XFS_SCRUB_TYPE_MAX) {
+		error = -ENOENT;
+		goto out;
+	}
+
+	/* Is there a scrubber, and do we even have this type of metadata? */
+	ops = &meta_scrub_fns[sm->sm_type];
+	if (!ops->scrub || (ops->has && !ops->has(&mp->m_sb))) {
+		error = -ENOENT;
+		goto out;
+	}
+
+	/* This isn't a stable feature.  Use with care. */
+	if (!warned) {
+		xfs_alert(mp,
+	"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
+		warned = true;
+	}
+
+	for (;;) {
+		/* Set up for the operation. */
+		error = ops->setup(&sc, ip, sm, retried);
+		if (error)
+			goto out;
+
+		/* Scrub for errors. */
+		error = ops->scrub(&sc);
+		if (error != -EDEADLOCK || retried)
+			break;
+
+		/* First deadlock: release everything and try once more. */
+		retried = true;
+		error = xfs_scrub_teardown(&sc, error);
+		if (error != -EDEADLOCK)
+			goto out;
+	}
+
+	if (!error && (sm->sm_flags & XFS_SCRUB_FLAG_CORRUPT))
+		xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
+
+	error = xfs_scrub_teardown(&sc, error);
+out:
+	trace_xfs_scrub_done(ip, sm->sm_type, sm->sm_agno, sm->sm_ino,
+			sm->sm_gen, sm->sm_flags, error);
+	return error;
+}
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
new file mode 100644
index 0000000..af88d67
--- /dev/null
+++ b/fs/xfs/repair/common.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_REPAIR_COMMON_H__
+#define __XFS_REPAIR_COMMON_H__
+
+struct xfs_scrub_context {
+ /* General scrub state. */
+ struct xfs_scrub_metadata *sm;
+ struct xfs_trans *tp;
+ struct xfs_inode *ip;
+};
+
+/* Should we end the scrub early? */
+static inline bool
+xfs_scrub_should_terminate(
+ int *error)
+{
+ if (fatal_signal_pending(current)) {
+ if (*error == 0)
+ *error = -EAGAIN;
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Grab a transaction for scrubbing. As written this always allocates an
+ * empty transaction via xfs_trans_alloc_empty(); @sm, @resp, @blocks,
+ * @rtextents and @flags are accepted but not used by this implementation.
+ */
+static inline int
+xfs_scrub_trans_alloc(
+ struct xfs_scrub_metadata *sm,
+ struct xfs_mount *mp,
+ struct xfs_trans_res *resp,
+ uint blocks,
+ uint rtextents,
+ uint flags,
+ struct xfs_trans **tpp)
+{
+ return xfs_trans_alloc_empty(mp, tpp);
+}
+
+/* Check for operational errors. */
+bool xfs_scrub_op_ok(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+ xfs_agblock_t bno, const char *type, int *error,
+ const char *func, int line);
+#define XFS_SCRUB_OP_ERROR_GOTO(sc, agno, bno, type, error, label) \
+ do { \
+ if (!xfs_scrub_op_ok((sc), (agno), (bno), (type), \
+ (error), __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Check for operational errors for a file offset. */
+bool xfs_scrub_file_op_ok(struct xfs_scrub_context *sc, int whichfork,
+ xfs_fileoff_t offset, const char *type,
+ int *error, const char *func, int line);
+#define XFS_SCRUB_FILE_OP_ERROR_GOTO(sc, which, off, type, error, label) \
+ do { \
+ if (!xfs_scrub_file_op_ok((sc), (which), (off), (type), \
+ (error), __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Check for metadata block optimization possibilities. */
+bool xfs_scrub_block_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
+ const char *type, bool fs_ok, const char *check,
+ const char *func, int line);
+#define XFS_SCRUB_PREEN(sc, bp, type, fs_ok) \
+ xfs_scrub_block_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
+ __func__, __LINE__)
+
+/* Check for inode metadata optimization possibilities. */
+bool xfs_scrub_ino_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
+ const char *type, bool fs_ok, const char *check,
+ const char *func, int line);
+#define XFS_SCRUB_INO_PREEN(sc, bp, type, fs_ok) \
+ xfs_scrub_ino_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
+ __func__, __LINE__)
+
+/* Check for metadata block corruption. */
+bool xfs_scrub_block_ok(struct xfs_scrub_context *sc, struct xfs_buf *bp,
+ const char *type, bool fs_ok, const char *check,
+ const char *func, int line);
+#define XFS_SCRUB_CHECK(sc, bp, type, fs_ok) \
+ xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), #fs_ok, \
+ __func__, __LINE__)
+#define XFS_SCRUB_GOTO(sc, bp, type, fs_ok, label) \
+ do { \
+ if (!xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), \
+ #fs_ok, __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Check for inode metadata corruption; the _GOTO form jumps to @label when the check fails. */
+bool xfs_scrub_ino_ok(struct xfs_scrub_context *sc, xfs_ino_t ino,
+ struct xfs_buf *bp, const char *type, bool fs_ok,
+ const char *check, const char *func, int line);
+#define XFS_SCRUB_INO_CHECK(sc, ino, bp, type, fs_ok) \
+ xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), #fs_ok, \
+ __func__, __LINE__)
+#define XFS_SCRUB_INO_GOTO(sc, ino, bp, type, fs_ok, label) \
+ do { \
+ if (!xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), \
+ #fs_ok, __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Check for file data block corruption; the _GOTO form jumps to @label when the check fails. */
+bool xfs_scrub_data_ok(struct xfs_scrub_context *sc, int whichfork,
+ xfs_fileoff_t offset, const char *type, bool fs_ok,
+ const char *check, const char *func, int line);
+#define XFS_SCRUB_DATA_CHECK(sc, whichfork, offset, type, fs_ok) \
+ xfs_scrub_data_ok((sc), (whichfork), (offset), (type), (fs_ok), \
+ #fs_ok, __func__, __LINE__)
+#define XFS_SCRUB_DATA_GOTO(sc, whichfork, offset, type, fs_ok, label) \
+ do { \
+ if (!xfs_scrub_data_ok((sc), (whichfork), (offset), \
+ (type), (fs_ok), #fs_ok, __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Setup functions */
+
+int xfs_scrub_teardown(struct xfs_scrub_context *sc, int error);
+int xfs_scrub_setup(struct xfs_scrub_context *sc, struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm, bool retry_deadlocked);
+
+#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/xfs_scrub.h b/fs/xfs/repair/xfs_scrub.h
new file mode 100644
index 0000000..64e21b4
--- /dev/null
+++ b/fs/xfs/repair/xfs_scrub.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_H__
+#define __XFS_SCRUB_H__
+
+#ifndef CONFIG_XFS_DEBUG
+# define xfs_scrub_metadata(ip, sm) (-ENOTTY)
+#else
+int xfs_scrub_metadata(struct xfs_inode *ip, struct xfs_scrub_metadata *sm);
+#endif /* CONFIG_XFS_DEBUG */
+
+#endif /* __XFS_SCRUB_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d39da5d..e9a4619 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -43,6 +43,7 @@
#include "xfs_acl.h"
#include "xfs_btree.h"
#include "xfs_fsmap.h"
+#include "repair/xfs_scrub.h"
#include <linux/capability.h>
#include <linux/dcache.h>
@@ -1706,6 +1707,30 @@ xfs_ioc_getfsmap(
return 0;
}
+STATIC int /* 0, -EPERM, -EFAULT, or the xfs_scrub_metadata() error */
+xfs_ioc_scrub_metadata(
+ struct xfs_inode *ip,
+ void __user *arg)
+{
+ struct xfs_scrub_metadata scrub; /* kernel copy of the user's request */
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&scrub, arg, sizeof(scrub)))
+ return -EFAULT;
+
+ error = xfs_scrub_metadata(ip, &scrub);
+ if (error)
+ return error; /* on failure nothing is copied back to userspace */
+
+ if (copy_to_user(arg, &scrub, sizeof(scrub))) /* return the updated request, including flags */
+ return -EFAULT;
+
+ return 0;
+}
+
int
xfs_ioc_swapext(
xfs_swapext_t *sxp)
@@ -1891,6 +1916,9 @@ xfs_file_ioctl(
return -EPERM;
return xfs_ioc_getfsmap(ip, arg);
+ case XFS_IOC_SCRUB_METADATA:
+ return xfs_ioc_scrub_metadata(ip, arg);
+
case XFS_IOC_FD_TO_HANDLE:
case XFS_IOC_PATH_TO_HANDLE:
case XFS_IOC_PATH_TO_FSHANDLE: {
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 5ba41b7..40952b1 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -555,6 +555,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_ERROR_INJECTION:
case XFS_IOC_ERROR_CLEARALL:
case XFS_IOC_GETFSMAP:
+ case XFS_IOC_SCRUB_METADATA:
return xfs_file_ioctl(filp, cmd, p);
#ifndef BROKEN_X86_ALIGNMENT
/* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6427c70..3e04690 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3353,7 +3353,7 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
/* scrub */
#define XFS_SCRUB_TYPE_DESC \
- { 0, NULL }
+ { XFS_SCRUB_TYPE_TEST, "dummy" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 10/47] xfs: generic functions to scrub metadata and btrees
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (8 preceding siblings ...)
2017-01-07 0:36 ` [PATCH 09/47] xfs: create an ioctl to scrub AG metadata Darrick J. Wong
@ 2017-01-07 0:36 ` Darrick J. Wong
2017-01-07 0:36 ` [PATCH 11/47] xfs: scrub the backup superblocks Darrick J. Wong
` (37 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:36 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Create a function that walks a btree, checking the integrity of each
btree block (headers, keys, records) and calling back to the caller
to perform further checks on the records. Add some helper functions
so that we report detailed scrub errors in a uniform manner in dmesg.
These are helper functions for subsequent patches.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_alloc.c | 2
fs/xfs/libxfs/xfs_alloc.h | 2
fs/xfs/libxfs/xfs_btree.c | 41 ++-
fs/xfs/libxfs/xfs_btree.h | 17 +
fs/xfs/libxfs/xfs_format.h | 2
fs/xfs/repair/btree.c | 658 ++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/btree.h | 95 ++++++
fs/xfs/repair/common.c | 234 ++++++++++++++++
fs/xfs/repair/common.h | 53 ++++
10 files changed, 1095 insertions(+), 10 deletions(-)
create mode 100644 fs/xfs/repair/btree.c
create mode 100644 fs/xfs/repair/btree.h
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 4c2199a..baec6d5 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -103,6 +103,7 @@ xfs-y += xfs_aops.o \
# online scrub/repair
xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
+ btree.o \
common.o \
)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index e496447..1b6bddb 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -629,7 +629,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = {
/*
* Read in the allocation group free block array.
*/
-STATIC int /* error */
+int /* error */
xfs_alloc_read_agfl(
xfs_mount_t *mp, /* mount point structure */
xfs_trans_t *tp, /* transaction pointer */
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 0dc34bf..89a23be 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -217,6 +217,8 @@ xfs_alloc_get_rec(
int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
+int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, struct xfs_buf **bpp);
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
struct xfs_buf **agbp);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 1e68fd8..3fa30a2 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -552,7 +552,7 @@ xfs_btree_ptr_offset(
/*
* Return a pointer to the n-th record in the btree block.
*/
-STATIC union xfs_btree_rec *
+union xfs_btree_rec *
xfs_btree_rec_addr(
struct xfs_btree_cur *cur,
int n,
@@ -565,7 +565,7 @@ xfs_btree_rec_addr(
/*
* Return a pointer to the n-th key in the btree block.
*/
-STATIC union xfs_btree_key *
+union xfs_btree_key *
xfs_btree_key_addr(
struct xfs_btree_cur *cur,
int n,
@@ -578,7 +578,7 @@ xfs_btree_key_addr(
/*
* Return a pointer to the n-th high key in the btree block.
*/
-STATIC union xfs_btree_key *
+union xfs_btree_key *
xfs_btree_high_key_addr(
struct xfs_btree_cur *cur,
int n,
@@ -591,7 +591,7 @@ xfs_btree_high_key_addr(
/*
* Return a pointer to the n-th block pointer in the btree block.
*/
-STATIC union xfs_btree_ptr *
+union xfs_btree_ptr *
xfs_btree_ptr_addr(
struct xfs_btree_cur *cur,
int n,
@@ -625,7 +625,7 @@ xfs_btree_get_iroot(
* Retrieve the block pointer from the cursor at the given level.
* This may be an inode btree root or from a buffer.
*/
-STATIC struct xfs_btree_block * /* generic btree block pointer */
+struct xfs_btree_block * /* generic btree block pointer */
xfs_btree_get_block(
struct xfs_btree_cur *cur, /* btree cursor */
int level, /* level in btree */
@@ -1736,7 +1736,7 @@ xfs_btree_decrement(
return error;
}
-STATIC int
+int
xfs_btree_lookup_get_block(
struct xfs_btree_cur *cur, /* btree cursor */
int level, /* level in the btree */
@@ -4882,3 +4882,32 @@ xfs_btree_count_blocks(
return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
blocks);
}
+
+/* Range-query callback: abort the query as soon as any record is visited. */
+STATIC int
+xfs_btree_has_record_helper(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ return XFS_BTREE_QUERY_RANGE_ABORT;
+}
+
+/* Set *exists if a range query over [low, high] visits any record; returns 0 or the query error. */
+int
+xfs_btree_has_record(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_irec *low,
+ union xfs_btree_irec *high,
+ bool *exists)
+{
+ int error;
+
+ error = xfs_btree_query_range(cur, low, high,
+ &xfs_btree_has_record_helper, NULL);
+ if (error && error != XFS_BTREE_QUERY_RANGE_ABORT) /* real failure; *exists is left untouched */
+ return error;
+ *exists = error == XFS_BTREE_QUERY_RANGE_ABORT; /* the helper aborts on the first record seen */
+
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 4feea6d..ca2cd5a 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -161,7 +161,6 @@ struct xfs_btree_ops {
const struct xfs_buf_ops *buf_ops;
-#if defined(DEBUG) || defined(XFS_WARN)
/* check that k1 is lower than k2 */
int (*keys_inorder)(struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
@@ -171,7 +170,6 @@ struct xfs_btree_ops {
int (*recs_inorder)(struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2);
-#endif
};
/*
@@ -504,4 +502,19 @@ int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
+union xfs_btree_rec *xfs_btree_rec_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_key_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_high_key_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+union xfs_btree_ptr *xfs_btree_ptr_addr(struct xfs_btree_cur *cur, int n,
+ struct xfs_btree_block *block);
+int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level,
+ union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
+struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
+ int level, struct xfs_buf **bpp);
+int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low,
+ union xfs_btree_irec *high, bool *exists);
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 6b7579e..301effc 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -518,7 +518,7 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
(sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
}
-static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
+static inline bool xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
{
return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
diff --git a/fs/xfs/repair/btree.c b/fs/xfs/repair/btree.c
new file mode 100644
index 0000000..6956503
--- /dev/null
+++ b/fs/xfs/repair/btree.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "repair/common.h"
+#include "repair/btree.h"
+
+/* btree scrubbing */
+
+const char * const btree_types[] = {
+ [XFS_BTNUM_BNO] = "bnobt",
+ [XFS_BTNUM_CNT] = "cntbt",
+ [XFS_BTNUM_RMAP] = "rmapbt",
+ [XFS_BTNUM_BMAP] = "bmapbt",
+ [XFS_BTNUM_INO] = "inobt",
+ [XFS_BTNUM_FINO] = "finobt",
+ [XFS_BTNUM_REFC] = "refcountbt",
+};
+
+/* Fill bt_type, bt_ptr and *fsbno with a description of the cursor at @level for tracing. */
+static inline void
+xfs_scrub_btree_format(
+ struct xfs_btree_cur *cur,
+ int level,
+ char *bt_type,
+ size_t type_len,
+ char *bt_ptr,
+ size_t ptr_len,
+ xfs_fsblock_t *fsbno)
+{
+ char *type = NULL; /* stays NULL if whichfork is none of the three forks below */
+ struct xfs_btree_block *block;
+ struct xfs_buf *bp;
+
+ switch (cur->bc_btnum) {
+ case XFS_BTNUM_BMAP:
+ switch (cur->bc_private.b.whichfork) {
+ case XFS_DATA_FORK:
+ type = "data";
+ break;
+ case XFS_ATTR_FORK:
+ type = "attr";
+ break;
+ case XFS_COW_FORK:
+ type = "CoW";
+ break;
+ }
+ snprintf(bt_type, type_len, "inode %llu %s fork",
+ (unsigned long long)cur->bc_private.b.ip->i_ino,
+ type);
+ break;
+ default:
+ snprintf(bt_type, type_len, "%s", btree_types[cur->bc_btnum]); /* unlike strncpy, always NUL-terminates */
+ break;
+ }
+
+ if (level < cur->bc_nlevels && cur->bc_ptrs[level] >= 1) { /* cursor points at an entry */
+ block = xfs_btree_get_block(cur, level, &bp);
+ snprintf(bt_ptr, ptr_len, " %s %d/%d",
+ level == 0 ? "rec" : "ptr",
+ cur->bc_ptrs[level],
+ be16_to_cpu(block->bb_numrecs));
+ } else
+ bt_ptr[0] = 0;
+
+ if (level < cur->bc_nlevels && cur->bc_bufs[level]) /* prefer the block buffer's address */
+ *fsbno = XFS_DADDR_TO_FSB(cur->bc_mp,
+ cur->bc_bufs[level]->b_bn);
+ else if (cur->bc_flags & XFS_BTREE_LONG_PTRS) /* otherwise report the owning inode's block */
+ *fsbno = XFS_INO_TO_FSB(cur->bc_mp,
+ cur->bc_private.b.ip->i_ino);
+ else /* otherwise report block 0 of the cursor's AG */
+ *fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, 0);
+}
+
+/* Return @fs_ok; when it is false, flag the scrub as corrupt and emit a btree error trace. */
+bool
+xfs_scrub_btree_ok(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ bool fs_ok,
+ const char *check,
+ const char *func,
+ int line)
+{
+ char bt_ptr[24];
+ char bt_type[48];
+ xfs_fsblock_t fsbno;
+
+ if (fs_ok)
+ return fs_ok;
+
+ sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+ xfs_scrub_btree_format(cur, level, bt_type, 48, bt_ptr, 24, &fsbno); /* 48 and 24 must match the array sizes above */
+
+ trace_xfs_scrub_btree_error(cur->bc_mp, bt_type, bt_ptr,
+ XFS_FSB_TO_AGNO(cur->bc_mp, fsbno),
+ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno),
+ check, func, line);
+ return fs_ok;
+}
+
+/* Check for btree operation errors: true if *error is 0, else defer to xfs_scrub_op_ok(). */
+bool
+xfs_scrub_btree_op_ok(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ int *error,
+ const char *func,
+ int line)
+{
+ char bt_ptr[24];
+ char bt_type[48];
+ xfs_fsblock_t fsbno;
+
+ if (*error == 0)
+ return true;
+
+ xfs_scrub_btree_format(cur, level, bt_type, 48, bt_ptr, 24, &fsbno); /* 48 and 24 must match the array sizes above */
+
+ return xfs_scrub_op_ok(sc,
+ XFS_FSB_TO_AGNO(cur->bc_mp, fsbno),
+ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno),
+ bt_type, error, func, line);
+}
+
+/*
+ * Check the leaf record at cur->bc_ptrs[0]: it must sort after the previous
+ * record and lie within the parent block's key(s). Always returns 0.
+ */
+STATIC int
+xfs_scrub_btree_rec(
+ struct xfs_scrub_btree *bs)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ union xfs_btree_rec *rec;
+ union xfs_btree_key key;
+ union xfs_btree_key hkey;
+ union xfs_btree_key *keyp;
+ struct xfs_btree_block *block;
+ struct xfs_btree_block *keyblock;
+ struct xfs_buf *bp;
+
+ block = xfs_btree_get_block(cur, 0, &bp);
+ rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
+
+ if (bp) /* trace with the location of the leaf block's buffer */
+ trace_xfs_scrub_btree_rec(cur->bc_mp,
+ XFS_FSB_TO_AGNO(cur->bc_mp,
+ XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn)),
+ XFS_FSB_TO_AGBNO(cur->bc_mp,
+ XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn)),
+ cur->bc_btnum, 0, cur->bc_nlevels,
+ cur->bc_ptrs[0]);
+ else if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) /* no buffer: trace with the inode's location */
+ trace_xfs_scrub_btree_rec(cur->bc_mp,
+ XFS_INO_TO_AGNO(cur->bc_mp,
+ cur->bc_private.b.ip->i_ino),
+ XFS_INO_TO_AGBNO(cur->bc_mp,
+ cur->bc_private.b.ip->i_ino),
+ cur->bc_btnum, 0, cur->bc_nlevels,
+ cur->bc_ptrs[0]);
+ else
+ trace_xfs_scrub_btree_rec(cur->bc_mp,
+ NULLAGNUMBER, NULLAGBLOCK,
+ cur->bc_btnum, 0, cur->bc_nlevels,
+ cur->bc_ptrs[0]);
+
+ /* If this isn't the first record, are they in order? */
+ XFS_SCRUB_BTREC_CHECK(bs, bs->firstrec ||
+ cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec));
+ bs->firstrec = false;
+ bs->lastrec = *rec; /* remember this record for the next ordering check */
+
+ if (cur->bc_nlevels == 1) /* single-level tree: there are no parent keys */
+ return 0;
+
+ /* Is this at least as large as the parent low key? */
+ cur->bc_ops->init_key_from_rec(&key, rec);
+ keyblock = xfs_btree_get_block(cur, 1, &bp);
+ keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
+ XFS_SCRUB_BTKEY_CHECK(bs, 1,
+ cur->bc_ops->diff_two_keys(cur, &key, keyp) >= 0);
+
+ if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) /* high keys are only checked for overlapping btrees */
+ return 0;
+
+ /* Is this no larger than the parent high key? */
+ cur->bc_ops->init_high_key_from_rec(&hkey, rec);
+ keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
+ XFS_SCRUB_BTKEY_CHECK(bs, 1,
+ cur->bc_ops->diff_two_keys(cur, keyp, &hkey) >= 0);
+
+ return 0;
+}
+
+/*
+ * Check the node key at cur->bc_ptrs[level]: it must sort after the previous
+ * key at this level and lie within the parent's key(s). Always returns 0.
+ */
+STATIC int
+xfs_scrub_btree_key(
+ struct xfs_scrub_btree *bs,
+ int level)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ union xfs_btree_key *key;
+ union xfs_btree_key *keyp;
+ struct xfs_btree_block *block;
+ struct xfs_btree_block *keyblock;
+ struct xfs_buf *bp;
+
+ block = xfs_btree_get_block(cur, level, &bp);
+ key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
+
+ if (bp) /* trace with the location of this block's buffer */
+ trace_xfs_scrub_btree_key(cur->bc_mp,
+ XFS_FSB_TO_AGNO(cur->bc_mp,
+ XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn)),
+ XFS_FSB_TO_AGBNO(cur->bc_mp,
+ XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn)),
+ cur->bc_btnum, level, cur->bc_nlevels,
+ cur->bc_ptrs[level]);
+ else if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) /* no buffer: trace with the inode's location */
+ trace_xfs_scrub_btree_key(cur->bc_mp,
+ XFS_INO_TO_AGNO(cur->bc_mp,
+ cur->bc_private.b.ip->i_ino),
+ XFS_INO_TO_AGBNO(cur->bc_mp,
+ cur->bc_private.b.ip->i_ino),
+ cur->bc_btnum, level, cur->bc_nlevels,
+ cur->bc_ptrs[level]);
+ else
+ trace_xfs_scrub_btree_key(cur->bc_mp,
+ NULLAGNUMBER, NULLAGBLOCK,
+ cur->bc_btnum, level, cur->bc_nlevels,
+ cur->bc_ptrs[level]);
+
+ /* If this isn't the first key, are they in order? */
+ XFS_SCRUB_BTKEY_CHECK(bs, level, bs->firstkey[level] ||
+ cur->bc_ops->keys_inorder(cur, &bs->lastkey[level],
+ key));
+ bs->firstkey[level] = false;
+ bs->lastkey[level] = *key; /* remember this key for the next ordering check */
+
+ if (level + 1 >= cur->bc_nlevels) /* top level: there is no parent to compare against */
+ return 0;
+
+ /* Is this at least as large as the parent low key? */
+ keyblock = xfs_btree_get_block(cur, level + 1, &bp);
+ keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
+ XFS_SCRUB_BTKEY_CHECK(bs, level,
+ cur->bc_ops->diff_two_keys(cur, key, keyp) >= 0);
+
+ if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) /* high keys are only checked for overlapping btrees */
+ return 0;
+
+ /* Is this no larger than the parent high key? */
+ key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
+ keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
+ XFS_SCRUB_BTKEY_CHECK(bs, level,
+ cur->bc_ops->diff_two_keys(cur, keyp, key) >= 0);
+
+ return 0;
+}
+
+/* Check a btree pointer; returns 0 if it passes every check, -EFSCORRUPTED otherwise. */
+static int
+xfs_scrub_btree_ptr(
+ struct xfs_scrub_btree *bs,
+ int level,
+ union xfs_btree_ptr *ptr)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ xfs_daddr_t daddr;
+ xfs_daddr_t eofs;
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == cur->bc_nlevels) { /* pointer to an inode-rooted tree's root: must be zero */
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ XFS_SCRUB_BTKEY_GOTO(bs, level, ptr->l == 0, corrupt);
+ } else {
+ XFS_SCRUB_BTKEY_GOTO(bs, level, ptr->s == 0, corrupt);
+ }
+ return 0;
+ }
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { /* long pointer: must not be NULLFSBLOCK */
+ XFS_SCRUB_BTKEY_GOTO(bs, level,
+ ptr->l != cpu_to_be64(NULLFSBLOCK), corrupt);
+
+ daddr = XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+ } else { /* short pointer: needs a valid AG number and a non-null block */
+ XFS_SCRUB_BTKEY_GOTO(bs, level,
+ cur->bc_private.a.agno != NULLAGNUMBER, corrupt);
+ XFS_SCRUB_BTKEY_GOTO(bs, level,
+ ptr->s != cpu_to_be32(NULLAGBLOCK), corrupt);
+
+ daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
+ be32_to_cpu(ptr->s));
+ }
+ eofs = XFS_FSB_TO_BB(cur->bc_mp, cur->bc_mp->m_sb.sb_dblocks); /* sb_dblocks converted to basic blocks */
+ XFS_SCRUB_BTKEY_GOTO(bs, level, daddr != 0, corrupt);
+ XFS_SCRUB_BTKEY_GOTO(bs, level, daddr < eofs, corrupt);
+
+ return 0;
+
+corrupt:
+ return -EFSCORRUPTED;
+}
+
+/* Check a long-pointer block's sibling pointers against the adjacent pointers in its parent. */
+STATIC int
+xfs_scrub_btree_lblock_check_siblings(
+ struct xfs_scrub_btree *bs,
+ struct xfs_btree_block *block)
+{
+ struct xfs_btree_block *pblock;
+ struct xfs_buf *pbp;
+ struct xfs_btree_cur *ncur = NULL; /* scratch copy of bs->cur, moved at the parent level */
+ union xfs_btree_ptr *pp;
+ xfs_fsblock_t leftsib;
+ xfs_fsblock_t rightsib;
+ xfs_fsblock_t fsbno;
+ int level;
+ int success;
+ int error = 0;
+
+ leftsib = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ rightsib = be64_to_cpu(block->bb_u.l.bb_rightsib);
+ level = xfs_btree_get_level(block);
+
+ /* Root block should never have siblings. */
+ if (level == bs->cur->bc_nlevels - 1) {
+ XFS_SCRUB_BTKEY_CHECK(bs, level, leftsib == NULLFSBLOCK);
+ XFS_SCRUB_BTKEY_CHECK(bs, level, rightsib == NULLFSBLOCK);
+ return error;
+ }
+
+ /* Does the left sibling match the parent level left block? */
+ if (leftsib != NULLFSBLOCK) {
+ error = xfs_btree_dup_cursor(bs->cur, &ncur);
+ if (error)
+ return error;
+ error = xfs_btree_decrement(ncur, level + 1, &success);
+ XFS_SCRUB_BTKEY_OP_ERROR_GOTO(bs, level + 1, &error, out_cur);
+ XFS_SCRUB_BTKEY_GOTO(bs, level, success, out_cur); /* NOTE(review): skips the right-sibling check; the short-pointer variant continues to it */
+
+ pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
+ pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
+ if (!xfs_scrub_btree_ptr(bs, level + 1, pp)) { /* only compare pointers that passed validation */
+ fsbno = be64_to_cpu(pp->l);
+ XFS_SCRUB_BTKEY_CHECK(bs, level, fsbno == leftsib);
+ }
+
+ xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
+ ncur = NULL;
+ }
+
+ /* Does the right sibling match the parent level right block? */
+ if (!error && rightsib != NULLFSBLOCK) {
+ error = xfs_btree_dup_cursor(bs->cur, &ncur);
+ if (error)
+ return error;
+ error = xfs_btree_increment(ncur, level + 1, &success);
+ XFS_SCRUB_BTKEY_OP_ERROR_GOTO(bs, level + 1, &error, out_cur);
+ XFS_SCRUB_BTKEY_GOTO(bs, level, success, out_cur);
+
+ pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
+ pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
+ if (!xfs_scrub_btree_ptr(bs, level + 1, pp)) { /* only compare pointers that passed validation */
+ fsbno = be64_to_cpu(pp->l);
+ XFS_SCRUB_BTKEY_CHECK(bs, level, fsbno == rightsib);
+ }
+
+ xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
+ ncur = NULL;
+ }
+
+out_cur:
+ if (ncur) /* release a scratch cursor left over from an early exit */
+ xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/* Check a short-pointer block's sibling pointers against the adjacent pointers in its parent. */
+STATIC int
+xfs_scrub_btree_sblock_check_siblings(
+ struct xfs_scrub_btree *bs,
+ struct xfs_btree_block *block)
+{
+ struct xfs_btree_block *pblock;
+ struct xfs_buf *pbp;
+ struct xfs_btree_cur *ncur = NULL; /* scratch copy of bs->cur, moved at the parent level */
+ union xfs_btree_ptr *pp;
+ xfs_agblock_t leftsib;
+ xfs_agblock_t rightsib;
+ xfs_agblock_t agbno;
+ int level;
+ int success;
+ int error = 0;
+
+ leftsib = be32_to_cpu(block->bb_u.s.bb_leftsib);
+ rightsib = be32_to_cpu(block->bb_u.s.bb_rightsib);
+ level = xfs_btree_get_level(block);
+
+ /* Root block should never have siblings. */
+ if (level == bs->cur->bc_nlevels - 1) {
+ XFS_SCRUB_BTKEY_CHECK(bs, level, leftsib == NULLAGBLOCK);
+ XFS_SCRUB_BTKEY_CHECK(bs, level, rightsib == NULLAGBLOCK);
+ return error;
+ }
+
+ /* Does the left sibling match the parent level left block? */
+ if (leftsib != NULLAGBLOCK) {
+ error = xfs_btree_dup_cursor(bs->cur, &ncur);
+ if (error)
+ return error;
+ error = xfs_btree_decrement(ncur, level + 1, &success);
+ XFS_SCRUB_BTKEY_OP_ERROR_GOTO(bs, level + 1, &error, out_cur);
+ XFS_SCRUB_BTKEY_GOTO(bs, level, success, verify_rightsib); /* on failure, still go on to check the right sibling */
+
+ pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
+ pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
+ if (!xfs_scrub_btree_ptr(bs, level + 1, pp)) { /* only compare pointers that passed validation */
+ agbno = be32_to_cpu(pp->s);
+ XFS_SCRUB_BTKEY_CHECK(bs, level, agbno == leftsib);
+ }
+
+ xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
+ ncur = NULL;
+ }
+
+verify_rightsib:
+ if (ncur) { /* drop the scratch cursor if the left check bailed out early */
+ xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
+ ncur = NULL;
+ }
+
+ /* Does the right sibling match the parent level right block? */
+ if (rightsib != NULLAGBLOCK) {
+ error = xfs_btree_dup_cursor(bs->cur, &ncur);
+ if (error)
+ return error;
+ error = xfs_btree_increment(ncur, level + 1, &success);
+ XFS_SCRUB_BTKEY_OP_ERROR_GOTO(bs, level + 1, &error, out_cur);
+ XFS_SCRUB_BTKEY_GOTO(bs, level, success, out_cur);
+
+ pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
+ pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
+ if (!xfs_scrub_btree_ptr(bs, level + 1, pp)) { /* only compare pointers that passed validation */
+ agbno = be32_to_cpu(pp->s);
+ XFS_SCRUB_BTKEY_CHECK(bs, level, agbno == rightsib);
+ }
+
+ xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
+ ncur = NULL;
+ }
+
+out_cur:
+ if (ncur) /* release a scratch cursor left over from an early exit */
+ xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/* Look up the block at @pp for @level, run xfs_btree_check_block() on it, then check its siblings. */
+STATIC int
+xfs_scrub_btree_block(
+ struct xfs_scrub_btree *bs,
+ int level,
+ union xfs_btree_ptr *pp,
+ struct xfs_btree_block **pblock,
+ struct xfs_buf **pbp)
+{
+ int error;
+
+ error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
+ if (error)
+ return error;
+
+ xfs_btree_get_block(bs->cur, level, pbp); /* called only to fill *pbp; the return value is unused */
+ error = xfs_btree_check_block(bs->cur, *pblock, level, *pbp);
+ if (error)
+ return error;
+
+ return bs->check_siblings_fn(bs, *pblock); /* long- or short-pointer variant chosen by xfs_scrub_btree() */
+}
+
+/*
+ * Visit all nodes and leaves of a btree. Check that all pointers and
+ * records are in order, that the keys reflect the records, and use a callback
+ * so that the caller can verify individual records. The callback is the same
+ * as the one for xfs_btree_query_range, so therefore this function also
+ * returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a negative error code.
+ */
+int
+xfs_scrub_btree(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ xfs_scrub_btree_rec_fn scrub_fn,
+ struct xfs_owner_info *oinfo,
+ void *private)
+{
+ struct xfs_scrub_btree bs = {0};
+ union xfs_btree_ptr ptr;
+ union xfs_btree_ptr *pp;
+ union xfs_btree_rec *recp;
+ struct xfs_btree_block *block;
+ int level;
+ struct xfs_buf *bp;
+ int i;
+ int error = 0;
+
+ /* Finish filling out the scrub state */
+ bs.cur = cur;
+ bs.scrub_rec = scrub_fn;
+ bs.oinfo = oinfo;
+ bs.firstrec = true;
+ bs.private = private;
+ bs.sc = sc;
+ for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
+ bs.firstkey[i] = true;
+ INIT_LIST_HEAD(&bs.to_check);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) /* pick the sibling checker that matches the pointer width */
+ bs.check_siblings_fn = xfs_scrub_btree_lblock_check_siblings;
+ else
+ bs.check_siblings_fn = xfs_scrub_btree_sblock_check_siblings;
+
+ /* Don't try to check a tree with a height we can't handle. */
+ XFS_SCRUB_BTREC_GOTO(&bs, cur->bc_nlevels > 0, out_badcursor);
+ XFS_SCRUB_BTREC_GOTO(&bs, cur->bc_nlevels <= XFS_BTREE_MAXLEVELS,
+ out_badcursor);
+
+ /* Make sure the root isn't in the superblock. */
+ cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+ error = xfs_scrub_btree_ptr(&bs, cur->bc_nlevels, &ptr);
+ XFS_SCRUB_BTKEY_OP_ERROR_GOTO(&bs, cur->bc_nlevels, &error,
+ out_badcursor);
+
+ /* Load the root of the btree. */
+ level = cur->bc_nlevels - 1;
+ cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+ error = xfs_scrub_btree_block(&bs, level, &ptr, &block, &bp);
+ XFS_SCRUB_BTKEY_OP_ERROR_GOTO(&bs, level, &error, out);
+
+ cur->bc_ptrs[level] = 1; /* start at the first entry of the root */
+
+ while (level < cur->bc_nlevels) { /* ends once we pop up past the root level */
+ block = xfs_btree_get_block(cur, level, &bp);
+
+ if (level == 0) {
+ /* End of leaf, pop back towards the root. */
+ if (cur->bc_ptrs[level] >
+ be16_to_cpu(block->bb_numrecs)) {
+ if (level < cur->bc_nlevels - 1)
+ cur->bc_ptrs[level + 1]++; /* advance the parent to its next pointer */
+ level++;
+ continue;
+ }
+
+ /* Records in order for scrub? */
+ error = xfs_scrub_btree_rec(&bs);
+ if (error)
+ goto out;
+ recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
+ error = bs.scrub_rec(&bs, recp); /* caller-supplied per-record check */
+ if (error < 0 ||
+ error == XFS_BTREE_QUERY_RANGE_ABORT)
+ break;
+ if (xfs_scrub_should_terminate(&error)) /* stop if a fatal signal is pending */
+ break;
+
+ cur->bc_ptrs[level]++;
+ continue;
+ }
+
+ /* End of node, pop back towards the root. */
+ if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
+ if (level < cur->bc_nlevels - 1)
+ cur->bc_ptrs[level + 1]++; /* advance the parent to its next pointer */
+ level++;
+ continue;
+ }
+
+ /* Keys in order for scrub? */
+ error = xfs_scrub_btree_key(&bs, level);
+ if (error)
+ goto out;
+
+ /* Drill another level deeper. */
+ pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
+ error = xfs_scrub_btree_ptr(&bs, level, pp);
+ if (error) { /* bad pointer: skip this child and keep walking */
+ error = 0;
+ cur->bc_ptrs[level]++;
+ continue;
+ }
+ level--;
+ error = xfs_scrub_btree_block(&bs, level, pp, &block, &bp);
+ XFS_SCRUB_BTKEY_OP_ERROR_GOTO(&bs, level, &error, out);
+
+ cur->bc_ptrs[level] = 1; /* start at the first entry of the child block */
+ }
+
+out:
+ /*
+ * If we don't end this function with the cursor pointing at a record
+ * block, a subsequent non-error cursor deletion will not release
+ * node-level buffers, causing a buffer leak. This is quite possible
+ * with a zero-results scrubbing run, so release the buffers if we
+ * aren't pointing at a record.
+ */
+ if (cur->bc_bufs[0] == NULL) {
+ for (i = 0; i < cur->bc_nlevels; i++) {
+ if (cur->bc_bufs[i]) {
+ xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
+ cur->bc_bufs[i] = NULL;
+ cur->bc_ptrs[i] = 0;
+ cur->bc_ra[i] = 0;
+ }
+ }
+ }
+
+out_badcursor: /* reached directly when the cursor itself failed the initial checks */
+ return error;
+}
diff --git a/fs/xfs/repair/btree.h b/fs/xfs/repair/btree.h
new file mode 100644
index 0000000..75e89b1
--- /dev/null
+++ b/fs/xfs/repair/btree.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_REPAIR_BTREE_H__
+#define __XFS_REPAIR_BTREE_H__
+
+/* btree scrub: checker prototypes plus call-site wrapper macros */
+
+extern const char * const btree_types[];
+
+/* Check for btree corruption; @check is the text of the @fs_ok test. */
+bool xfs_scrub_btree_ok(struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur, int level, bool fs_ok,
+ const char *check, const char *func, int line);
+
+/* Check for btree operation errors reported through @error. */
+bool xfs_scrub_btree_op_ok(struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur, int level, int *error,
+ const char *func, int line);
+
+#define XFS_SCRUB_BTREC_CHECK(bs, fs_ok) \
+ xfs_scrub_btree_ok((bs)->sc, (bs)->cur, 0, (fs_ok), #fs_ok, \
+ __func__, __LINE__) /* BTREC_* variants always pass level 0 */
+#define XFS_SCRUB_BTREC_GOTO(bs, fs_ok, label) \
+ do { \
+ if (!xfs_scrub_btree_ok((bs)->sc, (bs)->cur, 0, (fs_ok), \
+ #fs_ok, __func__, __LINE__)) \
+ goto label; \
+ } while (0) /* jumps to @label when the check returns false */
+#define XFS_SCRUB_BTREC_OP_ERROR_GOTO(bs, error, label) \
+ do { \
+ if (!xfs_scrub_btree_op_ok((bs)->sc, (bs)->cur, 0, \
+ (error), __func__, __LINE__)) \
+ goto label; \
+ } while (0) /* @error is an int pointer, passed straight through */
+#define XFS_SCRUB_BTKEY_CHECK(bs, level, fs_ok) \
+ xfs_scrub_btree_ok((bs)->sc, (bs)->cur, (level), (fs_ok), #fs_ok, \
+ __func__, __LINE__) /* BTKEY_* variants take the caller's level */
+#define XFS_SCRUB_BTKEY_GOTO(bs, level, fs_ok, label) \
+ do { \
+ if (!xfs_scrub_btree_ok((bs)->sc, (bs)->cur, (level), (fs_ok), \
+ #fs_ok, __func__, __LINE__)) \
+ goto label; \
+ } while (0) /* jumps to @label when the check returns false */
+#define XFS_SCRUB_BTKEY_OP_ERROR_GOTO(bs, level, error, label) \
+ do { \
+ if (!xfs_scrub_btree_op_ok((bs)->sc, (bs)->cur, (level), \
+ (error), __func__, __LINE__)) \
+ goto label; \
+ } while (0) /* jumps to @label when the op check returns false */
+
+struct xfs_scrub_btree;
+typedef int (*xfs_scrub_btree_rec_fn)( /* per-record callback type */
+ struct xfs_scrub_btree *bs,
+ union xfs_btree_rec *rec);
+
+struct xfs_scrub_btree {
+ /* caller-provided scrub state (mirrors xfs_scrub_btree()'s arguments) */
+ struct xfs_scrub_context *sc;
+ struct xfs_btree_cur *cur; /* cursor of the btree being walked */
+ xfs_scrub_btree_rec_fn scrub_rec; /* invoked on each leaf record */
+ struct xfs_owner_info *oinfo;
+ void *private; /* opaque; handed in by the caller */
+
+ /* internal scrub state; presumably drives the ordering checks - confirm */
+ union xfs_btree_rec lastrec;
+ bool firstrec;
+ union xfs_btree_key lastkey[XFS_BTREE_MAXLEVELS]; /* one per level */
+ bool firstkey[XFS_BTREE_MAXLEVELS]; /* one per level */
+ struct list_head to_check;
+ int (*check_siblings_fn)(
+ struct xfs_scrub_btree *,
+ struct xfs_btree_block *);
+};
+int xfs_scrub_btree(struct xfs_scrub_context *sc, struct xfs_btree_cur *cur,
+ xfs_scrub_btree_rec_fn scrub_fn,
+ struct xfs_owner_info *oinfo, void *private);
+
+#endif /* __XFS_REPAIR_BTREE_H__ */
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index e43e3cc..04f4829 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -43,6 +43,7 @@
#include "xfs_rmap_btree.h"
#include "repair/xfs_scrub.h"
#include "repair/common.h"
+#include "repair/btree.h"
/*
* Online Scrub and Repair
@@ -303,6 +304,235 @@ xfs_scrub_data_ok(
return fs_ok;
}
+/* AG scrubbing */
+
+/* Read the AGI, AGF and AGFL of @agno, in that order, through sc->tp. */
+int
+xfs_scrub_ag_read_headers(
+ struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno,
+ struct xfs_buf **agi, /* out: AGI buffer */
+ struct xfs_buf **agf, /* out: AGF buffer */
+ struct xfs_buf **agfl) /* out: AGFL buffer */
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ int error;
+
+ error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
+ if (error)
+ goto out; /* later headers are not attempted */
+
+ error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
+ if (error)
+ goto out;
+
+ error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
+ if (error)
+ goto out;
+
+out:
+ return error; /* 0, or the error from the first failed read */
+}
+
+/* Delete each AG btree cursor (as XFS_BTREE_ERROR) and NULL the pointers. */
+STATIC void
+xfs_scrub_ag_btcur_free(
+ struct xfs_scrub_ag *sa)
+{
+ if (sa->refc_cur) /* cursors that were never set up are skipped */
+ xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
+ if (sa->rmap_cur)
+ xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
+ if (sa->fino_cur)
+ xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
+ if (sa->ino_cur)
+ xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
+ if (sa->cnt_cur)
+ xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
+ if (sa->bno_cur)
+ xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
+
+ sa->refc_cur = NULL; /* makes a second call a no-op */
+ sa->rmap_cur = NULL;
+ sa->fino_cur = NULL;
+ sa->ino_cur = NULL;
+ sa->bno_cur = NULL;
+ sa->cnt_cur = NULL;
+}
+
+/* Create a cursor for each AG btree whose header buffer is present in @sa. */
+int
+xfs_scrub_ag_btcur_init(
+ struct xfs_scrub_context *sc,
+ struct xfs_scrub_ag *sa)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ xfs_agnumber_t agno = sa->agno;
+
+ if (sa->agf_bp) {
+ /* Set up a bnobt cursor for cross-referencing. */
+ sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
+ agno, XFS_BTNUM_BNO);
+ if (!sa->bno_cur)
+ goto err;
+
+ /* Set up a cntbt cursor for cross-referencing. */
+ sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
+ agno, XFS_BTNUM_CNT);
+ if (!sa->cnt_cur)
+ goto err;
+ }
+
+ /* Set up an inobt cursor for cross-referencing. */
+ if (sa->agi_bp) {
+ sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
+ agno, XFS_BTNUM_INO);
+ if (!sa->ino_cur)
+ goto err;
+ }
+
+ /* Set up a finobt cursor for cross-referencing (needs the feature bit). */
+ if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
+ sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
+ agno, XFS_BTNUM_FINO);
+ if (!sa->fino_cur)
+ goto err;
+ }
+
+ /* Set up a rmapbt cursor for cross-referencing (needs the feature bit). */
+ if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
+ agno);
+ if (!sa->rmap_cur)
+ goto err;
+ }
+
+ /* Set up a refcountbt cursor for cross-referencing (needs reflink). */
+ if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
+ sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
+ sa->agf_bp, agno, NULL);
+ if (!sa->refc_cur)
+ goto err;
+ }
+
+ return 0;
+err:
+ return -ENOMEM; /* cursors created so far are left for the caller */
+}
+
+/* Free the AG btree cursors and mark @sa as no longer bound to an AG. */
+void
+xfs_scrub_ag_free(
+ struct xfs_scrub_ag *sa)
+{
+ xfs_scrub_ag_btcur_free(sa);
+ sa->agno = NULLAGNUMBER; /* header buffers are not released here */
+}
+
+/*
+ * For scrub, grab the AGI, AGF and AGFL headers (AGI before AGF, as
+ * locking order requires) and set up the AG btree cursors. We use the
+ * transaction to avoid deadlocking on crosslinked metadata buffers;
+ * either the caller passes one in (bmap scrub) or we have to create a
+ * transaction ourselves. On error the cursors are freed again.
+ */
+int
+xfs_scrub_ag_init(
+ struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno,
+ struct xfs_scrub_ag *sa)
+{
+ int error;
+
+ memset(sa, 0, sizeof(*sa)); /* any previous contents of @sa are lost */
+ sa->agno = agno;
+ error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp,
+ &sa->agf_bp, &sa->agfl_bp);
+ if (error)
+ goto err;
+
+ error = xfs_scrub_ag_btcur_init(sc, sa);
+ if (error)
+ goto err;
+
+ return error; /* always 0 on this path */
+err:
+ xfs_scrub_ag_free(sa); /* drops partial cursors, resets sa->agno */
+ return error;
+}
+
+/* Organize locking of multiple AGs for a scrub. */
+
+/* Initialize the AG lock handler; big filesystems get a zeroed heap bitmap. */
+void
+xfs_scrub_ag_lock_init(
+ struct xfs_mount *mp,
+ struct xfs_scrub_ag_lock *ag_lock)
+{
+ if (mp->m_sb.sb_agcount <= XFS_SCRUB_AGMASK_NR)
+ ag_lock->agmask = ag_lock->__agmask; /* inline map is big enough */
+ else /* bitops touch whole longs: size the map in longs and zero it */
+ ag_lock->agmask = kmem_zalloc(BITS_TO_LONGS(mp->m_sb.sb_agcount) *
+ sizeof(unsigned long), KM_SLEEP | KM_NOFS);
+ ag_lock->max_ag = NULLAGNUMBER; /* no AG locked yet */
+}
+
+/* Can we lock @agno's headers without deadlocking? Records it if so. */
+bool
+xfs_scrub_ag_can_lock(
+ struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_scrub_ag_lock *ag_lock = &sc->ag_lock;
+
+ ASSERT(agno < mp->m_sb.sb_agcount);
+
+ trace_xfs_scrub_ag_can_lock(mp, ag_lock->max_ag, agno);
+
+ /* Already locked? Then re-locking it is always allowed. */
+ if (test_bit(agno, ag_lock->agmask))
+ return true;
+
+ /* A not-yet-locked AG below max_ag would violate locking order. */
+ if (ag_lock->max_ag != NULLAGNUMBER && agno < ag_lock->max_ag) {
+ trace_xfs_scrub_ag_may_deadlock(mp, ag_lock->max_ag, agno);
+ return false;
+ }
+
+ set_bit(agno, ag_lock->agmask); /* remember this AG as locked */
+ ag_lock->max_ag = agno; /* agno >= old max_ag (or first lock) here */
+ return true;
+}
+
+/* Read every AG's headers via sc->tp, in AG order, marking each as locked. */
+int
+xfs_scrub_ag_lock_all(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_scrub_ag_lock *ag_lock = &sc->ag_lock;
+ struct xfs_buf *agi; /* buffer pointers are not kept past each pass */
+ struct xfs_buf *agf;
+ struct xfs_buf *agfl;
+ xfs_agnumber_t agno;
+ int error = 0;
+
+ trace_xfs_scrub_ag_lock_all(mp, ag_lock->max_ag, mp->m_sb.sb_agcount);
+
+ ASSERT(ag_lock->max_ag == NULLAGNUMBER); /* must start with none locked */
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ error = xfs_scrub_ag_read_headers(sc, agno, &agi, &agf,
+ &agfl);
+ if (error)
+ break; /* AGs below agno stay marked as locked */
+ set_bit(agno, ag_lock->agmask);
+ ag_lock->max_ag = agno;
+ }
+
+ return error;
+}
+
/* Dummy scrubber */
STATIC int
@@ -329,6 +559,10 @@ xfs_scrub_teardown(
struct xfs_scrub_context *sc,
int error)
{
+ xfs_scrub_ag_free(&sc->sa);
+ if (sc->ag_lock.agmask != sc->ag_lock.__agmask)
+ kmem_free(sc->ag_lock.agmask);
+ sc->ag_lock.agmask = NULL;
xfs_trans_cancel(sc->tp);
sc->tp = NULL;
return error;
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index af88d67..fc38f67 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -20,11 +20,51 @@
#ifndef __XFS_REPAIR_COMMON_H__
#define __XFS_REPAIR_COMMON_H__
+/* Header buffer pointers and btree cursors for a single AG. */
+struct xfs_scrub_ag {
+ xfs_agnumber_t agno; /* NULLAGNUMBER after xfs_scrub_ag_free() */
+
+ /* AG header buffers; may be NULL if a header could not be loaded */
+ struct xfs_buf *agf_bp;
+ struct xfs_buf *agfl_bp;
+ struct xfs_buf *agi_bp;
+
+ /* AG btree cursors; NULL when not set up */
+ struct xfs_btree_cur *bno_cur; /* XFS_BTNUM_BNO, via agf_bp */
+ struct xfs_btree_cur *cnt_cur; /* XFS_BTNUM_CNT, via agf_bp */
+ struct xfs_btree_cur *ino_cur; /* XFS_BTNUM_INO, via agi_bp */
+ struct xfs_btree_cur *fino_cur; /* XFS_BTNUM_FINO, via agi_bp */
+ struct xfs_btree_cur *rmap_cur; /* rmapbt, via agf_bp */
+ struct xfs_btree_cur *refc_cur; /* refcountbt, via agf_bp */
+};
+
+/*
+ * Track the AGs whose header buffers we have already locked, one bit
+ * per AG. This information helps us avoid deadlocks by ensuring locking
+ * order rule compliance. max_ag is the highest AG number we've locked;
+ * we can only re-lock an AG we've already locked, or lock a higher AG.
+ * If we try to lock a lower numbered AG, we must restart the operation
+ * with all AG headers locked from the beginning.
+ */
+#define XFS_SCRUB_AGMASK_NR 128 /* AGs (bits) the inline map can hold */
+struct xfs_scrub_ag_lock {
+ xfs_agnumber_t max_ag; /* NULLAGNUMBER until something is locked */
+ unsigned long *agmask; /* points at __agmask or a heap bitmap */
+ unsigned long __agmask[ /* sized in bits, not bytes */
+ BITS_TO_LONGS(XFS_SCRUB_AGMASK_NR)];
+};
+
struct xfs_scrub_context {
/* General scrub state. */
struct xfs_scrub_metadata *sm;
struct xfs_trans *tp;
struct xfs_inode *ip;
+
+ /* Which AG headers are locked, for multi-AG operations. */
+ struct xfs_scrub_ag_lock ag_lock;
+
+ /* Header buffers and btree cursors for single-AG operations. */
+ struct xfs_scrub_ag sa;
};
/* Should we end the scrub early? */
@@ -138,6 +178,19 @@ bool xfs_scrub_data_ok(struct xfs_scrub_context *sc, int whichfork,
goto label; \
} while(0)
+bool xfs_scrub_ag_can_lock(struct xfs_scrub_context *sc, xfs_agnumber_t agno);
+int xfs_scrub_ag_lock_all(struct xfs_scrub_context *sc);
+void xfs_scrub_ag_lock_init(struct xfs_mount *mp,
+ struct xfs_scrub_ag_lock *ag_lock);
+void xfs_scrub_ag_free(struct xfs_scrub_ag *sa);
+int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+ struct xfs_scrub_ag *sa);
+int xfs_scrub_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+ struct xfs_buf **agi, struct xfs_buf **agf,
+ struct xfs_buf **agfl);
+int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc,
+ struct xfs_scrub_ag *sa);
+
/* Setup functions */
int xfs_scrub_teardown(struct xfs_scrub_context *sc, int error);
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 11/47] xfs: scrub the backup superblocks
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (9 preceding siblings ...)
2017-01-07 0:36 ` [PATCH 10/47] xfs: generic functions to scrub metadata and btrees Darrick J. Wong
@ 2017-01-07 0:36 ` Darrick J. Wong
2017-01-07 0:37 ` [PATCH 12/47] xfs: scrub AGF and AGFL Darrick J. Wong
` (36 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:36 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Ensure that the geometry presented in the backup superblocks matches
the primary superblock so that repair can recover the filesystem if
that primary gets corrupted.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_fs.h | 3 -
fs/xfs/repair/agheader.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 1
fs/xfs/repair/common.h | 6 +
fs/xfs/xfs_trace.h | 3 -
6 files changed, 203 insertions(+), 2 deletions(-)
create mode 100644 fs/xfs/repair/agheader.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index baec6d5..ae307ba 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -103,6 +103,7 @@ xfs-y += xfs_aops.o \
# online scrub/repair
xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
+ agheader.o \
btree.o \
common.o \
)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index d8ceaf8..47bf348 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -575,7 +575,8 @@ struct xfs_scrub_metadata {
* Metadata types and flags for scrub operation.
*/
#define XFS_SCRUB_TYPE_TEST 0 /* dummy to test ioctl */
-#define XFS_SCRUB_TYPE_MAX 0
+#define XFS_SCRUB_TYPE_SB 1 /* superblock */
+#define XFS_SCRUB_TYPE_MAX 1
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
new file mode 100644
index 0000000..c5e03ab
--- /dev/null
+++ b/fs/xfs/repair/agheader.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "repair/common.h"
+
+/* Reject an out-of-range sm_agno, then do the generic scrub setup. */
+int
+xfs_scrub_setup_ag(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ if (sm->sm_agno >= mp->m_sb.sb_agcount)
+ return -EINVAL; /* no such AG on this filesystem */
+ return xfs_scrub_setup(sc, ip, sm, retry_deadlocked);
+}
+
+/* Superblock */
+
+#define XFS_SCRUB_SB_CHECK(fs_ok) \
+ XFS_SCRUB_CHECK(sc, bp, "superblock", fs_ok) /* uses caller's sc, bp */
+#define XFS_SCRUB_SB_PREEN(fs_ok) \
+ XFS_SCRUB_PREEN(sc, bp, "superblock", fs_ok) /* uses caller's sc, bp */
+#define XFS_SCRUB_SB_OP_ERROR_GOTO(label) \
+ XFS_SCRUB_OP_ERROR_GOTO(sc, agno, 0, "superblock", &error, label)
+/* Compare AG sm_agno's sb with the in-core sb; read errors set CORRUPT. */
+int
+xfs_scrub_superblock(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_buf *bp;
+ struct xfs_sb sb;
+ xfs_agnumber_t agno;
+ uint32_t v2_ok;
+ int error;
+
+ agno = sc->sm->sm_agno;
+
+ error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+ XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+ if (error) {
+ trace_xfs_scrub_block_error(mp, agno, XFS_SB_BLOCK(mp),
+ "superblock", "error != 0", __func__, __LINE__);
+ error = 0; /* reported via sm_flags, not the return value */
+ sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+ goto out;
+ }
+
+ /*
+ * The in-core sb is a more up-to-date copy of AG 0's sb, so only the
+ * superblocks of the other AGs are compared field by field below.
+ */
+ if (agno == 0)
+ goto out;
+
+ xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
+
+ /* Verify the geometries match the in-core (primary) superblock. */
+#define XFS_SCRUB_SB_FIELD(fn) \
+ XFS_SCRUB_SB_CHECK(sb.sb_##fn == mp->m_sb.sb_##fn)
+#define XFS_PREEN_SB_FIELD(fn) \
+ XFS_SCRUB_SB_PREEN(sb.sb_##fn == mp->m_sb.sb_##fn)
+ XFS_SCRUB_SB_FIELD(blocksize);
+ XFS_SCRUB_SB_FIELD(dblocks);
+ XFS_SCRUB_SB_FIELD(rblocks);
+ XFS_SCRUB_SB_FIELD(rextents);
+ XFS_SCRUB_SB_PREEN(uuid_equal(&sb.sb_uuid, &mp->m_sb.sb_uuid));
+ XFS_SCRUB_SB_FIELD(logstart);
+ XFS_PREEN_SB_FIELD(rootino);
+ XFS_PREEN_SB_FIELD(rbmino);
+ XFS_PREEN_SB_FIELD(rsumino);
+ XFS_SCRUB_SB_FIELD(rextsize);
+ XFS_SCRUB_SB_FIELD(agblocks);
+ XFS_SCRUB_SB_FIELD(agcount);
+ XFS_SCRUB_SB_FIELD(rbmblocks);
+ XFS_SCRUB_SB_FIELD(logblocks);
+ XFS_SCRUB_SB_CHECK(!(sb.sb_versionnum & ~XFS_SB_VERSION_OKBITS));
+ XFS_SCRUB_SB_CHECK(XFS_SB_VERSION_NUM(&sb) ==
+ XFS_SB_VERSION_NUM(&mp->m_sb));
+ XFS_SCRUB_SB_FIELD(sectsize);
+ XFS_SCRUB_SB_FIELD(inodesize);
+ XFS_SCRUB_SB_FIELD(inopblock);
+ XFS_SCRUB_SB_PREEN(memcmp(sb.sb_fname, mp->m_sb.sb_fname,
+ sizeof(sb.sb_fname)) == 0);
+ XFS_SCRUB_SB_FIELD(blocklog);
+ XFS_SCRUB_SB_FIELD(sectlog);
+ XFS_SCRUB_SB_FIELD(inodelog);
+ XFS_SCRUB_SB_FIELD(inopblog);
+ XFS_SCRUB_SB_FIELD(agblklog);
+ XFS_SCRUB_SB_FIELD(rextslog);
+ XFS_PREEN_SB_FIELD(imax_pct);
+ XFS_PREEN_SB_FIELD(uquotino);
+ XFS_PREEN_SB_FIELD(gquotino);
+ XFS_SCRUB_SB_FIELD(shared_vn);
+ XFS_SCRUB_SB_FIELD(inoalignmt);
+ XFS_PREEN_SB_FIELD(unit);
+ XFS_PREEN_SB_FIELD(width);
+ XFS_SCRUB_SB_FIELD(dirblklog);
+ XFS_SCRUB_SB_FIELD(logsectlog);
+ XFS_SCRUB_SB_FIELD(logsectsize);
+ XFS_SCRUB_SB_FIELD(logsunit);
+ v2_ok = XFS_SB_VERSION2_OKBITS; /* features2 bits we accept */
+ if (XFS_SB_VERSION_NUM(&sb) >= XFS_SB_VERSION_5)
+ v2_ok |= XFS_SB_VERSION2_CRCBIT;
+ XFS_SCRUB_SB_CHECK(!(sb.sb_features2 & ~v2_ok));
+ XFS_SCRUB_SB_PREEN(sb.sb_features2 == sb.sb_bad_features2);
+ XFS_SCRUB_SB_CHECK(!sb.sb_features2 ||
+ xfs_sb_version_hasmorebits(&mp->m_sb));
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ XFS_SCRUB_SB_CHECK(!xfs_sb_has_compat_feature(&sb,
+ XFS_SB_FEAT_COMPAT_UNKNOWN));
+ XFS_SCRUB_SB_CHECK(!xfs_sb_has_ro_compat_feature(&sb,
+ XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+ XFS_SCRUB_SB_CHECK(!xfs_sb_has_incompat_feature(&sb,
+ XFS_SB_FEAT_INCOMPAT_UNKNOWN));
+ XFS_SCRUB_SB_CHECK(!xfs_sb_has_incompat_log_feature(&sb,
+ XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
+ XFS_SCRUB_SB_FIELD(spino_align);
+ XFS_PREEN_SB_FIELD(pquotino);
+ }
+ if (xfs_sb_version_hasmetauuid(&mp->m_sb)) {
+ XFS_SCRUB_SB_CHECK(uuid_equal(&sb.sb_meta_uuid,
+ &mp->m_sb.sb_meta_uuid));
+ XFS_SCRUB_SB_CHECK(uuid_equal(&sb.sb_uuid,
+ &mp->m_sb.sb_uuid));
+ } else
+ XFS_SCRUB_SB_CHECK(uuid_equal(&sb.sb_uuid,
+ &mp->m_sb.sb_meta_uuid));
+#undef XFS_SCRUB_SB_FIELD
+
+#define XFS_SCRUB_SB_FEAT(fn) \
+ XFS_SCRUB_SB_CHECK(xfs_sb_version_has##fn(&sb) == \
+ xfs_sb_version_has##fn(&mp->m_sb))
+ XFS_SCRUB_SB_FEAT(align);
+ XFS_SCRUB_SB_FEAT(dalign);
+ XFS_SCRUB_SB_FEAT(logv2);
+ XFS_SCRUB_SB_FEAT(extflgbit);
+ XFS_SCRUB_SB_FEAT(sector);
+ XFS_SCRUB_SB_FEAT(asciici);
+ XFS_SCRUB_SB_FEAT(morebits);
+ XFS_SCRUB_SB_FEAT(lazysbcount);
+ XFS_SCRUB_SB_FEAT(crc);
+ XFS_SCRUB_SB_FEAT(_pquotino);
+ XFS_SCRUB_SB_FEAT(ftype);
+ XFS_SCRUB_SB_FEAT(finobt);
+ XFS_SCRUB_SB_FEAT(sparseinodes);
+ XFS_SCRUB_SB_FEAT(metauuid);
+ XFS_SCRUB_SB_FEAT(rmapbt);
+ XFS_SCRUB_SB_FEAT(reflink);
+#undef XFS_SCRUB_SB_FEAT
+
+out:
+ return error; /* 0 here on every visible path */
+}
+#undef XFS_SCRUB_SB_OP_ERROR_GOTO
+#undef XFS_SCRUB_SB_CHECK
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 04f4829..94f6a2d 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -596,6 +596,7 @@ struct xfs_scrub_meta_fns {
static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup, xfs_scrub_dummy, NULL, NULL},
+ {xfs_scrub_setup_ag, xfs_scrub_superblock, NULL, NULL},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index fc38f67..bd2896b 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -196,5 +196,11 @@ int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc,
int xfs_scrub_teardown(struct xfs_scrub_context *sc, int error);
int xfs_scrub_setup(struct xfs_scrub_context *sc, struct xfs_inode *ip,
struct xfs_scrub_metadata *sm, bool retry_deadlocked);
+int xfs_scrub_setup_ag(struct xfs_scrub_context *sc, struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm, bool retry_deadlocked);
+
+/* Metadata scrubbers */
+
+int xfs_scrub_superblock(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 3e04690..ddfe15f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3353,7 +3353,8 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
/* scrub */
#define XFS_SCRUB_TYPE_DESC \
- { XFS_SCRUB_TYPE_TEST, "dummy" }
+ { XFS_SCRUB_TYPE_TEST, "dummy" }, \
+ { XFS_SCRUB_TYPE_SB, "superblock" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 12/47] xfs: scrub AGF and AGFL
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (10 preceding siblings ...)
2017-01-07 0:36 ` [PATCH 11/47] xfs: scrub the backup superblocks Darrick J. Wong
@ 2017-01-07 0:37 ` Darrick J. Wong
2017-01-07 0:37 ` [PATCH 13/47] xfs: scrub the AGI Darrick J. Wong
` (35 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:37 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Check the block references in the AGF and AGFL headers to make sure
they make sense.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_fs.h | 4 +
fs/xfs/repair/agheader.c | 227 ++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 62 +++++++++++++
fs/xfs/repair/common.h | 8 ++
fs/xfs/xfs_trace.h | 4 +
5 files changed, 303 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 47bf348..b7f2850 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -576,7 +576,9 @@ struct xfs_scrub_metadata {
*/
#define XFS_SCRUB_TYPE_TEST 0 /* dummy to test ioctl */
#define XFS_SCRUB_TYPE_SB 1 /* superblock */
-#define XFS_SCRUB_TYPE_MAX 1
+#define XFS_SCRUB_TYPE_AGF 2 /* AG free header */
+#define XFS_SCRUB_TYPE_AGFL 3 /* AG free list */
+#define XFS_SCRUB_TYPE_MAX 3
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index c5e03ab..f937834 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -48,6 +48,72 @@ xfs_scrub_setup_ag(
return xfs_scrub_setup(sc, ip, sm, retry_deadlocked);
}
+/* Size of AG @agno in blocks; the last AG gets what sb_dblocks leaves over. */
+static inline xfs_agblock_t
+xfs_scrub_ag_blocks(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ ASSERT(agno < mp->m_sb.sb_agcount);
+
+ if (agno < mp->m_sb.sb_agcount - 1)
+ return mp->m_sb.sb_agblocks; /* every AG but the last is full size */
+ return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks);
+}
+
+/* Call @fn on each AGFL entry from flfirst to fllast, wrapping if needed. */
+int
+xfs_scrub_walk_agfl(
+ struct xfs_scrub_context *sc,
+ int (*fn)(struct xfs_scrub_context *,
+ xfs_agblock_t bno, void *),
+ void *priv)
+{
+ struct xfs_agf *agf;
+ __be32 *agfl_bno;
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ unsigned int flfirst;
+ unsigned int fllast;
+ int i;
+ int error;
+
+ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp); /* both buffers must be loaded */
+ agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, sc->sa.agfl_bp);
+ flfirst = be32_to_cpu(agf->agf_flfirst); /* not range-checked here */
+ fllast = be32_to_cpu(agf->agf_fllast); /* not range-checked here */
+
+ /* Skip an empty AGFL. */
+ if (agf->agf_flcount == cpu_to_be32(0))
+ return 0;
+
+ /* No wrap: first to last is a consecutive list (one entry if equal). */
+ if (fllast >= flfirst) {
+ for (i = flfirst; i <= fllast; i++) {
+ error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
+ if (error)
+ return error; /* a nonzero @fn result ends the walk */
+ }
+
+ return 0;
+ }
+
+ /* Wrapped list, part one: first to the end of the AGFL array. */
+ for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) {
+ error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
+ if (error)
+ return error;
+ }
+
+ /* Wrapped list, part two: the start of the array to last. */
+ for (i = 0; i <= fllast; i++) {
+ error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
/* Superblock */
#define XFS_SCRUB_SB_CHECK(fs_ok) \
@@ -189,3 +255,164 @@ xfs_scrub_superblock(
}
#undef XFS_SCRUB_SB_OP_ERROR_GOTO
#undef XFS_SCRUB_SB_CHECK
+
+/* AGF */
+
+#define XFS_SCRUB_AGF_CHECK(fs_ok) \
+ XFS_SCRUB_CHECK(sc, sc->sa.agf_bp, "AGF", fs_ok)
+#define XFS_SCRUB_AGF_OP_ERROR_GOTO(error, label) \
+ XFS_SCRUB_OP_ERROR_GOTO(sc, sc->sm->sm_agno, \
+ XFS_AGF_BLOCK(sc->tp->t_mountp), "AGF", error, label)
+/* Scrub the AGF: AG length, btree roots and levels, and AGFL counters. */
+int
+xfs_scrub_agf(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_agf *agf;
+ xfs_daddr_t daddr;
+ xfs_daddr_t eofs;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_agblock_t eoag;
+ xfs_agblock_t agfl_first;
+ xfs_agblock_t agfl_last;
+ xfs_agblock_t agfl_count;
+ xfs_agblock_t fl_count;
+ int level;
+ int error = 0;
+
+ agno = sc->sm->sm_agno;
+ error = xfs_scrub_load_ag_headers(sc, agno, XFS_SCRUB_TYPE_AGF);
+ XFS_SCRUB_AGF_OP_ERROR_GOTO(&error, out);
+
+ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+
+ /* Check the AG length against what the superblock geometry implies */
+ eoag = be32_to_cpu(agf->agf_length);
+ XFS_SCRUB_AGF_CHECK(eoag == xfs_scrub_ag_blocks(mp, agno));
+
+ /* Check the AGF btree roots and levels */
+ agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
+ daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ XFS_SCRUB_AGF_CHECK(agbno > XFS_AGI_BLOCK(mp));
+ XFS_SCRUB_AGF_CHECK(agbno < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_AGF_CHECK(agbno < eoag);
+ XFS_SCRUB_AGF_CHECK(daddr < eofs);
+
+ agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
+ daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ XFS_SCRUB_AGF_CHECK(agbno > XFS_AGI_BLOCK(mp));
+ XFS_SCRUB_AGF_CHECK(agbno < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_AGF_CHECK(agbno < eoag);
+ XFS_SCRUB_AGF_CHECK(daddr < eofs);
+
+ level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
+ XFS_SCRUB_AGF_CHECK(level > 0);
+ XFS_SCRUB_AGF_CHECK(level <= XFS_BTREE_MAXLEVELS);
+
+ level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
+ XFS_SCRUB_AGF_CHECK(level > 0);
+ XFS_SCRUB_AGF_CHECK(level <= XFS_BTREE_MAXLEVELS);
+
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
+ daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ XFS_SCRUB_AGF_CHECK(agbno > XFS_AGI_BLOCK(mp));
+ XFS_SCRUB_AGF_CHECK(agbno < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_AGF_CHECK(agbno < eoag);
+ XFS_SCRUB_AGF_CHECK(daddr < eofs);
+
+ level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+ XFS_SCRUB_AGF_CHECK(level > 0);
+ XFS_SCRUB_AGF_CHECK(level <= XFS_BTREE_MAXLEVELS);
+ }
+
+ if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ agbno = be32_to_cpu(agf->agf_refcount_root);
+ daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ XFS_SCRUB_AGF_CHECK(agbno > XFS_AGI_BLOCK(mp));
+ XFS_SCRUB_AGF_CHECK(agbno < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_AGF_CHECK(agbno < eoag);
+ XFS_SCRUB_AGF_CHECK(daddr < eofs);
+
+ level = be32_to_cpu(agf->agf_refcount_level);
+ XFS_SCRUB_AGF_CHECK(level > 0);
+ XFS_SCRUB_AGF_CHECK(level <= XFS_BTREE_MAXLEVELS);
+ }
+
+ /* Check the AGFL counters; first == last is a one-entry list, not a wrap */
+ agfl_first = be32_to_cpu(agf->agf_flfirst);
+ agfl_last = be32_to_cpu(agf->agf_fllast);
+ agfl_count = be32_to_cpu(agf->agf_flcount);
+ if (agfl_last >= agfl_first)
+ fl_count = agfl_last - agfl_first + 1;
+ else
+ fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1;
+ XFS_SCRUB_AGF_CHECK(agfl_count == 0 || fl_count == agfl_count);
+
+out:
+ return error;
+}
+#undef XFS_SCRUB_AGF_OP_ERROR_GOTO
+#undef XFS_SCRUB_AGF_CHECK
+
+/* AGFL */
+
+#define XFS_SCRUB_AGFL_CHECK(fs_ok) \
+ XFS_SCRUB_CHECK(sc, sc->sa.agfl_bp, "AGFL", fs_ok)
+struct xfs_scrub_agfl { /* limits handed to the per-block AGFL callback */
+ xfs_agblock_t eoag; /* agf_length of the AG being scrubbed */
+ xfs_daddr_t eofs; /* XFS_FSB_TO_BB() of sb_dblocks */
+};
+
+/* Range-check one AGFL entry; @priv is the struct xfs_scrub_agfl limits. */
+STATIC int
+xfs_scrub_agfl_block(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ void *priv)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ xfs_agnumber_t agno = sc->sa.agno;
+ struct xfs_scrub_agfl *sagfl = priv;
+
+ XFS_SCRUB_AGFL_CHECK(agbno > XFS_AGI_BLOCK(mp));
+ XFS_SCRUB_AGFL_CHECK(XFS_AGB_TO_DADDR(mp, agno, agbno) < sagfl->eofs);
+ XFS_SCRUB_AGFL_CHECK(agbno < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_AGFL_CHECK(agbno < sagfl->eoag);
+
+ return 0; /* always 0, so the AGFL walk never stops early */
+}
+
+#define XFS_SCRUB_AGFL_OP_ERROR_GOTO(error, label) \
+ XFS_SCRUB_OP_ERROR_GOTO(sc, sc->sm->sm_agno, \
+ XFS_AGFL_BLOCK(sc->tp->t_mountp), "AGFL", error, label)
+/* Scrub the AGFL: range-check every block on the free list. */
+int
+xfs_scrub_agfl(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_scrub_agfl sagfl;
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_agf *agf;
+ int error;
+
+ error = xfs_scrub_load_ag_headers(sc, sc->sm->sm_agno,
+ XFS_SCRUB_TYPE_AGFL);
+ XFS_SCRUB_AGFL_OP_ERROR_GOTO(&error, out);
+ if (!sc->sa.agf_bp) /* the AGF supplies flfirst/fllast and the AG length */
+ return -EFSCORRUPTED;
+
+ agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ sagfl.eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+ sagfl.eoag = be32_to_cpu(agf->agf_length);
+
+ /* Check the blocks in the AGFL. */
+ return xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sagfl);
+out:
+ return error;
+}
+#undef XFS_SCRUB_AGFL_OP_ERROR_GOTO
+#undef XFS_SCRUB_AGFL_CHECK
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 94f6a2d..eb303a3 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -584,6 +584,66 @@ xfs_scrub_setup(
0, 0, 0, &sc->tp);
}
+/*
+ * Load and verify an AG header for further AG header examination.
+ * If this header is not the target of the examination, a read or
+ * verifier error is swallowed (we return 0); *bpp is NULLed up front.
+ */
+STATIC int
+xfs_scrub_load_ag_header(
+ struct xfs_scrub_context *sc,
+ xfs_daddr_t daddr,
+ struct xfs_buf **bpp,
+ const struct xfs_buf_ops *ops,
+ bool is_target)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ int error;
+
+ *bpp = NULL;
+ error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+ XFS_AG_DADDR(mp, sc->sa.agno, daddr),
+ XFS_FSS_TO_BB(mp, 1), 0, bpp, ops);
+ return is_target ? error : 0; /* only the target's errors propagate */
+}
+
+/*
+ * Load as many of the AG headers (AGI, AGF, AGFL, in that order) as we
+ * can for an examination of an AG header; no cursors are created here.
+ */
+int
+xfs_scrub_load_ag_headers(
+ struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno,
+ unsigned int type)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ int error;
+
+ ASSERT(type == XFS_SCRUB_TYPE_AGF || type == XFS_SCRUB_TYPE_AGFL);
+ memset(&sc->sa, 0, sizeof(sc->sa)); /* previous sc->sa contents are lost */
+ sc->sa.agno = agno;
+
+ error = xfs_scrub_load_ag_header(sc, XFS_AGI_DADDR(mp),
+ &sc->sa.agi_bp, &xfs_agi_buf_ops, false);
+ if (error) /* cannot trigger: a non-target load always returns 0 */
+ return error;
+
+ error = xfs_scrub_load_ag_header(sc, XFS_AGF_DADDR(mp),
+ &sc->sa.agf_bp, &xfs_agf_buf_ops,
+ type == XFS_SCRUB_TYPE_AGF);
+ if (error)
+ return error;
+
+ error = xfs_scrub_load_ag_header(sc, XFS_AGFL_DADDR(mp),
+ &sc->sa.agfl_bp, &xfs_agfl_buf_ops,
+ type == XFS_SCRUB_TYPE_AGFL);
+ if (error)
+ return error;
+
+ return 0; /* non-target buffers in sc->sa may still be NULL */
+}
+
/* Scrubbing dispatch. */
struct xfs_scrub_meta_fns {
@@ -597,6 +657,8 @@ struct xfs_scrub_meta_fns {
static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup, xfs_scrub_dummy, NULL, NULL},
{xfs_scrub_setup_ag, xfs_scrub_superblock, NULL, NULL},
+ {xfs_scrub_setup_ag, xfs_scrub_agf, NULL, NULL},
+ {xfs_scrub_setup_ag, xfs_scrub_agfl, NULL, NULL},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index bd2896b..2f7075d 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -190,6 +190,12 @@ int xfs_scrub_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
struct xfs_buf **agfl);
int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc,
struct xfs_scrub_ag *sa);
+int xfs_scrub_load_ag_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+ unsigned int type);
+int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
+ int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno,
+ void *),
+ void *priv);
/* Setup functions */
@@ -202,5 +208,7 @@ int xfs_scrub_setup_ag(struct xfs_scrub_context *sc, struct xfs_inode *ip,
/* Metadata scrubbers */
int xfs_scrub_superblock(struct xfs_scrub_context *sc);
+int xfs_scrub_agf(struct xfs_scrub_context *sc);
+int xfs_scrub_agfl(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ddfe15f..b8b0028 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3354,7 +3354,9 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
/* scrub */
#define XFS_SCRUB_TYPE_DESC \
{ XFS_SCRUB_TYPE_TEST, "dummy" }, \
- { XFS_SCRUB_TYPE_SB, "superblock" }
+ { XFS_SCRUB_TYPE_SB, "superblock" }, \
+ { XFS_SCRUB_TYPE_AGF, "AGF" }, \
+ { XFS_SCRUB_TYPE_AGFL, "AGFL" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 13/47] xfs: scrub the AGI
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (11 preceding siblings ...)
2017-01-07 0:37 ` [PATCH 12/47] xfs: scrub AGF and AGFL Darrick J. Wong
@ 2017-01-07 0:37 ` Darrick J. Wong
2017-01-07 0:37 ` [PATCH 14/47] xfs: support scrubbing free space btrees Darrick J. Wong
` (34 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:37 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Add a forgotten check to the AGI verifier, then wire up the scrub
infrastructure to check the AGI contents.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_fs.h | 3 +
 fs/xfs/repair/agheader.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 7 ++-
fs/xfs/repair/common.h | 1
fs/xfs/xfs_trace.h | 3 +
5 files changed, 106 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b7f2850..f883dcd 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -578,7 +578,8 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_SB 1 /* superblock */
#define XFS_SCRUB_TYPE_AGF 2 /* AG free header */
#define XFS_SCRUB_TYPE_AGFL 3 /* AG free list */
-#define XFS_SCRUB_TYPE_MAX 3
+#define XFS_SCRUB_TYPE_AGI 4 /* AG inode header */
+#define XFS_SCRUB_TYPE_MAX 4
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index f937834..3623f4c 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -416,3 +416,99 @@ xfs_scrub_agfl(
}
#undef XFS_SCRUB_AGFL_OP_ERROR_GOTO
#undef XFS_SCRUB_AGFL_CHECK
+
+/* AGI */
+
+#define XFS_SCRUB_AGI_CHECK(fs_ok) \
+ XFS_SCRUB_CHECK(sc, sc->sa.agi_bp, "AGI", fs_ok)
+#define XFS_SCRUB_AGI_OP_ERROR_GOTO(error, label) \
+ XFS_SCRUB_OP_ERROR_GOTO(sc, sc->sm->sm_agno, \
+ XFS_AGI_BLOCK(sc->tp->t_mountp), "AGI", error, label)
+/*
+ * Scrub the AGI.
+ *
+ * Loads the headers of AG sc->sm->sm_agno with the AGI as the target and
+ * then range-checks the AGI fields: the AG length, the inode btree (and,
+ * if the feature is enabled, free inode btree) root block and height, the
+ * inode counters, the newino/dirino pointers and every unlinked bucket.
+ * Each XFS_SCRUB_AGI_CHECK() states a condition the AGI is expected to
+ * satisfy; what happens when one is false is up to XFS_SCRUB_CHECK().
+ * Returns the value left in @error by the header load step.
+ */
+int
+xfs_scrub_agi(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_agi *agi;
+ xfs_daddr_t daddr;
+ xfs_daddr_t eofs;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_agblock_t eoag;
+ xfs_agino_t agino;
+ xfs_agino_t first_agino;
+ xfs_agino_t last_agino;
+ int i;
+ int level;
+ int error = 0;
+
+ agno = sc->sm->sm_agno;
+ error = xfs_scrub_load_ag_headers(sc, agno, XFS_SCRUB_TYPE_AGI);
+ XFS_SCRUB_AGI_OP_ERROR_GOTO(&error, out);
+
+ agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+ /* Size of the data device, converted from sb_dblocks for daddr checks. */
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+
+ /* Check the AG length */
+ eoag = be32_to_cpu(agi->agi_length);
+ XFS_SCRUB_AGI_CHECK(eoag == xfs_scrub_ag_blocks(mp, agno));
+
+ /* Check btree roots and levels */
+ agbno = be32_to_cpu(agi->agi_root);
+ daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ /* The root must sit after the AGI block and inside the AG and fs. */
+ XFS_SCRUB_AGI_CHECK(agbno > XFS_AGI_BLOCK(mp));
+ XFS_SCRUB_AGI_CHECK(agbno < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_AGI_CHECK(agbno < eoag);
+ XFS_SCRUB_AGI_CHECK(daddr < eofs);
+
+ level = be32_to_cpu(agi->agi_level);
+ XFS_SCRUB_AGI_CHECK(level > 0);
+ XFS_SCRUB_AGI_CHECK(level <= XFS_BTREE_MAXLEVELS);
+
+ /* Same root/level checks for the free inode btree, if present. */
+ if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+ agbno = be32_to_cpu(agi->agi_free_root);
+ daddr = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ XFS_SCRUB_AGI_CHECK(agbno > XFS_AGI_BLOCK(mp));
+ XFS_SCRUB_AGI_CHECK(agbno < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_AGI_CHECK(agbno < eoag);
+ XFS_SCRUB_AGI_CHECK(daddr < eofs);
+
+ level = be32_to_cpu(agi->agi_free_level);
+ XFS_SCRUB_AGI_CHECK(level > 0);
+ XFS_SCRUB_AGI_CHECK(level <= XFS_BTREE_MAXLEVELS);
+ }
+
+ /* Check inode counters */
+ first_agino = XFS_OFFBNO_TO_AGINO(mp, XFS_AGI_BLOCK(mp) + 1, 0);
+ /*
+ * NOTE(review): eoag is the AG length read from agi_length, so the
+ * last block of the AG would be eoag - 1; confirm that eoag + 1 is
+ * the intended upper bound for the last inode number.
+ */
+ last_agino = XFS_OFFBNO_TO_AGINO(mp, eoag + 1, 0) - 1;
+ agino = be32_to_cpu(agi->agi_count);
+ XFS_SCRUB_AGI_CHECK(agino <= last_agino - first_agino + 1);
+ XFS_SCRUB_AGI_CHECK(agino >= be32_to_cpu(agi->agi_freecount));
+
+ /* Check inode pointers */
+ agino = be32_to_cpu(agi->agi_newino);
+ if (agino != NULLAGINO) {
+ XFS_SCRUB_AGI_CHECK(agino >= first_agino);
+ XFS_SCRUB_AGI_CHECK(agino <= last_agino);
+ }
+ agino = be32_to_cpu(agi->agi_dirino);
+ if (agino != NULLAGINO) {
+ XFS_SCRUB_AGI_CHECK(agino >= first_agino);
+ XFS_SCRUB_AGI_CHECK(agino <= last_agino);
+ }
+
+ /* Check unlinked inode buckets */
+ for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
+ agino = be32_to_cpu(agi->agi_unlinked[i]);
+ /* NULLAGINO buckets have nothing to range-check. */
+ if (agino == NULLAGINO)
+ continue;
+ XFS_SCRUB_AGI_CHECK(agino >= first_agino);
+ XFS_SCRUB_AGI_CHECK(agino <= last_agino);
+ }
+
+out:
+ return error;
+}
+#undef XFS_SCRUB_AGI_CHECK
+#undef XFS_SCRUB_AGI_OP_ERROR_GOTO
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index eb303a3..eb5c03e 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -620,12 +620,14 @@ xfs_scrub_load_ag_headers(
struct xfs_mount *mp = sc->tp->t_mountp;
int error;
- ASSERT(type == XFS_SCRUB_TYPE_AGF || type == XFS_SCRUB_TYPE_AGFL);
+ ASSERT(type == XFS_SCRUB_TYPE_AGF || type == XFS_SCRUB_TYPE_AGFL ||
+ type == XFS_SCRUB_TYPE_AGI);
memset(&sc->sa, 0, sizeof(sc->sa));
sc->sa.agno = agno;
error = xfs_scrub_load_ag_header(sc, XFS_AGI_DADDR(mp),
- &sc->sa.agi_bp, &xfs_agi_buf_ops, false);
+ &sc->sa.agi_bp, &xfs_agi_buf_ops,
+ type == XFS_SCRUB_TYPE_AGI);
if (error)
return error;
@@ -659,6 +661,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag, xfs_scrub_superblock, NULL, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agf, NULL, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agfl, NULL, NULL},
+ {xfs_scrub_setup_ag, xfs_scrub_agi, NULL, NULL},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 2f7075d..ec4c230 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -210,5 +210,6 @@ int xfs_scrub_setup_ag(struct xfs_scrub_context *sc, struct xfs_inode *ip,
int xfs_scrub_superblock(struct xfs_scrub_context *sc);
int xfs_scrub_agf(struct xfs_scrub_context *sc);
int xfs_scrub_agfl(struct xfs_scrub_context *sc);
+int xfs_scrub_agi(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index b8b0028..38118f5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3356,7 +3356,8 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_TEST, "dummy" }, \
{ XFS_SCRUB_TYPE_SB, "superblock" }, \
{ XFS_SCRUB_TYPE_AGF, "AGF" }, \
- { XFS_SCRUB_TYPE_AGFL, "AGFL" }
+ { XFS_SCRUB_TYPE_AGFL, "AGFL" }, \
+ { XFS_SCRUB_TYPE_AGI, "AGI" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 14/47] xfs: support scrubbing free space btrees
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (12 preceding siblings ...)
2017-01-07 0:37 ` [PATCH 13/47] xfs: scrub the AGI Darrick J. Wong
@ 2017-01-07 0:37 ` Darrick J. Wong
2017-01-07 0:37 ` [PATCH 15/47] xfs: support scrubbing inode btrees Darrick J. Wong
` (33 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:37 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Check the extent records of the free space btrees to ensure that the
values look sane.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_alloc_btree.c | 6 --
fs/xfs/libxfs/xfs_fs.h | 4 +
fs/xfs/repair/alloc.c | 114 +++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 2 +
fs/xfs/repair/common.h | 6 ++
fs/xfs/xfs_trace.h | 4 +
7 files changed, 129 insertions(+), 8 deletions(-)
create mode 100644 fs/xfs/repair/alloc.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ae307ba..39c8fe0 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -104,6 +104,7 @@ xfs-y += xfs_aops.o \
# online scrub/repair
xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
agheader.o \
+ alloc.o \
btree.o \
common.o \
)
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index ba3ec9c..10d6c12 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -386,7 +386,6 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_bnobt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -433,7 +432,6 @@ xfs_cntbt_recs_inorder(
be32_to_cpu(r1->alloc.ar_startblock) <
be32_to_cpu(r2->alloc.ar_startblock));
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_bnobt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
@@ -453,10 +451,8 @@ static const struct xfs_btree_ops xfs_bnobt_ops = {
.key_diff = xfs_bnobt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
.diff_two_keys = xfs_bnobt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_bnobt_keys_inorder,
.recs_inorder = xfs_bnobt_recs_inorder,
-#endif
};
static const struct xfs_btree_ops xfs_cntbt_ops = {
@@ -476,10 +472,8 @@ static const struct xfs_btree_ops xfs_cntbt_ops = {
.key_diff = xfs_cntbt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
.diff_two_keys = xfs_cntbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_cntbt_keys_inorder,
.recs_inorder = xfs_cntbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index f883dcd..4e61d8b 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -579,7 +579,9 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_AGF 2 /* AG free header */
#define XFS_SCRUB_TYPE_AGFL 3 /* AG free list */
#define XFS_SCRUB_TYPE_AGI 4 /* AG inode header */
-#define XFS_SCRUB_TYPE_MAX 4
+#define XFS_SCRUB_TYPE_BNOBT 5 /* freesp by block btree */
+#define XFS_SCRUB_TYPE_CNTBT 6 /* freesp by length btree */
+#define XFS_SCRUB_TYPE_MAX 6
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/repair/alloc.c b/fs/xfs/repair/alloc.c
new file mode 100644
index 0000000..2fef449
--- /dev/null
+++ b/fs/xfs/repair/alloc.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_rmap.h"
+#include "repair/common.h"
+#include "repair/btree.h"
+
+/*
+ * Run the generic AG setup and then initialize sc->sa for sm->sm_agno so
+ * that the scrubber has its AG headers and btree cursors. If that second
+ * step fails, the transaction is cancelled before the error is returned.
+ */
+int
+xfs_scrub_setup_ag_header(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ int ret;
+
+ ret = xfs_scrub_setup_ag(sc, ip, sm, retry_deadlocked);
+ if (ret)
+ return ret;
+
+ ret = xfs_scrub_ag_init(sc, sm->sm_agno, &sc->sa);
+ if (!ret)
+ return 0;
+
+ /* AG init failed; cancel the transaction. */
+ xfs_trans_cancel(sc->tp);
+ return ret;
+}
+
+/* Free space btree scrubber. */
+
+/*
+ * Scrub a bnobt/cntbt record.
+ *
+ * Decodes the free extent in @rec and checks that it starts inside the AG
+ * and does not run past the end of it, measured both against the
+ * superblock's sb_agblocks and against the agf_length in the AGF buffer
+ * held in bs->sc->sa.
+ */
+STATIC int
+xfs_scrub_allocbt_helper(
+ struct xfs_scrub_btree *bs,
+ union xfs_btree_rec *rec)
+{
+ struct xfs_mount *mp = bs->cur->bc_mp;
+ struct xfs_agf *agf;
+ xfs_agblock_t bno;
+ xfs_extlen_t len;
+ int error = 0;
+
+ bno = be32_to_cpu(rec->alloc.ar_startblock);
+ len = be32_to_cpu(rec->alloc.ar_blockcount);
+ agf = XFS_BUF_TO_AGF(bs->sc->sa.agf_bp);
+
+ XFS_SCRUB_BTREC_CHECK(bs, bno < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_BTREC_CHECK(bs, bno < be32_to_cpu(agf->agf_length));
+ /*
+ * Fails for a zero-length extent; presumably also meant to catch
+ * bno + len wrapping around -- confirm against the type widths.
+ */
+ XFS_SCRUB_BTREC_CHECK(bs, bno < bno + len);
+ /* Widen before adding so the end-of-extent comparison cannot wrap. */
+ XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)bno + len <=
+ mp->m_sb.sb_agblocks);
+ XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)bno + len <=
+ be32_to_cpu(agf->agf_length));
+
+ return error;
+}
+
+/*
+ * Scrub one of the freespace btrees for some AG. XFS_BTNUM_BNO selects
+ * the sc->sa.bno_cur cursor; any other value selects sc->sa.cnt_cur.
+ */
+STATIC int
+xfs_scrub_allocbt(
+ struct xfs_scrub_context *sc,
+ xfs_btnum_t which)
+{
+ struct xfs_btree_cur *cur = sc->sa.cnt_cur;
+ struct xfs_owner_info oinfo;
+
+ if (which == XFS_BTNUM_BNO)
+ cur = sc->sa.bno_cur;
+
+ /* The btree blocks themselves are expected to be owned by the AG. */
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+ return xfs_scrub_btree(sc, cur, xfs_scrub_allocbt_helper,
+ &oinfo, NULL);
+}
+
+/* Scrub the free space btree selected by XFS_BTNUM_BNO. */
+int
+xfs_scrub_bnobt(
+ struct xfs_scrub_context *sc)
+{
+ return xfs_scrub_allocbt(sc, XFS_BTNUM_BNO);
+}
+
+/* Scrub the free space btree selected by XFS_BTNUM_CNT. */
+int
+xfs_scrub_cntbt(
+ struct xfs_scrub_context *sc)
+{
+ return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
+}
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index eb5c03e..7c16c35 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -662,6 +662,8 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag, xfs_scrub_agf, NULL, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agfl, NULL, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agi, NULL, NULL},
+ {xfs_scrub_setup_ag_header, xfs_scrub_bnobt, NULL, NULL},
+ {xfs_scrub_setup_ag_header, xfs_scrub_cntbt, NULL, NULL},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index ec4c230..b4a907f 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -204,6 +204,10 @@ int xfs_scrub_setup(struct xfs_scrub_context *sc, struct xfs_inode *ip,
struct xfs_scrub_metadata *sm, bool retry_deadlocked);
int xfs_scrub_setup_ag(struct xfs_scrub_context *sc, struct xfs_inode *ip,
struct xfs_scrub_metadata *sm, bool retry_deadlocked);
+int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
/* Metadata scrubbers */
@@ -211,5 +215,7 @@ int xfs_scrub_superblock(struct xfs_scrub_context *sc);
int xfs_scrub_agf(struct xfs_scrub_context *sc);
int xfs_scrub_agfl(struct xfs_scrub_context *sc);
int xfs_scrub_agi(struct xfs_scrub_context *sc);
+int xfs_scrub_bnobt(struct xfs_scrub_context *sc);
+int xfs_scrub_cntbt(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 38118f5..5cc7fed 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3357,7 +3357,9 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_SB, "superblock" }, \
{ XFS_SCRUB_TYPE_AGF, "AGF" }, \
{ XFS_SCRUB_TYPE_AGFL, "AGFL" }, \
- { XFS_SCRUB_TYPE_AGI, "AGI" }
+ { XFS_SCRUB_TYPE_AGI, "AGI" }, \
+ { XFS_SCRUB_TYPE_BNOBT, "bnobt" }, \
+ { XFS_SCRUB_TYPE_CNTBT, "cntbt" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 15/47] xfs: support scrubbing inode btrees
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (13 preceding siblings ...)
2017-01-07 0:37 ` [PATCH 14/47] xfs: support scrubbing free space btrees Darrick J. Wong
@ 2017-01-07 0:37 ` Darrick J. Wong
2017-01-07 0:37 ` [PATCH 16/47] xfs: support scrubbing rmap btree Darrick J. Wong
` (32 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:37 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Check the records of the inode btrees to make sure that the values
make sense given the inode records themselves.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_fs.h | 4
fs/xfs/libxfs/xfs_ialloc.c | 41 +++-
fs/xfs/libxfs/xfs_ialloc.h | 3
fs/xfs/libxfs/xfs_ialloc_btree.c | 32 +++
fs/xfs/repair/common.c | 2
fs/xfs/repair/common.h | 7 +
fs/xfs/repair/ialloc.c | 359 ++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_icache.c | 4
fs/xfs/xfs_icache.h | 7 -
fs/xfs/xfs_trace.h | 4
11 files changed, 437 insertions(+), 27 deletions(-)
create mode 100644 fs/xfs/repair/ialloc.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 39c8fe0..28b70ad 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -107,6 +107,7 @@ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
alloc.o \
btree.o \
common.o \
+ ialloc.o \
)
# low-level transaction/log code
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 4e61d8b..349d77b 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -581,7 +581,9 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_AGI 4 /* AG inode header */
#define XFS_SCRUB_TYPE_BNOBT 5 /* freesp by block btree */
#define XFS_SCRUB_TYPE_CNTBT 6 /* freesp by length btree */
-#define XFS_SCRUB_TYPE_MAX 6
+#define XFS_SCRUB_TYPE_INOBT 7 /* inode btree */
+#define XFS_SCRUB_TYPE_FINOBT 8 /* free inode btree */
+#define XFS_SCRUB_TYPE_MAX 8
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index f272abf..e2f93e6 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -99,24 +99,14 @@ xfs_inobt_update(
return xfs_btree_update(cur, &rec);
}
-/*
- * Get the data from the pointed-to record.
- */
-int /* error */
-xfs_inobt_get_rec(
- struct xfs_btree_cur *cur, /* btree cursor */
- xfs_inobt_rec_incore_t *irec, /* btree record */
- int *stat) /* output: success/failure */
+void
+xfs_inobt_btrec_to_irec(
+ struct xfs_mount *mp,
+ union xfs_btree_rec *rec,
+ struct xfs_inobt_rec_incore *irec)
{
- union xfs_btree_rec *rec;
- int error;
-
- error = xfs_btree_get_rec(cur, &rec, stat);
- if (error || *stat == 0)
- return error;
-
irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
- if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
irec->ir_count = rec->inobt.ir_u.sp.ir_count;
irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
@@ -131,6 +121,25 @@ xfs_inobt_get_rec(
be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
}
irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int /* error */
+xfs_inobt_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_inobt_rec_incore_t *irec, /* btree record */
+ int *stat) /* output: success/failure */
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (error || *stat == 0)
+ return error;
+
+ xfs_inobt_btrec_to_irec(cur->bc_mp, rec, irec);
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 0bb8966..8e5861d 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -168,5 +168,8 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, struct xfs_buf **bpp);
+union xfs_btree_rec;
+void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
+ struct xfs_inobt_rec_incore *irec);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 0fd086d..09d8cb0 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -152,6 +152,18 @@ xfs_inobt_init_key_from_rec(
}
STATIC void
+xfs_inobt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ __u32 x;
+
+ x = be32_to_cpu(rec->inobt.ir_startino);
+ x += XFS_INODES_PER_CHUNK - 1;
+ key->inobt.ir_startino = cpu_to_be32(x);
+}
+
+STATIC void
xfs_inobt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
@@ -205,6 +217,16 @@ xfs_inobt_key_diff(
cur->bc_rec.i.ir_startino;
}
+/*
+ * Compare two inode btree keys by ir_startino. Returns a negative value
+ * if @k1 sorts before @k2, zero if they are equal, and a positive value
+ * otherwise. The first operand is widened to 64 bits so the subtraction
+ * of the two 32-bit inode numbers is done in 64-bit arithmetic.
+ */
+STATIC __int64_t
+xfs_inobt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return (__int64_t)be32_to_cpu(k1->inobt.ir_startino) -
+ be32_to_cpu(k2->inobt.ir_startino);
+}
+
static int
xfs_inobt_verify(
struct xfs_buf *bp)
@@ -279,7 +301,6 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = {
.verify_write = xfs_inobt_write_verify,
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_inobt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -299,7 +320,6 @@ xfs_inobt_recs_inorder(
return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
be32_to_cpu(r2->inobt.ir_startino);
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_inobt_ops = {
.rec_len = sizeof(xfs_inobt_rec_t),
@@ -312,14 +332,14 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
+ .diff_two_keys = xfs_inobt_diff_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
-#endif
};
static const struct xfs_btree_ops xfs_finobt_ops = {
@@ -333,14 +353,14 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
+ .diff_two_keys = xfs_inobt_diff_two_keys,
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 7c16c35..4ecf677 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -664,6 +664,8 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag, xfs_scrub_agi, NULL, NULL},
{xfs_scrub_setup_ag_header, xfs_scrub_bnobt, NULL, NULL},
{xfs_scrub_setup_ag_header, xfs_scrub_cntbt, NULL, NULL},
+ {xfs_scrub_setup_ag_iallocbt, xfs_scrub_inobt, NULL, NULL},
+ {xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, NULL, xfs_sb_version_hasfinobt},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index b4a907f..69177c8 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -59,6 +59,7 @@ struct xfs_scrub_context {
struct xfs_scrub_metadata *sm;
struct xfs_trans *tp;
struct xfs_inode *ip;
+ bool retry;
/* State tracking for multi-AG operations. */
struct xfs_scrub_ag_lock ag_lock;
@@ -208,6 +209,10 @@ int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
bool retry_deadlocked);
+int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
/* Metadata scrubbers */
@@ -217,5 +222,7 @@ int xfs_scrub_agfl(struct xfs_scrub_context *sc);
int xfs_scrub_agi(struct xfs_scrub_context *sc);
int xfs_scrub_bnobt(struct xfs_scrub_context *sc);
int xfs_scrub_cntbt(struct xfs_scrub_context *sc);
+int xfs_scrub_inobt(struct xfs_scrub_context *sc);
+int xfs_scrub_finobt(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/ialloc.c b/fs/xfs/repair/ialloc.c
new file mode 100644
index 0000000..67cf727
--- /dev/null
+++ b/fs/xfs/repair/ialloc.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_icache.h"
+#include "xfs_rmap.h"
+#include "xfs_log.h"
+#include "xfs_trans_priv.h"
+#include "repair/common.h"
+#include "repair/btree.h"
+
+/*
+ * Set us up with AG headers and btree cursors for an inode btree scrub.
+ *
+ * Only when called with @retry_deadlocked set do we first force the log
+ * and push the whole AIL, so that we can correlate inodes to inobt. On
+ * success the retry flag is saved in sc->retry; the per-inode freemask
+ * check in this file reads it to decide whether to return -EDEADLOCK.
+ */
+int
+xfs_scrub_setup_ag_iallocbt(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ int error;
+
+ /* Push everything out of the log onto disk prior to checking. */
+ if (retry_deadlocked) {
+ error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+ if (error)
+ goto out;
+ xfs_ail_push_all_sync(mp->m_ail);
+ }
+
+ error = xfs_scrub_setup_ag_header(sc, ip, sm, retry_deadlocked);
+ if (error)
+ goto out;
+ /* Remember whether this is the retry pass. */
+ sc->retry = retry_deadlocked;
+out:
+ return error;
+}
+
+/* Inode btree scrubber. */
+
+/*
+ * Scrub a chunk of an inobt record.
+ *
+ * Checks that the @len blocks starting at the AG block containing inode
+ * @agino lie inside the AG, measured against both sb_agblocks and the
+ * AGF's agf_length. *@keep_scanning is set to true on entry and cleared
+ * only if @error is nonzero after the checks. @irec is not referenced
+ * in this function.
+ */
+STATIC int
+xfs_scrub_iallocbt_chunk(
+ struct xfs_scrub_btree *bs,
+ struct xfs_inobt_rec_incore *irec,
+ xfs_agino_t agino,
+ xfs_extlen_t len,
+ bool *keep_scanning)
+{
+ struct xfs_mount *mp = bs->cur->bc_mp;
+ struct xfs_agf *agf;
+ xfs_agblock_t eoag;
+ xfs_agblock_t bno;
+ int error = 0;
+
+ agf = XFS_BUF_TO_AGF(bs->sc->sa.agf_bp);
+ eoag = be32_to_cpu(agf->agf_length);
+ bno = XFS_AGINO_TO_AGBNO(mp, agino);
+
+ *keep_scanning = true;
+ XFS_SCRUB_BTREC_CHECK(bs, bno < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_BTREC_CHECK(bs, bno < eoag);
+ /* Fails when len is zero. */
+ XFS_SCRUB_BTREC_CHECK(bs, bno < bno + len);
+ /* Widen before adding so the end-of-chunk comparison cannot wrap. */
+ XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)bno + len <=
+ mp->m_sb.sb_agblocks);
+ XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)bno + len <=
+ eoag);
+ /*
+ * NOTE(review): nothing visible in this function assigns a nonzero
+ * value to error, so this branch only fires if the check macros do.
+ */
+ if (error) {
+ *keep_scanning = false;
+ goto out;
+ }
+
+out:
+ return error;
+}
+
+/*
+ * Count the number of free inodes, i.e. how many of the low
+ * XFS_INODES_PER_CHUNK bits of @freemask are set.
+ */
+static unsigned int
+xfs_scrub_iallocbt_freecount(
+ xfs_inofree_t freemask)
+{
+ unsigned int nr_free = 0;
+ int bit;
+
+ /* Examine the lowest bit, then shift the next one into place. */
+ for (bit = 0; bit < XFS_INODES_PER_CHUNK; bit++, freemask >>= 1)
+ nr_free += (freemask & 1) ? 1 : 0;
+
+ return nr_free;
+}
+
+/*
+ * Check a particular inode with ir_free.
+ *
+ * @bp is the inode cluster buffer, @clusterino the index of the inode
+ * within that cluster, @chunkino the index of the cluster's first inode
+ * within the inobt record @irec, and @fsino the inode number of the
+ * cluster's first inode. The on-disk inode must carry the inode magic
+ * and, for version 3 and later inodes, the expected inode number. The
+ * record's free bit must then disagree with the inode being in use
+ * (nonzero mode).
+ *
+ * Returns -EDEADLOCK if the on-disk mode disagrees with the free bit and
+ * this is not the retry pass; returns 0 in every other case.
+ */
+STATIC int
+xfs_scrub_iallocbt_check_cluster_freemask(
+ struct xfs_scrub_btree *bs,
+ xfs_ino_t fsino,
+ xfs_agino_t chunkino,
+ xfs_agino_t clusterino,
+ struct xfs_inobt_rec_incore *irec,
+ struct xfs_buf *bp)
+{
+ struct xfs_dinode *dip;
+ struct xfs_inode *ip;
+ struct xfs_mount *mp = bs->cur->bc_mp;
+ bool freemask_ok;
+ int error;
+
+ dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize);
+ XFS_SCRUB_BTREC_GOTO(bs,
+ be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC,
+ out);
+ XFS_SCRUB_BTREC_GOTO(bs,
+ dip->di_version < 3 || be64_to_cpu(dip->di_ino) ==
+ fsino + clusterino,
+ out);
+ /* Start with "the record says this inode is free". */
+ freemask_ok = !!(irec->ir_free & XFS_INOBT_MASK(chunkino + clusterino));
+ /* Ask only for an inode that is already in the cache. */
+ error = xfs_iget(mp, bs->cur->bc_tp, fsino + clusterino,
+ XFS_IGET_HITONLY, 0, &ip);
+ if (error == -ENOENT) {
+ ; /* i_mode is 0 */
+ } else if (!error && ip) {
+ /* Cached inode: ok iff exactly one of "free" and "in use" holds. */
+ freemask_ok ^= !!(VFS_I(ip)->i_mode);
+ IRELE(ip);
+ } else {
+ /*
+ * No cached inode (or some other lookup error, which is
+ * dropped here): fall back to the mode in the on-disk inode.
+ */
+ freemask_ok ^= !!(dip->di_mode);
+ /*
+ * Presumably asks the caller to retry after the log has been
+ * flushed -- confirm against the scrub dispatcher.
+ */
+ if (!bs->sc->retry && !freemask_ok)
+ return -EDEADLOCK;
+ }
+ XFS_SCRUB_BTREC_CHECK(bs, freemask_ok);
+out:
+ return 0;
+}
+
+/*
+ * Make sure the free mask is consistent with what the inodes think.
+ *
+ * Walks the inode chunk described by @irec one inode cluster at a time.
+ * For each cluster the holemask bits covering it must be either all set
+ * or all clear; clusters that are holes are skipped. For the rest the
+ * cluster buffer is read and every inode in it is compared against
+ * ir_free. An error from the per-inode check ends the walk at once.
+ */
+STATIC int
+xfs_scrub_iallocbt_check_freemask(
+ struct xfs_scrub_btree *bs,
+ struct xfs_inobt_rec_incore *irec)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_imap imap;
+ struct xfs_mount *mp = bs->cur->bc_mp;
+ struct xfs_dinode *dip;
+ struct xfs_buf *bp;
+ xfs_ino_t fsino;
+ xfs_agino_t nr_inodes;
+ xfs_agino_t agino;
+ xfs_agino_t chunkino;
+ xfs_agino_t clusterino;
+ xfs_agblock_t agbno;
+ int blks_per_cluster;
+ __uint16_t holemask;
+ __uint16_t ir_holemask;
+ int error = 0;
+
+ /* Make sure the freemask matches the inode records. */
+ blks_per_cluster = xfs_icluster_size_fsb(mp);
+ /* Number of inodes held by one inode cluster. */
+ nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
+ /* NOTE(review): oinfo is initialized but not used in this function. */
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+
+ for (agino = irec->ir_startino;
+ agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
+ agino += blks_per_cluster * mp->m_sb.sb_inopblock) {
+ fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
+ /* Offset of this cluster's first inode within the chunk. */
+ chunkino = agino - irec->ir_startino;
+ agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+
+ /* Compute the holemask mask for this cluster. */
+ for (clusterino = 0, holemask = 0; clusterino < nr_inodes;
+ clusterino += XFS_INODES_PER_HOLEMASK_BIT)
+ holemask |= XFS_INOBT_MASK((chunkino + clusterino) /
+ XFS_INODES_PER_HOLEMASK_BIT);
+
+ /* The whole cluster must be a hole or not a hole. */
+ ir_holemask = (irec->ir_holemask & holemask);
+ XFS_SCRUB_BTREC_CHECK(bs, ir_holemask == holemask ||
+ ir_holemask == 0);
+
+ /* If any part of this is a hole, skip it. */
+ if (ir_holemask)
+ continue;
+
+ /* Grab the inode cluster buffer. */
+ imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
+ agbno);
+ imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
+ imap.im_boffset = 0;
+
+ /* dip is filled in by the call but not used afterwards. */
+ error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
+ &dip, &bp, 0, 0);
+ XFS_SCRUB_BTREC_OP_ERROR_GOTO(bs, &error, next_cluster);
+
+ /* Which inodes are free? */
+ for (clusterino = 0; clusterino < nr_inodes; clusterino++) {
+ error = xfs_scrub_iallocbt_check_cluster_freemask(bs,
+ fsino, chunkino, clusterino, irec, bp);
+ if (error) {
+ /* Release the buffer before bailing out. */
+ xfs_trans_brelse(bs->cur->bc_tp, bp);
+ return error;
+ }
+ }
+
+ xfs_trans_brelse(bs->cur->bc_tp, bp);
+next_cluster:
+ ;
+ }
+
+ return error;
+}
+
+/*
+ * Scrub an inobt/finobt record.
+ *
+ * Decodes @rec, checks the inode and free inode counts against the free
+ * mask, checks that every allocated piece of the chunk lies inside the
+ * AG (one piece for a regular chunk, one per clear holemask bit for a
+ * sparse chunk), and finally cross-checks the free mask against the
+ * inodes themselves.
+ *
+ * Returns 0, or the first error reported by one of the sub-checks.
+ */
+STATIC int
+xfs_scrub_iallocbt_helper(
+ struct xfs_scrub_btree *bs,
+ union xfs_btree_rec *rec)
+{
+ struct xfs_mount *mp = bs->cur->bc_mp;
+ struct xfs_agi *agi;
+ struct xfs_inobt_rec_incore irec;
+ uint64_t holes;
+ xfs_agino_t agino;
+ xfs_agblock_t agbno;
+ xfs_extlen_t len;
+ bool keep_scanning;
+ int holecount;
+ int i;
+ int error = 0;
+ int err2 = 0;
+ unsigned int real_freecount;
+ __uint16_t holemask;
+
+ xfs_inobt_btrec_to_irec(mp, rec, &irec);
+
+ XFS_SCRUB_BTREC_CHECK(bs, irec.ir_count <= XFS_INODES_PER_CHUNK);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.ir_freecount <= XFS_INODES_PER_CHUNK);
+ /* Inodes absent from the chunk are counted as free in ir_free too. */
+ real_freecount = irec.ir_freecount +
+ (XFS_INODES_PER_CHUNK - irec.ir_count);
+ XFS_SCRUB_BTREC_CHECK(bs, real_freecount ==
+ xfs_scrub_iallocbt_freecount(irec.ir_free));
+ agi = XFS_BUF_TO_AGI(bs->sc->sa.agi_bp);
+ agino = irec.ir_startino;
+ agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino);
+ /* Nothing else can be checked if the chunk starts beyond the AG. */
+ XFS_SCRUB_BTREC_GOTO(bs, agbno < be32_to_cpu(agi->agi_length), out);
+
+ /* Handle non-sparse inodes */
+ if (!xfs_inobt_issparse(irec.ir_holemask)) {
+ len = XFS_B_TO_FSB(mp,
+ XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize);
+ XFS_SCRUB_BTREC_CHECK(bs,
+ irec.ir_count == XFS_INODES_PER_CHUNK);
+
+ error = xfs_scrub_iallocbt_chunk(bs, &irec, agino, len,
+ &keep_scanning);
+ if (error)
+ goto out;
+ goto check_freemask;
+ }
+
+ /* Check each chunk of a sparse inode cluster. */
+ holemask = irec.ir_holemask;
+ holecount = 0;
+ len = XFS_B_TO_FSB(mp,
+ XFS_INODES_PER_HOLEMASK_BIT * mp->m_sb.sb_inodesize);
+ /* Every inode that sits in a hole must also be marked free. */
+ holes = ~xfs_inobt_irec_to_allocmask(&irec);
+ XFS_SCRUB_BTREC_CHECK(bs, (holes & irec.ir_free) == holes);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.ir_freecount <= irec.ir_count);
+
+ for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1,
+ i++, agino += XFS_INODES_PER_HOLEMASK_BIT) {
+ if (holemask & 1) {
+ holecount += XFS_INODES_PER_HOLEMASK_BIT;
+ continue;
+ }
+
+ /* Remember only the first error, but keep walking if allowed. */
+ err2 = xfs_scrub_iallocbt_chunk(bs, &irec, agino, len,
+ &keep_scanning);
+ if (!error && err2)
+ error = err2;
+ if (!keep_scanning)
+ break;
+ }
+
+ XFS_SCRUB_BTREC_CHECK(bs, holecount <= XFS_INODES_PER_CHUNK);
+ XFS_SCRUB_BTREC_CHECK(bs, holecount + irec.ir_count ==
+ XFS_INODES_PER_CHUNK);
+
+ /*
+ * Bail out, as the non-sparse path does, rather than letting the
+ * freemask check below overwrite an error recorded while walking
+ * the sparse chunk.
+ */
+ if (error)
+ goto out;
+
+check_freemask:
+ error = xfs_scrub_iallocbt_check_freemask(bs, &irec);
+out:
+ return error;
+}
+
+/*
+ * Scrub one of the inode btrees for some AG. XFS_BTNUM_INO selects the
+ * sc->sa.ino_cur cursor; any other value selects sc->sa.fino_cur.
+ */
+STATIC int
+xfs_scrub_iallocbt(
+ struct xfs_scrub_context *sc,
+ xfs_btnum_t which)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_btree_cur *cur = sc->sa.fino_cur;
+
+ if (which == XFS_BTNUM_INO)
+ cur = sc->sa.ino_cur;
+
+ /* The btree blocks themselves are expected to be owned by the inobt. */
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
+ return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_helper,
+ &oinfo, NULL);
+}
+
+/* Scrub the inode btree selected by XFS_BTNUM_INO. */
+int
+xfs_scrub_inobt(
+ struct xfs_scrub_context *sc)
+{
+ return xfs_scrub_iallocbt(sc, XFS_BTNUM_INO);
+}
+
+/* Scrub the inode btree selected by XFS_BTNUM_FINO. */
+int
+xfs_scrub_finobt(
+ struct xfs_scrub_context *sc)
+{
+ return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
+}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 70ca4f6..0ea0403 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -583,6 +583,8 @@ xfs_iget(
error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
if (error)
goto out_error_or_again;
+ } else if (flags & XFS_IGET_HITONLY) {
+ rcu_read_unlock();
} else {
rcu_read_unlock();
XFS_STATS_INC(mp, xs_ig_missed);
@@ -595,6 +597,8 @@ xfs_iget(
xfs_perag_put(pag);
*ipp = ip;
+ if (!ip)
+ return 0;
/*
* If we have a real type for an on-disk inode, we can setup the inode
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index a1e02f4..e926f38 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -45,9 +45,10 @@ struct xfs_eofblocks {
/*
* Flags for xfs_iget()
*/
-#define XFS_IGET_CREATE 0x1
-#define XFS_IGET_UNTRUSTED 0x2
-#define XFS_IGET_DONTCACHE 0x4
+#define XFS_IGET_CREATE 0x1 /* initialize free inodes */
+#define XFS_IGET_UNTRUSTED 0x2 /* check the inode number */
+#define XFS_IGET_DONTCACHE 0x4 /* don't keep the inode cached */
+#define XFS_IGET_HITONLY 0x8 /* only return cached inodes */
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
uint flags, uint lock_flags, xfs_inode_t **ipp);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 5cc7fed..ab50df5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3359,7 +3359,9 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_AGFL, "AGFL" }, \
{ XFS_SCRUB_TYPE_AGI, "AGI" }, \
{ XFS_SCRUB_TYPE_BNOBT, "bnobt" }, \
- { XFS_SCRUB_TYPE_CNTBT, "cntbt" }
+ { XFS_SCRUB_TYPE_CNTBT, "cntbt" }, \
+ { XFS_SCRUB_TYPE_INOBT, "inobt" }, \
+ { XFS_SCRUB_TYPE_FINOBT, "finobt" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 16/47] xfs: support scrubbing rmap btree
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (14 preceding siblings ...)
2017-01-07 0:37 ` [PATCH 15/47] xfs: support scrubbing inode btrees Darrick J. Wong
@ 2017-01-07 0:37 ` Darrick J. Wong
2017-01-07 0:37 ` [PATCH 17/47] xfs: support scrubbing refcount btree Darrick J. Wong
` (31 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:37 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Check the reverse mapping records to make sure that the contents
make sense.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_fs.h | 3 +
fs/xfs/libxfs/xfs_rmap.c | 3 +
fs/xfs/libxfs/xfs_rmap.h | 3 +
fs/xfs/libxfs/xfs_rmap_btree.c | 4 -
fs/xfs/repair/common.c | 1
fs/xfs/repair/common.h | 1
fs/xfs/repair/rmap.c | 116 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_trace.h | 3 +
9 files changed, 128 insertions(+), 7 deletions(-)
create mode 100644 fs/xfs/repair/rmap.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 28b70ad..3b5ef7b 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -108,6 +108,7 @@ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
btree.o \
common.o \
ialloc.o \
+ rmap.o \
)
# low-level transaction/log code
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 349d77b..d89b965 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -583,7 +583,8 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_CNTBT 6 /* freesp by length btree */
#define XFS_SCRUB_TYPE_INOBT 7 /* inode btree */
#define XFS_SCRUB_TYPE_FINOBT 8 /* free inode btree */
-#define XFS_SCRUB_TYPE_MAX 8
+#define XFS_SCRUB_TYPE_RMAPBT 9 /* reverse mapping btree */
+#define XFS_SCRUB_TYPE_MAX 9
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 3840556..c7d5102 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -179,7 +179,8 @@ xfs_rmap_delete(
return error;
}
-static int
+/* Convert an internal btree record to an rmap record. */
+int
xfs_rmap_btrec_to_irec(
union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec)
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index faf2c1a..3fa4559 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -214,5 +214,8 @@ int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno,
int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
uint64_t owner, uint64_t offset, unsigned int flags,
struct xfs_rmap_irec *irec, int *stat);
+union xfs_btree_rec;
+int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
+ struct xfs_rmap_irec *irec);
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 74e5a54..b342cc8 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -377,7 +377,6 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
.verify_write = xfs_rmapbt_write_verify,
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_rmapbt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -437,7 +436,6 @@ xfs_rmapbt_recs_inorder(
return 1;
return 0;
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_rmapbt_ops = {
.rec_len = sizeof(struct xfs_rmap_rec),
@@ -456,10 +454,8 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
.key_diff = xfs_rmapbt_key_diff,
.buf_ops = &xfs_rmapbt_buf_ops,
.diff_two_keys = xfs_rmapbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_rmapbt_keys_inorder,
.recs_inorder = xfs_rmapbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 4ecf677..95e653a 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -666,6 +666,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag_header, xfs_scrub_cntbt, NULL, NULL},
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_inobt, NULL, NULL},
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, NULL, xfs_sb_version_hasfinobt},
+ {xfs_scrub_setup_ag_header, xfs_scrub_rmapbt, NULL, xfs_sb_version_hasrmapbt},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 69177c8..7f160e4 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -224,5 +224,6 @@ int xfs_scrub_bnobt(struct xfs_scrub_context *sc);
int xfs_scrub_cntbt(struct xfs_scrub_context *sc);
int xfs_scrub_inobt(struct xfs_scrub_context *sc);
int xfs_scrub_finobt(struct xfs_scrub_context *sc);
+int xfs_scrub_rmapbt(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/rmap.c b/fs/xfs/repair/rmap.c
new file mode 100644
index 0000000..9ae3c72
--- /dev/null
+++ b/fs/xfs/repair/rmap.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_rmap.h"
+#include "repair/common.h"
+#include "repair/btree.h"
+
+/* Reverse-mapping scrubber. */
+
+/*
+ * Scrub an rmapbt record.
+ *
+ * The raw record is decoded with xfs_rmap_btrec_to_irec() and the result is
+ * checked in three groups: extent bounds, flag combinations, and owner.
+ * Returns whatever is left in error after the decode step
+ * (XFS_SCRUB_BTREC_OP_ERROR_GOTO is handed &error and may adjust it; its
+ * definition is not visible here).
+ */
+STATIC int
+xfs_scrub_rmapbt_helper(
+ struct xfs_scrub_btree *bs,
+ union xfs_btree_rec *rec)
+{
+ struct xfs_mount *mp = bs->cur->bc_mp;
+ struct xfs_agf *agf;
+ struct xfs_rmap_irec irec;
+ xfs_agblock_t eoag;
+ bool non_inode;
+ bool is_unwritten;
+ bool is_bmbt;
+ bool is_attr;
+ int error;
+
+ error = xfs_rmap_btrec_to_irec(rec, &irec);
+ XFS_SCRUB_BTREC_OP_ERROR_GOTO(bs, &error, out);
+
+ /*
+ * Check extent.  The start must lie below both sb_agblocks and the
+ * AGF's agf_length (eoag), the end must not exceed either limit, and
+ * startblock < startblock + blockcount must hold.
+ * NOTE(review): that last test presumably rejects both zero-length
+ * extents and wraparound of the sum -- confirm the field widths.
+ */
+ agf = XFS_BUF_TO_AGF(bs->sc->sa.agf_bp);
+ eoag = be32_to_cpu(agf->agf_length);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rm_startblock < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rm_startblock < eoag);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rm_startblock < irec.rm_startblock +
+ irec.rm_blockcount);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rm_startblock + irec.rm_blockcount <=
+ mp->m_sb.sb_agblocks);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rm_startblock + irec.rm_blockcount <=
+ eoag);
+
+ /*
+ * Check flags.  BMBT-block and non-inode-owner records must have a
+ * zero offset; the unwritten flag excludes bmbt, non-inode and attr;
+ * a non-inode owner excludes bmbt, unwritten and attr.
+ */
+ non_inode = XFS_RMAP_NON_INODE_OWNER(irec.rm_owner);
+ is_bmbt = irec.rm_flags & XFS_RMAP_BMBT_BLOCK;
+ is_attr = irec.rm_flags & XFS_RMAP_ATTR_FORK;
+ is_unwritten = irec.rm_flags & XFS_RMAP_UNWRITTEN;
+
+ XFS_SCRUB_BTREC_CHECK(bs, !is_bmbt || irec.rm_offset == 0);
+ XFS_SCRUB_BTREC_CHECK(bs, !non_inode || irec.rm_offset == 0);
+ XFS_SCRUB_BTREC_CHECK(bs, !is_unwritten || !(is_bmbt || non_inode ||
+ is_attr));
+ XFS_SCRUB_BTREC_CHECK(bs, !non_inode || !(is_bmbt || is_unwritten ||
+ is_attr));
+
+ /* Owner inode within an AG? (AG number and AG block both in range) */
+ XFS_SCRUB_BTREC_CHECK(bs, non_inode ||
+ (XFS_INO_TO_AGNO(mp, irec.rm_owner) <
+ mp->m_sb.sb_agcount &&
+ XFS_AGINO_TO_AGBNO(mp,
+ XFS_INO_TO_AGINO(mp, irec.rm_owner)) <
+ mp->m_sb.sb_agblocks));
+ /* Owner inode within the FS? (disk address below sb_dblocks) */
+ XFS_SCRUB_BTREC_CHECK(bs, non_inode ||
+ XFS_AGB_TO_DADDR(mp,
+ XFS_INO_TO_AGNO(mp, irec.rm_owner),
+ XFS_AGINO_TO_AGBNO(mp,
+ XFS_INO_TO_AGINO(mp, irec.rm_owner))) <
+ XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
+
+ /*
+ * Non-inode owner within the magic values?  Accepted range is
+ * (XFS_RMAP_OWN_MIN, XFS_RMAP_OWN_FS].
+ */
+ XFS_SCRUB_BTREC_CHECK(bs, !non_inode ||
+ (irec.rm_owner > XFS_RMAP_OWN_MIN &&
+ irec.rm_owner <= XFS_RMAP_OWN_FS));
+out:
+ return error;
+}
+
+/*
+ * Scrub the rmap btree for some AG.
+ *
+ * Runs xfs_scrub_btree() over sc->sa.rmap_cur with
+ * xfs_scrub_rmapbt_helper() as the per-record callback, passing owner
+ * info initialised for XFS_RMAP_OWN_AG.
+ */
+int
+xfs_scrub_rmapbt(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_owner_info oinfo;
+
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+ return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_helper,
+ &oinfo, NULL);
+}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ab50df5..e752f68 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3361,7 +3361,8 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_BNOBT, "bnobt" }, \
{ XFS_SCRUB_TYPE_CNTBT, "cntbt" }, \
{ XFS_SCRUB_TYPE_INOBT, "inobt" }, \
- { XFS_SCRUB_TYPE_FINOBT, "finobt" }
+ { XFS_SCRUB_TYPE_FINOBT, "finobt" }, \
+ { XFS_SCRUB_TYPE_RMAPBT, "rmapbt" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 17/47] xfs: support scrubbing refcount btree
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (15 preceding siblings ...)
2017-01-07 0:37 ` [PATCH 16/47] xfs: support scrubbing rmap btree Darrick J. Wong
@ 2017-01-07 0:37 ` Darrick J. Wong
2017-01-07 0:37 ` [PATCH 18/47] xfs: scrub inodes Darrick J. Wong
` (30 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:37 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Plumb in the pieces necessary to check the refcount btree. If rmap is
available, check the reference count by performing an interval query
against the rmapbt.
v2: Handle the case where the rmap records are not all at least the
length of the refcount extent.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_fs.h | 3 +
fs/xfs/libxfs/xfs_refcount_btree.c | 4 --
fs/xfs/repair/common.c | 1
fs/xfs/repair/common.h | 1
fs/xfs/repair/refcount.c | 85 ++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_trace.h | 3 +
7 files changed, 92 insertions(+), 6 deletions(-)
create mode 100644 fs/xfs/repair/refcount.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 3b5ef7b..2995503 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -108,6 +108,7 @@ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
btree.o \
common.o \
ialloc.o \
+ refcount.o \
rmap.o \
)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index d89b965..9f3603cb 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -584,7 +584,8 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_INOBT 7 /* inode btree */
#define XFS_SCRUB_TYPE_FINOBT 8 /* free inode btree */
#define XFS_SCRUB_TYPE_RMAPBT 9 /* reverse mapping btree */
-#define XFS_SCRUB_TYPE_MAX 9
+#define XFS_SCRUB_TYPE_REFCNTBT 10 /* reference count btree */
+#define XFS_SCRUB_TYPE_MAX 10
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 50add52..cae24bd 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -285,7 +285,6 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
.verify_write = xfs_refcountbt_write_verify,
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_refcountbt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -306,7 +305,6 @@ xfs_refcountbt_recs_inorder(
be32_to_cpu(r1->refc.rc_blockcount) <=
be32_to_cpu(r2->refc.rc_startblock);
}
-#endif
static const struct xfs_btree_ops xfs_refcountbt_ops = {
.rec_len = sizeof(struct xfs_refcount_rec),
@@ -325,10 +323,8 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = {
.key_diff = xfs_refcountbt_key_diff,
.buf_ops = &xfs_refcountbt_buf_ops,
.diff_two_keys = xfs_refcountbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_refcountbt_keys_inorder,
.recs_inorder = xfs_refcountbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 95e653a..6bbaed8 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -667,6 +667,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_inobt, NULL, NULL},
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, NULL, xfs_sb_version_hasfinobt},
{xfs_scrub_setup_ag_header, xfs_scrub_rmapbt, NULL, xfs_sb_version_hasrmapbt},
+ {xfs_scrub_setup_ag_header, xfs_scrub_refcountbt, NULL, xfs_sb_version_hasreflink},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 7f160e4..7dea98a 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -225,5 +225,6 @@ int xfs_scrub_cntbt(struct xfs_scrub_context *sc);
int xfs_scrub_inobt(struct xfs_scrub_context *sc);
int xfs_scrub_finobt(struct xfs_scrub_context *sc);
int xfs_scrub_rmapbt(struct xfs_scrub_context *sc);
+int xfs_scrub_refcountbt(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/refcount.c b/fs/xfs/repair/refcount.c
new file mode 100644
index 0000000..186d83c
--- /dev/null
+++ b/fs/xfs/repair/refcount.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_rmap.h"
+#include "repair/common.h"
+#include "repair/btree.h"
+
+/* Reference count btree scrubber. */
+
+/*
+ * Scrub a refcountbt record.
+ *
+ * Checks, in order: the XFS_REFC_COW_START bit of rc_startblock is set if
+ * and only if rc_refcount == 1; then, with that bit masked off, the extent
+ * starts below both sb_agblocks and the AGF's agf_length, satisfies
+ * startblock < startblock + blockcount, and ends at or before both limits
+ * (the end is computed in unsigned long long); finally rc_refcount >= 1.
+ *
+ * Nothing in this function assigns to error after its initialisation to 0
+ * (NOTE(review): unless XFS_SCRUB_BTREC_CHECK touches it -- its definition
+ * is not visible here).
+ */
+STATIC int
+xfs_scrub_refcountbt_helper(
+ struct xfs_scrub_btree *bs,
+ union xfs_btree_rec *rec)
+{
+ struct xfs_mount *mp = bs->cur->bc_mp;
+ struct xfs_agf *agf;
+ struct xfs_refcount_irec irec;
+ xfs_agblock_t eoag;
+ bool has_cowflag;
+ int error = 0;
+
+ irec.rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
+ irec.rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
+ irec.rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
+ agf = XFS_BUF_TO_AGF(bs->sc->sa.agf_bp);
+ eoag = be32_to_cpu(agf->agf_length);
+
+ has_cowflag = !!(irec.rc_startblock & XFS_REFC_COW_START);
+ XFS_SCRUB_BTREC_CHECK(bs, (irec.rc_refcount == 1 && has_cowflag) ||
+ (irec.rc_refcount != 1 && !has_cowflag));
+ irec.rc_startblock &= ~XFS_REFC_COW_START;
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rc_startblock < mp->m_sb.sb_agblocks);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rc_startblock < eoag);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rc_startblock < irec.rc_startblock +
+ irec.rc_blockcount);
+ XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)irec.rc_startblock +
+ irec.rc_blockcount <= mp->m_sb.sb_agblocks);
+ XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)irec.rc_startblock +
+ irec.rc_blockcount <= eoag);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rc_refcount >= 1);
+
+ return error;
+}
+
+/*
+ * Scrub the refcount btree for some AG.
+ *
+ * Runs xfs_scrub_btree() over sc->sa.refc_cur with
+ * xfs_scrub_refcountbt_helper() as the per-record callback, passing owner
+ * info initialised for XFS_RMAP_OWN_REFC.
+ */
+int
+xfs_scrub_refcountbt(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_owner_info oinfo;
+
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
+ return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_helper,
+ &oinfo, NULL);
+}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e752f68..4757fea 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3362,7 +3362,8 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_CNTBT, "cntbt" }, \
{ XFS_SCRUB_TYPE_INOBT, "inobt" }, \
{ XFS_SCRUB_TYPE_FINOBT, "finobt" }, \
- { XFS_SCRUB_TYPE_RMAPBT, "rmapbt" }
+ { XFS_SCRUB_TYPE_RMAPBT, "rmapbt" }, \
+ { XFS_SCRUB_TYPE_REFCNTBT, "refcountbt" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 18/47] xfs: scrub inodes
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (16 preceding siblings ...)
2017-01-07 0:37 ` [PATCH 17/47] xfs: support scrubbing refcount btree Darrick J. Wong
@ 2017-01-07 0:37 ` Darrick J. Wong
2017-01-07 0:37 ` [PATCH 19/47] xfs: scrub inode block mappings Darrick J. Wong
` (29 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:37 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Scrub the fields within an inode.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_fs.h | 3
fs/xfs/libxfs/xfs_inode_buf.c | 2
fs/xfs/libxfs/xfs_inode_buf.h | 3
fs/xfs/repair/common.c | 14 +-
fs/xfs/repair/common.h | 12 +
fs/xfs/repair/inode.c | 379 +++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_itable.c | 2
fs/xfs/xfs_itable.h | 5 +
fs/xfs/xfs_trace.h | 3
10 files changed, 417 insertions(+), 7 deletions(-)
create mode 100644 fs/xfs/repair/inode.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 2995503..b53cf70 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -108,6 +108,7 @@ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
btree.o \
common.o \
ialloc.o \
+ inode.o \
refcount.o \
rmap.o \
)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 9f3603cb..3dfec06 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -585,7 +585,8 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_FINOBT 8 /* free inode btree */
#define XFS_SCRUB_TYPE_RMAPBT 9 /* reverse mapping btree */
#define XFS_SCRUB_TYPE_REFCNTBT 10 /* reference count btree */
-#define XFS_SCRUB_TYPE_MAX 10
+#define XFS_SCRUB_TYPE_INODE 11 /* inode record */
+#define XFS_SCRUB_TYPE_MAX 11
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index dd483e2..60b75d3 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -380,7 +380,7 @@ xfs_log_dinode_to_disk(
}
}
-static bool
+bool
xfs_dinode_verify(
struct xfs_mount *mp,
xfs_ino_t ino,
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 6848a0a..988fd67 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -82,4 +82,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#define xfs_inobp_check(mp, bp)
#endif /* DEBUG */
+bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
+ struct xfs_dinode *dip);
+
#endif /* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 6bbaed8..3fb7f6c 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -557,6 +557,7 @@ xfs_scrub_dummy(
int
xfs_scrub_teardown(
struct xfs_scrub_context *sc,
+ struct xfs_inode *ip_in,
int error)
{
xfs_scrub_ag_free(&sc->sa);
@@ -565,6 +566,14 @@ xfs_scrub_teardown(
sc->ag_lock.agmask = NULL;
xfs_trans_cancel(sc->tp);
sc->tp = NULL;
+ if (sc->ip != NULL) {
+ xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(sc->ip, XFS_IOLOCK_EXCL);
+ xfs_iunlock(sc->ip, XFS_MMAPLOCK_EXCL);
+ if (sc->ip != ip_in)
+ IRELE(sc->ip);
+ sc->ip = NULL;
+ }
return error;
}
@@ -668,6 +677,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, NULL, xfs_sb_version_hasfinobt},
{xfs_scrub_setup_ag_header, xfs_scrub_rmapbt, NULL, xfs_sb_version_hasrmapbt},
{xfs_scrub_setup_ag_header, xfs_scrub_refcountbt, NULL, xfs_sb_version_hasreflink},
+ {xfs_scrub_setup_inode_raw, xfs_scrub_inode, NULL, NULL},
};
/* Dispatch metadata scrubbing. */
@@ -728,7 +738,7 @@ xfs_scrub_metadata(
error = fns->scrub(&sc);
if (!deadlocked && error == -EDEADLOCK) {
deadlocked = true;
- error = xfs_scrub_teardown(&sc, error);
+ error = xfs_scrub_teardown(&sc, ip, error);
if (error != -EDEADLOCK)
goto out;
goto retry_op;
@@ -739,7 +749,7 @@ xfs_scrub_metadata(
xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
out_teardown:
- error = xfs_scrub_teardown(&sc, error);
+ error = xfs_scrub_teardown(&sc, ip, error);
out:
trace_xfs_scrub_done(ip, sm->sm_type, sm->sm_agno, sm->sm_ino,
sm->sm_gen, sm->sm_flags, error);
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 7dea98a..ae4bee5 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -200,7 +200,8 @@ int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
/* Setup functions */
-int xfs_scrub_teardown(struct xfs_scrub_context *sc, int error);
+int xfs_scrub_teardown(struct xfs_scrub_context *sc, struct xfs_inode *ip_in,
+ int error);
int xfs_scrub_setup(struct xfs_scrub_context *sc, struct xfs_inode *ip,
struct xfs_scrub_metadata *sm, bool retry_deadlocked);
int xfs_scrub_setup_ag(struct xfs_scrub_context *sc, struct xfs_inode *ip,
@@ -213,6 +214,14 @@ int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
bool retry_deadlocked);
+int xfs_scrub_setup_inode(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
+int xfs_scrub_setup_inode_raw(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
/* Metadata scrubbers */
@@ -226,5 +235,6 @@ int xfs_scrub_inobt(struct xfs_scrub_context *sc);
int xfs_scrub_finobt(struct xfs_scrub_context *sc);
int xfs_scrub_rmapbt(struct xfs_scrub_context *sc);
int xfs_scrub_refcountbt(struct xfs_scrub_context *sc);
+int xfs_scrub_inode(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/inode.c b/fs/xfs/repair/inode.c
new file mode 100644
index 0000000..e06d585
--- /dev/null
+++ b/fs/xfs/repair/inode.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_itable.h"
+#include "xfs_inode_buf.h"
+#include "xfs_inode_fork.h"
+#include "xfs_ialloc.h"
+#include "xfs_log.h"
+#include "xfs_trans_priv.h"
+#include "repair/common.h"
+
+/*
+ * Work out which inode a scrub request refers to.
+ *
+ * @ip itself is returned when the request names no inode (sm_ino == 0) or
+ * names @ip.  Otherwise sm_ino is looked up with xfs_iget() and that inode
+ * is returned; callers in this file IRELE() it when it differs from @ip.
+ * Failures are reported as ERR_PTR(): -EINVAL for a generation supplied
+ * without an inode number, -ENOENT for an inode number rejected by
+ * xfs_internal_inum() or for a generation mismatch, or the xfs_iget()
+ * error.  The returned inode is not locked.
+ */
+STATIC struct xfs_inode *
+xfs_scrub_get_inode(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	struct xfs_scrub_metadata	*req = sc->sm;
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_inode		*target = NULL;
+	int				error;
+
+	/* A generation number is meaningless without an inode number. */
+	if (req->sm_gen && !req->sm_ino)
+		return ERR_PTR(-EINVAL);
+
+	/* No explicit target, or the target is the inode we were handed. */
+	if (!req->sm_ino || req->sm_ino == ip->i_ino)
+		return ip;
+
+	if (xfs_internal_inum(mp, req->sm_ino))
+		return ERR_PTR(-ENOENT);
+
+	error = xfs_iget(mp, NULL, req->sm_ino, XFS_IGET_UNTRUSTED, 0,
+			&target);
+	if (error) {
+		trace_xfs_scrub_op_error(mp,
+				XFS_INO_TO_AGNO(mp, req->sm_ino),
+				XFS_INO_TO_AGBNO(mp, req->sm_ino),
+				"inode", error, __func__, __LINE__);
+		return ERR_PTR(error);
+	}
+
+	/* Wrong generation: drop the inode we just grabbed. */
+	if (VFS_I(target)->i_generation != req->sm_gen) {
+		IRELE(target);
+		return ERR_PTR(-ENOENT);
+	}
+
+	return target;
+}
+
+/*
+ * Set us up with an inode.
+ *
+ * Zeroes @sc, records @sm, resolves the target inode with
+ * xfs_scrub_get_inode(), takes IOLOCK_EXCL and MMAPLOCK_EXCL, allocates a
+ * transaction into sc->tp, takes ILOCK_EXCL and initialises sc->ag_lock.
+ *
+ * Returns 0 on success or a negative errno.  On every failure path sc->ip
+ * is left NULL: xfs_scrub_teardown() unlocks any non-NULL sc->ip, so it
+ * must never see an ERR_PTR() value or an inode that has already been
+ * unlocked and released here.
+ */
+int
+xfs_scrub_setup_inode(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip,
+	struct xfs_scrub_metadata	*sm,
+	bool				retry_deadlocked)
+{
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_inode		*target;
+	int				error;
+
+	memset(sc, 0, sizeof(*sc));
+	sc->sm = sm;
+
+	/* Only publish the inode in the context once it is known good. */
+	target = xfs_scrub_get_inode(sc, ip);
+	if (IS_ERR(target))
+		return PTR_ERR(target);
+	else if (target == NULL)
+		return -ENOENT;
+	sc->ip = target;
+
+	xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
+	xfs_ilock(sc->ip, XFS_MMAPLOCK_EXCL);
+	error = xfs_scrub_trans_alloc(sm, mp, &M_RES(mp)->tr_itruncate,
+			0, 0, 0, &sc->tp);
+	if (error)
+		goto out_unlock;
+	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+
+	xfs_scrub_ag_lock_init(mp, &sc->ag_lock);
+	return error;
+out_unlock:
+	xfs_iunlock(sc->ip, XFS_IOLOCK_EXCL);
+	xfs_iunlock(sc->ip, XFS_MMAPLOCK_EXCL);
+	if (sc->ip != ip)
+		IRELE(sc->ip);
+	/* Do not leave a stale pointer behind for teardown to trip over. */
+	sc->ip = NULL;
+	return error;
+}
+
+/*
+ * Try to set up with the in-core inode.  If that fails for any reason,
+ * fall back to raw mode: reset the context with no inode attached, force
+ * the log, push the AIL, and allocate only a transaction.
+ */
+int
+xfs_scrub_setup_inode_raw(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip,
+	struct xfs_scrub_metadata	*sm,
+	bool				retry_deadlocked)
+{
+	struct xfs_mount		*mp = ip->i_mount;
+	int				error;
+
+	if (sm->sm_ino && xfs_internal_inum(mp, sm->sm_ino))
+		return -ENOENT;
+
+	/* Got the in-core inode: nothing more to do. */
+	if (!xfs_scrub_setup_inode(sc, ip, sm, retry_deadlocked))
+		return 0;
+
+	/* Start over with a clean context and no inode. */
+	memset(sc, 0, sizeof(*sc));
+	sc->sm = sm;
+	sc->ip = NULL;
+
+	/* Push everything out of the log onto disk prior to check. */
+	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+	if (error)
+		return error;
+	xfs_ail_push_all_sync(mp->m_ail);
+
+	return xfs_scrub_trans_alloc(sm, mp, &M_RES(mp)->tr_itruncate,
+			0, 0, 0, &sc->tp);
+}
+
+/*
+ * Inode core
+ *
+ * Shorthand wrappers around the generic XFS_SCRUB_* macros for use inside
+ * xfs_scrub_inode(); they are #undef'd right after that function.  They
+ * pick up the locals sc, ino, bp, mp and error from the calling function.
+ *
+ * The definitions deliberately do not end in a semicolon: the caller
+ * supplies it, so each use expands to exactly one statement and stays safe
+ * as the body of an unbraced if/else.
+ */
+
+#define XFS_SCRUB_INODE_CHECK(fs_ok) \
+	XFS_SCRUB_INO_CHECK(sc, ino, bp, "inode", fs_ok)
+#define XFS_SCRUB_INODE_GOTO(fs_ok, label) \
+	XFS_SCRUB_INO_GOTO(sc, ino, bp, "inode", fs_ok, label)
+#define XFS_SCRUB_INODE_OP_ERROR_GOTO(label) \
+	XFS_SCRUB_OP_ERROR_GOTO(sc, XFS_INO_TO_AGNO(mp, ino), \
+			XFS_INO_TO_AGBNO(mp, ino), "inode", &error, label)
+#define XFS_SCRUB_INODE_PREEN(fs_ok) \
+	XFS_SCRUB_INO_PREEN(sc, bp, "inode", fs_ok)
+/*
+ * Scrub an inode.
+ *
+ * If setup attached an in-core inode (sc->ip), it is converted with
+ * xfs_inode_to_disk() into a local struct xfs_dinode and that copy is
+ * checked.  Otherwise the inode named by sc->sm->sm_ino is mapped with
+ * xfs_imap(), its buffer is read through the transaction, and the on-disk
+ * inode is verified and checked in place.  The di_* field checks are shared
+ * by both paths; the final reflink check runs only when sc->ip is set.
+ *
+ * Returns the value of error at exit; a buffer read here is released
+ * before returning.
+ */
+int
+xfs_scrub_inode(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_imap imap;
+ struct xfs_dinode di;
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_ifork *ifp;
+ struct xfs_buf *bp = NULL;
+ struct xfs_dinode *dip;
+ xfs_ino_t ino;
+ unsigned long long isize;
+ uint64_t flags2;
+ uint32_t nextents;
+ uint32_t extsize;
+ uint32_t cowextsize;
+ uint16_t flags;
+ uint16_t mode;
+ int error = 0;
+
+ /* Did we get the in-core inode, or are we doing this manually? */
+ if (sc->ip) {
+ ino = sc->ip->i_ino;
+ xfs_inode_to_disk(sc->ip, &di, 0);
+ dip = &di;
+ } else {
+ /* Map & read inode. */
+ ino = sc->sm->sm_ino;
+ error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED);
+ XFS_SCRUB_INODE_OP_ERROR_GOTO(out);
+
+ error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+ imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp,
+ NULL);
+ XFS_SCRUB_INODE_OP_ERROR_GOTO(out);
+
+ /*
+ * Is this really the inode we want?  Run the dinode verifier,
+ * check the version, and compare the on-disk generation with
+ * the caller's sm_gen (a mismatch fails with -EINVAL).
+ */
+ bp->b_ops = &xfs_inode_buf_ops;
+ dip = xfs_buf_offset(bp, imap.im_boffset);
+ error = xfs_dinode_verify(mp, ino, dip) ? 0 : -EFSCORRUPTED;
+ XFS_SCRUB_INODE_OP_ERROR_GOTO(out);
+ XFS_SCRUB_INODE_GOTO(
+ xfs_dinode_good_version(mp, dip->di_version),
+ out);
+ if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) {
+ error = -EINVAL;
+ goto out;
+ }
+ }
+
+ flags = be16_to_cpu(dip->di_flags);
+ if (dip->di_version >= 3)
+ flags2 = be64_to_cpu(dip->di_flags2);
+ else
+ flags2 = 0;
+
+ /* di_mode: no bits outside S_IALLUGO | S_IFMT */
+ mode = be16_to_cpu(dip->di_mode);
+ XFS_SCRUB_INODE_CHECK(!(mode & ~(S_IALLUGO | S_IFMT)));
+
+ /* v1/v2 fields: di_nlink must be zero on v1, di_onlink on v2/v3 */
+ switch (dip->di_version) {
+ case 1:
+ XFS_SCRUB_INODE_CHECK(dip->di_nlink == 0);
+ XFS_SCRUB_INODE_CHECK(dip->di_mode || !sc->ip);
+ XFS_SCRUB_INODE_CHECK(dip->di_projid_lo == 0);
+ XFS_SCRUB_INODE_CHECK(dip->di_projid_hi == 0);
+ break;
+ case 2:
+ case 3:
+ XFS_SCRUB_INODE_CHECK(dip->di_onlink == 0);
+ XFS_SCRUB_INODE_CHECK(dip->di_mode || !sc->ip);
+ XFS_SCRUB_INODE_CHECK(dip->di_projid_hi == 0 ||
+ xfs_sb_version_hasprojid32bit(&mp->m_sb));
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ /* di_format: each data fork format is allowed only for some file types */
+ switch (dip->di_format) {
+ case XFS_DINODE_FMT_DEV:
+ XFS_SCRUB_INODE_CHECK(S_ISCHR(mode) || S_ISBLK(mode) ||
+ S_ISFIFO(mode) || S_ISSOCK(mode));
+ break;
+ case XFS_DINODE_FMT_LOCAL:
+ XFS_SCRUB_INODE_CHECK(S_ISDIR(mode) || S_ISLNK(mode));
+ break;
+ case XFS_DINODE_FMT_EXTENTS:
+ XFS_SCRUB_INODE_CHECK(S_ISREG(mode) || S_ISDIR(mode) ||
+ S_ISLNK(mode));
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ XFS_SCRUB_INODE_CHECK(S_ISREG(mode) || S_ISDIR(mode));
+ break;
+ case XFS_DINODE_FMT_UUID:
+ default:
+ XFS_SCRUB_INODE_CHECK(false);
+ break;
+ }
+
+ /* di_size: top bit clear; zero unless dir, regular file or symlink */
+ isize = be64_to_cpu(dip->di_size);
+ XFS_SCRUB_INODE_CHECK(!(isize & (1ULL << 63)));
+ if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode))
+ XFS_SCRUB_INODE_CHECK(isize == 0);
+
+ /* di_nblocks: below sb_dblocks (plus sb_rblocks for realtime files) */
+ if (flags2 & XFS_DIFLAG2_REFLINK) {
+ ; /* nblocks can exceed dblocks */
+ } else if (flags & XFS_DIFLAG_REALTIME) {
+ XFS_SCRUB_INODE_CHECK(be64_to_cpu(dip->di_nblocks) <
+ mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks);
+ } else {
+ XFS_SCRUB_INODE_CHECK(be64_to_cpu(dip->di_nblocks) <
+ mp->m_sb.sb_dblocks);
+ }
+
+ /* di_extsize: only checked when the EXTSIZE flag is set */
+ if (flags & XFS_DIFLAG_EXTSIZE) {
+ extsize = be32_to_cpu(dip->di_extsize);
+ XFS_SCRUB_INODE_CHECK(extsize > 0);
+ XFS_SCRUB_INODE_CHECK(extsize <= MAXEXTLEN);
+ XFS_SCRUB_INODE_CHECK(extsize <= mp->m_sb.sb_agblocks / 2 ||
+ (flags & XFS_DIFLAG_REALTIME));
+ }
+
+ /* di_flags: IMMUTABLE+APPEND and FILESTREAM+REALTIME are rejected */
+ XFS_SCRUB_INODE_CHECK(!(flags & XFS_DIFLAG_IMMUTABLE) ||
+ !(flags & XFS_DIFLAG_APPEND));
+
+ XFS_SCRUB_INODE_CHECK(!(flags & XFS_DIFLAG_FILESTREAM) ||
+ !(flags & XFS_DIFLAG_REALTIME));
+
+ /* di_nextents: must be consistent with the data fork format and size */
+ nextents = be32_to_cpu(dip->di_nextents);
+ switch (dip->di_format) {
+ case XFS_DINODE_FMT_EXTENTS:
+ XFS_SCRUB_INODE_CHECK(nextents <=
+ XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec));
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ XFS_SCRUB_INODE_CHECK(nextents >
+ XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec));
+ break;
+ case XFS_DINODE_FMT_LOCAL:
+ case XFS_DINODE_FMT_DEV:
+ case XFS_DINODE_FMT_UUID:
+ default:
+ XFS_SCRUB_INODE_CHECK(nextents == 0);
+ break;
+ }
+
+ /* di_anextents: same rules, applied to the attr fork */
+ nextents = be16_to_cpu(dip->di_anextents);
+ switch (dip->di_aformat) {
+ case XFS_DINODE_FMT_EXTENTS:
+ XFS_SCRUB_INODE_CHECK(nextents <=
+ XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec));
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ XFS_SCRUB_INODE_CHECK(nextents >
+ XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec));
+ break;
+ case XFS_DINODE_FMT_LOCAL:
+ case XFS_DINODE_FMT_DEV:
+ case XFS_DINODE_FMT_UUID:
+ default:
+ XFS_SCRUB_INODE_CHECK(nextents == 0);
+ break;
+ }
+
+ /* di_forkoff: attr fork starts inside the inode; needed if anextents */
+ XFS_SCRUB_INODE_CHECK(XFS_DFORK_APTR(dip) <
+ (char *)dip + mp->m_sb.sb_inodesize);
+ XFS_SCRUB_INODE_CHECK(dip->di_anextents == 0 || dip->di_forkoff);
+
+ /* di_aformat: only local, extents or btree are accepted */
+ XFS_SCRUB_INODE_CHECK(dip->di_aformat == XFS_DINODE_FMT_LOCAL ||
+ dip->di_aformat == XFS_DINODE_FMT_EXTENTS ||
+ dip->di_aformat == XFS_DINODE_FMT_BTREE);
+
+ /* di_cowextsize: only checked when the COWEXTSIZE flag is set */
+ if (flags2 & XFS_DIFLAG2_COWEXTSIZE) {
+ cowextsize = be32_to_cpu(dip->di_cowextsize);
+ XFS_SCRUB_INODE_CHECK(xfs_sb_version_hasreflink(&mp->m_sb));
+ XFS_SCRUB_INODE_CHECK(cowextsize > 0);
+ XFS_SCRUB_INODE_CHECK(cowextsize <= MAXEXTLEN);
+ XFS_SCRUB_INODE_CHECK(cowextsize <= mp->m_sb.sb_agblocks / 2);
+ }
+
+ /* Now let's do the things that require a live inode. */
+ if (!sc->ip)
+ goto out;
+
+ /*
+ * If this is a reflink inode with no CoW in progress, maybe we
+ * can turn off the reflink flag?
+ * NOTE(review): ifp is dereferenced unchecked; this assumes
+ * XFS_IFORK_PTR() never returns NULL for a reflink inode -- confirm.
+ */
+ if (xfs_is_reflink_inode(sc->ip)) {
+ ifp = XFS_IFORK_PTR(sc->ip, XFS_COW_FORK);
+ XFS_SCRUB_INODE_PREEN(ifp->if_bytes > 0);
+ }
+
+out:
+ if (bp)
+ xfs_trans_brelse(sc->tp, bp);
+ return error;
+}
+#undef XFS_SCRUB_INODE_PREEN
+#undef XFS_SCRUB_INODE_OP_ERROR_GOTO
+#undef XFS_SCRUB_INODE_GOTO
+#undef XFS_SCRUB_INODE_CHECK
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 66e8817..4fd5fe1 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -31,7 +31,7 @@
#include "xfs_trace.h"
#include "xfs_icache.h"
-STATIC int
+int
xfs_internal_inum(
xfs_mount_t *mp,
xfs_ino_t ino)
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 6ea8b39..dd2427b 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -96,4 +96,9 @@ xfs_inumbers(
void __user *buffer, /* buffer with inode info */
inumbers_fmt_pf formatter);
+int
+xfs_internal_inum(
+ xfs_mount_t *mp,
+ xfs_ino_t ino);
+
#endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 4757fea..2cf344e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3363,7 +3363,8 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_INOBT, "inobt" }, \
{ XFS_SCRUB_TYPE_FINOBT, "finobt" }, \
{ XFS_SCRUB_TYPE_RMAPBT, "rmapbt" }, \
- { XFS_SCRUB_TYPE_REFCNTBT, "refcountbt" }
+ { XFS_SCRUB_TYPE_REFCNTBT, "refcountbt" }, \
+ { XFS_SCRUB_TYPE_INODE, "inode" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 19/47] xfs: scrub inode block mappings
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (17 preceding siblings ...)
2017-01-07 0:37 ` [PATCH 18/47] xfs: scrub inodes Darrick J. Wong
@ 2017-01-07 0:37 ` Darrick J. Wong
2017-01-07 0:37 ` [PATCH 20/47] xfs: scrub directory/attribute btrees Darrick J. Wong
` (28 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:37 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Scrub an individual inode's block mappings to make sure they make sense.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_bmap_btree.c | 26 ++-
fs/xfs/libxfs/xfs_fs.h | 5 -
fs/xfs/repair/bmap.c | 360 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 3
fs/xfs/repair/common.h | 7 +
fs/xfs/xfs_bmap_util.c | 105 +++++++-----
fs/xfs/xfs_bmap_util.h | 4
fs/xfs/xfs_trace.h | 5 -
9 files changed, 468 insertions(+), 48 deletions(-)
create mode 100644 fs/xfs/repair/bmap.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index b53cf70..1e86403 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -105,6 +105,7 @@ xfs-y += xfs_aops.o \
xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
agheader.o \
alloc.o \
+ bmap.o \
btree.o \
common.o \
ialloc.o \
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index d6330c2..33ab7f3 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -623,6 +623,16 @@ xfs_bmbt_init_key_from_rec(
}
STATIC void
+xfs_bmbt_init_high_key_from_rec(
+ union xfs_btree_key *key, /* out: high key built from rec */
+ union xfs_btree_rec *rec) /* in: bmbt record to read */
+{
+ key->bmbt.br_startoff = cpu_to_be64(
+ xfs_bmbt_disk_get_startoff(&rec->bmbt) +
+ xfs_bmbt_disk_get_blockcount(&rec->bmbt) - 1); /* inclusive end: start + count - 1 */
+}
+
+STATIC void
xfs_bmbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
@@ -647,6 +657,16 @@ xfs_bmbt_key_diff(
cur->bc_rec.b.br_startoff;
}
+STATIC __int64_t
+xfs_bmbt_diff_two_keys(
+ struct xfs_btree_cur *cur, /* not referenced by this comparison */
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{ /* Result is k1's br_startoff minus k2's br_startoff. */
+ return (__int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
+ be64_to_cpu(k2->bmbt.br_startoff);
+}
+
static bool
xfs_bmbt_verify(
struct xfs_buf *bp)
@@ -737,7 +757,6 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
};
-#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_bmbt_keys_inorder(
struct xfs_btree_cur *cur,
@@ -758,7 +777,6 @@ xfs_bmbt_recs_inorder(
xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
xfs_bmbt_disk_get_startoff(&r2->bmbt);
}
-#endif /* DEBUG */
static const struct xfs_btree_ops xfs_bmbt_ops = {
.rec_len = sizeof(xfs_bmbt_rec_t),
@@ -772,14 +790,14 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
.get_minrecs = xfs_bmbt_get_minrecs,
.get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
.init_key_from_rec = xfs_bmbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_bmbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
.key_diff = xfs_bmbt_key_diff,
+ .diff_two_keys = xfs_bmbt_diff_two_keys,
.buf_ops = &xfs_bmbt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_bmbt_keys_inorder,
.recs_inorder = xfs_bmbt_recs_inorder,
-#endif
};
/*
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 3dfec06..9753400 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -586,7 +586,10 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_RMAPBT 9 /* reverse mapping btree */
#define XFS_SCRUB_TYPE_REFCNTBT 10 /* reference count btree */
#define XFS_SCRUB_TYPE_INODE 11 /* inode record */
-#define XFS_SCRUB_TYPE_MAX 11
+#define XFS_SCRUB_TYPE_BMBTD 12 /* data fork block mapping */
+#define XFS_SCRUB_TYPE_BMBTA 13 /* attr fork block mapping */
+#define XFS_SCRUB_TYPE_BMBTC 14 /* CoW fork block mapping */
+#define XFS_SCRUB_TYPE_MAX 14
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/repair/bmap.c b/fs/xfs/repair/bmap.c
new file mode 100644
index 0000000..bc1ad8e
--- /dev/null
+++ b/fs/xfs/repair/bmap.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_rmap.h"
+#include "repair/common.h"
+#include "repair/btree.h"
+
+/* Set up an inode scrub; on a deadlock retry, also call xfs_scrub_ag_lock_all(). */
+int
+xfs_scrub_setup_inode_bmap(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ int error;
+
+ error = xfs_scrub_setup_inode(sc, ip, sm, retry_deadlocked);
+ if (error || !retry_deadlocked) /* setup failed, or this is not a deadlock retry */
+ return error;
+
+ error = xfs_scrub_ag_lock_all(sc);
+ if (error)
+ goto err;
+ sc->retry = retry_deadlocked; /* always true on this path */
+ return 0;
+err: /* AG locking failed: return whatever xfs_scrub_teardown() reports */
+ return xfs_scrub_teardown(sc, ip, error);
+}
+
+/*
+ * Inode fork block mapping (BMBT) scrubber.
+ * More complex than the others because we have to scrub
+ * all the extents regardless of whether or not the fork
+ * is in btree format.
+ */
+
+struct xfs_scrub_bmap_info {
+ struct xfs_scrub_context *sc; /* scrub context the checks report against */
+ const char *type; /* fork label handed to the check macros, e.g. "data fork" */
+ xfs_daddr_t eofs; /* XFS_FSB_TO_BB() of sb_rblocks (rt) or sb_dblocks */
+ xfs_fileoff_t lastoff; /* file offset just past the previously checked extent */
+ bool is_rt; /* data fork of a realtime inode */
+ bool is_shared; /* data fork of a reflink inode */
+ bool scrub_btrec; /* check extent records during the btree walk */
+ int whichfork; /* XFS_DATA_FORK, XFS_ATTR_FORK or XFS_COW_FORK */
+};
+
+#define XFS_SCRUB_BMAP_CHECK(fs_ok) \
+ XFS_SCRUB_INO_CHECK(info->sc, info->sc->ip->i_ino, bp, info->type, fs_ok)
+#define XFS_SCRUB_BMAP_GOTO(fs_ok, label) \
+ XFS_SCRUB_INO_GOTO(info->sc, info->sc->ip->i_ino, bp, info->type, fs_ok, label)
+#define XFS_SCRUB_BMAP_OP_ERROR_GOTO(label) \
+ XFS_SCRUB_OP_ERROR_GOTO(info->sc, agno, 0, "bmap", &error, label);
+/* Scrub one extent record; returns 0 or a negative errno (-EDEADLOCK if the AG can't be locked). */
+STATIC int
+xfs_scrub_bmap_extent(
+ struct xfs_inode *ip,
+ struct xfs_btree_cur *cur, /* bmbt cursor, or NULL for in-core extents */
+ struct xfs_scrub_bmap_info *info, /* per-fork state; lastoff is updated */
+ struct xfs_bmbt_irec *irec) /* extent to check */
+{
+ struct xfs_scrub_ag sa = {0};
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buf *bp = NULL;
+ xfs_daddr_t daddr;
+ xfs_daddr_t dlen;
+ xfs_agnumber_t agno;
+ int error = 0;
+
+ if (cur)
+ xfs_btree_get_block(cur, 0, &bp);
+
+ XFS_SCRUB_BMAP_CHECK(irec->br_startoff >= info->lastoff); /* must not precede the previous extent's end */
+ XFS_SCRUB_BMAP_CHECK(irec->br_startblock != HOLESTARTBLOCK);
+
+ if (isnullstartblock(irec->br_startblock)) {
+ XFS_SCRUB_BMAP_CHECK(irec->br_state == XFS_EXT_NORM);
+ goto out;
+ }
+
+ /* Actual mapping, so check the block ranges against the device size. */
+ if (info->is_rt) {
+ daddr = XFS_FSB_TO_BB(mp, irec->br_startblock);
+ agno = NULLAGNUMBER;
+ } else {
+ daddr = XFS_FSB_TO_DADDR(mp, irec->br_startblock);
+ agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
+ XFS_SCRUB_BMAP_GOTO(agno < mp->m_sb.sb_agcount, out);
+ }
+ dlen = XFS_FSB_TO_BB(mp, irec->br_blockcount);
+ XFS_SCRUB_BMAP_CHECK(daddr < info->eofs);
+ XFS_SCRUB_BMAP_CHECK(daddr + dlen <= info->eofs); /* an extent may end exactly at eofs */
+ XFS_SCRUB_BMAP_CHECK(irec->br_state != XFS_EXT_UNWRITTEN ||
+ xfs_sb_version_hasextflgbit(&mp->m_sb));
+ if (error)
+ goto out;
+
+ /* Set ourselves up for cross-referencing later. */
+ if (!info->is_rt) {
+ if (!xfs_scrub_ag_can_lock(info->sc, agno))
+ return -EDEADLOCK;
+ error = xfs_scrub_ag_init(info->sc, agno, &sa);
+ XFS_SCRUB_BMAP_OP_ERROR_GOTO(out);
+ }
+
+ xfs_scrub_ag_free(&sa);
+out:
+ info->lastoff = irec->br_startoff + irec->br_blockcount; /* the next extent is checked against this */
+ return error;
+}
+#undef XFS_SCRUB_BMAP_OP_ERROR_GOTO
+#undef XFS_SCRUB_BMAP_GOTO
+
+/* Scrub a bmbt record: check btree block owners, then (if asked) the extent itself. */
+STATIC int
+xfs_scrub_bmapbt_helper(
+ struct xfs_scrub_btree *bs,
+ union xfs_btree_rec *rec)
+{
+ struct xfs_bmbt_rec_host ihost;
+ struct xfs_bmbt_irec irec;
+ struct xfs_scrub_bmap_info *info = bs->private;
+ struct xfs_inode *ip = bs->cur->bc_private.b.ip;
+ struct xfs_buf *bp = NULL;
+ struct xfs_btree_block *block;
+ uint64_t owner;
+ int i;
+
+ /*
+ * Check the owners of the btree blocks up to the level below
+ * the root since the verifiers don't do that.
+ */
+ if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) &&
+ bs->cur->bc_ptrs[0] == 1) { /* presumably: first record of this leaf block */
+ for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
+ block = xfs_btree_get_block(bs->cur, i, &bp);
+ owner = be64_to_cpu(block->bb_u.l.bb_owner);
+ XFS_SCRUB_BMAP_CHECK(owner == ip->i_ino);
+ }
+ }
+
+ if (!info->scrub_btrec) /* caller did not ask for record checks here */
+ return 0;
+
+ /* Convert the big-endian record words to an in-core record and scrub it. */
+ ihost.l0 = be64_to_cpu(rec->bmbt.l0);
+ ihost.l1 = be64_to_cpu(rec->bmbt.l1);
+ xfs_bmbt_get_all(&ihost, &irec);
+ return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec);
+}
+#undef XFS_SCRUB_BMAP_CHECK
+
+#define XFS_SCRUB_FORK_CHECK(fs_ok) \
+ XFS_SCRUB_INO_CHECK(sc, ip->i_ino, NULL, info.type, fs_ok);
+#define XFS_SCRUB_FORK_GOTO(fs_ok, label) \
+ XFS_SCRUB_INO_GOTO(sc, ip->i_ino, NULL, info.type, fs_ok, label);
+#define XFS_SCRUB_FORK_OP_ERROR_GOTO(label) \
+ XFS_SCRUB_OP_ERROR_GOTO(sc, \
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), \
+ XFS_INO_TO_AGBNO(ip->i_mount, ip->i_ino), \
+ info.type, &error, label)
+/* Scrub one inode fork's block mappings (whichfork: XFS_DATA_FORK, XFS_ATTR_FORK or XFS_COW_FORK). */
+STATIC int
+xfs_scrub_bmap(
+ struct xfs_scrub_context *sc,
+ int whichfork)
+{
+ struct xfs_bmbt_irec irec;
+ struct xfs_scrub_bmap_info info = {0};
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_inode *ip = sc->ip;
+ struct xfs_ifork *ifp;
+ struct xfs_btree_cur *cur;
+ xfs_fileoff_t off;
+ xfs_fileoff_t endoff;
+ int nmaps;
+ int flags = 0;
+ int error = 0;
+ int err2 = 0;
+
+ switch (whichfork) { /* pick the label used in scrub reports */
+ case XFS_DATA_FORK:
+ info.type = "data fork";
+ break;
+ case XFS_ATTR_FORK:
+ info.type = "attr fork";
+ break;
+ case XFS_COW_FORK:
+ info.type = "CoW fork";
+ break;
+ }
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+
+ info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
+ info.eofs = XFS_FSB_TO_BB(mp, info.is_rt ? mp->m_sb.sb_rblocks :
+ mp->m_sb.sb_dblocks); /* upper bound for extent block ranges */
+ info.whichfork = whichfork;
+ info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
+ info.sc = sc;
+
+ switch (whichfork) {
+ case XFS_COW_FORK:
+ /* Non-existent CoW forks are ignorable. */
+ if (!ifp)
+ goto out_unlock;
+ /* No CoW forks on non-reflink inodes/filesystems. */
+ XFS_SCRUB_FORK_GOTO(xfs_is_reflink_inode(ip), out_unlock);
+ break;
+ case XFS_ATTR_FORK:
+ if (!ifp) /* no attr fork: nothing to scrub */
+ goto out_unlock;
+ XFS_SCRUB_FORK_CHECK(xfs_sb_version_hasattr(&mp->m_sb));
+ break;
+ }
+
+ /* Check the fork values */
+ switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ case XFS_DINODE_FMT_UUID:
+ case XFS_DINODE_FMT_DEV:
+ case XFS_DINODE_FMT_LOCAL:
+ /* No mappings to check. */
+ goto out_unlock;
+ case XFS_DINODE_FMT_EXTENTS:
+ XFS_SCRUB_FORK_GOTO(ifp->if_flags & XFS_IFEXTENTS, out_unlock);
+ break;
+ case XFS_DINODE_FMT_BTREE:
+ XFS_SCRUB_FORK_CHECK(whichfork != XFS_COW_FORK);
+ /*
+ * Scan the btree. If extents aren't loaded, have the btree
+ * scrub routine examine the extent records.
+ */
+ info.scrub_btrec = !(ifp->if_flags & XFS_IFEXTENTS);
+
+ cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
+ xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
+ err2 = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_helper,
+ &oinfo, &info);
+ xfs_btree_del_cursor(cur, err2 ? XFS_BTREE_ERROR :
+ XFS_BTREE_NOERROR);
+ if (err2 == -EDEADLOCK) /* returned directly, bypassing out_unlock */
+ return err2;
+ else if (err2)
+ goto out_unlock;
+ /* Skip in-core extent checking if we did it in the btree */
+ if (info.scrub_btrec)
+ goto out_unlock;
+ break;
+ default: /* unrecognised fork format: always flagged */
+ XFS_SCRUB_FORK_GOTO(false, out_unlock);
+ break;
+ }
+
+ /* Extent data is in memory, so scrub that. */
+ switch (whichfork) { /* choose the xfs_bmapi_read() fork flag */
+ case XFS_ATTR_FORK:
+ flags |= XFS_BMAPI_ATTRFORK;
+ break;
+ case XFS_COW_FORK:
+ flags |= XFS_BMAPI_COWFORK;
+ break;
+ default:
+ break;
+ }
+
+ /* Find the offset of the last extent in the mapping. */
+ error = xfs_bmap_last_offset(ip, &endoff, whichfork);
+ XFS_SCRUB_FORK_OP_ERROR_GOTO(out_unlock);
+
+ /* Scrub extent records, one mapping per xfs_bmapi_read() call. */
+ off = 0;
+ while (true) {
+ nmaps = 1;
+ err2 = xfs_bmapi_read(ip, off, endoff - off, &irec,
+ &nmaps, flags);
+ if (err2 || nmaps == 0 || irec.br_startoff > endoff)
+ break;
+ /* Scrub non-hole extent. */
+ if (irec.br_startblock != HOLESTARTBLOCK) {
+ err2 = xfs_scrub_bmap_extent(ip, NULL, &info, &irec);
+ if (err2 == -EDEADLOCK) /* returned directly, bypassing out_unlock */
+ return err2;
+ else if (!error && err2) /* keep the first error seen */
+ error = err2;
+ if (xfs_scrub_should_terminate(&error))
+ break;
+ }
+
+ off += irec.br_blockcount; /* continue after this mapping */
+ }
+
+out_unlock:
+ if (error == 0 && err2 != 0) /* report err2 only if nothing failed earlier */
+ error = err2;
+ return error;
+}
+#undef XFS_SCRUB_FORK_CHECK
+#undef XFS_SCRUB_FORK_GOTO
+
+/* Scrub the block mappings of an inode's data fork (XFS_DATA_FORK). */
+int
+xfs_scrub_bmap_data(
+ struct xfs_scrub_context *sc)
+{
+ return xfs_scrub_bmap(sc, XFS_DATA_FORK);
+}
+
+/* Scrub the block mappings of an inode's attr fork (XFS_ATTR_FORK). */
+int
+xfs_scrub_bmap_attr(
+ struct xfs_scrub_context *sc)
+{
+ return xfs_scrub_bmap(sc, XFS_ATTR_FORK);
+}
+
+/* Scrub the block mappings of an inode's CoW fork; -ENOENT unless xfs_is_reflink_inode(). */
+int
+xfs_scrub_bmap_cow(
+ struct xfs_scrub_context *sc)
+{
+ if (!xfs_is_reflink_inode(sc->ip)) /* not a reflink inode: report -ENOENT */
+ return -ENOENT;
+
+ return xfs_scrub_bmap(sc, XFS_COW_FORK);
+}
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 3fb7f6c..1aa4f20 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -678,6 +678,9 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag_header, xfs_scrub_rmapbt, NULL, xfs_sb_version_hasrmapbt},
{xfs_scrub_setup_ag_header, xfs_scrub_refcountbt, NULL, xfs_sb_version_hasreflink},
{xfs_scrub_setup_inode_raw, xfs_scrub_inode, NULL, NULL},
+ {xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_data, NULL, NULL},
+ {xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_attr, NULL, NULL},
+ {xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_cow, NULL, NULL},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index ae4bee5..8c30e78 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -222,6 +222,10 @@ int xfs_scrub_setup_inode_raw(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
bool retry_deadlocked);
+int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
/* Metadata scrubbers */
@@ -236,5 +240,8 @@ int xfs_scrub_finobt(struct xfs_scrub_context *sc);
int xfs_scrub_rmapbt(struct xfs_scrub_context *sc);
int xfs_scrub_refcountbt(struct xfs_scrub_context *sc);
int xfs_scrub_inode(struct xfs_scrub_context *sc);
+int xfs_scrub_bmap_data(struct xfs_scrub_context *sc);
+int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc);
+int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index b9abce5..d983f28 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -231,7 +231,7 @@ xfs_bmap_count_leaves(
xfs_ifork_t *ifp,
xfs_extnum_t idx,
int numrecs,
- int *count)
+ unsigned long long *count)
{
int b;
@@ -250,7 +250,7 @@ xfs_bmap_disk_count_leaves(
struct xfs_mount *mp,
struct xfs_btree_block *block,
int numrecs,
- int *count)
+ unsigned long long *count)
{
int b;
xfs_bmbt_rec_t *frp;
@@ -265,17 +265,18 @@ xfs_bmap_disk_count_leaves(
* Recursively walks each level of a btree
* to count total fsblocks in use.
*/
-STATIC int /* error */
+STATIC int
xfs_bmap_count_tree(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_ifork_t *ifp, /* inode fork pointer */
- xfs_fsblock_t blockno, /* file system block number */
- int levelin, /* level in btree */
- int *count) /* Count of blocks */
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_ifork *ifp,
+ xfs_fsblock_t blockno,
+ int levelin,
+ unsigned int *nextents,
+ unsigned long long *count)
{
int error;
- xfs_buf_t *bp, *nbp;
+ struct xfs_buf *bp, *nbp;
int level = levelin;
__be64 *pp;
xfs_fsblock_t bno = blockno;
@@ -308,8 +309,9 @@ xfs_bmap_count_tree(
/* Dive to the next level */
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
- if (unlikely((error =
- xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
+ error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents,
+ count);
+ if (error) {
xfs_trans_brelse(tp, bp);
XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
XFS_ERRLEVEL_LOW, mp);
@@ -321,6 +323,7 @@ xfs_bmap_count_tree(
for (;;) {
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
numrecs = be16_to_cpu(block->bb_numrecs);
+ (*nextents) += numrecs;
xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
xfs_trans_brelse(tp, bp);
if (nextbno == NULLFSBLOCK)
@@ -341,44 +344,61 @@ xfs_bmap_count_tree(
/*
* Count fsblocks of the given fork.
*/
-static int /* error */
+int
xfs_bmap_count_blocks(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_inode_t *ip, /* incore inode */
- int whichfork, /* data or attr fork */
- int *count) /* out: count of blocks */
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ int whichfork,
+ unsigned int *nextents,
+ unsigned long long *count)
{
struct xfs_btree_block *block; /* current btree block */
xfs_fsblock_t bno; /* block # of "block" */
- xfs_ifork_t *ifp; /* fork structure */
+ struct xfs_ifork *ifp; /* fork structure */
int level; /* btree level, for checking */
- xfs_mount_t *mp; /* file system mount structure */
+ struct xfs_mount *mp; /* file system mount structure */
__be64 *pp; /* pointer to block address */
+ int error;
bno = NULLFSBLOCK;
mp = ip->i_mount;
+ *nextents = 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
- if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
- xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
+ if (!ifp)
return 0;
- }
- /*
- * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
- */
- block = ifp->if_broot;
- level = be16_to_cpu(block->bb_level);
- ASSERT(level > 0);
- pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
- bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLFSBLOCK);
- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
-
- if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
- XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
- mp);
- return -EFSCORRUPTED;
+ switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+ case XFS_DINODE_FMT_EXTENTS:
+ *nextents = xfs_iext_count(ifp);
+ xfs_bmap_count_leaves(ifp, 0, (*nextents), count);
+ return 0;
+ case XFS_DINODE_FMT_BTREE:
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+ */
+ block = ifp->if_broot;
+ level = be16_to_cpu(block->bb_level);
+ ASSERT(level > 0);
+ pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+ bno = be64_to_cpu(*pp);
+ ASSERT(bno != NULLFSBLOCK);
+ ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+ ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
+ error = xfs_bmap_count_tree(mp, tp, ifp, bno, level,
+ nextents, count);
+ if (error) {
+ XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)",
+ XFS_ERRLEVEL_LOW, mp);
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
return 0;
@@ -1788,8 +1808,9 @@ xfs_swap_extent_forks(
int *target_log_flags)
{
struct xfs_ifork tempifp, *ifp, *tifp;
- int aforkblks = 0;
- int taforkblks = 0;
+ unsigned long long aforkblks = 0;
+ unsigned long long taforkblks = 0;
+ unsigned int junk;
xfs_extnum_t nextents;
__uint64_t tmp;
int error;
@@ -1799,14 +1820,14 @@ xfs_swap_extent_forks(
*/
if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
- error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK,
+ error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
&aforkblks);
if (error)
return error;
}
if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
(tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
- error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
+ error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
&taforkblks);
if (error)
return error;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 68a621a..73a03c3 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -71,4 +71,8 @@ int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
+int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+ int whichfork, unsigned int *nextents,
+ unsigned long long *count);
+
#endif /* __XFS_BMAP_UTIL_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2cf344e..141a39f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3364,7 +3364,10 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_FINOBT, "finobt" }, \
{ XFS_SCRUB_TYPE_RMAPBT, "rmapbt" }, \
{ XFS_SCRUB_TYPE_REFCNTBT, "refcountbt" }, \
- { XFS_SCRUB_TYPE_INODE, "inode" }
+ { XFS_SCRUB_TYPE_INODE, "inode" }, \
+ { XFS_SCRUB_TYPE_BMBTD, "bmapbtd" }, \
+ { XFS_SCRUB_TYPE_BMBTA, "bmapbta" }, \
+ { XFS_SCRUB_TYPE_BMBTC, "bmapbtc" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 20/47] xfs: scrub directory/attribute btrees
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (18 preceding siblings ...)
2017-01-07 0:37 ` [PATCH 19/47] xfs: scrub inode block mappings Darrick J. Wong
@ 2017-01-07 0:37 ` Darrick J. Wong
2017-01-07 0:38 ` [PATCH 21/47] xfs: scrub directory metadata Darrick J. Wong
` (27 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:37 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Provide a way to check the shape and scrub the hashes and records
in a directory or extended attribute btree. These are helper functions
for the directory & attribute scrubbers in subsequent patches.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_dir2_node.c | 28 ++
fs/xfs/libxfs/xfs_dir2_priv.h | 2
fs/xfs/repair/dabtree.c | 466 +++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/dabtree.h | 62 +++++
5 files changed, 559 insertions(+)
create mode 100644 fs/xfs/repair/dabtree.c
create mode 100644 fs/xfs/repair/dabtree.h
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 1e86403..dd66bf5 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -108,6 +108,7 @@ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
bmap.o \
btree.o \
common.o \
+ dabtree.o \
ialloc.o \
inode.o \
refcount.o \
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 75a5574..f83a197 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -481,6 +481,34 @@ xfs_dir2_free_hdr_check(
* Stale entries are ok.
*/
xfs_dahash_t /* last hash value; 0 if the leaf has no entries */
+xfs_dir2_leaf1_lasthash(
+ struct xfs_inode *dp, /* inode whose d_ops decode the leaf */
+ struct xfs_buf *bp, /* leaf buffer */
+ int *count) /* out, may be NULL: count of entries in leaf */
+{
+ struct xfs_dir2_leaf *leaf = bp->b_addr;
+ struct xfs_dir2_leaf_entry *ents;
+ struct xfs_dir3_icleaf_hdr leafhdr;
+
+ dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+
+ ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
+ leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
+
+ if (count)
+ *count = leafhdr.count;
+ if (!leafhdr.count) /* empty leaf: there is no last entry to read */
+ return 0;
+
+ ents = dp->d_ops->leaf_ents_p(leaf);
+ return be32_to_cpu(ents[leafhdr.count - 1].hashval);
+}
+
+/*
+ * Return the last hash value in the leaf.
+ * Stale entries are ok.
+ */
+xfs_dahash_t /* hash value */
xfs_dir2_leafn_lasthash(
struct xfs_inode *dp,
struct xfs_buf *bp, /* leaf buffer */
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index d04547f..1abd314 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -93,6 +93,8 @@ extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
/* xfs_dir2_node.c */
extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
struct xfs_buf *lbp);
+extern xfs_dahash_t xfs_dir2_leaf1_lasthash(struct xfs_inode *dp,
+ struct xfs_buf *bp, int *count);
extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp,
struct xfs_buf *bp, int *count);
extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
diff --git a/fs/xfs/repair/dabtree.c b/fs/xfs/repair/dabtree.c
new file mode 100644
index 0000000..23ef92c
--- /dev/null
+++ b/fs/xfs/repair/dabtree.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_attr_leaf.h"
+#include "repair/common.h"
+#include "repair/dabtree.h"
+
+/* Directory/Attribute Btree */
+
+/*
+ * Return a pointer to entry @rec of the block held at @level of the scrub
+ * cursor's path, or NULL if that block's magic number is not recognized.
+ */
+STATIC void *
+xfs_scrub_da_btree_entry(
+	struct xfs_scrub_da_btree	*ds,
+	int				level,
+	int				rec)
+{
+	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
+	void				*(*get_ents)(void *);
+	size_t				entsize;
+	char				*base;
+
+	/* Pick the accessor and entry size that match this block type. */
+	switch (blk->magic) {
+	case XFS_ATTR_LEAF_MAGIC:
+	case XFS_ATTR3_LEAF_MAGIC:
+		get_ents = (xfs_da_leaf_ents_fn)xfs_attr3_leaf_entryp;
+		entsize = sizeof(struct xfs_attr_leaf_entry);
+		break;
+	case XFS_DIR2_LEAFN_MAGIC:
+	case XFS_DIR3_LEAFN_MAGIC:
+	case XFS_DIR2_LEAF1_MAGIC:
+	case XFS_DIR3_LEAF1_MAGIC:
+		/* leafn and leaf1 blocks use the same accessor and entry. */
+		get_ents = (xfs_da_leaf_ents_fn)ds->dargs.dp->d_ops->leaf_ents_p;
+		entsize = sizeof(struct xfs_dir2_leaf_entry);
+		break;
+	case XFS_DA_NODE_MAGIC:
+	case XFS_DA3_NODE_MAGIC:
+		get_ents = (xfs_da_leaf_ents_fn)ds->dargs.dp->d_ops->node_tree_p;
+		entsize = sizeof(struct xfs_da_node_entry);
+		break;
+	default:
+		return NULL;
+	}
+
+	base = get_ents(blk->bp->b_addr);
+	return base + (entsize * rec);
+}
+
+/*
+ * Scrub a da btree hash (key).
+ *
+ * Checks that the hash at @hashp is no smaller than the previous hash seen
+ * at @level and, for any level below the root, no larger than the hash of
+ * the parent entry that the cursor currently points to.  The hash is then
+ * remembered as the most recent one seen at @level.
+ *
+ * NOTE(review): 'error' is never assigned directly here; presumably
+ * XFS_SCRUB_DA_CHECK updates it on failure -- confirm in repair/common.h.
+ */
+int
+xfs_scrub_da_btree_hash(
+ struct xfs_scrub_da_btree *ds,
+ int level,
+ __be32 *hashp)
+{
+ struct xfs_da_state_blk *blks;
+ struct xfs_da_node_entry *btree;
+ xfs_dahash_t hash;
+ xfs_dahash_t parent_hash;
+ int error = 0;
+
+ /* Is this hash in order? */
+ hash = be32_to_cpu(*hashp);
+ XFS_SCRUB_DA_CHECK(ds, hash >= ds->hashes[level]);
+ ds->hashes[level] = hash;
+
+ /* The root has no parent entry to compare against. */
+ if (level == 0)
+ return error;
+
+ /* Is this hash no larger than the parent hash? */
+ blks = ds->state->path.blk;
+ btree = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index);
+ parent_hash = be32_to_cpu(btree->hashval);
+ XFS_SCRUB_DA_CHECK(ds, hash <= parent_hash);
+
+ return error;
+}
+
+/*
+ * Scrub a da btree pointer.
+ *
+ * Checks that @blkno is not below ds->lowest and, when ds->highest is
+ * nonzero, that it is below ds->highest.  A zero ds->highest disables the
+ * upper bound check.
+ */
+STATIC int
+xfs_scrub_da_btree_ptr(
+ struct xfs_scrub_da_btree *ds,
+ int level,
+ xfs_dablk_t blkno)
+{
+ int error = 0;
+
+ XFS_SCRUB_DA_CHECK(ds, blkno >= ds->lowest);
+ XFS_SCRUB_DA_CHECK(ds, ds->highest == 0 || blkno < ds->highest);
+
+ return error;
+}
+
+/*
+ * The regular da btree code has no notion of leaf1 blocks, but the scrubber
+ * walks them as a degenerate da btree.  Look at the block's magic number and
+ * hand the buffer to whichever real read verifier matches.
+ */
+static void
+xfs_scrub_da_btree_read_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_da_blkinfo	*info = bp->b_addr;
+	__u16			magic = be16_to_cpu(info->magic);
+
+	if (magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR3_LEAF1_MAGIC)
+		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+	else
+		bp->b_ops = &xfs_da3_node_buf_ops;
+
+	bp->b_ops->verify_read(bp);
+}
+/* Write-side twin of the read verifier: dispatch on the block's magic. */
+static void
+xfs_scrub_da_btree_write_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_da_blkinfo	*info = bp->b_addr;
+	__u16			magic = be16_to_cpu(info->magic);
+
+	if (magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR3_LEAF1_MAGIC)
+		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+	else
+		bp->b_ops = &xfs_da3_node_buf_ops;
+
+	bp->b_ops->verify_write(bp);
+}
+
+/* Buffer ops that pick the leaf1 or da node verifier by block magic. */
+static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
+	.name = "xfs_scrub_da_btree",
+	.verify_read = xfs_scrub_da_btree_read_verify,
+	.verify_write = xfs_scrub_da_btree_write_verify,
+};
+
+/*
+ * Check a block's sibling pointers.
+ *
+ * At level 0 both the forw and back pointers in @hdr must be zero.  At any
+ * other level, each nonzero pointer is checked by copying the current path
+ * into the alternate path, shifting that copy one block in the matching
+ * direction with xfs_da3_path_shift(), and comparing the block number it
+ * lands on at @level with the pointer.
+ *
+ * NOTE(review): 'error' is presumably also updated by the XFS_SCRUB_DA_*
+ * macros on failure -- confirm in repair/common.h.
+ */
+STATIC int
+xfs_scrub_da_btree_block_check_siblings(
+ struct xfs_scrub_da_btree *ds,
+ int level,
+ struct xfs_da_blkinfo *hdr)
+{
+ xfs_dablk_t forw;
+ xfs_dablk_t back;
+ int retval;
+ int error = 0;
+
+ forw = be32_to_cpu(hdr->forw);
+ back = be32_to_cpu(hdr->back);
+
+ /* Top level blocks should not have sibling pointers. */
+ if (level == 0) {
+ XFS_SCRUB_DA_CHECK(ds, forw == 0);
+ XFS_SCRUB_DA_CHECK(ds, back == 0);
+ return error;
+ }
+
+ /* Check back (left) pointer. */
+ if (back != 0) {
+ /* Move the alternate cursor back one block. */
+ ds->state->altpath = ds->state->path;
+ error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
+ 0, false, &retval);
+ XFS_SCRUB_DA_OP_ERROR_GOTO(ds, &error, out);
+ /* A nonzero retval skips the comparison and moves on to forw. */
+ XFS_SCRUB_DA_GOTO(ds, retval == 0, verify_forw);
+ XFS_SCRUB_DA_CHECK(ds,
+ ds->state->altpath.blk[level].blkno == back);
+ }
+
+verify_forw:
+ /* Check forw (right) pointer. */
+ if (!error && forw != 0) {
+ /* Move the alternate cursor forward one block. */
+ ds->state->altpath = ds->state->path;
+ error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
+ 1, false, &retval);
+ XFS_SCRUB_DA_OP_ERROR_GOTO(ds, &error, out);
+ XFS_SCRUB_DA_GOTO(ds, retval == 0, out);
+ XFS_SCRUB_DA_CHECK(ds,
+ ds->state->altpath.blk[level].blkno == forw);
+ }
+out:
+ /*
+ * Forget the alternate path.
+ * NOTE(review): buffer pointers that xfs_da3_path_shift() stored in
+ * altpath are wiped here without an explicit release -- confirm that
+ * they remain attached to the transaction and are released with it.
+ */
+ memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
+ return error;
+}
+
+/*
+ * Load a dir/attribute block from a btree.
+ *
+ * Releases whatever buffer the path holds at @level, checks @blkno, reads
+ * the block, and then checks its owner (on CRC filesystems), its sibling
+ * pointers and its type.  On success the path slot at @level holds the
+ * buffer, a magic number, and the last hash value in the block, and
+ * ds->maxrecs[@level] holds the number of entries.
+ *
+ * The path slot's bp is left NULL if nothing was read or if the block's
+ * magic number is not recognized; callers test for that.
+ */
+STATIC int
+xfs_scrub_da_btree_block(
+ struct xfs_scrub_da_btree *ds,
+ int level,
+ xfs_dablk_t blkno)
+{
+ struct xfs_da_state_blk *blk;
+ struct xfs_da_intnode *node;
+ struct xfs_da_node_entry *btree;
+ struct xfs_da3_blkinfo *hdr3;
+ struct xfs_da_args *dargs = &ds->dargs;
+ struct xfs_inode *ip = ds->dargs.dp;
+ xfs_ino_t owner;
+ int *pmaxrecs;
+ struct xfs_da3_icnode_hdr nodehdr;
+ int error;
+
+ blk = &ds->state->path.blk[level];
+ ds->state->path.active = level + 1;
+
+ /* Release old block. */
+ if (blk->bp) {
+ xfs_trans_brelse(dargs->trans, blk->bp);
+ blk->bp = NULL;
+ }
+
+ /* Check the pointer. */
+ blk->blkno = blkno;
+ error = xfs_scrub_da_btree_ptr(ds, level, blkno);
+ if (error) {
+ blk->blkno = 0;
+ goto out;
+ }
+
+ /*
+ * Read the buffer.
+ * NOTE(review): the -2 mapped-block argument presumably makes a hole
+ * come back as a NULL buffer instead of an error -- confirm against
+ * xfs_da_read_buf().
+ */
+ error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
+ &blk->bp, dargs->whichfork,
+ &xfs_scrub_da_btree_buf_ops);
+ XFS_SCRUB_DA_OP_ERROR_GOTO(ds, &error, out_nobuf);
+ /* It's ok for a directory not to have a da btree in it. */
+ if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
+ blk->bp == NULL)
+ goto out_nobuf;
+ XFS_SCRUB_DA_GOTO(ds, blk->bp != NULL, out_nobuf);
+
+ hdr3 = blk->bp->b_addr;
+ blk->magic = be16_to_cpu(hdr3->hdr.magic);
+ pmaxrecs = &ds->maxrecs[level];
+
+ /* Check the owner. */
+ if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
+ owner = be64_to_cpu(hdr3->owner);
+ XFS_SCRUB_DA_GOTO(ds, owner == ip->i_ino, out);
+ }
+
+ /* Check the siblings. */
+ error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
+ if (error)
+ goto out;
+
+ /*
+ * Interpret the buffer.  Each case stores the non-CRC variant of the
+ * magic in the path so that the tree walker only has to compare
+ * against one value per block type.
+ */
+ switch (blk->magic) {
+ case XFS_ATTR_LEAF_MAGIC:
+ case XFS_ATTR3_LEAF_MAGIC:
+ xfs_trans_buf_set_type(dargs->trans, blk->bp,
+ XFS_BLFT_ATTR_LEAF_BUF);
+ blk->magic = XFS_ATTR_LEAF_MAGIC;
+ blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
+ /* Leaves must sit at the level the walker expects to be 0. */
+ XFS_SCRUB_DA_CHECK(ds, ds->tree_level == 0);
+ break;
+ case XFS_DIR2_LEAFN_MAGIC:
+ case XFS_DIR3_LEAFN_MAGIC:
+ xfs_trans_buf_set_type(dargs->trans, blk->bp,
+ XFS_BLFT_DIR_LEAFN_BUF);
+ blk->magic = XFS_DIR2_LEAFN_MAGIC;
+ blk->hashval = xfs_dir2_leafn_lasthash(ip, blk->bp, pmaxrecs);
+ XFS_SCRUB_DA_CHECK(ds, ds->tree_level == 0);
+ break;
+ case XFS_DIR2_LEAF1_MAGIC:
+ case XFS_DIR3_LEAF1_MAGIC:
+ xfs_trans_buf_set_type(dargs->trans, blk->bp,
+ XFS_BLFT_DIR_LEAF1_BUF);
+ blk->magic = XFS_DIR2_LEAF1_MAGIC;
+ blk->hashval = xfs_dir2_leaf1_lasthash(ip, blk->bp, pmaxrecs);
+ XFS_SCRUB_DA_CHECK(ds, ds->tree_level == 0);
+ break;
+ case XFS_DA_NODE_MAGIC:
+ case XFS_DA3_NODE_MAGIC:
+ xfs_trans_buf_set_type(dargs->trans, blk->bp,
+ XFS_BLFT_DA_NODE_BUF);
+ blk->magic = XFS_DA_NODE_MAGIC;
+ node = blk->bp->b_addr;
+ ip->d_ops->node_hdr_from_disk(&nodehdr, node);
+ btree = ip->d_ops->node_tree_p(node);
+ *pmaxrecs = nodehdr.count;
+ /*
+ * NOTE(review): this indexes entry -1 if nodehdr.count is 0;
+ * presumably the read verifier rejects empty nodes -- confirm.
+ */
+ blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
+ if (level == 0) {
+ /* The root node tells us how tall the tree should be. */
+ XFS_SCRUB_DA_GOTO(ds,
+ nodehdr.level < XFS_DA_NODE_MAXDEPTH,
+ out);
+ ds->tree_level = nodehdr.level;
+ } else
+ XFS_SCRUB_DA_CHECK(ds, ds->tree_level == nodehdr.level);
+ break;
+ default:
+ /* Unknown block type: drop the buffer and empty the slot. */
+ xfs_trans_brelse(dargs->trans, blk->bp);
+ XFS_SCRUB_DA_CHECK(ds, false);
+ blk->bp = NULL;
+ blk->blkno = 0;
+ break;
+ }
+
+out:
+ return error;
+out_nobuf:
+ blk->blkno = 0;
+ return error;
+}
+
+/*
+ * Visit all nodes and leaves of a da btree.
+ *
+ * Starting at the lowest da block of @whichfork, walk the tree depth first.
+ * Every node entry has its hash checked, and @scrub_fn is called for every
+ * leaf entry.  The walk stops early if @scrub_fn returns a negative value or
+ * XFS_BTREE_QUERY_RANGE_ABORT, or if xfs_scrub_should_terminate() says so.
+ * Forks that are in neither extents nor btree format are skipped.
+ *
+ * Returns the last error value seen, or -EFSCORRUPTED if following the node
+ * pointers would need more than XFS_DA_NODE_MAXDEPTH levels.
+ */
+int
+xfs_scrub_da_btree(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_scrub_da_btree_rec_fn	scrub_fn)
+{
+	struct xfs_scrub_da_btree	ds = {0};
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	struct xfs_da_state_blk		*blks;
+	struct xfs_da_node_entry	*btree;
+	void				*rec;
+	xfs_dablk_t			blkno;
+	bool				is_attr;
+	int				level;
+	int				error;
+
+	/* Skip short format data structures; no btree to scan. */
+	if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		return 0;
+
+	/* Set up initial da state. */
+	is_attr = whichfork == XFS_ATTR_FORK;
+	ds.dargs.geo = is_attr ? mp->m_attr_geo : mp->m_dir_geo;
+	ds.dargs.dp = sc->ip;
+	ds.dargs.whichfork = whichfork;
+	ds.dargs.trans = sc->tp;
+	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
+	ds.state = xfs_da_state_alloc();
+	ds.state->args = &ds.dargs;
+	ds.state->mp = sc->ip->i_mount;
+	ds.type = is_attr ? "attr" : "dir";
+	ds.sc = sc;
+	blkno = ds.lowest = is_attr ? 0 : ds.dargs.geo->leafblk;
+	ds.highest = is_attr ? 0 : ds.dargs.geo->freeblk;
+	level = 0;
+
+	/* Find the root of the da tree, if present. */
+	blks = ds.state->path.blk;
+	error = xfs_scrub_da_btree_block(&ds, level, blkno);
+	if (error || blks[level].bp == NULL)
+		goto out_state;
+
+	blks[level].index = 0;
+	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
+		/* Handle leaf block. */
+		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
+			/* End of leaf, pop back towards the root. */
+			if (blks[level].index >= ds.maxrecs[level]) {
+				if (level > 0)
+					blks[level - 1].index++;
+				ds.tree_level++;
+				level--;
+				continue;
+			}
+
+			/* Dispatch record scrubbing. */
+			rec = xfs_scrub_da_btree_entry(&ds, level,
+					blks[level].index);
+			error = scrub_fn(&ds, level, rec);
+			if (error < 0 ||
+			    error == XFS_BTREE_QUERY_RANGE_ABORT)
+				break;
+			if (xfs_scrub_should_terminate(&error))
+				break;
+
+			blks[level].index++;
+			continue;
+		}
+
+		/* Only pointer arithmetic here; bounds are checked below. */
+		btree = xfs_scrub_da_btree_entry(&ds, level, blks[level].index);
+
+		/* End of node, pop back towards the root. */
+		if (blks[level].index >= ds.maxrecs[level]) {
+			if (level > 0)
+				blks[level - 1].index++;
+			ds.tree_level++;
+			level--;
+			continue;
+		}
+
+		/* Hashes in order for scrub? */
+		error = xfs_scrub_da_btree_hash(&ds, level, &btree->hashval);
+		if (error)
+			goto out;
+
+		/*
+		 * The path, maxrecs and hashes arrays only have
+		 * XFS_DA_NODE_MAXDEPTH slots.  A corrupt chain of node blocks
+		 * must not be allowed to push us past the last one.
+		 */
+		if (level + 1 >= XFS_DA_NODE_MAXDEPTH) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
+
+		/* Drill another level deeper. */
+		blkno = be32_to_cpu(btree->before);
+		level++;
+		ds.tree_level--;
+		error = xfs_scrub_da_btree_block(&ds, level, blkno);
+		if (error || blks[level].bp == NULL)
+			goto out;
+
+		blks[level].index = 0;
+	}
+
+out:
+	/* Release all the buffers we're tracking. */
+	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
+		if (blks[level].bp == NULL)
+			continue;
+		xfs_trans_brelse(sc->tp, blks[level].bp);
+		blks[level].bp = NULL;
+	}
+
+out_state:
+	xfs_da_state_free(ds.state);
+	return error;
+}
diff --git a/fs/xfs/repair/dabtree.h b/fs/xfs/repair/dabtree.h
new file mode 100644
index 0000000..1302d67
--- /dev/null
+++ b/fs/xfs/repair/dabtree.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_REPAIR_DABTREE_H__
+#define __XFS_REPAIR_DABTREE_H__
+
+/* dir/attr btree */
+
+/* State carried through one walk of a directory or attribute da btree. */
+struct xfs_scrub_da_btree {
+ /* da operation arguments (inode, fork, geometry, transaction) */
+ struct xfs_da_args dargs;
+ /* most recent hash value checked at each level */
+ xfs_dahash_t hashes[XFS_DA_NODE_MAXDEPTH];
+ /* number of entries in the block currently loaded at each level */
+ int maxrecs[XFS_DA_NODE_MAXDEPTH];
+ /* da state whose path serves as the walk cursor */
+ struct xfs_da_state *state;
+ /* name passed to the scrub check macros ("attr" or "dir") */
+ const char *type;
+ struct xfs_scrub_context *sc;
+ /* block pointers must not be below this da block */
+ xfs_dablk_t lowest;
+ /* if nonzero, block pointers must be below this da block */
+ xfs_dablk_t highest;
+ /* node level expected of the block at the cursor; 0 for leaves */
+ int tree_level;
+};
+
+typedef void *(*xfs_da_leaf_ents_fn)(void *);
+typedef int (*xfs_scrub_da_btree_rec_fn)(struct xfs_scrub_da_btree *ds,
+ int level, void *rec);
+
+#define XFS_SCRUB_DA_CHECK(ds, fs_ok) \
+ XFS_SCRUB_DATA_CHECK((ds)->sc, (ds)->dargs.whichfork, \
+ xfs_dir2_da_to_db((ds)->dargs.geo, \
+ (ds)->state->path.blk[level].blkno), (ds)->type, \
+ fs_ok)
+#define XFS_SCRUB_DA_GOTO(ds, fs_ok, label) \
+ XFS_SCRUB_DATA_GOTO((ds)->sc, (ds)->dargs.whichfork, \
+ xfs_dir2_da_to_db((ds)->dargs.geo, \
+ (ds)->state->path.blk[level].blkno), (ds)->type, \
+ fs_ok, label)
+#define XFS_SCRUB_DA_OP_ERROR_GOTO(ds, error, label) \
+ XFS_SCRUB_FILE_OP_ERROR_GOTO((ds)->sc, (ds)->dargs.whichfork, \
+ xfs_dir2_da_to_db((ds)->dargs.geo, \
+ (ds)->state->path.blk[level].blkno), (ds)->type, \
+ (error), label)
+
+int xfs_scrub_da_btree_hash(struct xfs_scrub_da_btree *ds, int level,
+ __be32 *hashp);
+int xfs_scrub_da_btree(struct xfs_scrub_context *sc, int whichfork,
+ xfs_scrub_da_btree_rec_fn scrub_fn);
+
+#endif /* __XFS_REPAIR_DABTREE_H__ */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 21/47] xfs: scrub directory metadata
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (19 preceding siblings ...)
2017-01-07 0:37 ` [PATCH 20/47] xfs: scrub directory/attribute btrees Darrick J. Wong
@ 2017-01-07 0:38 ` Darrick J. Wong
2017-01-07 0:38 ` [PATCH 22/47] xfs: scrub extended attributes Darrick J. Wong
` (26 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:38 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Scrub the hash tree and all the entries in a directory.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_dir2_priv.h | 4 -
fs/xfs/libxfs/xfs_fs.h | 3
fs/xfs/repair/common.c | 1
fs/xfs/repair/common.h | 1
fs/xfs/repair/dir.c | 267 +++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_dir2_readdir.c | 19 ++-
fs/xfs/xfs_file.c | 2
fs/xfs/xfs_trace.h | 3
9 files changed, 290 insertions(+), 11 deletions(-)
create mode 100644 fs/xfs/repair/dir.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index dd66bf5..0fb2b5d 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -109,6 +109,7 @@ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
btree.o \
common.o \
dabtree.o \
+ dir.o \
ialloc.o \
inode.o \
refcount.o \
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 1abd314..4cf2956 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -129,7 +129,7 @@ extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
/* xfs_dir2_readdir.c */
-extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
- size_t bufsize);
+extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
+ struct dir_context *ctx, size_t bufsize);
#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 9753400..986f993 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -589,7 +589,8 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_BMBTD 12 /* data fork block mapping */
#define XFS_SCRUB_TYPE_BMBTA 13 /* attr fork block mapping */
#define XFS_SCRUB_TYPE_BMBTC 14 /* CoW fork block mapping */
-#define XFS_SCRUB_TYPE_MAX 14
+#define XFS_SCRUB_TYPE_DIR 15 /* directory */
+#define XFS_SCRUB_TYPE_MAX 15
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 1aa4f20..adf457c 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -681,6 +681,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_data, NULL, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_attr, NULL, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_cow, NULL, NULL},
+ {xfs_scrub_setup_inode, xfs_scrub_directory, NULL, NULL},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 8c30e78..080596b 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -243,5 +243,6 @@ int xfs_scrub_inode(struct xfs_scrub_context *sc);
int xfs_scrub_bmap_data(struct xfs_scrub_context *sc);
int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc);
int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc);
+int xfs_scrub_directory(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/dir.c b/fs/xfs/repair/dir.c
new file mode 100644
index 0000000..2838566
--- /dev/null
+++ b/fs/xfs/repair/dir.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_itable.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "repair/common.h"
+#include "repair/dabtree.h"
+
+/* Directories */
+
+/* Scrub a directory entry. */
+
+struct xfs_scrub_dir_ctx {
+ struct dir_context dc;
+ struct xfs_scrub_context *sc;
+};
+
+#define XFS_SCRUB_DIR_CHECK(fs_ok) \
+ XFS_SCRUB_DATA_CHECK(sdc->sc, XFS_DATA_FORK, offset, "dir", fs_ok)
+#define XFS_SCRUB_DIR_GOTO(fs_ok, label) \
+ XFS_SCRUB_DATA_GOTO(sdc->sc, XFS_DATA_FORK, offset, "dir", fs_ok, label)
+#define XFS_SCRUB_DIR_OP_ERROR_GOTO(label) \
+ XFS_SCRUB_FILE_OP_ERROR_GOTO(sdc->sc, XFS_DATA_FORK, offset, "dir", &error, label)
+/*
+ * Check that an inode's mode matches a given DT_ type.
+ *
+ * Without the ftype feature the only accepted values of @dtype are
+ * DT_UNKNOWN and DT_DIR, and the inode itself is not looked at.  Otherwise
+ * inode @inum is grabbed and the DT_ value derived from its mode is
+ * compared with @dtype.
+ *
+ * NOTE(review): XFS_SCRUB_DIR_CHECK presumably records failures through the
+ * local 'error' and/or the scrub context -- confirm in repair/common.h.
+ */
+STATIC int
+xfs_scrub_dir_check_ftype(
+ struct xfs_scrub_dir_ctx *sdc,
+ xfs_fileoff_t offset,
+ xfs_ino_t inum,
+ int dtype)
+{
+ struct xfs_mount *mp = sdc->sc->ip->i_mount;
+ struct xfs_inode *ip;
+ int ino_dtype;
+ int error = 0;
+
+ if (!xfs_sb_version_hasftype(&mp->m_sb)) {
+ XFS_SCRUB_DIR_CHECK(dtype == DT_UNKNOWN || dtype == DT_DIR);
+ goto out;
+ }
+
+ error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
+ XFS_SCRUB_OP_ERROR_GOTO(sdc->sc,
+ XFS_INO_TO_AGNO(mp, inum),
+ XFS_INO_TO_AGBNO(mp, inum),
+ "inode", &error, out);
+ /* Convert the file type bits of the mode into a DT_ value. */
+ ino_dtype = (VFS_I(ip)->i_mode & S_IFMT) >> S_SHIFT;
+ XFS_SCRUB_DIR_CHECK(ino_dtype == dtype);
+ IRELE(ip);
+out:
+ return error;
+}
+
+/*
+ * Scrub a single directory entry.
+ *
+ * This is the dir_context actor that xfs_readdir() calls for each entry.
+ * It checks that @ino is a plausible, non-internal inode number, that
+ * looking up @name in the directory yields the same inode number, that "."
+ * and ".." are sane, and that @type matches the inode's mode.
+ *
+ * Returns 0 to keep iterating or a nonzero error to stop.
+ */
+STATIC int
+xfs_scrub_dir_actor(
+	struct dir_context		*dc,
+	const char			*name,
+	int				namelen,
+	loff_t				pos,
+	u64				ino,
+	unsigned			type)
+{
+	struct xfs_mount		*mp;
+	struct xfs_inode		*ip;
+	struct xfs_scrub_dir_ctx	*sdc;
+	struct xfs_name			xname;
+	xfs_ino_t			lookup_ino;
+	xfs_dablk_t			offset;
+	int				error = 0;
+
+	sdc = container_of(dc, struct xfs_scrub_dir_ctx, dc);
+	ip = sdc->sc->ip;
+	mp = ip->i_mount;
+	offset = xfs_dir2_db_to_da(mp->m_dir_geo,
+			xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos));
+
+	/* Does this inode number make sense? */
+	XFS_SCRUB_DIR_GOTO(xfs_dir_ino_validate(mp, ino) == 0, out);
+	XFS_SCRUB_DIR_GOTO(!xfs_internal_inum(mp, ino), out);
+
+	/* Verify that we can look up this name by hash. */
+	xname.name = name;
+	xname.len = namelen;
+	xname.type = XFS_DIR3_FT_UNKNOWN;
+
+	error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL);
+	XFS_SCRUB_DIR_OP_ERROR_GOTO(fail_xref);
+	XFS_SCRUB_DIR_GOTO(lookup_ino == ino, out);
+
+	/*
+	 * Compare by length and character.  @name is not NUL terminated, and
+	 * memcmp() of namelen bytes against a short string literal would read
+	 * past the end of the literal for longer names.
+	 */
+	if (namelen == 1 && name[0] == '.') {
+		/* If this is "." then check that the inum matches the dir. */
+		if (xfs_sb_version_hasftype(&mp->m_sb))
+			XFS_SCRUB_DIR_CHECK(type == DT_DIR);
+		XFS_SCRUB_DIR_CHECK(ino == ip->i_ino);
+	} else if (namelen == 2 && name[0] == '.' && name[1] == '.') {
+		/*
+		 * If this is ".." in the root inode, check that the inum
+		 * matches this dir.
+		 */
+		if (xfs_sb_version_hasftype(&mp->m_sb))
+			XFS_SCRUB_DIR_CHECK(type == DT_DIR);
+		if (ip->i_ino == mp->m_sb.sb_rootino)
+			XFS_SCRUB_DIR_CHECK(ino == ip->i_ino);
+	}
+	if (error)
+		goto out;
+
+	/* Verify the file type. */
+	error = xfs_scrub_dir_check_ftype(sdc, offset, lookup_ino, type);
+out:
+	return error;
+fail_xref:
+	/* Always hand a nonzero value back so that the readdir stops. */
+	return error ? error : -EFSCORRUPTED;
+}
+#undef XFS_SCRUB_DIR_OP_ERROR_GOTO
+#undef XFS_SCRUB_DIR_GOTO
+#undef XFS_SCRUB_DIR_CHECK
+
+#define XFS_SCRUB_DIRENT_CHECK(fs_ok) \
+ XFS_SCRUB_DATA_CHECK(ds->sc, XFS_DATA_FORK, rec_bno, "dir", fs_ok)
+#define XFS_SCRUB_DIRENT_GOTO(fs_ok, label) \
+ XFS_SCRUB_DATA_GOTO(ds->sc, XFS_DATA_FORK, rec_bno, "dir", fs_ok, label)
+#define XFS_SCRUB_DIRENT_OP_ERROR_GOTO(label) \
+ XFS_SCRUB_FILE_OP_ERROR_GOTO(ds->sc, XFS_DATA_FORK, rec_bno, "dir", &error, label)
+/*
+ * Scrub a directory btree record.
+ *
+ * @rec is a directory leaf entry (hash value plus data pointer).  After the
+ * hash ordering check, the data pointer is split into a directory block and
+ * an offset, the data block is read, and the entry found there is checked:
+ * its inode number, its tag against the offset, and the hash of its name
+ * against the hash stored in the leaf entry.
+ */
+STATIC int
+xfs_scrub_dir_rec(
+ struct xfs_scrub_da_btree *ds,
+ int level,
+ void *rec)
+{
+ struct xfs_mount *mp = ds->state->mp;
+ struct xfs_dir2_leaf_entry *ent = rec;
+ struct xfs_inode *dp = ds->dargs.dp;
+ struct xfs_dir2_data_entry *dent;
+ struct xfs_buf *bp;
+ xfs_ino_t ino;
+ xfs_dablk_t rec_bno;
+ xfs_dir2_db_t db;
+ xfs_dir2_data_aoff_t off;
+ xfs_dir2_dataptr_t ptr;
+ xfs_dahash_t calc_hash;
+ xfs_dahash_t hash;
+ unsigned int tag;
+ int error;
+
+ /* Check the hash of the entry. */
+ error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
+ if (error)
+ goto out;
+
+ /* Valid hash pointer? Entries with a zero address are skipped. */
+ ptr = be32_to_cpu(ent->address);
+ if (ptr == 0)
+ return 0;
+
+ /* Find the directory entry's location. */
+ db = xfs_dir2_dataptr_to_db(mp->m_dir_geo, ptr);
+ off = xfs_dir2_dataptr_to_off(mp->m_dir_geo, ptr);
+ rec_bno = xfs_dir2_db_to_da(mp->m_dir_geo, db);
+
+ /* The entry must live below the start of the leaf blocks. */
+ XFS_SCRUB_DA_GOTO(ds, rec_bno < mp->m_dir_geo->leafblk, out);
+ error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, -2, &bp);
+ XFS_SCRUB_DIRENT_OP_ERROR_GOTO(out);
+ XFS_SCRUB_DIRENT_GOTO(bp != NULL, out);
+
+ /*
+ * Retrieve the entry and check it.
+ * NOTE(review): nothing here checks that the entry, including its
+ * name and tag, lies wholly inside the buffer -- confirm that the
+ * data block verifier guarantees this.
+ */
+ dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off);
+ ino = be64_to_cpu(dent->inumber);
+ hash = be32_to_cpu(ent->hashval);
+ tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent));
+ XFS_SCRUB_DIRENT_CHECK(xfs_dir_ino_validate(mp, ino) == 0);
+ XFS_SCRUB_DIRENT_CHECK(!xfs_internal_inum(mp, ino));
+ XFS_SCRUB_DIRENT_CHECK(tag == off);
+ XFS_SCRUB_DIRENT_GOTO(dent->namelen < MAXNAMELEN, out_relse);
+ calc_hash = xfs_da_hashname(dent->name, dent->namelen);
+ XFS_SCRUB_DIRENT_CHECK(calc_hash == hash);
+
+out_relse:
+ xfs_trans_brelse(ds->dargs.trans, bp);
+out:
+ return error;
+}
+#undef XFS_SCRUB_DIRENT_OP_ERROR_GOTO
+#undef XFS_SCRUB_DIRENT_GOTO
+#undef XFS_SCRUB_DIRENT_CHECK
+
+/*
+ * Scrub a whole directory.
+ *
+ * Returns -ENOENT if the inode is not a directory.  Otherwise the da btree
+ * of the data fork is walked with xfs_scrub_dir_rec(), and then the
+ * directory is read with xfs_readdir() so that xfs_scrub_dir_actor() sees
+ * every entry.
+ */
+int
+xfs_scrub_directory(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_scrub_dir_ctx sdc = {
+ .dc.actor = xfs_scrub_dir_actor,
+ .dc.pos = 0,
+ };
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ size_t bufsize;
+ loff_t oldpos;
+ int error;
+
+ if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
+ return -ENOENT;
+
+ /* Check directory tree structure */
+ error = xfs_scrub_da_btree(sc, XFS_DATA_FORK, xfs_scrub_dir_rec);
+ if (error)
+ return error;
+
+ /* Check that every dirent we see can also be looked up by hash. */
+ bufsize = (size_t)min_t(loff_t, 32768, sc->ip->i_d.di_size);
+ sdc.sc = sc;
+
+ /*
+ * Drop the ILOCK for the duration of the readdir loop and retake it
+ * afterwards.
+ * NOTE(review): presumably needed because the readdir code takes the
+ * ILOCK itself -- confirm against xfs_dir2_readdir.c.
+ */
+ oldpos = 0;
+ xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
+ while (true) {
+ error = xfs_readdir(sc->tp, sc->ip, &sdc.dc, bufsize);
+ XFS_SCRUB_OP_ERROR_GOTO(sc,
+ XFS_INO_TO_AGNO(mp, sc->ip->i_ino),
+ XFS_INO_TO_AGBNO(mp, sc->ip->i_ino),
+ "inode", &error, out_unlock);
+ /* Stop once a pass no longer advances the position. */
+ if (oldpos == sdc.dc.pos)
+ break;
+ oldpos = sdc.dc.pos;
+ }
+
+out_unlock:
+ xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+ return error;
+}
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 003a99b..0b3b636 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -181,7 +181,7 @@ xfs_dir2_block_getdents(
return 0;
lock_mode = xfs_ilock_data_map_shared(dp);
- error = xfs_dir3_block_read(NULL, dp, &bp);
+ error = xfs_dir3_block_read(args->trans, dp, &bp);
xfs_iunlock(dp, lock_mode);
if (error)
return error;
@@ -239,7 +239,7 @@ xfs_dir2_block_getdents(
if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
be64_to_cpu(dep->inumber),
xfs_dir3_get_dtype(dp->i_mount, filetype))) {
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
return 0;
}
}
@@ -250,7 +250,7 @@ xfs_dir2_block_getdents(
*/
ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
0x7fffffff;
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
return 0;
}
@@ -386,7 +386,7 @@ xfs_dir2_leaf_readbuf(
* Read the directory block starting at the first mapping.
*/
mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff);
- error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
+ error = xfs_dir3_data_read(args->trans, dp, map->br_startoff,
map->br_blockcount >= geo->fsbcount ?
XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) :
-1, &bp);
@@ -535,7 +535,7 @@ xfs_dir2_leaf_getdents(
bool trim_map = false;
if (bp) {
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
bp = NULL;
trim_map = true;
}
@@ -649,15 +649,21 @@ xfs_dir2_leaf_getdents(
ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
kmem_free(map_info);
if (bp)
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(args->trans, bp);
return error;
}
/*
* Read a directory.
+ *
+ * If supplied, the transaction collects locked dir buffers to avoid
+ * nested buffer deadlocks. This function does not dirty the
+ * transaction. The caller should ensure that the inode is locked
+ * before calling this function.
*/
int
xfs_readdir(
+ struct xfs_trans *tp,
struct xfs_inode *dp,
struct dir_context *ctx,
size_t bufsize)
@@ -676,6 +682,7 @@ xfs_readdir(
args.dp = dp;
args.geo = dp->i_mount->m_dir_geo;
+ args.trans = tp;
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
rval = xfs_dir2_sf_getdents(&args, ctx);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index bbb9eb6..0df15e4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -944,7 +944,7 @@ xfs_file_readdir(
*/
bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
- return xfs_readdir(ip, ctx, bufsize);
+ return xfs_readdir(NULL, ip, ctx, bufsize);
}
/*
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 141a39f..18b211f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3367,7 +3367,8 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_INODE, "inode" }, \
{ XFS_SCRUB_TYPE_BMBTD, "bmapbtd" }, \
{ XFS_SCRUB_TYPE_BMBTA, "bmapbta" }, \
- { XFS_SCRUB_TYPE_BMBTC, "bmapbtc" }
+ { XFS_SCRUB_TYPE_BMBTC, "bmapbtc" }, \
+ { XFS_SCRUB_TYPE_DIR, "dir" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 22/47] xfs: scrub extended attributes
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (20 preceding siblings ...)
2017-01-07 0:38 ` [PATCH 21/47] xfs: scrub directory metadata Darrick J. Wong
@ 2017-01-07 0:38 ` Darrick J. Wong
2017-01-07 0:38 ` [PATCH 23/47] xfs: scrub symbolic links Darrick J. Wong
` (25 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:38 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Scrub the hash tree, keys, and values in an extended attribute structure.
Refactor the attribute code to use the transaction if the caller supplied
one to avoid buffer deadlocks.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_attr.c | 26 +++--
fs/xfs/libxfs/xfs_attr_remote.c | 5 +
fs/xfs/libxfs/xfs_fs.h | 3 -
fs/xfs/repair/attr.c | 216 +++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 5 +
fs/xfs/repair/common.h | 6 +
fs/xfs/xfs_attr.h | 2
fs/xfs/xfs_attr_list.c | 28 +++--
fs/xfs/xfs_trace.h | 3 -
10 files changed, 269 insertions(+), 26 deletions(-)
create mode 100644 fs/xfs/repair/attr.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 0fb2b5d..ae045ff 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -105,6 +105,7 @@ xfs-y += xfs_aops.o \
xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
agheader.o \
alloc.o \
+ attr.o \
bmap.o \
btree.o \
common.o \
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index af1ecb1..b4e1686 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -114,6 +114,23 @@ xfs_inode_hasattr(
* Overall external interface routines.
*========================================================================*/
+/*
+ * Retrieve an extended attribute and its value.  The caller must already
+ * hold the inode's ILOCK; xfs_attr_get() wraps this call in
+ * xfs_ilock_attr_map_shared()/xfs_iunlock().
+ *
+ * Returns -ENOATTR if the inode has no attributes; otherwise returns the
+ * result of the shortform, leaf or node lookup that matches the attr fork.
+ */
+int
+xfs_attr_get_locked(
+ struct xfs_inode *ip,
+ struct xfs_da_args *args)
+{
+ if (!xfs_inode_hasattr(ip))
+ return -ENOATTR;
+ else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+ return xfs_attr_shortform_getvalue(args);
+ else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
+ return xfs_attr_leaf_get(args);
+ else
+ return xfs_attr_node_get(args);
+}
+
+/* Retrieve an extended attribute by name, and its value. */
int
xfs_attr_get(
struct xfs_inode *ip,
@@ -144,14 +161,7 @@ xfs_attr_get(
args.op_flags = XFS_DA_OP_OKNOENT;
lock_mode = xfs_ilock_attr_map_shared(ip);
- if (!xfs_inode_hasattr(ip))
- error = -ENOATTR;
- else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
- error = xfs_attr_shortform_getvalue(&args);
- else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
- error = xfs_attr_leaf_get(&args);
- else
- error = xfs_attr_node_get(&args);
+ error = xfs_attr_get_locked(ip, &args);
xfs_iunlock(ip, lock_mode);
*valuelenp = args.valuelen;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d52f525..76958b4 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -386,7 +386,8 @@ xfs_attr_rmtval_get(
(map[i].br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+ error = xfs_trans_read_buf(mp, args->trans,
+ mp->m_ddev_targp,
dblkno, dblkcnt, 0, &bp,
&xfs_attr3_rmt_buf_ops);
if (error)
@@ -395,7 +396,7 @@ xfs_attr_rmtval_get(
error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
&offset, &valuelen,
&dst);
- xfs_buf_relse(bp);
+ xfs_trans_brelse(args->trans, bp);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 986f993..4da3718 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -590,7 +590,8 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_BMBTA 13 /* attr fork block mapping */
#define XFS_SCRUB_TYPE_BMBTC 14 /* CoW fork block mapping */
#define XFS_SCRUB_TYPE_DIR 15 /* directory */
-#define XFS_SCRUB_TYPE_MAX 15
+#define XFS_SCRUB_TYPE_XATTR 16 /* extended attribute */
+#define XFS_SCRUB_TYPE_MAX 16
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/repair/attr.c b/fs/xfs/repair/attr.c
new file mode 100644
index 0000000..5b6e1d9
--- /dev/null
+++ b/fs/xfs/repair/attr.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "repair/common.h"
+#include "repair/dabtree.h"
+
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+/* Set us up with an inode and an XATTR_SIZE_MAX buffer for reading xattr values. */
+int
+xfs_scrub_setup_inode_xattr(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ void *buf;
+ int error;
+
+ /* Allocate the buffer before xfs_scrub_setup_inode(), i.e. without the inode lock held. */
+ buf = kmem_zalloc_large(XATTR_SIZE_MAX, KM_SLEEP);
+ if (!buf)
+ return -ENOMEM;
+
+ error = xfs_scrub_setup_inode(sc, ip, sm, retry_deadlocked);
+ if (error) {
+ kmem_free(buf); /* sc never took ownership, so free it here */
+ return error;
+ }
+
+ sc->buf = buf; /* from here on xfs_scrub_teardown() frees it */
+ return 0;
+}
+
+/* Extended Attributes */
+
+struct xfs_scrub_xattr {
+ struct xfs_attr_list_context context; /* embedded: listent recovers us via container_of */
+ struct xfs_scrub_context *sc; /* scrub state, including the value buffer sc->buf */
+};
+
+#define XFS_SCRUB_ATTR_CHECK(fs_ok) \
+ XFS_SCRUB_DATA_CHECK(sx->sc, XFS_ATTR_FORK, args.blkno, "attr", fs_ok)
+#define XFS_SCRUB_ATTR_OP_ERROR_GOTO(label) \
+ XFS_SCRUB_FILE_OP_ERROR_GOTO(sx->sc, XFS_ATTR_FORK, args.blkno, "attr", &error, label)
+/* Look up a listed attr by name and hash; its value length must match the listing. */
+static void
+xfs_scrub_xattr_listent(
+ struct xfs_attr_list_context *context,
+ int flags,
+ unsigned char *name,
+ int namelen,
+ int valuelen)
+{
+ struct xfs_scrub_xattr *sx;
+ struct xfs_da_args args = {0};
+ int error = 0;
+
+ sx = container_of(context, struct xfs_scrub_xattr, context);
+
+ args.flags = ATTR_KERNOTIME;
+ if (flags & XFS_ATTR_ROOT)
+ args.flags |= ATTR_ROOT;
+ else if (flags & XFS_ATTR_SECURE)
+ args.flags |= ATTR_SECURE;
+ args.geo = context->dp->i_mount->m_attr_geo;
+ args.whichfork = XFS_ATTR_FORK;
+ args.dp = context->dp;
+ args.name = name;
+ args.namelen = namelen;
+ args.hashval = xfs_da_hashname(args.name, args.namelen);
+ args.trans = context->tp; /* may be NULL if the lister supplied no transaction */
+ args.value = sx->sc->buf; /* scratch buffer allocated by the scrub setup */
+ args.valuelen = XATTR_SIZE_MAX; /* capacity of that buffer */
+
+ error = xfs_attr_get_locked(context->dp, &args);
+ if (error == -EEXIST) /* NOTE(review): -EEXIST looks like the "found" result - confirm */
+ error = 0;
+ XFS_SCRUB_ATTR_OP_ERROR_GOTO(fail_xref);
+ XFS_SCRUB_ATTR_CHECK(args.valuelen == valuelen);
+
+fail_xref:
+ return;
+}
+#undef XFS_SCRUB_ATTR_OP_ERROR_GOTO
+#undef XFS_SCRUB_ATTR_CHECK
+
+/* Scrub an attribute btree record. */
+STATIC int
+xfs_scrub_xattr_rec(
+ struct xfs_scrub_da_btree *ds,
+ int level,
+ void *rec)
+{
+ struct xfs_mount *mp = ds->state->mp;
+ struct xfs_attr_leaf_entry *ent = rec;
+ struct xfs_da_state_blk *blk;
+ struct xfs_attr_leaf_name_local *lentry;
+ struct xfs_attr_leaf_name_remote *rentry;
+ struct xfs_buf *bp;
+ xfs_dahash_t calc_hash;
+ xfs_dahash_t hash;
+ int nameidx;
+ int hdrsize;
+ unsigned int badflags;
+ int error;
+
+ blk = &ds->state->path.blk[level];
+
+ /* Check the hash of the entry. */
+ error = xfs_scrub_da_btree_hash(ds, level, &ent->hashval);
+ if (error)
+ goto out;
+
+ /* Find the entry's name offset; it must be past the header and inside the block. */
+ bp = blk->bp;
+ hdrsize = xfs_attr3_leaf_hdr_size(bp->b_addr);
+ nameidx = be16_to_cpu(ent->nameidx);
+ XFS_SCRUB_DA_GOTO(ds, nameidx >= hdrsize, out);
+ XFS_SCRUB_DA_GOTO(ds, nameidx < mp->m_attr_geo->blksize, out);
+
+ /* Check for unknown flag bits, then recompute the name hash and compare it. */
+ hash = be32_to_cpu(ent->hashval);
+ badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE |
+ XFS_ATTR_INCOMPLETE);
+ XFS_SCRUB_DA_CHECK(ds, (ent->flags & badflags) == 0);
+ if (ent->flags & XFS_ATTR_LOCAL) {
+ lentry = (struct xfs_attr_leaf_name_local *)
+ (((char *)bp->b_addr) + nameidx);
+ XFS_SCRUB_DA_GOTO(ds, lentry->namelen < MAXNAMELEN, out);
+ calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen);
+ } else {
+ rentry = (struct xfs_attr_leaf_name_remote *)
+ (((char *)bp->b_addr) + nameidx);
+ XFS_SCRUB_DA_GOTO(ds, rentry->namelen < MAXNAMELEN, out);
+ calc_hash = xfs_da_hashname(rentry->name, rentry->namelen);
+ }
+ XFS_SCRUB_DA_CHECK(ds, calc_hash == hash);
+
+out:
+ return error;
+}
+
+/* Scrub the extended attribute metadata: tree structure first, then each key. */
+int
+xfs_scrub_xattr(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_scrub_xattr sx = { 0 };
+ struct attrlist_cursor_kern cursor = { 0 };
+ struct xfs_mount *mp = sc->ip->i_mount;
+ int error = 0;
+
+ if (!xfs_inode_hasattr(sc->ip))
+ return -ENOENT;
+
+ /* Check attribute tree structure, passing xfs_scrub_xattr_rec as the record checker. */
+ error = xfs_scrub_da_btree(sc, XFS_ATTR_FORK, xfs_scrub_xattr_rec);
+ if (error)
+ goto out;
+
+ /* Check that every attr key can also be looked up by hash. */
+ sx.context.dp = sc->ip;
+ sx.context.cursor = &cursor;
+ sx.context.resynch = 1;
+ sx.context.put_listent = xfs_scrub_xattr_listent;
+ sx.context.tp = sc->tp; /* the list code reads buffers through this transaction */
+ sx.sc = sc;
+
+ xfs_iunlock(sc->ip, XFS_ILOCK_EXCL); /* NOTE(review): presumably xfs_attr_list_int locks the inode itself - confirm */
+ error = xfs_attr_list_int(&sx.context);
+ xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+
+ XFS_SCRUB_OP_ERROR_GOTO(sc,
+ XFS_INO_TO_AGNO(mp, sc->ip->i_ino),
+ XFS_INO_TO_AGBNO(mp, sc->ip->i_ino),
+ "inode", &error, out);
+out:
+ return error;
+}
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index adf457c..8f7c6ee 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -574,6 +574,10 @@ xfs_scrub_teardown(
IRELE(sc->ip);
sc->ip = NULL;
}
+ if (sc->buf) {
+ kmem_free(sc->buf);
+ sc->buf = NULL;
+ }
return error;
}
@@ -682,6 +686,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_attr, NULL, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_cow, NULL, NULL},
{xfs_scrub_setup_inode, xfs_scrub_directory, NULL, NULL},
+ {xfs_scrub_setup_inode_xattr, xfs_scrub_xattr, NULL, NULL},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 080596b..0f3ffd7 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -59,6 +59,7 @@ struct xfs_scrub_context {
struct xfs_scrub_metadata *sm;
struct xfs_trans *tp;
struct xfs_inode *ip;
+ void *buf;
bool retry;
/* State tracking for multi-AG operations. */
@@ -226,6 +227,10 @@ int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
bool retry_deadlocked);
+int xfs_scrub_setup_inode_xattr(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
/* Metadata scrubbers */
@@ -244,5 +249,6 @@ int xfs_scrub_bmap_data(struct xfs_scrub_context *sc);
int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc);
int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc);
int xfs_scrub_directory(struct xfs_scrub_context *sc);
+int xfs_scrub_xattr(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index d14691a..24093f4 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -117,6 +117,7 @@ typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
unsigned char *, int, int);
typedef struct xfs_attr_list_context {
+ struct xfs_trans *tp;
struct xfs_inode *dp; /* inode */
struct attrlist_cursor_kern *cursor; /* position in list */
char *alist; /* output buffer */
@@ -142,6 +143,7 @@ typedef struct xfs_attr_list_context {
int xfs_attr_inactive(struct xfs_inode *dp);
int xfs_attr_list_int(struct xfs_attr_list_context *);
int xfs_inode_hasattr(struct xfs_inode *ip);
+int xfs_attr_get_locked(struct xfs_inode *ip, struct xfs_da_args *args);
int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
unsigned char *value, int *valuelenp, int flags);
int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 97c45b6..42bd26d 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -230,7 +230,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
*/
bp = NULL;
if (cursor->blkno > 0) {
- error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
+ error = xfs_da3_node_read(context->tp, dp, cursor->blkno, -1,
&bp, XFS_ATTR_FORK);
if ((error != 0) && (error != -EFSCORRUPTED))
return error;
@@ -242,7 +242,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
case XFS_DA_NODE_MAGIC:
case XFS_DA3_NODE_MAGIC:
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
break;
case XFS_ATTR_LEAF_MAGIC:
@@ -254,18 +254,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (cursor->hashval > be32_to_cpu(
entries[leafhdr.count - 1].hashval)) {
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
} else if (cursor->hashval <= be32_to_cpu(
entries[0].hashval)) {
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
}
break;
default:
trace_xfs_attr_list_wrong_blk(context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
bp = NULL;
}
}
@@ -281,7 +281,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
for (;;) {
__uint16_t magic;
- error = xfs_da3_node_read(NULL, dp,
+ error = xfs_da3_node_read(context->tp, dp,
cursor->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error)
@@ -297,7 +297,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
XFS_ERRLEVEL_LOW,
context->dp->i_mount,
node);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return -EFSCORRUPTED;
}
@@ -313,10 +313,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
}
}
if (i == nodehdr.count) {
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return 0;
}
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
}
}
ASSERT(bp != NULL);
@@ -333,12 +333,12 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (context->seen_enough || leafhdr.forw == 0)
break;
cursor->blkno = leafhdr.forw;
- xfs_trans_brelse(NULL, bp);
- error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp);
+ xfs_trans_brelse(context->tp, bp);
+ error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, -1, &bp);
if (error)
return error;
}
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return 0;
}
@@ -448,12 +448,12 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
trace_xfs_attr_leaf_list(context);
context->cursor->blkno = 0;
- error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
+ error = xfs_attr3_leaf_read(context->tp, context->dp, 0, -1, &bp);
if (error)
return error;
xfs_attr3_leaf_list_int(bp, context);
- xfs_trans_brelse(NULL, bp);
+ xfs_trans_brelse(context->tp, bp);
return 0;
}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 18b211f..760552d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3368,7 +3368,8 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_BMBTD, "bmapbtd" }, \
{ XFS_SCRUB_TYPE_BMBTA, "bmapbta" }, \
{ XFS_SCRUB_TYPE_BMBTC, "bmapbtc" }, \
- { XFS_SCRUB_TYPE_DIR, "dir" }
+ { XFS_SCRUB_TYPE_DIR, "dir" }, \
+ { XFS_SCRUB_TYPE_XATTR, "xattr" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 23/47] xfs: scrub symbolic links
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (21 preceding siblings ...)
2017-01-07 0:38 ` [PATCH 22/47] xfs: scrub extended attributes Darrick J. Wong
@ 2017-01-07 0:38 ` Darrick J. Wong
2017-01-07 0:38 ` [PATCH 24/47] xfs: scrub realtime bitmap/summary Darrick J. Wong
` (24 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:38 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Create the infrastructure to scrub symbolic link data.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_fs.h | 3 +
fs/xfs/repair/common.c | 1
fs/xfs/repair/common.h | 5 ++
fs/xfs/repair/symlink.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_trace.h | 3 +
6 files changed, 118 insertions(+), 2 deletions(-)
create mode 100644 fs/xfs/repair/symlink.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ae045ff..bdab112 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -115,6 +115,7 @@ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
inode.o \
refcount.o \
rmap.o \
+ symlink.o \
)
# low-level transaction/log code
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 4da3718..023a842 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -591,7 +591,8 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_BMBTC 14 /* CoW fork block mapping */
#define XFS_SCRUB_TYPE_DIR 15 /* directory */
#define XFS_SCRUB_TYPE_XATTR 16 /* extended attribute */
-#define XFS_SCRUB_TYPE_MAX 16
+#define XFS_SCRUB_TYPE_SYMLINK 17 /* symbolic link */
+#define XFS_SCRUB_TYPE_MAX 17
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 8f7c6ee..636bceed 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -687,6 +687,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_cow, NULL, NULL},
{xfs_scrub_setup_inode, xfs_scrub_directory, NULL, NULL},
{xfs_scrub_setup_inode_xattr, xfs_scrub_xattr, NULL, NULL},
+ {xfs_scrub_setup_inode_symlink, xfs_scrub_symlink, NULL, NULL},
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 0f3ffd7..123dc1d 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -231,6 +231,10 @@ int xfs_scrub_setup_inode_xattr(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
bool retry_deadlocked);
+int xfs_scrub_setup_inode_symlink(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
/* Metadata scrubbers */
@@ -250,5 +254,6 @@ int xfs_scrub_bmap_attr(struct xfs_scrub_context *sc);
int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc);
int xfs_scrub_directory(struct xfs_scrub_context *sc);
int xfs_scrub_xattr(struct xfs_scrub_context *sc);
+int xfs_scrub_symlink(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/symlink.c b/fs/xfs/repair/symlink.c
new file mode 100644
index 0000000..8b4fb31
--- /dev/null
+++ b/fs/xfs/repair/symlink.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_symlink.h"
+#include "repair/common.h"
+
+/* Set us up with an inode and a MAXPATHLEN + 1 byte buffer for reading symlink targets. */
+int
+xfs_scrub_setup_inode_symlink(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ void *buf;
+ int error;
+
+ /* Allocate the buffer before xfs_scrub_setup_inode(), i.e. without the inode lock held. */
+ buf = kmem_zalloc_large(MAXPATHLEN + 1, KM_SLEEP); /* NOTE(review): + 1 presumably for a NUL - confirm */
+ if (!buf)
+ return -ENOMEM;
+
+ error = xfs_scrub_setup_inode(sc, ip, sm, retry_deadlocked);
+ if (error) {
+ kmem_free(buf); /* sc never took ownership, so free it here */
+ return error;
+ }
+
+ sc->buf = buf; /* scrub context now owns the buffer */
+ return 0;
+}
+
+/* Symbolic links: check the recorded target length of inline and remote symlinks. */
+
+#define XFS_SCRUB_SYMLINK_CHECK(fs_ok) \
+ XFS_SCRUB_INO_CHECK(sc, ip->i_ino, NULL, "symlink", fs_ok)
+#define XFS_SCRUB_SYMLINK_GOTO(fs_ok, label) \
+ XFS_SCRUB_INO_GOTO(sc, ip->i_ino, NULL, "symlink", fs_ok, label)
+int
+xfs_scrub_symlink(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_inode *ip = sc->ip;
+ struct xfs_ifork *ifp;
+ loff_t len;
+ int error = 0;
+
+ if (!S_ISLNK(VFS_I(ip)->i_mode))
+ return -ENOENT; /* not a symlink, nothing to scrub */
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ len = ip->i_d.di_size;
+
+ /* Plausible size? */
+ XFS_SCRUB_SYMLINK_GOTO(len <= MAXPATHLEN, out);
+
+ /* Inline symlink? The target is in the data fork's in-core if_data. */
+ if (ifp->if_flags & XFS_IFINLINE) {
+ XFS_SCRUB_SYMLINK_GOTO(len > 0, out);
+ XFS_SCRUB_SYMLINK_CHECK(len <= XFS_IFORK_DSIZE(ip));
+ XFS_SCRUB_SYMLINK_CHECK(len <= strnlen(ifp->if_u1.if_data,
+ XFS_IFORK_DSIZE(ip)));
+ goto out;
+ }
+
+ /* Remote symlink; must read the target into sc->buf. */
+ xfs_iunlock(sc->ip, XFS_ILOCK_EXCL); /* NOTE(review): presumably xfs_readlink locks the inode itself - confirm */
+ error = xfs_readlink(sc->ip, sc->buf);
+ xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+ XFS_SCRUB_FILE_OP_ERROR_GOTO(sc, XFS_DATA_FORK, 0, "symlink",
+ &error, out);
+ XFS_SCRUB_SYMLINK_CHECK(len <= strnlen(sc->buf, MAXPATHLEN));
+out:
+ return error;
+}
+#undef XFS_SCRUB_SYMLINK_GOTO
+#undef XFS_SCRUB_SYMLINK_CHECK
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 760552d..87fd942 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3369,7 +3369,8 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_BMBTA, "bmapbta" }, \
{ XFS_SCRUB_TYPE_BMBTC, "bmapbtc" }, \
{ XFS_SCRUB_TYPE_DIR, "dir" }, \
- { XFS_SCRUB_TYPE_XATTR, "xattr" }
+ { XFS_SCRUB_TYPE_XATTR, "xattr" }, \
+ { XFS_SCRUB_TYPE_SYMLINK, "symlink" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 24/47] xfs: scrub realtime bitmap/summary
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (22 preceding siblings ...)
2017-01-07 0:38 ` [PATCH 23/47] xfs: scrub symbolic links Darrick J. Wong
@ 2017-01-07 0:38 ` Darrick J. Wong
2017-01-07 0:38 ` [PATCH 25/47] xfs: scrub should cross-reference with the bnobt Darrick J. Wong
` (23 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:38 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Perform simple tests of the realtime bitmap and summary.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 5 ++
fs/xfs/libxfs/xfs_format.h | 5 ++
fs/xfs/libxfs/xfs_fs.h | 4 +
fs/xfs/libxfs/xfs_rtbitmap.c | 2 -
fs/xfs/repair/agheader.c | 1
fs/xfs/repair/common.c | 7 ++
fs/xfs/repair/common.h | 6 ++
fs/xfs/repair/rtbitmap.c | 131 ++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_rtalloc.h | 3 +
fs/xfs/xfs_trace.h | 4 +
10 files changed, 165 insertions(+), 3 deletions(-)
create mode 100644 fs/xfs/repair/rtbitmap.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index bdab112..7cee02a 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -117,6 +117,11 @@ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
rmap.o \
symlink.o \
)
+ifeq ($(CONFIG_XFS_DEBUG)$(CONFIG_XFS_RT),yy)
+xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
+ rtbitmap.o \
+ )
+endif
# low-level transaction/log code
xfs-y += xfs_log.o \
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 301effc..cb00017 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -315,6 +315,11 @@ static inline bool xfs_sb_good_version(struct xfs_sb *sbp)
return false;
}
+static inline bool xfs_sb_version_hasrealtime(struct xfs_sb *sbp)
+{
+ return sbp->sb_rblocks > 0; /* true when the superblock records any realtime blocks */
+}
+
/*
* Detect a mismatched features2 field. Older kernels read/wrote
* this into the wrong slot, so to be safe we keep them in sync.
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 023a842..5cc8462 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -592,7 +592,9 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_DIR 15 /* directory */
#define XFS_SCRUB_TYPE_XATTR 16 /* extended attribute */
#define XFS_SCRUB_TYPE_SYMLINK 17 /* symbolic link */
-#define XFS_SCRUB_TYPE_MAX 17
+#define XFS_SCRUB_TYPE_RTBITMAP 18 /* realtime bitmap */
+#define XFS_SCRUB_TYPE_RTSUM 19 /* realtime summary */
+#define XFS_SCRUB_TYPE_MAX 19
#define XFS_SCRUB_FLAG_REPAIR 0x1 /* i: repair this metadata */
#define XFS_SCRUB_FLAG_CORRUPT 0x2 /* o: needs repair */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index ea45584..f4b68c0 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -70,7 +70,7 @@ const struct xfs_buf_ops xfs_rtbuf_ops = {
* Get a buffer for the bitmap or summary file block specified.
* The buffer is returned read and locked.
*/
-static int
+int
xfs_rtbuf_get(
xfs_mount_t *mp, /* file system mount structure */
xfs_trans_t *tp, /* transaction pointer */
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index 3623f4c..8812b64 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -248,6 +248,7 @@ xfs_scrub_superblock(
XFS_SCRUB_SB_FEAT(metauuid);
XFS_SCRUB_SB_FEAT(rmapbt);
XFS_SCRUB_SB_FEAT(reflink);
+ XFS_SCRUB_SB_FEAT(realtime);
#undef XFS_SCRUB_SB_FEAT
out:
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 636bceed..6adccb5 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -688,6 +688,13 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_inode, xfs_scrub_directory, NULL, NULL},
{xfs_scrub_setup_inode_xattr, xfs_scrub_xattr, NULL, NULL},
{xfs_scrub_setup_inode_symlink, xfs_scrub_symlink, NULL, NULL},
+#ifdef CONFIG_XFS_RT
+ {xfs_scrub_setup_rt, xfs_scrub_rtbitmap, NULL, xfs_sb_version_hasrealtime},
+ {xfs_scrub_setup_rt, xfs_scrub_rtsummary, NULL, xfs_sb_version_hasrealtime},
+#else
+ {NULL, NULL, NULL, NULL},
+ {NULL, NULL, NULL, NULL},
+#endif
};
/* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 123dc1d..8dc68b9 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -235,6 +235,10 @@ int xfs_scrub_setup_inode_symlink(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
bool retry_deadlocked);
+int xfs_scrub_setup_rt(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
/* Metadata scrubbers */
@@ -255,5 +259,7 @@ int xfs_scrub_bmap_cow(struct xfs_scrub_context *sc);
int xfs_scrub_directory(struct xfs_scrub_context *sc);
int xfs_scrub_xattr(struct xfs_scrub_context *sc);
int xfs_scrub_symlink(struct xfs_scrub_context *sc);
+int xfs_scrub_rtbitmap(struct xfs_scrub_context *sc);
+int xfs_scrub_rtsummary(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/rtbitmap.c b/fs/xfs/repair/rtbitmap.c
new file mode 100644
index 0000000..6492e35
--- /dev/null
+++ b/fs/xfs/repair/rtbitmap.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "repair/common.h"
+
+/* Set us up with the rt bitmap inode locked and joined to the scrub transaction. */
+int
+xfs_scrub_setup_rt(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ int lockmode;
+ int error = 0;
+
+ if (sm->sm_agno || sm->sm_ino || sm->sm_gen)
+ return -EINVAL; /* these request fields must all be zero for rt scrubs */
+
+ error = xfs_scrub_setup(sc, ip, sm, retry_deadlocked);
+ if (error)
+ return error;
+
+ lockmode = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP;
+ xfs_ilock(mp->m_rbmip, lockmode);
+ xfs_trans_ijoin(sc->tp, mp->m_rbmip, lockmode); /* NOTE(review): presumably unlocked when sc->tp finishes - confirm */
+
+ return 0;
+}
+
+/* Realtime bitmap. */
+
+#define XFS_SCRUB_RTBITMAP_CHECK(fs_ok) \
+ XFS_SCRUB_CHECK(sc, bp, "rtbitmap", fs_ok);
+#define XFS_SCRUB_RTBITMAP_OP_ERROR_GOTO(error, label) \
+ XFS_SCRUB_OP_ERROR_GOTO(sc, 0, 0, "rtbitmap", error, label)
+/* Scrub the realtime bitmap by walking it one extent at a time from block 0. */
+int
+xfs_scrub_rtbitmap(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_buf *bp = NULL;
+ xfs_rtblock_t rtstart;
+ xfs_rtblock_t rtend = 0;
+ xfs_rtblock_t block;
+ xfs_rtblock_t rem;
+ int is_free = 0; /* NOTE(review): filled in by xfs_rtcheck_range but never read */
+ int error = 0;
+ int err2 = 0;
+
+ /* Iterate the bitmap, looking for discrepancies; rem counts rt blocks left to visit. */
+ rtstart = 0;
+ rem = mp->m_sb.sb_rblocks;
+ while (rem) {
+ if (xfs_scrub_should_terminate(&error))
+ break;
+
+ /* Is the first block free? */
+ err2 = xfs_rtcheck_range(mp, sc->tp, rtstart, 1, 1, &rtend,
+ &is_free);
+ XFS_SCRUB_RTBITMAP_OP_ERROR_GOTO(&err2, out);
+
+ /* How long does the extent go for? */
+ err2 = xfs_rtfind_forw(mp, sc->tp, rtstart,
+ mp->m_sb.sb_rblocks - 1, &rtend);
+ XFS_SCRUB_RTBITMAP_OP_ERROR_GOTO(&err2, out);
+
+ /* Find the buffer for error reporting. */
+ block = XFS_BITTOBLOCK(mp, rtstart);
+ err2 = xfs_rtbuf_get(mp, sc->tp, block, 0, &bp);
+ XFS_SCRUB_RTBITMAP_OP_ERROR_GOTO(&err2, out);
+ XFS_SCRUB_RTBITMAP_CHECK(rtend >= rtstart);
+
+ xfs_trans_brelse(sc->tp, bp);
+ bp = NULL;
+ rem -= rtend - rtstart + 1; /* NOTE(review): looks unsafe if rtend < rtstart - confirm */
+ rtstart = rtend + 1;
+ }
+
+out:
+ if (bp)
+ xfs_trans_brelse(sc->tp, bp);
+ if (!error && err2) /* a termination error in 'error' takes precedence over err2 */
+ error = err2;
+ return error;
+}
+#undef XFS_SCRUB_RTBITMAP_OP_ERROR_GOTO
+#undef XFS_SCRUB_RTBITMAP_CHECK
+
+/* Scrub the realtime summary. Not implemented yet: always returns -ENOENT. */
+int
+xfs_scrub_rtsummary(
+ struct xfs_scrub_context *sc)
+{
+ /* XXX: implement this some day */
+ return -ENOENT;
+}
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index f798a3e..3036349 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -98,6 +98,8 @@ xfs_growfs_rt(
/*
* From xfs_rtbitmap.c
*/
+int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtblock_t start, xfs_extlen_t len, int val,
xfs_rtblock_t *new, int *stat);
@@ -128,6 +130,7 @@ int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
# define xfs_growfs_rt(mp,in) (ENOSYS)
# define xfs_rtcheck_range(...) (ENOSYS)
# define xfs_rtfind_forw(...) (ENOSYS)
+# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 87fd942..c52e3a5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3370,7 +3370,9 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
{ XFS_SCRUB_TYPE_BMBTC, "bmapbtc" }, \
{ XFS_SCRUB_TYPE_DIR, "dir" }, \
{ XFS_SCRUB_TYPE_XATTR, "xattr" }, \
- { XFS_SCRUB_TYPE_SYMLINK, "symlink" }
+ { XFS_SCRUB_TYPE_SYMLINK, "symlink" }, \
+ { XFS_SCRUB_TYPE_RTBITMAP, "rtbitmap" }, \
+ { XFS_SCRUB_TYPE_RTSUM, "rtsummary" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, int type, xfs_agnumber_t agno,
xfs_ino_t inum, unsigned int gen, unsigned int flags,
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 25/47] xfs: scrub should cross-reference with the bnobt
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (23 preceding siblings ...)
2017-01-07 0:38 ` [PATCH 24/47] xfs: scrub realtime bitmap/summary Darrick J. Wong
@ 2017-01-07 0:38 ` Darrick J. Wong
2017-01-07 0:38 ` [PATCH 26/47] xfs: cross-reference bnobt records with cntbt Darrick J. Wong
` (22 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:38 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
When we're scrubbing various btrees, cross-reference the records with
the bnobt to ensure that we don't also think the space is free.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_alloc.c | 19 ++++++++
fs/xfs/libxfs/xfs_alloc.h | 3 +
fs/xfs/repair/agheader.c | 106 ++++++++++++++++++++++++++++++++++++++++++++-
fs/xfs/repair/bmap.c | 14 ++++++
fs/xfs/repair/btree.c | 101 +++++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 38 ++++++++++++++++
fs/xfs/repair/common.h | 10 ++++
fs/xfs/repair/ialloc.c | 13 ++++++
fs/xfs/repair/refcount.c | 17 +++++++
fs/xfs/repair/rmap.c | 18 +++++++-
10 files changed, 336 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 1b6bddb..ad8044b 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2991,3 +2991,22 @@ xfs_alloc_query_all(
query.fn = fn;
return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
}
+
+/*
+ * Is there a record covering a given extent?
+ *
+ * Builds a low key that starts at @bno and a high key that starts at the
+ * last block of the extent (@bno + @len - 1), then passes both to
+ * xfs_btree_has_record() on @cur.  The answer comes back through @exists;
+ * the return value is whatever xfs_btree_has_record() returns.
+ *
+ * NOTE(review): @len == 0 would put the high key at @bno - 1, i.e. below
+ * the low key; callers presumably never pass an empty extent -- confirm.
+ */
+int
+xfs_alloc_has_record(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool *exists)
+{
+ union xfs_btree_irec low;
+ union xfs_btree_irec high;
+
+ memset(&low, 0, sizeof(low)); /* every other low-key byte is 0 */
+ low.a.ar_startblock = bno;
+ memset(&high, 0xFF, sizeof(high)); /* every other high-key byte is 0xFF */
+ high.a.ar_startblock = bno + len - 1;
+
+ return xfs_btree_has_record(cur, &low, &high, exists);
+}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 89a23be..3fd6540 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -237,4 +237,7 @@ int xfs_alloc_query_range(struct xfs_btree_cur *cur,
int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
void *priv);
+int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ xfs_extlen_t len, bool *exist);
+
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index 8812b64..6375f19 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -31,6 +31,7 @@
#include "xfs_trace.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
+#include "xfs_alloc.h"
#include "repair/common.h"
/* Set us up to check an AG header. */
@@ -129,10 +130,13 @@ xfs_scrub_superblock(
{
struct xfs_mount *mp = sc->tp->t_mountp;
struct xfs_buf *bp;
+ struct xfs_scrub_ag *psa;
struct xfs_sb sb;
xfs_agnumber_t agno;
uint32_t v2_ok;
+ bool is_freesp;
int error;
+ int err2;
agno = sc->sm->sm_agno;
@@ -152,7 +156,7 @@ xfs_scrub_superblock(
* so there's no point in comparing the two.
*/
if (agno == 0)
- goto out;
+ goto btree_xref;
xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
@@ -251,6 +255,24 @@ xfs_scrub_superblock(
XFS_SCRUB_SB_FEAT(realtime);
#undef XFS_SCRUB_SB_FEAT
+ if (error)
+ goto out;
+
+btree_xref:
+
+ err2 = xfs_scrub_ag_init(sc, agno, &sc->sa);
+ if (!xfs_scrub_should_xref(sc, err2, NULL))
+ goto out;
+
+ psa = &sc->sa;
+ /* Cross-reference with bnobt. */
+ if (psa->bno_cur) {
+ err2 = xfs_alloc_has_record(psa->bno_cur, XFS_SB_BLOCK(mp),
+ 1, &is_freesp);
+ if (xfs_scrub_should_xref(sc, err2, &psa->bno_cur))
+ XFS_SCRUB_SB_CHECK(!is_freesp);
+ }
+
out:
return error;
}
@@ -259,6 +281,19 @@ xfs_scrub_superblock(
/* AGF */
+/*
+ * Tally freespace record lengths.
+ *
+ * Per-record callback handed to xfs_alloc_query_all() by xfs_scrub_agf():
+ * adds @rec->ar_blockcount to the xfs_extlen_t counter that @priv points
+ * to.  @cur is unused.  Always returns 0.
+ */
+STATIC int
+xfs_scrub_agf_record_bno_lengths(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *rec,
+ void *priv)
+{
+ xfs_extlen_t *blocks = priv;
+
+ (*blocks) += rec->ar_blockcount;
+ return 0;
+}
+
#define XFS_SCRUB_AGF_CHECK(fs_ok) \
XFS_SCRUB_CHECK(sc, sc->sa.agf_bp, "AGF", fs_ok)
#define XFS_SCRUB_AGF_OP_ERROR_GOTO(error, label) \
@@ -271,6 +306,7 @@ xfs_scrub_agf(
{
struct xfs_mount *mp = sc->tp->t_mountp;
struct xfs_agf *agf;
+ struct xfs_scrub_ag *psa;
xfs_daddr_t daddr;
xfs_daddr_t eofs;
xfs_agnumber_t agno;
@@ -280,8 +316,11 @@ xfs_scrub_agf(
xfs_agblock_t agfl_last;
xfs_agblock_t agfl_count;
xfs_agblock_t fl_count;
+ xfs_extlen_t blocks;
+ bool is_freesp;
int level;
int error = 0;
+ int err2;
agno = sc->sm->sm_agno;
error = xfs_scrub_load_ag_headers(sc, agno, XFS_SCRUB_TYPE_AGF);
@@ -353,6 +392,31 @@ xfs_scrub_agf(
fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1;
XFS_SCRUB_AGF_CHECK(agfl_count == 0 || fl_count == agfl_count);
+ /* Load btrees for xref if the AGF is ok. */
+ psa = &sc->sa;
+ if (error || (sc->sm->sm_flags & XFS_SCRUB_FLAG_CORRUPT))
+ goto out;
+ error = xfs_scrub_ag_btcur_init(sc, psa);
+ if (error)
+ goto out;
+
+ /* Cross-reference with the bnobt. */
+ if (psa->bno_cur) {
+ err2 = xfs_alloc_has_record(psa->bno_cur, XFS_AGF_BLOCK(mp),
+ 1, &is_freesp);
+ if (!xfs_scrub_should_xref(sc, err2, &psa->bno_cur))
+ goto skip_bnobt;
+ XFS_SCRUB_AGF_CHECK(!is_freesp);
+
+ blocks = 0;
+ err2 = xfs_alloc_query_all(psa->bno_cur,
+ xfs_scrub_agf_record_bno_lengths, &blocks);
+ if (!xfs_scrub_should_xref(sc, err2, &psa->bno_cur))
+ goto skip_bnobt;
+ XFS_SCRUB_AGF_CHECK(blocks == be32_to_cpu(agf->agf_freeblks));
+ }
+skip_bnobt:
+
out:
return error;
}
@@ -378,12 +442,22 @@ xfs_scrub_agfl_block(
struct xfs_mount *mp = sc->tp->t_mountp;
xfs_agnumber_t agno = sc->sa.agno;
struct xfs_scrub_agfl *sagfl = priv;
+ bool is_freesp;
+ int err2;
XFS_SCRUB_AGFL_CHECK(agbno > XFS_AGI_BLOCK(mp));
XFS_SCRUB_AGFL_CHECK(XFS_AGB_TO_DADDR(mp, agno, agbno) < sagfl->eofs);
XFS_SCRUB_AGFL_CHECK(agbno < mp->m_sb.sb_agblocks);
XFS_SCRUB_AGFL_CHECK(agbno < sagfl->eoag);
+ /* Cross-reference with the bnobt. */
+ if (sc->sa.bno_cur) {
+ err2 = xfs_alloc_has_record(sc->sa.bno_cur, agbno,
+ 1, &is_freesp);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.bno_cur))
+ XFS_SCRUB_AGFL_CHECK(!is_freesp);
+ }
+
return 0;
}
@@ -398,7 +472,9 @@ xfs_scrub_agfl(
struct xfs_scrub_agfl sagfl;
struct xfs_mount *mp = sc->tp->t_mountp;
struct xfs_agf *agf;
+ bool is_freesp;
int error;
+ int err2;
error = xfs_scrub_load_ag_headers(sc, sc->sm->sm_agno,
XFS_SCRUB_TYPE_AGFL);
@@ -410,6 +486,14 @@ xfs_scrub_agfl(
sagfl.eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
sagfl.eoag = be32_to_cpu(agf->agf_length);
+ /* Cross-reference with the bnobt. */
+ if (sc->sa.bno_cur) {
+ err2 = xfs_alloc_has_record(sc->sa.bno_cur, XFS_AGFL_BLOCK(mp),
+ 1, &is_freesp);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.bno_cur))
+ XFS_SCRUB_AGFL_CHECK(!is_freesp);
+ }
+
/* Check the blocks in the AGFL. */
return xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sagfl);
out:
@@ -432,6 +516,7 @@ xfs_scrub_agi(
{
struct xfs_mount *mp = sc->tp->t_mountp;
struct xfs_agi *agi;
+ struct xfs_scrub_ag *psa;
xfs_daddr_t daddr;
xfs_daddr_t eofs;
xfs_agnumber_t agno;
@@ -440,9 +525,11 @@ xfs_scrub_agi(
xfs_agino_t agino;
xfs_agino_t first_agino;
xfs_agino_t last_agino;
+ bool is_freesp;
int i;
int level;
int error = 0;
+ int err2;
agno = sc->sm->sm_agno;
error = xfs_scrub_load_ag_headers(sc, agno, XFS_SCRUB_TYPE_AGI);
@@ -508,8 +595,23 @@ xfs_scrub_agi(
XFS_SCRUB_AGI_CHECK(agino <= last_agino);
}
+ /* Load btrees for xref if the AGI is ok. */
+ psa = &sc->sa;
+ if (error || (sc->sm->sm_flags & XFS_SCRUB_FLAG_CORRUPT))
+ goto out;
+ error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+ if (error)
+ goto out;
+
+ /* Cross-reference with bnobt. */
+ if (psa->bno_cur) {
+ err2 = xfs_alloc_has_record(psa->bno_cur, XFS_AGI_BLOCK(mp),
+ 1, &is_freesp);
+ if (xfs_scrub_should_xref(sc, err2, &psa->bno_cur))
+ XFS_SCRUB_AGI_CHECK(!is_freesp);
+ }
+
out:
return error;
}
#undef XFS_SCRUB_AGI_CHECK
-#undef XFS_SCRUB_AGI_OP_ERROR_GOTO
diff --git a/fs/xfs/repair/bmap.c b/fs/xfs/repair/bmap.c
index bc1ad8e..5cb89f7 100644
--- a/fs/xfs/repair/bmap.c
+++ b/fs/xfs/repair/bmap.c
@@ -36,6 +36,7 @@
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
+#include "xfs_alloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -100,7 +101,10 @@ xfs_scrub_bmap_extent(
xfs_daddr_t daddr;
xfs_daddr_t dlen;
xfs_agnumber_t agno;
+ xfs_fsblock_t bno;
+ bool is_freesp;
int error = 0;
+ int err2 = 0;
if (cur)
xfs_btree_get_block(cur, 0, &bp);
@@ -117,10 +121,12 @@ xfs_scrub_bmap_extent(
if (info->is_rt) {
daddr = XFS_FSB_TO_BB(mp, irec->br_startblock);
agno = NULLAGNUMBER;
+ bno = irec->br_startblock;
} else {
daddr = XFS_FSB_TO_DADDR(mp, irec->br_startblock);
agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
XFS_SCRUB_BMAP_GOTO(agno < mp->m_sb.sb_agcount, out);
+ bno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
}
dlen = XFS_FSB_TO_BB(mp, irec->br_blockcount);
XFS_SCRUB_BMAP_CHECK(daddr < info->eofs);
@@ -138,6 +144,14 @@ xfs_scrub_bmap_extent(
XFS_SCRUB_BMAP_OP_ERROR_GOTO(out);
}
+ /* Cross-reference with the bnobt. */
+ if (sa.bno_cur) {
+ err2 = xfs_alloc_has_record(sa.bno_cur, bno,
+ irec->br_blockcount, &is_freesp);
+ if (xfs_scrub_should_xref(info->sc, err2, &sa.bno_cur))
+ XFS_SCRUB_BMAP_CHECK(!is_freesp);
+ }
+
xfs_scrub_ag_free(&sa);
out:
info->lastoff = irec->br_startoff + irec->br_blockcount;
diff --git a/fs/xfs/repair/btree.c b/fs/xfs/repair/btree.c
index 6956503..c13762e 100644
--- a/fs/xfs/repair/btree.c
+++ b/fs/xfs/repair/btree.c
@@ -494,6 +494,93 @@ xfs_scrub_btree_sblock_check_siblings(
return error;
}
+struct check_owner {
+ struct list_head list; /* link on the xfs_scrub_btree to_check list */
+ xfs_fsblock_t fsb; /* btree block whose owner check was deferred */
+};
+
+/*
+ * Make sure this btree block isn't in the free list and that there's
+ * an rmap record for it.
+ *
+ * NOTE(review): only the bnobt (free space) lookup is present in this
+ * body; no rmap lookup is visible here.
+ *
+ * For long-pointer btrees the AG containing @fsb is initialised into a
+ * local xfs_scrub_ag (after checking that it can be locked) and freed
+ * again before returning; otherwise the scrub context's own AG state
+ * (bs->sc->sa) is used.
+ *
+ * Returns 0, -EDEADLOCK if the AG cannot be locked, or the error from
+ * xfs_scrub_ag_init().  The result of the bnobt lookup (err2) is not
+ * propagated to the caller.
+ */
+STATIC int
+xfs_scrub_btree_check_block_owner(
+ struct xfs_scrub_btree *bs,
+ xfs_fsblock_t fsb)
+{
+ struct xfs_scrub_ag sa;
+ struct xfs_scrub_ag *psa;
+ xfs_agnumber_t agno;
+ xfs_agblock_t bno;
+ bool is_freesp;
+ int error = 0;
+ int err2;
+
+ agno = XFS_FSB_TO_AGNO(bs->cur->bc_mp, fsb);
+ bno = XFS_FSB_TO_AGBNO(bs->cur->bc_mp, fsb);
+
+ if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (!xfs_scrub_ag_can_lock(bs->sc, agno))
+ return -EDEADLOCK;
+ error = xfs_scrub_ag_init(bs->sc, agno, &sa);
+ if (error)
+ return error;
+ psa = &sa;
+ } else
+ psa = &bs->sc->sa;
+
+ /*
+  * Check that this block isn't free.
+  *
+  * NOTE(review): NULL is passed instead of &psa->bno_cur, so a failed
+  * lookup leaves the cursor in place; presumably because psa->bno_cur
+  * can be the very cursor being scrubbed -- confirm.
+  */
+ if (psa->bno_cur) {
+ err2 = xfs_alloc_has_record(psa->bno_cur, bno, 1, &is_freesp);
+ if (xfs_scrub_btree_should_xref(bs, err2, NULL))
+ XFS_SCRUB_BTREC_CHECK(bs, !is_freesp);
+ }
+
+ if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ xfs_scrub_ag_free(&sa);
+
+ return error;
+}
+
+/*
+ * Check the owner of a btree block.
+ *
+ * Converts the disk address of @bp to an fsblock and either checks it
+ * immediately via xfs_scrub_btree_check_block_owner() or, when the btree
+ * being scanned is the bnobt or rmapbt, queues it on bs->to_check.
+ * An inode-rooted btree with no buffer (@bp == NULL) is skipped with 0.
+ * Returns -EDEADLOCK for a long-pointer btree whose AG cannot be locked.
+ */
+STATIC int
+xfs_scrub_btree_check_owner(
+ struct xfs_scrub_btree *bs,
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ struct check_owner *co;
+ xfs_fsblock_t fsbno;
+ xfs_agnumber_t agno;
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
+ return 0;
+
+ fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
+ agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsbno);
+
+ /* Turn back if we could deadlock. */
+ if ((bs->cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
+ !xfs_scrub_ag_can_lock(bs->sc, agno))
+ return -EDEADLOCK;
+
+ /*
+ * We want to cross-reference each btree block with the bnobt
+ * and the rmapbt. We cannot cross-reference the bnobt or
+ * rmapbt while scanning the bnobt or rmapbt, respectively,
+ * because that would trash the cursor state. Therefore, save
+ * the block numbers for later scanning.
+ */
+ if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
+ co = kmem_alloc(sizeof(struct check_owner), KM_SLEEP | KM_NOFS);
+ co->fsb = fsbno;
+ list_add_tail(&co->list, &bs->to_check); /* drained at the end of xfs_scrub_btree() */
+ return 0;
+ }
+
+ return xfs_scrub_btree_check_block_owner(bs, fsbno);
+}
+
/* Grab and scrub a btree block. */
STATIC int
xfs_scrub_btree_block(
@@ -514,6 +601,10 @@ xfs_scrub_btree_block(
if (error)
return error;
+ error = xfs_scrub_btree_check_owner(bs, *pbp);
+ if (error)
+ return error;
+
return bs->check_siblings_fn(bs, *pblock);
}
@@ -539,6 +630,8 @@ xfs_scrub_btree(
struct xfs_btree_block *block;
int level;
struct xfs_buf *bp;
+ struct check_owner *co;
+ struct check_owner *n;
int i;
int error = 0;
@@ -653,6 +746,14 @@ xfs_scrub_btree(
}
}
+ /* Process deferred owner checks on btree blocks. */
+ list_for_each_entry_safe(co, n, &bs.to_check, list) {
+ if (!error)
+ error = xfs_scrub_btree_check_block_owner(&bs, co->fsb);
+ list_del(&co->list);
+ kmem_free(co);
+ }
+
out_badcursor:
return error;
}
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 6adccb5..458057a 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -533,6 +533,44 @@ xfs_scrub_ag_lock_all(
return error;
}
+/*
+ * Predicate that decides if we need to evaluate the cross-reference check.
+ * If there was an error accessing the cross-reference btree, just delete
+ * the cursor and skip the check.
+ *
+ * @sc:    scrub context; XFS_SCRUB_FLAG_XREF_FAIL is set in sc->sm->sm_flags
+ *         when a btree cross-reference fails.
+ * @error: result of the cross-reference operation the caller just ran.
+ * @curpp: address of the cursor used for the cross-reference, or NULL if
+ *         the operation was not a btree lookup.  On error the cursor is
+ *         deleted and *@curpp is set to NULL so later checks skip it.
+ * @func, @line: call site, recorded in the xref error tracepoint.
+ *
+ * Returns true only if @error is zero, i.e. only if the caller's lookup
+ * produced a result that is safe to evaluate.
+ */
+bool
+__xfs_scrub_should_xref(
+	struct xfs_scrub_context	*sc,
+	int				error,
+	struct xfs_btree_cur		**curpp,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+
+	/* If not a btree cross-reference, just check the error code. */
+	if (curpp == NULL) {
+		if (error == 0)
+			return true;
+		trace_xfs_scrub_xref_error(mp, "unknown", error, func, line);
+		return false;
+	}
+
+	ASSERT(*curpp != NULL);
+
+	/* The lookup worked, so the caller's result is usable. */
+	if (error == 0)
+		return true;
+
+	/*
+	 * The lookup failed and the cursor is already gone: we gave up on
+	 * this cross-reference earlier.  The caller's output was never
+	 * filled in, so it must not evaluate the check.
+	 */
+	if (*curpp == NULL)
+		return false;
+
+	/* xref error, delete cursor and bail out. */
+	sc->sm->sm_flags |= XFS_SCRUB_FLAG_XREF_FAIL;
+	trace_xfs_scrub_xref_error(mp, btree_types[(*curpp)->bc_btnum],
+			error, func, line);
+	xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
+	*curpp = NULL;
+
+	return false;
+}
+
/* Dummy scrubber */
STATIC int
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 8dc68b9..bff1b2c 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -180,6 +180,14 @@ bool xfs_scrub_data_ok(struct xfs_scrub_context *sc, int whichfork,
goto label; \
} while(0)
+bool __xfs_scrub_should_xref(struct xfs_scrub_context *sc, int error,
+ struct xfs_btree_cur **curpp, const char *func,
+ int line);
+#define xfs_scrub_should_xref(sc, error, curpp) \
+ __xfs_scrub_should_xref((sc), (error), (curpp), __func__, __LINE__)
+#define xfs_scrub_btree_should_xref(bs, error, curpp) \
+ __xfs_scrub_should_xref((bs)->sc, (error), (curpp), __func__, __LINE__)
+
bool xfs_scrub_ag_can_lock(struct xfs_scrub_context *sc, xfs_agnumber_t agno);
int xfs_scrub_ag_lock_all(struct xfs_scrub_context *sc);
void xfs_scrub_ag_lock_init(struct xfs_mount *mp,
@@ -198,6 +206,8 @@ int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno,
void *),
void *priv);
+int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc,
+ struct xfs_scrub_ag *sa);
/* Setup functions */
diff --git a/fs/xfs/repair/ialloc.c b/fs/xfs/repair/ialloc.c
index 67cf727..3c45fe6 100644
--- a/fs/xfs/repair/ialloc.c
+++ b/fs/xfs/repair/ialloc.c
@@ -37,6 +37,7 @@
#include "xfs_rmap.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
+#include "xfs_alloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -83,9 +84,12 @@ xfs_scrub_iallocbt_chunk(
{
struct xfs_mount *mp = bs->cur->bc_mp;
struct xfs_agf *agf;
+ struct xfs_scrub_ag *psa;
xfs_agblock_t eoag;
xfs_agblock_t bno;
+ bool is_freesp;
int error = 0;
+ int err2;
agf = XFS_BUF_TO_AGF(bs->sc->sa.agf_bp);
eoag = be32_to_cpu(agf->agf_length);
@@ -104,6 +108,15 @@ xfs_scrub_iallocbt_chunk(
goto out;
}
+ psa = &bs->sc->sa;
+ /* Cross-reference with the bnobt. */
+ if (psa->bno_cur) {
+ err2 = xfs_alloc_has_record(psa->bno_cur, bno, len,
+ &is_freesp);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->bno_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !is_freesp);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/refcount.c b/fs/xfs/repair/refcount.c
index 186d83c..0cf82600 100644
--- a/fs/xfs/repair/refcount.c
+++ b/fs/xfs/repair/refcount.c
@@ -31,6 +31,7 @@
#include "xfs_trace.h"
#include "xfs_sb.h"
#include "xfs_rmap.h"
+#include "xfs_alloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -44,10 +45,13 @@ xfs_scrub_refcountbt_helper(
{
struct xfs_mount *mp = bs->cur->bc_mp;
struct xfs_agf *agf;
+ struct xfs_scrub_ag *psa;
struct xfs_refcount_irec irec;
xfs_agblock_t eoag;
bool has_cowflag;
+ bool is_freesp;
int error = 0;
+ int err2;
irec.rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
irec.rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
@@ -69,6 +73,19 @@ xfs_scrub_refcountbt_helper(
irec.rc_blockcount <= eoag);
XFS_SCRUB_BTREC_CHECK(bs, irec.rc_refcount >= 1);
+ if (error)
+ goto out;
+
+ psa = &bs->sc->sa;
+ /* Cross-reference with the bnobt. */
+ if (psa->bno_cur) {
+ err2 = xfs_alloc_has_record(psa->bno_cur, irec.rc_startblock,
+ irec.rc_blockcount, &is_freesp);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->bno_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !is_freesp);
+ }
+
+out:
return error;
}
diff --git a/fs/xfs/repair/rmap.c b/fs/xfs/repair/rmap.c
index 9ae3c72..c04cfb6 100644
--- a/fs/xfs/repair/rmap.c
+++ b/fs/xfs/repair/rmap.c
@@ -31,6 +31,7 @@
#include "xfs_trace.h"
#include "xfs_sb.h"
#include "xfs_rmap.h"
+#include "xfs_alloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -44,13 +45,16 @@ xfs_scrub_rmapbt_helper(
{
struct xfs_mount *mp = bs->cur->bc_mp;
struct xfs_agf *agf;
+ struct xfs_scrub_ag *psa;
struct xfs_rmap_irec irec;
xfs_agblock_t eoag;
+ bool is_freesp;
bool non_inode;
bool is_unwritten;
bool is_bmbt;
bool is_attr;
- int error;
+ int error = 0;
+ int err2;
error = xfs_rmap_btrec_to_irec(rec, &irec);
XFS_SCRUB_BTREC_OP_ERROR_GOTO(bs, &error, out);
@@ -99,6 +103,18 @@ xfs_scrub_rmapbt_helper(
XFS_SCRUB_BTREC_CHECK(bs, !non_inode ||
(irec.rm_owner > XFS_RMAP_OWN_MIN &&
irec.rm_owner <= XFS_RMAP_OWN_FS));
+ if (error)
+ goto out;
+
+ psa = &bs->sc->sa;
+ /* check there's no record in freesp btrees */
+ if (psa->bno_cur) {
+ err2 = xfs_alloc_has_record(psa->bno_cur, irec.rm_startblock,
+ irec.rm_blockcount, &is_freesp);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->bno_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !is_freesp);
+ }
+
out:
return error;
}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 26/47] xfs: cross-reference bnobt records with cntbt
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (24 preceding siblings ...)
2017-01-07 0:38 ` [PATCH 25/47] xfs: scrub should cross-reference with the bnobt Darrick J. Wong
@ 2017-01-07 0:38 ` Darrick J. Wong
2017-01-07 0:38 ` [PATCH 27/47] xfs: cross-reference extents with AG header Darrick J. Wong
` (21 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:38 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Scrub should make sure that each bnobt record has a corresponding
cntbt record.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_alloc.c | 2 +-
fs/xfs/libxfs/xfs_alloc.h | 7 +++++++
fs/xfs/repair/agheader.c | 20 ++++++++++++++++++++
fs/xfs/repair/alloc.c | 31 +++++++++++++++++++++++++++++++
4 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index ad8044b..4776f66 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -169,7 +169,7 @@ xfs_alloc_lookup_ge(
* Lookup the first record less than or equal to [bno, len]
* in the btree given by cur.
*/
-static int /* error */
+int /* error */
xfs_alloc_lookup_le(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t bno, /* starting block of extent */
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 3fd6540..b79159c 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -202,6 +202,13 @@ xfs_free_extent(
enum xfs_ag_resv_type type); /* block reservation type */
int /* error */
+xfs_alloc_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat); /* success/failure */
+
+int /* error */
xfs_alloc_lookup_ge(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t bno, /* starting block of extent */
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index 6375f19..b351885 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -318,6 +318,7 @@ xfs_scrub_agf(
xfs_agblock_t fl_count;
xfs_extlen_t blocks;
bool is_freesp;
+ int have;
int level;
int error = 0;
int err2;
@@ -417,6 +418,25 @@ xfs_scrub_agf(
}
skip_bnobt:
+	/*
+	 * Cross-reference with the cntbt.  The LE lookup of (0, -1U)
+	 * presumably lands on the longest free extent, since the record it
+	 * finds is compared against agf_longest below.
+	 */
+	if (psa->cnt_cur) {
+		err2 = xfs_alloc_lookup_le(psa->cnt_cur, 0, -1U, &have);
+		if (!xfs_scrub_should_xref(sc, err2, &psa->cnt_cur))
+			goto skip_cntbt;
+		if (!have) {
+			/*
+			 * No cntbt records at all, so the AGF must not claim
+			 * any free blocks.  Convert the on-disk field to CPU
+			 * order before comparing, like the other AGF checks.
+			 */
+			XFS_SCRUB_AGF_CHECK(
+					be32_to_cpu(agf->agf_freeblks) == 0);
+			goto skip_cntbt;
+		}
+		err2 = xfs_alloc_get_rec(psa->cnt_cur, &agbno, &blocks, &have);
+		if (!xfs_scrub_should_xref(sc, err2, &psa->cnt_cur))
+			goto skip_cntbt;
+		XFS_SCRUB_AGF_CHECK(have);
+		XFS_SCRUB_AGF_CHECK(!have ||
+				blocks == be32_to_cpu(agf->agf_longest));
+	}
+skip_cntbt:
+
out:
return error;
}
diff --git a/fs/xfs/repair/alloc.c b/fs/xfs/repair/alloc.c
index 2fef449..7cc15b8 100644
--- a/fs/xfs/repair/alloc.c
+++ b/fs/xfs/repair/alloc.c
@@ -31,6 +31,7 @@
#include "xfs_trace.h"
#include "xfs_sb.h"
#include "xfs_rmap.h"
+#include "xfs_alloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -65,9 +66,15 @@ xfs_scrub_allocbt_helper(
{
struct xfs_mount *mp = bs->cur->bc_mp;
struct xfs_agf *agf;
+ struct xfs_btree_cur **xcur;
+ struct xfs_scrub_ag *psa;
+ xfs_agblock_t fbno;
xfs_agblock_t bno;
+ xfs_extlen_t flen;
xfs_extlen_t len;
+ int has_otherrec;
int error = 0;
+ int err2;
bno = be32_to_cpu(rec->alloc.ar_startblock);
len = be32_to_cpu(rec->alloc.ar_blockcount);
@@ -81,6 +88,30 @@ xfs_scrub_allocbt_helper(
XFS_SCRUB_BTREC_CHECK(bs, (unsigned long long)bno + len <=
be32_to_cpu(agf->agf_length));
+ if (error)
+ goto out;
+
+ psa = &bs->sc->sa;
+ /*
+ * Ensure there's a corresponding cntbt/bnobt record matching
+ * this bnobt/cntbt record, respectively.
+ */
+ xcur = bs->cur == psa->bno_cur ? &psa->cnt_cur : &psa->bno_cur;
+ if (*xcur) {
+ err2 = xfs_alloc_lookup_le(*xcur, bno, len, &has_otherrec);
+ if (xfs_scrub_btree_should_xref(bs, err2, xcur)) {
+ XFS_SCRUB_BTREC_GOTO(bs, has_otherrec, out);
+ err2 = xfs_alloc_get_rec(*xcur, &fbno, &flen,
+ &has_otherrec);
+ if (xfs_scrub_btree_should_xref(bs, err2, xcur)) {
+ XFS_SCRUB_BTREC_GOTO(bs, has_otherrec, out);
+ XFS_SCRUB_BTREC_CHECK(bs, fbno == bno);
+ XFS_SCRUB_BTREC_CHECK(bs, flen == len);
+ }
+ }
+ }
+
+out:
return error;
}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 27/47] xfs: cross-reference extents with AG header
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (25 preceding siblings ...)
2017-01-07 0:38 ` [PATCH 26/47] xfs: cross-reference bnobt records with cntbt Darrick J. Wong
@ 2017-01-07 0:38 ` Darrick J. Wong
2017-01-07 0:38 ` [PATCH 28/47] xfs: cross-reference inode btrees during scrub Darrick J. Wong
` (20 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:38 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Ensure that none of the AG btree records overlap the AG sb/agf/agfl/agi
headers except for the XFS_RMAP_OWN_FS rmap.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/repair/agheader.c | 27 +++++++++++++++++++++++++++
fs/xfs/repair/alloc.c | 4 ++++
fs/xfs/repair/bmap.c | 5 +++++
fs/xfs/repair/common.h | 2 ++
fs/xfs/repair/ialloc.c | 4 ++++
fs/xfs/repair/refcount.c | 4 ++++
fs/xfs/repair/rmap.c | 5 +++++
7 files changed, 51 insertions(+)
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index b351885..3276e88 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -115,6 +115,30 @@ xfs_scrub_walk_agfl(
return 0;
}
+/* Is block @bno inside the extent that starts at @agbno and is @len long? */
+static inline bool
+xfs_scrub_extent_has_block(
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len,
+	xfs_agblock_t		bno)
+{
+	/*
+	 * Compare the offset into the extent instead of forming
+	 * agbno + len: for a garbage record that sum can wrap around and
+	 * make an extent that really does contain @bno look harmless.
+	 */
+	return bno >= agbno && bno - agbno < len;
+}
+
+/*
+ * Does this AG extent cover the AG headers?
+ *
+ * Returns true if the extent [@agbno, @agbno + @len) contains the
+ * superblock, AGF, AGFL or AGI block of an AG on @mp, false otherwise
+ * (including for a zero-length extent).
+ */
+bool
+xfs_scrub_extent_covers_ag_head(
+	struct xfs_mount	*mp,
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len)
+{
+	return xfs_scrub_extent_has_block(agbno, len, XFS_SB_BLOCK(mp)) ||
+	       xfs_scrub_extent_has_block(agbno, len, XFS_AGF_BLOCK(mp)) ||
+	       xfs_scrub_extent_has_block(agbno, len, XFS_AGFL_BLOCK(mp)) ||
+	       xfs_scrub_extent_has_block(agbno, len, XFS_AGI_BLOCK(mp));
+}
+
/* Superblock */
#define XFS_SCRUB_SB_CHECK(fs_ok) \
@@ -470,6 +494,9 @@ xfs_scrub_agfl_block(
XFS_SCRUB_AGFL_CHECK(agbno < mp->m_sb.sb_agblocks);
XFS_SCRUB_AGFL_CHECK(agbno < sagfl->eoag);
+ /* Cross-reference with the AG headers. */
+ XFS_SCRUB_AGFL_CHECK(!xfs_scrub_extent_covers_ag_head(mp, agbno, 1));
+
/* Cross-reference with the bnobt. */
if (sc->sa.bno_cur) {
err2 = xfs_alloc_has_record(sc->sa.bno_cur, agbno,
diff --git a/fs/xfs/repair/alloc.c b/fs/xfs/repair/alloc.c
index 7cc15b8..c30e048 100644
--- a/fs/xfs/repair/alloc.c
+++ b/fs/xfs/repair/alloc.c
@@ -91,6 +91,10 @@ xfs_scrub_allocbt_helper(
if (error)
goto out;
+ /* Make sure we don't cover the AG headers. */
+ XFS_SCRUB_BTREC_CHECK(bs,
+ !xfs_scrub_extent_covers_ag_head(mp, bno, len));
+
psa = &bs->sc->sa;
/*
* Ensure there's a corresponding cntbt/bnobt record matching
diff --git a/fs/xfs/repair/bmap.c b/fs/xfs/repair/bmap.c
index 5cb89f7..ffb0112 100644
--- a/fs/xfs/repair/bmap.c
+++ b/fs/xfs/repair/bmap.c
@@ -144,6 +144,11 @@ xfs_scrub_bmap_extent(
XFS_SCRUB_BMAP_OP_ERROR_GOTO(out);
}
+ /* Make sure we don't cover the AG headers. */
+ if (!info->is_rt)
+ XFS_SCRUB_BMAP_CHECK(!xfs_scrub_extent_covers_ag_head(mp,
+ bno, irec->br_blockcount));
+
/* Cross-reference with the bnobt. */
if (sa.bno_cur) {
err2 = xfs_alloc_has_record(sa.bno_cur, bno,
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index bff1b2c..3c110b7 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -208,6 +208,8 @@ int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
void *priv);
int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc,
struct xfs_scrub_ag *sa);
+bool xfs_scrub_extent_covers_ag_head(struct xfs_mount *mp, xfs_agblock_t agbno,
+ xfs_extlen_t len);
/* Setup functions */
diff --git a/fs/xfs/repair/ialloc.c b/fs/xfs/repair/ialloc.c
index 3c45fe6..e677167 100644
--- a/fs/xfs/repair/ialloc.c
+++ b/fs/xfs/repair/ialloc.c
@@ -108,6 +108,10 @@ xfs_scrub_iallocbt_chunk(
goto out;
}
+ /* Make sure we don't cover the AG headers. */
+ XFS_SCRUB_BTREC_CHECK(bs,
+ !xfs_scrub_extent_covers_ag_head(mp, bno, len));
+
psa = &bs->sc->sa;
/* Cross-reference with the bnobt. */
if (psa->bno_cur) {
diff --git a/fs/xfs/repair/refcount.c b/fs/xfs/repair/refcount.c
index 0cf82600..bb1d4ee 100644
--- a/fs/xfs/repair/refcount.c
+++ b/fs/xfs/repair/refcount.c
@@ -76,6 +76,10 @@ xfs_scrub_refcountbt_helper(
if (error)
goto out;
+ /* Make sure we don't cover the AG headers. */
+ XFS_SCRUB_BTREC_CHECK(bs, !xfs_scrub_extent_covers_ag_head(mp,
+ irec.rc_startblock, irec.rc_blockcount));
+
psa = &bs->sc->sa;
/* Cross-reference with the bnobt. */
if (psa->bno_cur) {
diff --git a/fs/xfs/repair/rmap.c b/fs/xfs/repair/rmap.c
index c04cfb6..5b26166 100644
--- a/fs/xfs/repair/rmap.c
+++ b/fs/xfs/repair/rmap.c
@@ -106,6 +106,11 @@ xfs_scrub_rmapbt_helper(
if (error)
goto out;
+ /* Make sure only the AG header owner maps to the AG header. */
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rm_owner == XFS_RMAP_OWN_FS ||
+ !xfs_scrub_extent_covers_ag_head(mp, irec.rm_startblock,
+ irec.rm_blockcount));
+
psa = &bs->sc->sa;
/* check there's no record in freesp btrees */
if (psa->bno_cur) {
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 28/47] xfs: cross-reference inode btrees during scrub
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (26 preceding siblings ...)
2017-01-07 0:38 ` [PATCH 27/47] xfs: cross-reference extents with AG header Darrick J. Wong
@ 2017-01-07 0:38 ` Darrick J. Wong
2017-01-07 0:38 ` [PATCH 29/47] xfs: cross-reference reverse-mapping btree Darrick J. Wong
` (19 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:38 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Cross-reference the inode btrees with the other metadata when we
scrub the filesystem.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_ialloc.c | 99 ++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/libxfs/xfs_ialloc.h | 6 +++
fs/xfs/repair/agheader.c | 81 ++++++++++++++++++++++++++++++++++++
fs/xfs/repair/alloc.c | 18 ++++++++
fs/xfs/repair/bmap.c | 20 +++++++++
fs/xfs/repair/ialloc.c | 16 +++++++
fs/xfs/repair/refcount.c | 20 +++++++++
fs/xfs/repair/rmap.c | 24 +++++++++++
8 files changed, 284 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index e2f93e6..0fb7ba0 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2668,3 +2668,102 @@ xfs_ialloc_pagi_init(
xfs_trans_brelse(tp, bp);
return 0;
}
+
+/*
+ * Is there an inode record covering a given range of inode numbers?
+ *
+ * Starting from an LE lookup of @low, walks the records under @cur until
+ * one starts beyond @high.  Within each record the holemask is scanned
+ * bit by bit, each bit standing for XFS_INODES_PER_HOLEMASK_BIT
+ * consecutive inode numbers; bits that are set are skipped.  *@exists
+ * becomes true as soon as a group whose bit is clear overlaps
+ * [@low, @high]; otherwise it stays false.  Returns 0 or the error from
+ * the btree calls.
+ *
+ * NOTE(review): if the LE lookup finds nothing (no record starts at or
+ * below @low) the loop is never entered, even though a record could
+ * start inside (@low, @high] -- confirm whether callers can hit this.
+ */
+int
+xfs_ialloc_has_inode_record(
+ struct xfs_btree_cur *cur,
+ xfs_agino_t low,
+ xfs_agino_t high,
+ bool *exists)
+{
+ struct xfs_inobt_rec_incore irec;
+ xfs_agino_t agino;
+ __uint16_t holemask;
+ int has;
+ int i;
+ int error;
+
+ *exists = false;
+ error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has);
+ while (error == 0 && has) {
+ error = xfs_inobt_get_rec(cur, &irec, &has);
+ if (error || irec.ir_startino > high)
+ break;
+
+ agino = irec.ir_startino;
+ holemask = irec.ir_holemask;
+ for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1,
+ i++, agino += XFS_INODES_PER_HOLEMASK_BIT) {
+ if (holemask & 1)
+ continue; /* bit set: this group is skipped */
+ if (agino + XFS_INODES_PER_HOLEMASK_BIT > low &&
+ agino <= high) {
+ *exists = true;
+ goto out;
+ }
+ }
+
+ error = xfs_btree_increment(cur, 0, &has);
+ }
+out:
+ return error;
+}
+
+/*
+ * Is there an inode record covering a given extent?
+ *
+ * Translates the AG block extent (@bno, @len) into an inclusive range of
+ * AG inode numbers -- from the inode at offset 0 of block @bno up to one
+ * less than the inode at offset 0 of block @bno + @len -- and defers to
+ * xfs_ialloc_has_inode_record() for the answer.
+ */
+int
+xfs_ialloc_has_inodes_at_extent(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool *exists)
+{
+ xfs_agino_t low;
+ xfs_agino_t high;
+
+ low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0);
+ high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1;
+
+ return xfs_ialloc_has_inode_record(cur, low, high, exists);
+}
+
+struct xfs_ialloc_count_inodes {
+ xfs_agino_t count; /* running sum of ir_count */
+ xfs_agino_t freecount; /* running sum of ir_freecount */
+};
+
+/*
+ * Record inode counts across all inobt records.
+ *
+ * Per-record callback for xfs_btree_query_all(): decodes @rec into its
+ * in-core form and adds its ir_count and ir_freecount to the
+ * struct xfs_ialloc_count_inodes that @priv points to.  Always returns 0.
+ */
+STATIC int
+xfs_ialloc_count_inodes_helper(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_inobt_rec_incore irec;
+ struct xfs_ialloc_count_inodes *ci = priv;
+
+ xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
+ ci->count += irec.ir_count;
+ ci->freecount += irec.ir_freecount;
+
+ return 0;
+}
+
+/*
+ * Count allocated and free inodes under an inobt.
+ *
+ * Visits every record under @cur with xfs_btree_query_all() and, on
+ * success, stores the summed ir_count in *@count and the summed
+ * ir_freecount in *@freecount.  On error the outputs are left untouched
+ * and the error is returned.  @cur must be an XFS_BTNUM_INO cursor
+ * (asserted).
+ */
+int
+xfs_ialloc_count_inodes(
+ struct xfs_btree_cur *cur,
+ xfs_agino_t *count,
+ xfs_agino_t *freecount)
+{
+ struct xfs_ialloc_count_inodes ci = {0};
+ int error;
+
+ ASSERT(cur->bc_btnum == XFS_BTNUM_INO);
+ error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_helper, &ci);
+ if (!error) {
+ *count = ci.count;
+ *freecount = ci.freecount;
+ }
+ return error;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 8e5861d..17f0f1b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -171,5 +171,11 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
union xfs_btree_rec;
void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
struct xfs_inobt_rec_incore *irec);
+int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur,
+ xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low,
+ xfs_agino_t high, bool *exists);
+int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count,
+ xfs_agino_t *freecount);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index 3276e88..9c4bdf1 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -32,6 +32,7 @@
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
#include "repair/common.h"
/* Set us up to check an AG header. */
@@ -159,6 +160,7 @@ xfs_scrub_superblock(
xfs_agnumber_t agno;
uint32_t v2_ok;
bool is_freesp;
+ bool has_inodes;
int error;
int err2;
@@ -297,6 +299,22 @@ xfs_scrub_superblock(
XFS_SCRUB_SB_CHECK(!is_freesp);
}
+ /* Cross-reference with inobt. */
+ if (psa->ino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->ino_cur,
+ XFS_SB_BLOCK(mp), 1, &has_inodes);
+ if (xfs_scrub_should_xref(sc, err2, &psa->ino_cur))
+ XFS_SCRUB_SB_CHECK(!has_inodes);
+ }
+
+ /* Cross-reference with finobt. */
+ if (psa->fino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->fino_cur,
+ XFS_SB_BLOCK(mp), 1, &has_inodes);
+ if (xfs_scrub_should_xref(sc, err2, &psa->fino_cur))
+ XFS_SCRUB_SB_CHECK(!has_inodes);
+ }
+
out:
return error;
}
@@ -342,6 +360,7 @@ xfs_scrub_agf(
xfs_agblock_t fl_count;
xfs_extlen_t blocks;
bool is_freesp;
+ bool has_inodes;
int have;
int level;
int error = 0;
@@ -461,6 +480,22 @@ xfs_scrub_agf(
}
skip_cntbt:
+ /* Cross-reference with inobt. */
+ if (psa->ino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->ino_cur,
+ XFS_AGF_BLOCK(mp), 1, &has_inodes);
+ if (xfs_scrub_should_xref(sc, err2, &psa->ino_cur))
+ XFS_SCRUB_AGF_CHECK(!has_inodes);
+ }
+
+ /* Cross-reference with finobt. */
+ if (psa->fino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->fino_cur,
+ XFS_AGF_BLOCK(mp), 1, &has_inodes);
+ if (xfs_scrub_should_xref(sc, err2, &psa->fino_cur))
+ XFS_SCRUB_AGF_CHECK(!has_inodes);
+ }
+
out:
return error;
}
@@ -487,6 +522,7 @@ xfs_scrub_agfl_block(
xfs_agnumber_t agno = sc->sa.agno;
struct xfs_scrub_agfl *sagfl = priv;
bool is_freesp;
+ bool has_inodes;
int err2;
XFS_SCRUB_AGFL_CHECK(agbno > XFS_AGI_BLOCK(mp));
@@ -505,6 +541,22 @@ xfs_scrub_agfl_block(
XFS_SCRUB_AGFL_CHECK(!is_freesp);
}
+ /* Cross-reference with inobt. */
+ if (sc->sa.ino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
+ agbno, 1, &has_inodes);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.ino_cur))
+ XFS_SCRUB_AGFL_CHECK(!has_inodes);
+ }
+
+ /* Cross-reference with finobt. */
+ if (sc->sa.fino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(sc->sa.fino_cur,
+ agbno, 1, &has_inodes);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.fino_cur))
+ XFS_SCRUB_AGFL_CHECK(!has_inodes);
+ }
+
return 0;
}
@@ -572,7 +624,10 @@ xfs_scrub_agi(
xfs_agino_t agino;
xfs_agino_t first_agino;
xfs_agino_t last_agino;
+ xfs_agino_t count;
+ xfs_agino_t freecount;
bool is_freesp;
+ bool has_inodes;
int i;
int level;
int error = 0;
@@ -658,6 +713,32 @@ xfs_scrub_agi(
XFS_SCRUB_AGI_CHECK(!is_freesp);
}
+ /* Cross-reference with inobt. */
+ if (psa->ino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->ino_cur,
+ XFS_AGI_BLOCK(mp), 1, &has_inodes);
+ if (!xfs_scrub_should_xref(sc, err2, &psa->ino_cur))
+ goto skip_inobt_xref;
+ XFS_SCRUB_AGI_CHECK(!has_inodes);
+ err2 = xfs_ialloc_count_inodes(psa->ino_cur, &count,
+ &freecount);
+ if (xfs_scrub_should_xref(sc, err2, &psa->ino_cur)) {
+ XFS_SCRUB_AGI_CHECK(be32_to_cpu(agi->agi_count) ==
+ count);
+ XFS_SCRUB_AGI_CHECK(be32_to_cpu(agi->agi_freecount) ==
+ freecount);
+ }
+ }
+
+skip_inobt_xref:
+ /* Cross-reference with finobt. */
+ if (psa->fino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->fino_cur,
+ XFS_AGI_BLOCK(mp), 1, &has_inodes);
+ if (xfs_scrub_should_xref(sc, err2, &psa->fino_cur))
+ XFS_SCRUB_AGI_CHECK(!has_inodes);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/alloc.c b/fs/xfs/repair/alloc.c
index c30e048..7812ee0 100644
--- a/fs/xfs/repair/alloc.c
+++ b/fs/xfs/repair/alloc.c
@@ -32,6 +32,7 @@
#include "xfs_sb.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -72,6 +73,7 @@ xfs_scrub_allocbt_helper(
xfs_agblock_t bno;
xfs_extlen_t flen;
xfs_extlen_t len;
+ bool has_inodes;
int has_otherrec;
int error = 0;
int err2;
@@ -115,6 +117,22 @@ xfs_scrub_allocbt_helper(
}
}
+ /* Cross-reference with inobt. */
+ if (psa->ino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->ino_cur, bno,
+ len, &has_inodes);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->ino_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !has_inodes);
+ }
+
+ /* Cross-reference with finobt. */
+ if (psa->fino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->fino_cur, bno,
+ len, &has_inodes);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->fino_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !has_inodes);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/bmap.c b/fs/xfs/repair/bmap.c
index ffb0112..dd030081d 100644
--- a/fs/xfs/repair/bmap.c
+++ b/fs/xfs/repair/bmap.c
@@ -37,6 +37,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -103,6 +104,7 @@ xfs_scrub_bmap_extent(
xfs_agnumber_t agno;
xfs_fsblock_t bno;
bool is_freesp;
+ bool has_inodes;
int error = 0;
int err2 = 0;
@@ -157,6 +159,24 @@ xfs_scrub_bmap_extent(
XFS_SCRUB_BMAP_CHECK(!is_freesp);
}
+ /* Cross-reference with inobt. */
+ if (sa.ino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(sa.ino_cur,
+ irec->br_startblock, irec->br_blockcount,
+ &has_inodes);
+ if (xfs_scrub_should_xref(info->sc, err2, &sa.ino_cur))
+ XFS_SCRUB_BMAP_CHECK(!has_inodes);
+ }
+
+ /* Cross-reference with finobt. */
+ if (sa.fino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(sa.fino_cur,
+ irec->br_startblock, irec->br_blockcount,
+ &has_inodes);
+ if (xfs_scrub_should_xref(info->sc, err2, &sa.fino_cur))
+ XFS_SCRUB_BMAP_CHECK(!has_inodes);
+ }
+
xfs_scrub_ag_free(&sa);
out:
info->lastoff = irec->br_startoff + irec->br_blockcount;
diff --git a/fs/xfs/repair/ialloc.c b/fs/xfs/repair/ialloc.c
index e677167..b169ccf 100644
--- a/fs/xfs/repair/ialloc.c
+++ b/fs/xfs/repair/ialloc.c
@@ -85,9 +85,11 @@ xfs_scrub_iallocbt_chunk(
struct xfs_mount *mp = bs->cur->bc_mp;
struct xfs_agf *agf;
struct xfs_scrub_ag *psa;
+ struct xfs_btree_cur **xcur;
xfs_agblock_t eoag;
xfs_agblock_t bno;
bool is_freesp;
+ bool has_inodes;
int error = 0;
int err2;
@@ -121,6 +123,20 @@ xfs_scrub_iallocbt_chunk(
XFS_SCRUB_BTREC_CHECK(bs, !is_freesp);
}
+ /* If we have a finobt, cross-reference with it. */
+ if (bs->cur == psa->fino_cur)
+ xcur = &psa->ino_cur;
+ else if (bs->cur == psa->ino_cur && irec->ir_freecount > 0)
+ xcur = &psa->fino_cur;
+ else
+ xcur = NULL;
+ if (xcur && *xcur) {
+ err2 = xfs_ialloc_has_inode_record(*xcur,
+ agino, agino, &has_inodes);
+ if (xfs_scrub_btree_should_xref(bs, err2, xcur))
+ XFS_SCRUB_BTREC_CHECK(bs, has_inodes);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/refcount.c b/fs/xfs/repair/refcount.c
index bb1d4ee..38a8987 100644
--- a/fs/xfs/repair/refcount.c
+++ b/fs/xfs/repair/refcount.c
@@ -32,6 +32,7 @@
#include "xfs_sb.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -50,6 +51,7 @@ xfs_scrub_refcountbt_helper(
xfs_agblock_t eoag;
bool has_cowflag;
bool is_freesp;
+ bool has_inodes;
int error = 0;
int err2;
@@ -89,6 +91,24 @@ xfs_scrub_refcountbt_helper(
XFS_SCRUB_BTREC_CHECK(bs, !is_freesp);
}
+ /* Cross-reference with inobt. */
+ if (psa->ino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->ino_cur,
+ irec.rc_startblock, irec.rc_blockcount,
+ &has_inodes);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->ino_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !has_inodes);
+ }
+
+ /* Cross-reference with finobt. */
+ if (psa->fino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->fino_cur,
+ irec.rc_startblock, irec.rc_blockcount,
+ &has_inodes);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->fino_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !has_inodes);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/rmap.c b/fs/xfs/repair/rmap.c
index 5b26166..d4db8bf 100644
--- a/fs/xfs/repair/rmap.c
+++ b/fs/xfs/repair/rmap.c
@@ -32,6 +32,7 @@
#include "xfs_sb.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -53,6 +54,7 @@ xfs_scrub_rmapbt_helper(
bool is_unwritten;
bool is_bmbt;
bool is_attr;
+ bool has_inodes;
int error = 0;
int err2;
@@ -120,6 +122,28 @@ xfs_scrub_rmapbt_helper(
XFS_SCRUB_BTREC_CHECK(bs, !is_freesp);
}
+ /* Cross-reference with inobt. */
+ if (psa->ino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->ino_cur,
+ irec.rm_startblock, irec.rm_blockcount,
+ &has_inodes);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->ino_cur))
+ XFS_SCRUB_BTREC_CHECK(bs,
+ irec.rm_owner == XFS_RMAP_OWN_INODES ||
+ !has_inodes);
+ }
+
+ /* Cross-reference with finobt. */
+ if (psa->fino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(psa->fino_cur,
+ irec.rm_startblock, irec.rm_blockcount,
+ &has_inodes);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->fino_cur))
+ XFS_SCRUB_BTREC_CHECK(bs,
+ irec.rm_owner == XFS_RMAP_OWN_INODES ||
+ !has_inodes);
+ }
+
out:
return error;
}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 29/47] xfs: cross-reference reverse-mapping btree
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (27 preceding siblings ...)
2017-01-07 0:38 ` [PATCH 28/47] xfs: cross-reference inode btrees during scrub Darrick J. Wong
@ 2017-01-07 0:38 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 30/47] xfs: cross-reference refcount btree during scrub Darrick J. Wong
` (18 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:38 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
When scrubbing various btrees, we should cross-reference the records
with the reverse mapping btree.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_rmap.c | 58 ++++++++++++++
fs/xfs/libxfs/xfs_rmap.h | 5 +
 fs/xfs/repair/agheader.c | 95 +++++++++++++++++++++++
fs/xfs/repair/alloc.c | 9 ++
fs/xfs/repair/bmap.c | 89 ++++++++++++++++++++++
fs/xfs/repair/btree.c | 10 ++
fs/xfs/repair/ialloc.c | 30 +++++++
fs/xfs/repair/inode.c | 23 ++++++
fs/xfs/repair/refcount.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++
9 files changed, 506 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index c7d5102..cce51cb 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2306,3 +2306,61 @@ xfs_rmap_free_extent(
return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner,
XFS_DATA_FORK, &bmap);
}
+
+/*
+ * Query the rmap btree for the block range [bno, bno + len - 1].
+ *
+ * Builds a low query record (all zeroes, starting at @bno) and a high
+ * query record (all ones, starting at the last block of the extent) and
+ * passes both to xfs_btree_has_record(), which reports its answer through
+ * @exists.  The return value is whatever xfs_btree_has_record() returns.
+ */
+int
+xfs_rmap_has_record(
+	struct xfs_btree_cur	*cur,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	bool			*exists)
+{
+	union xfs_btree_irec	lo_rec;
+	union xfs_btree_irec	hi_rec;
+
+	/* Every field but the start block is zero in the low record... */
+	memset(&lo_rec, 0, sizeof(lo_rec));
+	/* ...and all-ones in the high record. */
+	memset(&hi_rec, 0xFF, sizeof(hi_rec));
+
+	lo_rec.r.rm_startblock = bno;
+	hi_rec.r.rm_startblock = bno + len - 1;
+
+	return xfs_btree_has_record(cur, &lo_rec, &hi_rec, exists);
+}
+
+/*
+ * Does one rmap record with the owner described by @oinfo contain the
+ * whole extent [bno, bno + len)?
+ *
+ * Positions @cur with xfs_rmap_lookup_le() and reads back the record
+ * found there.  *has_rmap is set to true only if that record has the
+ * expected owner, starts at or before @bno and ends at or after
+ * @bno + @len; it is set to false if either step reports no record.
+ * If the lookup or the record fetch fails, the error is returned and
+ * *has_rmap is not written.
+ */
+int
+xfs_rmap_record_exists(
+	struct xfs_btree_cur	*cur,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	struct xfs_owner_info	*oinfo,
+	bool			*has_rmap)
+{
+	struct xfs_rmap_irec	irec;
+	uint64_t		owner;
+	uint64_t		offset;
+	unsigned int		flags;
+	int			found;
+	int			error;
+
+	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
+
+	error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &found);
+	if (error)
+		return error;
+
+	/* Only fetch the record if the lookup landed on one. */
+	if (found) {
+		error = xfs_rmap_get_rec(cur, &irec, &found);
+		if (error)
+			return error;
+	}
+
+	if (!found) {
+		*has_rmap = false;
+		return 0;
+	}
+
+	*has_rmap = irec.rm_owner == owner &&
+		    irec.rm_startblock <= bno &&
+		    irec.rm_startblock + irec.rm_blockcount >= bno + len;
+	return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 3fa4559..ea359ab 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -217,5 +217,10 @@ int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
union xfs_btree_rec;
int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec);
+int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_fsblock_t bno,
+ xfs_filblks_t len, bool *exists);
+int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_fsblock_t bno,
+ xfs_filblks_t len, struct xfs_owner_info *oinfo,
+ bool *has_rmap);
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index 9c4bdf1..025f17a 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -33,6 +33,7 @@
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
#include "repair/common.h"
/* Set us up to check an AG header. */
@@ -157,10 +158,12 @@ xfs_scrub_superblock(
struct xfs_buf *bp;
struct xfs_scrub_ag *psa;
struct xfs_sb sb;
+ struct xfs_owner_info oinfo;
xfs_agnumber_t agno;
uint32_t v2_ok;
bool is_freesp;
bool has_inodes;
+ bool has_rmap;
int error;
int err2;
@@ -315,6 +318,15 @@ xfs_scrub_superblock(
XFS_SCRUB_SB_CHECK(!has_inodes);
}
+ /* Cross-reference with the rmapbt. */
+ if (psa->rmap_cur) {
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ err2 = xfs_rmap_record_exists(psa->rmap_cur, XFS_SB_BLOCK(mp),
+ 1, &oinfo, &has_rmap);
+ if (xfs_scrub_should_xref(sc, err2, &psa->rmap_cur))
+ XFS_SCRUB_SB_CHECK(has_rmap);
+ }
+
out:
return error;
}
@@ -346,6 +358,7 @@ int
xfs_scrub_agf(
struct xfs_scrub_context *sc)
{
+ struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->tp->t_mountp;
struct xfs_agf *agf;
struct xfs_scrub_ag *psa;
@@ -359,8 +372,10 @@ xfs_scrub_agf(
xfs_agblock_t agfl_count;
xfs_agblock_t fl_count;
xfs_extlen_t blocks;
+ xfs_extlen_t btreeblks = 0;
bool is_freesp;
bool has_inodes;
+ bool has_rmap;
int have;
int level;
int error = 0;
@@ -496,6 +511,37 @@ xfs_scrub_agf(
XFS_SCRUB_AGF_CHECK(!has_inodes);
}
+ /* Cross-reference with the rmapbt. */
+ if (psa->rmap_cur) {
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ err2 = xfs_rmap_record_exists(psa->rmap_cur, XFS_AGF_BLOCK(mp),
+ 1, &oinfo, &has_rmap);
+ if (xfs_scrub_should_xref(sc, err2, &psa->rmap_cur))
+ XFS_SCRUB_AGF_CHECK(has_rmap);
+ }
+ if (psa->rmap_cur) {
+ err2 = xfs_btree_count_blocks(psa->rmap_cur, &blocks);
+ if (xfs_scrub_should_xref(sc, err2, &psa->rmap_cur)) {
+ btreeblks = blocks - 1;
+ XFS_SCRUB_AGF_CHECK(blocks == be32_to_cpu(
+ agf->agf_rmap_blocks));
+ }
+ }
+
+ /* Check btreeblks */
+ if ((!xfs_sb_version_hasrmapbt(&mp->m_sb) || psa->rmap_cur) &&
+ psa->bno_cur && psa->cnt_cur) {
+ err2 = xfs_btree_count_blocks(psa->bno_cur, &blocks);
+ if (xfs_scrub_should_xref(sc, err2, &psa->bno_cur))
+ btreeblks += blocks - 1;
+ err2 = xfs_btree_count_blocks(psa->cnt_cur, &blocks);
+ if (xfs_scrub_should_xref(sc, err2, &psa->cnt_cur))
+ btreeblks += blocks - 1;
+ if (psa->bno_cur && psa->cnt_cur)
+ XFS_SCRUB_AGF_CHECK(btreeblks == be32_to_cpu(
+ agf->agf_btreeblks));
+ }
+
out:
return error;
}
@@ -507,6 +553,7 @@ xfs_scrub_agf(
#define XFS_SCRUB_AGFL_CHECK(fs_ok) \
XFS_SCRUB_CHECK(sc, sc->sa.agfl_bp, "AGFL", fs_ok)
struct xfs_scrub_agfl {
+ struct xfs_owner_info oinfo;
xfs_agblock_t eoag;
xfs_daddr_t eofs;
};
@@ -523,6 +570,7 @@ xfs_scrub_agfl_block(
struct xfs_scrub_agfl *sagfl = priv;
bool is_freesp;
bool has_inodes;
+ bool has_rmap;
int err2;
XFS_SCRUB_AGFL_CHECK(agbno > XFS_AGI_BLOCK(mp));
@@ -557,6 +605,14 @@ xfs_scrub_agfl_block(
XFS_SCRUB_AGFL_CHECK(!has_inodes);
}
+ /* Cross-reference with the rmapbt. */
+ if (sc->sa.rmap_cur) {
+ err2 = xfs_rmap_record_exists(sc->sa.rmap_cur, agbno, 1,
+ &sagfl->oinfo, &has_rmap);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.rmap_cur))
+ XFS_SCRUB_AGFL_CHECK(has_rmap);
+ }
+
return 0;
}
@@ -572,6 +628,8 @@ xfs_scrub_agfl(
struct xfs_mount *mp = sc->tp->t_mountp;
struct xfs_agf *agf;
bool is_freesp;
+ bool has_inodes;
+ bool has_rmap;
int error;
int err2;
@@ -593,7 +651,33 @@ xfs_scrub_agfl(
XFS_SCRUB_AGFL_CHECK(!is_freesp);
}
+ /* Cross-reference with inobt. */
+ if (sc->sa.ino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
+ XFS_AGFL_BLOCK(mp), 1, &has_inodes);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.ino_cur))
+ XFS_SCRUB_AGFL_CHECK(!has_inodes);
+ }
+
+ /* Cross-reference with finobt. */
+ if (sc->sa.fino_cur) {
+ err2 = xfs_ialloc_has_inodes_at_extent(sc->sa.fino_cur,
+ XFS_AGFL_BLOCK(mp), 1, &has_inodes);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.fino_cur))
+ XFS_SCRUB_AGFL_CHECK(!has_inodes);
+ }
+
+ /* Set up cross-reference with rmapbt. */
+ if (sc->sa.rmap_cur) {
+ xfs_rmap_ag_owner(&sagfl.oinfo, XFS_RMAP_OWN_FS);
+ err2 = xfs_rmap_record_exists(sc->sa.rmap_cur,
+ XFS_AGFL_BLOCK(mp), 1, &sagfl.oinfo, &has_rmap);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.rmap_cur))
+ XFS_SCRUB_AGFL_CHECK(has_rmap);
+ }
+
/* Check the blocks in the AGFL. */
+ xfs_rmap_ag_owner(&sagfl.oinfo, XFS_RMAP_OWN_AG);
return xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sagfl);
out:
return error;
@@ -613,6 +697,7 @@ int
xfs_scrub_agi(
struct xfs_scrub_context *sc)
{
+ struct xfs_owner_info oinfo;
struct xfs_mount *mp = sc->tp->t_mountp;
struct xfs_agi *agi;
struct xfs_scrub_ag *psa;
@@ -628,6 +713,7 @@ xfs_scrub_agi(
xfs_agino_t freecount;
bool is_freesp;
bool has_inodes;
+ bool has_rmap;
int i;
int level;
int error = 0;
@@ -739,6 +825,15 @@ xfs_scrub_agi(
XFS_SCRUB_AGI_CHECK(!has_inodes);
}
+ /* Cross-reference with the rmapbt. */
+ if (psa->rmap_cur) {
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ err2 = xfs_rmap_record_exists(psa->rmap_cur, XFS_AGI_BLOCK(mp),
+ 1, &oinfo, &has_rmap);
+ if (xfs_scrub_should_xref(sc, err2, &psa->rmap_cur))
+ XFS_SCRUB_AGI_CHECK(has_rmap);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/alloc.c b/fs/xfs/repair/alloc.c
index 7812ee0..ecbc341 100644
--- a/fs/xfs/repair/alloc.c
+++ b/fs/xfs/repair/alloc.c
@@ -73,6 +73,7 @@ xfs_scrub_allocbt_helper(
xfs_agblock_t bno;
xfs_extlen_t flen;
xfs_extlen_t len;
+ bool has_rmap;
bool has_inodes;
int has_otherrec;
int error = 0;
@@ -133,6 +134,14 @@ xfs_scrub_allocbt_helper(
XFS_SCRUB_BTREC_CHECK(bs, !has_inodes);
}
+ /* Cross-reference with the rmapbt. */
+ if (psa->rmap_cur) {
+ err2 = xfs_rmap_has_record(psa->rmap_cur, bno, len,
+ &has_rmap);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->rmap_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !has_rmap);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/bmap.c b/fs/xfs/repair/bmap.c
index dd030081d..b88b450 100644
--- a/fs/xfs/repair/bmap.c
+++ b/fs/xfs/repair/bmap.c
@@ -103,8 +103,13 @@ xfs_scrub_bmap_extent(
xfs_daddr_t dlen;
xfs_agnumber_t agno;
xfs_fsblock_t bno;
+ struct xfs_rmap_irec rmap;
+ uint64_t owner;
+ xfs_fileoff_t offset;
bool is_freesp;
bool has_inodes;
+ unsigned int rflags;
+ int has_rmap;
int error = 0;
int err2 = 0;
@@ -177,6 +182,90 @@ xfs_scrub_bmap_extent(
XFS_SCRUB_BMAP_CHECK(!has_inodes);
}
+ /* Cross-reference with rmapbt. */
+ if (sa.rmap_cur) {
+ if (info->whichfork == XFS_COW_FORK) {
+ owner = XFS_RMAP_OWN_COW;
+ offset = 0;
+ } else {
+ owner = ip->i_ino;
+ offset = irec->br_startoff;
+ }
+
+ /* Look for a corresponding rmap. */
+ rflags = 0;
+ if (info->whichfork == XFS_ATTR_FORK)
+ rflags |= XFS_RMAP_ATTR_FORK;
+
+ if (info->is_shared) {
+ err2 = xfs_rmap_lookup_le_range(sa.rmap_cur, bno, owner,
+ offset, rflags, &rmap,
+ &has_rmap);
+ if (xfs_scrub_should_xref(info->sc, err2,
+ &sa.rmap_cur)) {
+ XFS_SCRUB_BMAP_GOTO(has_rmap, skip_rmap_xref);
+ } else
+ goto skip_rmap_xref;
+ } else {
+ err2 = xfs_rmap_lookup_le(sa.rmap_cur, bno, 0, owner,
+ offset, rflags, &has_rmap);
+ if (xfs_scrub_should_xref(info->sc, err2,
+ &sa.rmap_cur)) {
+ XFS_SCRUB_BMAP_GOTO(has_rmap, skip_rmap_xref);
+ } else
+ goto skip_rmap_xref;
+
+ err2 = xfs_rmap_get_rec(sa.rmap_cur, &rmap,
+ &has_rmap);
+ if (xfs_scrub_should_xref(info->sc, err2,
+ &sa.rmap_cur)) {
+ XFS_SCRUB_BMAP_GOTO(has_rmap, skip_rmap_xref);
+ } else
+ goto skip_rmap_xref;
+ }
+
+ /* Check the rmap. */
+ XFS_SCRUB_BMAP_CHECK(rmap.rm_startblock <= bno);
+ XFS_SCRUB_BMAP_CHECK(rmap.rm_startblock <
+ rmap.rm_startblock + rmap.rm_blockcount);
+ XFS_SCRUB_BMAP_CHECK(bno + irec->br_blockcount <=
+ rmap.rm_startblock + rmap.rm_blockcount);
+ if (owner != XFS_RMAP_OWN_COW) {
+ XFS_SCRUB_BMAP_CHECK(rmap.rm_offset <= offset);
+ XFS_SCRUB_BMAP_CHECK(rmap.rm_offset <
+ rmap.rm_offset + rmap.rm_blockcount);
+ XFS_SCRUB_BMAP_CHECK(offset + irec->br_blockcount <=
+ rmap.rm_offset + rmap.rm_blockcount);
+ }
+ XFS_SCRUB_BMAP_CHECK(rmap.rm_owner == owner);
+ switch (irec->br_state) {
+ case XFS_EXT_UNWRITTEN:
+ XFS_SCRUB_BMAP_CHECK(
+ rmap.rm_flags & XFS_RMAP_UNWRITTEN);
+ break;
+ case XFS_EXT_NORM:
+ XFS_SCRUB_BMAP_CHECK(
+ !(rmap.rm_flags & XFS_RMAP_UNWRITTEN));
+ break;
+ default:
+ break;
+ }
+ switch (info->whichfork) {
+ case XFS_ATTR_FORK:
+ XFS_SCRUB_BMAP_CHECK(
+ rmap.rm_flags & XFS_RMAP_ATTR_FORK);
+ break;
+ case XFS_DATA_FORK:
+ case XFS_COW_FORK:
+ XFS_SCRUB_BMAP_CHECK(
+ !(rmap.rm_flags & XFS_RMAP_ATTR_FORK));
+ break;
+ }
+ XFS_SCRUB_BMAP_CHECK(!(rmap.rm_flags & XFS_RMAP_BMBT_BLOCK));
+skip_rmap_xref:
+ ;
+ }
+
xfs_scrub_ag_free(&sa);
out:
info->lastoff = irec->br_startoff + irec->br_blockcount;
diff --git a/fs/xfs/repair/btree.c b/fs/xfs/repair/btree.c
index c13762e..4e4436a 100644
--- a/fs/xfs/repair/btree.c
+++ b/fs/xfs/repair/btree.c
@@ -32,6 +32,7 @@
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
+#include "xfs_rmap.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -513,6 +514,7 @@ xfs_scrub_btree_check_block_owner(
xfs_agnumber_t agno;
xfs_agblock_t bno;
bool is_freesp;
+ bool has_rmap;
int error = 0;
int err2;
@@ -536,6 +538,14 @@ xfs_scrub_btree_check_block_owner(
XFS_SCRUB_BTREC_CHECK(bs, !is_freesp);
}
+ /* Check that there's an rmap for this. */
+ if (psa->rmap_cur) {
+ err2 = xfs_rmap_record_exists(psa->rmap_cur, bno, 1, bs->oinfo,
+ &has_rmap);
+ if (xfs_scrub_btree_should_xref(bs, err2, NULL))
+ XFS_SCRUB_BTREC_CHECK(bs, has_rmap);
+ }
+
if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
xfs_scrub_ag_free(&sa);
diff --git a/fs/xfs/repair/ialloc.c b/fs/xfs/repair/ialloc.c
index b169ccf..816eb1a 100644
--- a/fs/xfs/repair/ialloc.c
+++ b/fs/xfs/repair/ialloc.c
@@ -86,16 +86,19 @@ xfs_scrub_iallocbt_chunk(
struct xfs_agf *agf;
struct xfs_scrub_ag *psa;
struct xfs_btree_cur **xcur;
+ struct xfs_owner_info oinfo;
xfs_agblock_t eoag;
xfs_agblock_t bno;
bool is_freesp;
bool has_inodes;
+ bool has_rmap;
int error = 0;
int err2;
agf = XFS_BUF_TO_AGF(bs->sc->sa.agf_bp);
eoag = be32_to_cpu(agf->agf_length);
bno = XFS_AGINO_TO_AGBNO(mp, agino);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
*keep_scanning = true;
XFS_SCRUB_BTREC_CHECK(bs, bno < mp->m_sb.sb_agblocks);
@@ -137,6 +140,14 @@ xfs_scrub_iallocbt_chunk(
XFS_SCRUB_BTREC_CHECK(bs, has_inodes);
}
+ /* Cross-reference with rmapbt. */
+ if (psa->rmap_cur) {
+ err2 = xfs_rmap_record_exists(psa->rmap_cur, bno,
+ len, &oinfo, &has_rmap);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->rmap_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, has_rmap);
+ }
+
out:
return error;
}
@@ -211,6 +222,7 @@ xfs_scrub_iallocbt_check_freemask(
struct xfs_mount *mp = bs->cur->bc_mp;
struct xfs_dinode *dip;
struct xfs_buf *bp;
+ struct xfs_scrub_ag *psa;
xfs_ino_t fsino;
xfs_agino_t nr_inodes;
xfs_agino_t agino;
@@ -220,12 +232,15 @@ xfs_scrub_iallocbt_check_freemask(
int blks_per_cluster;
__uint16_t holemask;
__uint16_t ir_holemask;
+ bool has;
int error = 0;
+ int err2;
/* Make sure the freemask matches the inode records. */
blks_per_cluster = xfs_icluster_size_fsb(mp);
nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ psa = &bs->sc->sa;
for (agino = irec->ir_startino;
agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
@@ -245,6 +260,21 @@ xfs_scrub_iallocbt_check_freemask(
XFS_SCRUB_BTREC_CHECK(bs, ir_holemask == holemask ||
ir_holemask == 0);
+ /* Does the rmap agree that we have inodes here? */
+ if (psa->rmap_cur) {
+ err2 = xfs_rmap_record_exists(psa->rmap_cur, agbno,
+ blks_per_cluster, &oinfo, &has);
+ if (!xfs_scrub_btree_should_xref(bs, err2,
+ &psa->rmap_cur))
+ goto skip_xref;
+ if (has)
+ XFS_SCRUB_BTREC_CHECK(bs, ir_holemask == 0);
+ else
+ XFS_SCRUB_BTREC_CHECK(bs,
+ ir_holemask == holemask);
+ }
+
+skip_xref:
/* If any part of this is a hole, skip it. */
if (ir_holemask)
continue;
diff --git a/fs/xfs/repair/inode.c b/fs/xfs/repair/inode.c
index e06d585..14d9e19 100644
--- a/fs/xfs/repair/inode.c
+++ b/fs/xfs/repair/inode.c
@@ -38,6 +38,7 @@
#include "xfs_ialloc.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
+#include "xfs_rmap.h"
#include "repair/common.h"
/*
@@ -182,6 +183,7 @@ xfs_scrub_inode(
uint16_t flags;
uint16_t mode;
int error = 0;
+ int err2;
/* Did we get the in-core inode, or are we doing this manually? */
if (sc->ip) {
@@ -368,6 +370,27 @@ xfs_scrub_inode(
XFS_SCRUB_INODE_PREEN(ifp->if_bytes > 0);
}
+ /* Make sure the rmap thinks there's an inode here. */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ struct xfs_owner_info oinfo;
+ struct xfs_scrub_ag sa = {0};
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ bool has_rmap;
+
+ agno = XFS_INO_TO_AGNO(mp, ino);
+ agbno = XFS_INO_TO_AGBNO(mp, ino);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ error = xfs_scrub_ag_init(sc, agno, &sa);
+ XFS_SCRUB_INODE_OP_ERROR_GOTO(out);
+
+ err2 = xfs_rmap_record_exists(sa.rmap_cur, agbno,
+ 1, &oinfo, &has_rmap);
+ if (xfs_scrub_should_xref(sc, err2, &sa.rmap_cur))
+ XFS_SCRUB_INODE_CHECK(has_rmap);
+ xfs_scrub_ag_free(&sa);
+ }
+
out:
if (bp)
xfs_trans_brelse(sc->tp, bp);
diff --git a/fs/xfs/repair/refcount.c b/fs/xfs/repair/refcount.c
index 38a8987..75071de 100644
--- a/fs/xfs/repair/refcount.c
+++ b/fs/xfs/repair/refcount.c
@@ -38,6 +38,163 @@
/* Reference count btree scrubber. */
+/*
+ * One rmap record that did not span the whole refcount extent, saved by
+ * xfs_scrub_refcountbt_rmap_check() for later processing.
+ */
+struct xfs_scrub_refcountbt_fragment {
+ /* Copy of the rmap record as delivered to the query callback. */
+ struct xfs_rmap_irec rm;
+ /* Links this fragment into the fragment list or the work list. */
+ struct list_head list;
+};
+
+/* State shared between the rmap query callback and fragment processing. */
+struct xfs_scrub_refcountbt_rmap_check_info {
+ /* Btree scrub context passed to XFS_SCRUB_BTREC_CHECK. */
+ struct xfs_scrub_btree *bs;
+ /*
+ * Number of rmaps seen that span the whole refcount extent; set to
+ * rc.rc_refcount once the fragments are found to account for the rest.
+ */
+ xfs_nlink_t nr;
+ /* The refcount record being cross-referenced. */
+ struct xfs_refcount_irec rc;
+ /* List of xfs_scrub_refcountbt_fragment for partially-covering rmaps. */
+ struct list_head fragments;
+};
+
+/*
+ * Callback passed to xfs_rmap_query_range() for the refcount
+ * cross-reference.
+ *
+ * An rmap that starts at or before the refcount extent and ends at or
+ * after it can be counted towards the refcount straight away.  Anything
+ * smaller is a fragment: copy it onto the fragment list so that, once
+ * the query is finished, we can see whether the fragments add up to
+ * exactly the number of references the refcountbt claims.
+ */
+STATIC int
+xfs_scrub_refcountbt_rmap_check(
+	struct xfs_btree_cur				*cur,
+	struct xfs_rmap_irec				*rec,
+	void						*priv)
+{
+	struct xfs_scrub_refcountbt_rmap_check_info	*rsrci = priv;
+	struct xfs_scrub_refcountbt_fragment		*frag;
+	xfs_agblock_t					rm_end;
+	xfs_agblock_t					rc_end;
+
+	/* Both "end" values are one block past the last block covered. */
+	rm_end = rec->rm_startblock + rec->rm_blockcount;
+	rc_end = rsrci->rc.rc_startblock + rsrci->rc.rc_blockcount;
+
+	/* A refcount of 1 is accepted only for rmaps owned by OWN_COW. */
+	XFS_SCRUB_BTREC_CHECK(rsrci->bs, rsrci->rc.rc_refcount != 1 ||
+			rec->rm_owner == XFS_RMAP_OWN_COW);
+
+	/* Full coverage: redeem this rmap now. */
+	if (rec->rm_startblock <= rsrci->rc.rc_startblock && rm_end >= rc_end) {
+		rsrci->nr++;
+		return 0;
+	}
+
+	/* Partial coverage: remember it for the fragment pass. */
+	frag = kmem_zalloc(sizeof(*frag), KM_SLEEP);
+	frag->rm = *rec;
+	list_add_tail(&frag->list, &rsrci->fragments);
+
+	return 0;
+}
+
+/*
+ * Given a bunch of rmap fragments, iterate through them, keeping
+ * a running tally of the refcount.  If this ever deviates from
+ * what we expect (which is the refcountbt's refcount minus the
+ * number of extents that totally covered the refcountbt extent),
+ * we have a refcountbt error.
+ *
+ * On success rsrci->nr is raised to rsrci->rc.rc_refcount; on any
+ * mismatch it is left alone.  Unless there is nothing left to find
+ * (in which case we return immediately), every fragment on
+ * rsrci->fragments is freed before returning.  @mp is not used.
+ */
+STATIC void
+xfs_scrub_refcountbt_process_rmap_fragments(
+	struct xfs_mount				*mp,
+	struct xfs_scrub_refcountbt_rmap_check_info	*rsrci)
+{
+	struct list_head				worklist;
+	struct xfs_scrub_refcountbt_fragment		*cur;
+	struct xfs_scrub_refcountbt_fragment		*n;
+	xfs_agblock_t					bno;
+	xfs_agblock_t					rbno;
+	xfs_agblock_t					next_rbno;
+	xfs_nlink_t					nr;
+	xfs_nlink_t					target_nr;
+
+	target_nr = rsrci->rc.rc_refcount - rsrci->nr;
+	if (target_nr == 0)
+		return;
+
+	/*
+	 * There are (rsrci->rc.rc_refcount - rsrci->nr refcount)
+	 * references we haven't found yet.  Pull that many off the
+	 * fragment list and figure out where the smallest rmap ends
+	 * (and therefore the next rmap should start).  All the rmaps
+	 * we pull off should start at or before the beginning of the
+	 * refcount record's range.
+	 *
+	 * nr counts the fragments actually moved, so a list that runs
+	 * out early (including an empty one) cannot look complete.
+	 */
+	INIT_LIST_HEAD(&worklist);
+	rbno = NULLAGBLOCK;
+	nr = 0;
+	list_for_each_entry_safe(cur, n, &rsrci->fragments, list) {
+		if (cur->rm.rm_startblock > rsrci->rc.rc_startblock)
+			goto fail;
+		bno = cur->rm.rm_startblock + cur->rm.rm_blockcount;
+		if (rbno > bno)
+			rbno = bno;
+		list_move_tail(&cur->list, &worklist);
+		nr++;
+		if (nr == target_nr)
+			break;
+	}
+
+	/* We must have found exactly target_nr starting fragments. */
+	if (nr != target_nr)
+		goto fail;
+
+	while (!list_empty(&rsrci->fragments)) {
+		/* Discard any fragments ending at rbno. */
+		nr = 0;
+		next_rbno = NULLAGBLOCK;
+		list_for_each_entry_safe(cur, n, &worklist, list) {
+			bno = cur->rm.rm_startblock + cur->rm.rm_blockcount;
+			if (bno != rbno) {
+				if (next_rbno > bno)
+					next_rbno = bno;
+				continue;
+			}
+			list_del(&cur->list);
+			kmem_free(cur);
+			nr++;
+		}
+
+		/* Try to add nr rmaps starting at rbno to the worklist. */
+		list_for_each_entry_safe(cur, n, &rsrci->fragments, list) {
+			bno = cur->rm.rm_startblock + cur->rm.rm_blockcount;
+			if (cur->rm.rm_startblock != rbno)
+				goto fail;
+			list_move_tail(&cur->list, &worklist);
+			if (next_rbno > bno)
+				next_rbno = bno;
+			nr--;
+			if (nr == 0)
+				break;
+		}
+
+		/*
+		 * If nr is still nonzero, the fragment list ran out before
+		 * we could replace everything we discarded, so the tally
+		 * drops below target_nr from rbno onwards.
+		 */
+		if (nr)
+			goto fail;
+
+		rbno = next_rbno;
+	}
+
+	/*
+	 * Make sure the last extent we processed ends at or beyond
+	 * the end of the refcount extent.
+	 */
+	if (rbno < rsrci->rc.rc_startblock + rsrci->rc.rc_blockcount)
+		goto fail;
+
+	rsrci->nr = rsrci->rc.rc_refcount;
+fail:
+	/* Delete fragments and work list. */
+	list_for_each_entry_safe(cur, n, &worklist, list) {
+		list_del(&cur->list);
+		kmem_free(cur);
+	}
+	list_for_each_entry_safe(cur, n, &rsrci->fragments, list) {
+		list_del(&cur->list);
+		kmem_free(cur);
+	}
+}
+
/* Scrub a refcountbt record. */
STATIC int
xfs_scrub_refcountbt_helper(
@@ -48,6 +205,11 @@ xfs_scrub_refcountbt_helper(
struct xfs_agf *agf;
struct xfs_scrub_ag *psa;
struct xfs_refcount_irec irec;
+ struct xfs_rmap_irec low;
+ struct xfs_rmap_irec high;
+ struct xfs_scrub_refcountbt_rmap_check_info rsrci;
+ struct xfs_scrub_refcountbt_fragment *cur;
+ struct xfs_scrub_refcountbt_fragment *n;
xfs_agblock_t eoag;
bool has_cowflag;
bool is_freesp;
@@ -109,6 +271,31 @@ xfs_scrub_refcountbt_helper(
XFS_SCRUB_BTREC_CHECK(bs, !has_inodes);
}
+ /* Cross-reference with the rmapbt to confirm the refcount. */
+ if (psa->rmap_cur) {
+ memset(&low, 0, sizeof(low));
+ low.rm_startblock = irec.rc_startblock;
+ memset(&high, 0xFF, sizeof(high));
+ high.rm_startblock = irec.rc_startblock +
+ irec.rc_blockcount - 1;
+
+ rsrci.bs = bs;
+ rsrci.nr = 0;
+ rsrci.rc = irec;
+ INIT_LIST_HEAD(&rsrci.fragments);
+ err2 = xfs_rmap_query_range(psa->rmap_cur, &low, &high,
+ &xfs_scrub_refcountbt_rmap_check, &rsrci);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->rmap_cur)) {
+ xfs_scrub_refcountbt_process_rmap_fragments(mp, &rsrci);
+ XFS_SCRUB_BTREC_CHECK(bs, irec.rc_refcount == rsrci.nr);
+ }
+
+ list_for_each_entry_safe(cur, n, &rsrci.fragments, list) {
+ list_del(&cur->list);
+ kmem_free(cur);
+ }
+ }
+
out:
return error;
}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 30/47] xfs: cross-reference refcount btree during scrub
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (28 preceding siblings ...)
2017-01-07 0:38 ` [PATCH 29/47] xfs: cross-reference reverse-mapping btree Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 31/47] xfs: scrub should cross-reference the realtime bitmap Darrick J. Wong
` (17 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
During metadata btree scrub, we should cross-reference with the
reference counts.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_refcount.c | 19 +++++++++++++
fs/xfs/libxfs/xfs_refcount.h | 3 ++
 fs/xfs/repair/agheader.c | 52 ++++++++++++++++++++++++++++++++++++
fs/xfs/repair/alloc.c | 10 +++++++
fs/xfs/repair/bmap.c | 57 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/ialloc.c | 10 +++++++
fs/xfs/repair/rmap.c | 60 ++++++++++++++++++++++++++++++++++++++++++
7 files changed, 211 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index b177ef3..c6c875d 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1696,3 +1696,22 @@ xfs_refcount_recover_cow_leftovers(
xfs_trans_cancel(tp);
goto out_free;
}
+
+/* Does the refcountbt have a record in AG blocks [bno, bno + len - 1]? */
+int
+xfs_refcount_has_record(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool *exists)
+{
+ union xfs_btree_irec low;
+ union xfs_btree_irec high;
+
+ memset(&low, 0, sizeof(low)); /* low key: every field zero except the start block */
+ low.rc.rc_startblock = bno;
+ memset(&high, 0xFF, sizeof(high)); /* high key: every field all-ones except the start block */
+ high.rc.rc_startblock = bno + len - 1;
+
+ return xfs_btree_has_record(cur, &low, &high, exists);
+}
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 098dc66..78cb142 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -67,4 +67,7 @@ extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp,
extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
xfs_agnumber_t agno);
+extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
+ xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+
#endif /* __XFS_REFCOUNT_H__ */
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index 025f17a..5a3d4c1 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -34,6 +34,7 @@
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
+#include "xfs_refcount.h"
#include "repair/common.h"
/* Set us up to check an AG header. */
@@ -164,6 +165,7 @@ xfs_scrub_superblock(
bool is_freesp;
bool has_inodes;
bool has_rmap;
+ bool has_refcount;
int error;
int err2;
@@ -327,6 +329,14 @@ xfs_scrub_superblock(
XFS_SCRUB_SB_CHECK(has_rmap);
}
+ /* Cross-reference with the refcountbt. */
+ if (psa->refc_cur) {
+ err2 = xfs_refcount_has_record(psa->refc_cur, XFS_SB_BLOCK(mp),
+ 1, &has_refcount);
+ if (xfs_scrub_should_xref(sc, err2, &psa->refc_cur))
+ XFS_SCRUB_SB_CHECK(!has_refcount);
+ }
+
out:
return error;
}
@@ -376,6 +386,7 @@ xfs_scrub_agf(
bool is_freesp;
bool has_inodes;
bool has_rmap;
+ bool has_refcount;
int have;
int level;
int error = 0;
@@ -542,6 +553,20 @@ xfs_scrub_agf(
agf->agf_btreeblks));
}
+ /* Cross-reference with the refcountbt. */
+ if (psa->refc_cur) {
+ err2 = xfs_refcount_has_record(psa->refc_cur, XFS_AGF_BLOCK(mp),
+ 1, &has_refcount);
+ if (xfs_scrub_should_xref(sc, err2, &psa->refc_cur))
+ XFS_SCRUB_AGF_CHECK(!has_refcount);
+ }
+ if (psa->refc_cur) {
+ err2 = xfs_btree_count_blocks(psa->refc_cur, &blocks);
+ if (xfs_scrub_should_xref(sc, err2, &psa->refc_cur))
+ XFS_SCRUB_AGF_CHECK(blocks == be32_to_cpu(
+ agf->agf_refcount_blocks));
+ }
+
out:
return error;
}
@@ -571,6 +596,7 @@ xfs_scrub_agfl_block(
bool is_freesp;
bool has_inodes;
bool has_rmap;
+ bool has_refcount;
int err2;
XFS_SCRUB_AGFL_CHECK(agbno > XFS_AGI_BLOCK(mp));
@@ -613,6 +639,14 @@ xfs_scrub_agfl_block(
XFS_SCRUB_AGFL_CHECK(has_rmap);
}
+ /* Cross-reference with the refcountbt. */
+ if (sc->sa.refc_cur) {
+ err2 = xfs_refcount_has_record(sc->sa.refc_cur, agbno, 1,
+ &has_refcount);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.refc_cur))
+ XFS_SCRUB_AGFL_CHECK(!has_refcount);
+ }
+
return 0;
}
@@ -630,6 +664,7 @@ xfs_scrub_agfl(
bool is_freesp;
bool has_inodes;
bool has_rmap;
+ bool has_refcount;
int error;
int err2;
@@ -676,6 +711,14 @@ xfs_scrub_agfl(
XFS_SCRUB_AGFL_CHECK(has_rmap);
}
+ /* Set up cross-reference with refcountbt. */
+ if (sc->sa.refc_cur) {
+ err2 = xfs_refcount_has_record(sc->sa.refc_cur,
+ XFS_AGFL_BLOCK(mp), 1, &has_refcount);
+ if (xfs_scrub_should_xref(sc, err2, &sc->sa.refc_cur))
+ XFS_SCRUB_AGFL_CHECK(!has_refcount);
+ }
+
/* Check the blocks in the AGFL. */
xfs_rmap_ag_owner(&sagfl.oinfo, XFS_RMAP_OWN_AG);
return xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sagfl);
@@ -714,6 +757,7 @@ xfs_scrub_agi(
bool is_freesp;
bool has_inodes;
bool has_rmap;
+ bool has_refcount;
int i;
int level;
int error = 0;
@@ -834,6 +878,14 @@ xfs_scrub_agi(
XFS_SCRUB_AGI_CHECK(has_rmap);
}
+ /* Cross-reference with the refcountbt. */
+ if (psa->refc_cur) {
+ err2 = xfs_refcount_has_record(psa->refc_cur, XFS_AGI_BLOCK(mp),
+ 1, &has_refcount);
+ if (xfs_scrub_should_xref(sc, err2, &psa->refc_cur))
+ XFS_SCRUB_AGI_CHECK(!has_refcount);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/alloc.c b/fs/xfs/repair/alloc.c
index ecbc341..0ed9fe1 100644
--- a/fs/xfs/repair/alloc.c
+++ b/fs/xfs/repair/alloc.c
@@ -33,6 +33,7 @@
#include "xfs_rmap.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
+#include "xfs_refcount.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -75,6 +76,7 @@ xfs_scrub_allocbt_helper(
xfs_extlen_t len;
bool has_rmap;
bool has_inodes;
+ bool has_refcount;
int has_otherrec;
int error = 0;
int err2;
@@ -142,6 +144,14 @@ xfs_scrub_allocbt_helper(
XFS_SCRUB_BTREC_CHECK(bs, !has_rmap);
}
+ /* Cross-reference with the refcountbt. */
+ if (psa->refc_cur) {
+ err2 = xfs_refcount_has_record(psa->refc_cur, bno, len,
+ &has_refcount);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->refc_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !has_refcount);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/bmap.c b/fs/xfs/repair/bmap.c
index b88b450..5cc7d51 100644
--- a/fs/xfs/repair/bmap.c
+++ b/fs/xfs/repair/bmap.c
@@ -38,6 +38,7 @@
#include "xfs_rmap.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
+#include "xfs_refcount.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -104,12 +105,17 @@ xfs_scrub_bmap_extent(
xfs_agnumber_t agno;
xfs_fsblock_t bno;
struct xfs_rmap_irec rmap;
+ struct xfs_refcount_irec rc;
uint64_t owner;
xfs_fileoff_t offset;
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
bool is_freesp;
bool has_inodes;
+ bool has_cowflag;
unsigned int rflags;
int has_rmap;
+ int has_refcount;
int error = 0;
int err2 = 0;
@@ -266,6 +272,57 @@ xfs_scrub_bmap_extent(
;
}
+ /*
+ * If this is a non-shared file on a reflink filesystem,
+ * check the refcountbt to see if the flag is wrong.
+ */
+ if (sa.refc_cur) {
+ if (info->whichfork == XFS_COW_FORK) {
+ /* Check this CoW staging extent. */
+ err2 = xfs_refcount_lookup_le(sa.refc_cur,
+ bno + XFS_REFC_COW_START,
+ &has_refcount);
+ if (xfs_scrub_should_xref(info->sc, err2,
+ &sa.refc_cur)) {
+ XFS_SCRUB_BMAP_GOTO(has_refcount,
+ skip_refc_xref);
+ } else
+ goto skip_refc_xref;
+
+ err2 = xfs_refcount_get_rec(sa.refc_cur, &rc,
+ &has_refcount);
+ if (xfs_scrub_should_xref(info->sc, err2,
+ &sa.refc_cur)) {
+ XFS_SCRUB_BMAP_GOTO(has_refcount,
+ skip_refc_xref);
+ } else
+ goto skip_refc_xref;
+
+ has_cowflag = !!(rc.rc_startblock & XFS_REFC_COW_START);
+ XFS_SCRUB_BMAP_CHECK(
+ (rc.rc_refcount == 1 && has_cowflag) ||
+ (rc.rc_refcount != 1 && !has_cowflag));
+ rc.rc_startblock &= ~XFS_REFC_COW_START;
+ XFS_SCRUB_BMAP_CHECK(rc.rc_startblock <= bno);
+ XFS_SCRUB_BMAP_CHECK(rc.rc_startblock <
+ rc.rc_startblock + rc.rc_blockcount);
+ XFS_SCRUB_BMAP_CHECK(bno + irec->br_blockcount <=
+ rc.rc_startblock + rc.rc_blockcount);
+ XFS_SCRUB_BMAP_CHECK(rc.rc_refcount == 1);
+ } else {
+ /* If this is shared, the inode flag must be set. */
+ err2 = xfs_refcount_find_shared(sa.refc_cur, bno,
+ irec->br_blockcount, &fbno, &flen,
+ false);
+ if (xfs_scrub_should_xref(info->sc, err2,
+ &sa.refc_cur))
+ XFS_SCRUB_BMAP_CHECK(flen == 0 ||
+ xfs_is_reflink_inode(ip));
+ }
+skip_refc_xref:
+ ;
+ }
+
xfs_scrub_ag_free(&sa);
out:
info->lastoff = irec->br_startoff + irec->br_blockcount;
diff --git a/fs/xfs/repair/ialloc.c b/fs/xfs/repair/ialloc.c
index 816eb1a..d68e354 100644
--- a/fs/xfs/repair/ialloc.c
+++ b/fs/xfs/repair/ialloc.c
@@ -38,6 +38,7 @@
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_alloc.h"
+#include "xfs_refcount.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -92,6 +93,7 @@ xfs_scrub_iallocbt_chunk(
bool is_freesp;
bool has_inodes;
bool has_rmap;
+ bool has_refcount;
int error = 0;
int err2;
@@ -148,6 +150,14 @@ xfs_scrub_iallocbt_chunk(
XFS_SCRUB_BTREC_CHECK(bs, has_rmap);
}
+ /* Cross-reference with the refcountbt. */
+ if (psa->refc_cur) {
+ err2 = xfs_refcount_has_record(psa->refc_cur, bno,
+ len, &has_refcount);
+ if (xfs_scrub_btree_should_xref(bs, err2, &psa->refc_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, !has_refcount);
+ }
+
out:
return error;
}
diff --git a/fs/xfs/repair/rmap.c b/fs/xfs/repair/rmap.c
index d4db8bf..d53ff46 100644
--- a/fs/xfs/repair/rmap.c
+++ b/fs/xfs/repair/rmap.c
@@ -33,6 +33,7 @@
#include "xfs_rmap.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
+#include "xfs_refcount.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -48,13 +49,18 @@ xfs_scrub_rmapbt_helper(
struct xfs_agf *agf;
struct xfs_scrub_ag *psa;
struct xfs_rmap_irec irec;
+ struct xfs_refcount_irec crec;
xfs_agblock_t eoag;
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
bool is_freesp;
bool non_inode;
bool is_unwritten;
bool is_bmbt;
bool is_attr;
bool has_inodes;
+ bool has_cowflag;
+ int has_refcount;
int error = 0;
int err2;
@@ -144,6 +150,60 @@ xfs_scrub_rmapbt_helper(
!has_inodes);
}
+ /* Cross-reference with the refcount btree. */
+ if (psa->refc_cur) {
+ if (irec.rm_owner == XFS_RMAP_OWN_COW) {
+ /* Check this CoW staging extent. */
+ err2 = xfs_refcount_lookup_le(psa->refc_cur,
+ irec.rm_startblock + XFS_REFC_COW_START,
+ &has_refcount);
+ if (xfs_scrub_btree_should_xref(bs, err2,
+ &psa->refc_cur)) {
+ XFS_SCRUB_BTREC_GOTO(bs, has_refcount,
+ skip_refc_xref);
+ } else
+ goto skip_refc_xref;
+
+ err2 = xfs_refcount_get_rec(psa->refc_cur, &crec,
+ &has_refcount);
+ if (xfs_scrub_btree_should_xref(bs, err2,
+ &psa->refc_cur)) {
+ XFS_SCRUB_BTREC_GOTO(bs, has_refcount,
+ skip_refc_xref);
+ } else
+ goto skip_refc_xref;
+
+ has_cowflag = !!(crec.rc_startblock & XFS_REFC_COW_START);
+ XFS_SCRUB_BTREC_CHECK(bs,
+ (crec.rc_refcount == 1 && has_cowflag) ||
+ (crec.rc_refcount != 1 && !has_cowflag));
+ crec.rc_startblock &= ~XFS_REFC_COW_START;
+ XFS_SCRUB_BTREC_CHECK(bs, crec.rc_startblock <=
+ irec.rm_startblock);
+ XFS_SCRUB_BTREC_CHECK(bs, crec.rc_startblock +
+ crec.rc_blockcount >
+ crec.rc_startblock);
+ XFS_SCRUB_BTREC_CHECK(bs, crec.rc_startblock +
+ crec.rc_blockcount >=
+ irec.rm_startblock +
+ irec.rm_blockcount);
+ XFS_SCRUB_BTREC_CHECK(bs,
+ crec.rc_refcount == 1);
+ } else {
+ /* If this is shared, the inode flag must be set. */
+ err2 = xfs_refcount_find_shared(psa->refc_cur,
+ irec.rm_startblock, irec.rm_blockcount,
+ &fbno, &flen, false);
+ if (xfs_scrub_btree_should_xref(bs, err2,
+ &psa->refc_cur))
+ XFS_SCRUB_BTREC_CHECK(bs, flen == 0 ||
+ (!non_inode && !is_attr &&
+ !is_bmbt && !is_unwritten));
+ }
+skip_refc_xref:
+ ;
+ }
+
out:
return error;
}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 31/47] xfs: scrub should cross-reference the realtime bitmap
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (29 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 30/47] xfs: cross-reference refcount btree during scrub Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 32/47] xfs: cross-reference the block mappings when possible Darrick J. Wong
` (16 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
While we're scrubbing various btrees, cross-reference the records
with the other metadata.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_rtbitmap.c | 30 ++++++++++++++++++++++++++++++
fs/xfs/repair/bmap.c | 10 ++++++++++
fs/xfs/xfs_rtalloc.h | 3 +++
3 files changed, 43 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index f4b68c0..4b8457c 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1016,3 +1016,33 @@ xfs_rtfree_extent(
}
return 0;
}
+
+/* Is every block of the realtime extent [start, start + len) free? */
+int
+xfs_rtbitmap_extent_is_free(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_rtblock_t start,
+ xfs_rtblock_t len,
+ bool *is_free)
+{
+ xfs_rtblock_t end;
+ xfs_extlen_t clen;
+ int matches;
+ int error = 0; /* a zero-length extent never enters the loop */
+
+ *is_free = false;
+ while (len) {
+ clen = len > ~0U ? ~0U : len; /* clamp each pass to what fits in clen */
+ error = xfs_rtcheck_range(mp, tp, start, clen, 1, &end,
+ &matches);
+ if (error || !matches || end < start + clen)
+ return error;
+
+ len -= end - start;
+ start = end; /* end is treated as one past the checked range above */
+ }
+
+ *is_free = true;
+ return error;
+}
diff --git a/fs/xfs/repair/bmap.c b/fs/xfs/repair/bmap.c
index 5cc7d51..bd6a620 100644
--- a/fs/xfs/repair/bmap.c
+++ b/fs/xfs/repair/bmap.c
@@ -39,6 +39,7 @@
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_refcount.h"
+#include "xfs_rtalloc.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -113,6 +114,7 @@ xfs_scrub_bmap_extent(
bool is_freesp;
bool has_inodes;
bool has_cowflag;
+ bool is_free = false;
unsigned int rflags;
int has_rmap;
int has_refcount;
@@ -168,6 +170,14 @@ xfs_scrub_bmap_extent(
irec->br_blockcount, &is_freesp);
if (xfs_scrub_should_xref(info->sc, err2, &sa.bno_cur))
XFS_SCRUB_BMAP_CHECK(!is_freesp);
+ } else {
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+ err2 = xfs_rtbitmap_extent_is_free(mp, info->sc->tp,
+ irec->br_startblock, irec->br_blockcount,
+ &is_free);
+ if (xfs_scrub_should_xref(info->sc, err2, NULL))
+ XFS_SCRUB_BMAP_CHECK(!is_free);
+ xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
}
/* Cross-reference with inobt. */
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 3036349..bd1c6a9 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -121,6 +121,8 @@ int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtblock_t start, xfs_extlen_t len,
struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
+int xfs_rtbitmap_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t start, xfs_rtblock_t len, bool *is_free);
#else
@@ -131,6 +133,7 @@ int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
# define xfs_rtcheck_range(...) (ENOSYS)
# define xfs_rtfind_forw(...) (ENOSYS)
# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS)
+# define xfs_rtbitmap_extent_is_free(m,t,s,l,i) (ENOSYS)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 32/47] xfs: cross-reference the block mappings when possible
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (30 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 31/47] xfs: scrub should cross-reference the realtime bitmap Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 33/47] xfs: create tracepoints for online repair Darrick J. Wong
` (15 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Check inode field contents against the block mappings when possible.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/repair/inode.c | 43 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/fs/xfs/repair/inode.c b/fs/xfs/repair/inode.c
index 14d9e19..51f0e78 100644
--- a/fs/xfs/repair/inode.c
+++ b/fs/xfs/repair/inode.c
@@ -39,6 +39,8 @@
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_rmap.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
#include "repair/common.h"
/*
@@ -176,6 +178,7 @@ xfs_scrub_inode(
struct xfs_dinode *dip;
xfs_ino_t ino;
unsigned long long isize;
+ unsigned long long count;
uint64_t flags2;
uint32_t nextents;
uint32_t extsize;
@@ -391,6 +394,46 @@ xfs_scrub_inode(
xfs_scrub_ag_free(&sa);
}
+ /* Walk all the extents to check nextents/naextents/nblocks. */
+ count = 0;
+ err2 = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
+ &nextents, &count);
+ if (!xfs_scrub_should_xref(sc, err2, NULL))
+ goto skip_block_check;
+ XFS_SCRUB_INODE_CHECK(nextents >= be32_to_cpu(dip->di_nextents));
+
+ err2 = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
+ &nextents, &count);
+ if (!xfs_scrub_should_xref(sc, err2, NULL))
+ goto skip_block_check;
+ XFS_SCRUB_INODE_CHECK(nextents == be16_to_cpu(dip->di_anextents));
+
+ /* Check nblocks, taking any delalloc extents into account. */
+ if (sc->ip)
+ count -= sc->ip->i_delayed_blks;
+ XFS_SCRUB_INODE_CHECK(count == be64_to_cpu(dip->di_nblocks));
+
+skip_block_check:
+ /* Make sure we don't have any written extents after EOF. */
+ if (S_ISREG(mode) && !(flags & XFS_DIFLAG_PREALLOC) &&
+ (dip->di_format == XFS_DINODE_FMT_EXTENTS ||
+ dip->di_format == XFS_DINODE_FMT_BTREE)) {
+ struct xfs_bmbt_irec got;
+ struct xfs_ifork *ifp;
+ xfs_fileoff_t lblk;
+ xfs_extnum_t idx;
+ bool found;
+
+ lblk = XFS_B_TO_FSB(mp, i_size_read(VFS_I(sc->ip)));
+ ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
+ found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &idx, &got);
+ while (found) {
+ XFS_SCRUB_INODE_PREEN(got.br_startoff < lblk ||
+ got.br_state != XFS_EXT_NORM);
+ lblk = got.br_startoff + got.br_blockcount;
+ found = xfs_iext_get_extent(ifp, ++idx, &got);
+ }
+ }
out:
if (bp)
xfs_trans_brelse(sc->tp, bp);
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 33/47] xfs: create tracepoints for online repair
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (31 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 32/47] xfs: cross-reference the block mappings when possible Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 34/47] xfs: implement the metadata repair ioctl flag Darrick J. Wong
` (14 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
These tracepoints will be used to debug the online repair routines.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_trace.h | 147 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 147 insertions(+)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c52e3a5..becdb7a 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3742,6 +3742,153 @@ DEFINE_SCRUB_AG_LOCK_EVENT(xfs_scrub_ag_can_lock);
DEFINE_SCRUB_AG_LOCK_EVENT(xfs_scrub_ag_may_deadlock);
DEFINE_SCRUB_AG_LOCK_EVENT(xfs_scrub_ag_lock_all);
+/* repair tracepoints */
+DEFINE_SCRUB_EVENT(xfs_repair_attempt);
+DEFINE_SCRUB_EVENT(xfs_repair_done);
+DEFINE_BUSY_EVENT(xfs_repair_free_or_unmap_extent);
+DEFINE_BUSY_EVENT(xfs_repair_collect_btree_extent);
+TRACE_EVENT(xfs_repair_init_btblock,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+ uint32_t magic),
+ TP_ARGS(mp, agno, agbno, magic),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno)
+ __field(uint32_t, magic)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agbno = agbno;
+ __entry->magic = magic;
+ ),
+ TP_printk("dev %d:%d agno %u agbno %u magic 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->agbno, __entry->magic)
+)
+TRACE_EVENT(xfs_repair_find_ag_btree_roots_helper,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+ uint32_t magic, uint16_t level),
+ TP_ARGS(mp, agno, agbno, magic, level),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno)
+ __field(uint32_t, magic)
+ __field(uint16_t, level)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->agbno = agbno;
+ __entry->magic = magic;
+ __entry->level = level;
+ ),
+ TP_printk("dev %d:%d agno %u agbno %u magic 0x%x level %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->agbno, __entry->magic, __entry->level)
+)
+TRACE_EVENT(xfs_repair_calc_ag_resblks,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agino_t icount, xfs_agblock_t aglen, xfs_agblock_t freelen,
+ xfs_agblock_t usedlen),
+ TP_ARGS(mp, agno, icount, aglen, freelen, usedlen),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, icount)
+ __field(xfs_agblock_t, aglen)
+ __field(xfs_agblock_t, freelen)
+ __field(xfs_agblock_t, usedlen)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->icount = icount;
+ __entry->aglen = aglen;
+ __entry->freelen = freelen;
+ __entry->usedlen = usedlen;
+ ),
+ TP_printk("dev %d:%d agno %u icount %u aglen %u freelen %u usedlen %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->icount, __entry->aglen, __entry->freelen,
+ __entry->usedlen)
+)
+TRACE_EVENT(xfs_repair_calc_ag_resblks_btsize,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agblock_t bnobt_sz, xfs_agblock_t inobt_sz,
+ xfs_agblock_t rmapbt_sz, xfs_agblock_t refcbt_sz),
+ TP_ARGS(mp, agno, bnobt_sz, inobt_sz, rmapbt_sz, refcbt_sz),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, bnobt_sz)
+ __field(xfs_agblock_t, inobt_sz)
+ __field(xfs_agblock_t, rmapbt_sz)
+ __field(xfs_agblock_t, refcbt_sz)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->bnobt_sz = bnobt_sz;
+ __entry->inobt_sz = inobt_sz;
+ __entry->rmapbt_sz = rmapbt_sz;
+ __entry->refcbt_sz = refcbt_sz;
+ ),
+ TP_printk("dev %d:%d agno %u bno %u ino %u rmap %u refcount %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->bnobt_sz, __entry->inobt_sz, __entry->rmapbt_sz,
+ __entry->refcbt_sz)
+)
+TRACE_EVENT(xfs_repair_reset_counters,
+ TP_PROTO(struct xfs_mount *mp),
+ TP_ARGS(mp),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ ),
+ TP_printk("dev %d:%d",
+ MAJOR(__entry->dev), MINOR(__entry->dev))
+)
+
+DEFINE_BUSY_EVENT(xfs_repair_agfl_insert);
+DEFINE_RMAPBT_EVENT(xfs_repair_alloc_extent_fn);
+DEFINE_RMAPBT_EVENT(xfs_repair_ialloc_extent_fn);
+TRACE_EVENT(xfs_repair_ialloc_insert,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agino_t startino, uint16_t holemask, uint8_t count,
+ uint8_t freecount, uint64_t freemask),
+ TP_ARGS(mp, agno, startino, holemask, count, freecount, freemask),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, startino)
+ __field(uint16_t, holemask)
+ __field(uint8_t, count)
+ __field(uint8_t, freecount)
+ __field(uint64_t, freemask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->startino = startino;
+ __entry->holemask = holemask;
+ __entry->count = count;
+ __entry->freecount = freecount;
+ __entry->freemask = freemask;
+ ),
+ TP_printk("dev %d:%d agno %u startino %u holemask 0x%x count %u freecount %u freemask 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->startino, __entry->holemask, __entry->count,
+ __entry->freecount, __entry->freemask)
+)
+DEFINE_RMAPBT_EVENT(xfs_repair_rmap_extent_fn);
+DEFINE_REFCOUNT_EXTENT_EVENT(xfs_repair_refcount_extent_fn);
+DEFINE_RMAPBT_EVENT(xfs_repair_bmap_extent_fn);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 34/47] xfs: implement the metadata repair ioctl flag
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (32 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 33/47] xfs: create tracepoints for online repair Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 35/47] xfs: add helper routines for the repair code Darrick J. Wong
` (13 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Plumb in the pieces necessary to make the "repair" subfunction of
the scrub ioctl actually work.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/repair/common.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++--
fs/xfs/xfs_error.h | 4 +-
2 files changed, 98 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 458057a..bbcee6b 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -41,6 +41,7 @@
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
+#include "xfs_error.h"
#include "repair/xfs_scrub.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -108,8 +109,43 @@
* the metadata is correct but otherwise suboptimal, there's a "preen"
* flag to signal that. Finally, if we were unable to access a data
* structure to perform cross-referencing, we can signal that as well.
+ *
+ * If a piece of metadata proves corrupt or suboptimal, the userspace
+ * program can ask the kernel to apply some tender loving care (TLC) to
+ * the metadata object. "Corruption" is defined by metadata violating
+ * the on-disk specification; operations cannot continue if the
+ * violation is left untreated. It is possible for XFS to continue if
+ * an object is "suboptimal", however performance may be degraded.
+ * Repairs are usually performed by rebuilding the metadata entirely out
+ * of redundant metadata. Optimizing, on the other hand, can sometimes
+ * be done without rebuilding entire structures.
+ *
+ * Generally speaking, the repair code has the following code structure:
+ * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock.
+ * The first check helps us figure out if we need to rebuild or simply
+ * optimize the structure so that the rebuild knows what to do. The
+ * second check evaluates the completeness of the repair; that is what
+ * is reported to userspace.
*/
+/* True if repair was requested and sm_flags has CORRUPT or PREEN set. */
+static inline bool
+xfs_scrub_should_fix(
+ struct xfs_scrub_metadata *sm)
+{
+ return (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR) &&
+ (sm->sm_flags & (XFS_SCRUB_FLAG_CORRUPT | XFS_SCRUB_FLAG_PREEN));
+}
+
+/* Clear the CORRUPT/PREEN/XREF_FAIL flags; returns true if any flags remain. */
+static inline bool
+xfs_scrub_reset_corruption_flags(
+ struct xfs_scrub_metadata *sm)
+{
+ return sm->sm_flags &= ~(XFS_SCRUB_FLAG_CORRUPT | XFS_SCRUB_FLAG_PREEN |
+ XFS_SCRUB_FLAG_XREF_FAIL);
+}
+
/* Check for operational errors. */
bool
xfs_scrub_op_ok(
@@ -602,7 +638,10 @@ xfs_scrub_teardown(
if (sc->ag_lock.agmask != sc->ag_lock.__agmask)
kmem_free(sc->ag_lock.agmask);
sc->ag_lock.agmask = NULL;
- xfs_trans_cancel(sc->tp);
+ if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_FLAG_REPAIR))
+ error = xfs_trans_commit(sc->tp);
+ else
+ xfs_trans_cancel(sc->tp);
sc->tp = NULL;
if (sc->ip != NULL) {
xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
@@ -745,6 +784,8 @@ xfs_scrub_metadata(
struct xfs_mount *mp = ip->i_mount;
const struct xfs_scrub_meta_fns *fns;
bool deadlocked = false;
+ bool already_fixed = false;
+ bool was_corrupt = false;
int error = 0;
trace_xfs_scrub(ip, sm->sm_type, sm->sm_agno, sm->sm_ino, sm->sm_gen,
@@ -758,8 +799,6 @@ xfs_scrub_metadata(
sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
goto out;
- if (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR)
- goto out;
error = -ENOENT;
if (sm->sm_type > XFS_SCRUB_TYPE_MAX)
goto out;
@@ -767,6 +806,14 @@ xfs_scrub_metadata(
if (fns->scrub == NULL)
goto out;
error = -EOPNOTSUPP;
+ if ((sm->sm_flags & XFS_SCRUB_FLAG_REPAIR) &&
+ (fns->repair == NULL || !xfs_sb_version_hascrc(&mp->m_sb)))
+ goto out;
+
+ error = -EROFS;
+ if ((sm->sm_flags & XFS_SCRUB_FLAG_REPAIR) &&
+ (mp->m_flags & XFS_MOUNT_RDONLY))
+ goto out;
/* Do we even have this type of metadata? */
error = -ENOENT;
@@ -800,8 +847,51 @@ xfs_scrub_metadata(
} else if (error)
goto out_teardown;
- if (sm->sm_flags & XFS_SCRUB_FLAG_CORRUPT)
- xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
+ /* Let debug users force us into the repair routines. */
+ if ((sm->sm_flags & XFS_SCRUB_FLAG_REPAIR) && !already_fixed &&
+ XFS_TEST_ERROR(false, mp,
+ XFS_ERRTAG_FORCE_SCRUB_REPAIR,
+ XFS_RANDOM_FORCE_SCRUB_REPAIR)) {
+ sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+ }
+ if (!already_fixed)
+ was_corrupt = (sm->sm_flags & XFS_SCRUB_FLAG_CORRUPT);
+
+ if (!already_fixed && xfs_scrub_should_fix(sm)) {
+ xfs_scrub_ag_btcur_free(&sc.sa);
+
+ /* Ok, something's wrong. Repair it. */
+ trace_xfs_repair_attempt(ip, sm->sm_type, sm->sm_agno,
+ sm->sm_ino, sm->sm_gen, sm->sm_flags, error);
+ error = fns->repair(&sc);
+ trace_xfs_repair_done(ip, sm->sm_type, sm->sm_agno,
+ sm->sm_ino, sm->sm_gen, sm->sm_flags, error);
+ if (error)
+ goto out_teardown;
+
+ /*
+ * Commit the fixes and perform a second dry-run scrub
+ * so that we can tell userspace if we fixed the problem.
+ */
+ error = xfs_scrub_teardown(&sc, ip, error);
+ if (error)
+ goto out;
+ xfs_scrub_reset_corruption_flags(sm);
+ already_fixed = true;
+ goto retry_op;
+ }
+
+ if (sm->sm_flags & XFS_SCRUB_FLAG_CORRUPT) {
+ char *errstr;
+
+ if (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR)
+ errstr = "Corruption not fixed during online repair. "
+ "Unmount and run xfs_repair.";
+ else
+ errstr = "Corruption detected during scrub.";
+ xfs_alert_ratelimited(mp, errstr);
+ } else if (already_fixed && was_corrupt)
+ xfs_alert_ratelimited(mp, "Corruption repaired during scrub.");
out_teardown:
error = xfs_scrub_teardown(&sc, ip, error);
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 05f8666..4c22d9a 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -96,7 +96,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25
#define XFS_ERRTAG_BMAP_FINISH_ONE 26
#define XFS_ERRTAG_AG_RESV_CRITICAL 27
-#define XFS_ERRTAG_MAX 28
+#define XFS_ERRTAG_FORCE_SCRUB_REPAIR 28
+#define XFS_ERRTAG_MAX 29
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -129,6 +130,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_RANDOM_REFCOUNT_FINISH_ONE 1
#define XFS_RANDOM_BMAP_FINISH_ONE 1
#define XFS_RANDOM_AG_RESV_CRITICAL 4
+#define XFS_RANDOM_FORCE_SCRUB_REPAIR 1
#ifdef DEBUG
extern int xfs_error_test_active;
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 35/47] xfs: add helper routines for the repair code
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (33 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 34/47] xfs: implement the metadata repair ioctl flag Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 36/47] xfs: repair superblocks Darrick J. Wong
` (12 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Add some helper functions for repair functions that will help us to
allocate and initialize new metadata blocks for btrees that we're
rebuilding.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_alloc_btree.c | 9
fs/xfs/libxfs/xfs_alloc_btree.h | 2
fs/xfs/libxfs/xfs_bmap_btree.c | 9
fs/xfs/libxfs/xfs_bmap_btree.h | 3
fs/xfs/libxfs/xfs_btree.c | 4
fs/xfs/libxfs/xfs_btree.h | 2
fs/xfs/libxfs/xfs_ialloc_btree.c | 9
fs/xfs/libxfs/xfs_ialloc_btree.h | 3
fs/xfs/libxfs/xfs_rmap.c | 51 +++
fs/xfs/libxfs/xfs_rmap.h | 3
fs/xfs/repair/common.c | 8
fs/xfs/repair/common.h | 50 ++
fs/xfs/repair/repair.c | 763 ++++++++++++++++++++++++++++++++++++++
14 files changed, 913 insertions(+), 4 deletions(-)
create mode 100644 fs/xfs/repair/repair.c
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 7cee02a..d7bb48a 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -114,6 +114,7 @@ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix repair/, \
ialloc.o \
inode.o \
refcount.o \
+ repair.o \
rmap.o \
symlink.o \
)
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 10d6c12..80b89c9 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -534,3 +534,12 @@ xfs_allocbt_maxrecs(
return blocklen / sizeof(xfs_alloc_rec_t);
return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
}
+
+/*
+ * Calculate the freespace btree size for some records.
+ *
+ * @len is passed to xfs_btree_calc_size() as the record count, with
+ * mp->m_alloc_mnr as the per-level limits.  The result is returned as
+ * an xfs_extlen_t.
+ */
+xfs_extlen_t
+xfs_allocbt_calc_size(
+ struct xfs_mount *mp,
+ unsigned long long len)
+{
+ return xfs_btree_calc_size(mp, mp->m_alloc_mnr, len);
+}
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 45e189e..2fd5472 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
@@ -61,5 +61,7 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_buf *,
xfs_agnumber_t, xfs_btnum_t);
extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
+extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp,
+ unsigned long long len);
#endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 33ab7f3..c704905 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -913,3 +913,12 @@ xfs_bmbt_change_owner(
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
return error;
}
+
+/*
+ * Calculate the bmap btree size for some records.
+ *
+ * @len is passed to xfs_btree_calc_size() as the record count, with
+ * mp->m_bmap_dmnr as the per-level limits.
+ *
+ * NOTE(review): xfs_btree_calc_size() is described as sizing
+ * short-format (per-AG) btrees; confirm the result is what callers
+ * expect for the bmbt.
+ */
+unsigned long long
+xfs_bmbt_calc_size(
+ struct xfs_mount *mp,
+ unsigned long long len)
+{
+ return xfs_btree_calc_size(mp, mp->m_bmap_dmnr, len);
+}
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 819a8a4..835f0a3 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -140,4 +140,7 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
+extern unsigned long long xfs_bmbt_calc_size(struct xfs_mount *mp,
+ unsigned long long len);
+
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 3fa30a2..3788adb 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4840,7 +4840,7 @@ xfs_btree_query_all(
* Calculate the number of blocks needed to store a given number of records
* in a short-format (per-AG metadata) btree.
*/
-xfs_extlen_t
+unsigned long long
xfs_btree_calc_size(
struct xfs_mount *mp,
uint *limits,
@@ -4848,7 +4848,7 @@ xfs_btree_calc_size(
{
int level;
int maxrecs;
- xfs_extlen_t rval;
+ unsigned long long rval;
maxrecs = limits[0];
for (level = 0, rval = 0; len > 1; level++) {
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index ca2cd5a..87f1e0b 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -480,7 +480,7 @@ bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
unsigned long len);
-xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits,
+unsigned long long xfs_btree_calc_size(struct xfs_mount *mp, uint *limits,
unsigned long long len);
/* return codes */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 09d8cb0..ec784c3 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -500,3 +500,12 @@ xfs_inobt_rec_check_count(
return 0;
}
#endif /* DEBUG */
+
+/*
+ * Calculate the inobt btree size for some records.
+ *
+ * @len is passed to xfs_btree_calc_size() as the record count, with
+ * mp->m_inobt_mnr as the per-level limits.  The result is returned as
+ * an xfs_extlen_t.
+ */
+xfs_extlen_t
+xfs_iallocbt_calc_size(
+ struct xfs_mount *mp,
+ unsigned long long len)
+{
+ return xfs_btree_calc_size(mp, mp->m_inobt_mnr, len);
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index bd88453..3046c11 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *,
#define xfs_inobt_rec_check_count(mp, rec) 0
#endif /* DEBUG */
+extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
+ unsigned long long len);
+
#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index cce51cb..e61d816 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2364,3 +2364,54 @@ xfs_rmap_record_exists(
irec.rm_startblock + irec.rm_blockcount >= bno + len);
return 0;
}
+
+/* Query state shared between xfs_rmap_has_other_keys() and its helper. */
+struct xfs_rmap_has_other_keys {
+ /* Owner, offset and flags unpacked from the caller's owner info. */
+ uint64_t owner;
+ uint64_t offset;
+ /* Caller's result; the helper sets it true on the first mismatch. */
+ bool *has_rmap;
+ unsigned int flags;
+};
+
+/*
+ * Range-query callback: decide whether this rmap record belongs to
+ * somebody other than the owner we were asked about.  A record that
+ * matches on owner, offset and key flags is skipped; anything else sets
+ * the caller's flag and aborts the query.
+ */
+STATIC int
+xfs_rmap_has_other_keys_helper(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_rmap_has_other_keys	*want = priv;
+	unsigned int			common;
+
+	/* Key flags that both the wanted owner and this record carry. */
+	common = (want->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS;
+
+	if (want->owner != rec->rm_owner ||
+	    want->offset != rec->rm_offset ||
+	    common != want->flags) {
+		*want->has_rmap = true;
+		return XFS_BTREE_QUERY_RANGE_ABORT;
+	}
+
+	return 0;
+}
+
+/*
+ * Does any rmap record overlapping [bno, bno + len) carry owner info
+ * that differs from @oinfo?  *has_rmap is set to false up front and
+ * flipped to true by the query helper on the first such record.
+ * Returns whatever xfs_rmap_query_range() returns.
+ */
+int
+xfs_rmap_has_other_keys(
+	struct xfs_btree_cur		*cur,
+	xfs_fsblock_t			bno,
+	xfs_filblks_t			len,
+	struct xfs_owner_info		*oinfo,
+	bool				*has_rmap)
+{
+	struct xfs_rmap_has_other_keys	query;
+	struct xfs_rmap_irec		lo_key = {0};
+	struct xfs_rmap_irec		hi_key;
+
+	/* Assume nothing else is there until the helper says otherwise. */
+	*has_rmap = false;
+	query.has_rmap = has_rmap;
+	xfs_owner_info_unpack(oinfo, &query.owner, &query.offset,
+			&query.flags);
+
+	/* Low key: all zeroes except the start; high key: all ones. */
+	lo_key.rm_startblock = bno;
+	memset(&hi_key, 0xFF, sizeof(hi_key));
+	hi_key.rm_startblock = bno + len - 1;
+
+	return xfs_rmap_query_range(cur, &lo_key, &hi_key,
+			xfs_rmap_has_other_keys_helper, &query);
+}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index ea359ab..606efe3 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -222,5 +222,8 @@ int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_fsblock_t bno,
int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_fsblock_t bno,
xfs_filblks_t len, struct xfs_owner_info *oinfo,
bool *has_rmap);
+int xfs_rmap_has_other_keys(struct xfs_btree_cur *cur, xfs_fsblock_t bno,
+ xfs_filblks_t len, struct xfs_owner_info *oinfo,
+ bool *has_rmap);
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index bbcee6b..a3efaf0 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -634,6 +634,8 @@ xfs_scrub_teardown(
struct xfs_inode *ip_in,
int error)
{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+
xfs_scrub_ag_free(&sc->sa);
if (sc->ag_lock.agmask != sc->ag_lock.__agmask)
kmem_free(sc->ag_lock.agmask);
@@ -655,6 +657,8 @@ xfs_scrub_teardown(
kmem_free(sc->buf);
sc->buf = NULL;
}
+ if (sc->reset_counters && !error)
+ error = xfs_repair_reset_counters(mp);
return error;
}
@@ -667,11 +671,13 @@ xfs_scrub_setup(
bool retry_deadlocked)
{
struct xfs_mount *mp = ip->i_mount;
+ xfs_extlen_t resblks;
memset(sc, 0, sizeof(*sc));
sc->sm = sm;
+ resblks = xfs_repair_calc_ag_resblks(sc, ip, sm);
return xfs_scrub_trans_alloc(sm, mp, &M_RES(mp)->tr_itruncate,
- 0, 0, 0, &sc->tp);
+ resblks, 0, 0, &sc->tp);
}
/*
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 3c110b7..1f01ab3 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -61,6 +61,7 @@ struct xfs_scrub_context {
struct xfs_inode *ip;
void *buf;
bool retry;
+ bool reset_counters;
/* State tracking for multi-AG operations. */
struct xfs_scrub_ag_lock ag_lock;
@@ -97,6 +98,9 @@ xfs_scrub_trans_alloc(
uint flags,
struct xfs_trans **tpp)
{
+ if (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR)
+ return xfs_trans_alloc(mp, resp, blocks, rtextents, flags, tpp);
+
return xfs_trans_alloc_empty(mp, tpp);
}
@@ -274,4 +278,50 @@ int xfs_scrub_symlink(struct xfs_scrub_context *sc);
int xfs_scrub_rtbitmap(struct xfs_scrub_context *sc);
int xfs_scrub_rtsummary(struct xfs_scrub_context *sc);
+/* Repair helpers */
+
+/*
+ * One entry per btree whose root xfs_repair_find_ag_btree_roots() should
+ * look for.  Callers pass an array of these; an entry with a NULL
+ * buf_ops terminates the array.
+ */
+struct xfs_repair_find_ag_btree {
+ /* Only rmap records with this owner are considered. */
+ uint64_t rmap_owner;
+ /* Verifier run on candidate blocks; NULL marks the end of the array. */
+ const struct xfs_buf_ops *buf_ops;
+ /* Expected bb_magic of the btree's blocks. */
+ uint32_t magic;
+ /* Output: AG block of the best candidate, or NULLAGBLOCK if none. */
+ xfs_agblock_t root;
+ /* Output: btree level recorded for that candidate. */
+ unsigned int level;
+};
+
+/* One extent on a list of btree blocks collected during repair. */
+struct xfs_repair_btree_extent {
+ /* Linkage into the caller's extent list. */
+ struct list_head list;
+ /* First filesystem block of the extent. */
+ xfs_fsblock_t fsbno;
+ /* Length of the extent, in blocks. */
+ xfs_extlen_t len;
+};
+
+int xfs_repair_roll_ag_trans(struct xfs_scrub_context *sc);
+bool xfs_repair_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
+ enum xfs_ag_resv_type type);
+int xfs_repair_alloc_ag_block(struct xfs_scrub_context *sc,
+ struct xfs_owner_info *oinfo,
+ xfs_fsblock_t *fsbno, enum xfs_ag_resv_type resv);
+int xfs_repair_init_btblock(struct xfs_scrub_context *sc, xfs_fsblock_t fsb,
+ struct xfs_buf **bpp, __u32 magic,
+ const struct xfs_buf_ops *ops);
+int xfs_repair_fix_freelist(struct xfs_scrub_context *sc, bool can_shrink);
+int xfs_repair_put_freelist(struct xfs_scrub_context *sc, xfs_agblock_t agbno);
+int xfs_repair_collect_btree_extent(struct xfs_mount *mp,
+ struct list_head *btlist,
+ xfs_fsblock_t fsbno, xfs_extlen_t len);
+int xfs_repair_reap_btree_extents(struct xfs_scrub_context *sc,
+ struct list_head *btlist,
+ struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type);
+void xfs_repair_cancel_btree_extents(struct xfs_scrub_context *sc,
+ struct list_head *btlist);
+int xfs_repair_subtract_extents(struct xfs_mount *mp, struct list_head *exlist,
+ struct list_head *sublist);
+int xfs_repair_find_ag_btree_roots(struct xfs_scrub_context *sc,
+ struct xfs_buf *agf_bp,
+ struct xfs_repair_find_ag_btree *btree_info);
+int xfs_repair_reset_counters(struct xfs_mount *mp);
+xfs_extlen_t xfs_repair_calc_ag_resblks(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm);
+
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/repair.c b/fs/xfs/repair/repair.c
new file mode 100644
index 0000000..2791d2c
--- /dev/null
+++ b/fs/xfs/repair/repair.c
@@ -0,0 +1,763 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_extent_busy.h"
+#include "xfs_ag_resv.h"
+#include "xfs_trans_space.h"
+#include "repair/common.h"
+
+/*
+ * Roll a transaction, keeping the AG headers locked and reinitializing
+ * the btree cursors.
+ *
+ * The AGI, AGF and AGFL buffers in sc->sa are held across the roll and
+ * then either joined to the new transaction or have their hold released.
+ * Returns 0 on success or the error from xfs_trans_roll().
+ *
+ * NOTE(review): the comment above mentions btree cursors, but nothing in
+ * this function touches a cursor -- confirm that callers do that.
+ */
+int
+xfs_repair_roll_ag_trans(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ /* Keep the AG header buffers locked so we can keep going. */
+ xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
+ xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+ xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
+
+ /* Roll the transaction. */
+ tp = sc->tp;
+ error = xfs_trans_roll(&sc->tp, NULL);
+ /*
+ * NOTE(review): on failure we return with the holds set above still
+ * in place; presumably teardown releases the buffers -- confirm.
+ */
+ if (error)
+ return error;
+
+ /* Join the buffer to the new transaction or release the hold. */
+ if (sc->tp != tp) {
+ xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
+ xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
+ xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
+ } else {
+ xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
+ xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
+ xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
+ }
+
+ /* error is zero here; the failure case returned above. */
+ return error;
+}
+
+/*
+ * Does the given AG have enough space to rebuild a btree?  Neither AG
+ * reservation can be critical, and we must have enough space (factoring
+ * in AG reservations) to construct a whole btree.
+ *
+ * Returns true only if more than @nr_blocks free blocks remain once the
+ * blocks still owed to the @type reservation are set aside.
+ */
+bool
+xfs_repair_ag_has_space(
+	struct xfs_perag		*pag,
+	xfs_extlen_t			nr_blocks,
+	enum xfs_ag_resv_type		type)
+{
+	xfs_extlen_t			needed;
+
+	if (xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) ||
+	    xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA))
+		return false;
+
+	/*
+	 * Both quantities are unsigned, so subtracting a reservation that
+	 * is at least as large as the free space would wrap around and
+	 * make a nearly full AG look like it has plenty of room.
+	 */
+	needed = xfs_ag_resv_needed(pag, type);
+	if (pag->pagf_freeblks <= needed)
+		return false;
+
+	return pag->pagf_freeblks - needed > nr_blocks;
+}
+
+/*
+ * Allocate a block in an AG.
+ *
+ * For XFS_AG_RESV_AGFL the block is taken off the AGFL of sc->sa.agno.
+ * Otherwise a single block is requested from xfs_alloc_vextent() with
+ * the allocation target set to block 0 of sc->sa.agno.  On success the
+ * new block is returned in *fsbno.  Returns 0, -ENOSPC if the allocator
+ * found nothing, or the allocator's error.
+ */
+int
+xfs_repair_alloc_ag_block(
+ struct xfs_scrub_context *sc,
+ struct xfs_owner_info *oinfo,
+ xfs_fsblock_t *fsbno,
+ enum xfs_ag_resv_type resv)
+{
+ struct xfs_alloc_arg args = {0};
+ xfs_agblock_t bno;
+ int error;
+
+ /* AGFL blocks come straight off the free list; oinfo is unused here. */
+ if (resv == XFS_AG_RESV_AGFL) {
+ error = xfs_alloc_get_freelist(sc->tp, sc->sa.agf_bp, &bno, 1);
+ if (error)
+ return error;
+ /*
+ * NOTE(review): bno is not checked against NULLAGBLOCK; confirm
+ * xfs_alloc_get_freelist() cannot hand back an empty result here.
+ */
+ xfs_extent_busy_reuse(sc->tp->t_mountp, sc->sa.agno, bno,
+ 1, false);
+ *fsbno = XFS_AGB_TO_FSB(sc->tp->t_mountp, sc->sa.agno, bno);
+ return 0;
+ }
+
+ /* Ask the allocator for exactly one block near the start of the AG. */
+ args.tp = sc->tp;
+ args.mp = sc->tp->t_mountp;
+ args.oinfo = *oinfo;
+ args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.agno, 0);
+ args.minlen = 1;
+ args.maxlen = 1;
+ args.prod = 1;
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ args.resv = resv;
+
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ return error;
+ /* The allocator signals "no space" through fsbno, not through error. */
+ if (args.fsbno == NULLFSBLOCK)
+ return -ENOSPC;
+ ASSERT(args.len == 1);
+ *fsbno = args.fsbno;
+
+ return 0;
+}
+
+/*
+ * Initialize an AG block to a zeroed out btree header.
+ *
+ * Grabs a buffer for @fsb in the scrub transaction, zeroes it, stamps a
+ * level-0, zero-record btree block header with @magic on it, logs the
+ * whole buffer and attaches @ops.  The buffer is returned in *bpp.
+ * Returns 0, or -ENOMEM if no buffer could be obtained.
+ */
+int
+xfs_repair_init_btblock(
+	struct xfs_scrub_context	*sc,
+	xfs_fsblock_t			fsb,
+	struct xfs_buf			**bpp,
+	__u32				magic,
+	const struct xfs_buf_ops	*ops)
+{
+	struct xfs_trans		*tp = sc->tp;
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_buf			*bp;
+
+	trace_xfs_repair_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
+			XFS_FSB_TO_AGBNO(mp, fsb), magic);
+
+	ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
+	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
+			XFS_FSB_TO_BB(mp, 1), 0);
+	/* Don't dereference a buffer we failed to get. */
+	if (!bp)
+		return -ENOMEM;
+
+	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
+	xfs_btree_init_block(mp, bp, magic, 0, 0, sc->sa.agno,
+			XFS_BTREE_CRC_BLOCKS);
+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
+	/*
+	 * The logged range is given as first/last byte offsets, whereas
+	 * b_length counts basic blocks; convert so the whole block that
+	 * was just zeroed gets logged.
+	 */
+	xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
+	bp->b_ops = ops;
+	*bpp = bp;
+
+	return 0;
+}
+
+/*
+ * Ensure the freelist is full.
+ *
+ * Runs xfs_alloc_fix_freelist() against sc->sa.agno in the scrub
+ * transaction.  Unless @can_shrink is set, XFS_ALLOC_FLAG_NOSHRINK is
+ * passed to the allocator.  Returns the allocator's error code.
+ */
+int
+xfs_repair_fix_freelist(
+ struct xfs_scrub_context *sc,
+ bool can_shrink)
+{
+ struct xfs_alloc_arg args = {0};
+ int error;
+
+ args.mp = sc->tp->t_mountp;
+ args.tp = sc->tp;
+ args.agno = sc->sa.agno;
+ args.alignment = 1;
+ /* Take a perag reference for the call; dropped again below. */
+ args.pag = xfs_perag_get(args.mp, sc->sa.agno);
+
+ error = xfs_alloc_fix_freelist(&args,
+ can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
+ xfs_perag_put(args.pag);
+
+ return error;
+}
+
+/*
+ * Put a block back on the AGFL.
+ *
+ * Adds an XFS_RMAP_OWN_AG reverse mapping for @agbno in sc->sa.agno,
+ * pushes the block onto the AGFL, marks it busy (skipping discard) and
+ * finally lets the freelist be fixed up with shrinking allowed.
+ * Returns 0 or the first error encountered.
+ */
+int
+xfs_repair_put_freelist(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno)
+{
+ struct xfs_owner_info oinfo;
+ int error;
+
+ /*
+ * Since we're "freeing" a lost block onto the AGFL, we have to
+ * create an rmap for the block prior to merging it or else other
+ * parts will break.
+ */
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+ error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno, agbno, 1,
+ &oinfo);
+ if (error)
+ return error;
+
+ /* Put the block on the AGFL. */
+ error = xfs_alloc_put_freelist(sc->tp, sc->sa.agf_bp, sc->sa.agfl_bp,
+ agbno, 0);
+ if (error)
+ return error;
+ xfs_extent_busy_insert(sc->tp, sc->sa.agno, agbno, 1,
+ XFS_EXTENT_BUSY_SKIP_DISCARD);
+
+ /* Make sure the AGFL doesn't overfill. */
+ return xfs_repair_fix_freelist(sc, true);
+}
+
+/*
+ * For a given metadata extent and owner, delete the associated rmap.
+ * If the block has no other owners, free it.
+ *
+ * The extent is processed one block at a time and the transaction is
+ * rolled after every block.  When repairing on behalf of an inode
+ * (sc->ip set) the AGF is read here for each block; otherwise the AGF
+ * already attached to the scrub context is used.  Returns 0 or the
+ * first error encountered.
+ */
+STATIC int
+xfs_repair_free_or_unmap_extent(
+	struct xfs_scrub_context	*sc,
+	xfs_fsblock_t			fsbno,
+	xfs_extlen_t			len,
+	struct xfs_owner_info		*oinfo,
+	enum xfs_ag_resv_type		resv)
+{
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	struct xfs_btree_cur		*rmap_cur;
+	struct xfs_buf			*agf_bp;	/* AGF used for this block */
+	struct xfs_buf			*read_agf_bp;	/* AGF we read ourselves */
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+	bool				has_other_rmap;
+	int				error = 0;
+
+	ASSERT(xfs_sb_version_hasrmapbt(&mp->m_sb));
+	agno = XFS_FSB_TO_AGNO(mp, fsbno);
+	agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
+
+	trace_xfs_repair_free_or_unmap_extent(mp, agno, agbno, len);
+
+	for (; len > 0 && !error; len--, agbno++, fsbno++) {
+		ASSERT(sc->ip != NULL || agno == sc->sa.agno);
+
+		/* Pick the AGF: our own read for inodes, else the context's. */
+		read_agf_bp = NULL;
+		if (sc->ip) {
+			error = xfs_alloc_read_agf(mp, sc->tp, agno, 0,
+					&read_agf_bp);
+			if (error)
+				break;
+		}
+		agf_bp = read_agf_bp ? read_agf_bp : sc->sa.agf_bp;
+
+		/* Can we find any other rmappings? */
+		rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, agno);
+		error = xfs_rmap_has_other_keys(rmap_cur, agbno, 1, oinfo,
+				&has_other_rmap);
+		xfs_btree_del_cursor(rmap_cur,
+				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+		/*
+		 * If there are other rmappings, this block is cross
+		 * linked and must not be freed.  Remove the reverse
+		 * mapping and move on.  Otherwise, we were the only
+		 * owner of the block, so free the extent, which will
+		 * also remove the rmap.
+		 *
+		 * The rmap removal needs a live AGF buffer, so the one
+		 * selected above is handed in and a buffer we read is
+		 * only released once this step is done.
+		 */
+		if (!error) {
+			if (has_other_rmap)
+				error = xfs_rmap_free(sc->tp, agf_bp, agno,
+						agbno, 1, oinfo);
+			else if (resv == XFS_AG_RESV_AGFL)
+				error = xfs_repair_put_freelist(sc, agbno);
+			else
+				error = xfs_free_extent(sc->tp, fsbno, 1,
+						oinfo, resv);
+		}
+
+		if (read_agf_bp)
+			xfs_trans_brelse(sc->tp, read_agf_bp);
+		if (error)
+			break;
+
+		if (sc->ip)
+			error = xfs_trans_roll(&sc->tp, sc->ip);
+		else
+			error = xfs_repair_roll_ag_trans(sc);
+	}
+
+	return error;
+}
+
+/*
+ * Remember a dead btree extent so it can be disposed of later.  A new
+ * list node describing [fsbno, fsbno + len) is appended to @btlist.
+ * Returns 0, or -ENOMEM if the node could not be allocated.
+ */
+int
+xfs_repair_collect_btree_extent(
+	struct xfs_mount		*mp,
+	struct list_head		*btlist,
+	xfs_fsblock_t			fsbno,
+	xfs_extlen_t			len)
+{
+	struct xfs_repair_btree_extent	*ext;
+
+	trace_xfs_repair_collect_btree_extent(mp, XFS_FSB_TO_AGNO(mp, fsbno),
+			XFS_FSB_TO_AGBNO(mp, fsbno), len);
+
+	ext = kmem_alloc(sizeof(*ext), KM_NOFS);
+	if (ext == NULL)
+		return -ENOMEM;
+
+	/* Fill in the extent, then hang it off the tail of the list. */
+	ext->fsbno = fsbno;
+	ext->len = len;
+	INIT_LIST_HEAD(&ext->list);
+	list_add_tail(&ext->list, btlist);
+
+	return 0;
+}
+
+/*
+ * Walk the dead btree extent list, disposing of each extent and freeing
+ * its list node.  With a NULL @oinfo the extents are left alone and only
+ * the list is torn down.  After the first failure the remaining extents
+ * are no longer touched, but the list is still emptied; that first
+ * error is what gets returned.
+ */
+int
+xfs_repair_reap_btree_extents(
+	struct xfs_scrub_context	*sc,
+	struct list_head		*btlist,
+	struct xfs_owner_info		*oinfo,
+	enum xfs_ag_resv_type		type)
+{
+	struct xfs_repair_btree_extent	*ext;
+	struct xfs_repair_btree_extent	*next;
+	bool				dispose = (oinfo != NULL);
+	int				error = 0;
+
+	list_for_each_entry_safe(ext, next, btlist, list) {
+		if (dispose) {
+			error = xfs_repair_free_or_unmap_extent(sc, ext->fsbno,
+					ext->len, oinfo, type);
+			/* Give up on the filesystem work, keep freeing nodes. */
+			if (error)
+				dispose = false;
+		}
+		list_del(&ext->list);
+		kmem_free(ext);
+	}
+
+	return error;
+}
+
+/*
+ * Errors happened, just delete the dead btree extent list.
+ *
+ * Passing a NULL owner makes xfs_repair_reap_btree_extents() free the
+ * list nodes without touching the extents themselves, so its return
+ * value is not interesting here.
+ */
+void
+xfs_repair_cancel_btree_extents(
+ struct xfs_scrub_context *sc,
+ struct list_head *btlist)
+{
+ xfs_repair_reap_btree_extents(sc, btlist, NULL, XFS_AG_RESV_NONE);
+}
+
+/*
+ * list_sort() comparison function: order btree extents by ascending
+ * starting block.  Lengths are not considered.
+ */
+static int
+xfs_repair_btree_extent_cmp(
+	void				*priv,
+	struct list_head		*a,
+	struct list_head		*b)
+{
+	struct xfs_repair_btree_extent	*left;
+	struct xfs_repair_btree_extent	*right;
+
+	left = container_of(a, struct xfs_repair_btree_extent, list);
+	right = container_of(b, struct xfs_repair_btree_extent, list);
+
+	if (left->fsbno == right->fsbno)
+		return 0;
+	return left->fsbno > right->fsbno ? 1 : -1;
+}
+
+/*
+ * Remove all the blocks in sublist from exlist.
+ *
+ * Both lists are sorted by starting block, then every block of every
+ * extent in @exlist is checked against a cursor (@sub) that advances
+ * through @sublist.  Runs of blocks that are not in @sublist are
+ * "stashed" in newfsb/newlen and written back to @exlist, either by
+ * reusing the current list node or by inserting a new node in front of
+ * it.  Extents that are subtracted entirely are deleted.  @sublist is
+ * sorted but otherwise left unmodified.
+ *
+ * Returns 0 on success or -ENOMEM if a new list node cannot be
+ * allocated, in which case @exlist may be left partially processed.
+ */
+int
+xfs_repair_subtract_extents(
+	struct xfs_mount		*mp,
+	struct list_head		*exlist,
+	struct list_head		*sublist)
+{
+	struct xfs_repair_btree_extent	*newrbe;
+	struct xfs_repair_btree_extent	*rbe;
+	struct xfs_repair_btree_extent	*n;
+	struct xfs_repair_btree_extent	*subp;
+	struct xfs_repair_btree_extent	sub;
+	xfs_fsblock_t			fsb;
+	xfs_fsblock_t			newfsb;
+	xfs_extlen_t			newlen;
+
+	list_sort(NULL, exlist, xfs_repair_btree_extent_cmp);
+	list_sort(NULL, sublist, xfs_repair_btree_extent_cmp);
+
+	/*
+	 * list_first_entry() never returns NULL; on an empty list it hands
+	 * back a bogus pointer computed from the list head.  Test for
+	 * emptiness explicitly before looking at the first entry.
+	 */
+	if (list_empty(sublist))
+		return 0;
+	subp = list_first_entry(sublist, struct xfs_repair_btree_extent, list);
+
+	sub = *subp;
+	/* For every block mentioned in exlist... */
+	list_for_each_entry_safe(rbe, n, exlist, list) {
+		newfsb = NULLFSBLOCK;
+		newlen = 0;
+		for (fsb = rbe->fsbno; fsb < rbe->fsbno + rbe->len; fsb++) {
+			/*
+			 * If the current location of the extent list is
+			 * beyond the subtract list, move the subtract list
+			 * forward by one block or by one record.
+			 */
+			while (fsb > sub.fsbno || sub.len == 0) {
+				if (sub.len) {
+					sub.len--;
+					sub.fsbno++;
+				} else {
+					/*
+					 * Get the next subtract extent.  If
+					 * there isn't one, make the current
+					 * extent match the unprocessed part of
+					 * that extent, and jump out.
+					 */
+					if (subp->list.next == sublist ||
+					    subp->list.next == NULL) {
+						/*
+						 * A pending stash always
+						 * ends right before fsb, so
+						 * it is contiguous with the
+						 * unprocessed tail.  Keep
+						 * both by starting the
+						 * surviving extent at the
+						 * stash instead of at fsb.
+						 */
+						if (newfsb != NULLFSBLOCK) {
+							fsb = newfsb;
+							newfsb = NULLFSBLOCK;
+						}
+						rbe->len -= fsb - rbe->fsbno;
+						rbe->fsbno = fsb;
+						subp = NULL;
+						rbe = NULL;
+						goto out_frag;
+					}
+					subp = list_next_entry(subp, list);
+					sub = *subp;
+				}
+			}
+
+			if (fsb != sub.fsbno) {
+				/*
+				 * Block not in the subtract list; stash
+				 * it for later reinsertion in the list.
+				 */
+				if (newfsb == NULLFSBLOCK) {
+					newfsb = fsb;
+					newlen = 1;
+				} else
+					newlen++;
+			} else {
+				/* Match! */
+				if (newfsb != NULLFSBLOCK) {
+					/*
+					 * Last block of the extent and we have
+					 * a saved extent.  Store the saved
+					 * extent in this extent.
+					 */
+					if (fsb == rbe->fsbno + rbe->len - 1) {
+						rbe->fsbno = newfsb;
+						rbe->len = newlen;
+						newfsb = NULLFSBLOCK;
+						rbe = NULL;
+						goto out_frag;
+					}
+					/* Stash the new extent in the list. */
+					newrbe = kmem_alloc(sizeof(*newrbe),
+							KM_NOFS);
+					if (!newrbe)
+						return -ENOMEM;
+					INIT_LIST_HEAD(&newrbe->list);
+					newrbe->fsbno = newfsb;
+					newrbe->len = newlen;
+					list_add_tail(&newrbe->list,
+							&rbe->list);
+				}
+
+				newfsb = NULLFSBLOCK;
+				newlen = 0;
+			}
+		} /* end for loop */
+
+out_frag:
+		/*
+		 * A stash still pending here is the tail of the current
+		 * extent, so reuse the extent's own list node for it.  Both
+		 * early exits above clear the stash before clearing rbe, so
+		 * rbe is always valid when a stash is pending.  With no
+		 * stash, a still-set rbe was subtracted in full: drop it.
+		 */
+		if (newfsb != NULLFSBLOCK) {
+			ASSERT(rbe != NULL);
+			rbe->fsbno = newfsb;
+			rbe->len = newlen;
+		} else if (rbe) {
+			list_del(&rbe->list);
+			kmem_free(rbe);
+		}
+		/* Subtract list exhausted; everything left in exlist stays. */
+		if (subp == NULL)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Find btree roots from the AGF.
+ *
+ * rmap query callback.  For every block of a non-inode-owned rmap
+ * record, check the block against each wanted btree (@priv is an array
+ * of struct xfs_repair_find_ag_btree terminated by a NULL buf_ops) with
+ * a matching rmap owner.  A block whose magic matches, that passes the
+ * btree's read verifier, and that sits at a higher level than anything
+ * found so far becomes the new root candidate for that btree.
+ *
+ * Returns 0 to continue the query or a negative errno if a block could
+ * not be read.
+ */
+STATIC int
+xfs_repair_find_ag_btree_roots_helper(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_mount		*mp = cur->bc_mp;
+	struct xfs_repair_find_ag_btree	*btree_info = priv;
+	struct xfs_repair_find_ag_btree	*fab;
+	struct xfs_buf			*bp;
+	struct xfs_btree_block		*btblock;
+	xfs_daddr_t			daddr;
+	xfs_agblock_t			agbno;
+	bool				magic_matches;
+	int				error;
+
+	if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
+		return 0;
+
+	for (agbno = 0; agbno < rec->rm_blockcount; agbno++) {
+		daddr = XFS_AGB_TO_DADDR(mp, cur->bc_private.a.agno,
+				rec->rm_startblock + agbno);
+		for (fab = btree_info; fab->buf_ops; fab++) {
+			if (rec->rm_owner != fab->rmap_owner)
+				continue;
+
+			error = xfs_trans_read_buf(mp, cur->bc_tp,
+					mp->m_ddev_targp, daddr, mp->m_bsize,
+					0, &bp, NULL);
+			if (error)
+				return error;
+
+			/*
+			 * Sample the magic while we still hold the buffer;
+			 * the block contents must not be touched once the
+			 * buffer has been released below.
+			 */
+			btblock = XFS_BUF_TO_BLOCK(bp);
+			magic_matches =
+				be32_to_cpu(btblock->bb_magic) == fab->magic;
+			if (!magic_matches)
+				goto next_fab;
+			/* Only a block higher up the tree can replace a root. */
+			if (fab->root != NULLAGBLOCK &&
+			    xfs_btree_get_level(btblock) <= fab->level)
+				goto next_fab;
+
+			bp->b_ops = fab->buf_ops;
+			bp->b_ops->verify_read(bp);
+			if (bp->b_error)
+				goto next_fab;
+			fab->root = rec->rm_startblock + agbno;
+			fab->level = xfs_btree_get_level(btblock);
+
+			trace_xfs_repair_find_ag_btree_roots_helper(mp,
+					cur->bc_private.a.agno,
+					rec->rm_startblock + agbno,
+					be32_to_cpu(btblock->bb_magic),
+					fab->level);
+next_fab:
+			xfs_trans_brelse(cur->bc_tp, bp);
+			/* This block belongs to fab's btree; stop looking. */
+			if (magic_matches)
+				break;
+		}
+	}
+
+	/*
+	 * Every failure returned early above.  Return an explicit zero so
+	 * that a record which never triggered a read (empty record, or no
+	 * btree with this owner) does not return an uninitialized value.
+	 */
+	return 0;
+}
+
+/*
+ * Find the roots of the given btrees from the rmap info.
+ *
+ * @btree_info is an array terminated by an entry with a NULL buf_ops.
+ * Every entry's root/level is reset, then all rmap records of
+ * sc->sa.agno are walked with xfs_repair_find_ag_btree_roots_helper().
+ * Returns 0 or the error from the rmap query.
+ */
+int
+xfs_repair_find_ag_btree_roots(
+ struct xfs_scrub_context *sc,
+ struct xfs_buf *agf_bp,
+ struct xfs_repair_find_ag_btree *btree_info)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_repair_find_ag_btree *fab;
+ struct xfs_btree_cur *cur;
+ int error;
+
+ /* Start with "nothing found" for every btree we were asked about. */
+ for (fab = btree_info; fab->buf_ops; fab++) {
+ fab->root = NULLAGBLOCK;
+ fab->level = 0;
+ }
+
+ cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
+ error = xfs_rmap_query_all(cur, xfs_repair_find_ag_btree_roots_helper,
+ btree_info);
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+ /*
+ * On success, bump the level of each root that was found --
+ * presumably turning the zero-based level of the root block into a
+ * tree height; confirm against the callers.
+ */
+ for (fab = btree_info; !error && fab->buf_ops; fab++)
+ if (fab->root != NULLAGBLOCK)
+ fab->level++;
+
+ return error;
+}
+
+/*
+ * Reset the superblock counters from the AGF/AGI.
+ *
+ * Sums the inode counts from every AGI and the free/freelist/btree
+ * block counts from every AGF, stores the totals in the in-core
+ * superblock under m_sb_lock, and applies the difference through
+ * xfs_mod_icount(), xfs_mod_ifree() and xfs_mod_fdblocks().  An empty
+ * transaction is used for the header reads and is always cancelled.
+ * Returns 0 or the first error encountered.
+ */
+int
+xfs_repair_reset_counters(
+ struct xfs_mount *mp)
+{
+ struct xfs_trans *tp;
+ struct xfs_buf *agi_bp;
+ struct xfs_buf *agf_bp;
+ struct xfs_agi *agi;
+ struct xfs_agf *agf;
+ xfs_agnumber_t agno;
+ xfs_ino_t icount = 0;
+ xfs_ino_t ifree = 0;
+ xfs_filblks_t fdblocks = 0;
+ int64_t delta_icount;
+ int64_t delta_ifree;
+ int64_t delta_fdblocks;
+ int error;
+
+ trace_xfs_repair_reset_counters(mp);
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ /*
+ * NOTE(review): the AGI/AGF buffers are not released inside the
+ * loop; presumably they stay attached to tp until it is cancelled
+ * below -- confirm.
+ */
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ /* Count all the inodes... */
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agi_bp);
+ if (error)
+ goto out;
+ agi = XFS_BUF_TO_AGI(agi_bp);
+ icount += be32_to_cpu(agi->agi_count);
+ ifree += be32_to_cpu(agi->agi_freecount);
+
+ /* Add up the free/freelist/bnobt/cntbt blocks... */
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agf_bp);
+ if (error)
+ goto out;
+ agf = XFS_BUF_TO_AGF(agf_bp);
+ fdblocks += be32_to_cpu(agf->agf_freeblks);
+ fdblocks += be32_to_cpu(agf->agf_flcount);
+ fdblocks += be32_to_cpu(agf->agf_btreeblks);
+ }
+
+ /*
+ * Reinitialize the counters. The on-disk and in-core counters
+ * differ by the number of inodes/blocks reserved by the admin,
+ * the per-AG reservation, and any transactions in progress, so
+ * we have to account for that.
+ */
+ /*
+ * NOTE(review): each delta is (old m_sb value - recomputed total)
+ * and is then added via xfs_mod_*(); confirm the sign is what is
+ * intended for the in-core counters.
+ */
+ spin_lock(&mp->m_sb_lock);
+ delta_icount = (int64_t)mp->m_sb.sb_icount - icount;
+ delta_ifree = (int64_t)mp->m_sb.sb_ifree - ifree;
+ delta_fdblocks = (int64_t)mp->m_sb.sb_fdblocks - fdblocks;
+ mp->m_sb.sb_icount = icount;
+ mp->m_sb.sb_ifree = ifree;
+ mp->m_sb.sb_fdblocks = fdblocks;
+ spin_unlock(&mp->m_sb_lock);
+
+ /*
+ * The m_sb fields above are already updated at this point, so a
+ * failure below leaves them changed with only some deltas applied.
+ */
+ if (delta_icount) {
+ error = xfs_mod_icount(mp, delta_icount);
+ if (error)
+ goto out;
+ }
+ if (delta_ifree) {
+ error = xfs_mod_ifree(mp, delta_ifree);
+ if (error)
+ goto out;
+ }
+ if (delta_fdblocks) {
+ error = xfs_mod_fdblocks(mp, delta_fdblocks, false);
+ if (error)
+ goto out;
+ }
+
+out:
+ xfs_trans_cancel(tp);
+ return error;
+}
+
+/*
+ * Figure out how many blocks to reserve for an AG repair.
+ *
+ * Returns 0 when no repair was requested or when sm->sm_agno does not
+ * name an AG of this filesystem.  Otherwise the AGI/AGF of that AG are
+ * read to size the worst-case rebuild of each btree type, and the
+ * largest of those sizes is returned.  If a header cannot be read,
+ * estimates based on the superblock geometry are used instead.
+ */
+xfs_extlen_t
+xfs_repair_calc_ag_resblks(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip,
+	struct xfs_scrub_metadata	*sm)
+{
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_agi			*agi;
+	struct xfs_agf			*agf;
+	struct xfs_buf			*bp;
+	xfs_agino_t			icount;
+	xfs_extlen_t			aglen;
+	xfs_extlen_t			usedlen;
+	xfs_extlen_t			freelen;
+	xfs_extlen_t			bnobt_sz;
+	xfs_extlen_t			inobt_sz;
+	xfs_extlen_t			rmapbt_sz;
+	xfs_extlen_t			refcbt_sz;
+	int				error;
+
+	if (!(sm->sm_flags & XFS_SCRUB_FLAG_REPAIR))
+		return 0;
+
+	/*
+	 * The return type is an unsigned block count, so a negative errno
+	 * cannot be reported from here: it would be taken as a request to
+	 * reserve roughly four billion blocks.  Reserve nothing for an AG
+	 * that does not exist.
+	 * NOTE(review): this relies on the bad AG number being rejected
+	 * elsewhere in the scrub setup path -- confirm.
+	 */
+	if (sm->sm_agno >= mp->m_sb.sb_agcount)
+		return 0;
+
+	/*
+	 * Try to get the actual counters from disk; if not, make
+	 * some worst case assumptions.
+	 */
+	error = xfs_read_agi(mp, NULL, sm->sm_agno, &bp);
+	if (!error) {
+		agi = XFS_BUF_TO_AGI(bp);
+		icount = be32_to_cpu(agi->agi_count);
+		xfs_trans_brelse(NULL, bp);
+	} else
+		icount = mp->m_sb.sb_agblocks / mp->m_sb.sb_inopblock;
+
+	error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp);
+	if (!error) {
+		agf = XFS_BUF_TO_AGF(bp);
+		aglen = be32_to_cpu(agf->agf_length);
+		freelen = be32_to_cpu(agf->agf_freeblks);
+		usedlen = aglen - freelen;
+		xfs_trans_brelse(NULL, bp);
+	} else {
+		/* No AGF: assume the AG could be all free or all used. */
+		aglen = mp->m_sb.sb_agblocks;
+		freelen = aglen;
+		usedlen = aglen;
+	}
+
+	trace_xfs_repair_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
+			freelen, usedlen);
+
+	/*
+	 * Figure out how many blocks we'd need worst case to rebuild
+	 * each type of btree.  Note that we can only rebuild the
+	 * bnobt/cntbt or inobt/finobt as pairs.
+	 */
+	bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen);
+	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
+		inobt_sz = xfs_iallocbt_calc_size(mp, icount /
+				XFS_INODES_PER_HOLEMASK_BIT);
+	else
+		inobt_sz = xfs_iallocbt_calc_size(mp, icount /
+				XFS_INODES_PER_CHUNK);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		inobt_sz *= 2;
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		rmapbt_sz = xfs_rmapbt_calc_size(mp, aglen);
+		refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen);
+	} else {
+		rmapbt_sz = xfs_rmapbt_calc_size(mp, usedlen);
+		refcbt_sz = 0;
+	}
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		rmapbt_sz = 0;
+
+	trace_xfs_repair_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
+			inobt_sz, rmapbt_sz, refcbt_sz);
+
+	return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
+}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 36/47] xfs: repair superblocks
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (34 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 35/47] xfs: add helper routines for the repair code Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 37/47] xfs: repair the AGF and AGFL Darrick J. Wong
` (11 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
If one of the backup superblocks is found to differ seriously from
superblock 0, write out a fresh copy from the in-core sb.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/repair/agheader.c | 35 +++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 2 +-
fs/xfs/repair/common.h | 4 ++++
3 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index 5a3d4c1..b2a85df 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -343,6 +343,41 @@ xfs_scrub_superblock(
#undef XFS_SCRUB_SB_OP_ERROR_GOTO
#undef XFS_SCRUB_SB_CHECK
+/*
+ * Repair the superblock.
+ *
+ * Overwrites the superblock of AG sc->sm->sm_agno with a copy built
+ * from the in-core superblock and logs it in the scrub transaction.
+ * Returns -EOPNOTSUPP for AG 0, the buffer read error, or 0.
+ */
+int
+xfs_repair_superblock(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_buf *bp;
+ struct xfs_dsb *sbp;
+ xfs_agnumber_t agno;
+ int error;
+
+ /* Don't try to repair AG 0's sb; let xfs_repair deal with it. */
+ agno = sc->sm->sm_agno;
+ if (agno == 0)
+ return -EOPNOTSUPP;
+
+ /* Read without a verifier (ops is NULL); the ops are attached below. */
+ error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+ XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+ XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
+ if (error)
+ return error;
+ bp->b_ops = &xfs_sb_buf_ops;
+
+ /* Copy AG 0's superblock to this one. */
+ sbp = XFS_BUF_TO_SBP(bp);
+ memset(sbp, 0, mp->m_sb.sb_sectsize);
+ xfs_sb_to_disk(sbp, &mp->m_sb);
+ /* Keep the duplicate features2 field in step with the real one. */
+ sbp->sb_bad_features2 = sbp->sb_features2;
+
+ /* Write this to disk. */
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
+ xfs_trans_log_buf(sc->tp, bp, 0, mp->m_sb.sb_sectsize - 1);
+ return error;
+}
+
/* AGF */
/* Tally freespace record lengths. */
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index a3efaf0..2f7a0a3 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -754,7 +754,7 @@ struct xfs_scrub_meta_fns {
static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup, xfs_scrub_dummy, NULL, NULL},
- {xfs_scrub_setup_ag, xfs_scrub_superblock, NULL, NULL},
+ {xfs_scrub_setup_ag, xfs_scrub_superblock, xfs_repair_superblock, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agf, NULL, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agfl, NULL, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agi, NULL, NULL},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 1f01ab3..5ee7472 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -324,4 +324,8 @@ xfs_extlen_t xfs_repair_calc_ag_resblks(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm);
+/* Metadata repairers */
+
+int xfs_repair_superblock(struct xfs_scrub_context *sc);
+
#endif /* __XFS_REPAIR_COMMON_H__ */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 37/47] xfs: repair the AGF and AGFL
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (35 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 36/47] xfs: repair superblocks Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 38/47] xfs: rebuild the AGI Darrick J. Wong
` (10 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Regenerate the AGF and AGFL from the rmap data.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/repair/agheader.c | 392 ++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 4
fs/xfs/repair/common.h | 2
3 files changed, 396 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index b2a85df..e5144a0 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -32,9 +32,13 @@
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
#include "repair/common.h"
/* Set us up to check an AG header. */
@@ -608,6 +612,182 @@ xfs_scrub_agf(
#undef XFS_SCRUB_AGF_OP_ERROR_GOTO
#undef XFS_SCRUB_AGF_CHECK
+/* Running totals gathered by xfs_repair_agf_walk_allocbt(). */
+struct xfs_repair_agf_allocbt {
+ /* Sum of ar_blockcount over every record visited. */
+ xfs_agblock_t freeblks;
+ /* Largest ar_blockcount seen in any single record. */
+ xfs_agblock_t longest;
+};
+
+/*
+ * Free space record callback: fold one record into the running totals
+ * (total free blocks and longest free extent) passed in through @priv.
+ */
+STATIC int
+xfs_repair_agf_walk_allocbt(
+	struct xfs_btree_cur		*cur,
+	struct xfs_alloc_rec_incore	*rec,
+	void				*priv)
+{
+	struct xfs_repair_agf_allocbt	*totals = priv;
+	int				error = 0;
+
+	/* Stop if xfs_scrub_should_terminate() says so; it fills in error. */
+	if (xfs_scrub_should_terminate(&error))
+		return error;
+
+	totals->freeblks += rec->ar_blockcount;
+	if (totals->longest < rec->ar_blockcount)
+		totals->longest = rec->ar_blockcount;
+	return error;
+}
+
+/*
+ * Repair the AGF.
+ *
+ * Rebuild the AGF header of sc->sa.agno: find the btree roots with
+ * xfs_repair_find_ag_btree_roots(), rewrite the header fields, recompute
+ * the free space and btree block counters by walking the btrees, refresh
+ * the in-core perag copy if anything changed, and log the whole sector.
+ *
+ * Returns -EOPNOTSUPP if the filesystem has no rmapbt, -EFSCORRUPTED if a
+ * required btree root is missing or too tall, or the error from a failed
+ * btree walk (in which case the original AGF contents are copied back).
+ */
+int
+xfs_repair_agf(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_repair_find_ag_btree	fab[] = {
+		{XFS_RMAP_OWN_AG, &xfs_allocbt_buf_ops, XFS_ABTB_CRC_MAGIC, 0, 0},
+		{XFS_RMAP_OWN_AG, &xfs_allocbt_buf_ops, XFS_ABTC_CRC_MAGIC, 0, 0},
+		{XFS_RMAP_OWN_AG, &xfs_rmapbt_buf_ops, XFS_RMAP_CRC_MAGIC, 0, 0},
+		{XFS_RMAP_OWN_REFC, &xfs_refcountbt_buf_ops, XFS_REFC_CRC_MAGIC, 0, 0},
+		{0, NULL, 0, 0, 0},
+	};
+	struct xfs_repair_agf_allocbt	raa = {0};
+	struct xfs_agf			old_agf;
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	struct xfs_buf			*agf_bp;
+	struct xfs_agf			*agf;
+	struct xfs_btree_cur		*cur = NULL;
+	struct xfs_perag		*pag;
+	xfs_agblock_t			blocks;
+	xfs_agblock_t			freesp_blocks;
+	int				error;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGF_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL);
+	if (error)
+		return error;
+	agf_bp->b_ops = &xfs_agf_buf_ops;
+
+	/* Find the btree roots. */
+	error = xfs_repair_find_ag_btree_roots(sc, agf_bp, fab);
+	if (error)
+		return error;
+	if (fab[0].root == NULLAGBLOCK || fab[0].level > XFS_BTREE_MAXLEVELS ||
+	    fab[1].root == NULLAGBLOCK || fab[1].level > XFS_BTREE_MAXLEVELS ||
+	    fab[2].root == NULLAGBLOCK || fab[2].level > XFS_BTREE_MAXLEVELS)
+		return -EFSCORRUPTED;
+	/*
+	 * The refcountbt root is written into the AGF below when reflink
+	 * is enabled, so it must be vetted the same way as the others.
+	 */
+	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+	    (fab[3].root == NULLAGBLOCK || fab[3].level > XFS_BTREE_MAXLEVELS))
+		return -EFSCORRUPTED;
+
+	/* Start rewriting the header; keep a copy so we can back out. */
+	agf = XFS_BUF_TO_AGF(agf_bp);
+	old_agf = *agf;
+	memset(agf, 0, mp->m_sb.sb_sectsize);
+	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
+	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
+	agf->agf_seqno = cpu_to_be32(sc->sa.agno);
+	agf->agf_length = cpu_to_be32(xfs_scrub_ag_blocks(mp, sc->sa.agno));
+	agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(fab[0].root);
+	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(fab[1].root);
+	agf->agf_roots[XFS_BTNUM_RMAPi] = cpu_to_be32(fab[2].root);
+	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(fab[0].level);
+	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(fab[1].level);
+	agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(fab[2].level);
+	/* The free list fields are carried over unchanged. */
+	agf->agf_flfirst = old_agf.agf_flfirst;
+	agf->agf_fllast = old_agf.agf_fllast;
+	agf->agf_flcount = old_agf.agf_flcount;
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		agf->agf_refcount_root = cpu_to_be32(fab[3].root);
+		agf->agf_refcount_level = cpu_to_be32(fab[3].level);
+	}
+
+	/* Update the AGF counters from the bnobt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_BNO);
+	error = xfs_alloc_query_all(cur, xfs_repair_agf_walk_allocbt, &raa);
+	if (error)
+		goto err;
+	error = xfs_btree_count_blocks(cur, &blocks);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	/* One block per tree is left out of the btreeblks total. */
+	freesp_blocks = blocks - 1;
+	agf->agf_freeblks = cpu_to_be32(raa.freeblks);
+	agf->agf_longest = cpu_to_be32(raa.longest);
+
+	/* Update the AGF counters from the cntbt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_CNT);
+	error = xfs_btree_count_blocks(cur, &blocks);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	freesp_blocks += blocks - 1;
+
+	/* Update the AGF counters from the rmapbt. */
+	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
+	error = xfs_btree_count_blocks(cur, &blocks);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	agf->agf_rmap_blocks = cpu_to_be32(blocks);
+	freesp_blocks += blocks - 1;
+
+	/* Update the AGF counters from the refcountbt. */
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		cur = xfs_refcountbt_init_cursor(mp, sc->tp, agf_bp,
+				sc->sa.agno, NULL);
+		error = xfs_btree_count_blocks(cur, &blocks);
+		if (error)
+			goto err;
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		agf->agf_refcount_blocks = cpu_to_be32(blocks);
+	}
+	agf->agf_btreeblks = cpu_to_be32(freesp_blocks);
+	cur = NULL;
+
+	/* Trigger reinitialization of the in-core data. */
+	if (raa.freeblks != be32_to_cpu(old_agf.agf_freeblks) ||
+	    freesp_blocks != be32_to_cpu(old_agf.agf_btreeblks) ||
+	    raa.longest != be32_to_cpu(old_agf.agf_longest) ||
+	    fab[0].level != be32_to_cpu(old_agf.agf_levels[XFS_BTNUM_BNOi]) ||
+	    fab[1].level != be32_to_cpu(old_agf.agf_levels[XFS_BTNUM_CNTi]) ||
+	    fab[2].level != be32_to_cpu(old_agf.agf_levels[XFS_BTNUM_RMAPi]) ||
+	    fab[3].level != be32_to_cpu(old_agf.agf_refcount_level)) {
+		pag = xfs_perag_get(mp, sc->sa.agno);
+		if (pag->pagf_init) {
+			pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
+			pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
+			pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
+			pag->pagf_longest = be32_to_cpu(agf->agf_longest);
+			pag->pagf_levels[XFS_BTNUM_BNOi] =
+				be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
+			pag->pagf_levels[XFS_BTNUM_CNTi] =
+				be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
+			pag->pagf_levels[XFS_BTNUM_RMAPi] =
+				be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
+			pag->pagf_refcount_level =
+				be32_to_cpu(agf->agf_refcount_level);
+		}
+		xfs_perag_put(pag);
+		sc->reset_counters = true;
+	}
+
+	/* Write this to disk. */
+	xfs_trans_buf_set_type(sc->tp, agf_bp, XFS_BLFT_AGF_BUF);
+	xfs_trans_log_buf(sc->tp, agf_bp, 0, mp->m_sb.sb_sectsize - 1);
+	return error;
+
+err:
+	if (cur)
+		xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
+						  XFS_BTREE_NOERROR);
+	/* Back out the header rewrite. */
+	*agf = old_agf;
+	return error;
+}
+
/* AGFL */
#define XFS_SCRUB_AGFL_CHECK(fs_ok) \
@@ -763,6 +943,218 @@ xfs_scrub_agfl(
#undef XFS_SCRUB_AGFL_OP_ERROR_GOTO
#undef XFS_SCRUB_AGFL_CHECK
+/* AGFL repair. */
+
+/* Extent lists built up while rebuilding the AGFL. */
+struct xfs_repair_agfl {
+ /* Every extent whose rmap owner is XFS_RMAP_OWN_AG. */
+ struct list_head freesp_list;
+ /* Blocks found to belong to the rmapbt, bnobt, or cntbt. */
+ struct list_head agmeta_list;
+};
+
+/*
+ * rmapbt record callback for AGFL repair: remember every OWN_AG extent on
+ * the freesp list and every rmapbt block the cursor passes through on the
+ * agmeta list.
+ */
+STATIC int
+xfs_repair_agfl_rmap_fn(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_repair_agfl		*lists = priv;
+	struct xfs_mount		*mp = cur->bc_mp;
+	struct xfs_buf			*bp;
+	int				level;
+	int				error = 0;
+
+	if (xfs_scrub_should_terminate(&error))
+		return error;
+
+	/* Record all the OWN_AG blocks... */
+	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
+		error = xfs_repair_collect_btree_extent(mp,
+				&lists->freesp_list,
+				XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno,
+					       rec->rm_startblock),
+				rec->rm_blockcount);
+		if (error)
+			return error;
+	}
+
+	/*
+	 * ...and all the rmapbt blocks.  Walk up from the leaf for as long
+	 * as the cursor sits on the first entry of the block at that level.
+	 * NOTE(review): presumably this is what makes each btree block get
+	 * collected only once -- confirm.
+	 */
+	for (level = 0; level < cur->bc_nlevels; level++) {
+		if (cur->bc_ptrs[level] != 1)
+			break;
+		xfs_btree_get_block(cur, level, &bp);
+		if (bp) {
+			error = xfs_repair_collect_btree_extent(mp,
+					&lists->agmeta_list,
+					XFS_DADDR_TO_FSB(mp, bp->b_bn), 1);
+			if (error)
+				return error;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Btree block visitor: put the block the cursor holds at @level onto the
+ * agmeta list.  Levels with no buffer attached are skipped.
+ */
+STATIC int
+xfs_repair_agfl_visit_btblock(
+	struct xfs_btree_cur		*cur,
+	int				level,
+	void				*priv)
+{
+	struct xfs_repair_agfl		*lists = priv;
+	struct xfs_mount		*mp = cur->bc_mp;
+	struct xfs_buf			*bp;
+	int				error = 0;
+
+	if (xfs_scrub_should_terminate(&error))
+		return error;
+
+	xfs_btree_get_block(cur, level, &bp);
+	if (bp)
+		return xfs_repair_collect_btree_extent(mp,
+				&lists->agmeta_list,
+				XFS_DADDR_TO_FSB(mp, bp->b_bn), 1);
+	return 0;
+}
+
+/*
+ * Repair the AGFL.
+ *
+ * Rebuild the free list of sc->sa.agno from the rmapbt: gather every
+ * OWN_AG extent, subtract the blocks in use by the rmapbt, bnobt and
+ * cntbt, and write what is left into a freshly initialized AGFL.  The
+ * AGF free list fields and the in-core flcount are updated to match, and
+ * whatever does not fit in the AGFL is handed to
+ * xfs_repair_reap_btree_extents().
+ *
+ * Returns -EOPNOTSUPP if the filesystem has no rmapbt, otherwise 0 or the
+ * first error encountered.
+ */
+int
+xfs_repair_agfl(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_repair_agfl		ra;
+	struct xfs_owner_info		oinfo;
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	struct xfs_buf			*agf_bp;
+	struct xfs_buf			*agfl_bp;
+	struct xfs_agf			*agf;
+	struct xfs_agfl			*agfl;
+	struct xfs_btree_cur		*cur = NULL;
+	struct xfs_perag		*pag;
+	__be32				*agfl_bno;
+	struct xfs_repair_btree_extent	*rbe;
+	struct xfs_repair_btree_extent	*n;
+	xfs_agblock_t			flcount;
+	xfs_agblock_t			agbno;
+	xfs_agblock_t			bno;
+	xfs_agblock_t			old_flcount;
+	int				error;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	INIT_LIST_HEAD(&ra.freesp_list);
+	INIT_LIST_HEAD(&ra.agmeta_list);
+	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
+	if (error)
+		return error;
+
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGFL_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), 0, &agfl_bp, NULL);
+	if (error)
+		return error;
+	agfl_bp->b_ops = &xfs_agfl_buf_ops;
+
+	/* Find all space used by the free space btrees & rmapbt. */
+	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
+	error = xfs_rmap_query_all(cur, xfs_repair_agfl_rmap_fn, &ra);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+
+	/* Find all space used by bnobt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_BNO);
+	error = xfs_btree_visit_blocks(cur, xfs_repair_agfl_visit_btblock,
+			&ra);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+
+	/* Find all space used by cntbt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
+			XFS_BTNUM_CNT);
+	error = xfs_btree_visit_blocks(cur, xfs_repair_agfl_visit_btblock,
+			&ra);
+	if (error)
+		goto err;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	cur = NULL;
+
+	/*
+	 * Drop the freesp meta blocks that are in use by btrees.
+	 * The remaining blocks /should/ be AGFL blocks.
+	 */
+	error = xfs_repair_subtract_extents(mp, &ra.freesp_list,
+			&ra.agmeta_list);
+	if (error)
+		goto err;
+	xfs_repair_cancel_btree_extents(sc, &ra.agmeta_list);
+
+	/* Start rewriting the header; unused slots are left as all-ones. */
+	agfl = XFS_BUF_TO_AGFL(agfl_bp);
+	memset(agfl, 0xFF, mp->m_sb.sb_sectsize);
+	agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
+	agfl->agfl_seqno = cpu_to_be32(sc->sa.agno);
+	uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
+
+	/* Fill the AGFL with the remaining blocks. */
+	flcount = 0;
+	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agfl_bp);
+	list_for_each_entry_safe(rbe, n, &ra.freesp_list, list) {
+		agbno = XFS_FSB_TO_AGBNO(mp, rbe->fsbno);
+
+		trace_xfs_repair_agfl_insert(mp, sc->sa.agno, agbno, rbe->len);
+
+		for (bno = 0; bno < rbe->len; bno++) {
+			if (flcount >= XFS_AGFL_SIZE(mp))
+				break;
+			agfl_bno[flcount] = cpu_to_be32(agbno + bno);
+			flcount++;
+		}
+		/* Trim off what was consumed; a leftover means we're full. */
+		rbe->fsbno += bno;
+		rbe->len -= bno;
+		if (rbe->len)
+			break;
+		list_del(&rbe->list);
+		kmem_free(rbe);
+	}
+
+	/*
+	 * Update the AGF counters.  With an empty list, "flcount - 1" would
+	 * wrap around to 0xFFFFFFFF, so point fllast at the last slot
+	 * instead.
+	 * NOTE(review): assumes flfirst = 0 / fllast = size - 1 is the
+	 * accepted encoding of an empty AGFL -- confirm against the AGF
+	 * verifier.
+	 */
+	agf = XFS_BUF_TO_AGF(agf_bp);
+	old_flcount = be32_to_cpu(agf->agf_flcount);
+	agf->agf_flfirst = 0;
+	agf->agf_flcount = cpu_to_be32(flcount);
+	if (flcount)
+		agf->agf_fllast = cpu_to_be32(flcount - 1);
+	else
+		agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
+
+	/* Trigger reinitialization of the in-core data. */
+	if (flcount != old_flcount) {
+		pag = xfs_perag_get(mp, sc->sa.agno);
+		if (pag->pagf_init)
+			pag->pagf_flcount = flcount;
+		xfs_perag_put(pag);
+		sc->reset_counters = true;
+	}
+
+	/* Write AGF and AGFL to disk. */
+	xfs_alloc_log_agf(sc->tp, agf_bp,
+			XFS_AGF_FLFIRST | XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+	xfs_trans_buf_set_type(sc->tp, agfl_bp, XFS_BLFT_AGFL_BUF);
+	xfs_trans_log_buf(sc->tp, agfl_bp, 0, mp->m_sb.sb_sectsize - 1);
+
+	/* Dump any AGFL overflow. */
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+	return xfs_repair_reap_btree_extents(sc, &ra.freesp_list, &oinfo,
+			XFS_AG_RESV_AGFL);
+err:
+	xfs_repair_cancel_btree_extents(sc, &ra.agmeta_list);
+	xfs_repair_cancel_btree_extents(sc, &ra.freesp_list);
+	if (cur)
+		xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
+						  XFS_BTREE_NOERROR);
+	return error;
+}
+
/* AGI */
#define XFS_SCRUB_AGI_CHECK(fs_ok) \
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 2f7a0a3..e068feb 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -755,8 +755,8 @@ struct xfs_scrub_meta_fns {
static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup, xfs_scrub_dummy, NULL, NULL},
{xfs_scrub_setup_ag, xfs_scrub_superblock, xfs_repair_superblock, NULL},
- {xfs_scrub_setup_ag, xfs_scrub_agf, NULL, NULL},
- {xfs_scrub_setup_ag, xfs_scrub_agfl, NULL, NULL},
+ {xfs_scrub_setup_ag, xfs_scrub_agf, xfs_repair_agf, NULL},
+ {xfs_scrub_setup_ag, xfs_scrub_agfl, xfs_repair_agfl, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agi, NULL, NULL},
{xfs_scrub_setup_ag_header, xfs_scrub_bnobt, NULL, NULL},
{xfs_scrub_setup_ag_header, xfs_scrub_cntbt, NULL, NULL},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 5ee7472..771279a 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -327,5 +327,7 @@ xfs_extlen_t xfs_repair_calc_ag_resblks(struct xfs_scrub_context *sc,
/* Metadata repairers */
int xfs_repair_superblock(struct xfs_scrub_context *sc);
+int xfs_repair_agf(struct xfs_scrub_context *sc);
+int xfs_repair_agfl(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 38/47] xfs: rebuild the AGI
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (36 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 37/47] xfs: repair the AGF and AGFL Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:39 ` [PATCH 39/47] xfs: repair free space btrees Darrick J. Wong
` (9 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Rebuild the AGI header items with some help from the rmapbt.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/repair/agheader.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 2 -
fs/xfs/repair/common.h | 1
3 files changed, 100 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/repair/agheader.c b/fs/xfs/repair/agheader.c
index e5144a0..d38134e 100644
--- a/fs/xfs/repair/agheader.c
+++ b/fs/xfs/repair/agheader.c
@@ -1317,3 +1317,101 @@ xfs_scrub_agi(
return error;
}
#undef XFS_SCRUB_AGI_CHECK
+
+/*
+ * Repair the AGI.
+ *
+ * Rebuild the AGI header of sc->sa.agno: find the inode btree roots with
+ * xfs_repair_find_ag_btree_roots(), rewrite the header fields (all
+ * unlinked buckets are reset to NULLAGINO), recount inodes from the
+ * inobt, and log the whole sector.
+ *
+ * Returns -EOPNOTSUPP if the filesystem has no rmapbt, -EFSCORRUPTED if a
+ * required btree root is missing or too tall, or the error from counting
+ * inodes (in which case the original AGI contents are copied back).
+ */
+int
+xfs_repair_agi(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_repair_find_ag_btree	fab[] = {
+		{XFS_RMAP_OWN_INOBT, &xfs_inobt_buf_ops, XFS_IBT_CRC_MAGIC, 0, 0},
+		{XFS_RMAP_OWN_INOBT, &xfs_inobt_buf_ops, XFS_FIBT_CRC_MAGIC, 0, 0},
+		{0, NULL, 0, 0, 0},
+	};
+	struct xfs_agi			old_agi;
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	struct xfs_buf			*agi_bp;
+	struct xfs_buf			*agf_bp;
+	struct xfs_agi			*agi;
+	struct xfs_btree_cur		*cur;
+	struct xfs_perag		*pag;
+	xfs_agino_t			old_count;
+	xfs_agino_t			old_freecount;
+	xfs_agino_t			count;
+	xfs_agino_t			freecount;
+	int				bucket;
+	int				error;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGI_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), 0, &agi_bp, NULL);
+	if (error)
+		return error;
+	agi_bp->b_ops = &xfs_agi_buf_ops;
+
+	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
+	if (error)
+		return error;
+
+	/* Find the btree roots. */
+	error = xfs_repair_find_ag_btree_roots(sc, agf_bp, fab);
+	if (error)
+		return error;
+	if (fab[0].root == NULLAGBLOCK || fab[0].level > XFS_BTREE_MAXLEVELS)
+		return -EFSCORRUPTED;
+	if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
+	    (fab[1].root == NULLAGBLOCK || fab[1].level > XFS_BTREE_MAXLEVELS))
+		return -EFSCORRUPTED;
+
+	/* Start rewriting the header; keep a copy so we can back out. */
+	agi = XFS_BUF_TO_AGI(agi_bp);
+	old_agi = *agi;
+	old_count = be32_to_cpu(old_agi.agi_count);
+	old_freecount = be32_to_cpu(old_agi.agi_freecount);
+	memset(agi, 0, mp->m_sb.sb_sectsize);
+	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
+	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
+	agi->agi_seqno = cpu_to_be32(sc->sa.agno);
+	agi->agi_length = cpu_to_be32(xfs_scrub_ag_blocks(mp, sc->sa.agno));
+	agi->agi_newino = cpu_to_be32(NULLAGINO);
+	agi->agi_dirino = cpu_to_be32(NULLAGINO);
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
+	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
+		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
+	agi->agi_root = cpu_to_be32(fab[0].root);
+	agi->agi_level = cpu_to_be32(fab[0].level);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+		agi->agi_free_root = cpu_to_be32(fab[1].root);
+		agi->agi_free_level = cpu_to_be32(fab[1].level);
+	}
+
+	/*
+	 * Update the AGI counters.  Tear the cursor down with the real
+	 * error state, as the other repair functions in this file do.
+	 */
+	cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno,
+			XFS_BTNUM_INO);
+	error = xfs_ialloc_count_inodes(cur, &count, &freecount);
+	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
+					  XFS_BTREE_NOERROR);
+	if (error)
+		goto err;
+	agi->agi_count = cpu_to_be32(count);
+	agi->agi_freecount = cpu_to_be32(freecount);
+	if (old_count != count || old_freecount != freecount) {
+		/* Clear pagi_init so the in-core AGI data is treated as stale. */
+		pag = xfs_perag_get(mp, sc->sa.agno);
+		pag->pagi_init = 0;
+		xfs_perag_put(pag);
+		sc->reset_counters = true;
+	}
+
+	/* Write this to disk. */
+	xfs_trans_buf_set_type(sc->tp, agi_bp, XFS_BLFT_AGI_BUF);
+	xfs_trans_log_buf(sc->tp, agi_bp, 0, mp->m_sb.sb_sectsize - 1);
+	return error;
+
+err:
+	/* Back out the header rewrite. */
+	*agi = old_agi;
+	return error;
+}
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index e068feb..269e55a 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -757,7 +757,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag, xfs_scrub_superblock, xfs_repair_superblock, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agf, xfs_repair_agf, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agfl, xfs_repair_agfl, NULL},
- {xfs_scrub_setup_ag, xfs_scrub_agi, NULL, NULL},
+ {xfs_scrub_setup_ag, xfs_scrub_agi, xfs_repair_agi, NULL},
{xfs_scrub_setup_ag_header, xfs_scrub_bnobt, NULL, NULL},
{xfs_scrub_setup_ag_header, xfs_scrub_cntbt, NULL, NULL},
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_inobt, NULL, NULL},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 771279a..4988887 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -329,5 +329,6 @@ xfs_extlen_t xfs_repair_calc_ag_resblks(struct xfs_scrub_context *sc,
int xfs_repair_superblock(struct xfs_scrub_context *sc);
int xfs_repair_agf(struct xfs_scrub_context *sc);
int xfs_repair_agfl(struct xfs_scrub_context *sc);
+int xfs_repair_agi(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 39/47] xfs: repair free space btrees
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (37 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 38/47] xfs: rebuild the AGI Darrick J. Wong
@ 2017-01-07 0:39 ` Darrick J. Wong
2017-01-07 0:40 ` [PATCH 40/47] xfs: repair inode btrees Darrick J. Wong
` (8 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:39 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Rebuild the free space btrees from the gaps in the rmap btree.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/repair/alloc.c | 364 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/repair/common.c | 4 -
fs/xfs/repair/common.h | 1
3 files changed, 367 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/repair/alloc.c b/fs/xfs/repair/alloc.c
index 0ed9fe1..a696ea7 100644
--- a/fs/xfs/repair/alloc.c
+++ b/fs/xfs/repair/alloc.c
@@ -32,7 +32,9 @@
#include "xfs_sb.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
+#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -184,3 +186,365 @@ xfs_scrub_cntbt(
{
return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
}
+
+/* Free space btree repair. */
+
+/* One free extent found in a gap between rmap records. */
+struct xfs_repair_alloc_extent {
+ /* Linkage on xfs_repair_alloc.extlist. */
+ struct list_head list;
+ /* First AG block of the extent. */
+ xfs_agblock_t bno;
+ /* Length of the extent, in blocks. */
+ xfs_extlen_t len;
+};
+
+/* State gathered from the rmapbt while rebuilding the free space btrees. */
+struct xfs_repair_alloc {
+ /* Free extents (struct xfs_repair_alloc_extent) to insert. */
+ struct list_head extlist;
+ struct list_head btlist; /* OWN_AG blocks */
+ struct list_head nobtlist; /* rmapbt/agfl blocks */
+ /* First AG block past the end of every rmap record seen so far. */
+ xfs_agblock_t next_bno;
+ /* Number of extents added to extlist. */
+ uint64_t nr_records;
+};
+
+/*
+ * rmapbt record callback for free space repair.  Collects OWN_AG extents
+ * and rmapbt blocks, and records the gap (if any) between the end of the
+ * previously seen mappings and the start of this one as a free extent.
+ */
+STATIC int
+xfs_repair_alloc_extent_fn(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_repair_alloc		*ra = priv;
+	struct xfs_mount		*mp = cur->bc_mp;
+	struct xfs_repair_alloc_extent	*gap;
+	struct xfs_buf			*bp;
+	xfs_agblock_t			rec_end;
+	int				level;
+	int				error;
+
+	/* Record all the OWN_AG blocks... */
+	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
+		error = xfs_repair_collect_btree_extent(mp, &ra->btlist,
+				XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno,
+					       rec->rm_startblock),
+				rec->rm_blockcount);
+		if (error)
+			return error;
+	}
+
+	/* ...and all the rmapbt blocks... */
+	for (level = 0; level < cur->bc_nlevels; level++) {
+		if (cur->bc_ptrs[level] != 1)
+			break;
+		xfs_btree_get_block(cur, level, &bp);
+		if (bp) {
+			error = xfs_repair_collect_btree_extent(mp,
+					&ra->nobtlist,
+					XFS_DADDR_TO_FSB(mp, bp->b_bn), 1);
+			if (error)
+				return error;
+		}
+	}
+
+	/* ...and all the free space. */
+	if (ra->next_bno < rec->rm_startblock) {
+		trace_xfs_repair_alloc_extent_fn(mp, cur->bc_private.a.agno,
+				rec->rm_startblock, rec->rm_blockcount,
+				rec->rm_owner, rec->rm_offset, rec->rm_flags);
+
+		gap = kmem_alloc(sizeof(*gap), KM_NOFS);
+		if (!gap)
+			return -ENOMEM;
+		INIT_LIST_HEAD(&gap->list);
+		gap->bno = ra->next_bno;
+		gap->len = rec->rm_startblock - ra->next_bno;
+		list_add_tail(&gap->list, &ra->extlist);
+		ra->nr_records++;
+	}
+
+	/* Never move next_bno backwards; mappings may overlap. */
+	rec_end = rec->rm_startblock + rec->rm_blockcount;
+	if (rec_end > ra->next_bno)
+		ra->next_bno = rec_end;
+	return 0;
+}
+
+/*
+ * Return the longest extent on the free extent list, or NULL if the list
+ * is empty.  Among equally long extents the earliest list entry wins.
+ */
+static struct xfs_repair_alloc_extent *
+xfs_repair_allocbt_get_longest(
+	struct xfs_repair_alloc		*ra)
+{
+	struct xfs_repair_alloc_extent	*pos;
+	struct xfs_repair_alloc_extent	*best = NULL;
+
+	list_for_each_entry(pos, &ra->extlist, list) {
+		if (best && pos->len <= best->len)
+			continue;
+		best = pos;
+	}
+	return best;
+}
+
+/* AGFL walk callback: add one AGFL block to the not-to-release list. */
+static int
+xfs_repair_collect_agfl_block(
+	struct xfs_scrub_context	*sc,
+	xfs_agblock_t			bno,
+	void				*data)
+{
+	struct xfs_repair_alloc		*ra = data;
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+
+	return xfs_repair_collect_btree_extent(mp, &ra->nobtlist,
+			XFS_AGB_TO_FSB(mp, sc->sa.agno, bno), 1);
+}
+
+/*
+ * list_sort() comparator: order free extents by ascending starting block.
+ * Returns -1, 0 or 1 as @a starts before, at, or after @b.
+ */
+static int
+xfs_repair_allocbt_extent_cmp(
+	void				*priv,
+	struct list_head		*a,
+	struct list_head		*b)
+{
+	struct xfs_repair_alloc_extent	*left;
+	struct xfs_repair_alloc_extent	*right;
+
+	left = container_of(a, struct xfs_repair_alloc_extent, list);
+	right = container_of(b, struct xfs_repair_alloc_extent, list);
+
+	if (left->bno < right->bno)
+		return -1;
+	if (left->bno > right->bno)
+		return 1;
+	return 0;
+}
+
+/*
+ * Free one extent into the free space btrees, roll the transaction, and
+ * then take the extent's length back out of fdblocks.
+ */
+STATIC int
+xfs_repair_allocbt_free_extent(
+	struct xfs_scrub_context	*sc,
+	xfs_fsblock_t			fsbno,
+	xfs_extlen_t			len,
+	struct xfs_owner_info		*oinfo)
+{
+	int				error;
+
+	error = xfs_free_extent(sc->tp, fsbno, len, oinfo, 0);
+	if (!error)
+		error = xfs_repair_roll_ag_trans(sc);
+	if (error)
+		return error;
+
+	/* sc->tp is read again here, after the roll. */
+	return xfs_mod_fdblocks(sc->tp->t_mountp, -(int64_t)len, false);
+}
+
+/*
+ * Repair the freespace btrees for some AG.
+ *
+ * Free space is derived from the gaps between rmapbt records (plus the
+ * space between the last record and the end of the AG).  Two blocks are
+ * carved out of the longest free extent(s) to become the new bnobt and
+ * cntbt roots, the AGF and in-core perag counters are reset, and every
+ * free extent is then inserted with xfs_free_extent().  Finally the old
+ * OWN_AG blocks that are neither rmapbt nor AGFL blocks are reaped.
+ *
+ * Returns -EOPNOTSUPP if the filesystem has no rmapbt, -ENOSPC if the AG
+ * lacks the space for new btrees, -ENOMEM on allocation failure, or the
+ * first error from the helpers called.
+ */
+int
+xfs_repair_allocbt(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_repair_alloc		ra;
+	struct xfs_owner_info		oinfo;
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	struct xfs_btree_cur		*cur = NULL;
+	struct xfs_repair_alloc_extent	*longest;
+	struct xfs_repair_alloc_extent	*rae;
+	struct xfs_repair_alloc_extent	*n;
+	struct xfs_perag		*pag;
+	struct xfs_agf			*agf;
+	struct xfs_buf			*bp;
+	xfs_fsblock_t			bnofsb;
+	xfs_fsblock_t			cntfsb;
+	xfs_extlen_t			oldf;
+	xfs_extlen_t			nr_blocks;
+	xfs_agblock_t			agend;
+	int				error;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Collect all reverse mappings for free extents, and the rmapbt
+	 * blocks.  We can discover the rmapbt blocks completely from a
+	 * query_all handler because there are always rmapbt entries.
+	 * (One cannot use query_all to visit all of a btree's blocks
+	 * unless that btree is guaranteed to have at least one entry.)
+	 */
+	INIT_LIST_HEAD(&ra.extlist);
+	INIT_LIST_HEAD(&ra.btlist);
+	INIT_LIST_HEAD(&ra.nobtlist);
+	ra.next_bno = 0;
+	ra.nr_records = 0;
+	cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
+	error = xfs_rmap_query_all(cur, xfs_repair_alloc_extent_fn, &ra);
+	if (error)
+		goto out;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	cur = NULL;
+
+	/* Insert a record for space between the last rmap and EOAG. */
+	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	agend = be32_to_cpu(agf->agf_length);
+	if (ra.next_bno < agend) {
+		rae = kmem_alloc(sizeof(*rae), KM_NOFS);
+		if (!rae) {
+			error = -ENOMEM;
+			goto out;
+		}
+		INIT_LIST_HEAD(&rae->list);
+		rae->bno = ra.next_bno;
+		rae->len = agend - ra.next_bno;
+		list_add_tail(&rae->list, &ra.extlist);
+		ra.nr_records++;
+	}
+
+	/* Collect all the AGFL blocks. */
+	error = xfs_scrub_walk_agfl(sc, xfs_repair_collect_agfl_block, &ra);
+	if (error)
+		goto out;
+
+	/* Do we actually have enough space to do this? */
+	pag = xfs_perag_get(mp, sc->sa.agno);
+	nr_blocks = 2 * xfs_allocbt_calc_size(mp, ra.nr_records);
+	if (!xfs_repair_ag_has_space(pag, nr_blocks, XFS_AG_RESV_NONE)) {
+		xfs_perag_put(pag);
+		error = -ENOSPC;
+		goto out;
+	}
+	xfs_perag_put(pag);
+
+	/* Allocate new bnobt root. */
+	longest = xfs_repair_allocbt_get_longest(&ra);
+	if (longest == NULL) {
+		error = -ENOSPC;
+		goto out;
+	}
+	bnofsb = XFS_AGB_TO_FSB(mp, sc->sa.agno, longest->bno);
+	longest->bno++;
+	longest->len--;
+
+	/* Allocate new cntbt root. */
+	if (longest->len == 0) {
+		list_del(&longest->list);
+		kmem_free(longest);
+		longest = xfs_repair_allocbt_get_longest(&ra);
+		if (longest == NULL) {
+			error = -ENOSPC;
+			goto out;
+		}
+	}
+	cntfsb = XFS_AGB_TO_FSB(mp, sc->sa.agno, longest->bno);
+	longest->bno++;
+	longest->len--;
+	if (longest->len == 0) {
+		list_del(&longest->list);
+		kmem_free(longest);
+		longest = xfs_repair_allocbt_get_longest(&ra);
+	}
+
+	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	/* Initialize new bnobt root. */
+	error = xfs_repair_init_btblock(sc, bnofsb, &bp, XFS_ABTB_CRC_MAGIC,
+			&xfs_allocbt_buf_ops);
+	if (error)
+		goto out;
+	agf->agf_roots[XFS_BTNUM_BNOi] =
+			cpu_to_be32(XFS_FSB_TO_AGBNO(mp, bnofsb));
+	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
+
+	/* Initialize new cntbt root. */
+	error = xfs_repair_init_btblock(sc, cntfsb, &bp, XFS_ABTC_CRC_MAGIC,
+			&xfs_allocbt_buf_ops);
+	if (error)
+		goto out;
+	agf->agf_roots[XFS_BTNUM_CNTi] =
+			cpu_to_be32(XFS_FSB_TO_AGBNO(mp, cntfsb));
+	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
+
+	/*
+	 * Since we're abandoning the old bnobt/cntbt, we have to
+	 * decrease fdblocks by the # of blocks in those trees.
+	 * btreeblks counts the non-root blocks of the free space
+	 * and rmap btrees.  Do this before resetting the AGF counters.
+	 */
+	pag = xfs_perag_get(mp, sc->sa.agno);
+	oldf = pag->pagf_btreeblks + 2;
+	oldf -= (be32_to_cpu(agf->agf_rmap_blocks) - 1);
+	error = xfs_mod_fdblocks(mp, -(int64_t)oldf, false);
+	if (error) {
+		xfs_perag_put(pag);
+		goto out;
+	}
+
+	/* Reset the perag info. */
+	pag->pagf_btreeblks = be32_to_cpu(agf->agf_rmap_blocks) - 1;
+	pag->pagf_freeblks = 0;
+	pag->pagf_longest = 0;
+	pag->pagf_levels[XFS_BTNUM_BNOi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
+	pag->pagf_levels[XFS_BTNUM_CNTi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
+
+	/* Now reset the AGF counters. */
+	agf->agf_btreeblks = cpu_to_be32(pag->pagf_btreeblks);
+	agf->agf_freeblks = cpu_to_be32(pag->pagf_freeblks);
+	agf->agf_longest = cpu_to_be32(pag->pagf_longest);
+	xfs_perag_put(pag);
+	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp,
+			XFS_AGF_ROOTS | XFS_AGF_LEVELS | XFS_AGF_BTREEBLKS |
+			XFS_AGF_LONGEST | XFS_AGF_FREEBLKS);
+	error = xfs_repair_roll_ag_trans(sc);
+	if (error)
+		goto out;
+
+	/*
+	 * Insert the longest free extent in case it's necessary to
+	 * refresh the AGFL with multiple blocks.  Only a non-empty extent
+	 * may be freed; a NULL longest means the two root blocks used up
+	 * all the free space we found.
+	 */
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_UNKNOWN);
+	if (longest && longest->len != 0) {
+		error = xfs_repair_allocbt_free_extent(sc,
+				XFS_AGB_TO_FSB(mp, sc->sa.agno, longest->bno),
+				longest->len, &oinfo);
+		if (error)
+			goto out;
+		list_del(&longest->list);
+		kmem_free(longest);
+	}
+
+	/* Insert records into the new btrees. */
+	list_sort(NULL, &ra.extlist, xfs_repair_allocbt_extent_cmp);
+	list_for_each_entry_safe(rae, n, &ra.extlist, list) {
+		error = xfs_repair_allocbt_free_extent(sc,
+				XFS_AGB_TO_FSB(mp, sc->sa.agno, rae->bno),
+				rae->len, &oinfo);
+		if (error)
+			goto out;
+		list_del(&rae->list);
+		kmem_free(rae);
+	}
+
+	/* Add rmap records for the btree roots */
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno,
+			XFS_FSB_TO_AGBNO(mp, bnofsb), 1, &oinfo);
+	if (error)
+		goto out;
+	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno,
+			XFS_FSB_TO_AGBNO(mp, cntfsb), 1, &oinfo);
+	if (error)
+		goto out;
+
+	/* Free all the OWN_AG blocks that are not in the rmapbt/agfl. */
+	error = xfs_repair_subtract_extents(mp, &ra.btlist, &ra.nobtlist);
+	if (error)
+		goto out;
+	xfs_repair_cancel_btree_extents(sc, &ra.nobtlist);
+	error = xfs_repair_reap_btree_extents(sc, &ra.btlist, &oinfo,
+			XFS_AG_RESV_NONE);
+	if (error)
+		goto out;
+
+	return 0;
+out:
+	/* Release everything still held on the three lists. */
+	xfs_repair_cancel_btree_extents(sc, &ra.btlist);
+	xfs_repair_cancel_btree_extents(sc, &ra.nobtlist);
+	if (cur)
+		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	list_for_each_entry_safe(rae, n, &ra.extlist, list) {
+		list_del(&rae->list);
+		kmem_free(rae);
+	}
+	return error;
+}
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 269e55a..e4d1815 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -758,8 +758,8 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag, xfs_scrub_agf, xfs_repair_agf, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agfl, xfs_repair_agfl, NULL},
{xfs_scrub_setup_ag, xfs_scrub_agi, xfs_repair_agi, NULL},
- {xfs_scrub_setup_ag_header, xfs_scrub_bnobt, NULL, NULL},
- {xfs_scrub_setup_ag_header, xfs_scrub_cntbt, NULL, NULL},
+ {xfs_scrub_setup_ag_header, xfs_scrub_bnobt, xfs_repair_allocbt, NULL},
+ {xfs_scrub_setup_ag_header, xfs_scrub_cntbt, xfs_repair_allocbt, NULL},
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_inobt, NULL, NULL},
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, NULL, xfs_sb_version_hasfinobt},
{xfs_scrub_setup_ag_header, xfs_scrub_rmapbt, NULL, xfs_sb_version_hasrmapbt},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 4988887..47f2b02 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -330,5 +330,6 @@ int xfs_repair_superblock(struct xfs_scrub_context *sc);
int xfs_repair_agf(struct xfs_scrub_context *sc);
int xfs_repair_agfl(struct xfs_scrub_context *sc);
int xfs_repair_agi(struct xfs_scrub_context *sc);
+int xfs_repair_allocbt(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 40/47] xfs: repair inode btrees
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (38 preceding siblings ...)
2017-01-07 0:39 ` [PATCH 39/47] xfs: repair free space btrees Darrick J. Wong
@ 2017-01-07 0:40 ` Darrick J. Wong
2017-01-07 0:40 ` [PATCH 41/47] xfs: rebuild the rmapbt Darrick J. Wong
` (7 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:40 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Use the rmapbt to find inode chunks, query the chunks to compute
hole and free masks, and with that information rebuild the inobt
and finobt.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_ialloc.c | 2
fs/xfs/libxfs/xfs_ialloc.h | 3
fs/xfs/repair/common.c | 4
fs/xfs/repair/common.h | 1
fs/xfs/repair/ialloc.c | 379 ++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 386 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 0fb7ba0..e34053d 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -147,7 +147,7 @@ xfs_inobt_get_rec(
/*
* Insert a single inobt record. Cursor must already point to desired location.
*/
-STATIC int
+int
xfs_inobt_insert_rec(
struct xfs_btree_cur *cur,
__uint16_t holemask,
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 17f0f1b..95216bb 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -177,5 +177,8 @@ int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low,
xfs_agino_t high, bool *exists);
int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count,
xfs_agino_t *freecount);
+int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, __uint16_t holemask,
+ __uint8_t count, __int32_t freecount, xfs_inofree_t free,
+ int *stat);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index e4d1815..d20b337 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -760,8 +760,8 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag, xfs_scrub_agi, xfs_repair_agi, NULL},
{xfs_scrub_setup_ag_header, xfs_scrub_bnobt, xfs_repair_allocbt, NULL},
{xfs_scrub_setup_ag_header, xfs_scrub_cntbt, xfs_repair_allocbt, NULL},
- {xfs_scrub_setup_ag_iallocbt, xfs_scrub_inobt, NULL, NULL},
- {xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, NULL, xfs_sb_version_hasfinobt},
+ {xfs_scrub_setup_ag_iallocbt, xfs_scrub_inobt, xfs_repair_iallocbt, NULL},
+ {xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, xfs_repair_iallocbt, xfs_sb_version_hasfinobt},
{xfs_scrub_setup_ag_header, xfs_scrub_rmapbt, NULL, xfs_sb_version_hasrmapbt},
{xfs_scrub_setup_ag_header, xfs_scrub_refcountbt, NULL, xfs_sb_version_hasreflink},
{xfs_scrub_setup_inode_raw, xfs_scrub_inode, NULL, NULL},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 47f2b02..2b881b8 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -331,5 +331,6 @@ int xfs_repair_agf(struct xfs_scrub_context *sc);
int xfs_repair_agfl(struct xfs_scrub_context *sc);
int xfs_repair_agi(struct xfs_scrub_context *sc);
int xfs_repair_allocbt(struct xfs_scrub_context *sc);
+int xfs_repair_iallocbt(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/ialloc.c b/fs/xfs/repair/ialloc.c
index d68e354..38910a3c 100644
--- a/fs/xfs/repair/ialloc.c
+++ b/fs/xfs/repair/ialloc.c
@@ -38,7 +38,9 @@
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_alloc.h"
+#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
+#include "xfs_error.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -430,3 +432,380 @@ xfs_scrub_finobt(
{
return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
}
+
+/* Inode btree repair. */
+
+struct xfs_repair_ialloc_extent {
+ struct list_head list;
+ xfs_inofree_t freemask;
+ xfs_agino_t startino;
+ unsigned int count;
+ unsigned int usedcount;
+ __uint16_t holemask;
+};
+
+struct xfs_repair_ialloc {
+ struct list_head extlist;
+ struct list_head btlist;
+ uint64_t nr_records;
+};
+
+/* Set usedmask if the inode is in use. */
+STATIC int
+xfs_repair_ialloc_check_free(
+ struct xfs_trans *tp,
+ struct xfs_buf *bp,
+ xfs_ino_t fsino,
+ xfs_agino_t chunkino,
+ xfs_agino_t clusterino,
+ xfs_inofree_t *usedmask,
+ int *usedcount)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *ip;
+ struct xfs_dinode *dip;
+ int error;
+
+ error = xfs_iget(mp, tp, fsino + clusterino, XFS_IGET_HITONLY, 0, &ip);
+ if (error == -ENOENT) {
+ return 0;
+ } else if (!error && ip) {
+ if (VFS_I(ip)->i_mode) {
+ *usedmask |= 1ULL << (chunkino + clusterino);
+ (*usedcount)++;
+ }
+ IRELE(ip);
+ return 0;
+ }
+
+ dip = xfs_buf_offset(bp, clusterino * mp->m_sb.sb_inodesize);
+ if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
+ return -EFSCORRUPTED;
+
+ if (dip->di_version >= 3 &&
+ be64_to_cpu(dip->di_ino) != fsino + clusterino)
+ return -EFSCORRUPTED;
+
+ if (dip->di_mode) {
+ *usedmask |= 1ULL << (chunkino + clusterino);
+ (*usedcount)++;
+ }
+
+ return 0;
+}
+
+/* Record inode chunk extents and the blocks of the old inode btrees. */
+STATIC int
+xfs_repair_ialloc_extent_fn(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_imap imap;
+ struct xfs_repair_ialloc *ri = priv;
+ struct xfs_repair_ialloc_extent *rie;
+ struct xfs_dinode *dip;
+ struct xfs_buf *bp;
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_ino_t fsino;
+ xfs_inofree_t usedmask;
+ xfs_fsblock_t fsbno;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_agino_t agino;
+ xfs_agino_t startino;
+ xfs_agino_t chunkino;
+ xfs_agino_t nr_inodes;
+ xfs_agino_t i;
+ __uint16_t fillmask;
+ int blks_per_cluster;
+ int usedcount;
+ int error = 0;
+
+ if (xfs_scrub_should_terminate(&error))
+ return error;
+
+ /* Fragment of the old btrees; dispose of them later. */
+ if (rec->rm_owner == XFS_RMAP_OWN_INOBT) {
+ fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
+ rec->rm_startblock);
+ return xfs_repair_collect_btree_extent(mp, &ri->btlist,
+ fsbno, rec->rm_blockcount);
+ }
+
+ /* Skip extents that are not owned by inode chunks. */
+ if (rec->rm_owner != XFS_RMAP_OWN_INODES)
+ return 0;
+
+ agno = cur->bc_private.a.agno;
+ blks_per_cluster = xfs_icluster_size_fsb(mp);
+ nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
+
+ ASSERT(rec->rm_startblock % blks_per_cluster == 0);
+
+ trace_xfs_repair_ialloc_extent_fn(mp, cur->bc_private.a.agno,
+ rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
+ rec->rm_offset, rec->rm_flags);
+
+ for (agbno = rec->rm_startblock;
+ agbno < rec->rm_startblock + rec->rm_blockcount;
+ agbno += blks_per_cluster) {
+ agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0);
+ fsino = XFS_AGINO_TO_INO(mp, agno, agino);
+ chunkino = agino & (XFS_INODES_PER_CHUNK - 1);
+ startino = agino & ~(XFS_INODES_PER_CHUNK - 1);
+
+ /* Which inodes are not holes? */
+ fillmask = xfs_inobt_maskn(
+ chunkino / XFS_INODES_PER_HOLEMASK_BIT,
+ nr_inodes / XFS_INODES_PER_HOLEMASK_BIT);
+
+ /* Grab the inode cluster buffer. */
+ imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
+ imap.im_boffset = 0;
+
+ error = xfs_imap_to_bp(mp, cur->bc_tp, &imap,
+ &dip, &bp, 0, XFS_IGET_UNTRUSTED);
+ if (error)
+ return error;
+
+ /* Which inodes are free? */
+ for (usedmask = 0, usedcount = 0, i = 0; i < nr_inodes; i++) {
+ error = xfs_repair_ialloc_check_free(cur->bc_tp, bp,
+ fsino, chunkino, i, &usedmask,
+ &usedcount);
+ if (error) {
+ xfs_trans_brelse(cur->bc_tp, bp);
+ return error;
+ }
+ }
+ xfs_trans_brelse(cur->bc_tp, bp);
+
+ /*
+ * If the last item in the list is our chunk record,
+ * update that.
+ */
+ if (!list_empty(&ri->extlist)) {
+ rie = list_last_entry(&ri->extlist,
+ struct xfs_repair_ialloc_extent, list);
+ if (rie->startino == startino) {
+ rie->freemask &= ~usedmask;
+ rie->holemask &= ~fillmask;
+ rie->count += nr_inodes;
+ rie->usedcount += usedcount;
+ continue;
+ }
+ }
+
+ /* New inode chunk; add to the list. */
+ rie = kmem_alloc(sizeof(*rie), KM_NOFS);
+ if (!rie)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&rie->list);
+ rie->startino = startino;
+ rie->freemask = XFS_INOBT_ALL_FREE & ~usedmask;
+ rie->holemask = XFS_INOBT_ALL_FREE & ~fillmask;
+ rie->count = nr_inodes;
+ rie->usedcount = usedcount;
+ list_add_tail(&rie->list, &ri->extlist);
+ ri->nr_records++;
+ }
+
+ return 0;
+}
+
+/* Compare two ialloc extents. */
+static int
+xfs_repair_ialloc_extent_cmp(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
+{
+ struct xfs_repair_ialloc_extent *ap;
+ struct xfs_repair_ialloc_extent *bp;
+
+ ap = container_of(a, struct xfs_repair_ialloc_extent, list);
+ bp = container_of(b, struct xfs_repair_ialloc_extent, list);
+
+ if (ap->startino > bp->startino)
+ return 1;
+ else if (ap->startino < bp->startino)
+ return -1;
+ return 0;
+}
+
+/* Repair both inode btrees. */
+int
+xfs_repair_iallocbt(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_repair_ialloc ri;
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_buf *bp;
+ struct xfs_repair_ialloc_extent *rie;
+ struct xfs_repair_ialloc_extent *n;
+ struct xfs_agi *agi;
+ struct xfs_btree_cur *cur = NULL;
+ struct xfs_perag *pag;
+ xfs_fsblock_t inofsb;
+ xfs_fsblock_t finofsb;
+ xfs_extlen_t nr_blocks;
+ unsigned int count;
+ unsigned int usedcount;
+ int stat;
+ int logflags;
+ int error = 0;
+
+ /* We require the rmapbt to rebuild anything. */
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return -EOPNOTSUPP;
+
+ /* Collect all reverse mappings for inode blocks. */
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
+ INIT_LIST_HEAD(&ri.extlist);
+ INIT_LIST_HEAD(&ri.btlist);
+ ri.nr_records = 0;
+ cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
+ error = xfs_rmap_query_all(cur, xfs_repair_ialloc_extent_fn, &ri);
+ if (error)
+ goto out;
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ cur = NULL;
+
+ /* Do we actually have enough space to do this? */
+ pag = xfs_perag_get(mp, sc->sa.agno);
+ nr_blocks = xfs_iallocbt_calc_size(mp, ri.nr_records);
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ nr_blocks *= 2;
+ if (!xfs_repair_ag_has_space(pag, nr_blocks, XFS_AG_RESV_NONE)) {
+ xfs_perag_put(pag);
+ error = -ENOSPC;
+ goto out;
+ }
+ xfs_perag_put(pag);
+
+ agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+ /* Initialize new btree roots. */
+ error = xfs_repair_alloc_ag_block(sc, &oinfo, &inofsb,
+ XFS_AG_RESV_NONE);
+ if (error)
+ goto out;
+ error = xfs_repair_init_btblock(sc, inofsb, &bp, XFS_IBT_CRC_MAGIC,
+ &xfs_inobt_buf_ops);
+ if (error)
+ goto out;
+ agi->agi_root = cpu_to_be32(XFS_FSB_TO_AGBNO(mp, inofsb));
+ agi->agi_level = cpu_to_be32(1);
+ logflags = XFS_AGI_ROOT | XFS_AGI_LEVEL;
+
+ if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+ error = xfs_repair_alloc_ag_block(sc, &oinfo, &finofsb,
+ XFS_AG_RESV_NONE);
+ if (error)
+ goto out;
+ error = xfs_repair_init_btblock(sc, finofsb, &bp,
+ XFS_FIBT_CRC_MAGIC, &xfs_inobt_buf_ops);
+ if (error)
+ goto out;
+ agi->agi_free_root = cpu_to_be32(XFS_FSB_TO_AGBNO(mp, finofsb));
+ agi->agi_free_level = cpu_to_be32(1);
+ logflags |= XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL;
+ }
+
+ xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, logflags);
+ error = xfs_repair_roll_ag_trans(sc);
+ if (error)
+ goto out;
+
+ /* Insert records into the new btrees. */
+ count = 0;
+ usedcount = 0;
+ list_sort(NULL, &ri.extlist, xfs_repair_ialloc_extent_cmp);
+ list_for_each_entry_safe(rie, n, &ri.extlist, list) {
+ count += rie->count;
+ usedcount += rie->usedcount;
+
+ trace_xfs_repair_ialloc_insert(mp, sc->sa.agno, rie->startino,
+ rie->holemask, rie->count,
+ rie->count - rie->usedcount, rie->freemask);
+
+ /* Insert into the inobt. */
+ cur = xfs_inobt_init_cursor(mp, sc->tp, sc->sa.agi_bp,
+ sc->sa.agno, XFS_BTNUM_INO);
+ error = xfs_inobt_lookup(cur, rie->startino, XFS_LOOKUP_EQ,
+ &stat);
+ if (error)
+ goto out;
+ XFS_WANT_CORRUPTED_GOTO(mp, stat == 0, out);
+ error = xfs_inobt_insert_rec(cur, rie->holemask, rie->count,
+ rie->count - rie->usedcount, rie->freemask,
+ &stat);
+ if (error)
+ goto out;
+ XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, out);
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ cur = NULL;
+
+ /* Insert into the finobt. */
+ if (rie->count != rie->usedcount &&
+ xfs_sb_version_hasfinobt(&mp->m_sb)) {
+ cur = xfs_inobt_init_cursor(mp, sc->tp, sc->sa.agi_bp,
+ sc->sa.agno, XFS_BTNUM_FINO);
+ error = xfs_inobt_lookup(cur, rie->startino,
+ XFS_LOOKUP_EQ, &stat);
+ if (error)
+ goto out;
+ XFS_WANT_CORRUPTED_GOTO(mp, stat == 0, out);
+ error = xfs_inobt_insert_rec(cur, rie->holemask,
+ rie->count, rie->count - rie->usedcount,
+ rie->freemask, &stat);
+ if (error)
+ goto out;
+ XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, out);
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ cur = NULL;
+ }
+
+ error = xfs_repair_roll_ag_trans(sc);
+ if (error)
+ goto out;
+
+ list_del(&rie->list);
+ kmem_free(rie);
+ }
+
+ /* Update the AGI counters. */
+ agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+ if (be32_to_cpu(agi->agi_count) != count ||
+ be32_to_cpu(agi->agi_freecount) != count - usedcount) {
+ pag = xfs_perag_get(mp, sc->sa.agno);
+ pag->pagi_init = 0;
+ xfs_perag_put(pag);
+
+ agi->agi_count = cpu_to_be32(count);
+ agi->agi_freecount = cpu_to_be32(count - usedcount);
+ xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
+ XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
+ sc->reset_counters = true;
+ }
+
+ /* Free the old inode btree blocks if they're not in use. */
+ error = xfs_repair_reap_btree_extents(sc, &ri.btlist, &oinfo,
+ XFS_AG_RESV_NONE);
+ if (error)
+ goto out;
+
+ return error;
+out:
+ if (cur)
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ xfs_repair_cancel_btree_extents(sc, &ri.btlist);
+ list_for_each_entry_safe(rie, n, &ri.extlist, list) {
+ list_del(&rie->list);
+ kmem_free(rie);
+ }
+ return error;
+}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 41/47] xfs: rebuild the rmapbt
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (39 preceding siblings ...)
2017-01-07 0:40 ` [PATCH 40/47] xfs: repair inode btrees Darrick J. Wong
@ 2017-01-07 0:40 ` Darrick J. Wong
2017-01-07 0:40 ` [PATCH 42/47] xfs: repair refcount btrees Darrick J. Wong
` (6 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:40 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Rebuild the reverse mapping btree from all primary metadata.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_refcount.c | 2
fs/xfs/libxfs/xfs_refcount.h | 3
fs/xfs/libxfs/xfs_rmap.c | 28 +
fs/xfs/libxfs/xfs_rmap.h | 1
fs/xfs/repair/common.c | 10 -
fs/xfs/repair/common.h | 8
fs/xfs/repair/rmap.c | 793 ++++++++++++++++++++++++++++++++++++++++++
7 files changed, 843 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index c6c875d..f63cfdb 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -88,7 +88,7 @@ xfs_refcount_lookup_ge(
}
/* Convert on-disk record to in-core format. */
-static inline void
+void
xfs_refcount_btrec_to_irec(
union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec)
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 78cb142..5973c56 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -69,5 +69,8 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+union xfs_btree_rec;
+extern void xfs_refcount_btrec_to_irec(union xfs_btree_rec *rec,
+ struct xfs_refcount_irec *irec);
#endif /* __XFS_REFCOUNT_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index e61d816..8531cbc 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -1977,6 +1977,34 @@ xfs_rmap_map_shared(
return error;
}
+/* Insert a raw rmap into the rmapbt. */
+int
+xfs_rmap_map_raw(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rmap)
+{
+ struct xfs_owner_info oinfo;
+
+ oinfo.oi_owner = rmap->rm_owner;
+ oinfo.oi_offset = rmap->rm_offset;
+ oinfo.oi_flags = 0;
+ if (rmap->rm_flags & XFS_RMAP_ATTR_FORK)
+ oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
+ if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK)
+ oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
+
+ if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
+ return xfs_rmap_map(cur, rmap->rm_startblock,
+ rmap->rm_blockcount,
+ rmap->rm_flags & XFS_RMAP_UNWRITTEN,
+ &oinfo);
+
+ return xfs_rmap_map_shared(cur, rmap->rm_startblock,
+ rmap->rm_blockcount,
+ rmap->rm_flags & XFS_RMAP_UNWRITTEN,
+ &oinfo);
+}
+
struct xfs_rmap_query_range_info {
xfs_rmap_query_range_fn fn;
void *priv;
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 606efe3..eac90d7 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -225,5 +225,6 @@ int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_fsblock_t bno,
int xfs_rmap_has_other_keys(struct xfs_btree_cur *cur, xfs_fsblock_t bno,
xfs_filblks_t len, struct xfs_owner_info *oinfo,
bool *has_rmap);
+int xfs_rmap_map_raw(struct xfs_btree_cur *cur, struct xfs_rmap_irec *rmap);
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index d20b337..152fb59 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -635,6 +635,7 @@ xfs_scrub_teardown(
int error)
{
struct xfs_mount *mp = sc->tp->t_mountp;
+ int err2;
xfs_scrub_ag_free(&sc->sa);
if (sc->ag_lock.agmask != sc->ag_lock.__agmask)
@@ -645,6 +646,13 @@ xfs_scrub_teardown(
else
xfs_trans_cancel(sc->tp);
sc->tp = NULL;
+
+ if (sc->teardown) {
+ err2 = sc->teardown(sc, ip_in, error);
+ if (!error && err2)
+ error = err2;
+ }
+
if (sc->ip != NULL) {
xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
xfs_iunlock(sc->ip, XFS_IOLOCK_EXCL);
@@ -762,7 +770,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag_header, xfs_scrub_cntbt, xfs_repair_allocbt, NULL},
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_inobt, xfs_repair_iallocbt, NULL},
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, xfs_repair_iallocbt, xfs_sb_version_hasfinobt},
- {xfs_scrub_setup_ag_header, xfs_scrub_rmapbt, NULL, xfs_sb_version_hasrmapbt},
+ {xfs_scrub_setup_ag_header_freeze, xfs_scrub_rmapbt, xfs_repair_rmapbt, xfs_sb_version_hasrmapbt},
{xfs_scrub_setup_ag_header, xfs_scrub_refcountbt, NULL, xfs_sb_version_hasreflink},
{xfs_scrub_setup_inode_raw, xfs_scrub_inode, NULL, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_data, NULL, NULL},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 2b881b8..1401042 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -68,6 +68,9 @@ struct xfs_scrub_context {
/* State tracking for single-AG operations. */
struct xfs_scrub_ag sa;
+
+ int (*teardown)(struct xfs_scrub_context *,
+ struct xfs_inode *, int);
};
/* Should we end the scrub early? */
@@ -227,6 +230,10 @@ int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
bool retry_deadlocked);
+int xfs_scrub_setup_ag_header_freeze(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
@@ -332,5 +339,6 @@ int xfs_repair_agfl(struct xfs_scrub_context *sc);
int xfs_repair_agi(struct xfs_scrub_context *sc);
int xfs_repair_allocbt(struct xfs_scrub_context *sc);
int xfs_repair_iallocbt(struct xfs_scrub_context *sc);
+int xfs_repair_rmapbt(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/rmap.c b/fs/xfs/repair/rmap.c
index d53ff46..f4ae408 100644
--- a/fs/xfs/repair/rmap.c
+++ b/fs/xfs/repair/rmap.c
@@ -30,13 +30,93 @@
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
#include "repair/common.h"
#include "repair/btree.h"
+/* Unfreeze the FS. */
+STATIC int
+xfs_scrub_teardown_thaw(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ int error)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct super_block *sb = mp->m_super;
+ int err2;
+
+ /* Re-freeze the last level of filesystem. */
+ down_write(&sb->s_umount);
+ percpu_down_write(sb->s_writers.rw_sem + SB_FREEZE_PAGEFAULT);
+ sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+ up_write(&sb->s_umount);
+ err2 = thaw_super(sb);
+ if (!error && err2)
+ error = err2;
+
+ return error;
+}
+
+/* Set us up with AG headers and btree cursors, and freeze the FS. */
+int
+xfs_scrub_setup_ag_header_freeze(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct super_block *sb = mp->m_super;
+ int error;
+
+ if (!(sm->sm_flags & XFS_SCRUB_FLAG_REPAIR))
+ return xfs_scrub_setup_ag_header(sc, ip, sm, retry_deadlocked);
+
+ /* Freeze out any further writes or page faults. */
+ error = freeze_super(sb);
+ if (error)
+ return error;
+
+ /* Thaw it to the point that we can make transactions. */
+ down_write(&sb->s_umount);
+ percpu_up_write(sb->s_writers.rw_sem + SB_FREEZE_PAGEFAULT);
+ sb->s_writers.frozen = SB_FREEZE_FS;
+ up_write(&sb->s_umount);
+
+ /* Check the AG number and set up the scrub context. */
+ error = xfs_scrub_setup_ag(sc, ip, sm, retry_deadlocked);
+ if (error)
+ return xfs_scrub_teardown_thaw(sc, ip, error);
+
+ /* Lock all the AG header buffers. */
+ sc->teardown = xfs_scrub_teardown_thaw;
+ xfs_scrub_ag_lock_init(mp, &sc->ag_lock);
+ error = xfs_scrub_ag_lock_all(sc);
+ if (error)
+ return error;
+
+ /* Now grab the headers of the AG we want. */
+ sc->sa.agno = sm->sm_agno;
+ error = xfs_scrub_ag_read_headers(sc, sm->sm_agno, &sc->sa.agi_bp,
+ &sc->sa.agf_bp, &sc->sa.agfl_bp);
+ if (error)
+ return error;
+
+ /* ...and initialize the btree cursors for xref. */
+ return xfs_scrub_ag_btcur_init(sc, &sc->sa);
+}
+
/* Reverse-mapping scrubber. */
/* Scrub an rmapbt record. */
@@ -219,3 +299,716 @@ xfs_scrub_rmapbt(
return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_helper,
&oinfo, NULL);
}
+
+/* Reverse-mapping repair. */
+
+struct xfs_repair_rmapbt_extent {
+ struct list_head list;
+ struct xfs_rmap_irec rmap;
+};
+
+struct xfs_repair_rmapbt {
+ struct list_head rmaplist;
+ struct list_head rmap_freelist;
+ struct list_head bno_freelist;
+ struct xfs_scrub_context *sc;
+ uint64_t owner;
+ xfs_extlen_t btblocks;
+ xfs_agblock_t next_bno;
+ uint64_t nr_records;
+};
+
+/* Initialize an rmap. */
+static inline int
+xfs_repair_rmapbt_new_rmap(
+ struct xfs_repair_rmapbt *rr,
+ xfs_agblock_t startblock,
+ xfs_extlen_t blockcount,
+ __uint64_t owner,
+ __uint64_t offset,
+ unsigned int flags)
+{
+ struct xfs_repair_rmapbt_extent *rre;
+ int error = 0;
+
+ trace_xfs_repair_rmap_extent_fn(rr->sc->tp->t_mountp, rr->sc->sa.agno,
+ startblock, blockcount, owner, offset, flags);
+
+ if (xfs_scrub_should_terminate(&error))
+ return error;
+
+ rre = kmem_alloc(sizeof(*rre), KM_NOFS);
+ if (!rre)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&rre->list);
+ rre->rmap.rm_startblock = startblock;
+ rre->rmap.rm_blockcount = blockcount;
+ rre->rmap.rm_owner = owner;
+ rre->rmap.rm_offset = offset;
+ rre->rmap.rm_flags = flags;
+ list_add_tail(&rre->list, &rr->rmaplist);
+ rr->nr_records++;
+
+ return 0;
+}
+
+/* Add an AGFL block to the rmap list. */
+STATIC int
+xfs_repair_rmapbt_walk_agfl(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ void *priv)
+{
+ struct xfs_repair_rmapbt *rr = priv;
+
+ return xfs_repair_rmapbt_new_rmap(rr, bno, 1, XFS_RMAP_OWN_AG, 0, 0);
+}
+
+/* Add a btree block to the rmap list. */
+STATIC int
+xfs_repair_rmapbt_visit_btblock(
+ struct xfs_btree_cur *cur,
+ int level,
+ void *priv)
+{
+ struct xfs_repair_rmapbt *rr = priv;
+ struct xfs_buf *bp;
+ xfs_fsblock_t fsb;
+
+ xfs_btree_get_block(cur, level, &bp);
+ if (!bp)
+ return 0;
+
+ rr->btblocks++;
+ fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
+ return xfs_repair_rmapbt_new_rmap(rr, XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
+ 1, rr->owner, 0, 0);
+}
+
+/* Record inode btree rmaps. */
+STATIC int
+xfs_repair_rmapbt_inodes(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_inobt_rec_incore irec;
+ struct xfs_repair_rmapbt *rr = priv;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_buf *bp;
+ xfs_fsblock_t fsb;
+ xfs_agino_t agino;
+ xfs_agino_t iperhole;
+ unsigned int i;
+ int error;
+
+ /* Record the inobt blocks */
+ for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) {
+ xfs_btree_get_block(cur, i, &bp);
+ if (!bp)
+ continue;
+ fsb = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+ error = xfs_repair_rmapbt_new_rmap(rr,
+ XFS_FSB_TO_AGBNO(mp, fsb), 1,
+ XFS_RMAP_OWN_INOBT, 0, 0);
+ if (error)
+ return error;
+ }
+
+ xfs_inobt_btrec_to_irec(mp, rec, &irec);
+
+ /* Record a non-sparse inode chunk. */
+ if (irec.ir_holemask == XFS_INOBT_HOLEMASK_FULL)
+ return xfs_repair_rmapbt_new_rmap(rr,
+ XFS_AGINO_TO_AGBNO(mp, irec.ir_startino),
+ XFS_INODES_PER_CHUNK / mp->m_sb.sb_inopblock,
+ XFS_RMAP_OWN_INODES, 0, 0);
+
+ /* Sparse chunk: walk the holemask and record each populated piece. */
+ iperhole = max_t(xfs_agino_t, mp->m_sb.sb_inopblock,
+ XFS_INODES_PER_HOLEMASK_BIT);
+ for (i = 0, agino = irec.ir_startino;
+ i < XFS_INOBT_HOLEMASK_BITS;
+ i += iperhole / XFS_INODES_PER_HOLEMASK_BIT, agino += iperhole) {
+ /* Skip holes. */
+ if (irec.ir_holemask & (1 << i))
+ continue;
+
+ /* Record the inode chunk otherwise. */
+ error = xfs_repair_rmapbt_new_rmap(rr,
+ XFS_AGINO_TO_AGBNO(mp, agino),
+ iperhole / mp->m_sb.sb_inopblock,
+ XFS_RMAP_OWN_INODES, 0, 0);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+/* Record a CoW staging extent. */
+STATIC int
+xfs_repair_rmapbt_refcount(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_repair_rmapbt *rr = priv;
+ struct xfs_refcount_irec refc;
+
+ xfs_refcount_btrec_to_irec(rec, &refc);
+ if (refc.rc_refcount != 1)
+ return -EFSCORRUPTED;
+
+ return xfs_repair_rmapbt_new_rmap(rr,
+ refc.rc_startblock - XFS_REFC_COW_START,
+ refc.rc_blockcount, XFS_RMAP_OWN_COW, 0, 0);
+}
+
+/* Add a bmbt block to the rmap list. */
+STATIC int
+xfs_repair_rmapbt_visit_bmbt(
+ struct xfs_btree_cur *cur,
+ int level,
+ void *priv)
+{
+ struct xfs_repair_rmapbt *rr = priv;
+ struct xfs_buf *bp;
+ xfs_fsblock_t fsb;
+ unsigned int flags = XFS_RMAP_BMBT_BLOCK;
+
+ xfs_btree_get_block(cur, level, &bp);
+ if (!bp)
+ return 0;
+
+ fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
+ if (XFS_FSB_TO_AGNO(cur->bc_mp, fsb) != rr->sc->sa.agno)
+ return 0;
+
+ if (cur->bc_private.b.whichfork == XFS_ATTR_FORK)
+ flags |= XFS_RMAP_ATTR_FORK;
+ return xfs_repair_rmapbt_new_rmap(rr,
+ XFS_FSB_TO_AGBNO(cur->bc_mp, fsb), 1,
+ cur->bc_private.b.ip->i_ino, 0, flags);
+}
+
+/* Determine rmap flags from fork and bmbt state. */
+static inline unsigned int
+xfs_repair_rmapbt_bmap_flags(
+ int whichfork,
+ xfs_exntst_t state)
+{
+ return (whichfork == XFS_ATTR_FORK ? XFS_RMAP_ATTR_FORK : 0) |
+ (state == XFS_EXT_UNWRITTEN ? XFS_RMAP_UNWRITTEN : 0);
+}
+
+/* Find all the extents from a given AG in an inode fork. */
+STATIC int
+xfs_repair_rmapbt_scan_ifork(
+ struct xfs_repair_rmapbt *rr,
+ struct xfs_inode *ip,
+ int whichfork)
+{
+ struct xfs_bmbt_irec rec;
+ struct xfs_mount *mp = rr->sc->tp->t_mountp;
+ struct xfs_btree_cur *cur = NULL;
+ xfs_fileoff_t off;
+ xfs_fileoff_t endoff;
+ unsigned int bflags;
+ unsigned int rflags;
+ int nmaps;
+ int fmt;
+ int error;
+
+ /* Do we even have data mapping extents? */
+ fmt = XFS_IFORK_FORMAT(ip, whichfork);
+ switch (fmt) {
+ case XFS_DINODE_FMT_BTREE:
+ case XFS_DINODE_FMT_EXTENTS:
+ break;
+ default:
+ return 0;
+ }
+ if (!XFS_IFORK_PTR(ip, whichfork))
+ return 0;
+
+ /* Find all the BMBT blocks in the AG. */
+ if (fmt == XFS_DINODE_FMT_BTREE) {
+ cur = xfs_bmbt_init_cursor(mp, rr->sc->tp, ip, whichfork);
+ error = xfs_btree_visit_blocks(cur,
+ xfs_repair_rmapbt_visit_bmbt, rr);
+ if (error)
+ goto out;
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ cur = NULL;
+ }
+
+ /* We're done if this is an rt inode's data fork. */
+ if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip))
+ return 0;
+
+ /* Find the offset of the last extent in the mapping. */
+ error = xfs_bmap_last_offset(ip, &endoff, whichfork);
+ if (error)
+ goto out;
+
+ /* Find all the extents in the AG. */
+ bflags = whichfork == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0;
+ off = 0;
+ while (true) {
+ nmaps = 1;
+ error = xfs_bmapi_read(ip, off, endoff - off, &rec,
+ &nmaps, bflags);
+ if (error || nmaps == 0)
+ break;
+ /* Stash non-hole extent. */
+ if (rec.br_startblock != HOLESTARTBLOCK &&
+ rec.br_startblock != DELAYSTARTBLOCK &&
+ XFS_FSB_TO_AGNO(mp, rec.br_startblock) == rr->sc->sa.agno) {
+ rflags = xfs_repair_rmapbt_bmap_flags(whichfork,
+ rec.br_state);
+ error = xfs_repair_rmapbt_new_rmap(rr,
+ XFS_FSB_TO_AGBNO(mp, rec.br_startblock),
+ rec.br_blockcount, ip->i_ino,
+ rec.br_startoff, rflags);
+ if (error)
+ goto out;
+ }
+
+ off += rec.br_blockcount;
+ }
+out:
+ if (cur)
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/* Scan the forks of every in-use inode in one inobt record for rmaps. */
+STATIC int
+xfs_repair_rmapbt_scan_inobt(
+	struct xfs_btree_cur		*cur,
+	union xfs_btree_rec		*rec,
+	void				*priv)
+{
+	struct xfs_inobt_rec_incore	irec;
+	struct xfs_mount		*mp = cur->bc_mp;
+	struct xfs_inode		*ip = NULL;
+	xfs_ino_t			ino;
+	xfs_agino_t			agino;
+	int				chunkidx;
+	int				error = 0;	/* stays 0 if all are free */
+
+	xfs_inobt_btrec_to_irec(mp, rec, &irec);
+
+	for (chunkidx = 0, agino = irec.ir_startino;
+	     chunkidx < XFS_INODES_PER_CHUNK;
+	     chunkidx++, agino++) {
+		/* Skip if this inode is free */
+		if (XFS_INOBT_MASK(chunkidx) & irec.ir_free)
+			continue;
+		ino = XFS_AGINO_TO_INO(mp, cur->bc_private.a.agno, agino);
+		error = xfs_iget(mp, cur->bc_tp, ino, 0, XFS_ILOCK_EXCL, &ip);
+		if (error)
+			break;
+
+		/* Collect rmaps from the data fork. */
+		error = xfs_repair_rmapbt_scan_ifork(priv, ip, XFS_DATA_FORK);
+		if (error)
+			break;
+
+		/* Collect rmaps from the attr fork. */
+		error = xfs_repair_rmapbt_scan_ifork(priv, ip, XFS_ATTR_FORK);
+		if (error)
+			break;
+
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		IRELE(ip);
+		ip = NULL;
+	}
+
+	if (ip) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		IRELE(ip);
+	}
+	return error;
+}
+
+/*
+ * rmap query callback: remember the gap (if any) between the highest
+ * block covered by the rmaps seen so far and the start of this record as
+ * an extent on rr->rmap_freelist, then advance rr->next_bno past this
+ * record.
+ */
+STATIC int
+xfs_repair_rmapbt_record_rmap_freesp(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_repair_rmapbt	*rr = priv;
+	struct xfs_mount		*mp = cur->bc_mp;
+	xfs_agblock_t			rec_end;
+	xfs_extlen_t			gap;
+	int				error = 0;
+
+	/* Anything between next_bno and this record has no owner. */
+	if (rr->next_bno < rec->rm_startblock) {
+		gap = rec->rm_startblock - rr->next_bno;
+		error = xfs_repair_collect_btree_extent(mp,
+				&rr->rmap_freelist,
+				XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno,
+					       rr->next_bno),
+				gap);
+	}
+	if (error)
+		return error;
+
+	/* Records may overlap, so never move next_bno backwards. */
+	rec_end = rec->rm_startblock + rec->rm_blockcount;
+	rr->next_bno = max_t(xfs_agblock_t, rr->next_bno, rec_end);
+	return 0;
+}
+
+/*
+ * bnobt query callback: add the free extent described by @rec to
+ * rr->bno_freelist.
+ */
+STATIC int
+xfs_repair_rmapbt_record_bno_freesp(
+	struct xfs_btree_cur		*cur,
+	struct xfs_alloc_rec_incore	*rec,
+	void				*priv)
+{
+	struct xfs_repair_rmapbt	*rr = priv;
+	struct xfs_mount		*mp = cur->bc_mp;
+
+	/* The extent list is keyed by fs block, so convert from AG block. */
+	return xfs_repair_collect_btree_extent(mp, &rr->bno_freelist,
+			XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno,
+				       rec->ar_startblock),
+			rec->ar_blockcount);
+}
+
+/*
+ * list_sort comparator for rmapbt extents.  Orders by start block, then
+ * owner, then packed offset; returns -1, 0 or 1.
+ */
+static int
+xfs_repair_rmapbt_extent_cmp(
+	void				*priv,
+	struct list_head		*a,
+	struct list_head		*b)
+{
+	struct xfs_rmap_irec		*ra;
+	struct xfs_rmap_irec		*rb;
+	__u64				off_a;
+	__u64				off_b;
+
+	ra = &container_of(a, struct xfs_repair_rmapbt_extent, list)->rmap;
+	rb = &container_of(b, struct xfs_repair_rmapbt_extent, list)->rmap;
+
+	if (ra->rm_startblock != rb->rm_startblock)
+		return ra->rm_startblock > rb->rm_startblock ? 1 : -1;
+
+	if (ra->rm_owner != rb->rm_owner)
+		return ra->rm_owner > rb->rm_owner ? 1 : -1;
+
+	/* Same block and owner: fall back to the packed offset. */
+	off_a = xfs_rmap_irec_offset_pack(ra);
+	off_b = xfs_rmap_irec_offset_pack(rb);
+	if (off_a != off_b)
+		return off_a > off_b ? 1 : -1;
+
+	return 0;
+}
+
+/*
+ * Shorthand for recording a metadata rmap with a special owner.  Relies
+ * on a local variable named "rr" being in scope at the call site.
+ */
+#define RMAP(type, startblock, blockcount) xfs_repair_rmapbt_new_rmap( \
+		&rr, (startblock), (blockcount), \
+		XFS_RMAP_OWN_##type, 0, 0)
+/*
+ * Repair the rmap btree for some AG.
+ *
+ * The rebuild proceeds in these steps:
+ *
+ *  1. Collect rmap records for the AG headers, the AGFL, the log (if it
+ *     lives in this AG), the free space btrees, the inode btrees, the
+ *     refcount btree and CoW staging extents, and for the file extents
+ *     found by walking the inode btree of every AG.
+ *  2. Check that the AG has room for the new tree, allocate and
+ *     initialize a new single-block rmapbt root, and update the AGF and
+ *     perag counters to match.
+ *  3. Sort the collected records and insert them one at a time, rolling
+ *     the transaction after each insertion.
+ *  4. Compute the free space implied by the new rmapbt, subtract what
+ *     the bnobt already lists as free, and reap whatever is left.
+ *
+ * Returns 0 on success or a negative errno; on failure every list that
+ * was built up is torn down again.
+ */
+int
+xfs_repair_rmapbt(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_repair_rmapbt	rr;
+	struct xfs_owner_info		oinfo;
+	struct xfs_repair_rmapbt_extent	*rre;
+	struct xfs_repair_rmapbt_extent	*n;
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	struct xfs_btree_cur		*cur = NULL;
+	struct xfs_buf			*bp = NULL;
+	struct xfs_agf			*agf;
+	struct xfs_agi			*agi;
+	struct xfs_perag		*pag;
+	xfs_fsblock_t			btfsb;
+	xfs_agnumber_t			ag;
+	xfs_agblock_t			agend;
+	xfs_extlen_t			freesp_btblocks;
+	int				error;
+
+	INIT_LIST_HEAD(&rr.rmaplist);
+	INIT_LIST_HEAD(&rr.rmap_freelist);
+	INIT_LIST_HEAD(&rr.bno_freelist);
+	rr.sc = sc;
+	rr.nr_records = 0;
+
+	/*
+	 * Collect rmaps for all AG headers.  Each header is only recorded
+	 * if the most recently added record does not already start at the
+	 * same block.
+	 */
+	error = RMAP(FS, XFS_SB_BLOCK(mp), 1);
+	if (error)
+		goto out;
+	rre = list_last_entry(&rr.rmaplist, struct xfs_repair_rmapbt_extent,
+			list);
+
+	if (rre->rmap.rm_startblock != XFS_AGF_BLOCK(mp)) {
+		error = RMAP(FS, XFS_AGF_BLOCK(mp), 1);
+		if (error)
+			goto out;
+		rre = list_last_entry(&rr.rmaplist,
+				struct xfs_repair_rmapbt_extent, list);
+	}
+
+	if (rre->rmap.rm_startblock != XFS_AGI_BLOCK(mp)) {
+		error = RMAP(FS, XFS_AGI_BLOCK(mp), 1);
+		if (error)
+			goto out;
+		rre = list_last_entry(&rr.rmaplist,
+				struct xfs_repair_rmapbt_extent, list);
+	}
+
+	if (rre->rmap.rm_startblock != XFS_AGFL_BLOCK(mp)) {
+		error = RMAP(FS, XFS_AGFL_BLOCK(mp), 1);
+		if (error)
+			goto out;
+	}
+
+	error = xfs_scrub_walk_agfl(sc, xfs_repair_rmapbt_walk_agfl, &rr);
+	if (error)
+		goto out;
+
+	/* Collect rmap for the log if it's in this AG. */
+	if (mp->m_sb.sb_logstart &&
+	    XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == sc->sa.agno) {
+		error = RMAP(LOG, XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart),
+				mp->m_sb.sb_logblocks);
+		if (error)
+			goto out;
+	}
+
+	/* Collect rmaps for the free space btrees. */
+	rr.owner = XFS_RMAP_OWN_AG;
+	rr.btblocks = 0;
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno,
+			XFS_BTNUM_BNO);
+	error = xfs_btree_visit_blocks(cur, xfs_repair_rmapbt_visit_btblock,
+			&rr);
+	if (error)
+		goto out;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	cur = NULL;
+
+	/* Collect rmaps for the cntbt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno,
+			XFS_BTNUM_CNT);
+	error = xfs_btree_visit_blocks(cur, xfs_repair_rmapbt_visit_btblock,
+			&rr);
+	if (error)
+		goto out;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	cur = NULL;
+	/* Remember the bnobt + cntbt block count for the AGF reset below. */
+	freesp_btblocks = rr.btblocks;
+
+	/* Collect rmaps for the inode btree. */
+	cur = xfs_inobt_init_cursor(mp, sc->tp, sc->sa.agi_bp, sc->sa.agno,
+			XFS_BTNUM_INO);
+	error = xfs_btree_query_all(cur, xfs_repair_rmapbt_inodes, &rr);
+	if (error)
+		goto out;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	/*
+	 * Forget the cursor so that a later "goto out" does not delete it
+	 * a second time.
+	 */
+	cur = NULL;
+
+	/* If there are no inodes, we have to include the inobt root. */
+	agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+	if (agi->agi_count == cpu_to_be32(0)) {
+		error = xfs_repair_rmapbt_new_rmap(&rr,
+				be32_to_cpu(agi->agi_root), 1,
+				XFS_RMAP_OWN_INOBT, 0, 0);
+		if (error)
+			goto out;
+	}
+
+	/* Collect rmaps for the free inode btree. */
+	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+		rr.owner = XFS_RMAP_OWN_INOBT;
+		cur = xfs_inobt_init_cursor(mp, sc->tp, sc->sa.agi_bp,
+				sc->sa.agno, XFS_BTNUM_FINO);
+		error = xfs_btree_visit_blocks(cur,
+				xfs_repair_rmapbt_visit_btblock, &rr);
+		if (error)
+			goto out;
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		cur = NULL;
+	}
+
+	/* Collect rmaps for the refcount btree. */
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		union xfs_btree_irec		low;
+		union xfs_btree_irec		high;
+
+		rr.owner = XFS_RMAP_OWN_REFC;
+		cur = xfs_refcountbt_init_cursor(mp, sc->tp, sc->sa.agf_bp,
+				sc->sa.agno, NULL);
+		error = xfs_btree_visit_blocks(cur,
+				xfs_repair_rmapbt_visit_btblock, &rr);
+		if (error)
+			goto out;
+
+		/*
+		 * Collect rmaps for CoW staging extents, i.e. every
+		 * refcount record from XFS_REFC_COW_START to the end of
+		 * the keyspace.
+		 */
+		memset(&low, 0, sizeof(low));
+		low.rc.rc_startblock = XFS_REFC_COW_START;
+		memset(&high, 0xFF, sizeof(high));
+		error = xfs_btree_query_range(cur, &low, &high,
+				xfs_repair_rmapbt_refcount, &rr);
+		if (error)
+			goto out;
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		cur = NULL;
+	}
+
+	/* Iterate all AGs for inodes. */
+	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+		ASSERT(xfs_scrub_ag_can_lock(sc, ag));
+		error = xfs_ialloc_read_agi(mp, sc->tp, ag, &bp);
+		if (error)
+			goto out;
+		cur = xfs_inobt_init_cursor(mp, sc->tp, bp, ag, XFS_BTNUM_INO);
+		error = xfs_btree_query_all(cur, xfs_repair_rmapbt_scan_inobt,
+				&rr);
+		if (error)
+			goto out;
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		cur = NULL;
+		xfs_trans_brelse(sc->tp, bp);
+		bp = NULL;
+	}
+
+	/* Do we actually have enough space to do this? */
+	pag = xfs_perag_get(mp, sc->sa.agno);
+	if (!xfs_repair_ag_has_space(pag,
+			xfs_rmapbt_calc_size(mp, rr.nr_records),
+			XFS_AG_RESV_AGFL)) {
+		xfs_perag_put(pag);
+		error = -ENOSPC;
+		goto out;
+	}
+
+	/* Initialize a new rmapbt root. */
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_UNKNOWN);
+	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	error = xfs_repair_alloc_ag_block(sc, &oinfo, &btfsb, XFS_AG_RESV_AGFL);
+	if (error) {
+		xfs_perag_put(pag);
+		goto out;
+	}
+	error = xfs_repair_init_btblock(sc, btfsb, &bp, XFS_RMAP_CRC_MAGIC,
+			&xfs_rmapbt_buf_ops);
+	if (error) {
+		xfs_perag_put(pag);
+		goto out;
+	}
+	agf->agf_roots[XFS_BTNUM_RMAPi] = cpu_to_be32(XFS_FSB_TO_AGBNO(mp,
+			btfsb));
+	agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+	agf->agf_rmap_blocks = cpu_to_be32(1);
+
+	/* Reset the perag info. */
+	pag->pagf_btreeblks = freesp_btblocks - 2;
+	pag->pagf_levels[XFS_BTNUM_RMAPi] =
+			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
+
+	/* Now reset the AGF counters. */
+	agf->agf_btreeblks = cpu_to_be32(pag->pagf_btreeblks);
+	xfs_perag_put(pag);
+	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_ROOTS |
+			XFS_AGF_LEVELS | XFS_AGF_RMAP_BLOCKS |
+			XFS_AGF_BTREEBLKS);
+	/* Drop our reference so the error path does not release the root. */
+	bp = NULL;
+	error = xfs_repair_roll_ag_trans(sc);
+	if (error)
+		goto out;
+
+	/* Insert all the metadata rmaps. */
+	list_sort(NULL, &rr.rmaplist, xfs_repair_rmapbt_extent_cmp);
+	list_for_each_entry_safe(rre, n, &rr.rmaplist, list) {
+		/*
+		 * Ensure the freelist is full, but don't let it shrink.
+		 * The rmapbt isn't fully set up yet, which means that
+		 * the current AGFL blocks might not be reflected in the
+		 * rmapbt, which is a problem if we want to unmap blocks
+		 * from the AGFL.
+		 */
+		error = xfs_repair_fix_freelist(sc, false);
+		if (error)
+			goto out;
+
+		/* Add the rmap. */
+		cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp,
+				sc->sa.agno);
+		error = xfs_rmap_map_raw(cur, &rre->rmap);
+		if (error)
+			goto out;
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		cur = NULL;
+
+		error = xfs_repair_roll_ag_trans(sc);
+		if (error)
+			goto out;
+
+		list_del(&rre->list);
+		kmem_free(rre);
+	}
+
+	/* Compute free space from the new rmapbt. */
+	rr.next_bno = 0;
+	cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
+	error = xfs_rmap_query_all(cur, xfs_repair_rmapbt_record_rmap_freesp,
+			&rr);
+	if (error)
+		goto out;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	cur = NULL;
+
+	/* Insert a record for space between the last rmap and EOAG. */
+	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	agend = be32_to_cpu(agf->agf_length);
+	if (rr.next_bno < agend) {
+		btfsb = XFS_AGB_TO_FSB(mp, sc->sa.agno, rr.next_bno);
+		error = xfs_repair_collect_btree_extent(mp, &rr.rmap_freelist,
+				btfsb, agend - rr.next_bno);
+		if (error)
+			goto out;
+	}
+
+	/* Compute free space from the existing bnobt. */
+	cur = xfs_allocbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno,
+			XFS_BTNUM_BNO);
+	error = xfs_alloc_query_all(cur, xfs_repair_rmapbt_record_bno_freesp,
+			&rr);
+	if (error)
+		goto out;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	cur = NULL;
+
+	/*
+	 * Free the "free" blocks that the new rmapbt knows about but
+	 * the old bnobt doesn't.  These are the old rmapbt blocks.
+	 */
+	error = xfs_repair_subtract_extents(mp, &rr.rmap_freelist,
+			&rr.bno_freelist);
+	if (error)
+		goto out;
+	xfs_repair_cancel_btree_extents(sc, &rr.bno_freelist);
+	error = xfs_repair_reap_btree_extents(sc, &rr.rmap_freelist, &oinfo,
+			XFS_AG_RESV_AGFL);
+	if (error)
+		goto out;
+
+	return 0;
+out:
+	if (cur)
+		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	if (bp)
+		xfs_trans_brelse(sc->tp, bp);
+	xfs_repair_cancel_btree_extents(sc, &rr.bno_freelist);
+	xfs_repair_cancel_btree_extents(sc, &rr.rmap_freelist);
+	list_for_each_entry_safe(rre, n, &rr.rmaplist, list) {
+		list_del(&rre->list);
+		kmem_free(rre);
+	}
+	return error;
+}
+#undef RMAP
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 42/47] xfs: repair refcount btrees
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (40 preceding siblings ...)
2017-01-07 0:40 ` [PATCH 41/47] xfs: rebuild the rmapbt Darrick J. Wong
@ 2017-01-07 0:40 ` Darrick J. Wong
2017-01-07 0:40 ` [PATCH 43/47] xfs: online repair of inodes Darrick J. Wong
` (5 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:40 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Reconstruct the refcount data from the rmap btree.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_btree.c | 21 ++
fs/xfs/libxfs/xfs_btree.h | 1
fs/xfs/libxfs/xfs_refcount.c | 19 ++
fs/xfs/libxfs/xfs_refcount.h | 4
fs/xfs/repair/common.c | 2
fs/xfs/repair/common.h | 1
fs/xfs/repair/refcount.c | 467 ++++++++++++++++++++++++++++++++++++++++++
7 files changed, 513 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 3788adb..59c6b69 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4911,3 +4911,24 @@ xfs_btree_has_record(
return 0;
}
+
+/*
+ * Are there more records in this btree?
+ *
+ * Returns true if the cursor's level-0 position is still below the
+ * record count of the current block, or if that block has a right
+ * sibling.
+ */
+bool
+xfs_btree_has_more_records(
+	struct xfs_btree_cur	*cur)
+{
+	struct xfs_buf		*bp;
+	struct xfs_btree_block	*leaf = xfs_btree_get_block(cur, 0, &bp);
+	bool			is_last;
+
+	/* There are still records in this block. */
+	if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(leaf))
+		return true;
+
+	/* Otherwise it depends on whether there is another record block. */
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		is_last = leaf->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
+	else
+		is_last = leaf->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
+
+	return !is_last;
+}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 87f1e0b..1f2c5eb 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -516,5 +516,6 @@ struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
int level, struct xfs_buf **bpp);
int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low,
union xfs_btree_irec *high, bool *exists);
+bool xfs_btree_has_more_records(struct xfs_btree_cur *);
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index f63cfdb..1c47671 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -87,6 +87,23 @@ xfs_refcount_lookup_ge(
return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}
+/*
+ * Look up the record whose start block is exactly bno in the refcount
+ * btree given by cur.  The lookup key uses a block count of zero.  The
+ * result of the lookup is passed back through stat by
+ * xfs_btree_lookup(); the return value is 0 or a negative errno.
+ */
+int
+xfs_refcount_lookup_eq(
+	struct xfs_btree_cur	*cur,
+	xfs_agblock_t		bno,
+	int			*stat)
+{
+	/* Report the lookup direction we actually use below. */
+	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
+			XFS_LOOKUP_EQ);
+	cur->bc_rec.rc.rc_startblock = bno;
+	cur->bc_rec.rc.rc_blockcount = 0;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
@@ -148,7 +165,7 @@ xfs_refcount_update(
* by [bno, len, refcount].
* This either works (return 0) or gets an EFSCORRUPTED error.
*/
-STATIC int
+int
xfs_refcount_insert(
struct xfs_btree_cur *cur,
struct xfs_refcount_irec *irec,
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 5973c56..cad61de 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -24,6 +24,8 @@ extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur,
xfs_agblock_t bno, int *stat);
extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur,
xfs_agblock_t bno, int *stat);
+extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur,
+ xfs_agblock_t bno, int *stat);
extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur,
struct xfs_refcount_irec *irec, int *stat);
@@ -72,5 +74,7 @@ extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
union xfs_btree_rec;
extern void xfs_refcount_btrec_to_irec(union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec);
+extern int xfs_refcount_insert(struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *irec, int *stat);
#endif /* __XFS_REFCOUNT_H__ */
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 152fb59..e75cf66 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -771,7 +771,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_inobt, xfs_repair_iallocbt, NULL},
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, xfs_repair_iallocbt, xfs_sb_version_hasfinobt},
{xfs_scrub_setup_ag_header_freeze, xfs_scrub_rmapbt, xfs_repair_rmapbt, xfs_sb_version_hasrmapbt},
- {xfs_scrub_setup_ag_header, xfs_scrub_refcountbt, NULL, xfs_sb_version_hasreflink},
+ {xfs_scrub_setup_ag_header, xfs_scrub_refcountbt, xfs_repair_refcountbt, xfs_sb_version_hasreflink},
{xfs_scrub_setup_inode_raw, xfs_scrub_inode, NULL, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_data, NULL, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_attr, NULL, NULL},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 1401042..02e5257 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -340,5 +340,6 @@ int xfs_repair_agi(struct xfs_scrub_context *sc);
int xfs_repair_allocbt(struct xfs_scrub_context *sc);
int xfs_repair_iallocbt(struct xfs_scrub_context *sc);
int xfs_repair_rmapbt(struct xfs_scrub_context *sc);
+int xfs_repair_refcountbt(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/refcount.c b/fs/xfs/repair/refcount.c
index 75071de..a0fafb7 100644
--- a/fs/xfs/repair/refcount.c
+++ b/fs/xfs/repair/refcount.c
@@ -30,9 +30,14 @@
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_sb.h"
+#include "xfs_itable.h"
#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
+#include "xfs_error.h"
#include "repair/common.h"
#include "repair/btree.h"
@@ -311,3 +316,465 @@ xfs_scrub_refcountbt(
return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_helper,
&oinfo, NULL);
}
+
+/*
+ * Rebuilding the Reference Count Btree
+ *
+ * This algorithm is "borrowed" from xfs_repair. Imagine the rmap
+ * entries as rectangles representing extents of physical blocks, and
+ * that the rectangles can be laid down to allow them to overlap each
+ * other; then we know that we must emit a refcnt btree entry wherever
+ * the amount of overlap changes, i.e. the emission stimulus is
+ * level-triggered:
+ *
+ * - ---
+ * -- ----- ---- --- ------
+ * -- ---- ----------- ---- ---------
+ * -------------------------------- -----------
+ * ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^
+ * 2 1 23 21 3 43 234 2123 1 01 2 3 0
+ *
+ * For our purposes, an rmap is a tuple (startblock, len, fileoff, owner).
+ *
+ * Note that in the actual refcnt btree we don't store the refcount < 2
+ * cases because the bnobt tells us which blocks are free; single-use
+ * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
+ * supports storing multiple entries covering a given block we could
+ * theoretically dispense with the refcntbt and simply count rmaps, but
+ * that's inefficient in the (hot) write path, so we'll take the cost of
+ * the extra tree to save time. Also there's no guarantee that rmap
+ * will be enabled.
+ *
+ * Given an array of rmaps sorted by physical block number, a starting
+ * physical block (sp), a bag to hold rmaps that cover sp, and the next
+ * physical block where the level changes (np), we can reconstruct the
+ * refcount btree as follows:
+ *
+ * While there are still unprocessed rmaps in the array,
+ * - Set sp to the physical block (pblk) of the next unprocessed rmap.
+ * - Add to the bag all rmaps in the array where startblock == sp.
+ * - Set np to the physical block where the bag size will change. This
+ * is the minimum of (the pblk of the next unprocessed rmap) and
+ * (startblock + len of each rmap in the bag).
+ * - Record the bag size as old_bag_size.
+ *
+ * - While the bag isn't empty,
+ * - Remove from the bag all rmaps where startblock + len == np.
+ * - Add to the bag all rmaps in the array where startblock == np.
+ * - If the bag size isn't old_bag_size, store the refcount entry
+ * (sp, np - sp, bag_size) in the refcnt btree.
+ * - If the bag is empty, break out of the inner loop.
+ * - Set old_bag_size to the bag size
+ * - Set sp = np.
+ * - Set np to the physical block where the bag size will change.
+ * This is the minimum of (the pblk of the next unprocessed rmap)
+ * and (startblock + len of each rmap in the bag).
+ *
+ * Like all the other repairers, we make a list of all the refcount
+ * records we need, then reinitialize the refcount btree root and
+ * insert all the records.
+ */
+
+struct xfs_repair_refc_rmap {
+ struct list_head list; /* link on the rmap_bag or rmap_idle list */
+ struct xfs_rmap_irec rmap; /* copy of the rmapbt record */
+};
+
+struct xfs_repair_refc_extent {
+ struct list_head list; /* link on the extlist of refcount extents */
+ struct xfs_refcount_irec refc; /* record to insert into the new btree */
+};
+
+struct xfs_repair_refc {
+ struct list_head rmap_bag; /* rmaps currently in the bag */
+ struct list_head rmap_idle; /* popped rmap holders kept for reuse */
+ struct list_head extlist; /* refcount extents to be inserted */
+ struct list_head btlist; /* old refcountbt blocks */
+ xfs_extlen_t btblocks; /* # of old refcountbt blocks seen */
+};
+
+/*
+ * Grab the next record from the rmapbt.
+ *
+ * Advances @cur until it finds an rmap that the loop condition below
+ * accepts, i.e. one with an inode owner that is not an internal inode
+ * and that has none of the attr fork, bmbt block or unwritten flags
+ * set.  That record is copied to @rec and @have_rec is set to true.  If
+ * the btree runs out of records first, @have_rec is left false and 0 is
+ * returned.
+ *
+ * Along the way, CoW staging rmaps are turned into refcount records on
+ * rr->extlist and old refcountbt blocks are collected on rr->btlist.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+STATIC int
+xfs_repair_refcountbt_next_rmap(
+	struct xfs_btree_cur		*cur,
+	struct xfs_repair_refc		*rr,
+	struct xfs_rmap_irec		*rec,
+	bool				*have_rec)
+{
+	struct xfs_rmap_irec		rmap;
+	struct xfs_mount		*mp = cur->bc_mp;
+	struct xfs_repair_refc_extent	*rre;
+	xfs_fsblock_t			fsbno;
+	int				have_gt;
+	int				error = 0;
+
+	*have_rec = false;
+	/*
+	 * Loop through the remaining rmaps.  Remember CoW staging
+	 * extents and the refcountbt blocks from the old tree for later
+	 * disposal.  We can only share written data fork extents, so
+	 * keep looping until we find an rmap for one.
+	 */
+	do {
+		if (xfs_scrub_should_terminate(&error))
+			goto out_error;
+
+		error = xfs_btree_increment(cur, 0, &have_gt);
+		if (error)
+			goto out_error;
+		if (!have_gt)
+			return 0;
+
+		error = xfs_rmap_get_rec(cur, &rmap, &have_gt);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error);
+
+		if (rmap.rm_owner == XFS_RMAP_OWN_COW) {
+			/* Pass CoW staging extents right through. */
+			rre = kmem_alloc(sizeof(*rre), KM_NOFS);
+			if (!rre) {
+				/*
+				 * Report the failure; returning 0 here
+				 * would look like "no more records".
+				 */
+				error = -ENOMEM;
+				goto out_error;
+			}
+
+			INIT_LIST_HEAD(&rre->list);
+			rre->refc.rc_startblock = rmap.rm_startblock +
+					XFS_REFC_COW_START;
+			rre->refc.rc_blockcount = rmap.rm_blockcount;
+			rre->refc.rc_refcount = 1;
+			list_add_tail(&rre->list, &rr->extlist);
+		} else if (rmap.rm_owner == XFS_RMAP_OWN_REFC) {
+			/* refcountbt block, dump it when we're done. */
+			rr->btblocks += rmap.rm_blockcount;
+			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
+					cur->bc_private.a.agno,
+					rmap.rm_startblock);
+			error = xfs_repair_collect_btree_extent(mp, &rr->btlist,
+					fsbno, rmap.rm_blockcount);
+			if (error)
+				goto out_error;
+		}
+	} while (XFS_RMAP_NON_INODE_OWNER(rmap.rm_owner) ||
+		 xfs_internal_inum(mp, rmap.rm_owner) ||
+		 (rmap.rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
+				   XFS_RMAP_UNWRITTEN)));
+
+	*rec = rmap;
+	*have_rec = true;
+	return 0;
+
+out_error:
+	return error;
+}
+
+/*
+ * Hand out an rmap holder: reuse one from the idle list when possible,
+ * otherwise allocate a fresh one.  Returns NULL if allocation fails.
+ */
+static struct xfs_repair_refc_rmap *
+xfs_repair_refcountbt_get_rmap(
+	struct xfs_repair_refc		*rr)
+{
+	struct xfs_repair_refc_rmap	*holder;
+
+	/* Prefer recycling over allocating. */
+	if (!list_empty(&rr->rmap_idle)) {
+		holder = list_first_entry(&rr->rmap_idle,
+				struct xfs_repair_refc_rmap, list);
+		list_del_init(&holder->list);
+		return holder;
+	}
+
+	holder = kmem_alloc(sizeof(*holder), KM_NOFS);
+	if (holder)
+		INIT_LIST_HEAD(&holder->list);
+	return holder;
+}
+
+/*
+ * list_sort comparator for refcount extents: order by start block only.
+ * Returns -1, 0 or 1.
+ */
+static int
+xfs_repair_refcount_extent_cmp(
+	void				*priv,
+	struct list_head		*a,
+	struct list_head		*b)
+{
+	struct xfs_repair_refc_extent	*left;
+	struct xfs_repair_refc_extent	*right;
+
+	left = container_of(a, struct xfs_repair_refc_extent, list);
+	right = container_of(b, struct xfs_repair_refc_extent, list);
+
+	if (left->refc.rc_startblock == right->refc.rc_startblock)
+		return 0;
+	return left->refc.rc_startblock > right->refc.rc_startblock ? 1 : -1;
+}
+
+/*
+ * Record a reference count extent: trace it, then append a copy to the
+ * tail of rr->extlist.  Returns 0, or -ENOMEM if the list entry cannot
+ * be allocated.
+ */
+STATIC int
+xfs_repair_refcountbt_new_refc(
+	struct xfs_scrub_context	*sc,
+	struct xfs_repair_refc		*rr,
+	xfs_agblock_t			agbno,
+	xfs_extlen_t			len,
+	xfs_nlink_t			refcount)
+{
+	struct xfs_repair_refc_extent	*entry;
+	struct xfs_refcount_irec	irec = {
+		.rc_startblock		= agbno,
+		.rc_blockcount		= len,
+		.rc_refcount		= refcount,
+	};
+
+	/* The tracepoint fires even if the allocation below fails. */
+	trace_xfs_repair_refcount_extent_fn(sc->tp->t_mountp, sc->sa.agno,
+			&irec);
+
+	entry = kmem_alloc(sizeof(*entry), KM_NOFS);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	entry->refc = irec;
+	INIT_LIST_HEAD(&entry->list);
+	list_add_tail(&entry->list, &rr->extlist);
+	return 0;
+}
+
+/*
+ * Rebuild the refcount btree.
+ *
+ * Walks the rmapbt of the AG, keeping a bag of the rmaps that cover the
+ * block currently being examined, and emits a refcount record each time
+ * the size of the bag changes away from a value greater than one.  Once
+ * all records are collected, a new single-block refcountbt root is
+ * allocated, the records are sorted and inserted one per transaction
+ * roll, and finally the blocks of the old refcountbt are reaped.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the filesystem has no rmapbt, or
+ * another negative errno.
+ */
+/* First block past the end of an rmap record. */
+#define RMAP_END(r)	((r).rm_startblock + (r).rm_blockcount)
+int
+xfs_repair_refcountbt(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_repair_refc		rr;
+	struct xfs_rmap_irec		rmap;
+	struct xfs_owner_info		oinfo;
+	struct xfs_mount		*mp = sc->tp->t_mountp;
+	struct xfs_repair_refc_rmap	*rrm;
+	struct xfs_repair_refc_rmap	*n;
+	struct xfs_repair_refc_extent	*rre;
+	struct xfs_repair_refc_extent	*o;
+	struct xfs_buf			*bp = NULL;
+	struct xfs_agf			*agf;
+	struct xfs_btree_cur		*cur;
+	struct xfs_perag		*pag;
+	uint64_t			nr_records;
+	xfs_fsblock_t			btfsb;
+	size_t				old_stack_sz;
+	size_t				stack_sz = 0;
+	xfs_agblock_t			sbno;
+	xfs_agblock_t			cbno;
+	xfs_agblock_t			nbno;
+	bool				have;
+	int				have_gt;
+	int				error = 0;
+
+	/* We require the rmapbt to rebuild anything. */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	INIT_LIST_HEAD(&rr.rmap_bag);
+	INIT_LIST_HEAD(&rr.rmap_idle);
+	INIT_LIST_HEAD(&rr.extlist);
+	INIT_LIST_HEAD(&rr.btlist);
+	rr.btblocks = 0;
+	nr_records = 0;
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
+
+	/* Start the rmapbt cursor to the left of all records. */
+	cur = xfs_rmapbt_init_cursor(mp, sc->tp, sc->sa.agf_bp, sc->sa.agno);
+	error = xfs_rmap_lookup_le(cur, 0, 0, 0, 0, 0, &have_gt);
+	if (error)
+		goto out;	/* releases the cursor */
+	ASSERT(have_gt == 0);
+
+	/* Process reverse mappings into refcount data. */
+	while (xfs_btree_has_more_records(cur)) {
+		/* Push all rmaps with pblk == sbno onto the stack */
+		error = xfs_repair_refcountbt_next_rmap(cur, &rr, &rmap, &have);
+		if (error)
+			goto out;
+		if (!have)
+			break;
+		sbno = cbno = rmap.rm_startblock;
+		while (have && rmap.rm_startblock == sbno) {
+			rrm = xfs_repair_refcountbt_get_rmap(&rr);
+			if (!rrm) {
+				error = -ENOMEM;
+				goto out;
+			}
+			rrm->rmap = rmap;
+			list_add_tail(&rrm->list, &rr.rmap_bag);
+			stack_sz++;
+			error = xfs_repair_refcountbt_next_rmap(cur, &rr, &rmap,
+					&have);
+			if (error)
+				goto out;
+		}
+		/* Step back so the record we peeked at is read again. */
+		error = xfs_btree_decrement(cur, 0, &have_gt);
+		if (error)
+			goto out;
+		XFS_WANT_CORRUPTED_GOTO(mp, have_gt, out);
+
+		/* Set nbno to the bno of the next refcount change */
+		nbno = have ? rmap.rm_startblock : NULLAGBLOCK;
+		list_for_each_entry(rrm, &rr.rmap_bag, list)
+			nbno = min_t(xfs_agblock_t, nbno, RMAP_END(rrm->rmap));
+
+		ASSERT(nbno > sbno);
+		old_stack_sz = stack_sz;
+
+		/* While stack isn't empty... */
+		while (stack_sz) {
+			/* Pop all rmaps that end at nbno */
+			list_for_each_entry_safe(rrm, n, &rr.rmap_bag, list) {
+				if (RMAP_END(rrm->rmap) != nbno)
+					continue;
+				stack_sz--;
+				list_del_init(&rrm->list);
+				list_add(&rrm->list, &rr.rmap_idle);
+			}
+
+			/* Push array items that start at nbno */
+			error = xfs_repair_refcountbt_next_rmap(cur, &rr, &rmap,
+					&have);
+			if (error)
+				goto out;
+			while (have && rmap.rm_startblock == nbno) {
+				rrm = xfs_repair_refcountbt_get_rmap(&rr);
+				if (!rrm) {
+					error = -ENOMEM;
+					goto out;
+				}
+				rrm->rmap = rmap;
+				list_add_tail(&rrm->list, &rr.rmap_bag);
+				stack_sz++;
+				error = xfs_repair_refcountbt_next_rmap(cur,
+						&rr, &rmap, &have);
+				if (error)
+					goto out;
+			}
+			/* Step back so the peeked record is read again. */
+			error = xfs_btree_decrement(cur, 0, &have_gt);
+			if (error)
+				goto out;
+			XFS_WANT_CORRUPTED_GOTO(mp, have_gt, out);
+
+			/* Emit refcount if necessary */
+			ASSERT(nbno > cbno);
+			if (stack_sz != old_stack_sz) {
+				/* Only shared (refcount > 1) extents count. */
+				if (old_stack_sz > 1) {
+					error = xfs_repair_refcountbt_new_refc(
+							sc, &rr, cbno,
+							nbno - cbno,
+							old_stack_sz);
+					if (error)
+						goto out;
+					nr_records++;
+				}
+				cbno = nbno;
+			}
+
+			/* Stack empty, go find the next rmap */
+			if (stack_sz == 0)
+				break;
+			old_stack_sz = stack_sz;
+			sbno = nbno;
+
+			/* Set nbno to the bno of the next refcount change */
+			nbno = have ? rmap.rm_startblock : NULLAGBLOCK;
+			list_for_each_entry(rrm, &rr.rmap_bag, list)
+				nbno = min_t(xfs_agblock_t, nbno,
+						RMAP_END(rrm->rmap));
+
+			/* The next change point must lie ahead of us. */
+			ASSERT(nbno > sbno);
+		}
+	}
+	ASSERT(list_empty(&rr.rmap_bag));
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	cur = NULL;
+
+	/* Free all the rmap records. */
+	list_for_each_entry_safe(rrm, n, &rr.rmap_idle, list) {
+		list_del(&rrm->list);
+		kmem_free(rrm);
+	}
+	list_for_each_entry_safe(rrm, n, &rr.rmap_bag, list) {
+		list_del(&rrm->list);
+		kmem_free(rrm);
+	}
+
+	/*
+	 * Do we actually have enough space to do this?
+	 *
+	 * NOTE(review): nr_records only counts the shared extents emitted
+	 * above; the CoW staging records that
+	 * xfs_repair_refcountbt_next_rmap() appends to rr.extlist are not
+	 * included in this estimate -- confirm whether that is intended.
+	 */
+	pag = xfs_perag_get(mp, sc->sa.agno);
+	if (!xfs_repair_ag_has_space(pag,
+			xfs_refcountbt_calc_size(mp, nr_records),
+			XFS_AG_RESV_METADATA)) {
+		xfs_perag_put(pag);
+		error = -ENOSPC;
+		goto out;
+	}
+	xfs_perag_put(pag);
+
+	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+	/* Initialize a new btree root. */
+	error = xfs_repair_alloc_ag_block(sc, &oinfo, &btfsb,
+			XFS_AG_RESV_METADATA);
+	if (error)
+		goto out;
+	error = xfs_repair_init_btblock(sc, btfsb, &bp, XFS_REFC_CRC_MAGIC,
+			&xfs_refcountbt_buf_ops);
+	if (error)
+		goto out;
+	agf->agf_refcount_root = cpu_to_be32(XFS_FSB_TO_AGBNO(mp, btfsb));
+	agf->agf_refcount_level = cpu_to_be32(1);
+	agf->agf_refcount_blocks = cpu_to_be32(1);
+	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_REFCOUNT_BLOCKS |
+			XFS_AGF_REFCOUNT_ROOT | XFS_AGF_REFCOUNT_LEVEL);
+	error = xfs_repair_roll_ag_trans(sc);
+	if (error)
+		goto out;
+
+	/* Insert records into the new btree. */
+	list_sort(NULL, &rr.extlist, xfs_repair_refcount_extent_cmp);
+	list_for_each_entry_safe(rre, o, &rr.extlist, list) {
+		/* Insert into the refcountbt. */
+		cur = xfs_refcountbt_init_cursor(mp, sc->tp, sc->sa.agf_bp,
+				sc->sa.agno, NULL);
+		error = xfs_refcount_lookup_eq(cur, rre->refc.rc_startblock,
+				&have_gt);
+		if (error)
+			goto out;
+		/* The record must not be in the new tree already. */
+		XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 0, out);
+		error = xfs_refcount_insert(cur, &rre->refc, &have_gt);
+		if (error)
+			goto out;
+		XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out);
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		cur = NULL;
+
+		error = xfs_repair_roll_ag_trans(sc);
+		if (error)
+			goto out;
+
+		list_del(&rre->list);
+		kmem_free(rre);
+	}
+
+	/* Free the old refcountbt blocks if they're not in use. */
+	error = xfs_repair_reap_btree_extents(sc, &rr.btlist, &oinfo,
+			XFS_AG_RESV_METADATA);
+	if (error)
+		goto out;
+
+	return error;
+
+out:
+	if (cur)
+		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	xfs_repair_cancel_btree_extents(sc, &rr.btlist);
+	list_for_each_entry_safe(rrm, n, &rr.rmap_idle, list) {
+		list_del(&rrm->list);
+		kmem_free(rrm);
+	}
+	list_for_each_entry_safe(rrm, n, &rr.rmap_bag, list) {
+		list_del(&rrm->list);
+		kmem_free(rrm);
+	}
+	list_for_each_entry_safe(rre, o, &rr.extlist, list) {
+		list_del(&rre->list);
+		kmem_free(rre);
+	}
+	return error;
+}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 43/47] xfs: online repair of inodes
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (41 preceding siblings ...)
2017-01-07 0:40 ` [PATCH 42/47] xfs: repair refcount btrees Darrick J. Wong
@ 2017-01-07 0:40 ` Darrick J. Wong
2017-01-07 0:40 ` [PATCH 44/47] xfs: repair inode block maps Darrick J. Wong
` (4 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:40 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Try to reinitialize corrupt inodes, or clear the reflink flag
if it's not needed.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/repair/common.c | 2 -
fs/xfs/repair/common.h | 1
fs/xfs/repair/inode.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_bmap_util.c | 4 +
fs/xfs/xfs_reflink.c | 15 +++--
fs/xfs/xfs_reflink.h | 6 +-
6 files changed, 168 insertions(+), 13 deletions(-)
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index e75cf66..2b1cd09 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -772,7 +772,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag_iallocbt, xfs_scrub_finobt, xfs_repair_iallocbt, xfs_sb_version_hasfinobt},
{xfs_scrub_setup_ag_header_freeze, xfs_scrub_rmapbt, xfs_repair_rmapbt, xfs_sb_version_hasrmapbt},
{xfs_scrub_setup_ag_header, xfs_scrub_refcountbt, xfs_repair_refcountbt, xfs_sb_version_hasreflink},
- {xfs_scrub_setup_inode_raw, xfs_scrub_inode, NULL, NULL},
+ {xfs_scrub_setup_inode_raw, xfs_scrub_inode, xfs_repair_inode, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_data, NULL, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_attr, NULL, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_cow, NULL, NULL},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 02e5257..bb10e7e 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -341,5 +341,6 @@ int xfs_repair_allocbt(struct xfs_scrub_context *sc);
int xfs_repair_iallocbt(struct xfs_scrub_context *sc);
int xfs_repair_rmapbt(struct xfs_scrub_context *sc);
int xfs_repair_refcountbt(struct xfs_scrub_context *sc);
+int xfs_repair_inode(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/inode.c b/fs/xfs/repair/inode.c
index 51f0e78..a3eb872 100644
--- a/fs/xfs/repair/inode.c
+++ b/fs/xfs/repair/inode.c
@@ -41,6 +41,7 @@
#include "xfs_rmap.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
+#include "xfs_reflink.h"
#include "repair/common.h"
/*
@@ -443,3 +444,155 @@ xfs_scrub_inode(
#undef XFS_SCRUB_INODE_OP_ERROR_GOTO
#undef XFS_SCRUB_INODE_GOTO
#undef XFS_SCRUB_INODE_CHECK
+
+/* Repair an inode's fields. */
+int
+xfs_repair_inode(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_imap imap;
+ struct xfs_mount *mp = sc->tp->t_mountp;
+ struct xfs_buf *bp;
+ struct xfs_dinode *dip;
+ struct xfs_inode *ip;
+ xfs_ino_t ino;
+ unsigned long long count;
+ uint64_t flags2;
+ uint32_t nextents;
+ uint16_t flags;
+ int error = 0;
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return -EOPNOTSUPP;
+
+ /* Are we fixing this thing manually? */
+ if (!sc->ip) {
+ /* Map & read inode. */
+ ino = sc->sm->sm_ino;
+ error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED);
+ if (error)
+ goto out;
+
+ error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+ imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp,
+ NULL);
+ if (error)
+ goto out;
+
+ /* Fix everything the verifier will complain about. */
+ bp->b_ops = &xfs_inode_buf_ops;
+ dip = xfs_buf_offset(bp, imap.im_boffset);
+ dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
+ if (!xfs_dinode_good_version(mp, dip->di_version))
+ dip->di_version = 3;
+ dip->di_ino = cpu_to_be64(ino);
+ uuid_copy(&dip->di_uuid, &mp->m_sb.sb_meta_uuid);
+ flags = be16_to_cpu(dip->di_flags);
+ flags2 = be64_to_cpu(dip->di_flags2);
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ flags2 |= XFS_DIFLAG2_REFLINK;
+ else
+ flags2 &= ~(XFS_DIFLAG2_REFLINK |
+ XFS_DIFLAG2_COWEXTSIZE);
+ if (flags & XFS_DIFLAG_REALTIME)
+ flags2 &= ~XFS_DIFLAG2_REFLINK;
+ if (flags2 & XFS_DIFLAG2_REFLINK)
+ flags2 &= ~XFS_DIFLAG2_DAX;
+ dip->di_flags = cpu_to_be16(flags);
+ dip->di_flags2 = cpu_to_be64(flags2);
+ dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
+ if (be64_to_cpu(dip->di_size) & (1ULL << 63))
+ dip->di_size = cpu_to_be64((1ULL << 63) - 1);
+
+ /* Write out the inode... */
+ xfs_dinode_calc_crc(mp, dip);
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
+ xfs_trans_log_buf(sc->tp, bp, imap.im_boffset,
+ imap.im_boffset + mp->m_sb.sb_inodesize - 1);
+ error = xfs_trans_roll(&sc->tp, NULL);
+ if (error)
+ goto out;
+
+ /* ...and reload it? */
+ error = xfs_iget(mp, sc->tp, ino,
+ XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE,
+ 0, &sc->ip);
+ if (error)
+ goto out;
+ xfs_ilock(sc->ip, XFS_MMAPLOCK_EXCL);
+ xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
+ xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+ }
+
+ ip = sc->ip;
+ xfs_trans_ijoin(sc->tp, ip, 0);
+
+ /* di_size */
+ if (!S_ISDIR(VFS_I(ip)->i_mode) && !S_ISREG(VFS_I(ip)->i_mode) &&
+ !S_ISLNK(VFS_I(ip)->i_mode)) {
+ i_size_write(VFS_I(ip), 0);
+ ip->i_d.di_size = 0;
+ }
+
+ /* di_flags */
+ flags = ip->i_d.di_flags;
+ if ((flags & XFS_DIFLAG_IMMUTABLE) && (flags & XFS_DIFLAG_APPEND))
+ flags &= ~XFS_DIFLAG_APPEND;
+
+ if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME))
+ flags &= ~XFS_DIFLAG_FILESTREAM;
+ ip->i_d.di_flags = flags;
+
+ /* di_nblocks/di_nextents/di_anextents */
+ count = 0;
+ error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
+ &nextents, &count);
+ if (error)
+ goto out;
+ ip->i_d.di_nextents = nextents;
+
+ error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
+ &nextents, &count);
+ if (error)
+ goto out;
+ ip->i_d.di_anextents = nextents;
+ ip->i_d.di_nblocks = count;
+ if (ip->i_d.di_anextents != 0 && ip->i_d.di_forkoff == 0)
+ ip->i_d.di_anextents = 0;
+
+ /* Do we have prealloc blocks? */
+ if (S_ISREG(VFS_I(ip)->i_mode) && !(flags & XFS_DIFLAG_PREALLOC) &&
+ (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS ||
+ ip->i_d.di_format == XFS_DINODE_FMT_BTREE)) {
+ struct xfs_bmbt_irec got;
+ struct xfs_ifork *ifp;
+ xfs_fileoff_t lblk;
+ xfs_extnum_t idx;
+ bool found;
+
+ lblk = XFS_B_TO_FSB(mp, i_size_read(VFS_I(sc->ip)));
+ ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
+ found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &idx, &got);
+ while (found) {
+ if (got.br_startoff >= lblk &&
+ got.br_state == XFS_EXT_NORM) {
+ ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
+ break;
+ }
+ lblk = got.br_startoff + got.br_blockcount;
+ found = xfs_iext_get_extent(ifp, ++idx, &got);
+ }
+ }
+
+ /* Commit inode core changes. */
+ xfs_trans_log_inode(sc->tp, ip, XFS_ILOG_CORE);
+ error = xfs_trans_roll(&sc->tp, ip);
+ if (error)
+ goto out;
+
+ if (xfs_is_reflink_inode(sc->ip))
+ return xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
+
+out:
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d983f28..cfba408 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -481,8 +481,8 @@ xfs_getbmap_adjust_shared(
agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
- error = xfs_reflink_find_shared(mp, agno, agbno, map->br_blockcount,
- &ebno, &elen, true);
+ error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+ map->br_blockcount, &ebno, &elen, true);
if (error)
return error;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 07593a3..bdecdb8 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -128,6 +128,7 @@
int
xfs_reflink_find_shared(
struct xfs_mount *mp,
+ struct xfs_trans *tp,
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_extlen_t aglen,
@@ -139,18 +140,18 @@ xfs_reflink_find_shared(
struct xfs_btree_cur *cur;
int error;
- error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
if (error)
return error;
- cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+ cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
find_end_of_shared);
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
- xfs_buf_relse(agbp);
+ xfs_trans_brelse(tp, agbp);
return error;
}
@@ -194,7 +195,7 @@ xfs_reflink_trim_around_shared(
agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock);
aglen = irec->br_blockcount;
- error = xfs_reflink_find_shared(ip->i_mount, agno, agbno,
+ error = xfs_reflink_find_shared(ip->i_mount, NULL, agno, agbno,
aglen, &fbno, &flen, true);
if (error)
return error;
@@ -1256,8 +1257,8 @@ xfs_reflink_dirty_extents(
agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
aglen = map[1].br_blockcount;
- error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
- &rbno, &rlen, true);
+ error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+ aglen, &rbno, &rlen, true);
if (error)
goto out;
if (rbno == NULLAGBLOCK)
@@ -1330,7 +1331,7 @@ xfs_reflink_clear_inode_flag(
agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock);
aglen = map.br_blockcount;
- error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
+ error = xfs_reflink_find_shared(mp, *tpp, agno, agbno, aglen,
&rbno, &rlen, false);
if (error)
return error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index aa6a4d6..2a18e4d 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -20,9 +20,9 @@
#ifndef __XFS_REFLINK_H
#define __XFS_REFLINK_H 1
-extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
- xfs_extlen_t *flen, bool find_maximal);
+extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen,
+ xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal);
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 44/47] xfs: repair inode block maps
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (42 preceding siblings ...)
2017-01-07 0:40 ` [PATCH 43/47] xfs: online repair of inodes Darrick J. Wong
@ 2017-01-07 0:40 ` Darrick J. Wong
2017-01-07 0:40 ` [PATCH 45/47] xfs: repair damaged symlinks Darrick J. Wong
` (3 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:40 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Use the reverse-mapping btree information to rebuild an inode fork.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_bmap.c | 20 ++-
fs/xfs/libxfs/xfs_bmap.h | 6 +
fs/xfs/repair/bmap.c | 317 +++++++++++++++++++++++++++++++++++++++++++++-
fs/xfs/repair/common.c | 4 -
fs/xfs/repair/common.h | 10 +
fs/xfs/repair/inode.c | 45 ++++++-
6 files changed, 382 insertions(+), 20 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 2760bc3..ba2bb00 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2153,9 +2153,12 @@ xfs_bmap_add_extent_delay_real(
}
/* add reverse mapping */
- error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
- if (error)
- goto done;
+ if (!(bma->flags & XFS_BMAPI_NORMAP)) {
+ error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip,
+ whichfork, new);
+ if (error)
+ goto done;
+ }
/* convert to a btree if necessary */
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
@@ -3096,9 +3099,12 @@ xfs_bmap_add_extent_hole_real(
}
/* add reverse mapping */
- error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
- if (error)
- goto done;
+ if (!(bma->flags & XFS_BMAPI_NORMAP)) {
+ error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip,
+ whichfork, new);
+ if (error)
+ goto done;
+ }
/* convert to a btree if necessary */
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
@@ -4542,8 +4548,6 @@ xfs_bmapi_write(
ASSERT(len > 0);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
- ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index cecd094..15454749 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -110,6 +110,9 @@ struct xfs_extent_free_item
/* Map something in the CoW fork. */
#define XFS_BMAPI_COWFORK 0x200
+/* Don't update the rmap btree. */
+#define XFS_BMAPI_NORMAP 0x400
+
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -120,7 +123,8 @@ struct xfs_extent_free_item
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
{ XFS_BMAPI_ZERO, "ZERO" }, \
{ XFS_BMAPI_REMAP, "REMAP" }, \
- { XFS_BMAPI_COWFORK, "COWFORK" }
+ { XFS_BMAPI_COWFORK, "COWFORK" }, \
+ { XFS_BMAPI_NORMAP, "NORMAP" }
static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/repair/bmap.c b/fs/xfs/repair/bmap.c
index bd6a620..63ab446 100644
--- a/fs/xfs/repair/bmap.c
+++ b/fs/xfs/repair/bmap.c
@@ -36,6 +36,7 @@
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_refcount.h"
@@ -44,17 +45,21 @@
#include "repair/btree.h"
/* Set us up with an inode and AG headers, if needed. */
-int
-xfs_scrub_setup_inode_bmap(
+STATIC int
+__xfs_scrub_setup_inode_bmap(
struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
- bool retry_deadlocked)
+ bool retry_deadlocked,
+ bool data)
{
+ bool is_repair;
int error;
- error = xfs_scrub_setup_inode(sc, ip, sm, retry_deadlocked);
- if (error || !retry_deadlocked)
+ is_repair = (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR);
+ error = __xfs_scrub_setup_inode(sc, ip, sm, retry_deadlocked,
+ data && is_repair);
+ if (error || (!retry_deadlocked && !is_repair))
return error;
error = xfs_scrub_ag_lock_all(sc);
@@ -66,6 +71,28 @@ xfs_scrub_setup_inode_bmap(
return xfs_scrub_teardown(sc, ip, error);
}
+/* Set us up with an inode and AG headers, if needed. */
+int
+xfs_scrub_setup_inode_bmap(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool deadlocked)
+{
+ return __xfs_scrub_setup_inode_bmap(sc, ip, sm, deadlocked, false);
+}
+
+/* Set us up with an inode and AG headers, flushing file data if repairing. */
+int
+xfs_scrub_setup_inode_bmap_data(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool deadlocked)
+{
+ return __xfs_scrub_setup_inode_bmap(sc, ip, sm, deadlocked, true);
+}
+
/*
* Inode fork block mapping (BMBT) scrubber.
* More complex than the others because we have to scrub
@@ -553,3 +580,283 @@ xfs_scrub_bmap_cow(
return xfs_scrub_bmap(sc, XFS_COW_FORK);
}
+
+/* Inode fork block mapping (BMBT) repair. */
+
+struct xfs_repair_bmap_extent {
+ struct list_head list;
+ struct xfs_rmap_irec rmap;
+ xfs_agnumber_t agno;
+};
+
+struct xfs_repair_bmap {
+ struct list_head extlist;
+ struct list_head btlist;
+ xfs_ino_t ino;
+ xfs_rfsblock_t bmbt_blocks;
+ int whichfork;
+};
+
+/* Record extents that belong to this inode's fork. */
+STATIC int
+xfs_repair_bmap_extent_fn(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_repair_bmap *rb = priv;
+ struct xfs_repair_bmap_extent *rbe;
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_fsblock_t fsbno;
+ int error = 0;
+
+ if (xfs_scrub_should_terminate(&error))
+ return error;
+
+ /* Skip extents which are not owned by this inode and fork. */
+ if (rec->rm_owner != rb->ino)
+ return 0;
+ else if (rb->whichfork == XFS_DATA_FORK &&
+ (rec->rm_flags & XFS_RMAP_ATTR_FORK))
+ return 0;
+ else if (rb->whichfork == XFS_ATTR_FORK &&
+ !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
+ return 0;
+
+ /* Delete the old bmbt blocks later. */
+ if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
+ fsbno = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno,
+ rec->rm_startblock);
+ rb->bmbt_blocks += rec->rm_blockcount;
+ return xfs_repair_collect_btree_extent(mp, &rb->btlist,
+ fsbno, rec->rm_blockcount);
+ }
+
+ /* Remember this rmap. */
+ trace_xfs_repair_bmap_extent_fn(mp, cur->bc_private.a.agno,
+ rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
+ rec->rm_offset, rec->rm_flags);
+
+ rbe = kmem_alloc(sizeof(*rbe), KM_NOFS);
+ if (!rbe)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&rbe->list);
+ rbe->rmap = *rec;
+ rbe->agno = cur->bc_private.a.agno;
+ list_add_tail(&rbe->list, &rb->extlist);
+
+ return 0;
+}
+
+/* Compare two bmap extents. */
+static int
+xfs_repair_bmap_extent_cmp(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
+{
+ struct xfs_repair_bmap_extent *ap;
+ struct xfs_repair_bmap_extent *bp;
+
+ ap = container_of(a, struct xfs_repair_bmap_extent, list);
+ bp = container_of(b, struct xfs_repair_bmap_extent, list);
+
+ if (ap->rmap.rm_offset > bp->rmap.rm_offset)
+ return 1;
+ else if (ap->rmap.rm_offset < bp->rmap.rm_offset)
+ return -1;
+ return 0;
+}
+
+/* Repair an inode fork. */
+STATIC int
+xfs_repair_bmap(
+ struct xfs_scrub_context *sc,
+ int whichfork)
+{
+ struct xfs_repair_bmap rb = {0};
+ struct xfs_bmbt_irec bmap;
+ struct xfs_defer_ops dfops;
+ struct xfs_owner_info oinfo;
+ struct xfs_inode *ip = sc->ip;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buf *agf_bp = NULL;
+ struct xfs_repair_bmap_extent *rbe;
+ struct xfs_repair_bmap_extent *n;
+ struct xfs_btree_cur *cur;
+ xfs_fsblock_t firstfsb;
+ xfs_agnumber_t agno;
+ xfs_extlen_t extlen;
+ int baseflags;
+ int flags;
+ int nimaps;
+ int error = 0;
+
+ ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
+
+ /* Don't know how to repair the other fork formats. */
+ if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
+ return -EOPNOTSUPP;
+
+ /* Only files, symlinks, and directories get to have data forks. */
+ if (whichfork == XFS_DATA_FORK && !S_ISREG(VFS_I(ip)->i_mode) &&
+ !S_ISDIR(VFS_I(ip)->i_mode) && !S_ISLNK(VFS_I(ip)->i_mode))
+ return -EINVAL;
+
+ /* If we somehow have delalloc extents, forget it. */
+ if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks)
+ return -EBUSY;
+
+ /* We require the rmapbt to rebuild anything. */
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return -EOPNOTSUPP;
+
+ /* Don't know how to rebuild realtime data forks. */
+ if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK)
+ return -EOPNOTSUPP;
+
+ /*
+ * If this is a file data fork, wait for all pending directio to
+ * complete, then tear everything out of the page cache.
+ */
+ if (S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+ inode_dio_wait(VFS_I(ip));
+ truncate_inode_pages(VFS_I(ip)->i_mapping, 0);
+ }
+
+ /* Collect all reverse mappings for this fork's extents. */
+ INIT_LIST_HEAD(&rb.extlist);
+ INIT_LIST_HEAD(&rb.btlist);
+ rb.ino = ip->i_ino;
+ rb.whichfork = whichfork;
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ ASSERT(xfs_scrub_ag_can_lock(sc, agno));
+ error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &agf_bp);
+ if (error)
+ goto out;
+ cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, agno);
+ error = xfs_rmap_query_all(cur, xfs_repair_bmap_extent_fn, &rb);
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
+ XFS_BTREE_NOERROR);
+ if (error)
+ goto out;
+ }
+
+ /* Blow out the in-core fork and zero the on-disk fork. */
+ if (XFS_IFORK_PTR(ip, whichfork) != NULL)
+ xfs_idestroy_fork(sc->ip, whichfork);
+ XFS_IFORK_FMT_SET(sc->ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+ XFS_IFORK_NEXT_SET(sc->ip, whichfork, 0);
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+ /* Reinitialize the in-core fork. */
+ if (whichfork == XFS_DATA_FORK) {
+ memset(&ip->i_df, 0, sizeof(struct xfs_ifork));
+ ip->i_df.if_flags |= XFS_IFEXTENTS;
+ } else if (whichfork == XFS_ATTR_FORK) {
+ if (list_empty(&rb.extlist))
+ ip->i_afp = NULL;
+ else {
+ ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS);
+ ip->i_afp->if_flags |= XFS_IFEXTENTS;
+ }
+ }
+ xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+ error = xfs_trans_roll(&sc->tp, sc->ip);
+ if (error)
+ goto out;
+
+ baseflags = XFS_BMAPI_REMAP | XFS_BMAPI_NORMAP;
+ if (whichfork == XFS_ATTR_FORK)
+ baseflags |= XFS_BMAPI_ATTRFORK;
+
+ /* "Remap" the extents into the fork. */
+ list_sort(NULL, &rb.extlist, xfs_repair_bmap_extent_cmp);
+ list_for_each_entry_safe(rbe, n, &rb.extlist, list) {
+ /* Form the "new" mapping... */
+ bmap.br_startblock = XFS_AGB_TO_FSB(mp, rbe->agno,
+ rbe->rmap.rm_startblock);
+ bmap.br_startoff = rbe->rmap.rm_offset;
+ flags = 0;
+ if (rbe->rmap.rm_flags & XFS_RMAP_UNWRITTEN)
+ flags = XFS_BMAPI_PREALLOC;
+ while (rbe->rmap.rm_blockcount > 0) {
+ xfs_defer_init(&dfops, &firstfsb);
+ extlen = min_t(xfs_extlen_t, rbe->rmap.rm_blockcount,
+ MAXEXTLEN);
+ bmap.br_blockcount = extlen;
+
+ /* Drop the block counter... */
+ sc->ip->i_d.di_nblocks -= extlen;
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+ /* Re-add the extent to the fork. */
+ nimaps = 1;
+ firstfsb = bmap.br_startblock;
+ error = xfs_bmapi_write(sc->tp, sc->ip,
+ bmap.br_startoff,
+ extlen, baseflags | flags, &firstfsb,
+ extlen, &bmap, &nimaps,
+ &dfops);
+ if (error)
+ goto out;
+
+ bmap.br_startblock += extlen;
+ bmap.br_startoff += extlen;
+ rbe->rmap.rm_blockcount -= extlen;
+ error = xfs_defer_finish(&sc->tp, &dfops, sc->ip);
+ if (error)
+ goto out;
+ /* Make sure we roll the transaction. */
+ error = xfs_trans_roll(&sc->tp, sc->ip);
+ if (error)
+ goto out;
+ }
+ list_del(&rbe->list);
+ kmem_free(rbe);
+ }
+
+ /* Decrease nblocks to reflect the freed bmbt blocks. */
+ if (rb.bmbt_blocks) {
+ sc->ip->i_d.di_nblocks -= rb.bmbt_blocks;
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+ error = xfs_trans_roll(&sc->tp, sc->ip);
+ if (error)
+ goto out;
+ }
+
+ /* Dispose of all the old bmbt blocks. */
+ xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, whichfork);
+ error = xfs_repair_reap_btree_extents(sc, &rb.btlist, &oinfo,
+ XFS_AG_RESV_NONE);
+ if (error)
+ goto out;
+
+ return error;
+out:
+ xfs_repair_cancel_btree_extents(sc, &rb.btlist);
+ list_for_each_entry_safe(rbe, n, &rb.extlist, list) {
+ list_del(&rbe->list);
+ kmem_free(rbe);
+ }
+ return error;
+}
+
+/* Repair an inode's data fork. */
+int
+xfs_repair_bmap_data(
+ struct xfs_scrub_context *sc)
+{
+ return xfs_repair_bmap(sc, XFS_DATA_FORK);
+}
+
+/* Repair an inode's attr fork. */
+int
+xfs_repair_bmap_attr(
+ struct xfs_scrub_context *sc)
+{
+ return xfs_repair_bmap(sc, XFS_ATTR_FORK);
+}
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 2b1cd09..7b5bcad 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -773,8 +773,8 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_ag_header_freeze, xfs_scrub_rmapbt, xfs_repair_rmapbt, xfs_sb_version_hasrmapbt},
{xfs_scrub_setup_ag_header, xfs_scrub_refcountbt, xfs_repair_refcountbt, xfs_sb_version_hasreflink},
{xfs_scrub_setup_inode_raw, xfs_scrub_inode, xfs_repair_inode, NULL},
- {xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_data, NULL, NULL},
- {xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_attr, NULL, NULL},
+ {xfs_scrub_setup_inode_bmap_data, xfs_scrub_bmap_data, xfs_repair_bmap_data, NULL},
+ {xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_attr, xfs_repair_bmap_attr, NULL},
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_cow, NULL, NULL},
{xfs_scrub_setup_inode, xfs_scrub_directory, NULL, NULL},
{xfs_scrub_setup_inode_xattr, xfs_scrub_xattr, NULL, NULL},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index bb10e7e..76fba90 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -238,6 +238,10 @@ int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
bool retry_deadlocked);
+int __xfs_scrub_setup_inode(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked, bool flush_data);
int xfs_scrub_setup_inode(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
@@ -250,6 +254,10 @@ int xfs_scrub_setup_inode_bmap(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
bool retry_deadlocked);
+int xfs_scrub_setup_inode_bmap_data(struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked);
int xfs_scrub_setup_inode_xattr(struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
@@ -342,5 +350,7 @@ int xfs_repair_iallocbt(struct xfs_scrub_context *sc);
int xfs_repair_rmapbt(struct xfs_scrub_context *sc);
int xfs_repair_refcountbt(struct xfs_scrub_context *sc);
int xfs_repair_inode(struct xfs_scrub_context *sc);
+int xfs_repair_bmap_data(struct xfs_scrub_context *sc);
+int xfs_repair_bmap_attr(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/inode.c b/fs/xfs/repair/inode.c
index a3eb872..62576aff 100644
--- a/fs/xfs/repair/inode.c
+++ b/fs/xfs/repair/inode.c
@@ -42,6 +42,7 @@
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_reflink.h"
+#include "xfs_bmap_btree.h"
#include "repair/common.h"
/*
@@ -88,13 +89,15 @@ xfs_scrub_get_inode(
/* Set us up with an inode. */
int
-xfs_scrub_setup_inode(
+__xfs_scrub_setup_inode(
struct xfs_scrub_context *sc,
struct xfs_inode *ip,
struct xfs_scrub_metadata *sm,
- bool retry_deadlocked)
+ bool retry_deadlocked,
+ bool flush_data)
{
struct xfs_mount *mp = ip->i_mount;
+ unsigned long long resblks;
int error;
memset(sc, 0, sizeof(*sc));
@@ -107,8 +110,31 @@ xfs_scrub_setup_inode(
xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
xfs_ilock(sc->ip, XFS_MMAPLOCK_EXCL);
+
+ /*
+ * We don't want any ephemeral data fork updates sitting around
+ * while we inspect block mappings, so wait for directio to finish
+ * and flush dirty data if we have delalloc reservations.
+ */
+ if (flush_data) {
+ inode_dio_wait(VFS_I(sc->ip));
+ error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
+ if (error)
+ goto out_unlock;
+ }
+
+ /*
+ * Guess how many blocks we're going to need to rebuild an
+ * entire bmap. We don't actually know which fork, so err
+ * on the side of asking for more blocks than we might
+ * actually need. Since we're reloading the btree sequentially
+ * there should be fewer splits.
+ */
+ resblks = xfs_bmbt_calc_size(mp,
+ max_t(xfs_extnum_t, sc->ip->i_d.di_nextents,
+ sc->ip->i_d.di_anextents));
error = xfs_scrub_trans_alloc(sm, mp, &M_RES(mp)->tr_itruncate,
- 0, 0, 0, &sc->tp);
+ resblks, 0, 0, &sc->tp);
if (error)
goto out_unlock;
xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
@@ -137,7 +163,7 @@ xfs_scrub_setup_inode_raw(
if (sm->sm_ino && xfs_internal_inum(mp, sm->sm_ino))
return -ENOENT;
- error = xfs_scrub_setup_inode(sc, ip, sm, retry_deadlocked);
+ error = __xfs_scrub_setup_inode(sc, ip, sm, retry_deadlocked, false);
if (error) {
memset(sc, 0, sizeof(*sc));
sc->ip = NULL;
@@ -155,6 +181,17 @@ xfs_scrub_setup_inode_raw(
return 0;
}
+/* Set us up with an inode. */
+int
+xfs_scrub_setup_inode(
+ struct xfs_scrub_context *sc,
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm,
+ bool retry_deadlocked)
+{
+ return __xfs_scrub_setup_inode(sc, ip, sm, retry_deadlocked, false);
+}
+
/* Inode core */
#define XFS_SCRUB_INODE_CHECK(fs_ok) \
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 45/47] xfs: repair damaged symlinks
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (43 preceding siblings ...)
2017-01-07 0:40 ` [PATCH 44/47] xfs: repair inode block maps Darrick J. Wong
@ 2017-01-07 0:40 ` Darrick J. Wong
2017-01-07 0:40 ` [PATCH 46/47] xfs: query the per-AG reservation counters Darrick J. Wong
` (2 subsequent siblings)
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:40 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Repair inconsistent symbolic link data.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/repair/common.c | 2
fs/xfs/repair/common.h | 1
fs/xfs/repair/inode.c | 1
fs/xfs/repair/symlink.c | 235 +++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 238 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/repair/common.c b/fs/xfs/repair/common.c
index 7b5bcad..137e1a4 100644
--- a/fs/xfs/repair/common.c
+++ b/fs/xfs/repair/common.c
@@ -778,7 +778,7 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
{xfs_scrub_setup_inode_bmap, xfs_scrub_bmap_cow, NULL, NULL},
{xfs_scrub_setup_inode, xfs_scrub_directory, NULL, NULL},
{xfs_scrub_setup_inode_xattr, xfs_scrub_xattr, NULL, NULL},
- {xfs_scrub_setup_inode_symlink, xfs_scrub_symlink, NULL, NULL},
+ {xfs_scrub_setup_inode_symlink, xfs_scrub_symlink, xfs_repair_symlink, NULL},
#ifdef CONFIG_XFS_RT
{xfs_scrub_setup_rt, xfs_scrub_rtbitmap, NULL, xfs_sb_version_hasrealtime},
{xfs_scrub_setup_rt, xfs_scrub_rtsummary, NULL, xfs_sb_version_hasrealtime},
diff --git a/fs/xfs/repair/common.h b/fs/xfs/repair/common.h
index 76fba90..a4dfc67 100644
--- a/fs/xfs/repair/common.h
+++ b/fs/xfs/repair/common.h
@@ -352,5 +352,6 @@ int xfs_repair_refcountbt(struct xfs_scrub_context *sc);
int xfs_repair_inode(struct xfs_scrub_context *sc);
int xfs_repair_bmap_data(struct xfs_scrub_context *sc);
int xfs_repair_bmap_attr(struct xfs_scrub_context *sc);
+int xfs_repair_symlink(struct xfs_scrub_context *sc);
#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/repair/inode.c b/fs/xfs/repair/inode.c
index 62576aff..be85be6 100644
--- a/fs/xfs/repair/inode.c
+++ b/fs/xfs/repair/inode.c
@@ -133,6 +133,7 @@ __xfs_scrub_setup_inode(
resblks = xfs_bmbt_calc_size(mp,
max_t(xfs_extnum_t, sc->ip->i_d.di_nextents,
sc->ip->i_d.di_anextents));
+ resblks = max_t(unsigned long long, resblks, XFS_SYMLINK_MAPS);
error = xfs_scrub_trans_alloc(sm, mp, &M_RES(mp)->tr_itruncate,
resblks, 0, 0, &sc->tp);
if (error)
diff --git a/fs/xfs/repair/symlink.c b/fs/xfs/repair/symlink.c
index 8b4fb31..0f910b8 100644
--- a/fs/xfs/repair/symlink.c
+++ b/fs/xfs/repair/symlink.c
@@ -33,6 +33,8 @@
#include "xfs_inode.h"
#include "xfs_inode_fork.h"
#include "xfs_symlink.h"
+#include "xfs_bmap.h"
+#include "xfs_quota.h"
#include "repair/common.h"
/* Set us up with an inode and a buffer for reading symlink targets. */
@@ -105,3 +107,236 @@ xfs_scrub_symlink(
}
#undef XFS_SCRUB_SYMLINK_GOTO
#undef XFS_SCRUB_SYMLINK_CHECK
+
+/* Blow out the whole symlink; replace contents. */
+STATIC int
+xfs_repair_symlink_rewrite(
+ struct xfs_trans **tpp,
+ struct xfs_inode *ip,
+ const char *target_path,
+ int pathlen)
+{
+ struct xfs_defer_ops dfops;
+ struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS];
+ struct xfs_ifork *ifp;
+ const char *cur_chunk;
+ struct xfs_mount *mp = (*tpp)->t_mountp;
+ struct xfs_buf *bp;
+ xfs_fsblock_t first_block;
+ xfs_fileoff_t first_fsb;
+ xfs_filblks_t fs_blocks;
+ xfs_daddr_t d;
+ uint resblks;
+ int byte_cnt;
+ int n;
+ int nmaps;
+ int offset;
+ int error = 0;
+
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+
+ /* Truncate the whole data fork if it wasn't inline. */
+ if (!(ifp->if_flags & XFS_IFINLINE)) {
+ error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, 0);
+ if (error)
+ goto out;
+ }
+
+ /* Blow out the in-core fork and zero the on-disk fork. */
+ xfs_idestroy_fork(ip, XFS_DATA_FORK);
+ ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+ ip->i_d.di_nextents = 0;
+ memset(&ip->i_df, 0, sizeof(struct xfs_ifork));
+ ip->i_df.if_flags |= XFS_IFEXTENTS;
+
+ /* Rewrite an inline symlink. */
+ if (pathlen <= XFS_IFORK_DSIZE(ip)) {
+ xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen);
+
+ i_size_write(VFS_I(ip), pathlen);
+ ip->i_d.di_size = pathlen;
+ ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+ xfs_trans_log_inode(*tpp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
+ goto out;
+
+ }
+
+ /* Rewrite a remote symlink. */
+ fs_blocks = xfs_symlink_blocks(mp, pathlen);
+ first_fsb = 0;
+ nmaps = XFS_SYMLINK_MAPS;
+
+ /* Reserve quota for new blocks. */
+ error = xfs_trans_reserve_quota_nblks(*tpp, ip, fs_blocks, 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out;
+
+ /* Map blocks, write symlink target. */
+ xfs_defer_init(&dfops, &first_block);
+
+ error = xfs_bmapi_write(*tpp, ip, first_fsb, fs_blocks,
+ XFS_BMAPI_METADATA, &first_block, fs_blocks,
+ mval, &nmaps, &dfops);
+ if (error)
+ goto out_bmap_cancel;
+
+ if (resblks)
+ resblks -= fs_blocks;
+ ip->i_d.di_size = pathlen;
+ i_size_write(VFS_I(ip), pathlen);
+ xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+
+ cur_chunk = target_path;
+ offset = 0;
+ for (n = 0; n < nmaps; n++) {
+ char *buf;
+
+ d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
+ byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
+ bp = xfs_trans_get_buf(*tpp, mp->m_ddev_targp, d,
+ BTOBB(byte_cnt), 0);
+ if (!bp) {
+ error = -ENOMEM;
+ goto out_bmap_cancel;
+ }
+ bp->b_ops = &xfs_symlink_buf_ops;
+
+ byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
+ byte_cnt = min(byte_cnt, pathlen);
+
+ buf = bp->b_addr;
+ buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
+ byte_cnt, bp);
+
+ memcpy(buf, cur_chunk, byte_cnt);
+
+ cur_chunk += byte_cnt;
+ pathlen -= byte_cnt;
+ offset += byte_cnt;
+
+ xfs_trans_buf_set_type(*tpp, bp, XFS_BLFT_SYMLINK_BUF);
+ xfs_trans_log_buf(*tpp, bp, 0, (buf + byte_cnt - 1) -
+ (char *)bp->b_addr);
+ }
+ ASSERT(pathlen == 0);
+
+ error = xfs_defer_finish(tpp, &dfops, NULL);
+ if (error)
+ goto out_bmap_cancel;
+
+ return 0;
+
+out_bmap_cancel:
+ xfs_defer_cancel(&dfops);
+out:
+ return error;
+}
+
+int
+xfs_repair_symlink(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS];
+ struct xfs_inode *ip = sc->ip;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp;
+ struct xfs_buf *bp;
+ loff_t len;
+ size_t newlen;
+ xfs_daddr_t d;
+ int fsblocks;
+ int nmaps = XFS_SYMLINK_MAPS;
+ int nr;
+ int offset;
+ int n;
+ int byte_cnt;
+ int error = 0;
+
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ len = i_size_read(VFS_I(ip));
+ xfs_trans_ijoin(sc->tp, ip, 0);
+
+ /* Truncate the inode if there's a zero inside the length. */
+ if (ifp->if_flags & XFS_IFINLINE) {
+ if (ifp->if_u1.if_data)
+ newlen = strnlen(ifp->if_u1.if_data,
+ XFS_IFORK_DSIZE(ip));
+ else {
+ newlen = 1;
+ ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+ ifp->if_u1.if_data[0] = '/';
+ }
+ if (len > newlen) {
+ i_size_write(VFS_I(ip), newlen);
+ ip->i_d.di_size = newlen;
+ xfs_trans_log_inode(sc->tp, ip, XFS_ILOG_DDATA |
+ XFS_ILOG_CORE);
+ }
+ goto out;
+ }
+
+ fsblocks = xfs_symlink_blocks(mp, len);
+ error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0);
+ if (error)
+ goto out;
+
+ /* Fix everything that fails the verifiers. */
+ offset = 0;
+ for (n = 0; n < nmaps; n++) {
+ d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
+ byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
+
+ error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
+ d, BTOBB(byte_cnt), 0, &bp, NULL);
+ if (error)
+ goto out;
+ bp->b_ops = &xfs_symlink_buf_ops;
+
+ byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
+ if (len < byte_cnt)
+ byte_cnt = len;
+
+ nr = xfs_symlink_hdr_set(mp, ip->i_ino, offset, byte_cnt, bp);
+
+ len -= byte_cnt;
+ offset += byte_cnt;
+
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SYMLINK_BUF);
+ xfs_trans_log_buf(sc->tp, bp, 0, nr - 1);
+ xfs_trans_brelse(sc->tp, bp);
+ }
+ if (len != 0) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+
+ /* Roll transaction, release buffers. */
+ error = xfs_trans_roll(&sc->tp, ip);
+ if (error)
+ goto out;
+
+ /* Size set correctly? */
+ len = i_size_read(VFS_I(ip));
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ error = xfs_readlink(ip, sc->buf);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (error)
+ goto out;
+
+ /*
+ * Figure out the new target length. We can't handle zero-length
+ * symlinks, so make sure that we don't write that out.
+ */
+ newlen = strnlen(sc->buf, MAXPATHLEN);
+ if (newlen == 0) {
+ *((char *)sc->buf) = '/';
+ newlen = 1;
+ }
+
+ if (len > newlen)
+ error = xfs_repair_symlink_rewrite(&sc->tp, ip, sc->buf,
+ newlen);
+out:
+ return error;
+}
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 46/47] xfs: query the per-AG reservation counters
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (44 preceding siblings ...)
2017-01-07 0:40 ` [PATCH 45/47] xfs: repair damaged symlinks Darrick J. Wong
@ 2017-01-07 0:40 ` Darrick J. Wong
2017-01-07 0:40 ` [PATCH 47/47] xfs: avoid mount-time deadlock in CoW extent recovery Darrick J. Wong
2017-01-09 12:40 ` [PATCH v4 00/47] xfs: online scrub/repair support Amir Goldstein
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:40 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
Establish an ioctl for userspace to query the original and current
per-AG reservation counts. This will be used by xfs_scrub to
check that the vfs counters are at least somewhat sane.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_fs.h | 10 ++++++++++
fs/xfs/xfs_fsops.c | 29 +++++++++++++++++++++++++++++
fs/xfs/xfs_fsops.h | 2 ++
fs/xfs/xfs_ioctl.c | 16 ++++++++++++++++
fs/xfs/xfs_ioctl32.c | 1 +
5 files changed, 58 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 5cc8462..160916c 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -608,6 +608,15 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
/*
+ * AG reserved block counters
+ */
+struct xfs_fsop_ag_resblks {
+ __u64 resblks; /* blocks reserved now */
+ __u64 resblks_orig; /* blocks reserved at mount time */
+ __u64 reserved[2];
+};
+
+/*
* ioctl limits
*/
#ifdef XATTR_LIST_MAX
@@ -683,6 +692,7 @@ struct xfs_scrub_metadata {
#define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
#define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom)
#define XFS_IOC_GOINGDOWN _IOR ('X', 125, __uint32_t)
+#define XFS_IOC_GET_AG_RESBLKS _IOR ('X', 126, struct xfs_fsop_ag_resblks)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 242e809..70b50f2 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -44,6 +44,7 @@
#include "xfs_filestream.h"
#include "xfs_rmap.h"
#include "xfs_ag_resv.h"
+#include "xfs_fs.h"
/*
* File system operations
@@ -1067,3 +1068,31 @@ xfs_fs_unreserve_ag_blocks(
return error;
}
+
+/* Query the per-AG reservations to see how many blocks we have reserved. */
+int
+xfs_fs_get_ag_reserve_blocks(
+ struct xfs_mount *mp,
+ struct xfs_fsop_ag_resblks *out)
+{
+ struct xfs_ag_resv *r;
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ out->resblks = 0;
+ out->resblks_orig = 0;
+ out->reserved[0] = out->reserved[1] = 0;
+
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ pag = xfs_perag_get(mp, agno);
+ r = xfs_perag_resv(pag, XFS_AG_RESV_METADATA);
+ out->resblks += r->ar_reserved;
+ out->resblks_orig += r->ar_asked;
+ r = xfs_perag_resv(pag, XFS_AG_RESV_AGFL);
+ out->resblks += r->ar_reserved;
+ out->resblks_orig += r->ar_asked;
+ xfs_perag_put(pag);
+ }
+
+ return 0;
+}
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f349158..91609ae 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,8 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
xfs_fsop_resblks_t *outval);
extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
+extern int xfs_fs_get_ag_reserve_blocks(struct xfs_mount *mp,
+ struct xfs_fsop_ag_resblks *out);
extern int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
extern int xfs_fs_unreserve_ag_blocks(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index e9a4619..ee3720c 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -2021,6 +2021,22 @@ xfs_file_ioctl(
return 0;
}
+ case XFS_IOC_GET_AG_RESBLKS: {
+ struct xfs_fsop_ag_resblks out;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ error = xfs_fs_get_ag_reserve_blocks(mp, &out);
+ if (error)
+ return error;
+
+ if (copy_to_user(arg, &out, sizeof(out)))
+ return -EFAULT;
+
+ return 0;
+ }
+
case XFS_IOC_FSGROWFSDATA: {
xfs_growfs_data_t in;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 40952b1..5e46832 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -556,6 +556,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_ERROR_CLEARALL:
case XFS_IOC_GETFSMAP:
case XFS_IOC_SCRUB_METADATA:
+ case XFS_IOC_GET_AG_RESBLKS:
return xfs_file_ioctl(filp, cmd, p);
#ifndef BROKEN_X86_ALIGNMENT
/* These are handled fine if no alignment issues */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* [PATCH 47/47] xfs: avoid mount-time deadlock in CoW extent recovery
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (45 preceding siblings ...)
2017-01-07 0:40 ` [PATCH 46/47] xfs: query the per-AG reservation counters Darrick J. Wong
@ 2017-01-07 0:40 ` Darrick J. Wong
2017-01-09 12:40 ` [PATCH v4 00/47] xfs: online scrub/repair support Amir Goldstein
47 siblings, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-07 0:40 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs
If a malicious user corrupts the refcount btree to cause a cycle between
different levels of the tree, the next mount attempt will deadlock in
the CoW recovery routine. The scrub code uses the ability to re-grab a
buffer that was previously locked to a transaction to avoid deadlocks, so
do that here too.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_refcount.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 1c47671..c845253 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1646,10 +1646,14 @@ xfs_refcount_recover_cow_leftovers(
if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
return -EOPNOTSUPP;
- error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ error = xfs_trans_alloc_empty(mp, &tp);
if (error)
return error;
- cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+ if (error)
+ goto out_trans;
+ cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
/* Find all the leftover CoW staging extents. */
INIT_LIST_HEAD(&debris);
@@ -1662,7 +1666,7 @@ xfs_refcount_recover_cow_leftovers(
if (error)
goto out_cursor;
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
- xfs_buf_relse(agbp);
+ xfs_trans_cancel(tp);
/* Now iterate the list to free the leftovers */
list_for_each_entry(rr, &debris, rr_list) {
@@ -1705,13 +1709,17 @@ xfs_refcount_recover_cow_leftovers(
out_cursor:
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- xfs_buf_relse(agbp);
+ xfs_trans_cancel(tp);
goto out_free;
out_defer:
xfs_defer_cancel(&dfops);
xfs_trans_cancel(tp);
goto out_free;
+
+out_trans:
+ xfs_trans_cancel(tp);
+ return error;
}
/* Is there a record covering a given extent? */
^ permalink raw reply related [flat|nested] 55+ messages in thread
* Re: [PATCH v4 00/47] xfs: online scrub/repair support
2017-01-07 0:35 [PATCH v4 00/47] xfs: online scrub/repair support Darrick J. Wong
` (46 preceding siblings ...)
2017-01-07 0:40 ` [PATCH 47/47] xfs: avoid mount-time deadlock in CoW extent recovery Darrick J. Wong
@ 2017-01-09 12:40 ` Amir Goldstein
2017-01-09 21:15 ` Darrick J. Wong
47 siblings, 1 reply; 55+ messages in thread
From: Amir Goldstein @ 2017-01-09 12:40 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs
On Sat, Jan 7, 2017 at 2:35 AM, Darrick J. Wong <darrick.wong@oracle.com> wrote:
> Hi all,
>
...
> If you're going to start using this mess, you probably ought to just
> pull from my github trees. The kernel patches[1] should apply against
> 4.10-rc2. xfsprogs[2] and xfstests[3] can be found in their usual
> places.
>
> The patches have survived all auto group xfstests both with scrub-only
> mode and also a special debugging mode to xfs_scrub that forces it to
> rebuild the metadata structures even if they're not damaged. Since the
> last patch release, I have now had time to run the new tests in [3] that
> try to fuzz every field in every data structure on disk.
>
Darrick,
I started running the dangerous_scrub group yesterday and it's killing my
test machine. The test machine is x86_64 (i5-3470) 16GB RAM
and test partitions are 100GB volume on spinning disk.
xfs_db swaps my system to death, and in most of the tests it eventually
gets shot down by the OOM killer.
Is that surprising to you? How much RAM do your test systems have?
Can you figure out a minimal RAM requirement to run these fuzzers
and maybe check required RAM before running the test?
Alternatively, can you figure out how to reduce the amount of RAM
used by the fuzzer?
I was using mkfs options "-m rmapbt=1,reflink=1"
and I tried running with and then without TEST_XFS_SCRUB=1.
I don't see a reason to send the logs at this point, they are just a complete
mass of destruction.
Let me know if you need more inputs from me.
Amir.
^ permalink raw reply [flat|nested] 55+ messages in thread
* Re: [PATCH v4 00/47] xfs: online scrub/repair support
2017-01-09 12:40 ` [PATCH v4 00/47] xfs: online scrub/repair support Amir Goldstein
@ 2017-01-09 21:15 ` Darrick J. Wong
2017-01-10 7:54 ` Eryu Guan
0 siblings, 1 reply; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-09 21:15 UTC (permalink / raw)
To: Amir Goldstein; +Cc: linux-xfs
On Mon, Jan 09, 2017 at 02:40:56PM +0200, Amir Goldstein wrote:
> On Sat, Jan 7, 2017 at 2:35 AM, Darrick J. Wong <darrick.wong@oracle.com> wrote:
> > Hi all,
> >
> ...
> > If you're going to start using this mess, you probably ought to just
> > pull from my github trees. The kernel patches[1] should apply against
> > 4.10-rc2. xfsprogs[2] and xfstests[3] can be found in their usual
> > places.
> >
> > The patches have survived all auto group xfstests both with scrub-only
> > mode and also a special debugging mode to xfs_scrub that forces it to
> > rebuild the metadata structures even if they're not damaged. Since the
> > last patch release, I have now had time to run the new tests in [3] that
> > try to fuzz every field in every data structure on disk.
> >
>
> Darrick,
>
> I started running the dangerous_scrub group yersterday and it's killing my
> test machine. The test machine is x86_64 (i5-3470) 16GB RAM
> and test partitions are 100GB volume on spinning disk.
>
> xfs_db swaps my system to death and most of the tests it eventually
> gets shot down by oom killer.
>
> Is that surprising to you?
Yes.
> How much RAM does you test systems have?
2GB in a VM so the host system won't go down. Usually the test disks
are 8GB disks to keep the fuzzer runtimes down, but I've also run them
against 100GB volumes without OOMing...
> Can you figure out a minimal RAM requirement to run these fuzzers
> and maybe check required RAM before running the test?
I wouldn't have thought xfs_check would OOM... it would help to know
exactly what the xfs_db invocation thought it was doing.
> Alternatively, can you figure out how to reduce the amount of RAM
> used by the fuzzer?
>
> I was using mkfs options "-m rmapbt=1,reflink=1"
> and I tried running with and then without TEST_XFS_SCRUB=1.
> I don't see a reason to send the logs at this point, they are just a complete
> mass of destruction.
All the tests? The full dmesg output would be useful to narrow it down to
a specific xfstest number, field name, and fuzz verb. I'm running them
--D
> Let me know if you need more inputs from me.
>
> Amir.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 55+ messages in thread
* Re: [PATCH v4 00/47] xfs: online scrub/repair support
2017-01-09 21:15 ` Darrick J. Wong
@ 2017-01-10 7:54 ` Eryu Guan
2017-01-10 8:13 ` Amir Goldstein
2017-01-10 18:20 ` Darrick J. Wong
0 siblings, 2 replies; 55+ messages in thread
From: Eryu Guan @ 2017-01-10 7:54 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Amir Goldstein, linux-xfs
[-- Attachment #1: Type: text/plain, Size: 2597 bytes --]
On Mon, Jan 09, 2017 at 01:15:40PM -0800, Darrick J. Wong wrote:
> On Mon, Jan 09, 2017 at 02:40:56PM +0200, Amir Goldstein wrote:
> > On Sat, Jan 7, 2017 at 2:35 AM, Darrick J. Wong <darrick.wong@oracle.com> wrote:
> > > Hi all,
> > >
> > ...
> > > If you're going to start using this mess, you probably ought to just
> > > pull from my github trees. The kernel patches[1] should apply against
> > > 4.10-rc2. xfsprogs[2] and xfstests[3] can be found in their usual
> > > places.
> > >
> > > The patches have survived all auto group xfstests both with scrub-only
> > > mode and also a special debugging mode to xfs_scrub that forces it to
> > > rebuild the metadata structures even if they're not damaged. Since the
> > > last patch release, I have now had time to run the new tests in [3] that
> > > try to fuzz every field in every data structure on disk.
> > >
> >
> > Darrick,
> >
> > I started running the dangerous_scrub group yersterday and it's killing my
> > test machine. The test machine is x86_64 (i5-3470) 16GB RAM
> > and test partitions are 100GB volume on spinning disk.
> >
> > xfs_db swaps my system to death and most of the tests it eventually
> > gets shot down by oom killer.
> >
> > Is that surprising to you?
>
> Yes.
I hit OOM too in xfs/1301. (I ran xfs/13??, xfs/1300 passed and 1301
oom'ed the host, I haven't run other tests yet.)
>
> > How much RAM does you test systems have?
>
> 2GB in a VM so the host system won't go down. Usually the test disks
> are 8GB disks to keep the fuzzer runtimes down, but I've also run them
> against 100GB volumes without OOMing...
>
> > Can you figure out a minimal RAM requirement to run these fuzzers
> > and maybe check required RAM before running the test?
>
> I wouldn't have thought xfs_check would OOM... it would help to know
> exactly what the xfs_db invocation thought it was doing.
My test host has 64G memory, it's running on a 15G SCRATCH_DEV.
>
> > Alternatively, can you figure out how to reduce the amount of RAM
> > used by the fuzzer?
> >
> > I was using mkfs options "-m rmapbt=1,reflink=1"
> > and I tried running with and then without TEST_XFS_SCRUB=1.
> > I don't see a reason to send the logs at this point, they are just a complete
> > mass of destruction.
>
> All the tests? The full dmesg output would be useful to narrow it down to
> a specific xfstest number, field name, and fuzz verb. I'm running them
In my case, the xfs_db command is doing
/usr/sbin/xfs_db -x -c sb 0 -c fuzz /dev/mapper/systemvg-testlv2
I attached console log and xfs-1301.full I have so far.
Thanks,
Eryu
[-- Attachment #2: xfs-1301.dmesg.gz --]
[-- Type: application/gzip, Size: 28736 bytes --]
[-- Attachment #3: xfs-1301.full --]
[-- Type: text/plain, Size: 20800 bytes --]
Fields we propose to fuzz under: sb 0
xfs_db>
blocksize
dblocks
rblocks
rextents
uuid
logstart
rootino
rbmino
rsumino
rextsize
agblocks
agcount
rbmblocks
logblocks
versionnum
sectsize
inodesize
inopblock
fname
blocklog
sectlog
inodelog
inopblog
agblklog
rextslog
inprogress
imax_pct
icount
ifree
fdblocks
frextents
uquotino
gquotino
qflags
flags
shared_vn
inoalignmt
unit
width
dirblklog
logsectlog
logsectsize
logsunit
features2
bad_features2
features_compat
features_ro_compat
features_incompat
features_log_incompat
crc
spino_align
pquotino
lsn
meta_uuid
+ Fuzz xfs_db> = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
bad character in field >
unable to parse 'xfs_db>'.
xfs_db>
Field xfs_db> already set to xfs_db> xfs_db> bad character in field >
xfs_db> , skipping test.
+ Fuzz xfs_db> = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
bad character in field >
unable to parse 'xfs_db>'.
xfs_db>
Field xfs_db> already set to xfs_db> xfs_db> bad character in field >
xfs_db> , skipping test.
+ Fuzz xfs_db> = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
bad character in field >
unable to parse 'xfs_db>'.
xfs_db>
Field xfs_db> already set to xfs_db> xfs_db> bad character in field >
xfs_db> , skipping test.
+ Fuzz xfs_db> = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
bad character in field >
unable to parse 'xfs_db>'.
xfs_db>
Field xfs_db> already set to xfs_db> xfs_db> bad character in field >
xfs_db> , skipping test.
+ Fuzz xfs_db> = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
bad character in field >
unable to parse 'xfs_db>'.
xfs_db>
Field xfs_db> already set to xfs_db> xfs_db> bad character in field >
xfs_db> , skipping test.
+ Fuzz xfs_db> = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
bad character in field >
unable to parse 'xfs_db>'.
xfs_db>
Field xfs_db> already set to xfs_db> xfs_db> bad character in field >
xfs_db> , skipping test.
+ Fuzz xfs_db> = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
bad character in field >
unable to parse 'xfs_db>'.
xfs_db>
Field xfs_db> already set to xfs_db> xfs_db> bad character in field >
xfs_db> , skipping test.
+ Fuzz xfs_db> = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
bad character in field >
unable to parse 'xfs_db>'.
xfs_db>
Field xfs_db> already set to xfs_db> xfs_db> bad character in field >
xfs_db> , skipping test.
+ Fuzz blocksize = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
blocksize = 0
xfs_db>
Field blocksize already set to , skipping test.
+ Fuzz blocksize = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
blocksize = 4294967295
xfs_db>
Field blocksize already set to , skipping test.
+ Fuzz blocksize = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
blocksize = 2147487744
xfs_db>
Field blocksize already set to , skipping test.
+ Fuzz blocksize = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
blocksize = 36864
xfs_db>
Field blocksize already set to , skipping test.
+ Fuzz blocksize = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
blocksize = 4097
xfs_db>
Field blocksize already set to , skipping test.
+ Fuzz blocksize = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
blocksize = 6113
xfs_db>
Field blocksize already set to , skipping test.
+ Fuzz blocksize = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
blocksize = 2079
xfs_db>
Field blocksize already set to , skipping test.
+ Fuzz blocksize = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
blocksize = 1858079377
xfs_db>
Field blocksize already set to , skipping test.
+ Fuzz dblocks = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
dblocks = 0
xfs_db>
Field dblocks already set to , skipping test.
+ Fuzz dblocks = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
dblocks = null
xfs_db>
Field dblocks already set to , skipping test.
+ Fuzz dblocks = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
dblocks = 9223372036858706944
xfs_db>
Field dblocks already set to , skipping test.
+ Fuzz dblocks = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
dblocks = 2151414784
xfs_db>
Field dblocks already set to , skipping test.
+ Fuzz dblocks = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
dblocks = 3931137
xfs_db>
Field dblocks already set to , skipping test.
+ Fuzz dblocks = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
dblocks = 3933153
xfs_db>
Field dblocks already set to , skipping test.
+ Fuzz dblocks = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
dblocks = 3929119
xfs_db>
Field dblocks already set to , skipping test.
+ Fuzz dblocks = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
dblocks = 7980390161092602286
xfs_db>
Field dblocks already set to , skipping test.
+ Fuzz rblocks = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rblocks = 0
xfs_db>
Field rblocks already set to , skipping test.
+ Fuzz rblocks = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rblocks = null
xfs_db>
Field rblocks already set to , skipping test.
+ Fuzz rblocks = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rblocks = 9223372036854775808
xfs_db>
Field rblocks already set to , skipping test.
+ Fuzz rblocks = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rblocks = 2147483648
xfs_db>
Field rblocks already set to , skipping test.
+ Fuzz rblocks = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rblocks = 1
xfs_db>
Field rblocks already set to , skipping test.
+ Fuzz rblocks = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rblocks = 2017
xfs_db>
Field rblocks already set to , skipping test.
+ Fuzz rblocks = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rblocks = 18446744073709549599
xfs_db>
Field rblocks already set to , skipping test.
+ Fuzz rblocks = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rblocks = 7980390161092602286
xfs_db>
Field rblocks already set to , skipping test.
+ Fuzz rextents = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextents = 0
xfs_db>
Field rextents already set to , skipping test.
+ Fuzz rextents = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextents = null
xfs_db>
Field rextents already set to , skipping test.
+ Fuzz rextents = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextents = 9223372036854775808
xfs_db>
Field rextents already set to , skipping test.
+ Fuzz rextents = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextents = 2147483648
xfs_db>
Field rextents already set to , skipping test.
+ Fuzz rextents = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextents = 1
xfs_db>
Field rextents already set to , skipping test.
+ Fuzz rextents = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextents = 2017
xfs_db>
Field rextents already set to , skipping test.
+ Fuzz rextents = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextents = 18446744073709549599
xfs_db>
Field rextents already set to , skipping test.
+ Fuzz rextents = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextents = 7980390161092602286
xfs_db>
Field rextents already set to , skipping test.
+ Fuzz uuid = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
uuid = 00000000-0000-0000-0000-000000000000
xfs_db>
Field uuid already set to , skipping test.
+ Fuzz uuid = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
uuid = ffffffff-ffff-ffff-ffff-ffffffffffff
xfs_db>
Field uuid already set to , skipping test.
+ Fuzz uuid = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
uuid = 57d4ecda-f10b-458c-b2d0-9fe3727e1445
xfs_db>
Field uuid already set to , skipping test.
+ Fuzz uuid = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
uuid = d7d4ecda-f10b-458c-32d0-9fe3727e1445
xfs_db>
Field uuid already set to , skipping test.
+ Fuzz uuid = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
uuid = d7d4ecda-f10b-458c-b2d0-9fe3727e1444
xfs_db>
Field uuid already set to , skipping test.
+ Fuzz uuid = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
unable to fuzz field 'uuid'
xfs_db>
Field uuid already set to , skipping test.
+ Fuzz uuid = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
unable to fuzz field 'uuid'
xfs_db>
Field uuid already set to , skipping test.
+ Fuzz uuid = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
uuid = 6ec00a91-d0f2-69ae-bab4-483b0b5cea66
xfs_db>
Field uuid already set to , skipping test.
+ Fuzz logstart = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
logstart = 0
xfs_db>
Field logstart already set to , skipping test.
+ Fuzz logstart = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
logstart = null
xfs_db>
Field logstart already set to , skipping test.
+ Fuzz logstart = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
logstart = 9223372036856873024
xfs_db>
Field logstart already set to , skipping test.
+ Fuzz logstart = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
logstart = 2149580864
xfs_db>
Field logstart already set to , skipping test.
+ Fuzz logstart = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
logstart = 2097217
xfs_db>
Field logstart already set to , skipping test.
+ Fuzz logstart = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
logstart = 2099233
xfs_db>
Field logstart already set to , skipping test.
+ Fuzz logstart = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
logstart = 2095199
xfs_db>
Field logstart already set to , skipping test.
+ Fuzz logstart = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
logstart = 7980390161092602286
xfs_db>
Field logstart already set to , skipping test.
+ Fuzz rootino = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rootino = 0
xfs_db>
Field rootino already set to , skipping test.
+ Fuzz rootino = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rootino = null
xfs_db>
Field rootino already set to , skipping test.
+ Fuzz rootino = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rootino = 9223372036854776320
xfs_db>
Field rootino already set to , skipping test.
+ Fuzz rootino = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rootino = 2147484160
xfs_db>
Field rootino already set to , skipping test.
+ Fuzz rootino = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rootino = 513
xfs_db>
Field rootino already set to , skipping test.
+ Fuzz rootino = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rootino = 2529
xfs_db>
Field rootino already set to , skipping test.
+ Fuzz rootino = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rootino = 18446744073709550111
xfs_db>
Field rootino already set to , skipping test.
+ Fuzz rootino = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rootino = 7980390161092602286
xfs_db>
Field rootino already set to , skipping test.
+ Fuzz rbmino = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rbmino = 0
xfs_db>
Field rbmino already set to , skipping test.
+ Fuzz rbmino = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rbmino = null
xfs_db>
Field rbmino already set to , skipping test.
+ Fuzz rbmino = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rbmino = 9223372036854776321
xfs_db>
Field rbmino already set to , skipping test.
+ Fuzz rbmino = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rbmino = 2147484161
xfs_db>
Field rbmino already set to , skipping test.
+ Fuzz rbmino = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rbmino = 513
xfs_db>
Field rbmino already set to , skipping test.
+ Fuzz rbmino = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rbmino = 2530
xfs_db>
Field rbmino already set to , skipping test.
+ Fuzz rbmino = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rbmino = 18446744073709550112
xfs_db>
Field rbmino already set to , skipping test.
+ Fuzz rbmino = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rbmino = 7980390161092602286
xfs_db>
Field rbmino already set to , skipping test.
+ Fuzz rsumino = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rsumino = 0
xfs_db>
Field rsumino already set to , skipping test.
+ Fuzz rsumino = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rsumino = null
xfs_db>
Field rsumino already set to , skipping test.
+ Fuzz rsumino = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rsumino = 9223372036854776322
xfs_db>
Field rsumino already set to , skipping test.
+ Fuzz rsumino = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rsumino = 2147484162
xfs_db>
Field rsumino already set to , skipping test.
+ Fuzz rsumino = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rsumino = 515
xfs_db>
Field rsumino already set to , skipping test.
+ Fuzz rsumino = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rsumino = 2531
xfs_db>
Field rsumino already set to , skipping test.
+ Fuzz rsumino = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rsumino = 18446744073709550113
xfs_db>
Field rsumino already set to , skipping test.
+ Fuzz rsumino = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rsumino = 7980390161092602286
xfs_db>
Field rsumino already set to , skipping test.
+ Fuzz rextsize = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextsize = 0
xfs_db>
Field rextsize already set to , skipping test.
+ Fuzz rextsize = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextsize = null
xfs_db>
Field rextsize already set to , skipping test.
+ Fuzz rextsize = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextsize = 2147483649
xfs_db>
Field rextsize already set to , skipping test.
+ Fuzz rextsize = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextsize = 32769
xfs_db>
Field rextsize already set to , skipping test.
+ Fuzz rextsize = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextsize = 1
xfs_db>
Field rextsize already set to , skipping test.
+ Fuzz rextsize = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextsize = 2018
xfs_db>
Field rextsize already set to , skipping test.
+ Fuzz rextsize = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextsize = 4294965280
xfs_db>
Field rextsize already set to , skipping test.
+ Fuzz rextsize = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
rextsize = 1858079377
xfs_db>
Field rextsize already set to , skipping test.
+ Fuzz agblocks = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agblocks = 0
xfs_db>
Field agblocks already set to , skipping test.
+ Fuzz agblocks = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agblocks = null
xfs_db>
Field agblocks already set to , skipping test.
+ Fuzz agblocks = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agblocks = 2147729344
xfs_db>
Field agblocks already set to , skipping test.
+ Fuzz agblocks = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agblocks = 245696
xfs_db>
Field agblocks already set to , skipping test.
+ Fuzz agblocks = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agblocks = 245697
xfs_db>
Field agblocks already set to , skipping test.
+ Fuzz agblocks = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agblocks = 247713
xfs_db>
Field agblocks already set to , skipping test.
+ Fuzz agblocks = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agblocks = 243679
xfs_db>
Field agblocks already set to , skipping test.
+ Fuzz agblocks = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agblocks = 1858079377
xfs_db>
Field agblocks already set to , skipping test.
+ Fuzz agcount = zeroes
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agcount = 0
xfs_db>
Field agcount already set to , skipping test.
+ Fuzz agcount = ones
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agcount = null
xfs_db>
Field agcount already set to , skipping test.
+ Fuzz agcount = firstbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agcount = 2147483664
xfs_db>
Field agcount already set to , skipping test.
+ Fuzz agcount = middlebit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agcount = 32784
xfs_db>
Field agcount already set to , skipping test.
+ Fuzz agcount = lastbit
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agcount = 17
xfs_db>
Field agcount already set to , skipping test.
+ Fuzz agcount = add
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agcount = 2033
xfs_db>
Field agcount already set to , skipping test.
+ Fuzz agcount = sub
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agcount = 4294965295
xfs_db>
Field agcount already set to , skipping test.
+ Fuzz agcount = random
========================
xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
agcount = 1858079377
xfs_db>
Field agcount already set to , skipping test.
^ permalink raw reply [flat|nested] 55+ messages in thread
* Re: [PATCH v4 00/47] xfs: online scrub/repair support
2017-01-10 7:54 ` Eryu Guan
@ 2017-01-10 8:13 ` Amir Goldstein
2017-01-10 8:44 ` Eryu Guan
[not found] ` <CAOQ4uxiFg18fVh3RFr-Y1-XRmV82dTxc5r05QH8OFYpv2=juvg@mail.gmail.com>
2017-01-10 18:20 ` Darrick J. Wong
1 sibling, 2 replies; 55+ messages in thread
From: Amir Goldstein @ 2017-01-10 8:13 UTC (permalink / raw)
To: Eryu Guan; +Cc: Darrick J. Wong, linux-xfs
On Tue, Jan 10, 2017 at 9:54 AM, Eryu Guan <eguan@redhat.com> wrote:
> On Mon, Jan 09, 2017 at 01:15:40PM -0800, Darrick J. Wong wrote:
>> On Mon, Jan 09, 2017 at 02:40:56PM +0200, Amir Goldstein wrote:
>> > On Sat, Jan 7, 2017 at 2:35 AM, Darrick J. Wong <darrick.wong@oracle.com> wrote:
>> > > Hi all,
>> > >
>> > ...
>> > > If you're going to start using this mess, you probably ought to just
>> > > pull from my github trees. The kernel patches[1] should apply against
>> > > 4.10-rc2. xfsprogs[2] and xfstests[3] can be found in their usual
>> > > places.
>> > >
Darrick,
Not sure if this is interesting, but I had to run 'make realclean' in xfsprogs
for make to build xfs_scrub (make clean was not enough).
Is this the standard practice for building xfsprogs after checking out
a new branch?
>> > > The patches have survived all auto group xfstests both with scrub-only
>> > > mode and also a special debugging mode to xfs_scrub that forces it to
>> > > rebuild the metadata structures even if they're not damaged. Since the
>> > > last patch release, I have now had time to run the new tests in [3] that
>> > > try to fuzz every field in every data structure on disk.
>> > >
>> >
>> > Darrick,
>> >
>> > I started running the dangerous_scrub group yesterday and it's killing my
>> > test machine. The test machine is x86_64 (i5-3470) 16GB RAM
>> > and test partitions are 100GB volume on spinning disk.
>> >
>> > xfs_db swaps my system to death and most of the tests it eventually
>> > gets shot down by oom killer.
>> >
>> > Is that surprising to you?
>>
>> Yes.
>
> I hit OOM too in xfs/1301. (I ran xfs/13??, xfs/1300 passed and 1301
> oom'ed the host, I haven't run other tests yet.)
>
xfs/1300 passed for me as well. Note that it passed both with
kernel scrubbing disabled and enabled (XFS_DEBUG=y), but with kernel
scrubbing it ran 7 seconds on my machine, while without kernel scrubbing
it ran 70 seconds.
Eryu, you mentioned that you do not use XFS_DEBUG=y on a previous
thread. Did you turn it on for the scrubbing tests? Although I think tests
should be run with and without kernel scrubbing support, right?
>>
>> > How much RAM do your test systems have?
>>
>> 2GB in a VM so the host system won't go down. Usually the test disks
>> are 8GB disks to keep the fuzzer runtimes down, but I've also run them
>> against 100GB volumes without OOMing...
>>
>> > Can you figure out a minimal RAM requirement to run these fuzzers
>> > and maybe check required RAM before running the test?
>>
>> I wouldn't have thought xfs_check would OOM... it would help to know
>> exactly what the xfs_db invocation thought it was doing.
>
> My test host has 64G memory, it's running on a 15G SCRATCH_DEV.
>
>>
>> > Alternatively, can you figure out how to reduce the amount of RAM
>> > used by the fuzzer?
>> >
>> > I was using mkfs options "-m rmapbt=1,reflink=1"
>> > and I tried running with and then without TEST_XFS_SCRUB=1.
>> > I don't see a reason to send the logs at this point, they are just a complete
>> > mass of destruction.
>>
>> All the tests? The full dmesg output would be useful to narrow it down to
>> a specific xfstest number, field name, and fuzz verb. I'm running them
>
In my case, yes, most of the tests (51 out of 65) failed due to
some sort of crash, but the entire system is so unstable due to all the OOM
killing that the entire dmesg output is a big mess.
I'll rerun only 1301 to send my logs.
> In my case, the xfs_db command is doing
>
> /usr/sbin/xfs_db -x -c sb 0 -c fuzz /dev/mapper/systemvg-testlv2
>
> I attached console log and xfs-1301.full I have so far.
>
> Thanks,
> Eryu
^ permalink raw reply [flat|nested] 55+ messages in thread
* Re: [PATCH v4 00/47] xfs: online scrub/repair support
2017-01-10 8:13 ` Amir Goldstein
@ 2017-01-10 8:44 ` Eryu Guan
[not found] ` <CAOQ4uxiFg18fVh3RFr-Y1-XRmV82dTxc5r05QH8OFYpv2=juvg@mail.gmail.com>
1 sibling, 0 replies; 55+ messages in thread
From: Eryu Guan @ 2017-01-10 8:44 UTC (permalink / raw)
To: Amir Goldstein; +Cc: Darrick J. Wong, linux-xfs
On Tue, Jan 10, 2017 at 10:13:06AM +0200, Amir Goldstein wrote:
> On Tue, Jan 10, 2017 at 9:54 AM, Eryu Guan <eguan@redhat.com> wrote:
> > On Mon, Jan 09, 2017 at 01:15:40PM -0800, Darrick J. Wong wrote:
> >> On Mon, Jan 09, 2017 at 02:40:56PM +0200, Amir Goldstein wrote:
> >> > On Sat, Jan 7, 2017 at 2:35 AM, Darrick J. Wong <darrick.wong@oracle.com> wrote:
> >> > > Hi all,
> >> > >
> >> > ...
> >> > > If you're going to start using this mess, you probably ought to just
> >> > > pull from my github trees. The kernel patches[1] should apply against
> >> > > 4.10-rc2. xfsprogs[2] and xfstests[3] can be found in their usual
> >> > > places.
> >> > >
>
> Darrick,
>
> Not sure if this is interesting, but I had to run 'make realclean' in xfsprogs
> for make to build xfs_scrub (make clean was not enough).
> Is this the standard practice for building xfsprogs after checking out
> a new branch?
>
> >> > > The patches have survived all auto group xfstests both with scrub-only
> >> > > mode and also a special debugging mode to xfs_scrub that forces it to
> >> > > rebuild the metadata structures even if they're not damaged. Since the
> >> > > last patch release, I have now had time to run the new tests in [3] that
> >> > > try to fuzz every field in every data structure on disk.
> >> > >
> >> >
> >> > Darrick,
> >> >
> >> > I started running the dangerous_scrub group yesterday and it's killing my
> >> > test machine. The test machine is x86_64 (i5-3470) 16GB RAM
> >> > and test partitions are 100GB volume on spinning disk.
> >> >
> >> > xfs_db swaps my system to death and most of the tests it eventually
> >> > gets shot down by oom killer.
> >> >
> >> > Is that surprising to you?
> >>
> >> Yes.
> >
> > I hit OOM too in xfs/1301. (I ran xfs/13??, xfs/1300 passed and 1301
> > oom'ed the host, I haven't run other tests yet.)
> >
>
> xfs/1300 passed for me as well. Note that it passed both with
> kernel scrubbing disabled and enabled (XFS_DEBUG=y), but with kernel
> scrubbing it ran 7 seconds on my machine, while without kernel scrubbing
> it ran 70 seconds.
>
> Eryu, you mentioned that you do not use XFS_DEBUG=y on a previous
> thread. Did you turn it on for the scrubbing tests? Although I think tests
> should be run with and without kernel scrubbing support, right?
You're right, I didn't turn on XFS_DEBUG, and I agree that I should run
tests with and without online scrub support. I just haven't gone that
far yet. Thanks for the reminder!
Eryu
^ permalink raw reply [flat|nested] 55+ messages in thread
[parent not found: <CAOQ4uxiFg18fVh3RFr-Y1-XRmV82dTxc5r05QH8OFYpv2=juvg@mail.gmail.com>]
* Re: [PATCH v4 00/47] xfs: online scrub/repair support
2017-01-10 7:54 ` Eryu Guan
2017-01-10 8:13 ` Amir Goldstein
@ 2017-01-10 18:20 ` Darrick J. Wong
1 sibling, 0 replies; 55+ messages in thread
From: Darrick J. Wong @ 2017-01-10 18:20 UTC (permalink / raw)
To: Eryu Guan; +Cc: Amir Goldstein, linux-xfs, Eric Sandeen
On Tue, Jan 10, 2017 at 03:54:44PM +0800, Eryu Guan wrote:
> On Mon, Jan 09, 2017 at 01:15:40PM -0800, Darrick J. Wong wrote:
> > On Mon, Jan 09, 2017 at 02:40:56PM +0200, Amir Goldstein wrote:
> > > On Sat, Jan 7, 2017 at 2:35 AM, Darrick J. Wong <darrick.wong@oracle.com> wrote:
> > > > Hi all,
> > > >
> > > ...
> > > > If you're going to start using this mess, you probably ought to just
> > > > pull from my github trees. The kernel patches[1] should apply against
> > > > 4.10-rc2. xfsprogs[2] and xfstests[3] can be found in their usual
> > > > places.
> > > >
> > > > The patches have survived all auto group xfstests both with scrub-only
> > > > mode and also a special debugging mode to xfs_scrub that forces it to
> > > > rebuild the metadata structures even if they're not damaged. Since the
> > > > last patch release, I have now had time to run the new tests in [3] that
> > > > try to fuzz every field in every data structure on disk.
> > > >
> > >
> > > Darrick,
> > >
> > > I started running the dangerous_scrub group yesterday and it's killing my
> > > test machine. The test machine is x86_64 (i5-3470) 16GB RAM
> > > and test partitions are 100GB volume on spinning disk.
> > >
> > > xfs_db swaps my system to death and most of the tests it eventually
> > > gets shot down by oom killer.
> > >
> > > Is that surprising to you?
> >
> > Yes.
>
> I hit OOM too in xfs/1301. (I ran xfs/13??, xfs/1300 passed and 1301
> oom'ed the host, I haven't run other tests yet.)
>
> >
> > > How much RAM do your test systems have?
> >
> > 2GB in a VM so the host system won't go down. Usually the test disks
> > are 8GB disks to keep the fuzzer runtimes down, but I've also run them
> > against 100GB volumes without OOMing...
> >
> > > Can you figure out a minimal RAM requirement to run these fuzzers
> > > and maybe check required RAM before running the test?
> >
> > I wouldn't have thought xfs_check would OOM... it would help to know
> > exactly what the xfs_db invocation thought it was doing.
>
> My test host has 64G memory, it's running on a 15G SCRATCH_DEV.
Aha, I /have/ been hitting OOM, but it got lost in the noise.
> > > Alternatively, can you figure out how to reduce the amount of RAM
> > > used by the fuzzer?
> > >
> > > I was using mkfs options "-m rmapbt=1,reflink=1"
> > > and I tried running with and then without TEST_XFS_SCRUB=1.
> > > I don't see a reason to send the logs at this point, they are just a complete
> > > mass of destruction.
> >
> > All the tests? The full dmesg output would be useful to narrow it down to
> > a specific xfstest number, field name, and fuzz verb. I'm running them
>
> In my case, the xfs_db command is doing
>
> /usr/sbin/xfs_db -x -c sb 0 -c fuzz /dev/mapper/systemvg-testlv2
>
> I attached console log and xfs-1301.full I have so far.
Aha, thank you.
> Thanks,
> Eryu
>
> Fields we propose to fuzz under: sb 0
> xfs_db>
> blocksize
> dblocks
<snip>
> Field agblocks already set to , skipping test.
> + Fuzz agcount = zeroes
> ========================
> xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
> agcount = 0
> xfs_db>
>
> Field agcount already set to , skipping test.
> + Fuzz agcount = ones
> ========================
> xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
> agcount = null
> xfs_db>
>
> Field agcount already set to , skipping test.
> + Fuzz agcount = firstbit
> ========================
> xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
> agcount = 2147483664
> xfs_db>
>
> Field agcount already set to , skipping test.
> + Fuzz agcount = middlebit
> ========================
> xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
> agcount = 32784
> xfs_db>
>
> Field agcount already set to , skipping test.
> + Fuzz agcount = lastbit
> ========================
> xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
> agcount = 17
> xfs_db>
>
> Field agcount already set to , skipping test.
> + Fuzz agcount = add
> ========================
> xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
> agcount = 2033
> xfs_db>
>
> Field agcount already set to , skipping test.
> + Fuzz agcount = sub
> ========================
> xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
> agcount = 4294965295
> xfs_db>
>
> Field agcount already set to , skipping test.
> + Fuzz agcount = random
> ========================
> xfs_db> xfs_db> Allowing fuzz of corrupted data with good CRC
> agcount = 1858079377
> xfs_db>
>
> Field agcount already set to , skipping test.
Now I see what the problem is. We set an insane number of AGs. The
next thing we try to do is read the fuzzed value back from the sb, which
fires up another xfs_db instance. That instance thinks we have
1,858,079,377 AGs and tries to allocate per-ag data for all of them and
OOMs the system, causing xfs_db to fail.
Fortunately the kernel and xfs_repair notice the broken geometry and
handle it nicely, but that leaves xfs_db unable to deal with it. We
could clamp agcount to a "reasonable" value, though it isn't clear what
that means if agblocks is also insane.
OTOH xfs_db AFAIK doesn't use the in-core perag stuff anyway so maybe
setting agcount to 0 is reasonable enough(???) Will experiment with
this and report back.
--D
^ permalink raw reply [flat|nested] 55+ messages in thread