* [PATCH 01/53] xfs_io: allow zero-length reflink/dedupe commands
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
@ 2015-12-19 9:04 ` Darrick J. Wong
2015-12-19 9:05 ` [PATCH 02/53] xfs_db: make check work for sparse inodes Darrick J. Wong
` (51 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:04 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Since it's convention that calling the reflink/dedupe ioctls with
a zero length means "do this until you hit EOF in the source file",
explicitly permit this as an input. Fix the ioctl code to handle
this correctly.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
io/reflink.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/io/reflink.c b/io/reflink.c
index 5ba1c93..def01be 100644
--- a/io/reflink.c
+++ b/io/reflink.c
@@ -71,7 +71,7 @@ dedupe_ioctl(
info->fd = file->fd;
info->logical_offset = doffset;
- while (args->length > 0) {
+ while (args->length > 0 || !*ops) {
error = ioctl(fd, XFS_IOC_FILE_EXTENT_SAME, args);
if (error) {
perror("XFS_IOC_FILE_EXTENT_SAME");
@@ -85,14 +85,16 @@ dedupe_ioctl(
printf(_("Extents did not match.\n"));
goto done;
}
- if (info->bytes_deduped == 0 ||
- info->bytes_deduped > args->length)
+ if (args->length != 0 &&
+ (info->bytes_deduped == 0 ||
+ info->bytes_deduped > args->length))
break;
(*ops)++;
args->logical_offset += info->bytes_deduped;
info->logical_offset += info->bytes_deduped;
- args->length -= info->bytes_deduped;
+ if (args->length >= info->bytes_deduped)
+ args->length -= info->bytes_deduped;
deduped += info->bytes_deduped;
}
done:
@@ -145,7 +147,7 @@ dedupe_f(
}
optind++;
count = cvtnum(fsblocksize, fssectsize, argv[optind]);
- if (count < 1) {
+ if (count < 0) {
printf(_("non-positive length argument -- %s\n"), argv[optind]);
return 0;
}
@@ -202,7 +204,11 @@ reflink_ioctl(
struct xfs_clone_args args;
int error;
- if (len) {
+ if (soffset == 0 && doffset == 0 && len == 0) {
+ error = ioctl(file->fd, XFS_IOC_CLONE, fd);
+ if (error)
+ perror("XFS_IOC_CLONE");
+ } else {
args.src_fd = fd;
args.src_offset = soffset;
args.src_length = len;
@@ -210,10 +216,6 @@ reflink_ioctl(
error = ioctl(file->fd, XFS_IOC_CLONE_RANGE, &args);
if (error)
perror("XFS_IOC_CLONE_RANGE");
- } else {
- error = ioctl(file->fd, XFS_IOC_CLONE, fd);
- if (error)
- perror("XFS_IOC_CLONE");
}
if (!error)
(*ops)++;
@@ -268,7 +270,7 @@ reflink_f(
}
optind++;
count = cvtnum(fsblocksize, fssectsize, argv[optind]);
- if (count < 1) {
+ if (count < 0) {
printf(_("non-positive length argument -- %s\n"), argv[optind]);
return 0;
}
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 02/53] xfs_db: make check work for sparse inodes
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
2015-12-19 9:04 ` [PATCH 01/53] xfs_io: allow zero-length reflink/dedupe commands Darrick J. Wong
@ 2015-12-19 9:05 ` Darrick J. Wong
2015-12-19 9:05 ` [PATCH 03/53] repair: request inode buffers sized to fit one inode cluster Darrick J. Wong
` (50 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:05 UTC (permalink / raw)
To: david, darrick.wong; +Cc: Brian Foster, xfs
Teach the inobt/finobt scanning functions how to deal with sparse
inode chunks well enough that we can pass the spot-check. Should
fix the xfs/076 failures.
v2: Put the alignment checks back in, and for each individual chunk
of inodes hanging off an inobt record, parse each chunk separately.
There is no (future) guarantee that all the inodes in a sparse inobt
record will be contiguous.
Cc: Brian Foster <bfoster@redhat.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
db/check.c | 249 ++++++++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 200 insertions(+), 49 deletions(-)
diff --git a/db/check.c b/db/check.c
index 9c1541d..f1620f8 100644
--- a/db/check.c
+++ b/db/check.c
@@ -4319,6 +4319,51 @@ scanfunc_cnt(
scan_sbtree(agf, be32_to_cpu(pp[i]), level, 0, scanfunc_cnt, TYP_CNTBT);
}
+static bool
+ino_issparse(
+ struct xfs_inobt_rec *rp,
+ int offset)
+{
+ if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
+ return false;
+
+ return xfs_inobt_is_sparse_disk(rp, offset);
+}
+
+static int
+find_one_ino_bit(
+ __u16 mask,
+ int startino)
+{
+ int n;
+ int b;
+
+ startino /= XFS_INODES_PER_HOLEMASK_BIT;
+ b = startino;
+ mask >>= startino;
+ for (n = startino; n < sizeof(mask) * NBBY && !(mask & 1); n++, mask >>= 1)
+ b++;
+
+ return b * XFS_INODES_PER_HOLEMASK_BIT;
+}
+
+static int
+find_zero_ino_bit(
+ __u16 mask,
+ int startino)
+{
+ int n;
+ int b;
+
+ startino /= XFS_INODES_PER_HOLEMASK_BIT;
+ b = startino;
+ mask >>= startino;
+ for (n = startino; n < sizeof(mask) * NBBY && (mask & 1); n++, mask >>= 1)
+ b++;
+
+ return b * XFS_INODES_PER_HOLEMASK_BIT;
+}
+
static void
scanfunc_ino(
struct xfs_btree_block *block,
@@ -4336,6 +4381,13 @@ scanfunc_ino(
int off;
xfs_inobt_ptr_t *pp;
xfs_inobt_rec_t *rp;
+ bool sparse, crc;
+ int inodes_per_chunk;
+ int freecount;
+ int startidx, endidx;
+ __u16 holemask;
+ xfs_agino_t rino;
+ xfs_extlen_t cblocks;
if (be32_to_cpu(block->bb_magic) != XFS_IBT_MAGIC &&
be32_to_cpu(block->bb_magic) != XFS_IBT_CRC_MAGIC) {
@@ -4363,59 +4415,111 @@ scanfunc_ino(
return;
}
rp = XFS_INOBT_REC_ADDR(mp, block, 1);
+ sparse = xfs_sb_version_hassparseinodes(&mp->m_sb);
+ crc = xfs_sb_version_hascrc(&mp->m_sb);
for (i = 0; i < be16_to_cpu(block->bb_numrecs); i++) {
+ nfree = 0;
+
+ /* First let's look at the inode chunk alignment */
agino = be32_to_cpu(rp[i].ir_startino);
off = XFS_INO_TO_OFFSET(mp, agino);
- if (off == 0) {
- if ((sbversion & XFS_SB_VERSION_ALIGNBIT) &&
- mp->m_sb.sb_inoalignmt &&
- (XFS_INO_TO_AGBNO(mp, agino) %
- mp->m_sb.sb_inoalignmt))
+ if (off == 0 &&
+ (sbversion & XFS_SB_VERSION_ALIGNBIT) &&
+ mp->m_sb.sb_inoalignmt &&
+ (XFS_INO_TO_AGBNO(mp, agino) %
+ mp->m_sb.sb_inoalignmt)) {
+ if (sparse || crc) {
+ dbprintf(_("incorrect record %u/%u "
+ "alignment in inobt block "
+ "%u/%u\n"),
+ seqno, agino, seqno, bno);
+ error++;
+ } else
sbversion &= ~XFS_SB_VERSION_ALIGNBIT;
- set_dbmap(seqno, XFS_AGINO_TO_AGBNO(mp, agino),
- (xfs_extlen_t)MAX(1,
- XFS_INODES_PER_CHUNK >>
- mp->m_sb.sb_inopblog),
- DBM_INODE, seqno, bno);
}
- icount += XFS_INODES_PER_CHUNK;
- agicount += XFS_INODES_PER_CHUNK;
- ifree += be32_to_cpu(rp[i].ir_u.f.ir_freecount);
- agifreecount += be32_to_cpu(rp[i].ir_u.f.ir_freecount);
- push_cur();
- set_cur(&typtab[TYP_INODE],
- XFS_AGB_TO_DADDR(mp, seqno,
- XFS_AGINO_TO_AGBNO(mp, agino)),
- (int)XFS_FSB_TO_BB(mp, mp->m_ialloc_blks),
- DB_RING_IGN, NULL);
- if (iocur_top->data == NULL) {
- if (!sflag)
- dbprintf(_("can't read inode block "
+
+ /* Move on to examining the inode chunks */
+ if (sparse) {
+ inodes_per_chunk = rp[i].ir_u.sp.ir_count;
+ freecount = rp[i].ir_u.sp.ir_freecount;
+ holemask = be16_to_cpu(rp[i].ir_u.sp.ir_holemask);
+ startidx = find_zero_ino_bit(holemask, 0);
+ } else {
+ inodes_per_chunk = XFS_INODES_PER_CHUNK;
+ freecount = be32_to_cpu(rp[i].ir_u.f.ir_freecount);
+ holemask = 0;
+ startidx = 0;
+ }
+
+ /* For each allocated chunk, look at each inode. */
+ endidx = find_one_ino_bit(holemask, startidx);
+ do {
+ rino = agino + startidx;
+ cblocks = (endidx - startidx) >>
+ mp->m_sb.sb_inopblog;
+
+ /* Check the sparse chunk alignment */
+ if (sparse &&
+ (XFS_INO_TO_AGBNO(mp, rino) %
+ mp->m_sb.sb_spino_align)) {
+ dbprintf(_("incorrect chunk %u/%u "
+ "alignment in inobt block "
"%u/%u\n"),
- seqno,
- XFS_AGINO_TO_AGBNO(mp, agino));
- error++;
+ seqno, rino, seqno, bno);
+ error++;
+ }
+
+ /* Check the block map */
+ set_dbmap(seqno, XFS_AGINO_TO_AGBNO(mp, rino),
+ cblocks, DBM_INODE, seqno, bno);
+
+ push_cur();
+ set_cur(&typtab[TYP_INODE],
+ XFS_AGB_TO_DADDR(mp, seqno,
+ XFS_AGINO_TO_AGBNO(mp, rino)),
+ (int)XFS_FSB_TO_BB(mp, cblocks),
+ DB_RING_IGN, NULL);
+ if (iocur_top->data == NULL) {
+ if (!sflag)
+ dbprintf(_("can't read inode block "
+ "%u/%u\n"),
+ seqno,
+ XFS_AGINO_TO_AGBNO(mp, agino));
+ error++;
+ pop_cur();
+ continue;
+ }
+
+ /* Examine each inode in this chunk */
+ for (j = startidx; j < endidx; j++) {
+ if (ino_issparse(&rp[i], j))
+ continue;
+ isfree = XFS_INOBT_IS_FREE_DISK(&rp[i], j);
+ if (isfree)
+ nfree++;
+ process_inode(agf, agino + j,
+ (xfs_dinode_t *)((char *)iocur_top->data + ((j - startidx) << mp->m_sb.sb_inodelog)),
+ isfree);
+ }
pop_cur();
- continue;
- }
- for (j = 0, nfree = 0; j < XFS_INODES_PER_CHUNK; j++) {
- isfree = XFS_INOBT_IS_FREE_DISK(&rp[i], j);
- if (isfree)
- nfree++;
- process_inode(agf, agino + j,
- (xfs_dinode_t *)((char *)iocur_top->data + ((off + j) << mp->m_sb.sb_inodelog)),
- isfree);
- }
- if (nfree != be32_to_cpu(rp[i].ir_u.f.ir_freecount)) {
+
+ startidx = find_zero_ino_bit(holemask, endidx);
+ endidx = find_one_ino_bit(holemask, startidx);
+ } while (endidx < XFS_INODES_PER_CHUNK);
+ icount += inodes_per_chunk;
+ agicount += inodes_per_chunk;
+ ifree += freecount;
+ agifreecount += freecount;
+
+ if (nfree != freecount) {
if (!sflag)
dbprintf(_("ir_freecount/free mismatch, "
"inode chunk %u/%u, freecount "
"%d nfree %d\n"),
seqno, agino,
- be32_to_cpu(rp[i].ir_u.f.ir_freecount), nfree);
+ freecount, nfree);
error++;
}
- pop_cur();
}
return;
}
@@ -4447,6 +4551,11 @@ scanfunc_fino(
int off;
xfs_inobt_ptr_t *pp;
struct xfs_inobt_rec *rp;
+ bool sparse, crc;
+ int startidx, endidx;
+ __u16 holemask;
+ xfs_agino_t rino;
+ xfs_extlen_t cblocks;
if (be32_to_cpu(block->bb_magic) != XFS_FIBT_MAGIC &&
be32_to_cpu(block->bb_magic) != XFS_FIBT_CRC_MAGIC) {
@@ -4474,21 +4583,63 @@ scanfunc_fino(
return;
}
rp = XFS_INOBT_REC_ADDR(mp, block, 1);
+ sparse = xfs_sb_version_hassparseinodes(&mp->m_sb);
+ crc = xfs_sb_version_hascrc(&mp->m_sb);
for (i = 0; i < be16_to_cpu(block->bb_numrecs); i++) {
+ /* First let's look at the inode chunk alignment */
agino = be32_to_cpu(rp[i].ir_startino);
off = XFS_INO_TO_OFFSET(mp, agino);
- if (off == 0) {
- if ((sbversion & XFS_SB_VERSION_ALIGNBIT) &&
- mp->m_sb.sb_inoalignmt &&
- (XFS_INO_TO_AGBNO(mp, agino) %
- mp->m_sb.sb_inoalignmt))
+ if (off == 0 &&
+ (sbversion & XFS_SB_VERSION_ALIGNBIT) &&
+ mp->m_sb.sb_inoalignmt &&
+ (XFS_INO_TO_AGBNO(mp, agino) %
+ mp->m_sb.sb_inoalignmt)) {
+ if (sparse || crc) {
+ dbprintf(_("incorrect record %u/%u "
+ "alignment in finobt block "
+ "%u/%u\n"),
+ seqno, agino, seqno, bno);
+ error++;
+ } else
sbversion &= ~XFS_SB_VERSION_ALIGNBIT;
- check_set_dbmap(seqno, XFS_AGINO_TO_AGBNO(mp, agino),
- (xfs_extlen_t)MAX(1,
- XFS_INODES_PER_CHUNK >>
- mp->m_sb.sb_inopblog),
- DBM_INODE, DBM_INODE, seqno, bno);
}
+
+ /* Move on to examining the inode chunks */
+ if (sparse) {
+ holemask = be16_to_cpu(rp[i].ir_u.sp.ir_holemask);
+ startidx = find_zero_ino_bit(holemask, 0);
+ } else {
+ holemask = 0;
+ startidx = 0;
+ }
+
+ /* For each allocated chunk... */
+ endidx = find_one_ino_bit(holemask, startidx);
+ do {
+ rino = agino + startidx;
+ cblocks = (endidx - startidx) >>
+ mp->m_sb.sb_inopblog;
+
+ /* Check the sparse chunk alignment */
+ if (sparse &&
+ (XFS_INO_TO_AGBNO(mp, rino) %
+ mp->m_sb.sb_spino_align)) {
+ dbprintf(_("incorrect chunk %u/%u "
+ "alignment in finobt block "
+ "%u/%u\n"),
+ seqno, rino, seqno, bno);
+ error++;
+ }
+
+ /* Check the block map */
+ check_set_dbmap(seqno,
+ XFS_AGINO_TO_AGBNO(mp, rino),
+ cblocks, DBM_INODE, DBM_INODE,
+ seqno, bno);
+
+ startidx = find_zero_ino_bit(holemask, endidx);
+ endidx = find_one_ino_bit(holemask, startidx);
+ } while (endidx < XFS_INODES_PER_CHUNK);
}
return;
}
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 03/53] repair: request inode buffers sized to fit one inode cluster
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
2015-12-19 9:04 ` [PATCH 01/53] xfs_io: allow zero-length reflink/dedupe commands Darrick J. Wong
2015-12-19 9:05 ` [PATCH 02/53] xfs_db: make check work for sparse inodes Darrick J. Wong
@ 2015-12-19 9:05 ` Darrick J. Wong
2015-12-19 9:05 ` [PATCH 04/53] libxfs: reorder xfs_bmap_add_free args Darrick J. Wong
` (49 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:05 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
In get_agino_buf, grab inode buffers using the same size as the inode
processing code. Since the inode processing code uses that same
buffer size, this means that get_agino_buf can serve requests from the
cache instead of pointlessly dropping the cache entry and screaming
about it.
(This function is currently unused, but the reflink flag fixer code
wants to use it.)
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/dinode.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/repair/dinode.c b/repair/dinode.c
index 43142d6..269f9d8 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -850,7 +850,8 @@ get_agino_buf(xfs_mount_t *mp,
if ((irec = find_inode_rec(mp, agno, agino)) == NULL)
return(NULL);
- size = XFS_FSB_TO_BB(mp, MAX(1, XFS_INODES_PER_CHUNK/inodes_per_block));
+ size = MAX(1, XFS_FSB_TO_BB(mp,
+ mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog));
bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno,
XFS_AGINO_TO_AGBNO(mp, irec->ino_startnum)), size, 0,
&xfs_inode_buf_ops);
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 04/53] libxfs: reorder xfs_bmap_add_free args
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (2 preceding siblings ...)
2015-12-19 9:05 ` [PATCH 03/53] repair: request inode buffers sized to fit one inode cluster Darrick J. Wong
@ 2015-12-19 9:05 ` Darrick J. Wong
2015-12-19 9:05 ` [PATCH 05/53] libxfs: use a convenience variable instead of open-coding the fork Darrick J. Wong
` (48 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:05 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Move the mount & free-list arguments to the start of xfs_bmap_add_free,
like most API calls. The kernel version of rmap makes this change, so
porting it to xfsprogs will make maintenance easier.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_bmap.c | 12 ++++++------
libxfs/xfs_bmap.h | 4 ++--
libxfs/xfs_bmap_btree.c | 2 +-
libxfs/xfs_ialloc.c | 8 ++++----
4 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index 68869f6..46f8469 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -559,10 +559,10 @@ xfs_bmap_validate_ret(
*/
void
xfs_bmap_add_free(
+ struct xfs_mount *mp, /* mount point structure */
+ struct xfs_bmap_free *flist, /* list of extents */
xfs_fsblock_t bno, /* fs block number of extent */
- xfs_filblks_t len, /* length of extent */
- xfs_bmap_free_t *flist, /* list of extents */
- xfs_mount_t *mp) /* mount point structure */
+ xfs_filblks_t len) /* length of extent */
{
xfs_bmap_free_item_t *cur; /* current (next) element */
xfs_bmap_free_item_t *new; /* new element */
@@ -688,7 +688,7 @@ xfs_bmap_btree_to_extents(
cblock = XFS_BUF_TO_BLOCK(cbp);
if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
return error;
- xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
+ xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1);
ip->i_d.di_nblocks--;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, cbp);
@@ -4969,8 +4969,8 @@ xfs_bmap_del_extent(
* If we need to, add to list of extents to delete.
*/
if (do_fx)
- xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
- mp);
+ xfs_bmap_add_free(mp, flist, del->br_startblock,
+ del->br_blockcount);
/*
* Adjust inode # blocks in the file.
*/
diff --git a/libxfs/xfs_bmap.h b/libxfs/xfs_bmap.h
index 85143e5..d3daf6d 100644
--- a/libxfs/xfs_bmap.h
+++ b/libxfs/xfs_bmap.h
@@ -182,8 +182,8 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
-void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
- struct xfs_bmap_free *flist, struct xfs_mount *mp);
+void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_bmap_free *flist,
+ xfs_fsblock_t bno, xfs_filblks_t len);
void xfs_bmap_cancel(struct xfs_bmap_free *flist);
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed);
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c
index 52c9c75..2ef1836 100644
--- a/libxfs/xfs_bmap_btree.c
+++ b/libxfs/xfs_bmap_btree.c
@@ -523,7 +523,7 @@ xfs_bmbt_free_block(
struct xfs_trans *tp = cur->bc_tp;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
- xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
+ xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1);
ip->i_d.di_nblocks--;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
index 5039f4b..af670a8 100644
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -1822,9 +1822,9 @@ xfs_difree_inode_chunk(
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
- xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
+ xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno,
XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),
- mp->m_ialloc_blks, flist, mp);
+ mp->m_ialloc_blks);
return;
}
@@ -1867,8 +1867,8 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
- xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
- flist, mp);
+ xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, agbno),
+ contigblk);
/* reset range to current bit and carry on... */
startidx = endidx = nextbit;
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 05/53] libxfs: use a convenience variable instead of open-coding the fork
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (3 preceding siblings ...)
2015-12-19 9:05 ` [PATCH 04/53] libxfs: reorder xfs_bmap_add_free args Darrick J. Wong
@ 2015-12-19 9:05 ` Darrick J. Wong
2015-12-19 9:05 ` [PATCH 06/53] libxfs: refactor the btree size calculator code Darrick J. Wong
` (47 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:05 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Use a convenience variable instead of open-coding the inode fork.
This isn't really needed for now, but will become important when we
add the copy-on-write fork later.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_bmap.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index 46f8469..aef7cf3 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -1714,9 +1714,10 @@ xfs_bmap_add_extent_delay_real(
xfs_filblks_t temp2=0;/* value for da_new calculations */
int tmp_rval; /* partial logging flags */
struct xfs_mount *mp;
+ int whichfork = XFS_DATA_FORK;
mp = bma->tp ? bma->tp->t_mountp : NULL;
- ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
+ ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
@@ -1775,7 +1776,7 @@ xfs_bmap_add_extent_delay_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+ if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
@@ -2006,10 +2007,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist,
- &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
+ &bma->cur, 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2090,10 +2091,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur, 1,
- &tmp_rval, XFS_DATA_FORK);
+ &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2159,10 +2160,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
- 1, &tmp_rval, XFS_DATA_FORK);
+ 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2205,13 +2206,13 @@ xfs_bmap_add_extent_delay_real(
}
/* convert to a btree if necessary */
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(bma->cur == NULL);
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
- da_old > 0, &tmp_logflags, XFS_DATA_FORK);
+ da_old > 0, &tmp_logflags, whichfork);
bma->logflags |= tmp_logflags;
if (error)
goto done;
@@ -2232,7 +2233,7 @@ xfs_bmap_add_extent_delay_real(
if (bma->cur)
bma->cur->bc_private.b.allocated = 0;
- xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
+ xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
bma->logflags |= rval;
return error;
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 06/53] libxfs: refactor the btree size calculator code
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (4 preceding siblings ...)
2015-12-19 9:05 ` [PATCH 05/53] libxfs: use a convenience variable instead of open-coding the fork Darrick J. Wong
@ 2015-12-19 9:05 ` Darrick J. Wong
2015-12-19 9:05 ` [PATCH 07/53] libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct Darrick J. Wong
` (46 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:05 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Create a macro to generate btree size calculator functions.
This will be used (much) later when we get to the refcount
btree.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_bmap.c | 18 +-----------------
libxfs/xfs_bmap_btree.c | 2 ++
libxfs/xfs_bmap_btree.h | 2 ++
libxfs/xfs_inode_fork.c | 1 +
libxfs/xfs_shared.h | 29 +++++++++++++++++++++++++++++
5 files changed, 35 insertions(+), 17 deletions(-)
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index aef7cf3..ad383b4 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -172,25 +172,9 @@ xfs_bmap_worst_indlen(
xfs_inode_t *ip, /* incore inode pointer */
xfs_filblks_t len) /* delayed extent length */
{
- int level; /* btree level number */
- int maxrecs; /* maximum record count at this level */
- xfs_mount_t *mp; /* mount structure */
xfs_filblks_t rval; /* return value */
- mp = ip->i_mount;
- maxrecs = mp->m_bmap_dmxr[0];
- for (level = 0, rval = 0;
- level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
- level++) {
- len += maxrecs - 1;
- do_div(len, maxrecs);
- rval += len;
- if (len == 1)
- return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
- level - 1;
- if (level == 0)
- maxrecs = mp->m_bmap_dmxr[1];
- }
+ rval = xfs_bmbt_calc_btree_size(ip->i_mount, len);
return rval;
}
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c
index 2ef1836..a21f774 100644
--- a/libxfs/xfs_bmap_btree.c
+++ b/libxfs/xfs_bmap_btree.c
@@ -880,3 +880,5 @@ xfs_bmbt_change_owner(
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
return error;
}
+
+DEFINE_BTREE_SIZE_FN(bmbt, m_bmap_dmxr, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))
diff --git a/libxfs/xfs_bmap_btree.h b/libxfs/xfs_bmap_btree.h
index 819a8a4..7165a1b 100644
--- a/libxfs/xfs_bmap_btree.h
+++ b/libxfs/xfs_bmap_btree.h
@@ -140,4 +140,6 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
+DECLARE_BTREE_SIZE_FN(bmbt);
+
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/libxfs/xfs_inode_fork.c b/libxfs/xfs_inode_fork.c
index e1968b4..96a633e 100644
--- a/libxfs/xfs_inode_fork.c
+++ b/libxfs/xfs_inode_fork.c
@@ -17,6 +17,7 @@
*/
#include "libxfs_priv.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
diff --git a/libxfs/xfs_shared.h b/libxfs/xfs_shared.h
index 5be5297..544d0e9 100644
--- a/libxfs/xfs_shared.h
+++ b/libxfs/xfs_shared.h
@@ -234,4 +234,33 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_inode *ip, struct xfs_ifork *ifp);
+/* btree size calculator templates */
+#define DECLARE_BTREE_SIZE_FN(btree) \
+xfs_filblks_t xfs_##btree##_calc_btree_size(struct xfs_mount *mp, \
+ unsigned long len);
+
+#define DEFINE_BTREE_SIZE_FN(btree, limitfield, maxlevels) \
+xfs_filblks_t \
+xfs_##btree##_calc_btree_size( \
+ struct xfs_mount *mp, \
+ unsigned long len) \
+{ \
+ int level; \
+ int maxrecs; \
+ xfs_filblks_t rval; \
+\
+ maxrecs = mp->limitfield[0]; \
+ for (level = 0, rval = 0; level < maxlevels; level++) { \
+ len += maxrecs - 1; \
+ do_div(len, maxrecs); \
+ rval += len; \
+ if (len == 1) \
+ return rval + maxlevels - \
+ level - 1; \
+ if (level == 0) \
+ maxrecs = mp->limitfield[1]; \
+ } \
+ return rval; \
+}
+
#endif /* __XFS_SHARED_H__ */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 07/53] libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (5 preceding siblings ...)
2015-12-19 9:05 ` [PATCH 06/53] libxfs: refactor the btree size calculator code Darrick J. Wong
@ 2015-12-19 9:05 ` Darrick J. Wong
2015-12-19 9:05 ` [PATCH 08/53] libxfs: add the reverse-mapping btree Darrick J. Wong
` (45 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:05 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Because struct xfs_agfl is 36 bytes long and has a 64-bit integer
inside it, gcc will quietly round the structure size up to the nearest
64 bits -- in this case, 40 bytes. This results in the XFS_AGFL_SIZE
macro returning incorrect results for v5 filesystems on 64-bit
machines (118 items instead of 119). As a result, a 32-bit xfs_repair
will see garbage in AGFL item 119 and complain.
Therefore, tell gcc not to pad the structure so that the AGFL size
calculation is correct.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_format.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
index 946bcd1..f6100be 100644
--- a/libxfs/xfs_format.h
+++ b/libxfs/xfs_format.h
@@ -787,7 +787,7 @@ typedef struct xfs_agfl {
__be64 agfl_lsn;
__be32 agfl_crc;
__be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
-} xfs_agfl_t;
+} __attribute__((packed)) xfs_agfl_t;
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 08/53] libxfs: add the reverse-mapping btree
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (6 preceding siblings ...)
2015-12-19 9:05 ` [PATCH 07/53] libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct Darrick J. Wong
@ 2015-12-19 9:05 ` Darrick J. Wong
2015-12-19 9:05 ` [PATCH 09/53] libxfs: resync xfs_prealloc_blocks with the kernel Darrick J. Wong
` (44 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:05 UTC (permalink / raw)
To: david, darrick.wong; +Cc: Dave Chinner, xfs
From: Dave Chinner <david@fromorbit.com>
Provide the basic libxfs code for the rmap btree from the kernel.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
[split patch, add commit message]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
include/libxfs.h | 1
include/xfs_mount.h | 2
include/xfs_trace.h | 7 +
libxfs/Makefile | 3
libxfs/xfs_alloc.c | 27 +++
libxfs/xfs_alloc.h | 6 +
libxfs/xfs_bmap.c | 3
libxfs/xfs_bmap_btree.c | 1
libxfs/xfs_btree.h | 22 ++
libxfs/xfs_format.h | 86 ++++++++-
libxfs/xfs_ialloc.c | 1
libxfs/xfs_ialloc_btree.c | 1
libxfs/xfs_rmap.c | 413 +++++++++++++++++++++++++++++++++++++++++++++
libxfs/xfs_rmap_btree.c | 404 ++++++++++++++++++++++++++++++++++++++++++++
libxfs/xfs_rmap_btree.h | 65 +++++++
libxfs/xfs_sb.c | 6 +
libxfs/xfs_shared.h | 1
libxfs/xfs_types.h | 4
18 files changed, 1032 insertions(+), 21 deletions(-)
create mode 100644 libxfs/xfs_rmap.c
create mode 100644 libxfs/xfs_rmap_btree.c
create mode 100644 libxfs/xfs_rmap_btree.h
diff --git a/include/libxfs.h b/include/libxfs.h
index 04c6f9c..7d1ad46 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -66,6 +66,7 @@ extern uint32_t crc32c_le(uint32_t crc, unsigned char const *p, size_t len);
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
+#include "xfs_rmap_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_inode_fork.h"
#include "xfs_inode_buf.h"
diff --git a/include/xfs_mount.h b/include/xfs_mount.h
index 67f3b05..1daee74 100644
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -64,6 +64,8 @@ typedef struct xfs_mount {
uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */
uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */
uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */
+ uint m_rmap_mxr[2]; /* max rmap btree records */
+ uint m_rmap_mnr[2]; /* min rmap btree records */
uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */
diff --git a/include/xfs_trace.h b/include/xfs_trace.h
index 423772f..ebdf778 100644
--- a/include/xfs_trace.h
+++ b/include/xfs_trace.h
@@ -171,4 +171,11 @@
#define trace_xfs_perag_get_tag(a,b,c,d) ((c) = (c))
#define trace_xfs_perag_put(a,b,c,d) ((c) = (c))
+#define trace_xfs_rmap_alloc_extent(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_alloc_extent_done(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_alloc_extent_error(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_free_extent(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_free_extent_done(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmap_free_extent_error(a,b,c,d,e) ((void) 0)
+
#endif /* __TRACE_H__ */
diff --git a/libxfs/Makefile b/libxfs/Makefile
index ecf1921..3255917 100644
--- a/libxfs/Makefile
+++ b/libxfs/Makefile
@@ -35,6 +35,7 @@ HFILES = \
xfs_inode_buf.h \
xfs_inode_fork.h \
xfs_quota_defs.h \
+ xfs_rmap_btree.h \
xfs_sb.h \
xfs_shared.h \
xfs_trans_resv.h \
@@ -80,6 +81,8 @@ CFILES = cache.c \
xfs_ialloc_btree.c \
xfs_log_rlimit.c \
xfs_rtbitmap.c \
+ xfs_rmap.c \
+ xfs_rmap_btree.c \
xfs_sb.c \
xfs_symlink_remote.c \
xfs_trans_resv.c
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index b43655c..fb6c705 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -26,6 +26,7 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_cksum.h"
@@ -632,6 +633,12 @@ xfs_alloc_ag_vextent(
ASSERT(!args->wasfromfl || !args->isfl);
ASSERT(args->agbno % args->alignment == 0);
+ /* insert new block into the reverse map btree */
+ error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
+ args->agbno, args->len, args->owner);
+ if (error)
+ return error;
+
if (!args->wasfromfl) {
error = xfs_alloc_update_counters(args->tp, args->pag,
args->agbp,
@@ -2016,6 +2023,7 @@ xfs_alloc_fix_freelist(
memset(&targs, 0, sizeof(targs));
targs.tp = tp;
targs.mp = mp;
+ targs.owner = XFS_RMAP_OWN_AG;
targs.agbp = agbp;
targs.agno = args->agno;
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
@@ -2651,6 +2659,8 @@ error0:
* Free an extent.
* Just break up the extent address and hand off to xfs_free_ag_extent
* after fixing up the freelist.
+ *
+ * XXX: need owner of extent being freed
*/
int /* error */
xfs_free_extent(
@@ -2692,6 +2702,12 @@ xfs_free_extent(
goto error0;
}
+ /* XXX: need owner */
+ error = xfs_rmap_free(tp, args.agbp, args.agno, args.agbno, len, 0);
+ if (error)
+ goto error0;
+
+ /* XXX: initially no multiple references, so just free it */
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
if (!error)
xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
@@ -2699,3 +2715,14 @@ error0:
xfs_perag_put(args.pag);
return error;
}
+
+/*
+ * Return the AG block number one past the last fixed btree root block for the
+ * features enabled in the superblock: the rmap btree root if rmapbt is set,
+ * otherwise the free inode btree root if finobt is set, otherwise the inode
+ * btree root. XFS_PREALLOC_BLOCKS() expands to a call to this function.
+ */
+xfs_extlen_t
+xfs_prealloc_blocks(
+ struct xfs_mount *mp)
+{
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return XFS_RMAP_BLOCK(mp) + 1;
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ return XFS_FIBT_BLOCK(mp) + 1;
+ return XFS_IBT_BLOCK(mp) + 1;
+}
diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h
index 071b28b..a9d8e97 100644
--- a/libxfs/xfs_alloc.h
+++ b/libxfs/xfs_alloc.h
@@ -72,6 +72,8 @@ typedef unsigned int xfs_alloctype_t;
* needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
* btree requires 1 fsb, so we set the number of set-aside blocks
* to 4 + 4*agcount.
+ *
+ * XXX: this changes for rmapbt filesystems.
*/
#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
@@ -86,10 +88,13 @@ typedef unsigned int xfs_alloctype_t;
*
* The AG headers are sector sized, so the amount of space they take up is
* dependent on filesystem geometry. The others are all single blocks.
+ *
+ * XXX: this changes for rmapbt filesystems.
*/
#define XFS_ALLOC_AG_MAX_USABLE(mp) \
((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
+xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
/*
* Argument structure for xfs_alloc routines.
@@ -122,6 +127,7 @@ typedef struct xfs_alloc_arg {
char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */
xfs_fsblock_t firstblock; /* io first block allocated */
+ uint64_t owner; /* owner of blocks being allocated */
} xfs_alloc_arg_t;
/*
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index ad383b4..3fe18fc 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -753,6 +753,7 @@ xfs_bmap_extents_to_btree(
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = mp;
+ args.owner = ip->i_ino;
args.firstblock = *firstblock;
if (*firstblock == NULLFSBLOCK) {
args.type = XFS_ALLOCTYPE_START_BNO;
@@ -899,6 +900,7 @@ xfs_bmap_local_to_extents(
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = ip->i_mount;
+ args.owner = ip->i_ino;
args.firstblock = *firstblock;
/*
* Allocate a block. We know we need only one, since the
@@ -3680,6 +3682,7 @@ xfs_bmap_btalloc(
memset(&args, 0, sizeof(args));
args.tp = ap->tp;
args.mp = mp;
+ args.owner = ap->ip->i_ino;
args.fsbno = ap->blkno;
/* Trim the allocation back to the maximum an AG can fit. */
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c
index a21f774..12d1a2d 100644
--- a/libxfs/xfs_bmap_btree.c
+++ b/libxfs/xfs_bmap_btree.c
@@ -443,6 +443,7 @@ xfs_bmbt_alloc_block(
args.mp = cur->bc_mp;
args.fsbno = cur->bc_private.b.firstblock;
args.firstblock = args.fsbno;
+ args.owner = cur->bc_private.b.ip->i_ino;
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
diff --git a/libxfs/xfs_btree.h b/libxfs/xfs_btree.h
index 8f18bab..48ab2b1 100644
--- a/libxfs/xfs_btree.h
+++ b/libxfs/xfs_btree.h
@@ -38,17 +38,19 @@ union xfs_btree_ptr {
};
union xfs_btree_key {
- xfs_bmbt_key_t bmbt;
- xfs_bmdr_key_t bmbr; /* bmbt root block */
- xfs_alloc_key_t alloc;
- xfs_inobt_key_t inobt;
+ struct xfs_bmbt_key bmbt;
+ xfs_bmdr_key_t bmbr; /* bmbt root block */
+ xfs_alloc_key_t alloc;
+ struct xfs_inobt_key inobt;
+ struct xfs_rmap_key rmap;
};
union xfs_btree_rec {
- xfs_bmbt_rec_t bmbt;
- xfs_bmdr_rec_t bmbr; /* bmbt root block */
- xfs_alloc_rec_t alloc;
- xfs_inobt_rec_t inobt;
+ struct xfs_bmbt_rec bmbt;
+ xfs_bmdr_rec_t bmbr; /* bmbt root block */
+ struct xfs_alloc_rec alloc;
+ struct xfs_inobt_rec inobt;
+ struct xfs_rmap_rec rmap;
};
/*
@@ -63,6 +65,7 @@ union xfs_btree_rec {
#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
+#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
/*
* For logging record fields.
@@ -94,6 +97,7 @@ do { \
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
+ case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(rmap, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -108,6 +112,7 @@ do { \
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
+ case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_ADD(rmap, stat, val); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -199,6 +204,7 @@ typedef struct xfs_btree_cur
xfs_alloc_rec_incore_t a;
xfs_bmbt_irec_t b;
xfs_inobt_rec_incore_t i;
+ struct xfs_rmap_irec r;
} bc_rec; /* current insert/search record value */
struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
index f6100be..52b1d06 100644
--- a/libxfs/xfs_format.h
+++ b/libxfs/xfs_format.h
@@ -455,8 +455,10 @@ xfs_sb_has_compat_feature(
}
#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
+#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
#define XFS_SB_FEAT_RO_COMPAT_ALL \
- (XFS_SB_FEAT_RO_COMPAT_FINOBT)
+ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \
+ XFS_SB_FEAT_RO_COMPAT_RMAPBT)
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
@@ -521,6 +523,12 @@ static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
}
+/*
+ * True only for a v5 superblock that has the reverse-mapping btree
+ * read-only-compatible feature bit set.
+ */
+static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
+{
+ return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
+ (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
+}
+
static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
@@ -599,10 +607,10 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION)
/*
- * Btree number 0 is bno, 1 is cnt. This value gives the size of the
+ * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the
* arrays below.
*/
-#define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1)
+#define XFS_BTNUM_AGF ((int)XFS_BTNUM_RMAPi + 1)
/*
* The second word of agf_levels in the first a.g. overlaps the EFS
@@ -619,12 +627,10 @@ typedef struct xfs_agf {
__be32 agf_seqno; /* sequence # starting from 0 */
__be32 agf_length; /* size in blocks of a.g. */
/*
- * Freespace information
+ * Freespace and rmap information
*/
__be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */
- __be32 agf_spare0; /* spare field */
__be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
- __be32 agf_spare1; /* spare field */
__be32 agf_flfirst; /* first freelist block's index */
__be32 agf_fllast; /* last freelist block's index */
@@ -1301,16 +1307,74 @@ typedef __be32 xfs_inobt_ptr_t;
#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
/*
- * The first data block of an AG depends on whether the filesystem was formatted
- * with the finobt feature. If so, account for the finobt reserved root btree
- * block.
+ * Reverse mapping btree format definitions
+ *
+ * There is a btree for the reverse map per allocation group
*/
-#define XFS_PREALLOC_BLOCKS(mp) \
+#define XFS_RMAP_CRC_MAGIC 0x524d4233 /* 'RMB3' */
+
+/*
+ * Special owner types.
+ *
+ * Seeing as we only support up to 8EB, we have the upper bit of the owner field
+ * to tell us we have a special owner value. We use these for static metadata
+ * allocated at mkfs/growfs time, as well as for freespace management metadata.
+ */
+#define XFS_RMAP_OWN_NULL (-1ULL) /* No owner, for growfs */
+#define XFS_RMAP_OWN_UNKNOWN (-2ULL) /* Unknown owner, for EFI recovery */
+#define XFS_RMAP_OWN_FS (-3ULL) /* static fs metadata */
+#define XFS_RMAP_OWN_LOG (-4ULL) /* static fs metadata */
+#define XFS_RMAP_OWN_AG (-5ULL) /* AG freespace btree blocks */
+#define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */
+#define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */
+#define XFS_RMAP_OWN_MIN (-8ULL) /* guard */
+
+/*
+ * Data record structure
+ *
+ * This is the big-endian form of a reverse-mapping record. rm_owner holds
+ * either the value callers put in xfs_alloc_arg.owner (an inode number for
+ * file data and bmap btree blocks) or one of the XFS_RMAP_OWN_* special values.
+ */
+struct xfs_rmap_rec {
+ __be32 rm_startblock; /* extent start block */
+ __be32 rm_blockcount; /* extent length */
+ __be64 rm_owner; /* extent owner */
+};
+
+/*
+ * CPU-endian copy of struct xfs_rmap_rec. This is the form stored in the btree
+ * cursor's bc_rec.r and passed to xfs_rmap_update() / xfs_rmap_get_rec(),
+ * which convert to and from the big-endian record.
+ */
+struct xfs_rmap_irec {
+ xfs_agblock_t rm_startblock; /* extent start block */
+ xfs_extlen_t rm_blockcount; /* extent length */
+ __uint64_t rm_owner; /* extent owner */
+};
+
+/*
+ * Key structure
+ *
+ * The key carries only the extent start block; neither the length nor the
+ * owner is part of the key, so they are not used for lookups.
+ */
+struct xfs_rmap_key {
+ __be32 rm_startblock; /* extent start block */
+};
+
+/* btree pointer type */
+typedef __be32 xfs_rmap_ptr_t;
+
+/*
+ * block numbers in the AG.
+ */
+#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
+#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+#define XFS_RMAP_BLOCK(mp) \
(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
XFS_FIBT_BLOCK(mp) + 1 : \
XFS_IBT_BLOCK(mp) + 1)
-
+/*
+ * The first data block of an AG depends on whether the filesystem was formatted
+ * with the optional btree features. These need to be accounted for
+ * appropriately.
+ *
+ * XXX: this should be calculated once at mount time and stored in the struct
+ * xfs_mount rather than calculated every time it is used.
+ */
+#define XFS_PREALLOC_BLOCKS(mp) xfs_prealloc_blocks(mp)
/*
* BMAP Btree format definitions
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
index af670a8..12eaaab 100644
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -615,6 +615,7 @@ xfs_ialloc_ag_alloc(
args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
do_sparse = prandom_u32() & 1;
#endif
+ args.owner = XFS_RMAP_OWN_INODES;
/*
* Locking will ensure that we don't have two callers in here
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
index f592ad1..77b41be 100644
--- a/libxfs/xfs_ialloc_btree.c
+++ b/libxfs/xfs_ialloc_btree.c
@@ -95,6 +95,7 @@ xfs_inobt_alloc_block(
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
+ args.owner = XFS_RMAP_OWN_INOBT;
args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
args.minlen = 1;
args.maxlen = 1;
diff --git a/libxfs/xfs_rmap.c b/libxfs/xfs_rmap.c
new file mode 100644
index 0000000..b2a3330
--- /dev/null
+++ b/libxfs/xfs_rmap.c
@@ -0,0 +1,413 @@
+
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_btree.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+
+
+/*
+ * Load [bno, len, owner] into the cursor's search record and run an
+ * XFS_LOOKUP_LE lookup in the btree given by cur.
+ *
+ * Although len and owner are stored in the search record, the rmap btree key
+ * comparison (xfs_rmapbt_key_diff) looks only at rm_startblock, so in effect
+ * this is a lookup by start block.
+ *
+ * *stat is filled in by xfs_btree_lookup(); the callers in this file treat a
+ * value of 1 as "a record was found". The return value is whatever
+ * xfs_btree_lookup() returns.
+ */
+STATIC int
+xfs_rmap_lookup_le(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ uint64_t owner,
+ int *stat)
+{
+ cur->bc_rec.r.rm_startblock = bno;
+ cur->bc_rec.r.rm_blockcount = len;
+ cur->bc_rec.r.rm_owner = owner;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given by irec
+ * (start block, block count and owner), converting each field to its
+ * big-endian on-disk form first.
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int
+xfs_rmap_update(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *irec)
+{
+ union xfs_btree_rec rec;
+
+ rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
+ rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
+ rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
+ return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ *
+ * The record at the cursor is fetched with xfs_btree_get_rec() and copied into
+ * irec in CPU byte order. If that call fails, or reports *stat == 0, irec is
+ * left untouched and the call's return value is passed straight back (so the
+ * return can be 0 with *stat == 0). Callers must check both.
+ */
+STATIC int
+xfs_rmap_get_rec(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *irec,
+ int *stat)
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (error || !*stat)
+ return error;
+
+ irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
+ irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
+ irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
+ return 0;
+}
+
+/*
+ * Find the extent in the rmap btree and remove it.
+ *
+ * The record we find should always span a range greater than or equal to the
+ * extent being freed. This makes the code simple as, in theory, we do not
+ * have to handle ranges that are split across multiple records as extents that
+ * result in bmap btree extent merges should also result in rmap btree extent
+ * merges. The owner field ensures we don't merge extents from different
+ * structures into the same record, hence this property should always hold true
+ * if we ensure that the rmap btree supports at least the same size maximum
+ * extent as the bmap btree (2^21 blocks at present).
+ *
+ * Complexity: when growing the filesystem, we "free" an extent when growing the
+ * last AG. This extent is new space and so it is not tracked as used space in
+ * the btree. The growfs code will pass in an owner of XFS_RMAP_OWN_NULL to
+ * indicate that it expected that there is no owner of this extent. We verify
+ * that - the extent lookup must result in a record that does not overlap.
+ *
+ * Complexity #2: EFIs do not record the owner of the extent, so when recovering
+ * EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap btree to
+ * ignore the owner (i.e. wildcard match) so we don't trigger corruption checks
+ * during log recovery.
+ *
+ * Returns 0 on success, and also when the filesystem has no rmap btree (in
+ * which case nothing is done). Otherwise returns the error from the failing
+ * btree operation or corruption check, after tearing down the cursor with
+ * XFS_BTREE_ERROR.
+ */
+int
+xfs_rmap_free(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	uint64_t		owner)
+{
+	struct xfs_btree_cur	*cur;
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_rmap_irec	ltrec;
+	int			error;
+	int			i;
+
+	/*
+	 * if rmap btree is not supported, then just return success without
+	 * doing anything.
+	 */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return 0;
+
+	trace_xfs_rmap_free_extent(mp, agno, bno, len, owner);
+	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+
+	/*
+	 * We always have a left record because there's a static record
+	 * for the AG headers at rm_startblock == 0.
+	 */
+	error = xfs_rmap_lookup_le(cur, bno, len, owner, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+	error = xfs_rmap_get_rec(cur, &ltrec, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+	/*
+	 * special growfs case - bno is beyond last record. The comparison is
+	 * >= rather than > because the left record may end exactly where the
+	 * new space begins; that is adjacent, not overlapping.
+	 */
+	if (owner == XFS_RMAP_OWN_NULL) {
+		XFS_WANT_CORRUPTED_GOTO(mp, bno >= ltrec.rm_startblock +
+						ltrec.rm_blockcount, out_error);
+		goto out_done;
+	}
+
+	/*
+	 * make sure the extent we found covers the entire freeing range:
+	 * it must start at or before bno and end at or after bno + len.
+	 * Checking only the lengths is not enough, because a range that starts
+	 * inside the record can still run past its end, and the "middle" case
+	 * below would then compute a wrapped block count.
+	 */
+	XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno, out_error);
+	XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_blockcount >= len, out_error);
+	XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= ltrec.rm_startblock +
+					ltrec.rm_blockcount, out_error);
+
+	/*
+	 * The owner must match the record, unless the caller passed one of the
+	 * special owner values in [XFS_RMAP_OWN_MIN, XFS_RMAP_OWN_NULL), which
+	 * are accepted here without being compared against the record.
+	 */
+	XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
+				    (owner < XFS_RMAP_OWN_NULL &&
+				     owner >= XFS_RMAP_OWN_MIN), out_error);
+
+	/* exact match is easy */
+	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
+		/* remove extent from rmap tree */
+		error = xfs_btree_delete(cur, &i);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+	} else if (ltrec.rm_startblock == bno) {
+		/*
+		 * overlap left hand side of extent
+		 *
+		 *          ltbno                ltlen
+		 * Orig:    |oooooooooooooooooooo|
+		 * Freeing: |fffffffff|
+		 * Result:            |rrrrrrrrrr|
+		 *         bno       len
+		 */
+		ltrec.rm_startblock += len;
+		ltrec.rm_blockcount -= len;
+		error = xfs_rmap_update(cur, &ltrec);
+		if (error)
+			goto out_error;
+	} else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
+		/*
+		 * overlap right hand side of extent
+		 *
+		 *          ltbno                ltlen
+		 * Orig:    |oooooooooooooooooooo|
+		 * Freeing:            |fffffffff|
+		 * Result:  |rrrrrrrrrr|
+		 *                    bno       len
+		 */
+		ltrec.rm_blockcount -= len;
+		error = xfs_rmap_update(cur, &ltrec);
+		if (error)
+			goto out_error;
+	} else {
+		/*
+		 * overlap middle of extent
+		 *
+		 *          ltbno                ltlen
+		 * Orig:    |oooooooooooooooooooo|
+		 * Freeing:       |fffffffff|
+		 * Result:  |rrrrr|         |rrrr|
+		 *               bno       len
+		 */
+		xfs_extlen_t	orig_len = ltrec.rm_blockcount;
+
+		/* shrink the existing record to the part left of the hole */
+		ltrec.rm_blockcount = bno - ltrec.rm_startblock;
+		error = xfs_rmap_update(cur, &ltrec);
+		if (error)
+			goto out_error;
+
+		error = xfs_btree_increment(cur, 0, &i);
+		if (error)
+			goto out_error;
+
+		/* insert a new record for the part right of the hole */
+		cur->bc_rec.r.rm_startblock = bno + len;
+		cur->bc_rec.r.rm_blockcount = orig_len - len -
+						ltrec.rm_blockcount;
+		cur->bc_rec.r.rm_owner = ltrec.rm_owner;
+		error = xfs_btree_insert(cur, &i);
+		if (error)
+			goto out_error;
+	}
+
+out_done:
+	trace_xfs_rmap_free_extent_done(mp, agno, bno, len, owner);
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	return 0;
+
+out_error:
+	trace_xfs_rmap_free_extent_error(mp, agno, bno, len, owner);
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * When we allocate a new block, the first thing we do is add a reference to the
+ * extent in the rmap btree. This is how we track the owner of the extent and
+ * the number of references to it.
+ *
+ * Initially, we do not have shared extents, and so the extent can only have a
+ * single reference count and owner. This makes the initial implementation easy,
+ * but does not allow us to use the rmap tree for tracking reflink shared files.
+ * Hence the initial implementation is simply a lookup to find the place to
+ * insert (and checking we don't find a duplicate/overlap) and then inserting
+ * the appropriate record.
+ *
+ * The new extent is merged into the left neighbour, the right neighbour, or
+ * both when they are contiguous with it and have the same owner; otherwise a
+ * new record is inserted.
+ *
+ * Returns 0 on success, and also when the filesystem has no rmap btree (in
+ * which case nothing is done). Otherwise returns the error from the failing
+ * btree operation or corruption check, after tearing down the cursor with
+ * XFS_BTREE_ERROR.
+ */
+int
+xfs_rmap_alloc(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	uint64_t		owner)
+{
+	struct xfs_btree_cur	*cur;
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_rmap_irec	ltrec;
+	struct xfs_rmap_irec	gtrec;
+	int			have_gt;
+	int			error;
+	int			i;
+
+	/*
+	 * if rmap btree is not supported, then just return success without
+	 * doing anything.
+	 */
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return 0;
+
+	trace_xfs_rmap_alloc_extent(mp, agno, bno, len, owner);
+	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+
+	/*
+	 * check to see if we find an existing record for this extent rather
+	 * than just the location for insert.
+	 */
+	error = xfs_rmap_lookup_le(cur, bno, len, owner, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+	error = xfs_rmap_get_rec(cur, &ltrec, &i);
+	if (error)
+		goto out_error;
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+	/* the left neighbour must end at or before the new extent */
+	XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock +
+				    ltrec.rm_blockcount <= bno, out_error);
+
+	error = xfs_btree_increment(cur, 0, &have_gt);
+	if (error)
+		goto out_error;
+	if (have_gt) {
+		error = xfs_rmap_get_rec(cur, &gtrec, &i);
+		if (error)
+			goto out_error;
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+		/* the right neighbour must start at or after our end */
+		XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock,
+					out_error);
+	} else {
+		/*
+		 * No right neighbour: only rm_owner is set, as a marker. The
+		 * other gtrec fields stay uninitialised and are only read
+		 * after an owner match.
+		 */
+		gtrec.rm_owner = XFS_RMAP_OWN_NULL;
+	}
+
+	/* cursor currently points one record past ltrec */
+	if (ltrec.rm_owner == owner &&
+	    ltrec.rm_startblock + ltrec.rm_blockcount == bno) {
+		/*
+		 * left edge contiguous
+		 *
+		 *         ltbno     ltlen
+		 * orig:   |ooooooooo|
+		 * adding:           |aaaaaaaaa|
+		 * result: |rrrrrrrrrrrrrrrrrrr|
+		 *                  bno       len
+		 */
+		ltrec.rm_blockcount += len;
+		if (gtrec.rm_owner == owner &&
+		    bno + len == gtrec.rm_startblock) {
+			/*
+			 * right edge also contiguous
+			 *
+			 *         ltbno     ltlen    gtbno     gtlen
+			 * orig:   |ooooooooo|         |ooooooooo|
+			 * adding:           |aaaaaaaaa|
+			 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
+			 */
+			ltrec.rm_blockcount += gtrec.rm_blockcount;
+			error = xfs_btree_delete(cur, &i);
+			if (error)
+				goto out_error;
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+		}
+
+		/* step the cursor back onto ltrec before rewriting it */
+		error = xfs_btree_decrement(cur, 0, &have_gt);
+		if (error)
+			goto out_error;
+		error = xfs_rmap_update(cur, &ltrec);
+		if (error)
+			goto out_error;
+	} else if (gtrec.rm_owner == owner &&
+		   bno + len == gtrec.rm_startblock) {
+		/*
+		 * right edge contiguous
+		 *
+		 *                   gtbno     gtlen
+		 * Orig:             |ooooooooo|
+		 * adding: |aaaaaaaaa|
+		 * Result: |rrrrrrrrrrrrrrrrrrr|
+		 *        bno       len
+		 */
+		gtrec.rm_startblock = bno;
+		gtrec.rm_blockcount += len;
+		error = xfs_rmap_update(cur, &gtrec);
+		if (error)
+			goto out_error;
+	} else {
+		/* no contiguous edge with identical owner */
+		cur->bc_rec.r.rm_startblock = bno;
+		cur->bc_rec.r.rm_blockcount = len;
+		cur->bc_rec.r.rm_owner = owner;
+		error = xfs_btree_insert(cur, &i);
+		if (error)
+			goto out_error;
+	}
+
+	trace_xfs_rmap_alloc_extent_done(mp, agno, bno, len, owner);
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	return 0;
+
+out_error:
+	trace_xfs_rmap_alloc_extent_error(mp, agno, bno, len, owner);
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	return error;
+}
diff --git a/libxfs/xfs_rmap_btree.c b/libxfs/xfs_rmap_btree.c
new file mode 100644
index 0000000..ed1792d
--- /dev/null
+++ b/libxfs/xfs_rmap_btree.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+
+
+/*
+ * Reverse map btree.
+ *
+ * This is a per-ag tree used to track the owner of a given extent. Owner
+ * records are inserted when an extent is allocated, and removed when an extent
+ * is freed. For existing filesystems, there can only be one owner of an extent,
+ * usually an inode or some other metadata structure like an AG btree.
+ *
+ * Initial thoughts are that the
+ * value of the owner field needs external flags to define what it means, and
+ * hence we need a flags field in the record. This means the record is going to
+ * be larger than 16 bytes (agbno,len,owner = 16 bytes), so maybe this isn't the
+ * best idea. Initially just implement the owner field - we can probably steal
+ * bits from the extent length field for type descriptors given that MAXEXTLEN
+ * is only 21 bits if we want to store the type as well. Keep in mind that if we
+ * want to do this there are still restrictions on the length of extents we
+ * track in the rmap btree (see comments on xfs_rmap_free()).
+ *
+ * The rmap btree is part of the free space management, so blocks for the tree
+ * are sourced from the agfl. Hence we need transaction reservation support for
+ * this tree so that the freelist is always large enough. This also impacts on
+ * the minimum space we need to leave free in the AG.
+ *
+ * The tree is ordered by block number - there's no need to order/search by
+ * extent size for online updating/management of the tree, and the reverse
+ * lookups are going to be "who owns this block", so by-block ordering is
+ * perfect for this.
+ *
+ * XXX: open question is how to handle blocks that are owned by the freespace
+ * tree blocks. Right now they will be classified when they are moved to the
+ * freelist or removed from the freelist. i.e. the extent allocation/freeing
+ * will mark the extents allocated as owned by the AG.
+ */
+/*
+ * Create a second cursor for the same mount, transaction, AGF buffer and AG
+ * as cur. The new cursor comes straight from xfs_rmapbt_init_cursor(); no
+ * other state is copied here.
+ */
+STATIC struct xfs_btree_cur *
+xfs_rmapbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.a.agbp, cur->bc_private.a.agno);
+}
+
+/*
+ * Point the AGF at a new root block for this btree.
+ *
+ * ptr->s is stored as-is in agf_roots[] (no byte swapping is done here), the
+ * on-disk level count and the per-AG in-core level count are both adjusted by
+ * inc, and the roots and levels fields of the AGF are logged.
+ */
+STATIC void
+xfs_rmapbt_set_root(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int inc)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ int btnum = cur->bc_btnum;
+ struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
+
+ ASSERT(ptr->s != 0);
+
+ agf->agf_roots[btnum] = ptr->s;
+ be32_add_cpu(&agf->agf_levels[btnum], inc);
+ pag->pagf_levels[btnum] += inc;
+ xfs_perag_put(pag);
+
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+/*
+ * Allocate one block for the rmap btree from the AG free list.
+ *
+ * On success *stat is 1 and new->s holds the big-endian AG block number.
+ * If the free list yields NULLAGBLOCK, *stat is 0 and 0 is returned.
+ * If xfs_alloc_get_freelist() fails, its error is returned and *stat is not
+ * written. The start argument is not used by this function.
+ */
+STATIC int
+xfs_rmapbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ int error;
+ xfs_agblock_t bno;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+ /* Allocate the new block from the freelist. If we can't, give up. */
+ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+ &bno, 1);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+ }
+
+ if (bno == NULLAGBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+
+ /* account for one more AG btree block in this transaction */
+ xfs_trans_agbtree_delta(cur->bc_tp, 1);
+ new->s = cpu_to_be32(bno);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+}
+
+/*
+ * Give a btree block back to the AG free list.
+ *
+ * The block's AG block number is derived from the buffer's disk address. Once
+ * it is on the free list, the extent is marked busy with
+ * XFS_EXTENT_BUSY_SKIP_DISCARD, the transaction's AG btree block delta is
+ * decremented, and the buffer is invalidated in the transaction. Returns the
+ * error from xfs_alloc_put_freelist() if that fails, otherwise 0.
+ */
+STATIC int
+xfs_rmapbt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agblock_t bno;
+ int error;
+
+ bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+ error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
+ if (error)
+ return error;
+
+ xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+ XFS_EXTENT_BUSY_SKIP_DISCARD);
+ xfs_trans_agbtree_delta(cur->bc_tp, -1);
+
+ xfs_trans_binval(cur->bc_tp, bp);
+ return 0;
+}
+
+/*
+ * Minimum number of records in a block at the given level. m_rmap_mnr[] has
+ * two entries: index 0 is used for level 0, index 1 for every other level.
+ */
+STATIC int
+xfs_rmapbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_rmap_mnr[level != 0];
+}
+
+/*
+ * Maximum number of records in a block at the given level. m_rmap_mxr[] has
+ * two entries: index 0 is used for level 0, index 1 for every other level.
+ */
+STATIC int
+xfs_rmapbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_rmap_mxr[level != 0];
+}
+
+/*
+ * Build a key from a record. The key consists of the start block only, and it
+ * is copied without conversion since both sides are in on-disk byte order.
+ */
+STATIC void
+xfs_rmapbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ key->rmap.rm_startblock = rec->rmap.rm_startblock;
+}
+
+/*
+ * Fill in a record from a key. Only rm_startblock is written; rm_blockcount
+ * and rm_owner in rec are left as they were, because the key does not carry
+ * them.
+ */
+STATIC void
+xfs_rmapbt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ rec->rmap.rm_startblock = key->rmap.rm_startblock;
+}
+
+/*
+ * Build an on-disk record from the cursor's in-core record (bc_rec.r),
+ * converting all three fields to big-endian.
+ */
+STATIC void
+xfs_rmapbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
+ rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
+ rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
+}
+
+/*
+ * Set ptr to the root block of this btree, read from the AGF's agf_roots[]
+ * entry for the cursor's btree number. The value is copied as stored on disk.
+ * The cursor's AG number is asserted to match the AGF's sequence number, and
+ * the root is asserted to be non-zero.
+ */
+STATIC void
+xfs_rmapbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+
+ ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
+
+ ptr->s = agf->agf_roots[cur->bc_btnum];
+}
+
+/*
+ * Compare a key against the cursor's search record. Returns the key's start
+ * block minus the search record's start block, computed in 64 bits. Only
+ * rm_startblock takes part; the search record's length and owner are ignored.
+ */
+STATIC __int64_t
+xfs_rmapbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ struct xfs_rmap_irec *rec = &cur->bc_rec.r;
+ struct xfs_rmap_key *kp = &key->rmap;
+
+ return (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+}
+
+/*
+ * Structural checks on an rmap btree block, shared by the read and write
+ * verifiers. Returns true only if every check passes: magic number, feature
+ * bit, UUID, block number, owning AG, level, record count and sibling
+ * pointers.
+ */
+static bool
+xfs_rmapbt_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
+ unsigned int level;
+
+ /*
+ * magic number and level verification
+ *
+ * During growfs operations, we can't verify the exact level or owner as
+ * the perag is not fully initialised and hence not attached to the
+ * buffer. In this case, check against the maximum tree depth.
+ *
+ * Similarly, during log recovery we will have a perag structure
+ * attached, but the agf information will not yet have been initialised
+ * from the on disk AGF. Again, we can only check against maximum limits
+ * in this case.
+ */
+ if (block->bb_magic!= cpu_to_be32(XFS_RMAP_CRC_MAGIC))
+ return false;
+
+ /* an rmap block is only valid on a filesystem with the feature bit */
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ /* the owner can only be checked when a perag is attached */
+ if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+
+ level = be16_to_cpu(block->bb_level);
+ if (pag && pag->pagf_init) {
+ if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
+ return false;
+ } else if (level >= mp->m_ag_maxlevels)
+ return false;
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > mp->m_rmap_mxr[level != 0])
+ return false;
+
+ /*
+ * sibling pointer verification: each pointer must be non-zero and
+ * either lie inside the AG or be NULLAGBLOCK.
+ */
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
+}
+
+/*
+ * Read verifier for rmap btree blocks.  The CRC is checked first and a
+ * mismatch is flagged as -EFSBADCRC; otherwise the block structure is checked
+ * and a failure is flagged as -EFSCORRUPTED.  Any error set on the buffer is
+ * traced and passed to xfs_verifier_error().
+ */
+static void
+xfs_rmapbt_read_verify(
+ struct xfs_buf *bp)
+{
+ if (!xfs_btree_sblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_rmapbt_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_verifier_error(bp);
+ }
+}
+
+/*
+ * Write verifier for rmap btree blocks.  A block that fails the structural
+ * checks is traced, flagged -EFSCORRUPTED and reported, and no CRC is
+ * calculated for it.  A block that passes has xfs_btree_sblock_calc_crc()
+ * called on it.
+ */
+static void
+xfs_rmapbt_write_verify(
+ struct xfs_buf *bp)
+{
+ if (!xfs_rmapbt_verify(bp)) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
+ }
+ xfs_btree_sblock_calc_crc(bp);
+
+}
+
+/* Read and write verifier hooks for rmap btree block buffers. */
+const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
+ .verify_read = xfs_rmapbt_read_verify,
+ .verify_write = xfs_rmapbt_write_verify,
+};
+
+
+#if defined(DEBUG) || defined(XFS_WARN)
+/*
+ * Ordering check for two keys: returns nonzero if the start block of @k1 is
+ * strictly less than the start block of @k2.
+ */
+STATIC int
+xfs_rmapbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be32_to_cpu(k1->rmap.rm_startblock) <
+ be32_to_cpu(k2->rmap.rm_startblock);
+}
+
+/*
+ * Ordering check for two records: returns nonzero if @r1 ends (start block +
+ * block count) at or before the start block of @r2, i.e. the extents are in
+ * ascending order and do not overlap.
+ */
+STATIC int
+xfs_rmapbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
+{
+ return be32_to_cpu(r1->rmap.rm_startblock) +
+ be32_to_cpu(r1->rmap.rm_blockcount) <=
+ be32_to_cpu(r2->rmap.rm_startblock);
+}
+#endif /* DEBUG || XFS_WARN */
+
+/*
+ * Btree operations vector for the reverse mapping btree; installed on each
+ * cursor by xfs_rmapbt_init_cursor().
+ */
+static const struct xfs_btree_ops xfs_rmapbt_ops = {
+ .rec_len = sizeof(struct xfs_rmap_rec),
+ .key_len = sizeof(struct xfs_rmap_key),
+
+ .dup_cursor = xfs_rmapbt_dup_cursor,
+ .set_root = xfs_rmapbt_set_root,
+ .alloc_block = xfs_rmapbt_alloc_block,
+ .free_block = xfs_rmapbt_free_block,
+ .get_minrecs = xfs_rmapbt_get_minrecs,
+ .get_maxrecs = xfs_rmapbt_get_maxrecs,
+ .init_key_from_rec = xfs_rmapbt_init_key_from_rec,
+ .init_rec_from_key = xfs_rmapbt_init_rec_from_key,
+ .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur,
+ .key_diff = xfs_rmapbt_key_diff,
+ .buf_ops = &xfs_rmapbt_buf_ops,
+ /* the ordering checks are only compiled for DEBUG or XFS_WARN builds */
+#if defined(DEBUG) || defined(XFS_WARN)
+ .keys_inorder = xfs_rmapbt_keys_inorder,
+ .recs_inorder = xfs_rmapbt_recs_inorder,
+#endif
+};
+
+/*
+ * Allocate and initialise a new reverse mapping btree cursor.
+ *
+ * The cursor is zero-allocated from xfs_btree_cur_zone and set up for the rmap
+ * btree of AG @agno.  @tp, @mp and @agbp are stored in the cursor, and the
+ * tree height is read from the AGF contained in @agbp.
+ */
+struct xfs_btree_cur *
+xfs_rmapbt_init_cursor(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_btree_cur *cur;
+
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_btnum = XFS_BTNUM_RMAP;
+ cur->bc_flags = XFS_BTREE_CRC_BLOCKS;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
+ cur->bc_ops = &xfs_rmapbt_ops;
+ /* current height of the tree, as recorded in the AGF */
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+
+ cur->bc_private.a.agbp = agbp;
+ cur->bc_private.a.agno = agno;
+
+ return cur;
+}
+
+/*
+ * Calculate number of records in an rmap btree block.
+ *
+ * @blocklen is the size of the block in bytes; the block header
+ * (XFS_RMAP_BLOCK_LEN) is subtracted first.  For a leaf block the result is
+ * the number of records that fit; otherwise it is the number of key/pointer
+ * pairs that fit.  @mp is not used by the calculation.
+ */
+int
+xfs_rmapbt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
+{
+ blocklen -= XFS_RMAP_BLOCK_LEN;
+
+ if (leaf)
+ return blocklen / sizeof(struct xfs_rmap_rec);
+ return blocklen /
+ (sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
+}
diff --git a/libxfs/xfs_rmap_btree.h b/libxfs/xfs_rmap_btree.h
new file mode 100644
index 0000000..9ad65e5
--- /dev/null
+++ b/libxfs/xfs_rmap_btree.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_RMAP_BTREE_H__
+#define __XFS_RMAP_BTREE_H__
+
+/*
+ * Reverse mapping btree block layout and interfaces
+ */
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_mount;
+
+/* rmaps only exist on crc enabled filesystems */
+#define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
+
+/*
+ * Record, key, and pointer address macros for rmap btree blocks.
+ *
+ * @index is 1-based.  The record array (XFS_RMAP_REC_ADDR) and the key array
+ * (XFS_RMAP_KEY_ADDR) both start immediately after the XFS_RMAP_BLOCK_LEN byte
+ * block header; the pointer array (XFS_RMAP_PTR_ADDR) starts after space for
+ * @maxrecs keys.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_RMAP_REC_ADDR(block, index) \
+ ((struct xfs_rmap_rec *) \
+ ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+ (((index) - 1) * sizeof(struct xfs_rmap_rec))))
+
+#define XFS_RMAP_KEY_ADDR(block, index) \
+ ((struct xfs_rmap_key *) \
+ ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+ ((index) - 1) * sizeof(struct xfs_rmap_key)))
+
+#define XFS_RMAP_PTR_ADDR(block, index, maxrecs) \
+ ((xfs_rmap_ptr_t *) \
+ ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+ (maxrecs) * sizeof(struct xfs_rmap_key) + \
+ ((index) - 1) * sizeof(xfs_rmap_ptr_t)))
+
+struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
+ struct xfs_trans *tp, struct xfs_buf *bp,
+ xfs_agnumber_t agno);
+int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
+
+int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
+ xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+ uint64_t owner);
+int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
+ xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+ uint64_t owner);
+
+#endif /* __XFS_RMAP_BTREE_H__ */
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
index 78ad889..b23dfd1 100644
--- a/libxfs/xfs_sb.c
+++ b/libxfs/xfs_sb.c
@@ -33,6 +33,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
+#include "xfs_rmap_btree.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -711,6 +712,11 @@ xfs_sb_mount_common(
mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
+ mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1);
+ mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0);
+ mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
+ mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
+
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock);
diff --git a/libxfs/xfs_shared.h b/libxfs/xfs_shared.h
index 544d0e9..f756920 100644
--- a/libxfs/xfs_shared.h
+++ b/libxfs/xfs_shared.h
@@ -38,6 +38,7 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
extern const struct xfs_buf_ops xfs_agf_buf_ops;
extern const struct xfs_buf_ops xfs_agfl_buf_ops;
extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
+extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
diff --git a/libxfs/xfs_types.h b/libxfs/xfs_types.h
index f0d145a..da87796 100644
--- a/libxfs/xfs_types.h
+++ b/libxfs/xfs_types.h
@@ -111,8 +111,8 @@ typedef enum {
} xfs_lookup_t;
typedef enum {
- XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
- XFS_BTNUM_FINOi, XFS_BTNUM_MAX
+ XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
+ XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
} xfs_btnum_t;
struct xfs_name {
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 09/53] libxfs: resync xfs_prealloc_blocks with the kernel
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (7 preceding siblings ...)
2015-12-19 9:05 ` [PATCH 08/53] libxfs: add the reverse-mapping btree Darrick J. Wong
@ 2015-12-19 9:05 ` Darrick J. Wong
2015-12-19 9:05 ` [PATCH 10/53] xfs: rmap btree transaction reservations Darrick J. Wong
` (43 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:05 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Move xfs_prealloc_blocks() to the same line as in the kernel code.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_alloc.c | 22 +++++++++++-----------
libxfs/xfs_alloc.h | 4 ++--
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index fb6c705..fea94c8 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -46,6 +46,17 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+/*
+ * Return the AG block number that follows the last fixed-location btree root
+ * block.  Which root comes last depends on the feature bits: the rmap btree
+ * root if rmapbt is enabled, else the free inode btree root if finobt is
+ * enabled, else the inode btree root.
+ */
+xfs_extlen_t
+xfs_prealloc_blocks(
+ struct xfs_mount *mp)
+{
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return XFS_RMAP_BLOCK(mp) + 1;
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ return XFS_FIBT_BLOCK(mp) + 1;
+ return XFS_IBT_BLOCK(mp) + 1;
+}
+
/*
* Lookup the record equal to [bno, len] in the btree given by cur.
*/
@@ -2715,14 +2726,3 @@ error0:
xfs_perag_put(args.pag);
return error;
}
-
-xfs_extlen_t
-xfs_prealloc_blocks(
- struct xfs_mount *mp)
-{
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
- return XFS_RMAP_BLOCK(mp) + 1;
- if (xfs_sb_version_hasfinobt(&mp->m_sb))
- return XFS_FIBT_BLOCK(mp) + 1;
- return XFS_IBT_BLOCK(mp) + 1;
-}
diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h
index a9d8e97..35b60ae 100644
--- a/libxfs/xfs_alloc.h
+++ b/libxfs/xfs_alloc.h
@@ -94,8 +94,6 @@ typedef unsigned int xfs_alloctype_t;
#define XFS_ALLOC_AG_MAX_USABLE(mp) \
((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
-xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
-
/*
* Argument structure for xfs_alloc routines.
* This is turned into a structure to avoid having 20 arguments passed
@@ -241,4 +239,6 @@ int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
+xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
+
#endif /* __XFS_ALLOC_H__ */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 10/53] xfs: rmap btree transaction reservations
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (8 preceding siblings ...)
2015-12-19 9:05 ` [PATCH 09/53] libxfs: resync xfs_prealloc_blocks with the kernel Darrick J. Wong
@ 2015-12-19 9:05 ` Darrick J. Wong
2015-12-19 9:06 ` [PATCH 11/53] xfs: rmap btree requires more reserved free space Darrick J. Wong
` (42 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:05 UTC (permalink / raw)
To: david, darrick.wong; +Cc: Dave Chinner, xfs
>From : Dave Chinner <dchinner@redhat.com>
The rmap btrees will use the AGFL as the block allocation source, so
we need to ensure that the transaction reservations reflect the fact
this tree is modified by allocation and freeing. Hence we need to
extend all the extent allocation/free reservations used in
transactions to handle this.
Note that this also gets rid of the unused XFS_ALLOCFREE_LOG_RES
macro, as we now do buffer reservations based on the number of
buffers logged via xfs_calc_buf_res(). Hence we only need the buffer
count calculation now.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
[port to xfsprogs]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_trans_resv.c | 56 +++++++++++++++++++++++++++++++++--------------
libxfs/xfs_trans_resv.h | 10 --------
2 files changed, 39 insertions(+), 27 deletions(-)
diff --git a/libxfs/xfs_trans_resv.c b/libxfs/xfs_trans_resv.c
index 0c40b52..3a05e42 100644
--- a/libxfs/xfs_trans_resv.c
+++ b/libxfs/xfs_trans_resv.c
@@ -63,6 +63,28 @@ xfs_calc_buf_res(
}
/*
+ * Per-extent log reservation for the allocation btree changes
+ * involved in freeing or allocating an extent. When rmap is not enabled,
+ * there are only two trees that will be modified (free space trees), and when
+ * rmap is enabled there will be three (freespace + rmap trees). The number of
+ * blocks reserved is based on the formula:
+ *
+ * num trees * ((2 blocks/level * max depth) - 1)
+ */
+static uint
+xfs_allocfree_log_count(
+ struct xfs_mount *mp,
+ uint num_ops)
+{
+ uint num_trees = 2; /* the two free space btrees */
+
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ num_trees++; /* the rmap btree is modified as well */
+
+ /* @num_ops extent operations, each logging (2 * max depth - 1) blocks per tree */
+ return num_ops * num_trees * (2 * mp->m_ag_maxlevels - 1);
+}
+
+/*
* Logging inodes is really tricksy. They are logged in memory format,
* which means that what we write into the log doesn't directly translate into
* the amount of space they use on disk.
@@ -125,7 +147,7 @@ xfs_calc_inode_res(
*/
STATIC uint
xfs_calc_finobt_res(
- struct xfs_mount *mp,
+ struct xfs_mount *mp,
int alloc,
int modify)
{
@@ -136,7 +158,7 @@ xfs_calc_finobt_res(
res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
if (alloc)
- res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
if (modify)
res += (uint)XFS_FSB_TO_B(mp, 1);
@@ -187,10 +209,10 @@ xfs_calc_write_reservation(
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
@@ -216,10 +238,10 @@ xfs_calc_itruncate_reservation(
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(5, 0) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(2 + mp->m_ialloc_blks +
mp->m_in_maxlevels, 0)));
@@ -246,7 +268,7 @@ xfs_calc_rename_reservation(
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 3),
XFS_FSB_TO_B(mp, 1))));
}
@@ -285,7 +307,7 @@ xfs_calc_link_reservation(
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
XFS_FSB_TO_B(mp, 1))));
}
@@ -323,7 +345,7 @@ xfs_calc_remove_reservation(
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
@@ -370,7 +392,7 @@ xfs_calc_create_resv_alloc(
mp->m_sb.sb_sectsize +
xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
@@ -398,7 +420,7 @@ xfs_calc_icreate_resv_alloc(
return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
mp->m_sb.sb_sectsize +
xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_finobt_res(mp, 0, 0);
}
@@ -482,7 +504,7 @@ xfs_calc_ifree_reservation(
xfs_calc_buf_res(1, 0) +
xfs_calc_buf_res(2 + mp->m_ialloc_blks +
mp->m_in_maxlevels, 0) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_finobt_res(mp, 0, 1);
}
@@ -512,7 +534,7 @@ xfs_calc_growdata_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
@@ -534,7 +556,7 @@ xfs_calc_growrtalloc_reservation(
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
@@ -610,7 +632,7 @@ xfs_calc_addafork_reservation(
xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
@@ -633,7 +655,7 @@ xfs_calc_attrinval_reservation(
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
XFS_FSB_TO_B(mp, 1))));
}
@@ -700,7 +722,7 @@ xfs_calc_attrrm_reservation(
XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
(xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
diff --git a/libxfs/xfs_trans_resv.h b/libxfs/xfs_trans_resv.h
index 7978150..0eb46ed 100644
--- a/libxfs/xfs_trans_resv.h
+++ b/libxfs/xfs_trans_resv.h
@@ -68,16 +68,6 @@ struct xfs_trans_resv {
#define M_RES(mp) (&(mp)->m_resv)
/*
- * Per-extent log reservation for the allocation btree changes
- * involved in freeing or allocating an extent.
- * 2 trees * (2 blocks/level * max depth - 1) * block size
- */
-#define XFS_ALLOCFREE_LOG_RES(mp,nx) \
- ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
-#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
- ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
-
-/*
* Per-directory log reservation for any directory change.
* dir blocks: (1 btree block per level + data block + free block) * dblock size
* bmap btree: (levels + 2) * max depth * block size
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 11/53] xfs: rmap btree requires more reserved free space
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (9 preceding siblings ...)
2015-12-19 9:05 ` [PATCH 10/53] xfs: rmap btree transaction reservations Darrick J. Wong
@ 2015-12-19 9:06 ` Darrick J. Wong
2015-12-19 9:06 ` [PATCH 12/53] libxfs: propagate a bunch of case changes to mkfs and repair Darrick J. Wong
` (41 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:06 UTC (permalink / raw)
To: david, darrick.wong; +Cc: Dave Chinner, xfs
>From : Dave Chinner <dchinner@redhat.com>
The rmap btree is allocated from the AGFL, which means we have to
ensure ENOSPC is reported to userspace before we run out of free
space in each AG. The last allocation in an AG can cause a full
height rmap btree split, and that means we have to reserve at least
this many blocks *in each AG* to be placed on the AGFL at ENOSPC.
Update the various space calculation functions to handle this.
Also, because the macros are now executing conditional code and are called quite
frequently, convert them to functions that initialise variables in the struct
xfs_mount, use the new variables everywhere and document the calculations
better.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
[port to xfsprogs]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
include/xfs_mount.h | 2 +
libxfs/xfs_alloc.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++
libxfs/xfs_alloc.h | 43 ++++----------------------------
libxfs/xfs_bmap.c | 2 +
libxfs/xfs_sb.c | 2 +
5 files changed, 79 insertions(+), 39 deletions(-)
diff --git a/include/xfs_mount.h b/include/xfs_mount.h
index 1daee74..390ec77 100644
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -69,6 +69,8 @@ typedef struct xfs_mount {
uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */
+ uint m_alloc_set_aside; /* space we can't use */
+ uint m_ag_max_usable; /* max space per AG */
struct radix_tree_root m_perag_tree;
uint m_flags; /* global mount flags */
uint m_qflags; /* quota status flags */
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index fea94c8..bc2b3d4 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -58,6 +58,72 @@ xfs_prealloc_blocks(
}
/*
+ * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
+ * AGF buffer (PV 947395), we place constraints on the relationship among actual
+ * allocations for data blocks, freelist blocks, and potential file data bmap
+ * btree blocks. However, these restrictions may result in no actual space
+ * allocated for a delayed extent, for example, a data block in a certain AG is
+ * allocated but there is no additional block for the additional bmap btree
+ * block due to a split of the bmap btree of the file. The result of this may
+ * lead to an infinite loop when the file gets flushed to disk and all delayed
+ * extents need to be actually allocated. To get around this, we explicitly set
+ * aside a few blocks which will not be reserved in delayed allocation.
+ *
+ * The minimum number of needed freelist blocks is 4 fsbs _per AG_ when we are
+ * not using rmap btrees, and a potential split of a file's bmap btree requires
+ * 1 fsb, so we set the number of set-aside blocks to 4 + 4*agcount when not
+ * using rmap btrees.
+ *
+ * When rmap btrees are active, we have to consider that using the last block in
+ * the AG can cause a full height rmap btree split and we need enough blocks on
+ * the AGFL to be able to handle this. That means we have, in addition to the
+ * above consideration, another (2 * mp->m_ag_maxlevels) - 1 blocks per AG
+ * required to be available to the free list.
+ */
+unsigned int
+xfs_alloc_set_aside(
+ struct xfs_mount *mp)
+{
+ unsigned int blocks;
+
+ /* base set-aside: 4 blocks plus the AGFL reserve for every AG */
+ blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return blocks;
+
+ /*
+ * A full height rmap btree split needs (2 * maxlevels - 1) blocks in
+ * *each* AG, so the "- 1" belongs inside the per-AG multiplication
+ * rather than being subtracted once from the filesystem-wide total.
+ *
+ * NOTE(review): this assumes mp->m_ag_maxlevels has already been
+ * computed (is nonzero) when this runs -- confirm against the caller.
+ */
+ return blocks + mp->m_sb.sb_agcount * (2 * mp->m_ag_maxlevels - 1);
+}
+
+/*
+ * When deciding how much space to allocate out of an AG, we limit the
+ * allocation maximum size to the size the AG. However, we cannot use all the
+ * blocks in the AG - some are permanently used by metadata. These
+ * blocks are generally:
+ * - the AG superblock, AGF, AGI and AGFL
+ * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
+ * the AGI free inode and rmap btree root blocks.
+ * - blocks on the AGFL according to xfs_alloc_set_aside() limits
+ *
+ * The AG headers are sector sized, so the amount of space they take up is
+ * dependent on filesystem geometry. The others are all single blocks.
+ */
+unsigned int
+xfs_alloc_ag_max_usable(struct xfs_mount *mp)
+{
+ unsigned int blocks;
+
+ /* total up the blocks in an AG that can never be handed out ... */
+ blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
+ blocks += XFS_ALLOC_AGFL_RESERVE;
+ blocks += 3; /* AGF, AGI btree root blocks */
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ blocks++; /* finobt root block */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ /* rmap root block + full tree split on full AG */
+ blocks += 1 + (2 * mp->m_ag_maxlevels) - 1;
+ }
+
+ /* ... and subtract that from the AG size */
+ return mp->m_sb.sb_agblocks - blocks;
+}
+
+/*
* Lookup the record equal to [bno, len] in the btree given by cur.
*/
STATIC int /* error */
@@ -1900,6 +1966,9 @@ xfs_alloc_min_freelist(
/* space needed by-size freespace btree */
min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
mp->m_ag_maxlevels);
+ /* space needed reverse mapping used space btree */
+ min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
+ mp->m_ag_maxlevels);
return min_free;
}
diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h
index 35b60ae..5b2b616 100644
--- a/libxfs/xfs_alloc.h
+++ b/libxfs/xfs_alloc.h
@@ -55,44 +55,6 @@ typedef unsigned int xfs_alloctype_t;
#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
-/*
- * In order to avoid ENOSPC-related deadlock caused by
- * out-of-order locking of AGF buffer (PV 947395), we place
- * constraints on the relationship among actual allocations for
- * data blocks, freelist blocks, and potential file data bmap
- * btree blocks. However, these restrictions may result in no
- * actual space allocated for a delayed extent, for example, a data
- * block in a certain AG is allocated but there is no additional
- * block for the additional bmap btree block due to a split of the
- * bmap btree of the file. The result of this may lead to an
- * infinite loop in xfssyncd when the file gets flushed to disk and
- * all delayed extents need to be actually allocated. To get around
- * this, we explicitly set aside a few blocks which will not be
- * reserved in delayed allocation. Considering the minimum number of
- * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
- * btree requires 1 fsb, so we set the number of set-aside blocks
- * to 4 + 4*agcount.
- *
- * XXX: this changes for rmapbt filesystems.
- */
-#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
-
-/*
- * When deciding how much space to allocate out of an AG, we limit the
- * allocation maximum size to the size the AG. However, we cannot use all the
- * blocks in the AG - some are permanently used by metadata. These
- * blocks are generally:
- * - the AG superblock, AGF, AGI and AGFL
- * - the AGF (bno and cnt) and AGI btree root blocks
- * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
- *
- * The AG headers are sector sized, so the amount of space they take up is
- * dependent on filesystem geometry. The others are all single blocks.
- *
- * XXX: this changes for rmapbt filesystems.
- */
-#define XFS_ALLOC_AG_MAX_USABLE(mp) \
- ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
/*
* Argument structure for xfs_alloc routines.
@@ -134,6 +96,11 @@ typedef struct xfs_alloc_arg {
#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/
#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */
+/* freespace limit calculations */
+#define XFS_ALLOC_AGFL_RESERVE 4
+unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
+unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
+
xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
struct xfs_perag *pag, xfs_extlen_t need);
unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index 3fe18fc..785d10a 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -3686,7 +3686,7 @@ xfs_bmap_btalloc(
args.fsbno = ap->blkno;
/* Trim the allocation back to the maximum an AG can fit. */
- args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
+ args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
args.firstblock = *ap->firstblock;
blen = 0;
if (nullfb) {
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
index b23dfd1..85ef128 100644
--- a/libxfs/xfs_sb.c
+++ b/libxfs/xfs_sb.c
@@ -726,6 +726,8 @@ xfs_sb_mount_common(
mp->m_ialloc_min_blks = sbp->sb_spino_align;
else
mp->m_ialloc_min_blks = mp->m_ialloc_blks;
+ mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+ mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
}
/*
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 12/53] libxfs: propagate a bunch of case changes to mkfs and repair
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (10 preceding siblings ...)
2015-12-19 9:06 ` [PATCH 11/53] xfs: rmap btree requires more reserved free space Darrick J. Wong
@ 2015-12-19 9:06 ` Darrick J. Wong
2015-12-19 9:06 ` [PATCH 13/53] libxfs: fix min freelist length calculation Darrick J. Wong
` (40 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:06 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
A few macros got replaced with functions, so update mkfs and repair
to use the new symbol names. Fix a duplicate #define too.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_format.h | 12 ------------
mkfs/xfs_mkfs.c | 16 ++++++++--------
2 files changed, 8 insertions(+), 20 deletions(-)
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
index 52b1d06..9a4f328 100644
--- a/libxfs/xfs_format.h
+++ b/libxfs/xfs_format.h
@@ -1359,24 +1359,12 @@ typedef __be32 xfs_rmap_ptr_t;
/*
* block numbers in the AG.
*/
-#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
-#define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
#define XFS_RMAP_BLOCK(mp) \
(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
XFS_FIBT_BLOCK(mp) + 1 : \
XFS_IBT_BLOCK(mp) + 1)
/*
- * The first data block of an AG depends on whether the filesystem was formatted
- * with the optional btree features. These need to be accounted for
- * appropriately.
- *
- * XXX: this should be calculated once at mount time and stored in the struct
- * xfs_mount rather than calculated every time it is used.
- */
-#define XFS_PREALLOC_BLOCKS(mp) xfs_prealloc_blocks(mp)
-
-/*
* BMAP Btree format definitions
*
* This includes both the root block definition that sits inside an inode fork
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index 7cba41a..934d7c0 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -2482,7 +2482,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
/*
* sb_versionnum and finobt flags must be set before we use
- * XFS_PREALLOC_BLOCKS().
+ * xfs_prealloc_blocks().
*/
sbp->sb_features2 = XFS_SB_VERSION2_MKFS(crcs_enabled, lazy_sb_counters,
attrversion == 2, !projid16bit, 0,
@@ -2511,12 +2511,12 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
*/
if (!logsize) {
logblocks = MIN(logblocks,
- XFS_ALLOC_AG_MAX_USABLE(mp));
+ xfs_alloc_ag_max_usable(mp));
/* revalidate the log size is valid if we changed it */
validate_log_size(logblocks, blocklog, min_logblocks);
}
- if (logblocks > agsize - XFS_PREALLOC_BLOCKS(mp)) {
+ if (logblocks > agsize - xfs_prealloc_blocks(mp)) {
fprintf(stderr,
_("internal log size %lld too large, must fit in allocation group\n"),
(long long)logblocks);
@@ -2533,7 +2533,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
} else
logagno = (xfs_agnumber_t)(agcount / 2);
- logstart = XFS_AGB_TO_FSB(mp, logagno, XFS_PREALLOC_BLOCKS(mp));
+ logstart = XFS_AGB_TO_FSB(mp, logagno, xfs_prealloc_blocks(mp));
/*
* Align the logstart at stripe unit boundary.
*/
@@ -2616,7 +2616,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
sbp->sb_imax_pct = imaxpct;
sbp->sb_icount = 0;
sbp->sb_ifree = 0;
- sbp->sb_fdblocks = dblocks - agcount * XFS_PREALLOC_BLOCKS(mp) -
+ sbp->sb_fdblocks = dblocks - agcount * xfs_prealloc_blocks(mp) -
(loginternal ? logblocks : 0);
sbp->sb_frextents = 0; /* will do a free later */
sbp->sb_uquotino = sbp->sb_gquotino = sbp->sb_pquotino = 0;
@@ -2767,7 +2767,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
agf->agf_flfirst = 0;
agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
agf->agf_flcount = 0;
- nbmblocks = (xfs_extlen_t)(agsize - XFS_PREALLOC_BLOCKS(mp));
+ nbmblocks = (xfs_extlen_t)(agsize - xfs_prealloc_blocks(mp));
agf->agf_freeblks = cpu_to_be32(nbmblocks);
agf->agf_longest = cpu_to_be32(nbmblocks);
if (xfs_sb_version_hascrc(&mp->m_sb))
@@ -2848,7 +2848,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
agno, 0);
arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
- arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
+ arec->ar_startblock = cpu_to_be32(xfs_prealloc_blocks(mp));
if (loginternal && agno == logagno) {
if (lalign) {
/*
@@ -2903,7 +2903,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
agno, 0);
arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
- arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
+ arec->ar_startblock = cpu_to_be32(xfs_prealloc_blocks(mp));
if (loginternal && agno == logagno) {
if (lalign) {
arec->ar_blockcount = cpu_to_be32(
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 13/53] libxfs: fix min freelist length calculation
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (11 preceding siblings ...)
2015-12-19 9:06 ` [PATCH 12/53] libxfs: propagate a bunch of case changes to mkfs and repair Darrick J. Wong
@ 2015-12-19 9:06 ` Darrick J. Wong
2015-12-19 9:06 ` [PATCH 14/53] libxfs: add the RMAP CRC to the xfs_magics list Darrick J. Wong
` (39 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:06 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
If rmapbt is disabled, it is incorrect to require 1 extra AGFL block
for the rmapbt (due to the + 1); the entire clause needs to be gated
on the feature flag.
This causes serious problems when formatting a v4 filesystem: the
extra AGFL block shifts the root inode away from where xfs_repair
expects it, so xfs_repair reports major FS damage even though
everything is fine.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_alloc.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index bc2b3d4..b66ca99 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -1967,8 +1967,10 @@ xfs_alloc_min_freelist(
min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
mp->m_ag_maxlevels);
/* space needed reverse mapping used space btree */
- min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
- mp->m_ag_maxlevels);
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ min_free += min_t(unsigned int,
+ pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
+ mp->m_ag_maxlevels);
return min_free;
}
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 14/53] libxfs: add the RMAP CRC to the xfs_magics list
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (12 preceding siblings ...)
2015-12-19 9:06 ` [PATCH 13/53] libxfs: fix min freelist length calculation Darrick J. Wong
@ 2015-12-19 9:06 ` Darrick J. Wong
2015-12-19 9:06 ` [PATCH 15/53] libxfs: piggyback rmapbt update intents in the bmap free structure Darrick J. Wong
` (38 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:06 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_btree.c | 7 +++++--
libxfs/xfs_shared.h | 1 +
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
index f28e3a5..5cf8ee5 100644
--- a/libxfs/xfs_btree.c
+++ b/libxfs/xfs_btree.c
@@ -39,9 +39,9 @@ kmem_zone_t *xfs_btree_cur_zone;
* Btree magic numbers.
*/
static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
- { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
+ { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
XFS_FIBT_MAGIC },
- { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
+ { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
};
#define xfs_btree_magic(cur) \
@@ -1125,6 +1125,9 @@ xfs_btree_set_refs(
case XFS_BTNUM_BMAP:
xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
break;
+ case XFS_BTNUM_RMAP:
+ xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
+ break;
default:
ASSERT(0);
}
diff --git a/libxfs/xfs_shared.h b/libxfs/xfs_shared.h
index f756920..fa2bb9b 100644
--- a/libxfs/xfs_shared.h
+++ b/libxfs/xfs_shared.h
@@ -211,6 +211,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
#define XFS_INO_BTREE_REF 3
#define XFS_ALLOC_BTREE_REF 2
#define XFS_BMAP_BTREE_REF 2
+#define XFS_RMAP_BTREE_REF 2
#define XFS_DIR_BTREE_REF 2
#define XFS_INO_REF 2
#define XFS_ATTR_BTREE_REF 1
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 15/53] libxfs: piggyback rmapbt update intents in the bmap free structure
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (13 preceding siblings ...)
2015-12-19 9:06 ` [PATCH 14/53] libxfs: add the RMAP CRC to the xfs_magics list Darrick J. Wong
@ 2015-12-19 9:06 ` Darrick J. Wong
2015-12-19 9:06 ` [PATCH 16/53] libxfs: enhance rmapbt definition to support reflink Darrick J. Wong
` (37 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:06 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Extend the xfs_bmap_free structure to track a list of rmapbt update
intents. In a subsequent patch, we'll defer all data fork rmapbt
edits until we're done making changes to the bmbt, at which point we
can replay the rmap edits in order of increasing AG number. This
enables us to avoid deadlocks by complying with AG lock ordering
rules.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
include/libxfs.h | 3 ++-
libxfs/util.c | 9 +++++----
libxfs/xfs_attr.c | 18 +++++++++---------
libxfs/xfs_attr_remote.c | 4 ++--
libxfs/xfs_bmap.c | 4 ++--
libxfs/xfs_bmap.h | 13 ++++++++++++-
mkfs/proto.c | 12 ++++++------
repair/phase6.c | 24 ++++++++++++------------
8 files changed, 50 insertions(+), 37 deletions(-)
diff --git a/include/libxfs.h b/include/libxfs.h
index 7d1ad46..2357aec 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -165,7 +165,8 @@ extern unsigned int libxfs_log2_roundup(unsigned int i);
extern int libxfs_alloc_file_space (struct xfs_inode *, xfs_off_t,
xfs_off_t, int, int);
-extern int libxfs_bmap_finish(xfs_trans_t **, xfs_bmap_free_t *, int *);
+extern int libxfs_bmap_finish(xfs_trans_t **, xfs_bmap_free_t *, int *,
+ struct xfs_inode *ip);
extern void libxfs_fs_repair_cmn_err(int, struct xfs_mount *, char *, ...);
extern void libxfs_fs_cmn_err(int, struct xfs_mount *, char *, ...);
diff --git a/libxfs/util.c b/libxfs/util.c
index 0609ba4..19d5e0e 100644
--- a/libxfs/util.c
+++ b/libxfs/util.c
@@ -491,9 +491,10 @@ libxfs_mod_incore_sb(
int
libxfs_bmap_finish(
- xfs_trans_t **tp,
- xfs_bmap_free_t *flist,
- int *committed)
+ xfs_trans_t **tp,
+ xfs_bmap_free_t *flist,
+ int *committed,
+ struct xfs_inode *ip)
{
xfs_bmap_free_item_t *free; /* free extent list item */
xfs_bmap_free_item_t *next; /* next item on free list */
@@ -586,7 +587,7 @@ libxfs_alloc_file_space(
goto error0;
/* complete the transaction */
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed, ip);
if (error)
goto error0;
diff --git a/libxfs/xfs_attr.c b/libxfs/xfs_attr.c
index bdde0f6..083a14c 100644
--- a/libxfs/xfs_attr.c
+++ b/libxfs/xfs_attr.c
@@ -329,7 +329,7 @@ xfs_attr_set(
error = xfs_attr_shortform_to_leaf(&args);
if (!error) {
error = xfs_bmap_finish(&args.trans, args.flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
@@ -623,7 +623,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
error = xfs_attr3_leaf_to_node(args);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
@@ -725,7 +725,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
if (!error) {
error = xfs_bmap_finish(&args->trans,
args->flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
@@ -798,7 +798,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
/* bp is gone due to xfs_da_shrink_inode */
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
@@ -934,7 +934,7 @@ restart:
if (!error) {
error = xfs_bmap_finish(&args->trans,
args->flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
@@ -972,7 +972,7 @@ restart:
error = xfs_da3_split(state);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
@@ -1082,7 +1082,7 @@ restart:
if (!error) {
error = xfs_bmap_finish(&args->trans,
args->flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
@@ -1215,7 +1215,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
error = xfs_da3_join(state);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
@@ -1261,7 +1261,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
if (!error) {
error = xfs_bmap_finish(&args->trans,
args->flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
diff --git a/libxfs/xfs_attr_remote.c b/libxfs/xfs_attr_remote.c
index 95383e3..ac64009 100644
--- a/libxfs/xfs_attr_remote.c
+++ b/libxfs/xfs_attr_remote.c
@@ -464,7 +464,7 @@ xfs_attr_rmtval_set(
args->total, &map, &nmap, args->flist);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
+ &committed, dp);
}
if (error) {
ASSERT(committed);
@@ -618,7 +618,7 @@ xfs_attr_rmtval_remove(
args->flist, &done);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- &committed);
+ &committed, args->dp);
}
if (error) {
ASSERT(committed);
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index 785d10a..9595971 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -1190,7 +1190,7 @@ xfs_bmap_add_attrfork(
xfs_log_sb(tp);
}
- error = xfs_bmap_finish(&tp, &flist, &committed);
+ error = xfs_bmap_finish(&tp, &flist, &committed, NULL);
if (error)
goto bmap_cancel;
error = xfs_trans_commit(tp);
@@ -5918,7 +5918,7 @@ xfs_bmap_split_extent(
if (error)
goto out;
- error = xfs_bmap_finish(&tp, &free_list, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed, NULL);
if (error)
goto out;
diff --git a/libxfs/xfs_bmap.h b/libxfs/xfs_bmap.h
index d3daf6d..722f36c 100644
--- a/libxfs/xfs_bmap.h
+++ b/libxfs/xfs_bmap.h
@@ -56,6 +56,7 @@ struct xfs_bmalloca {
bool aeof; /* allocated space at eof */
bool conv; /* overwriting unwritten extents */
int flags;
+ struct xfs_rmap_list *rlist;
};
/*
@@ -69,6 +70,13 @@ typedef struct xfs_bmap_free_item
struct xfs_bmap_free_item *xbfi_next; /* link to next entry */
} xfs_bmap_free_item_t;
+struct xfs_rmap_intent;
+
+struct xfs_rmap_list {
+ struct xfs_rmap_intent *rl_first;
+ unsigned int rl_count;
+};
+
/*
* Header for free extent list.
*
@@ -88,6 +96,7 @@ typedef struct xfs_bmap_free
xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */
int xbf_count; /* count of items on list */
int xbf_low; /* alloc in low mode */
+ struct xfs_rmap_list xbf_rlist; /* rmap intent list */
} xfs_bmap_free_t;
#define XFS_BMAP_MAX_NMAP 4
@@ -134,6 +143,8 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
{
((flp)->xbf_first = NULL, (flp)->xbf_count = 0, \
(flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK);
+ flp->xbf_rlist.rl_first = NULL;
+ flp->xbf_rlist.rl_count = 0;
}
/*
@@ -186,7 +197,7 @@ void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_bmap_free *flist,
xfs_fsblock_t bno, xfs_filblks_t len);
void xfs_bmap_cancel(struct xfs_bmap_free *flist);
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
- int *committed);
+ int *committed, struct xfs_inode *ip);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
diff --git a/mkfs/proto.c b/mkfs/proto.c
index cb34b28..c7e94e3 100644
--- a/mkfs/proto.c
+++ b/mkfs/proto.c
@@ -481,7 +481,7 @@ parseproto(
newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist);
libxfs_trans_log_inode(tp, ip, flags);
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, ip);
if (error)
fail(_("Pre-allocated file creation failed"), error);
libxfs_trans_commit(tp);
@@ -563,7 +563,7 @@ parseproto(
}
newdirectory(mp, tp, ip, pip);
libxfs_trans_log_inode(tp, ip, flags);
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, ip);
if (error)
fail(_("Directory creation failed"), error);
libxfs_trans_commit(tp);
@@ -589,7 +589,7 @@ parseproto(
fail(_("Unknown format"), EINVAL);
}
libxfs_trans_log_inode(tp, ip, flags);
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, ip);
if (error) {
fail(_("Error encountered creating file from prototype file"),
error);
@@ -700,7 +700,7 @@ rtinit(
}
}
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, rbmip);
if (error) {
fail(_("Completion of the realtime bitmap failed"), error);
}
@@ -735,7 +735,7 @@ rtinit(
bno += ep->br_blockcount;
}
}
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, rsumip);
if (error) {
fail(_("Completion of the realtime summary failed"), error);
}
@@ -759,7 +759,7 @@ rtinit(
fail(_("Error initializing the realtime space"),
error);
}
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, rbmip);
if (error) {
fail(_("Error completing the realtime space"), error);
}
diff --git a/repair/phase6.c b/repair/phase6.c
index e41bf20..e7b2387 100644
--- a/repair/phase6.c
+++ b/repair/phase6.c
@@ -578,7 +578,7 @@ mk_rbmino(xfs_mount_t *mp)
bno += ep->br_blockcount;
}
}
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, ip);
if (error) {
do_error(
_("allocation of the realtime bitmap failed, error = %d\n"),
@@ -843,7 +843,7 @@ mk_rsumino(xfs_mount_t *mp)
bno += ep->br_blockcount;
}
}
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, ip);
if (error) {
do_error(
_("allocation of the realtime summary ino failed, error = %d\n"),
@@ -1059,7 +1059,7 @@ mk_orphanage(xfs_mount_t *mp)
libxfs_dir_init(tp, ip, pip);
libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, pip);
if (error) {
do_error(_("%s directory creation failed -- bmapf error %d\n"),
ORPHANAGE, error);
@@ -1168,7 +1168,7 @@ mv_orphanage(
ino_p->i_d.di_nlink++;
libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
- err = -libxfs_bmap_finish(&tp, &flist, &committed);
+ err = -libxfs_bmap_finish(&tp, &flist, &committed, ino_p);
if (err)
do_error(
_("bmap finish failed (err - %d), filesystem may be out of space\n"),
@@ -1215,7 +1215,7 @@ mv_orphanage(
err);
}
- err = -libxfs_bmap_finish(&tp, &flist, &committed);
+ err = -libxfs_bmap_finish(&tp, &flist, &committed, ino_p);
if (err)
do_error(
_("bmap finish failed (%d), filesystem may be out of space\n"),
@@ -1254,7 +1254,7 @@ mv_orphanage(
ino_p->i_d.di_nlink = 1;
libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
- err = -libxfs_bmap_finish(&tp, &flist, &committed);
+ err = -libxfs_bmap_finish(&tp, &flist, &committed, orphanage_ip);
if (err)
do_error(
_("bmap finish failed (%d), filesystem may be out of space\n"),
@@ -1356,7 +1356,7 @@ longform_dir2_rebuild(
goto out_bmap_cancel;
}
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, ip);
libxfs_trans_commit(tp);
@@ -1391,7 +1391,7 @@ _("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"
goto out_bmap_cancel;
}
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, ip);
if (error) {
do_warn(
_("bmap finish failed (%d), filesystem may be out of space\n"),
@@ -1453,7 +1453,7 @@ dir2_kill_block(
if (error)
do_error(_("shrink_inode failed inode %" PRIu64 " block %u\n"),
ip->i_ino, da_bno);
- libxfs_bmap_finish(&tp, &flist, &committed);
+ libxfs_bmap_finish(&tp, &flist, &committed, ip);
libxfs_trans_commit(tp);
}
@@ -1930,7 +1930,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 "
libxfs_dir2_data_freescan(mp->m_dir_geo, M_DIROPS(mp), d, &i);
if (needlog)
libxfs_dir2_data_log_header(&da, bp);
- libxfs_bmap_finish(&tp, &flist, &committed);
+ libxfs_bmap_finish(&tp, &flist, &committed, ip);
libxfs_trans_commit(tp);
/* record the largest free space in the freetab for later checking */
@@ -2996,7 +2996,7 @@ process_dir_inode(
libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, ip);
ASSERT(error == 0);
libxfs_trans_commit(tp);
@@ -3057,7 +3057,7 @@ process_dir_inode(
libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = -libxfs_bmap_finish(&tp, &flist, &committed);
+ error = -libxfs_bmap_finish(&tp, &flist, &committed, ip);
ASSERT(error == 0);
libxfs_trans_commit(tp);
}
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 16/53] libxfs: enhance rmapbt definition to support reflink
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (14 preceding siblings ...)
2015-12-19 9:06 ` [PATCH 15/53] libxfs: piggyback rmapbt update intents in the bmap free structure Darrick J. Wong
@ 2015-12-19 9:06 ` Darrick J. Wong
2015-12-19 9:06 ` [PATCH 17/53] libxfs: refactor short btree block verification Darrick J. Wong
` (36 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:06 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Enlarge the rmapbt records to support reflink operation.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
include/xfs_trace.h | 12 +
libxfs/util.c | 13 -
libxfs/xfs_alloc.c | 48 +-
libxfs/xfs_alloc.h | 5
libxfs/xfs_bmap.c | 200 ++++++++-
libxfs/xfs_bmap.h | 4
libxfs/xfs_bmap_btree.c | 7
libxfs/xfs_format.h | 122 +++++
libxfs/xfs_ialloc.c | 8
libxfs/xfs_ialloc_btree.c | 6
libxfs/xfs_rmap.c | 1013 +++++++++++++++++++++++++++++++++++++++++----
libxfs/xfs_rmap_btree.c | 86 ++--
libxfs/xfs_rmap_btree.h | 78 +++
13 files changed, 1434 insertions(+), 168 deletions(-)
diff --git a/include/xfs_trace.h b/include/xfs_trace.h
index ebdf778..2c8d34e 100644
--- a/include/xfs_trace.h
+++ b/include/xfs_trace.h
@@ -178,4 +178,16 @@
#define trace_xfs_rmap_free_extent_done(a,b,c,d,e) ((void) 0)
#define trace_xfs_rmap_free_extent_error(a,b,c,d,e) ((void) 0)
+#define trace_xfs_rmapbt_delete(a...) ((void) 0)
+#define trace_xfs_rmapbt_insert(a...) ((void) 0)
+#define trace_xfs_rmap_insert(a...) ((void) 0)
+#define trace_xfs_rmap_delete(a...) ((void) 0)
+#define trace_xfs_rmap_move(a...) ((void) 0)
+#define trace_xfs_rmap_slide(a...) ((void) 0)
+#define trace_xfs_rmap_resize(a...) ((void) 0)
+#define trace_xfs_rmapbt_update(a...) ((void) 0)
+#define trace_xfs_rmap_combine(a...) ((void) 0)
+#define trace_xfs_rmap_lcombine(a...) ((void) 0)
+#define trace_xfs_rmap_rcombine(a...) ((void) 0)
+
#endif /* __TRACE_H__ */
diff --git a/libxfs/util.c b/libxfs/util.c
index 19d5e0e..37fb2c4 100644
--- a/libxfs/util.c
+++ b/libxfs/util.c
@@ -33,6 +33,7 @@
#include "xfs_trans_space.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
+#include "xfs_rmap_btree.h"
/*
* Calculate the worst case log unit reservation for a given superblock
@@ -500,15 +501,19 @@ libxfs_bmap_finish(
xfs_bmap_free_item_t *next; /* next item on free list */
int error;
- if (flist->xbf_count == 0) {
- *committed = 0;
+ *committed = 0;
+ error = xfs_rmap_finish((*tp)->t_mountp, tp, ip, &flist->xbf_rlist,
+ committed);
+ if (error)
+ return error;
+
+ if (flist->xbf_count == 0)
return 0;
- }
for (free = flist->xbf_first; free != NULL; free = next) {
next = free->xbfi_next;
if ((error = xfs_free_extent(*tp, free->xbfi_startblock,
- free->xbfi_blockcount)))
+ free->xbfi_blockcount, &free->xbfi_oinfo)))
return error;
xfs_bmap_del_free(flist, NULL, free);
}
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index b66ca99..6c2b991 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -710,11 +710,13 @@ xfs_alloc_ag_vextent(
ASSERT(!args->wasfromfl || !args->isfl);
ASSERT(args->agbno % args->alignment == 0);
- /* insert new block into the reverse map btree */
- error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
- args->agbno, args->len, args->owner);
- if (error)
- return error;
+ /* if not file data, insert new block into the reverse map btree */
+ if (args->oinfo.oi_owner) {
+ error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
+ args->agbno, args->len, &args->oinfo);
+ if (error)
+ return error;
+ }
if (!args->wasfromfl) {
error = xfs_alloc_update_counters(args->tp, args->pag,
@@ -1664,6 +1666,7 @@ xfs_free_ag_extent(
xfs_agnumber_t agno, /* allocation group number */
xfs_agblock_t bno, /* starting block number */
xfs_extlen_t len, /* length of extent */
+ struct xfs_owner_info *oinfo, /* extent owner */
int isfl) /* set if is freelist blocks - no sb acctg */
{
xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
@@ -1681,12 +1684,19 @@ xfs_free_ag_extent(
xfs_extlen_t nlen; /* new length of freespace */
xfs_perag_t *pag; /* per allocation group data */
+ bno_cur = cnt_cur = NULL;
mp = tp->t_mountp;
+
+ if (oinfo->oi_owner) {
+ error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
+ if (error)
+ goto error0;
+ }
+
/*
* Allocate and initialize a cursor for the by-block btree.
*/
bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
- cnt_cur = NULL;
/*
* Look for a neighboring block on the left (lower block numbers)
* that is contiguous with this space.
@@ -2089,13 +2099,15 @@ xfs_alloc_fix_freelist(
* back on the free list? Maybe we should only do this when space is
* getting low or the AGFL is more than half full?
*/
+ XFS_RMAP_AG_OWNER(&targs.oinfo, XFS_RMAP_OWN_AG);
while (pag->pagf_flcount > need) {
struct xfs_buf *bp;
error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
if (error)
goto out_agbp_relse;
- error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
+ error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
+ &targs.oinfo, 1);
if (error)
goto out_agbp_relse;
bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
@@ -2105,7 +2117,7 @@ xfs_alloc_fix_freelist(
memset(&targs, 0, sizeof(targs));
targs.tp = tp;
targs.mp = mp;
- targs.owner = XFS_RMAP_OWN_AG;
+ XFS_RMAP_AG_OWNER(&targs.oinfo, XFS_RMAP_OWN_AG);
targs.agbp = agbp;
targs.agno = args->agno;
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
@@ -2368,6 +2380,10 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
return false;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
+ return false;
+
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
@@ -2499,6 +2515,8 @@ xfs_alloc_read_agf(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
pag->pagf_levels[XFS_BTNUM_CNTi] =
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
+ pag->pagf_levels[XFS_BTNUM_RMAPi] =
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
spin_lock_init(&pag->pagb_lock);
pag->pagb_count = 0;
/* XXX: pagb_tree doesn't exist in userspace */
@@ -2741,14 +2759,13 @@ error0:
* Free an extent.
* Just break up the extent address and hand off to xfs_free_ag_extent
* after fixing up the freelist.
- *
- * XXX: need owner of extent being freed
*/
int /* error */
xfs_free_extent(
xfs_trans_t *tp, /* transaction pointer */
xfs_fsblock_t bno, /* starting block number of extent */
- xfs_extlen_t len) /* length of extent */
+ xfs_extlen_t len, /* length of extent */
+ struct xfs_owner_info *oinfo) /* extent owner */
{
xfs_alloc_arg_t args;
int error;
@@ -2784,13 +2801,8 @@ xfs_free_extent(
goto error0;
}
- /* XXX: need owner */
- error = xfs_rmap_free(tp, args.agbp, args.agno, args.agbno, len, 0);
- if (error)
- goto error0;
-
- /* XXX: initially no multiple references, so just free it */
- error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
+ error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
+ len, oinfo, 0);
if (!error)
xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
error0:
diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h
index 5b2b616..f78ce53 100644
--- a/libxfs/xfs_alloc.h
+++ b/libxfs/xfs_alloc.h
@@ -87,7 +87,7 @@ typedef struct xfs_alloc_arg {
char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */
xfs_fsblock_t firstblock; /* io first block allocated */
- uint64_t owner; /* owner of blocks being allocated */
+ struct xfs_owner_info oinfo; /* owner of blocks being allocated */
} xfs_alloc_arg_t;
/*
@@ -179,7 +179,8 @@ int /* error */
xfs_free_extent(
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t bno, /* starting block number of extent */
- xfs_extlen_t len); /* length of extent */
+ xfs_extlen_t len, /* length of extent */
+ struct xfs_owner_info *oinfo); /* extent owner */
int /* error */
xfs_alloc_lookup_le(
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index 9595971..cedb64b 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -37,6 +37,7 @@
#include "xfs_trace.h"
#include "xfs_attr_leaf.h"
#include "xfs_quota_defs.h"
+#include "xfs_rmap_btree.h"
kmem_zone_t *xfs_bmap_free_item_zone;
@@ -546,7 +547,8 @@ xfs_bmap_add_free(
struct xfs_mount *mp, /* mount point structure */
struct xfs_bmap_free *flist, /* list of extents */
xfs_fsblock_t bno, /* fs block number of extent */
- xfs_filblks_t len) /* length of extent */
+ xfs_filblks_t len, /* length of extent */
+ struct xfs_owner_info *oinfo) /* extent owner */
{
xfs_bmap_free_item_t *cur; /* current (next) element */
xfs_bmap_free_item_t *new; /* new element */
@@ -567,9 +569,14 @@ xfs_bmap_add_free(
ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
ASSERT(xfs_bmap_free_item_zone != NULL);
+
new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
new->xbfi_startblock = bno;
new->xbfi_blockcount = (xfs_extlen_t)len;
+ if (oinfo)
+ memcpy(&new->xbfi_oinfo, oinfo, sizeof(struct xfs_owner_info));
+ else
+ memset(&new->xbfi_oinfo, 0, sizeof(struct xfs_owner_info));
for (prev = NULL, cur = flist->xbf_first;
cur != NULL;
prev = cur, cur = cur->xbfi_next) {
@@ -612,6 +619,8 @@ xfs_bmap_cancel(
xfs_bmap_free_item_t *free; /* free list item */
xfs_bmap_free_item_t *next;
+ xfs_rmap_cancel(&flist->xbf_rlist);
+
if (flist->xbf_count == 0)
return;
ASSERT(flist->xbf_first != NULL);
@@ -649,6 +658,7 @@ xfs_bmap_btree_to_extents(
xfs_mount_t *mp; /* mount point structure */
__be64 *pp; /* ptr to block address */
struct xfs_btree_block *rblock;/* root btree block */
+ struct xfs_owner_info oinfo;
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -672,7 +682,8 @@ xfs_bmap_btree_to_extents(
cblock = XFS_BUF_TO_BLOCK(cbp);
if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
return error;
- xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1);
+ XFS_RMAP_INO_BMBT_OWNER(&oinfo, ip->i_ino, whichfork);
+ xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1, &oinfo);
ip->i_d.di_nblocks--;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, cbp);
@@ -753,7 +764,7 @@ xfs_bmap_extents_to_btree(
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = mp;
- args.owner = ip->i_ino;
+ XFS_RMAP_INO_BMBT_OWNER(&args.oinfo, ip->i_ino, whichfork);
args.firstblock = *firstblock;
if (*firstblock == NULLFSBLOCK) {
args.type = XFS_ALLOCTYPE_START_BNO;
@@ -900,7 +911,7 @@ xfs_bmap_local_to_extents(
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = ip->i_mount;
- args.owner = ip->i_ino;
+ XFS_RMAP_INO_OWNER(&args.oinfo, ip->i_ino, whichfork, 0);
args.firstblock = *firstblock;
/*
* Allocate a block. We know we need only one, since the
@@ -1830,6 +1841,10 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ error = xfs_rmap_combine(mp, bma->rlist, bma->ip->i_ino,
+ XFS_DATA_FORK, &LEFT, &RIGHT, &PREV);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1862,6 +1877,10 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
+ XFS_DATA_FORK, &LEFT, PREV.br_blockcount);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1893,6 +1912,10 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
+ XFS_DATA_FORK, &RIGHT, -PREV.br_blockcount);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1922,6 +1945,10 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
+ error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+ XFS_DATA_FORK, new);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1957,6 +1984,10 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
+ XFS_DATA_FORK, &LEFT, new->br_blockcount);
+ if (error)
+ goto done;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock));
xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
@@ -1992,6 +2023,10 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
+ error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+ XFS_DATA_FORK, new);
+ if (error)
+ goto done;
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
@@ -2040,6 +2075,8 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
+ XFS_DATA_FORK, &RIGHT, -new->br_blockcount);
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock));
@@ -2076,6 +2113,10 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
+ error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+ XFS_DATA_FORK, new);
+ if (error)
+ goto done;
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
@@ -2145,6 +2186,10 @@ xfs_bmap_add_extent_delay_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
+ error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+ XFS_DATA_FORK, new);
+ if (error)
+ goto done;
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
@@ -2386,6 +2431,10 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_blockcount, LEFT.br_state)))
goto done;
}
+ error = xfs_rmap_combine(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &LEFT, &RIGHT, &PREV);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -2423,6 +2472,10 @@ xfs_bmap_add_extent_unwritten_real(
LEFT.br_state)))
goto done;
}
+ error = xfs_rmap_lcombine(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &LEFT, &PREV);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -2458,6 +2511,10 @@ xfs_bmap_add_extent_unwritten_real(
newext)))
goto done;
}
+ error = xfs_rmap_rcombine(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &RIGHT, &PREV);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -2484,6 +2541,11 @@ xfs_bmap_add_extent_unwritten_real(
newext)))
goto done;
}
+
+ error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, new, 0);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -2531,6 +2593,14 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
}
+ error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &PREV, new->br_blockcount);
+ if (error)
+ goto done;
+ error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &LEFT, new->br_blockcount);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING:
@@ -2569,6 +2639,14 @@ xfs_bmap_add_extent_unwritten_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
+ error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &PREV, new->br_blockcount);
+ if (error)
+ goto done;
+ error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, new);
+ if (error)
+ goto done;
break;
case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -2611,6 +2689,14 @@ xfs_bmap_add_extent_unwritten_real(
newext)))
goto done;
}
+ error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &PREV, -new->br_blockcount);
+ if (error)
+ goto done;
+ error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &RIGHT, -new->br_blockcount);
+ if (error)
+ goto done;
break;
case BMAP_RIGHT_FILLING:
@@ -2651,6 +2737,14 @@ xfs_bmap_add_extent_unwritten_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
+ error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &PREV, -new->br_blockcount);
+ if (error)
+ goto done;
+ error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, new);
+ if (error)
+ goto done;
break;
case 0:
@@ -2712,6 +2806,19 @@ xfs_bmap_add_extent_unwritten_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
+ error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &PREV, new->br_startoff -
+ PREV.br_startoff - PREV.br_blockcount);
+ if (error)
+ goto done;
+ error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, new);
+ if (error)
+ goto done;
+ error = xfs_rmap_insert(mp, &flist->xbf_rlist, ip->i_ino,
+ XFS_DATA_FORK, &r[1]);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -2915,6 +3022,7 @@ xfs_bmap_add_extent_hole_real(
int rval=0; /* return value (logging flags) */
int state; /* state bits, accessed thru macros */
struct xfs_mount *mp;
+ struct xfs_bmbt_irec prev; /* fake previous extent entry */
mp = bma->tp ? bma->tp->t_mountp : NULL;
ifp = XFS_IFORK_PTR(bma->ip, whichfork);
@@ -3022,6 +3130,12 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
}
+ prev = *new;
+ prev.br_startblock = nullstartblock(0);
+ error = xfs_rmap_combine(mp, bma->rlist, bma->ip->i_ino,
+ whichfork, &left, &right, &prev);
+ if (error)
+ goto done;
break;
case BMAP_LEFT_CONTIG:
@@ -3054,6 +3168,10 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
}
+ error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
+ whichfork, &left, new->br_blockcount);
+ if (error)
+ goto done;
break;
case BMAP_RIGHT_CONTIG:
@@ -3088,6 +3206,10 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
}
+ error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
+ whichfork, &right, -new->br_blockcount);
+ if (error)
+ goto done;
break;
case 0:
@@ -3116,6 +3238,10 @@ xfs_bmap_add_extent_hole_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
+ error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
+ whichfork, new);
+ if (error)
+ goto done;
break;
}
@@ -3682,7 +3808,6 @@ xfs_bmap_btalloc(
memset(&args, 0, sizeof(args));
args.tp = ap->tp;
args.mp = mp;
- args.owner = ap->ip->i_ino;
args.fsbno = ap->blkno;
/* Trim the allocation back to the maximum an AG can fit. */
@@ -4246,7 +4371,6 @@ xfs_bmapi_delay(
return 0;
}
-
static int
xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
@@ -4534,6 +4658,7 @@ xfs_bmapi_write(
bma.userdata = 0;
bma.flist = flist;
bma.firstblock = firstblock;
+ bma.rlist = &flist->xbf_rlist;
while (bno < end && n < *nmap) {
inhole = eof || bma.got.br_startoff > bno;
@@ -4772,6 +4897,7 @@ xfs_bmap_del_extent(
nblks = 0;
do_fx = 0;
}
+
/*
* Set flag value to use in switch statement.
* Left-contig is 2, right-contig is 1.
@@ -4791,6 +4917,10 @@ xfs_bmap_del_extent(
XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
flags |= XFS_ILOG_CORE;
+ error = xfs_rmap_delete(mp, &flist->xbf_rlist, ip->i_ino,
+ whichfork, &got);
+ if (error)
+ goto done;
if (!cur) {
flags |= xfs_ilog_fext(whichfork);
break;
@@ -4818,6 +4948,10 @@ xfs_bmap_del_extent(
}
xfs_bmbt_set_startblock(ep, del_endblock);
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ error = xfs_rmap_move(mp, &flist->xbf_rlist, ip->i_ino,
+ whichfork, &got, del->br_blockcount);
+ if (error)
+ goto done;
if (!cur) {
flags |= xfs_ilog_fext(whichfork);
break;
@@ -4844,6 +4978,10 @@ xfs_bmap_del_extent(
break;
}
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ error = xfs_rmap_resize(mp, &flist->xbf_rlist, ip->i_ino,
+ whichfork, &got, -del->br_blockcount);
+ if (error)
+ goto done;
if (!cur) {
flags |= xfs_ilog_fext(whichfork);
break;
@@ -4869,6 +5007,15 @@ xfs_bmap_del_extent(
if (!delay) {
new.br_startblock = del_endblock;
flags |= XFS_ILOG_CORE;
+ error = xfs_rmap_resize(mp, &flist->xbf_rlist,
+ ip->i_ino, whichfork, &got,
+ temp - got.br_blockcount);
+ if (error)
+ goto done;
+ error = xfs_rmap_insert(mp, &flist->xbf_rlist,
+ ip->i_ino, whichfork, &new);
+ if (error)
+ goto done;
if (cur) {
if ((error = xfs_bmbt_update(cur,
got.br_startoff,
@@ -4958,7 +5105,7 @@ xfs_bmap_del_extent(
*/
if (do_fx)
xfs_bmap_add_free(mp, flist, del->br_startblock,
- del->br_blockcount);
+ del->br_blockcount, NULL);
/*
* Adjust inode # blocks in the file.
*/
@@ -5105,6 +5252,7 @@ xfs_bunmapi(
got.br_startoff + got.br_blockcount - 1);
if (bno < start)
break;
+
/*
* Then deal with the (possibly delayed) allocated space
* we found.
@@ -5407,7 +5555,8 @@ xfs_bmse_merge(
struct xfs_bmbt_rec_host *gotp, /* extent to shift */
struct xfs_bmbt_rec_host *leftp, /* preceding extent */
struct xfs_btree_cur *cur,
- int *logflags) /* output */
+ int *logflags, /* output */
+ struct xfs_rmap_list *rlist) /* rmap intent list */
{
struct xfs_bmbt_irec got;
struct xfs_bmbt_irec left;
@@ -5438,6 +5587,13 @@ xfs_bmse_merge(
XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
*logflags |= XFS_ILOG_CORE;
+ error = xfs_rmap_resize(mp, rlist, ip->i_ino, whichfork, &left,
+ blockcount - left.br_blockcount);
+ if (error)
+ return error;
+ error = xfs_rmap_delete(mp, rlist, ip->i_ino, whichfork, &got);
+ if (error)
+ return error;
if (!cur) {
*logflags |= XFS_ILOG_DEXT;
return 0;
@@ -5480,7 +5636,8 @@ xfs_bmse_shift_one(
struct xfs_bmbt_rec_host *gotp,
struct xfs_btree_cur *cur,
int *logflags,
- enum shift_direction direction)
+ enum shift_direction direction,
+ struct xfs_rmap_list *rlist)
{
struct xfs_ifork *ifp;
struct xfs_mount *mp;
@@ -5530,7 +5687,7 @@ xfs_bmse_shift_one(
offset_shift_fsb)) {
return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
*current_ext, gotp, adj_irecp,
- cur, logflags);
+ cur, logflags, rlist);
}
} else {
startoff = got.br_startoff + offset_shift_fsb;
@@ -5567,6 +5724,10 @@ update_current_ext:
(*current_ext)--;
xfs_bmbt_set_startoff(gotp, startoff);
*logflags |= XFS_ILOG_CORE;
+ error = xfs_rmap_slide(mp, rlist, ip->i_ino, whichfork,
+ &got, startoff - got.br_startoff);
+ if (error)
+ return error;
if (!cur) {
*logflags |= XFS_ILOG_DEXT;
return 0;
@@ -5706,9 +5867,11 @@ xfs_bmap_shift_extents(
}
while (nexts++ < num_exts) {
+ xfs_bmbt_get_all(gotp, &got);
+
error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
&current_ext, gotp, cur, &logflags,
- direction);
+ direction, &flist->xbf_rlist);
if (error)
goto del_cursor;
/*
@@ -5761,6 +5924,7 @@ xfs_bmap_split_extent_at(
int whichfork = XFS_DATA_FORK;
struct xfs_btree_cur *cur = NULL;
struct xfs_bmbt_rec_host *gotp;
+ struct xfs_bmbt_irec rgot;
struct xfs_bmbt_irec got;
struct xfs_bmbt_irec new; /* split extent */
struct xfs_mount *mp = ip->i_mount;
@@ -5770,6 +5934,7 @@ xfs_bmap_split_extent_at(
int error = 0;
int logflags = 0;
int i = 0;
+ long adj;
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5809,6 +5974,7 @@ xfs_bmap_split_extent_at(
if (got.br_startoff >= split_fsb)
return 0;
+ rgot = got;
gotblkcnt = split_fsb - got.br_startoff;
new.br_startoff = split_fsb;
new.br_startblock = got.br_startblock + gotblkcnt;
@@ -5864,6 +6030,17 @@ xfs_bmap_split_extent_at(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
}
+ /* update rmapbt */
+ adj = -(long)rgot.br_blockcount + gotblkcnt;
+ error = xfs_rmap_resize(mp, &free_list->xbf_rlist, ip->i_ino,
+ whichfork, &rgot, adj);
+ if (error)
+ goto del_cursor;
+ error = xfs_rmap_insert(mp, &free_list->xbf_rlist, ip->i_ino,
+ whichfork, &new);
+ if (error)
+ goto del_cursor;
+
/*
* Convert to a btree if necessary.
*/
@@ -5925,6 +6102,7 @@ xfs_bmap_split_extent(
return xfs_trans_commit(tp);
out:
+ xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp);
return error;
}
diff --git a/libxfs/xfs_bmap.h b/libxfs/xfs_bmap.h
index 722f36c..77d8771 100644
--- a/libxfs/xfs_bmap.h
+++ b/libxfs/xfs_bmap.h
@@ -67,6 +67,7 @@ typedef struct xfs_bmap_free_item
{
xfs_fsblock_t xbfi_startblock;/* starting fs block number */
xfs_extlen_t xbfi_blockcount;/* number of blocks in extent */
+ struct xfs_owner_info xbfi_oinfo; /* extent owner */
struct xfs_bmap_free_item *xbfi_next; /* link to next entry */
} xfs_bmap_free_item_t;
@@ -194,7 +195,8 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_bmap_free *flist,
- xfs_fsblock_t bno, xfs_filblks_t len);
+ xfs_fsblock_t bno, xfs_filblks_t len,
+ struct xfs_owner_info *oinfo);
void xfs_bmap_cancel(struct xfs_bmap_free *flist);
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed, struct xfs_inode *ip);
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c
index 12d1a2d..bc09b2b 100644
--- a/libxfs/xfs_bmap_btree.c
+++ b/libxfs/xfs_bmap_btree.c
@@ -443,7 +443,8 @@ xfs_bmbt_alloc_block(
args.mp = cur->bc_mp;
args.fsbno = cur->bc_private.b.firstblock;
args.firstblock = args.fsbno;
- args.owner = cur->bc_private.b.ip->i_ino;
+ XFS_RMAP_INO_BMBT_OWNER(&args.oinfo, cur->bc_private.b.ip->i_ino,
+ cur->bc_private.b.whichfork);
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
@@ -523,8 +524,10 @@ xfs_bmbt_free_block(
struct xfs_inode *ip = cur->bc_private.b.ip;
struct xfs_trans *tp = cur->bc_tp;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+ struct xfs_owner_info oinfo;
- xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1);
+ XFS_RMAP_INO_BMBT_OWNER(&oinfo, ip->i_ino, cur->bc_private.b.whichfork);
+ xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1, &oinfo);
ip->i_d.di_nblocks--;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
index 9a4f328..94bd2f9 100644
--- a/libxfs/xfs_format.h
+++ b/libxfs/xfs_format.h
@@ -1314,6 +1314,55 @@ typedef __be32 xfs_inobt_ptr_t;
#define XFS_RMAP_CRC_MAGIC 0x524d4233 /* 'RMB3' */
/*
+ * Ownership info for an extent. This is used to create reverse-mapping
+ * entries.
+ */
+#define XFS_RMAP_INO_ATTR_FORK (1)
+#define XFS_RMAP_BMBT_BLOCK (2)
+struct xfs_owner_info {
+ uint64_t oi_owner;
+ xfs_fileoff_t oi_offset;
+ unsigned int oi_flags;
+};
+
+static inline void
+XFS_RMAP_AG_OWNER(
+ struct xfs_owner_info *oi,
+ uint64_t owner)
+{
+ oi->oi_owner = owner;
+ oi->oi_offset = 0;
+ oi->oi_flags = 0;
+}
+
+static inline void
+XFS_RMAP_INO_BMBT_OWNER(
+ struct xfs_owner_info *oi,
+ xfs_ino_t ino,
+ int whichfork)
+{
+ oi->oi_owner = ino;
+ oi->oi_offset = 0;
+ oi->oi_flags = XFS_RMAP_BMBT_BLOCK;
+ if (whichfork == XFS_ATTR_FORK)
+ oi->oi_flags |= XFS_RMAP_INO_ATTR_FORK;
+}
+
+static inline void
+XFS_RMAP_INO_OWNER(
+ struct xfs_owner_info *oi,
+ xfs_ino_t ino,
+ int whichfork,
+ xfs_fileoff_t offset)
+{
+ oi->oi_owner = ino;
+ oi->oi_offset = offset;
+ oi->oi_flags = 0;
+ if (whichfork == XFS_ATTR_FORK)
+ oi->oi_flags |= XFS_RMAP_INO_ATTR_FORK;
+}
+
+/*
* Special owner types.
*
* Seeing as we only support up to 8EB, we have the upper bit of the owner field
@@ -1329,6 +1378,8 @@ typedef __be32 xfs_inobt_ptr_t;
#define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */
#define XFS_RMAP_OWN_MIN (-8ULL) /* guard */
+#define XFS_RMAP_NON_INODE_OWNER(owner) (!!((owner) & (1ULL << 63)))
+
/*
* Data record structure
*/
@@ -1336,12 +1387,44 @@ struct xfs_rmap_rec {
__be32 rm_startblock; /* extent start block */
__be32 rm_blockcount; /* extent length */
__be64 rm_owner; /* extent owner */
+ __be64 rm_offset; /* offset within the owner */
};
+/*
+ * rmap btree record
+ * rm_blockcount:31 is the unwritten extent flag (same as l0:63 in bmbt)
+ * rm_blockcount:0-30 are the extent length
+ * rm_offset:63 is the attribute fork flag
+ * rm_offset:62 is the bmbt block flag
+ * rm_offset:0-61 is the block offset within the inode
+ */
+#define XFS_RMAP_OFF_ATTR ((__uint64_t)1ULL << 63)
+#define XFS_RMAP_OFF_BMBT ((__uint64_t)1ULL << 62)
+#define XFS_RMAP_LEN_UNWRITTEN ((xfs_extlen_t)1U << 31)
+
+#define XFS_RMAP_OFF_MASK (~(XFS_RMAP_OFF_ATTR | XFS_RMAP_OFF_BMBT))
+#define XFS_RMAP_LEN_MASK (~XFS_RMAP_LEN_UNWRITTEN)
+
+#define XFS_RMAP_OFF(off) ((off) & XFS_RMAP_OFF_MASK)
+#define XFS_RMAP_LEN(len) ((len) & XFS_RMAP_LEN_MASK)
+
+#define XFS_RMAP_IS_BMBT(off) (!!((off) & XFS_RMAP_OFF_BMBT))
+#define XFS_RMAP_IS_ATTR_FORK(off) (!!((off) & XFS_RMAP_OFF_ATTR))
+#define XFS_RMAP_IS_UNWRITTEN(len) (!!((len) & XFS_RMAP_LEN_UNWRITTEN))
+
+#define RMAPBT_STARTBLOCK_BITLEN 32
+#define RMAPBT_EXNTFLAG_BITLEN 1
+#define RMAPBT_BLOCKCOUNT_BITLEN 31
+#define RMAPBT_OWNER_BITLEN 64
+#define RMAPBT_ATTRFLAG_BITLEN 1
+#define RMAPBT_BMBTFLAG_BITLEN 1
+#define RMAPBT_OFFSET_BITLEN 62
+
struct xfs_rmap_irec {
xfs_agblock_t rm_startblock; /* extent start block */
xfs_extlen_t rm_blockcount; /* extent length */
__uint64_t rm_owner; /* extent owner */
+ __uint64_t rm_offset; /* offset within the owner */
};
/*
@@ -1351,19 +1434,50 @@ struct xfs_rmap_irec {
*/
struct xfs_rmap_key {
__be32 rm_startblock; /* extent start block */
-};
+ __be64 rm_owner; /* extent owner */
+ __be64 rm_offset; /* offset within the owner */
+} __attribute__((packed));
/* btree pointer type */
typedef __be32 xfs_rmap_ptr_t;
-/*
- * block numbers in the AG.
- */
#define XFS_RMAP_BLOCK(mp) \
(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
XFS_FIBT_BLOCK(mp) + 1 : \
XFS_IBT_BLOCK(mp) + 1)
+static inline void
+xfs_owner_info_unpack(
+ struct xfs_owner_info *oinfo,
+ uint64_t *owner,
+ uint64_t *offset)
+{
+ __uint64_t r;
+
+ *owner = oinfo->oi_owner;
+ r = oinfo->oi_offset;
+ if (oinfo->oi_flags & XFS_RMAP_INO_ATTR_FORK)
+ r |= XFS_RMAP_OFF_ATTR;
+ if (oinfo->oi_flags & XFS_RMAP_BMBT_BLOCK)
+ r |= XFS_RMAP_OFF_BMBT;
+ *offset = r;
+}
+
+static inline void
+xfs_owner_info_pack(
+ struct xfs_owner_info *oinfo,
+ uint64_t owner,
+ uint64_t offset)
+{
+ oinfo->oi_owner = owner;
+ oinfo->oi_offset = XFS_RMAP_OFF(offset);
+ oinfo->oi_flags = 0;
+ if (XFS_RMAP_IS_ATTR_FORK(offset))
+ oinfo->oi_flags |= XFS_RMAP_INO_ATTR_FORK;
+ if (XFS_RMAP_IS_BMBT(offset))
+ oinfo->oi_flags |= XFS_RMAP_BMBT_BLOCK;
+}
+
/*
* BMAP Btree format definitions
*
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
index 12eaaab..6881952 100644
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -608,6 +608,7 @@ xfs_ialloc_ag_alloc(
args.tp = tp;
args.mp = tp->t_mountp;
args.fsbno = NULLFSBLOCK;
+ XFS_RMAP_AG_OWNER(&args.oinfo, XFS_RMAP_OWN_INODES);
#ifdef DEBUG
/* randomly do sparse inode allocations */
@@ -615,7 +616,6 @@ xfs_ialloc_ag_alloc(
args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
do_sparse = prandom_u32() & 1;
#endif
- args.owner = XFS_RMAP_OWN_INODES;
/*
* Locking will ensure that we don't have two callers in here
@@ -1819,13 +1819,15 @@ xfs_difree_inode_chunk(
int nextbit;
xfs_agblock_t agbno;
int contigblk;
+ struct xfs_owner_info oinfo;
DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
+ XFS_RMAP_AG_OWNER(&oinfo, XFS_RMAP_OWN_INODES);
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno,
XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),
- mp->m_ialloc_blks);
+ mp->m_ialloc_blks, &oinfo);
return;
}
@@ -1869,7 +1871,7 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, agbno),
- contigblk);
+ contigblk, &oinfo);
/* reset range to current bit and carry on... */
startidx = endidx = nextbit;
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
index 77b41be..88a3e87 100644
--- a/libxfs/xfs_ialloc_btree.c
+++ b/libxfs/xfs_ialloc_btree.c
@@ -95,7 +95,7 @@ xfs_inobt_alloc_block(
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
- args.owner = XFS_RMAP_OWN_INOBT;
+ XFS_RMAP_AG_OWNER(&args.oinfo, XFS_RMAP_OWN_INOBT);
args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
args.minlen = 1;
args.maxlen = 1;
@@ -127,9 +127,11 @@ xfs_inobt_free_block(
{
xfs_fsblock_t fsbno;
int error;
+ struct xfs_owner_info oinfo;
+ XFS_RMAP_AG_OWNER(&oinfo, XFS_RMAP_OWN_INOBT);
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
- error = xfs_free_extent(cur->bc_tp, fsbno, 1);
+ error = xfs_free_extent(cur->bc_tp, fsbno, 1, &oinfo);
if (error)
return error;
diff --git a/libxfs/xfs_rmap.c b/libxfs/xfs_rmap.c
index b2a3330..43354b9 100644
--- a/libxfs/xfs_rmap.c
+++ b/libxfs/xfs_rmap.c
@@ -33,29 +33,52 @@
#include "xfs_rmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
-
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
/*
- * Lookup the first record less than or equal to [bno, len]
+ * Lookup the first record less than or equal to [bno, len, owner, offset]
* in the btree given by cur.
*/
-STATIC int
+int
xfs_rmap_lookup_le(
struct xfs_btree_cur *cur,
xfs_agblock_t bno,
xfs_extlen_t len,
uint64_t owner,
+ uint64_t offset,
int *stat)
{
cur->bc_rec.r.rm_startblock = bno;
cur->bc_rec.r.rm_blockcount = len;
cur->bc_rec.r.rm_owner = owner;
+ cur->bc_rec.r.rm_offset = offset;
return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}
/*
+ * Lookup the record exactly matching [bno, len, owner, offset]
+ * in the btree given by cur.
+ */
+int
+xfs_rmap_lookup_eq(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ uint64_t owner,
+ uint64_t offset,
+ int *stat)
+{
+ cur->bc_rec.r.rm_startblock = bno;
+ cur->bc_rec.r.rm_blockcount = len;
+ cur->bc_rec.r.rm_owner = owner;
+ cur->bc_rec.r.rm_offset = offset;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
* Update the record referred to by cur to the value given
- * by [bno, len, ref].
+ * by [bno, len, owner, offset].
* This either works (return 0) or gets an EFSCORRUPTED error.
*/
STATIC int
@@ -65,16 +88,79 @@ xfs_rmap_update(
{
union xfs_btree_rec rec;
+ trace_xfs_rmapbt_update(cur->bc_mp, cur->bc_private.a.agno,
+ irec->rm_startblock, irec->rm_blockcount,
+ irec->rm_owner, irec->rm_offset);
+
rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
+ rec.rmap.rm_offset = cpu_to_be64(irec->rm_offset);
return xfs_btree_update(cur, &rec);
}
+int
+xfs_rmapbt_insert(
+ struct xfs_btree_cur *rcur,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ uint64_t owner,
+ uint64_t offset)
+{
+ int i;
+ int error;
+
+ trace_xfs_rmapbt_insert(rcur->bc_mp, rcur->bc_private.a.agno, agbno,
+ len, owner, offset);
+
+ error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, &i);
+ if (error)
+ goto done;
+ XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 0, done);
+
+ rcur->bc_rec.r.rm_startblock = agbno;
+ rcur->bc_rec.r.rm_blockcount = len;
+ rcur->bc_rec.r.rm_owner = owner;
+ rcur->bc_rec.r.rm_offset = offset;
+ error = xfs_btree_insert(rcur, &i);
+ if (error)
+ goto done;
+ XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+done:
+ return error;
+}
+
+STATIC int
+xfs_rmapbt_delete(
+ struct xfs_btree_cur *rcur,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ uint64_t owner,
+ uint64_t offset)
+{
+ int i;
+ int error;
+
+ trace_xfs_rmapbt_delete(rcur->bc_mp, rcur->bc_private.a.agno, agbno,
+ len, owner, offset);
+
+ error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, &i);
+ if (error)
+ goto done;
+ XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+
+ error = xfs_btree_delete(rcur, &i);
+ if (error)
+ goto done;
+ XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+done:
+ return error;
+}
+
/*
* Get the data from the pointed-to record.
*/
-STATIC int
+int
xfs_rmap_get_rec(
struct xfs_btree_cur *cur,
struct xfs_rmap_irec *irec,
@@ -90,31 +176,27 @@ xfs_rmap_get_rec(
irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
+ irec->rm_offset = be64_to_cpu(rec->rmap.rm_offset);
return 0;
}
/*
* Find the extent in the rmap btree and remove it.
*
- * The record we find should always span a range greater than or equal to the
- * the extent being freed. This makes the code simple as, in theory, we do not
- * have to handle ranges that are split across multiple records as extents that
- * result in bmap btree extent merges should also result in rmap btree extent
- * merges. The owner field ensures we don't merge extents from different
- * structures into the same record, hence this property should always hold true
- * if we ensure that the rmap btree supports at least the same size maximum
- * extent as the bmap btree (2^21 blocks at present).
+ * The record we find should always be an exact match for the extent that we're
+ * looking for, since we insert them into the btree without modification.
*
- * Complexity: when growing the filesystem, we "free" an extent when growing the
- * last AG. This extent is new space and so it is not tracked as used space in
- * the btree. The growfs code will pass in an owner of XFS_RMAP_OWN_NULL to
- * indicate that it expected that there is no owner of this extent. We verify
- * that - the extent lookup result in a record that does not overlap.
+ * Special Case #1: when growing the filesystem, we "free" an extent when
+ * growing the last AG. This extent is new space and so it is not tracked as
+ * used space in the btree. The growfs code will pass in an owner of
+ * XFS_RMAP_OWN_NULL to indicate that it expects that there is no owner of this
+ * extent. We verify that - the extent lookup results in a record that does not
+ * overlap.
*
- * Complexity #2: EFIs do not record the owner of the extent, so when recovering
- * EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap btree to
- * ignore the owner (i.e. wildcard match) so we don't trigger corruption checks
- * during log recovery.
+ * Special Case #2: EFIs do not record the owner of the extent, so when
+ * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap
+ * btree to ignore the owner (i.e. wildcard match) so we don't trigger
+ * corruption checks during log recovery.
*/
int
xfs_rmap_free(
@@ -123,29 +205,31 @@ xfs_rmap_free(
xfs_agnumber_t agno,
xfs_agblock_t bno,
xfs_extlen_t len,
- uint64_t owner)
+ struct xfs_owner_info *oinfo)
{
- struct xfs_btree_cur *cur;
struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_btree_cur *cur;
struct xfs_rmap_irec ltrec;
- int error;
+ uint64_t ltoff;
+ int error = 0;
int i;
+ uint64_t owner;
+ uint64_t offset;
- /*
- * if rmap btree is not supported, then just return success without
- * doing anything.
- */
- if (!xfs_sb_version_hasrmapbt(&tp->t_mountp->m_sb))
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
return 0;
- trace_xfs_rmap_free_extent(mp, agno, bno, len, owner);
+ trace_xfs_rmap_free_extent(mp, agno, bno, len, oinfo);
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+ xfs_owner_info_unpack(oinfo, &owner, &offset);
+
/*
- * We always have a left record because there's a static record
- * for the AG headers at rm_startblock == 0.
+ * We should always have a left record because there's a static record
+ * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
+ * will not ever be removed from the tree.
*/
- error = xfs_rmap_lookup_le(cur, bno, len, owner, &i);
+ error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, &i);
if (error)
goto out_error;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
@@ -155,17 +239,19 @@ xfs_rmap_free(
goto out_error;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ ltoff = ltrec.rm_offset & ~XFS_RMAP_OFF_BMBT; /* ltrec now valid */
- /* special growfs case - bno is beyond last record */
+ /*
+ * For growfs, the incoming extent must be beyond the left record we
+ * just found as it is new space and won't be used by anyone. This is
+ * just a corruption check as we don't actually do anything with this
+ * extent.
+ */
if (owner == XFS_RMAP_OWN_NULL) {
XFS_WANT_CORRUPTED_GOTO(mp, bno > ltrec.rm_startblock +
ltrec.rm_blockcount, out_error);
goto out_done;
}
- /* make sure the extent we found covers the entire freeing range. */
- XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno, out_error);
- XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_blockcount >= len, out_error);
-
/*
if (owner != ltrec.rm_owner ||
bno > ltrec.rm_startblock + ltrec.rm_blockcount)
@@ -173,16 +259,36 @@ xfs_rmap_free(
//printk("rmfree ag %d bno 0x%x/0x%x/0x%llx, ltrec 0x%x/0x%x/0x%llx\n",
// agno, bno, len, owner, ltrec.rm_startblock,
// ltrec.rm_blockcount, ltrec.rm_owner);
- XFS_WANT_CORRUPTED_GOTO(mp, bno <= ltrec.rm_startblock + ltrec.rm_blockcount,
- out_error);
+
+ /* make sure the extent we found covers the entire freeing range. */
+ XFS_WANT_CORRUPTED_GOTO(mp, !XFS_RMAP_IS_UNWRITTEN(ltrec.rm_blockcount),
+ out_error);
+ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
+ ltrec.rm_startblock + XFS_RMAP_LEN(ltrec.rm_blockcount) >=
+ bno + len, out_error);
+
+ /* make sure the owner matches what we expect to find in the tree */
XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
- (owner < XFS_RMAP_OWN_NULL &&
- owner >= XFS_RMAP_OWN_MIN), out_error);
+ XFS_RMAP_NON_INODE_OWNER(owner), out_error);
+
+ /* check the offset, if necessary */
+ if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
+ if (XFS_RMAP_IS_BMBT(offset)) {
+ XFS_WANT_CORRUPTED_GOTO(mp,
+ XFS_RMAP_IS_BMBT(ltrec.rm_offset),
+ out_error);
+ } else {
+ XFS_WANT_CORRUPTED_GOTO(mp,
+ ltrec.rm_offset <= offset, out_error);
+ XFS_WANT_CORRUPTED_GOTO(mp,
+ offset <= ltoff + ltrec.rm_blockcount,
+ out_error);
+ }
+ }
- /* exact match is easy */
if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
//printk("remove exact\n");
- /* remove extent from rmap tree */
+ /* exact match, simply remove the record from rmap tree */
error = xfs_btree_delete(cur, &i);
if (error)
goto out_error;
@@ -190,7 +296,8 @@ xfs_rmap_free(
} else if (ltrec.rm_startblock == bno) {
//printk("remove left\n");
/*
- * overlap left hand side of extent
+ * overlap left hand side of extent: move the start, trim the
+ * length and update the current record.
*
* ltbno ltlen
* Orig: |oooooooooooooooooooo|
@@ -206,7 +313,8 @@ xfs_rmap_free(
} else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
//printk("remove right\n");
/*
- * overlap right hand side of extent
+ * overlap right hand side of extent: trim the length and update
+ * the current record.
*
* ltbno ltlen
* Orig: |oooooooooooooooooooo|
@@ -219,8 +327,12 @@ xfs_rmap_free(
if (error)
goto out_error;
} else {
+
/*
- * overlap middle of extent
+ * overlap middle of extent: trim the length of the existing
+ * record to the length of the new left-extent size, increment
+ * the insertion position so we can insert a new record
+ * containing the remaining right-extent space.
*
* ltbno ltlen
* Orig: |oooooooooooooooooooo|
@@ -231,7 +343,7 @@ xfs_rmap_free(
xfs_extlen_t orig_len = ltrec.rm_blockcount;
//printk("remove middle\n");
- ltrec.rm_blockcount = bno - ltrec.rm_startblock;;
+ ltrec.rm_blockcount = bno - ltrec.rm_startblock;
error = xfs_rmap_update(cur, &ltrec);
if (error)
goto out_error;
@@ -244,33 +356,52 @@ xfs_rmap_free(
cur->bc_rec.r.rm_blockcount = orig_len - len -
ltrec.rm_blockcount;
cur->bc_rec.r.rm_owner = ltrec.rm_owner;
+ cur->bc_rec.r.rm_offset = offset;
error = xfs_btree_insert(cur, &i);
if (error)
goto out_error;
}
out_done:
- trace_xfs_rmap_free_extent_done(mp, agno, bno, len, owner);
+ trace_xfs_rmap_free_extent_done(mp, agno, bno, len, oinfo);
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
return 0;
out_error:
- trace_xfs_rmap_free_extent_error(mp, agno, bno, len, owner);
+ trace_xfs_rmap_free_extent_error(mp, agno, bno, len, oinfo);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
/*
- * When we allocate a new block, the first thing we do is add a reference to the
- * extent in the rmap btree. This is how we track the owner of the extent and th
- * enumber of references to it.
- *
- * Initially, we do not have shared extents, and so the extent can only have a
- * single reference count and owner. This makes the initial implementation easy,
- * but does not allow us to use the rmap tree for tracking reflink shared files.
- * Hence the initial implementation is simply a lookup to find the place to
- * insert (and checking we don't find a duplicate/overlap) and then insertng the
- * appropriate record.
+ * A mergeable rmap should have the same owner, cannot be unwritten, and
+ * must be a bmbt rmap if we're asking about a bmbt rmap.
+ */
+static bool
+is_mergeable_rmap(
+ struct xfs_rmap_irec *irec,
+ uint64_t owner,
+ uint64_t offset)
+{
+ if (irec->rm_owner == XFS_RMAP_OWN_NULL)
+ return false;
+ if (irec->rm_owner != owner)
+ return false;
+ if (XFS_RMAP_IS_UNWRITTEN(irec->rm_blockcount))
+ return false;
+ if (XFS_RMAP_IS_ATTR_FORK(offset) ^
+ XFS_RMAP_IS_ATTR_FORK(irec->rm_offset))
+ return false;
+ if (XFS_RMAP_IS_BMBT(offset) ^ XFS_RMAP_IS_BMBT(irec->rm_offset))
+ return false;
+ return true;
+}
+
+/*
+ * When we allocate a new block, the first thing we do is add a reference to
+ * the extent in the rmap btree. This takes the form of a [agbno, length,
+ * owner, offset] record. Flags are encoded in the high bits of the offset
+ * field.
*/
int
xfs_rmap_alloc(
@@ -279,31 +410,32 @@ xfs_rmap_alloc(
xfs_agnumber_t agno,
xfs_agblock_t bno,
xfs_extlen_t len,
- uint64_t owner)
+ struct xfs_owner_info *oinfo)
{
- struct xfs_btree_cur *cur;
struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_btree_cur *cur;
struct xfs_rmap_irec ltrec;
struct xfs_rmap_irec gtrec;
int have_gt;
- int error;
+ int error = 0;
int i;
+ uint64_t owner;
+ uint64_t offset;
- /*
- * if rmap btree is not supported, then just return success without
- * doing anything.
- */
- if (!xfs_sb_version_hasrmapbt(&tp->t_mountp->m_sb))
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
return 0;
- trace_xfs_rmap_alloc_extent(mp, agno, bno, len, owner);
+ trace_xfs_rmap_alloc_extent(mp, agno, bno, len, oinfo);
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+ xfs_owner_info_unpack(oinfo, &owner, &offset);
+
/*
- * chekc to see if we find an existing record for this extent rather
- * than just the location for insert.
+ * For the initial lookup, look for an exact match or the left-adjacent
+ * record for our insertion point. This will also give us the record for
+ * start block contiguity tests.
*/
- error = xfs_rmap_lookup_le(cur, bno, len, owner, &i);
+ error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, &i);
if (error)
goto out_error;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
@@ -315,10 +447,18 @@ xfs_rmap_alloc(
//printk("rmalloc ag %d bno 0x%x/0x%x/0x%llx, ltrec 0x%x/0x%x/0x%llx\n",
// agno, bno, len, owner, ltrec.rm_startblock,
// ltrec.rm_blockcount, ltrec.rm_owner);
+ if (!is_mergeable_rmap(&ltrec, owner, offset))
+ ltrec.rm_owner = XFS_RMAP_OWN_NULL;
- XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock + ltrec.rm_blockcount <= bno,
- out_error);
+ XFS_WANT_CORRUPTED_GOTO(mp,
+ ltrec.rm_owner == XFS_RMAP_OWN_NULL ||
+ ltrec.rm_startblock + ltrec.rm_blockcount <= bno, out_error);
+ /*
+ * Increment the cursor to see if we have a right-adjacent record to our
+ * insertion point. This will give us the record for end block
+ * contiguity tests.
+ */
error = xfs_btree_increment(cur, 0, &have_gt);
if (error)
goto out_error;
@@ -335,12 +475,17 @@ xfs_rmap_alloc(
} else {
gtrec.rm_owner = XFS_RMAP_OWN_NULL;
}
+ if (!is_mergeable_rmap(&gtrec, owner, offset))
+ gtrec.rm_owner = XFS_RMAP_OWN_NULL;
- /* cursor currently points one record past ltrec */
+ /*
+ * Note: cursor currently points one record to the right of ltrec, even
+ * if there is no record in the tree to the right.
+ */
if (ltrec.rm_owner == owner &&
ltrec.rm_startblock + ltrec.rm_blockcount == bno) {
/*
- * left edge contiguous
+ * left edge contiguous, merge into left record.
*
* ltbno ltlen
* orig: |ooooooooo|
@@ -354,7 +499,8 @@ xfs_rmap_alloc(
bno + len == gtrec.rm_startblock) {
//printk("add middle\n");
/*
- * right edge also contiguous
+ * right edge also contiguous, delete right record
+ * and merge into left record.
*
* ltbno ltlen gtbno gtlen
* orig: |ooooooooo| |ooooooooo|
@@ -368,6 +514,7 @@ xfs_rmap_alloc(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
}
+ /* point the cursor back to the left record and update */
error = xfs_btree_decrement(cur, 0, &have_gt);
if (error)
goto out_error;
@@ -377,7 +524,7 @@ xfs_rmap_alloc(
} else if (gtrec.rm_owner == owner &&
bno + len == gtrec.rm_startblock) {
/*
- * right edge contiguous
+ * right edge contiguous, merge into right record.
*
* gtbno gtlen
* Orig: |ooooooooo|
@@ -393,21 +540,723 @@ xfs_rmap_alloc(
goto out_error;
} else {
//printk("add no match\n");
- /* no contiguous edge with identical owner */
+ /*
+ * no contiguous edge with identical owner, insert
+ * new record at current cursor position.
+ */
cur->bc_rec.r.rm_startblock = bno;
cur->bc_rec.r.rm_blockcount = len;
cur->bc_rec.r.rm_owner = owner;
+ cur->bc_rec.r.rm_offset = offset;
error = xfs_btree_insert(cur, &i);
if (error)
goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
}
- trace_xfs_rmap_alloc_extent_done(mp, agno, bno, len, owner);
+ trace_xfs_rmap_alloc_extent_done(mp, agno, bno, len, oinfo);
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
return 0;
out_error:
- trace_xfs_rmap_alloc_extent_error(mp, agno, bno, len, owner);
+ trace_xfs_rmap_alloc_extent_error(mp, agno, bno, len, oinfo);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
+
+/* Encode logical offset for a rmapbt record */
+STATIC uint64_t
+b2r_off(
+ int whichfork,
+ xfs_fileoff_t off)
+{
+ uint64_t x;
+
+ x = off;
+ if (whichfork == XFS_ATTR_FORK)
+ x |= XFS_RMAP_OFF_ATTR;
+ return x;
+}
+
+/* Encode blockcount for a rmapbt record */
+STATIC xfs_extlen_t
+b2r_len(
+ struct xfs_bmbt_irec *irec)
+{
+ xfs_extlen_t x;
+
+ x = irec->br_blockcount;
+ if (irec->br_state == XFS_EXT_UNWRITTEN)
+ x |= XFS_RMAP_LEN_UNWRITTEN;
+ return x;
+}
+
+static int
+__xfs_rmap_move(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *PREV,
+ long start_adj);
+
+static int
+__xfs_rmap_resize(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *PREV,
+ long size_adj);
+
+/* Combine two adjacent rmap extents */
+static int
+__xfs_rmap_combine(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *LEFT,
+ struct xfs_bmbt_irec *RIGHT,
+ struct xfs_bmbt_irec *PREV)
+{
+ int error;
+
+ if (!rcur)
+ return 0;
+
+ trace_xfs_rmap_combine(rcur->bc_mp, rcur->bc_private.a.agno, ino,
+ whichfork, LEFT, PREV, RIGHT);
+
+ /* Delete right rmap */
+ error = xfs_rmapbt_delete(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp, RIGHT->br_startblock),
+ b2r_len(RIGHT), ino,
+ b2r_off(whichfork, RIGHT->br_startoff));
+ if (error)
+ goto done;
+
+ /* Delete prev rmap */
+ if (!isnullstartblock(PREV->br_startblock)) {
+ error = xfs_rmapbt_delete(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp,
+ PREV->br_startblock),
+ b2r_len(PREV), ino,
+ b2r_off(whichfork, PREV->br_startoff));
+ if (error)
+ goto done;
+ }
+
+ /* Enlarge left rmap */
+ return __xfs_rmap_resize(rcur, ino, whichfork, LEFT,
+ PREV->br_blockcount + RIGHT->br_blockcount);
+done:
+ return error;
+}
+
+/* Extend a left rmap extent */
+static int
+__xfs_rmap_lcombine(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *LEFT,
+ struct xfs_bmbt_irec *PREV)
+{
+ int error;
+
+ if (!rcur)
+ return 0;
+
+ trace_xfs_rmap_lcombine(rcur->bc_mp, rcur->bc_private.a.agno, ino,
+ whichfork, LEFT, PREV);
+
+ /* Delete prev rmap */
+ if (!isnullstartblock(PREV->br_startblock)) {
+ error = xfs_rmapbt_delete(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp,
+ PREV->br_startblock),
+ b2r_len(PREV), ino,
+ b2r_off(whichfork, PREV->br_startoff));
+ if (error)
+ goto done;
+ }
+
+ /* Enlarge left rmap */
+ return __xfs_rmap_resize(rcur, ino, whichfork, LEFT,
+ PREV->br_blockcount);
+done:
+ return error;
+}
+
+/* Extend a right rmap extent */
+static int
+__xfs_rmap_rcombine(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *RIGHT,
+ struct xfs_bmbt_irec *PREV)
+{
+ int error;
+
+ if (!rcur)
+ return 0;
+
+ trace_xfs_rmap_rcombine(rcur->bc_mp, rcur->bc_private.a.agno, ino,
+ whichfork, RIGHT, PREV);
+
+ /* Delete prev rmap */
+ if (!isnullstartblock(PREV->br_startblock)) {
+ error = xfs_rmapbt_delete(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp,
+ PREV->br_startblock),
+ b2r_len(PREV), ino,
+ b2r_off(whichfork, PREV->br_startoff));
+ if (error)
+ goto done;
+ }
+
+ /* Enlarge right rmap */
+ return __xfs_rmap_move(rcur, ino, whichfork, RIGHT,
+ -PREV->br_blockcount);
+done:
+ return error;
+}
+
+/* Insert a rmap extent */
+static int
+__xfs_rmap_insert(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *rec)
+{
+ if (!rcur)
+ return 0;
+
+ trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_private.a.agno, ino,
+ whichfork, rec);
+
+ return xfs_rmapbt_insert(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp, rec->br_startblock),
+ b2r_len(rec), ino,
+ b2r_off(whichfork, rec->br_startoff));
+}
+
+/* Delete a rmap extent */
+static int
+__xfs_rmap_delete(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *rec)
+{
+ if (!rcur)
+ return 0;
+
+ trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_private.a.agno, ino,
+ whichfork, rec);
+
+ return xfs_rmapbt_delete(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp, rec->br_startblock),
+ b2r_len(rec), ino,
+ b2r_off(whichfork, rec->br_startoff));
+}
+
+/* Change the start of an rmap */
+static int
+__xfs_rmap_move(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *PREV,
+ long start_adj)
+{
+ int error;
+ struct xfs_bmbt_irec irec;
+
+ if (!rcur)
+ return 0;
+
+ trace_xfs_rmap_move(rcur->bc_mp, rcur->bc_private.a.agno, ino,
+ whichfork, PREV, start_adj);
+
+ /* Delete prev rmap */
+ error = xfs_rmapbt_delete(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp, PREV->br_startblock),
+ b2r_len(PREV), ino,
+ b2r_off(whichfork, PREV->br_startoff));
+ if (error)
+ goto done;
+
+ /* Re-add rmap with new start */
+ irec = *PREV;
+ irec.br_startblock += start_adj;
+ irec.br_startoff += start_adj;
+ irec.br_blockcount -= start_adj;
+ return xfs_rmapbt_insert(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp, irec.br_startblock),
+ b2r_len(&irec), ino,
+ b2r_off(whichfork, irec.br_startoff));
+done:
+ return error;
+}
+
+/* Change the logical offset of an rmap */
+static int
+__xfs_rmap_slide(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *PREV,
+ long start_adj)
+{
+ int error;
+
+ if (!rcur)
+ return 0;
+
+ trace_xfs_rmap_slide(rcur->bc_mp, rcur->bc_private.a.agno, ino,
+ whichfork, PREV, start_adj);
+
+ /* Delete prev rmap */
+ error = xfs_rmapbt_delete(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp, PREV->br_startblock),
+ b2r_len(PREV), ino,
+ b2r_off(whichfork, PREV->br_startoff));
+ if (error)
+ goto done;
+
+ /* Re-add rmap with new logical offset */
+ return xfs_rmapbt_insert(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp, PREV->br_startblock),
+ b2r_len(PREV), ino,
+ b2r_off(whichfork, PREV->br_startoff + start_adj));
+done:
+ return error;
+}
+
+/* Change the size of an rmap */
+static int
+__xfs_rmap_resize(
+ struct xfs_btree_cur *rcur,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *PREV,
+ long size_adj)
+{
+ int i;
+ int error;
+ struct xfs_bmbt_irec irec;
+ struct xfs_rmap_irec rrec;
+
+ if (!rcur)
+ return 0;
+
+ trace_xfs_rmap_resize(rcur->bc_mp, rcur->bc_private.a.agno, ino,
+ whichfork, PREV, size_adj);
+
+ error = xfs_rmap_lookup_eq(rcur,
+ XFS_FSB_TO_AGBNO(rcur->bc_mp, PREV->br_startblock),
+ b2r_len(PREV), ino,
+ b2r_off(whichfork, PREV->br_startoff), &i);
+ if (error)
+ goto done;
+ XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+ error = xfs_rmap_get_rec(rcur, &rrec, &i);
+ if (error)
+ goto done;
+ XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+ irec = *PREV;
+ irec.br_blockcount += size_adj;
+ rrec.rm_blockcount = b2r_len(&irec);
+ error = xfs_rmap_update(rcur, &rrec);
+ if (error)
+ goto done;
+done:
+ return error;
+}
+
+/*
+ * Free up any items left in the list.
+ */
+void
+xfs_rmap_cancel(
+ struct xfs_rmap_list *rlist) /* list of rmap intents */
+{
+ struct xfs_rmap_intent *free; /* free list item */
+ struct xfs_rmap_intent *next;
+
+ if (rlist->rl_count == 0)
+ return;
+ ASSERT(rlist->rl_first != NULL);
+ for (free = rlist->rl_first; free; free = next) {
+ next = free->ri_next;
+ kmem_free(free);
+ }
+ rlist->rl_count = 0;
+ rlist->rl_first = NULL;
+}
+
+static xfs_agnumber_t
+rmap_ag(
+ struct xfs_mount *mp,
+ struct xfs_rmap_intent *ri)
+{
+ switch (ri->ri_type) {
+ case XFS_RMAP_COMBINE:
+ case XFS_RMAP_LCOMBINE:
+ return XFS_FSB_TO_AGNO(mp, ri->ri_u.a.left.br_startblock);
+ case XFS_RMAP_RCOMBINE:
+ return XFS_FSB_TO_AGNO(mp, ri->ri_u.a.right.br_startblock);
+ case XFS_RMAP_INSERT:
+ case XFS_RMAP_DELETE:
+ case XFS_RMAP_MOVE:
+ case XFS_RMAP_SLIDE:
+ case XFS_RMAP_RESIZE:
+ return XFS_FSB_TO_AGNO(mp, ri->ri_prev.br_startblock);
+ default:
+ ASSERT(0);
+ }
+ return 0; /* shut up, gcc */
+}
+
+/*
+ * Free up any items left in the extent list, using the given transaction.
+ */
+int
+__xfs_rmap_finish(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_rmap_list *rlist)
+{
+ struct xfs_rmap_intent *free; /* free list item */
+ struct xfs_rmap_intent *next;
+ struct xfs_btree_cur *rcur = NULL;
+ struct xfs_buf *agbp = NULL;
+ int error = 0;
+ xfs_agnumber_t agno;
+
+ if (rlist->rl_count == 0)
+ return 0;
+
+ ASSERT(rlist->rl_first != NULL);
+ for (free = rlist->rl_first; free; free = next) {
+ agno = rmap_ag(mp, free);
+ ASSERT(agno != NULLAGNUMBER);
+ if (rcur && agno < rcur->bc_private.a.agno) {
+ error = -EFSCORRUPTED;
+ break;
+ }
+
+ ASSERT(rcur == NULL || agno >= rcur->bc_private.a.agno);
+ if (rcur == NULL || agno > rcur->bc_private.a.agno) {
+ if (rcur) {
+ xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
+ xfs_trans_brelse(tp, agbp);
+ }
+
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+ if (error)
+ break;
+
+ rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+ if (!rcur) {
+ xfs_trans_brelse(tp, agbp);
+ error = -ENOMEM;
+ break;
+ }
+ }
+
+ switch (free->ri_type) {
+ case XFS_RMAP_COMBINE:
+ error = __xfs_rmap_combine(rcur, free->ri_ino,
+ free->ri_whichfork, &free->ri_u.a.left,
+ &free->ri_u.a.right, &free->ri_prev);
+ break;
+ case XFS_RMAP_LCOMBINE:
+ error = __xfs_rmap_lcombine(rcur, free->ri_ino,
+ free->ri_whichfork, &free->ri_u.a.left,
+ &free->ri_prev);
+ break;
+ case XFS_RMAP_RCOMBINE:
+ error = __xfs_rmap_rcombine(rcur, free->ri_ino,
+ free->ri_whichfork, &free->ri_u.a.right,
+ &free->ri_prev);
+ break;
+ case XFS_RMAP_INSERT:
+ error = __xfs_rmap_insert(rcur, free->ri_ino,
+ free->ri_whichfork, &free->ri_prev);
+ break;
+ case XFS_RMAP_DELETE:
+ error = __xfs_rmap_delete(rcur, free->ri_ino,
+ free->ri_whichfork, &free->ri_prev);
+ break;
+ case XFS_RMAP_MOVE:
+ error = __xfs_rmap_move(rcur, free->ri_ino,
+ free->ri_whichfork, &free->ri_prev,
+ free->ri_u.b.adj);
+ break;
+ case XFS_RMAP_SLIDE:
+ error = __xfs_rmap_slide(rcur, free->ri_ino,
+ free->ri_whichfork, &free->ri_prev,
+ free->ri_u.b.adj);
+ break;
+ case XFS_RMAP_RESIZE:
+ error = __xfs_rmap_resize(rcur, free->ri_ino,
+ free->ri_whichfork, &free->ri_prev,
+ free->ri_u.b.adj);
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ if (error)
+ break;
+ next = free->ri_next;
+ kmem_free(free);
+ }
+
+ if (rcur)
+ xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR :
+ XFS_BTREE_NOERROR);
+ if (agbp)
+ xfs_trans_brelse(tp, agbp);
+
+ for (; free; free = next) {
+ next = free->ri_next;
+ kmem_free(free);
+ }
+
+ rlist->rl_count = 0;
+ rlist->rl_first = NULL;
+ return error;
+}
+
+/*
+ * Free up any items left in the intent list.
+ */
+int
+xfs_rmap_finish(
+ struct xfs_mount *mp,
+ struct xfs_trans **tpp,
+ struct xfs_inode *ip,
+ struct xfs_rmap_list *rlist,
+ int *committed)
+{
+ int error;
+
+ *committed = 0;
+ if (rlist->rl_count == 0)
+ return 0;
+
+ error = xfs_trans_roll(tpp, ip);
+ if (error)
+ return error;
+ *committed = 1;
+
+ return __xfs_rmap_finish(mp, *tpp, rlist);
+}
+
+/*
+ * Record a rmap intent; the list is kept sorted first by AG and then by
+ * increasing age.
+ */
+static int
+__xfs_rmap_add(
+ struct xfs_mount *mp,
+ struct xfs_rmap_list *rlist,
+ struct xfs_rmap_intent *ri)
+{
+ struct xfs_rmap_intent *cur; /* current (next) element */
+ struct xfs_rmap_intent *new;
+ struct xfs_rmap_intent *prev; /* previous element */
+ xfs_agnumber_t new_agno, cur_agno;
+
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return 0;
+
+ new = kmem_zalloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
+ *new = *ri;
+ new_agno = rmap_ag(mp, new);
+ ASSERT(new_agno != NULLAGNUMBER);
+
+ for (prev = NULL, cur = rlist->rl_first;
+ cur != NULL;
+ prev = cur, cur = cur->ri_next) {
+ cur_agno = rmap_ag(mp, cur);
+ if (cur_agno > new_agno)
+ break;
+ }
+ if (prev)
+ prev->ri_next = new;
+ else
+ rlist->rl_first = new;
+ new->ri_next = cur;
+ rlist->rl_count++;
+ return 0;
+}
+
+/* Combine two adjacent rmap extents */
+int
+xfs_rmap_combine(
+ struct xfs_mount *mp,
+ struct xfs_rmap_list *rlist,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *left,
+ struct xfs_bmbt_irec *right,
+ struct xfs_bmbt_irec *prev)
+{
+ struct xfs_rmap_intent ri;
+
+ ri.ri_type = XFS_RMAP_COMBINE;
+ ri.ri_ino = ino;
+ ri.ri_whichfork = whichfork;
+ ri.ri_prev = *prev;
+ ri.ri_u.a.left = *left;
+ ri.ri_u.a.right = *right;
+
+ return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Extend a left rmap extent */
+int
+xfs_rmap_lcombine(
+ struct xfs_mount *mp,
+ struct xfs_rmap_list *rlist,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *LEFT,
+ struct xfs_bmbt_irec *PREV)
+{
+ struct xfs_rmap_intent ri;
+
+ ri.ri_type = XFS_RMAP_LCOMBINE;
+ ri.ri_ino = ino;
+ ri.ri_whichfork = whichfork;
+ ri.ri_prev = *PREV;
+ ri.ri_u.a.left = *LEFT;
+
+ return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Extend a right rmap extent */
+int
+xfs_rmap_rcombine(
+ struct xfs_mount *mp,
+ struct xfs_rmap_list *rlist,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *RIGHT,
+ struct xfs_bmbt_irec *PREV)
+{
+ struct xfs_rmap_intent ri;
+
+ ri.ri_type = XFS_RMAP_RCOMBINE;
+ ri.ri_ino = ino;
+ ri.ri_whichfork = whichfork;
+ ri.ri_prev = *PREV;
+ ri.ri_u.a.right = *RIGHT;
+
+ return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Insert a rmap extent */
+int
+xfs_rmap_insert(
+ struct xfs_mount *mp,
+ struct xfs_rmap_list *rlist,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *new)
+{
+ struct xfs_rmap_intent ri;
+
+ ri.ri_type = XFS_RMAP_INSERT;
+ ri.ri_ino = ino;
+ ri.ri_whichfork = whichfork;
+ ri.ri_prev = *new;
+
+ return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Delete a rmap extent */
+int
+xfs_rmap_delete(
+ struct xfs_mount *mp,
+ struct xfs_rmap_list *rlist,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *new)
+{
+ struct xfs_rmap_intent ri;
+
+ ri.ri_type = XFS_RMAP_DELETE;
+ ri.ri_ino = ino;
+ ri.ri_whichfork = whichfork;
+ ri.ri_prev = *new;
+
+ return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Change the start of an rmap */
+int
+xfs_rmap_move(
+ struct xfs_mount *mp,
+ struct xfs_rmap_list *rlist,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *PREV,
+ long start_adj)
+{
+ struct xfs_rmap_intent ri;
+
+ ri.ri_type = XFS_RMAP_MOVE;
+ ri.ri_ino = ino;
+ ri.ri_whichfork = whichfork;
+ ri.ri_prev = *PREV;
+ ri.ri_u.b.adj = start_adj;
+
+ return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Change the logical offset of an rmap */
+int
+xfs_rmap_slide(
+ struct xfs_mount *mp,
+ struct xfs_rmap_list *rlist,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *PREV,
+ long start_adj)
+{
+ struct xfs_rmap_intent ri;
+
+ ri.ri_type = XFS_RMAP_SLIDE;
+ ri.ri_ino = ino;
+ ri.ri_whichfork = whichfork;
+ ri.ri_prev = *PREV;
+ ri.ri_u.b.adj = start_adj;
+
+ return __xfs_rmap_add(mp, rlist, &ri);
+}
+
+/* Change the size of an rmap */
+int
+xfs_rmap_resize(
+ struct xfs_mount *mp,
+ struct xfs_rmap_list *rlist,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *PREV,
+ long size_adj)
+{
+ struct xfs_rmap_intent ri;
+
+ ri.ri_type = XFS_RMAP_RESIZE;
+ ri.ri_ino = ino;
+ ri.ri_whichfork = whichfork;
+ ri.ri_prev = *PREV;
+ ri.ri_u.b.adj = size_adj;
+
+ return __xfs_rmap_add(mp, rlist, &ri);
+}
diff --git a/libxfs/xfs_rmap_btree.c b/libxfs/xfs_rmap_btree.c
index ed1792d..ad6a928 100644
--- a/libxfs/xfs_rmap_btree.c
+++ b/libxfs/xfs_rmap_btree.c
@@ -36,37 +36,29 @@
/*
* Reverse map btree.
*
- * This is a per-ag tree used to track the owner of a given extent. Owner
- * records are inserted when an extent is allocated, and removed when an extent
- * is freed. For existing filesystems, there can only be one owner of an extent,
- * usually an inode or some other metadata structure like a AG btree.
- *
- * Initial thoughts are that the
- * value of the owner field needs external flags to define what it means, and
- * hence we need a flags field in the record. This means the record is going to
- * be larger than 16 bytes (agbno,len,owner = 16 bytes), so maybe this isn't the
- * best idea. Initially just implement the owner field - we can probably steal
- * bits from the extent length field for type descriptors given that MAXEXTLEN
- * is only 21 bits if we want to store the type as well. Keep in mind that if we
- * want to do this there are still restrictions on the length of extents we
- * track in the rmap btree (see comments on xfs_rmap_free()).
+ * This is a per-ag tree used to track the owner(s) of a given extent. With
+ * reflink it is possible for there to be multiple owners, which is a departure
+ * from classic XFS. Owner records for data extents are inserted when the
+ * extent is mapped and removed when an extent is unmapped. Owner records for
+ * all other block types (i.e. metadata) are inserted when an extent is
+ * allocated and removed when an extent is freed. There can only be one owner
+ * of a metadata extent, usually an inode or some other metadata structure like
+ * an AG btree.
*
* The rmap btree is part of the free space management, so blocks for the tree
* are sourced from the agfl. Hence we need transaction reservation support for
* this tree so that the freelist is always large enough. This also impacts on
* the minimum space we need to leave free in the AG.
*
- * The tree is ordered by block number - there's no need to order/search by
- * extent size for online updating/management of the tree, and the reverse
- * lookups are going to be "who owns this block" and so are by-block ordering is
- * perfect for this.
- *
- * XXX: open question is how to handle blocks that are owned by the freespace
- * tree blocks. Right now they will be classified when they are moved to the
- * freelist or removed from the freelist. i.e. the extent allocation/freeing
- * will mark the extents allocated as owned by the AG.
+ * The tree is ordered by [ag block, owner, offset]. This is a large key size,
+ * but it is the only way to enforce unique keys when a block can be owned by
+ * multiple files at any offset. There's no need to order/search by extent
+ * size for online updating/management of the tree. It is intended that most
+ * reverse lookups will be to find the owner(s) of a particular block, or to
+ * try to recover tree and file data from corrupt primary metadata.
*/
-STATIC struct xfs_btree_cur *
+
+static struct xfs_btree_cur *
xfs_rmapbt_dup_cursor(
struct xfs_btree_cur *cur)
{
@@ -177,6 +169,8 @@ xfs_rmapbt_init_key_from_rec(
union xfs_btree_rec *rec)
{
key->rmap.rm_startblock = rec->rmap.rm_startblock;
+ key->rmap.rm_owner = rec->rmap.rm_owner;
+ key->rmap.rm_offset = rec->rmap.rm_offset;
}
STATIC void
@@ -185,6 +179,8 @@ xfs_rmapbt_init_rec_from_key(
union xfs_btree_rec *rec)
{
rec->rmap.rm_startblock = key->rmap.rm_startblock;
+ rec->rmap.rm_owner = key->rmap.rm_owner;
+ rec->rmap.rm_offset = key->rmap.rm_offset;
}
STATIC void
@@ -195,6 +191,7 @@ xfs_rmapbt_init_rec_from_cur(
rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
+ rec->rmap.rm_offset = cpu_to_be64(cur->bc_rec.r.rm_offset);
}
STATIC void
@@ -217,8 +214,16 @@ xfs_rmapbt_key_diff(
{
struct xfs_rmap_irec *rec = &cur->bc_rec.r;
struct xfs_rmap_key *kp = &key->rmap;
-
- return (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+ __int64_t d;
+
+ d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+ if (d)
+ return d;
+ d = (__int64_t)be64_to_cpu(kp->rm_owner) - rec->rm_owner;
+ if (d)
+ return d;
+ d = (__int64_t)be64_to_cpu(kp->rm_offset) - rec->rm_offset;
+ return d;
}
static bool
@@ -242,7 +247,7 @@ xfs_rmapbt_verify(
* from the on disk AGF. Again, we can only check against maximum limits
* in this case.
*/
- if (block->bb_magic!= cpu_to_be32(XFS_RMAP_CRC_MAGIC))
+ if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
return false;
if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
@@ -308,11 +313,11 @@ xfs_rmapbt_write_verify(
}
const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
+ .name = "xfs_rmapbt",
.verify_read = xfs_rmapbt_read_verify,
.verify_write = xfs_rmapbt_write_verify,
};
-
#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
xfs_rmapbt_keys_inorder(
@@ -320,8 +325,16 @@ xfs_rmapbt_keys_inorder(
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- return be32_to_cpu(k1->rmap.rm_startblock) <
- be32_to_cpu(k2->rmap.rm_startblock);
+ if (be32_to_cpu(k1->rmap.rm_startblock) <
+ be32_to_cpu(k2->rmap.rm_startblock))
+ return 1;
+ if (be64_to_cpu(k1->rmap.rm_owner) <
+ be64_to_cpu(k2->rmap.rm_owner))
+ return 1;
+ if (be64_to_cpu(k1->rmap.rm_offset) <=
+ be64_to_cpu(k2->rmap.rm_offset))
+ return 1;
+ return 0;
}
STATIC int
@@ -330,9 +343,16 @@ xfs_rmapbt_recs_inorder(
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
- return be32_to_cpu(r1->rmap.rm_startblock) +
- be32_to_cpu(r1->rmap.rm_blockcount) <=
- be32_to_cpu(r2->rmap.rm_startblock);
+ if (be32_to_cpu(r1->rmap.rm_startblock) <
+ be32_to_cpu(r2->rmap.rm_startblock))
+ return 1;
+ if (be64_to_cpu(r1->rmap.rm_offset) <
+ be64_to_cpu(r2->rmap.rm_offset))
+ return 1;
+ if (be64_to_cpu(r1->rmap.rm_owner) <=
+ be64_to_cpu(r2->rmap.rm_owner))
+ return 1;
+ return 0;
}
#endif /* DEBUG */
diff --git a/libxfs/xfs_rmap_btree.h b/libxfs/xfs_rmap_btree.h
index 9ad65e5..4fe13f3 100644
--- a/libxfs/xfs_rmap_btree.h
+++ b/libxfs/xfs_rmap_btree.h
@@ -18,13 +18,10 @@
#ifndef __XFS_RMAP_BTREE_H__
#define __XFS_RMAP_BTREE_H__
-/*
- * Freespace on-disk structures
- */
-
struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;
+struct xfs_rmap_list;
/* rmaps only exist on crc enabled filesystems */
#define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
@@ -55,11 +52,80 @@ struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
xfs_agnumber_t agno);
int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
+int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ xfs_extlen_t len, uint64_t owner, uint64_t offset, int *stat);
+int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ xfs_extlen_t len, uint64_t owner, uint64_t offset, int *stat);
+int xfs_rmapbt_insert(struct xfs_btree_cur *rcur, xfs_agblock_t agbno,
+ xfs_extlen_t len, uint64_t owner, uint64_t offset);
+int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec,
+ int *stat);
+
+/* functions for updating the rmapbt for bmbt blocks and AG btree blocks */
int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
- uint64_t owner);
+ struct xfs_owner_info *oinfo);
int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
- uint64_t owner);
+ struct xfs_owner_info *oinfo);
+
+/* functions for updating the rmapbt based on bmbt map/unmap operations */
+int xfs_rmap_combine(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+ xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *LEFT,
+ struct xfs_bmbt_irec *RIGHT, struct xfs_bmbt_irec *PREV);
+int xfs_rmap_lcombine(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+ xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *LEFT,
+ struct xfs_bmbt_irec *PREV);
+int xfs_rmap_rcombine(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+ xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *RIGHT,
+ struct xfs_bmbt_irec *PREV);
+int xfs_rmap_insert(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+ xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *rec);
+int xfs_rmap_delete(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+ xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *rec);
+int xfs_rmap_move(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+ xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *PREV,
+ long start_adj);
+int xfs_rmap_slide(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+ xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *PREV,
+ long start_adj);
+int xfs_rmap_resize(struct xfs_mount *mp, struct xfs_rmap_list *rlist,
+ xfs_ino_t ino, int whichfork, struct xfs_bmbt_irec *PREV,
+ long size_adj);
+
+enum xfs_rmap_intent_type {
+ XFS_RMAP_COMBINE,
+ XFS_RMAP_LCOMBINE,
+ XFS_RMAP_RCOMBINE,
+ XFS_RMAP_INSERT,
+ XFS_RMAP_DELETE,
+ XFS_RMAP_MOVE,
+ XFS_RMAP_SLIDE,
+ XFS_RMAP_RESIZE,
+};
+
+struct xfs_rmap_intent {
+ struct xfs_rmap_intent *ri_next;
+ enum xfs_rmap_intent_type ri_type;
+ xfs_ino_t ri_ino;
+ int ri_whichfork;
+ struct xfs_bmbt_irec ri_prev;
+ union {
+ struct {
+ struct xfs_bmbt_irec left;
+ struct xfs_bmbt_irec right;
+ } a;
+ struct {
+ long adj;
+ } b;
+ } ri_u;
+};
+
+void xfs_rmap_cancel(struct xfs_rmap_list *rlist);
+int __xfs_rmap_finish(struct xfs_mount *mp, struct xfs_trans *tp,
+ struct xfs_rmap_list *rlist);
+int xfs_rmap_finish(struct xfs_mount *mp, struct xfs_trans **tpp,
+ struct xfs_inode *ip, struct xfs_rmap_list *rlist,
+ int *committed);
#endif /* __XFS_RMAP_BTREE_H__ */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 17/53] libxfs: refactor short btree block verification
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (15 preceding siblings ...)
2015-12-19 9:06 ` [PATCH 16/53] libxfs: enhance rmapbt definition to support reflink Darrick J. Wong
@ 2015-12-19 9:06 ` Darrick J. Wong
2015-12-19 9:06 ` [PATCH 18/53] xfs: don't update rmapbt when fixing agfl Darrick J. Wong
` (35 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:06 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Create xfs_btree_sblock_verify() to verify short-format btree blocks
(i.e. the per-AG btrees with 32-bit block pointers) instead of
open-coding them.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_alloc_btree.c | 30 ++---------------------
libxfs/xfs_btree.c | 58 +++++++++++++++++++++++++++++++++++++++++++++
libxfs/xfs_btree.h | 3 ++
libxfs/xfs_ialloc_btree.c | 24 ++-----------------
libxfs/xfs_rmap_btree.c | 23 ++----------------
5 files changed, 70 insertions(+), 68 deletions(-)
diff --git a/libxfs/xfs_alloc_btree.c b/libxfs/xfs_alloc_btree.c
index f583bd1..6f13572 100644
--- a/libxfs/xfs_alloc_btree.c
+++ b/libxfs/xfs_alloc_btree.c
@@ -293,12 +293,7 @@ xfs_allocbt_verify(
case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTB_MAGIC):
@@ -311,12 +306,7 @@ xfs_allocbt_verify(
case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTC_MAGIC):
@@ -330,21 +320,7 @@ xfs_allocbt_verify(
return false;
}
- /* numrecs verification */
- if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
-
- return true;
+ return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
}
static void
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
index 5cf8ee5..9b918d2 100644
--- a/libxfs/xfs_btree.c
+++ b/libxfs/xfs_btree.c
@@ -4083,3 +4083,61 @@ xfs_btree_change_owner(
return 0;
}
+
+/**
+ * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
+ * btree block
+ *
+ * @bp: buffer containing the btree block
+ * Checks the CRC feature bit, the metadata UUID, the block number and, when
+ * a per-AG structure is attached to the buffer, the owning AG number.
+ */
+bool
+xfs_btree_sblock_v5hdr_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+ return true;
+}
+
+/**
+ * xfs_btree_sblock_verify() -- verify a short-format btree block
+ *
+ * @bp: buffer containing the btree block
+ * @max_recs: maximum records allowed in this btree node
+ */
+bool
+xfs_btree_sblock_verify(
+ struct xfs_buf *bp,
+ unsigned int max_recs)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
+ return false;
+
+ /* sibling pointer verification */
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
+}
diff --git a/libxfs/xfs_btree.h b/libxfs/xfs_btree.h
index 48ab2b1..dd29d15 100644
--- a/libxfs/xfs_btree.h
+++ b/libxfs/xfs_btree.h
@@ -471,4 +471,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_BTREE_TRACE_ARGR(c, r)
#define XFS_BTREE_TRACE_CURSOR(c, t)
+bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
+bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
+
#endif /* __XFS_BTREE_H__ */
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
index 88a3e87..ceeb7cb 100644
--- a/libxfs/xfs_ialloc_btree.c
+++ b/libxfs/xfs_ialloc_btree.c
@@ -223,7 +223,6 @@ xfs_inobt_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_perag *pag = bp->b_pag;
unsigned int level;
/*
@@ -241,12 +240,7 @@ xfs_inobt_verify(
case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_IBT_MAGIC):
@@ -256,24 +250,12 @@ xfs_inobt_verify(
return 0;
}
- /* numrecs and level verification */
+ /* level verification */
level = be16_to_cpu(block->bb_level);
if (level >= mp->m_in_maxlevels)
return false;
- if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- return true;
+ return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
}
static void
diff --git a/libxfs/xfs_rmap_btree.c b/libxfs/xfs_rmap_btree.c
index ad6a928..b7024cc 100644
--- a/libxfs/xfs_rmap_btree.c
+++ b/libxfs/xfs_rmap_btree.c
@@ -252,13 +252,10 @@ xfs_rmapbt_verify(
if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
+ /* level verification */
level = be16_to_cpu(block->bb_level);
if (pag && pag->pagf_init) {
if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
@@ -266,21 +263,7 @@ xfs_rmapbt_verify(
} else if (level >= mp->m_ag_maxlevels)
return false;
- /* numrecs verification */
- if (be16_to_cpu(block->bb_numrecs) > mp->m_rmap_mxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
-
- return true;
+ return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
}
static void
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 18/53] xfs: don't update rmapbt when fixing agfl
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (16 preceding siblings ...)
2015-12-19 9:06 ` [PATCH 17/53] libxfs: refactor short btree block verification Darrick J. Wong
@ 2015-12-19 9:06 ` Darrick J. Wong
2015-12-19 9:06 ` [PATCH 19/53] libxfs: implement XFS_IOC_SWAPEXT when rmap btree is enabled Darrick J. Wong
` (34 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:06 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Allow a caller of xfs_alloc_fix_freelist to disable rmapbt updates
when fixing the AG freelist. xfs_repair needs this during phase 5
to be able to adjust the freelist while it's reconstructing the rmap
btree; the missing entries will be added back at the very end of
phase 5 once the AGFL contents settle down.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_alloc.c | 40 ++++++++++++++++++++++++++--------------
libxfs/xfs_alloc.h | 2 ++
2 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index 6c2b991..fd0767e 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -2098,26 +2098,38 @@ xfs_alloc_fix_freelist(
* anything other than extra overhead when we need to put more blocks
* back on the free list? Maybe we should only do this when space is
* getting low or the AGFL is more than half full?
+ *
+ * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
+ * big; the NORMAP flag prevents AGFL expand/shrink operations from
+ * updating the rmapbt. Both flags are used in xfs_repair while we're
+ * rebuilding the rmapbt, and neither is used by the kernel. They're
+ * both required to ensure that rmaps are correctly recorded for the
+ * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
+ * repair/rmap.c in xfsprogs for details.
*/
- XFS_RMAP_AG_OWNER(&targs.oinfo, XFS_RMAP_OWN_AG);
- while (pag->pagf_flcount > need) {
- struct xfs_buf *bp;
+ memset(&targs, 0, sizeof(targs));
+ if (!(flags & XFS_ALLOC_FLAG_NOSHRINK)) {
+ if (!(flags & XFS_ALLOC_FLAG_NORMAP))
+ XFS_RMAP_AG_OWNER(&targs.oinfo, XFS_RMAP_OWN_AG);
+ while (pag->pagf_flcount > need) {
+ struct xfs_buf *bp;
- error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
- if (error)
- goto out_agbp_relse;
- error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
- &targs.oinfo, 1);
- if (error)
- goto out_agbp_relse;
- bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
- xfs_trans_binval(tp, bp);
+ error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
+ if (error)
+ goto out_agbp_relse;
+ error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
+ &targs.oinfo, 1);
+ if (error)
+ goto out_agbp_relse;
+ bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
+ xfs_trans_binval(tp, bp);
+ }
}
- memset(&targs, 0, sizeof(targs));
targs.tp = tp;
targs.mp = mp;
- XFS_RMAP_AG_OWNER(&targs.oinfo, XFS_RMAP_OWN_AG);
+ if (!(flags & XFS_ALLOC_FLAG_NORMAP))
+ XFS_RMAP_AG_OWNER(&targs.oinfo, XFS_RMAP_OWN_AG);
targs.agbp = agbp;
targs.agno = args->agno;
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
diff --git a/libxfs/xfs_alloc.h b/libxfs/xfs_alloc.h
index f78ce53..754b5dd 100644
--- a/libxfs/xfs_alloc.h
+++ b/libxfs/xfs_alloc.h
@@ -54,6 +54,8 @@ typedef unsigned int xfs_alloctype_t;
*/
#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
+#define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */
+#define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */
/*
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 19/53] libxfs: implement XFS_IOC_SWAPEXT when rmap btree is enabled
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (17 preceding siblings ...)
2015-12-19 9:06 ` [PATCH 18/53] xfs: don't update rmapbt when fixing agfl Darrick J. Wong
@ 2015-12-19 9:06 ` Darrick J. Wong
2015-12-19 9:07 ` [PATCH 20/53] xfs_db: display rmap btree contents Darrick J. Wong
` (33 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:06 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Synchronize the libxfs components of the kernel patch. No code
in xfsprogs actually calls this.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_btree.c | 17 +++++++++
libxfs/xfs_rmap.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++
libxfs/xfs_rmap_btree.h | 9 +++++
3 files changed, 116 insertions(+)
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
index 9b918d2..1622ddd 100644
--- a/libxfs/xfs_btree.c
+++ b/libxfs/xfs_btree.c
@@ -29,6 +29,7 @@
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_alloc.h"
+#include "xfs_rmap_btree.h"
/*
* Cursor allocation zone.
@@ -4001,6 +4002,8 @@ xfs_btree_block_change_owner(
struct xfs_btree_block *block;
struct xfs_buf *bp;
union xfs_btree_ptr rptr;
+ struct xfs_owner_info old_oinfo, new_oinfo;
+ int error;
/* do right sibling readahead */
xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
@@ -4012,6 +4015,20 @@ xfs_btree_block_change_owner(
else
block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
+ /* change rmap owners (bmbt blocks only) */
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ XFS_RMAP_INO_BMBT_OWNER(&old_oinfo,
+ cur->bc_private.b.ip->i_ino,
+ cur->bc_private.b.whichfork);
+ XFS_RMAP_INO_BMBT_OWNER(&new_oinfo,
+ new_owner,
+ cur->bc_private.b.whichfork);
+ error = xfs_rmap_change_bmbt_owner(cur, bp, &old_oinfo,
+ &new_oinfo);
+ if (error)
+ return error;
+ }
+
/*
* If the block is a root block hosted in an inode, we might not have a
* buffer pointer here and we shouldn't attempt to log the change as the
diff --git a/libxfs/xfs_rmap.c b/libxfs/xfs_rmap.c
index 43354b9..5ae4c1e 100644
--- a/libxfs/xfs_rmap.c
+++ b/libxfs/xfs_rmap.c
@@ -1260,3 +1260,93 @@ xfs_rmap_resize(
return __xfs_rmap_add(mp, rlist, &ri);
}
+
+/**
+ * Change ownership of a file's BMBT block reverse-mappings.
+ */
+int
+xfs_rmap_change_bmbt_owner(
+ struct xfs_btree_cur *bcur,
+ struct xfs_buf *bp,
+ struct xfs_owner_info *old_owner,
+ struct xfs_owner_info *new_owner)
+{
+ struct xfs_buf *agfbp;
+ xfs_fsblock_t fsbno;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (!xfs_sb_version_hasrmapbt(&bcur->bc_mp->m_sb) || !bp)
+ return 0;
+
+ fsbno = XFS_DADDR_TO_FSB(bcur->bc_mp, XFS_BUF_ADDR(bp));
+ agno = XFS_FSB_TO_AGNO(bcur->bc_mp, fsbno);
+ agbno = XFS_FSB_TO_AGBNO(bcur->bc_mp, fsbno);
+
+ error = xfs_read_agf(bcur->bc_mp, bcur->bc_tp, agno, 0, &agfbp);
+
+ error = xfs_rmap_free(bcur->bc_tp, agfbp, agno, agbno, 1, old_owner);
+ if (error)
+ goto err;
+
+ error = xfs_rmap_alloc(bcur->bc_tp, agfbp, agno, agbno, 1, new_owner);
+ if (error)
+ goto err;
+
+err:
+ xfs_trans_brelse(bcur->bc_tp, agfbp);
+ return error;
+}
+
+/**
+ * Change the ownership on a file's extent's reverse-mappings.
+ */
+int
+xfs_rmap_change_extent_owner(
+ struct xfs_mount *mp,
+ struct xfs_inode *ip,
+ xfs_ino_t ino,
+ xfs_fileoff_t isize,
+ struct xfs_trans *tp,
+ int whichfork,
+ xfs_ino_t new_owner,
+ struct xfs_rmap_list *rlist)
+{
+ struct xfs_bmbt_irec imap;
+ int nimaps;
+ xfs_fileoff_t offset;
+ xfs_filblks_t len;
+ int flags = 0;
+ int error;
+
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return 0;
+
+ if (whichfork == XFS_ATTR_FORK)
+ flags |= XFS_BMAPI_ATTRFORK;
+
+ offset = 0;
+ len = XFS_B_TO_FSB(mp, isize);
+ nimaps = 1;
+ error = xfs_bmapi_read(ip, offset, len, &imap, &nimaps, flags);
+ while (error == 0 && nimaps > 0) {
+ if (imap.br_startblock == HOLESTARTBLOCK ||
+ imap.br_startblock == DELAYSTARTBLOCK)
+ goto advloop;
+
+ error = xfs_rmap_delete(mp, rlist, ino, whichfork, &imap);
+ if (error)
+ break;
+ error = xfs_rmap_insert(mp, rlist, new_owner, whichfork, &imap);
+ if (error)
+ break;
+advloop:
+ offset += imap.br_blockcount;
+ len -= imap.br_blockcount;
+ nimaps = 1;
+ error = xfs_bmapi_read(ip, offset, len, &imap, &nimaps, flags);
+ }
+
+ return error;
+}
diff --git a/libxfs/xfs_rmap_btree.h b/libxfs/xfs_rmap_btree.h
index 4fe13f3..b4e085c 100644
--- a/libxfs/xfs_rmap_btree.h
+++ b/libxfs/xfs_rmap_btree.h
@@ -128,4 +128,13 @@ int xfs_rmap_finish(struct xfs_mount *mp, struct xfs_trans **tpp,
struct xfs_inode *ip, struct xfs_rmap_list *rlist,
int *committed);
+/* functions for changing rmap ownership */
+int xfs_rmap_change_extent_owner(struct xfs_mount *mp, struct xfs_inode *ip,
+ xfs_ino_t ino, xfs_fileoff_t isize, struct xfs_trans *tp,
+ int whichfork, xfs_ino_t new_owner,
+ struct xfs_rmap_list *rlist);
+int xfs_rmap_change_bmbt_owner(struct xfs_btree_cur *bcur, struct xfs_buf *bp,
+ struct xfs_owner_info *old_owner,
+ struct xfs_owner_info *new_owner);
+
#endif /* __XFS_RMAP_BTREE_H__ */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 20/53] xfs_db: display rmap btree contents
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (18 preceding siblings ...)
2015-12-19 9:06 ` [PATCH 19/53] libxfs: implement XFS_IOC_SWAPEXT when rmap btree is enabled Darrick J. Wong
@ 2015-12-19 9:07 ` Darrick J. Wong
2015-12-19 9:07 ` [PATCH 21/53] xfs_dump: display enhanced rmapbt fields Darrick J. Wong
` (32 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:07 UTC (permalink / raw)
To: david, darrick.wong; +Cc: Dave Chinner, xfs
From: Dave Chinner <david@fromorbit.com>
Teach the debugger how to dump the reverse-mapping btree contents.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
[split patch, add commit message]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
db/agf.c | 6 ++++++
db/btblock.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
db/btblock.h | 5 +++++
db/field.c | 9 +++++++++
db/field.h | 4 ++++
db/type.c | 3 +++
db/type.h | 2 +-
7 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/db/agf.c b/db/agf.c
index e10526d..ffdd550 100644
--- a/db/agf.c
+++ b/db/agf.c
@@ -55,6 +55,9 @@ const field_t agf_flds[] = {
{ "cntroot", FLDT_AGBLOCK,
OI(OFF(roots) + XFS_BTNUM_CNT * SZ(roots[XFS_BTNUM_CNT])), C1, 0,
TYP_CNTBT },
+ { "rmaproot", FLDT_AGBLOCK,
+ OI(OFF(roots) + XFS_BTNUM_RMAP * SZ(roots[XFS_BTNUM_RMAP])), C1, 0,
+ TYP_RMAPBT },
{ "levels", FLDT_UINT32D, OI(OFF(levels)), CI(XFS_BTNUM_AGF),
FLD_ARRAY|FLD_SKIPALL, TYP_NONE },
{ "bnolevel", FLDT_UINT32D,
@@ -63,6 +66,9 @@ const field_t agf_flds[] = {
{ "cntlevel", FLDT_UINT32D,
OI(OFF(levels) + XFS_BTNUM_CNT * SZ(levels[XFS_BTNUM_CNT])), C1, 0,
TYP_NONE },
+ { "rmaplevel", FLDT_UINT32D,
+ OI(OFF(levels) + XFS_BTNUM_RMAP * SZ(levels[XFS_BTNUM_RMAP])), C1, 0,
+ TYP_NONE },
{ "flfirst", FLDT_UINT32D, OI(OFF(flfirst)), C1, 0, TYP_NONE },
{ "fllast", FLDT_UINT32D, OI(OFF(fllast)), C1, 0, TYP_NONE },
{ "flcount", FLDT_UINT32D, OI(OFF(flcount)), C1, 0, TYP_NONE },
diff --git a/db/btblock.c b/db/btblock.c
index 46140fc..e45ee03 100644
--- a/db/btblock.c
+++ b/db/btblock.c
@@ -96,6 +96,12 @@ struct xfs_db_btree {
sizeof(xfs_inobt_rec_t),
sizeof(__be32),
},
+ { XFS_RMAP_CRC_MAGIC,
+ XFS_BTREE_SBLOCK_CRC_LEN,
+ sizeof(struct xfs_rmap_key),
+ sizeof(struct xfs_rmap_rec),
+ sizeof(__be32),
+ },
{ 0,
},
};
@@ -607,3 +613,47 @@ const field_t cntbt_rec_flds[] = {
{ NULL }
};
#undef ROFF
+
+/* RMAP btree blocks */
+const field_t rmapbt_crc_hfld[] = {
+ { "", FLDT_RMAPBT_CRC, OI(0), C1, 0, TYP_NONE },
+ { NULL }
+};
+
+#define OFF(f) bitize(offsetof(struct xfs_btree_block, bb_ ## f))
+const field_t rmapbt_crc_flds[] = {
+ { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+ { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+ { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+ { "leftsib", FLDT_AGBLOCK, OI(OFF(u.s.bb_leftsib)), C1, 0, TYP_RMAPBT },
+ { "rightsib", FLDT_AGBLOCK, OI(OFF(u.s.bb_rightsib)), C1, 0, TYP_RMAPBT },
+ { "bno", FLDT_DFSBNO, OI(OFF(u.s.bb_blkno)), C1, 0, TYP_CNTBT },
+ { "lsn", FLDT_UINT64X, OI(OFF(u.s.bb_lsn)), C1, 0, TYP_NONE },
+ { "uuid", FLDT_UUID, OI(OFF(u.s.bb_uuid)), C1, 0, TYP_NONE },
+ { "owner", FLDT_AGNUMBER, OI(OFF(u.s.bb_owner)), C1, 0, TYP_NONE },
+ { "crc", FLDT_CRC, OI(OFF(u.s.bb_crc)), C1, 0, TYP_NONE },
+ { "recs", FLDT_RMAPBTREC, btblock_rec_offset, btblock_rec_count,
+ FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+ { "keys", FLDT_RMAPBTKEY, btblock_key_offset, btblock_key_count,
+ FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+ { "ptrs", FLDT_RMAPBTPTR, btblock_ptr_offset, btblock_key_count,
+ FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_RMAPBT },
+ { NULL }
+};
+#undef OFF
+
+#define KOFF(f) bitize(offsetof(struct xfs_rmap_key, rm_ ## f))
+const field_t rmapbt_key_flds[] = {
+ { "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA },
+ { NULL }
+};
+#undef KOFF
+
+#define ROFF(f) bitize(offsetof(struct xfs_rmap_rec, rm_ ## f))
+const field_t rmapbt_rec_flds[] = {
+ { "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA },
+ { "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE },
+ { "owner", FLDT_UINT64X, OI(ROFF(owner)), C1, 0, TYP_NONE },
+ { NULL }
+};
+#undef ROFF
diff --git a/db/btblock.h b/db/btblock.h
index 228eb36..35299b4 100644
--- a/db/btblock.h
+++ b/db/btblock.h
@@ -54,4 +54,9 @@ extern const struct field cntbt_crc_hfld[];
extern const struct field cntbt_key_flds[];
extern const struct field cntbt_rec_flds[];
+extern const struct field rmapbt_crc_flds[];
+extern const struct field rmapbt_crc_hfld[];
+extern const struct field rmapbt_key_flds[];
+extern const struct field rmapbt_rec_flds[];
+
extern int btblock_size(void *obj, int startoff, int idx);
diff --git a/db/field.c b/db/field.c
index 843c385..8298f29 100644
--- a/db/field.c
+++ b/db/field.c
@@ -164,6 +164,15 @@ const ftattr_t ftattrtab[] = {
{ FLDT_CNTBTREC, "cntbtrec", fp_sarray, (char *)cntbt_rec_flds,
SI(bitsz(xfs_alloc_rec_t)), 0, NULL, cntbt_rec_flds },
+ { FLDT_RMAPBT_CRC, "rmapbt", NULL, (char *)rmapbt_crc_flds, btblock_size,
+ FTARG_SIZE, NULL, rmapbt_crc_flds },
+ { FLDT_RMAPBTKEY, "rmapbtkey", fp_sarray, (char *)rmapbt_key_flds,
+ SI(bitsz(struct xfs_rmap_key)), 0, NULL, rmapbt_key_flds },
+ { FLDT_RMAPBTPTR, "rmapbtptr", fp_num, "%u",
+ SI(bitsz(xfs_rmap_ptr_t)), 0, fa_agblock, NULL },
+ { FLDT_RMAPBTREC, "rmapbtrec", fp_sarray, (char *)rmapbt_rec_flds,
+ SI(bitsz(struct xfs_rmap_rec)), 0, NULL, rmapbt_rec_flds },
+
/* CRC field */
{ FLDT_CRC, "crc", fp_crc, "%#x (%s)", SI(bitsz(__uint32_t)),
0, NULL, NULL },
diff --git a/db/field.h b/db/field.h
index 11aebc3..82701bb 100644
--- a/db/field.h
+++ b/db/field.h
@@ -80,6 +80,10 @@ typedef enum fldt {
FLDT_CNTBTKEY,
FLDT_CNTBTPTR,
FLDT_CNTBTREC,
+ FLDT_RMAPBT_CRC,
+ FLDT_RMAPBTKEY,
+ FLDT_RMAPBTPTR,
+ FLDT_RMAPBTREC,
/* CRC field type */
FLDT_CRC,
diff --git a/db/type.c b/db/type.c
index 1da7ee1..38372b8 100644
--- a/db/type.c
+++ b/db/type.c
@@ -58,6 +58,7 @@ static const typ_t __typtab[] = {
{ TYP_BMAPBTD, "bmapbtd", handle_struct, bmapbtd_hfld, NULL },
{ TYP_BNOBT, "bnobt", handle_struct, bnobt_hfld, NULL },
{ TYP_CNTBT, "cntbt", handle_struct, cntbt_hfld, NULL },
+ { TYP_RMAPBT, NULL },
{ TYP_DATA, "data", handle_block, NULL, NULL },
{ TYP_DIR2, "dir2", handle_struct, dir2_hfld, NULL },
{ TYP_DQBLK, "dqblk", handle_struct, dqblk_hfld, NULL },
@@ -88,6 +89,8 @@ static const typ_t __typtab_crc[] = {
&xfs_allocbt_buf_ops },
{ TYP_CNTBT, "cntbt", handle_struct, cntbt_crc_hfld,
&xfs_allocbt_buf_ops },
+ { TYP_RMAPBT, "rmapbt", handle_struct, rmapbt_crc_hfld,
+ &xfs_rmapbt_buf_ops },
{ TYP_DATA, "data", handle_block, NULL, NULL },
{ TYP_DIR2, "dir3", handle_struct, dir3_hfld,
&xfs_dir3_db_buf_ops },
diff --git a/db/type.h b/db/type.h
index d9583e5..1bef8e6 100644
--- a/db/type.h
+++ b/db/type.h
@@ -24,7 +24,7 @@ struct field;
typedef enum typnm
{
TYP_AGF, TYP_AGFL, TYP_AGI, TYP_ATTR, TYP_BMAPBTA,
- TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_DATA,
+ TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_RMAPBT, TYP_DATA,
TYP_DIR2, TYP_DQBLK, TYP_INOBT, TYP_INODATA, TYP_INODE,
TYP_LOG, TYP_RTBITMAP, TYP_RTSUMMARY, TYP_SB, TYP_SYMLINK,
TYP_TEXT, TYP_FINOBT, TYP_NONE
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 21/53] xfs_dump: display enhanced rmapbt fields
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (19 preceding siblings ...)
2015-12-19 9:07 ` [PATCH 20/53] xfs_db: display rmap btree contents Darrick J. Wong
@ 2015-12-19 9:07 ` Darrick J. Wong
2015-12-19 9:07 ` [PATCH 22/53] xfs_db: check rmapbt Darrick J. Wong
` (31 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:07 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Decode the extra fields in the rmapbt records.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
db/agf.c | 2 +-
db/btblock.c | 24 +++++++++++++++++++++---
db/field.c | 10 ++++++++++
db/field.h | 5 +++++
db/type.c | 2 ++
5 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/db/agf.c b/db/agf.c
index ffdd550..f4c4269 100644
--- a/db/agf.c
+++ b/db/agf.c
@@ -55,7 +55,7 @@ const field_t agf_flds[] = {
{ "cntroot", FLDT_AGBLOCK,
OI(OFF(roots) + XFS_BTNUM_CNT * SZ(roots[XFS_BTNUM_CNT])), C1, 0,
TYP_CNTBT },
- { "rmaproot", FLDT_AGBLOCK,
+ { "rmaproot", FLDT_AGBLOCKNZ,
OI(OFF(roots) + XFS_BTNUM_RMAP * SZ(roots[XFS_BTNUM_RMAP])), C1, 0,
TYP_RMAPBT },
{ "levels", FLDT_UINT32D, OI(OFF(levels)), CI(XFS_BTNUM_AGF),
diff --git a/db/btblock.c b/db/btblock.c
index e45ee03..430d84f 100644
--- a/db/btblock.c
+++ b/db/btblock.c
@@ -645,15 +645,33 @@ const field_t rmapbt_crc_flds[] = {
#define KOFF(f) bitize(offsetof(struct xfs_rmap_key, rm_ ## f))
const field_t rmapbt_key_flds[] = {
{ "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA },
+ { "owner", FLDT_INT64D, OI(KOFF(owner)), C1, 0, TYP_NONE },
+ { "offset", FLDT_CFILEOFFD, OI(KOFF(offset)), C1, 0, TYP_NONE },
{ NULL }
};
#undef KOFF
#define ROFF(f) bitize(offsetof(struct xfs_rmap_rec, rm_ ## f))
+
+#define RMAPBT_STARTBLOCK_BITOFF 0
+#define RMAPBT_EXNTFLAG_BITOFF (RMAPBT_STARTBLOCK_BITOFF + RMAPBT_STARTBLOCK_BITLEN)
+#define RMAPBT_BLOCKCOUNT_BITOFF (RMAPBT_EXNTFLAG_BITOFF + RMAPBT_EXNTFLAG_BITLEN)
+#define RMAPBT_OWNER_BITOFF (RMAPBT_BLOCKCOUNT_BITOFF + RMAPBT_BLOCKCOUNT_BITLEN)
+#define RMAPBT_ATTRFLAG_BITOFF (RMAPBT_OWNER_BITOFF + RMAPBT_OWNER_BITLEN)
+#define RMAPBT_BMBTFLAG_BITOFF (RMAPBT_ATTRFLAG_BITOFF + RMAPBT_ATTRFLAG_BITLEN)
+#define RMAPBT_OFFSET_BITOFF (RMAPBT_BMBTFLAG_BITOFF + RMAPBT_BMBTFLAG_BITLEN)
+
const field_t rmapbt_rec_flds[] = {
- { "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA },
- { "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE },
- { "owner", FLDT_UINT64X, OI(ROFF(owner)), C1, 0, TYP_NONE },
+ { "startblock", FLDT_AGBLOCK, OI(RMAPBT_STARTBLOCK_BITOFF), C1, 0, TYP_DATA },
+ { "blockcount", FLDT_REXTLEN, OI(RMAPBT_BLOCKCOUNT_BITOFF), C1, 0, TYP_NONE },
+ { "owner", FLDT_INT64D, OI(RMAPBT_OWNER_BITOFF), C1, 0, TYP_NONE },
+ { "offset", FLDT_RFILEOFFD, OI(RMAPBT_OFFSET_BITOFF), C1, 0, TYP_NONE },
+ { "extentflag", FLDT_REXTFLG, OI(RMAPBT_EXNTFLAG_BITOFF), C1, 0,
+ TYP_NONE },
+ { "attrfork", FLDT_RATTRFORKFLG, OI(RMAPBT_ATTRFLAG_BITOFF), C1, 0,
+ TYP_NONE },
+ { "bmbtblock", FLDT_RBMBTFLG, OI(RMAPBT_BMBTFLAG_BITOFF), C1, 0,
+ TYP_NONE },
{ NULL }
};
#undef ROFF
diff --git a/db/field.c b/db/field.c
index 8298f29..5664b95 100644
--- a/db/field.c
+++ b/db/field.c
@@ -153,6 +153,16 @@ const ftattr_t ftattrtab[] = {
{ FLDT_CHARNS, "charns", fp_charns, NULL, SI(bitsz(char)), 0, NULL,
NULL },
{ FLDT_CHARS, "chars", fp_num, "%c", SI(bitsz(char)), 0, NULL, NULL },
+ { FLDT_REXTLEN, "rextlen", fp_num, "%u", SI(RMAPBT_BLOCKCOUNT_BITLEN),
+ 0, NULL, NULL },
+ { FLDT_RFILEOFFD, "rfileoffd", fp_num, "%llu", SI(RMAPBT_OFFSET_BITLEN),
+ 0, NULL, NULL },
+ { FLDT_REXTFLG, "rextflag", fp_num, "%u", SI(RMAPBT_EXNTFLAG_BITLEN), 0,
+ NULL, NULL },
+ { FLDT_RATTRFORKFLG, "rattrforkflag", fp_num, "%u", SI(RMAPBT_ATTRFLAG_BITLEN), 0,
+ NULL, NULL },
+ { FLDT_RBMBTFLG, "rbmbtflag", fp_num, "%u", SI(RMAPBT_BMBTFLAG_BITLEN), 0,
+ NULL, NULL },
{ FLDT_CNTBT, "cntbt", NULL, (char *)cntbt_flds, btblock_size, FTARG_SIZE,
NULL, cntbt_flds },
{ FLDT_CNTBT_CRC, "cntbt", NULL, (char *)cntbt_crc_flds, btblock_size,
diff --git a/db/field.h b/db/field.h
index 82701bb..47f562a 100644
--- a/db/field.h
+++ b/db/field.h
@@ -75,6 +75,11 @@ typedef enum fldt {
FLDT_CFSBLOCK,
FLDT_CHARNS,
FLDT_CHARS,
+ FLDT_REXTLEN,
+ FLDT_RFILEOFFD,
+ FLDT_REXTFLG,
+ FLDT_RATTRFORKFLG,
+ FLDT_RBMBTFLG,
FLDT_CNTBT,
FLDT_CNTBT_CRC,
FLDT_CNTBTKEY,
diff --git a/db/type.c b/db/type.c
index 38372b8..f78290e 100644
--- a/db/type.c
+++ b/db/type.c
@@ -127,6 +127,8 @@ static const typ_t __typtab_spcrc[] = {
&xfs_allocbt_buf_ops },
{ TYP_CNTBT, "cntbt", handle_struct, cntbt_crc_hfld,
&xfs_allocbt_buf_ops },
+ { TYP_RMAPBT, "rmapbt", handle_struct, rmapbt_crc_hfld,
+ &xfs_rmapbt_buf_ops },
{ TYP_DATA, "data", handle_block, NULL, NULL },
{ TYP_DIR2, "dir3", handle_struct, dir3_hfld,
&xfs_dir3_db_buf_ops },
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 22/53] xfs_db: check rmapbt
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (20 preceding siblings ...)
2015-12-19 9:07 ` [PATCH 21/53] xfs_dump: display enhanced rmapbt fields Darrick J. Wong
@ 2015-12-19 9:07 ` Darrick J. Wong
2015-12-19 9:07 ` [PATCH 23/53] xfs_db: copy the rmap btree Darrick J. Wong
` (30 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:07 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
db/check.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 84 insertions(+), 1 deletion(-)
diff --git a/db/check.c b/db/check.c
index f1620f8..4721834 100644
--- a/db/check.c
+++ b/db/check.c
@@ -44,7 +44,7 @@ typedef enum {
DBM_FREE1, DBM_FREE2, DBM_FREELIST, DBM_INODE,
DBM_LOG, DBM_MISSING, DBM_QUOTA, DBM_RTBITMAP,
DBM_RTDATA, DBM_RTFREE, DBM_RTSUM, DBM_SB,
- DBM_SYMLINK, DBM_BTFINO,
+ DBM_SYMLINK, DBM_BTFINO, DBM_BTRMAP,
DBM_NDBM
} dbm_t;
@@ -171,6 +171,7 @@ static const char *typename[] = {
"sb",
"symlink",
"btfino",
+ "btrmap",
NULL
};
static int verbose;
@@ -349,6 +350,9 @@ static void scanfunc_ino(struct xfs_btree_block *block, int level,
static void scanfunc_fino(struct xfs_btree_block *block, int level,
struct xfs_agf *agf, xfs_agblock_t bno,
int isroot);
+static void scanfunc_rmap(struct xfs_btree_block *block, int level,
+ struct xfs_agf *agf, xfs_agblock_t bno,
+ int isroot);
static void set_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_extlen_t len, dbm_t type,
xfs_agnumber_t c_agno, xfs_agblock_t c_agbno);
@@ -1050,6 +1054,7 @@ blocktrash_f(
(1 << DBM_RTSUM) |
(1 << DBM_SYMLINK) |
(1 << DBM_BTFINO) |
+ (1 << DBM_BTRMAP) |
(1 << DBM_SB);
while ((c = getopt(argc, argv, "0123n:o:s:t:x:y:z")) != EOF) {
switch (c) {
@@ -3907,6 +3912,12 @@ scan_ag(
be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
1, scanfunc_cnt, TYP_CNTBT);
+ if (agf->agf_roots[XFS_BTNUM_RMAP]) {
+ scan_sbtree(agf,
+ be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]),
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]),
+ 1, scanfunc_rmap, TYP_RMAPBT);
+ }
scan_sbtree(agf,
be32_to_cpu(agi->agi_root),
be32_to_cpu(agi->agi_level),
@@ -4658,6 +4669,78 @@ scanfunc_fino(
}
static void
+scanfunc_rmap(
+ struct xfs_btree_block *block,
+ int level,
+ struct xfs_agf *agf,
+ xfs_agblock_t bno,
+ int isroot)
+{
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ int i;
+ xfs_rmap_ptr_t *pp;
+ struct xfs_rmap_rec *rp;
+ xfs_agblock_t lastblock;
+
+ if (be32_to_cpu(block->bb_magic) != XFS_RMAP_CRC_MAGIC) {
+ dbprintf(_("bad magic # %#x in rmapbt block %u/%u\n"),
+ be32_to_cpu(block->bb_magic), seqno, bno);
+ serious_error++;
+ return;
+ }
+ if (be16_to_cpu(block->bb_level) != level) {
+ if (!sflag)
+ dbprintf(_("expected level %d got %d in rmapbt block "
+ "%u/%u\n"),
+ level, be16_to_cpu(block->bb_level), seqno, bno);
+ error++;
+ }
+ if (!isroot) {
+ fdblocks++;
+ agfbtreeblks++;
+ }
+ set_dbmap(seqno, bno, 1, DBM_BTRMAP, seqno, bno);
+ if (level == 0) {
+ if (be16_to_cpu(block->bb_numrecs) > mp->m_rmap_mxr[0] ||
+ (isroot == 0 && be16_to_cpu(block->bb_numrecs) < mp->m_rmap_mnr[0])) {
+ dbprintf(_("bad btree nrecs (%u, min=%u, max=%u) in "
+ "rmapbt block %u/%u\n"),
+ be16_to_cpu(block->bb_numrecs), mp->m_rmap_mnr[0],
+ mp->m_rmap_mxr[0], seqno, bno);
+ serious_error++;
+ return;
+ }
+ rp = XFS_RMAP_REC_ADDR(block, 1);
+ lastblock = 0;
+ for (i = 0; i < be16_to_cpu(block->bb_numrecs); i++) {
+ if (be32_to_cpu(rp[i].rm_startblock) < lastblock) {
+ dbprintf(_(
+ "out-of-order rmap btree record %d (%u %u) block %u/%u\n"),
+ i, be32_to_cpu(rp[i].rm_startblock),
+ be32_to_cpu(rp[i].rm_startblock),
+ be32_to_cpu(agf->agf_seqno), bno);
+ } else {
+ lastblock = be32_to_cpu(rp[i].rm_startblock);
+ }
+ }
+ return;
+ }
+ if (be16_to_cpu(block->bb_numrecs) > mp->m_rmap_mxr[1] ||
+ (isroot == 0 && be16_to_cpu(block->bb_numrecs) < mp->m_rmap_mnr[1])) {
+ dbprintf(_("bad btree nrecs (%u, min=%u, max=%u) in rmapbt "
+ "block %u/%u\n"),
+ be16_to_cpu(block->bb_numrecs), mp->m_rmap_mnr[1],
+ mp->m_rmap_mxr[1], seqno, bno);
+ serious_error++;
+ return;
+ }
+ pp = XFS_RMAP_PTR_ADDR(block, 1, mp->m_rmap_mxr[1]);
+ for (i = 0; i < be16_to_cpu(block->bb_numrecs); i++)
+ scan_sbtree(agf, be32_to_cpu(pp[i]), level, 0, scanfunc_rmap,
+ TYP_RMAPBT);
+}
+
+static void
set_dbmap(
xfs_agnumber_t agno,
xfs_agblock_t agbno,
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 23/53] xfs_db: copy the rmap btree
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (21 preceding siblings ...)
2015-12-19 9:07 ` [PATCH 22/53] xfs_db: check rmapbt Darrick J. Wong
@ 2015-12-19 9:07 ` Darrick J. Wong
2015-12-19 9:07 ` [PATCH 24/53] xfs_growfs: report rmapbt presence Darrick J. Wong
` (29 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:07 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
db/metadump.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/db/metadump.c b/db/metadump.c
index 8cdcb92..94c22a9 100644
--- a/db/metadump.c
+++ b/db/metadump.c
@@ -524,6 +524,78 @@ copy_free_cnt_btree(
return scan_btree(agno, root, levels, TYP_CNTBT, agf, scanfunc_freesp);
}
+static int
+scanfunc_rmapbt(
+ struct xfs_btree_block *block,
+ xfs_agnumber_t agno,
+ xfs_agblock_t agbno,
+ int level,
+ typnm_t btype,
+ void *arg)
+{
+ xfs_rmap_ptr_t *pp;
+ int i;
+ int numrecs;
+
+ if (level == 0)
+ return 1;
+
+ numrecs = be16_to_cpu(block->bb_numrecs);
+ if (numrecs > mp->m_rmap_mxr[1]) {
+ if (show_warnings)
+ print_warning("invalid numrecs (%u) in %s block %u/%u",
+ numrecs, typtab[btype].name, agno, agbno);
+ return 1;
+ }
+
+ pp = XFS_RMAP_PTR_ADDR(block, 1, mp->m_rmap_mxr[1]);
+ for (i = 0; i < numrecs; i++) {
+ if (!valid_bno(agno, be32_to_cpu(pp[i]))) {
+ if (show_warnings)
+ print_warning("invalid block number (%u/%u) "
+ "in %s block %u/%u",
+ agno, be32_to_cpu(pp[i]),
+ typtab[btype].name, agno, agbno);
+ continue;
+ }
+ if (!scan_btree(agno, be32_to_cpu(pp[i]), level, btype, arg,
+ scanfunc_rmapbt))
+ return 0;
+ }
+ return 1;
+}
+
+static int
+copy_rmap_btree(
+ xfs_agnumber_t agno,
+ struct xfs_agf *agf)
+{
+ xfs_agblock_t root;
+ int levels;
+
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return 1;
+
+ root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
+ levels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+
+ /* validate root and levels before processing the tree */
+ if (root == 0 || root > mp->m_sb.sb_agblocks) {
+ if (show_warnings)
+ print_warning("invalid block number (%u) in rmapbt "
+ "root in agf %u", root, agno);
+ return 1;
+ }
+ if (levels >= XFS_BTREE_MAXLEVELS) {
+ if (show_warnings)
+ print_warning("invalid level (%u) in rmapbt root "
+ "in agf %u", levels, agno);
+ return 1;
+ }
+
+ return scan_btree(agno, root, levels, TYP_RMAPBT, agf, scanfunc_rmapbt);
+}
+
/* filename and extended attribute obfuscation routines */
struct name_ent {
@@ -2432,6 +2504,8 @@ scan_ag(
goto pop_out;
if (!copy_free_cnt_btree(agno, agf))
goto pop_out;
+ if (!copy_rmap_btree(agno, agf))
+ goto pop_out;
}
/* copy inode btrees and the inodes and their associated metadata */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 24/53] xfs_growfs: report rmapbt presence
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (22 preceding siblings ...)
2015-12-19 9:07 ` [PATCH 23/53] xfs_db: copy the rmap btree Darrick J. Wong
@ 2015-12-19 9:07 ` Darrick J. Wong
2015-12-19 9:07 ` [PATCH 25/53] xfs_repair: use rmap btree data to check block types Darrick J. Wong
` (28 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:07 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
growfs/xfs_growfs.c | 14 +++++++++-----
libxfs/xfs_fs.h | 1 +
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/growfs/xfs_growfs.c b/growfs/xfs_growfs.c
index 56315f9..2b46480 100644
--- a/growfs/xfs_growfs.c
+++ b/growfs/xfs_growfs.c
@@ -58,12 +58,13 @@ report_info(
int cimode,
int ftype_enabled,
int finobt_enabled,
- int spinodes)
+ int spinodes,
+ int rmapbt_enabled)
{
printf(_(
"meta-data=%-22s isize=%-6u agcount=%u, agsize=%u blks\n"
" =%-22s sectsz=%-5u attr=%u, projid32bit=%u\n"
- " =%-22s crc=%-8u finobt=%u spinodes=%u\n"
+ " =%-22s crc=%-8u finobt=%u spinodes=%u rmapbt=%u\n"
"data =%-22s bsize=%-6u blocks=%llu, imaxpct=%u\n"
" =%-22s sunit=%-6u swidth=%u blks\n"
"naming =version %-14u bsize=%-6u ascii-ci=%d ftype=%d\n"
@@ -73,7 +74,7 @@ report_info(
mntpoint, geo.inodesize, geo.agcount, geo.agblocks,
"", geo.sectsize, attrversion, projid32bit,
- "", crcs_enabled, finobt_enabled, spinodes,
+ "", crcs_enabled, finobt_enabled, spinodes, rmapbt_enabled,
"", geo.blocksize, (unsigned long long)geo.datablocks,
geo.imaxpct,
"", geo.sunit, geo.swidth,
@@ -127,6 +128,7 @@ main(int argc, char **argv)
int ftype_enabled = 0;
int finobt_enabled; /* free inode btree */
int spinodes;
+ int rmapbt_enabled;
progname = basename(argv[0]);
setlocale(LC_ALL, "");
@@ -250,11 +252,13 @@ main(int argc, char **argv)
ftype_enabled = geo.flags & XFS_FSOP_GEOM_FLAGS_FTYPE ? 1 : 0;
finobt_enabled = geo.flags & XFS_FSOP_GEOM_FLAGS_FINOBT ? 1 : 0;
spinodes = geo.flags & XFS_FSOP_GEOM_FLAGS_SPINODES ? 1 : 0;
+ rmapbt_enabled = geo.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT ? 1 : 0;
if (nflag) {
report_info(geo, datadev, isint, logdev, rtdev,
lazycount, dirversion, logversion,
attrversion, projid32bit, crcs_enabled, ci,
- ftype_enabled, finobt_enabled, spinodes);
+ ftype_enabled, finobt_enabled, spinodes,
+ rmapbt_enabled);
exit(0);
}
@@ -292,7 +296,7 @@ main(int argc, char **argv)
report_info(geo, datadev, isint, logdev, rtdev,
lazycount, dirversion, logversion,
attrversion, projid32bit, crcs_enabled, ci, ftype_enabled,
- finobt_enabled, spinodes);
+ finobt_enabled, spinodes, rmapbt_enabled);
ddsize = xi.dsize;
dlsize = ( xi.logBBsize? xi.logBBsize :
diff --git a/libxfs/xfs_fs.h b/libxfs/xfs_fs.h
index d8b733a..56990eb 100644
--- a/libxfs/xfs_fs.h
+++ b/libxfs/xfs_fs.h
@@ -240,6 +240,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */
+#define XFS_FSOP_GEOM_FLAGS_RMAPBT 0x80000 /* reverse-mapping btree */
/*
* Minimum and maximum sizes need for growth checks.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 25/53] xfs_repair: use rmap btree data to check block types
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (23 preceding siblings ...)
2015-12-19 9:07 ` [PATCH 24/53] xfs_growfs: report rmapbt presence Darrick J. Wong
@ 2015-12-19 9:07 ` Darrick J. Wong
2015-12-19 9:07 ` [PATCH 26/53] xfs_repair: mask off length appropriately Darrick J. Wong
` (27 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:07 UTC (permalink / raw)
To: david, darrick.wong; +Cc: Dave Chinner, xfs
From: Dave Chinner <david@fromorbit.com>
Use the rmap btree to pre-populate the block type information so that
when repair iterates the primary metadata, we can confirm the block
type.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
[split patch, add commit message]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/dinode.c | 6 +
repair/incore.h | 16 +-
repair/scan.c | 336 ++++++++++++++++++++++++++++++++++++++++++++++++---
repair/xfs_repair.c | 2
4 files changed, 331 insertions(+), 29 deletions(-)
diff --git a/repair/dinode.c b/repair/dinode.c
index 269f9d8..caa4c1b 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -744,6 +744,7 @@ _("%s fork in ino %" PRIu64 " claims dup extent, "
_("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"),
forkname, ino, (__uint64_t) b);
/* fall through ... */
+ case XR_E_INUSE1: /* seen by rmap */
case XR_E_UNKNOWN:
set_bmap_ext(agno, agbno, blen, XR_E_INUSE);
break;
@@ -751,6 +752,11 @@ _("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"),
case XR_E_BAD_STATE:
do_error(_("bad state in block map %" PRIu64 "\n"), b);
+ case XR_E_FS_MAP1:
+ case XR_E_INO1:
+ case XR_E_INUSE_FS1:
+ do_warn(_("rmap claims metadata use!\n"));
+ /* fall through */
case XR_E_FS_MAP:
case XR_E_INO:
case XR_E_INUSE_FS:
diff --git a/repair/incore.h b/repair/incore.h
index c92475e..bc0810b 100644
--- a/repair/incore.h
+++ b/repair/incore.h
@@ -102,17 +102,11 @@ typedef struct rt_extent_tree_node {
#define XR_E_MULT 5 /* extent is multiply referenced */
#define XR_E_INO 6 /* extent used by inodes (inode blocks) */
#define XR_E_FS_MAP 7 /* extent used by fs space/inode maps */
-#define XR_E_BAD_STATE 8
-
-/* extent states, in 64 bit word chunks */
-#define XR_E_UNKNOWN_LL 0x0000000000000000LL
-#define XR_E_FREE1_LL 0x1111111111111111LL
-#define XR_E_FREE_LL 0x2222222222222222LL
-#define XR_E_INUSE_LL 0x3333333333333333LL
-#define XR_E_INUSE_FS_LL 0x4444444444444444LL
-#define XR_E_MULT_LL 0x5555555555555555LL
-#define XR_E_INO_LL 0x6666666666666666LL
-#define XR_E_FS_MAP_LL 0x7777777777777777LL
+#define XR_E_INUSE1 8 /* used block (marked by rmap btree) */
+#define XR_E_INUSE_FS1 9 /* used by fs ag header or log (rmap btree) */
+#define XR_E_INO1 10 /* used by inodes (marked by rmap btree) */
+#define XR_E_FS_MAP1 11 /* used by fs space/inode maps (rmap btree) */
+#define XR_E_BAD_STATE 12
/* separate state bit, OR'ed into high (4th) bit of ex_state field */
diff --git a/repair/scan.c b/repair/scan.c
index 1e7a4da..c1ab6df 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -44,6 +44,7 @@ struct aghdr_cnts {
__uint32_t agicount;
__uint32_t agifreecount;
__uint64_t fdblocks;
+ __uint64_t usedblocks;
__uint64_t ifreecount;
__uint32_t fibtfreecount;
};
@@ -308,6 +309,13 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
pthread_mutex_lock(&ag_locks[agno].lock);
state = get_bmap(agno, agbno);
switch (state) {
+ case XR_E_INUSE1:
+ /*
+ * block was claimed as in use data by the rmap
+ * btree, but has not been found in the data extent
+ * map for the inode. That means this bmbt block hasn't
+ * yet been claimed as in use, which means -it's ours-
+ */
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
@@ -763,6 +771,252 @@ ino_issparse(
return xfs_inobt_is_sparse_disk(rp, offset);
}
+
+static void
+scan_rmapbt(
+ struct xfs_btree_block *block,
+ int level,
+ xfs_agblock_t bno,
+ xfs_agnumber_t agno,
+ int suspect,
+ int isroot,
+ __uint32_t magic,
+ void *priv)
+{
+ struct aghdr_cnts *agcnts = priv;
+ const char *name = "rmap";
+ int i;
+ xfs_rmap_ptr_t *pp;
+ struct xfs_rmap_rec *rp;
+ int hdr_errors = 0;
+ int numrecs;
+ int state;
+ xfs_agblock_t lastblock = 0;
+
+ if (magic != XFS_RMAP_CRC_MAGIC) {
+ name = "(unknown)";
+ assert(0);
+ }
+
+ if (be32_to_cpu(block->bb_magic) != magic) {
+ do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
+ be32_to_cpu(block->bb_magic), name, agno, bno);
+ hdr_errors++;
+ if (suspect)
+ return;
+ }
+
+ /*
+ * All RMAP btree blocks except the roots are freed for a
+ * fully empty filesystem, thus they are counted towards the
+ * free data block counter.
+ */
+ if (!isroot) {
+ agcnts->agfbtreeblks++;
+ agcnts->fdblocks++;
+ }
+
+ if (be16_to_cpu(block->bb_level) != level) {
+ do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
+ level, be16_to_cpu(block->bb_level), name, agno, bno);
+ hdr_errors++;
+ if (suspect)
+ return;
+ }
+
+ /* check for btree blocks multiply claimed */
+ state = get_bmap(agno, bno);
+ if (!(state == XR_E_UNKNOWN || state == XR_E_FS_MAP1)) {
+ set_bmap(agno, bno, XR_E_MULT);
+ do_warn(
+_("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
+ name, state, agno, bno, suspect);
+ return;
+ }
+ set_bmap(agno, bno, XR_E_FS_MAP);
+
+ numrecs = be16_to_cpu(block->bb_numrecs);
+ if (level == 0) {
+ if (numrecs > mp->m_rmap_mxr[0]) {
+ numrecs = mp->m_rmap_mxr[0];
+ hdr_errors++;
+ }
+ if (isroot == 0 && numrecs < mp->m_rmap_mnr[0]) {
+ numrecs = mp->m_rmap_mnr[0];
+ hdr_errors++;
+ }
+
+ if (hdr_errors) {
+ do_warn(
+ _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
+ be16_to_cpu(block->bb_numrecs),
+ mp->m_rmap_mnr[0], mp->m_rmap_mxr[0],
+ name, agno, bno);
+ suspect++;
+ }
+
+ rp = XFS_RMAP_REC_ADDR(block, 1);
+ for (i = 0; i < numrecs; i++) {
+ xfs_agblock_t b, end;
+ xfs_extlen_t len, blen;
+ int64_t owner;
+
+ b = be32_to_cpu(rp[i].rm_startblock);
+ len = be32_to_cpu(rp[i].rm_blockcount);
+ owner = be64_to_cpu(rp[i].rm_owner);
+ end = b + len;
+
+ if (!verify_agbno(mp, agno, b)) {
+ do_warn(
+ _("invalid start block %u in record %u of %s btree block %u/%u\n"),
+ b, i, name, agno, bno);
+ continue;
+ }
+ if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
+ do_warn(
+ _("invalid length %u in record %u of %s btree block %u/%u\n"),
+ len, i, name, agno, bno);
+ continue;
+ }
+
+ /* XXX: range check owner */
+
+ if (b && b <= lastblock) {
+ do_warn(_(
+ "out-of-order rmap btree record %d (%u %u) block %u/%u\n"),
+ i, b, len, agno, bno);
+ } else {
+ lastblock = b;
+ }
+
+ for ( ; b < end; b += blen) {
+ state = get_bmap_ext(agno, b, end, &blen);
+ switch (state) {
+ case XR_E_UNKNOWN:
+ switch (owner) {
+ case XFS_RMAP_OWN_FS:
+ case XFS_RMAP_OWN_LOG:
+ set_bmap(agno, b, XR_E_INUSE_FS1);
+ break;
+ case XFS_RMAP_OWN_AG:
+ case XFS_RMAP_OWN_INOBT:
+ set_bmap(agno, b, XR_E_FS_MAP1);
+ break;
+ case XFS_RMAP_OWN_INODES:
+ set_bmap(agno, b, XR_E_INO1);
+ break;
+ case XFS_RMAP_OWN_NULL:
+ /* still unknown */
+ break;
+ default:
+ /* file data */
+ set_bmap(agno, b, XR_E_INUSE1);
+ break;
+ }
+ break;
+ case XR_E_INUSE_FS:
+ if (owner == XFS_RMAP_OWN_FS ||
+ owner == XFS_RMAP_OWN_LOG)
+ break;
+ do_warn(
+_("Static meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ case XR_E_FS_MAP:
+ if (owner == XFS_RMAP_OWN_AG ||
+ owner == XFS_RMAP_OWN_INOBT)
+ break;
+ do_warn(
+_("AG meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ case XR_E_INO:
+ if (owner == XFS_RMAP_OWN_INODES)
+ break;
+ do_warn(
+_("inode block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ case XR_E_INUSE:
+ if (owner >= 0 &&
+ owner < mp->m_sb.sb_dblocks)
+ break;
+ do_warn(
+_("in use block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ case XR_E_FREE1:
+ case XR_E_FREE:
+ /*
+ * May be on the AGFL. If not, they'll
+ * be caught later.
+ */
+ break;
+ default:
+ do_warn(
+_("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
+ }
+ }
+ }
+ return;
+ }
+
+ /*
+ * interior record
+ */
+ pp = XFS_RMAP_PTR_ADDR(block, 1, mp->m_rmap_mxr[1]);
+
+ if (numrecs > mp->m_rmap_mxr[1]) {
+ numrecs = mp->m_rmap_mxr[1];
+ hdr_errors++;
+ }
+ if (isroot == 0 && numrecs < mp->m_rmap_mnr[1]) {
+ numrecs = mp->m_rmap_mnr[1];
+ hdr_errors++;
+ }
+
+ /*
+ * don't pass bogus tree flag down further if this block
+ * looked ok. bail out if two levels in a row look bad.
+ */
+ if (hdr_errors) {
+ do_warn(
+ _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
+ be16_to_cpu(block->bb_numrecs),
+ mp->m_rmap_mnr[1], mp->m_rmap_mxr[1],
+ name, agno, bno);
+ if (suspect)
+ return;
+ suspect++;
+ } else if (suspect) {
+ suspect = 0;
+ }
+
+ for (i = 0; i < numrecs; i++) {
+ xfs_agblock_t bno = be32_to_cpu(pp[i]);
+
+ /*
+ * XXX - put sibling detection right here.
+ * we know our sibling chain is good. So as we go,
+ * we check the entry before and after each entry.
+ * If either of the entries references a different block,
+ * check the sibling pointer. If there's a sibling
+ * pointer mismatch, try and extract as much data
+ * as possible.
+ */
+ if (bno != 0 && verify_agbno(mp, agno, bno)) {
+ scan_sbtree(bno, level, agno, suspect, scan_rmapbt, 0,
+ magic, priv, &xfs_rmapbt_buf_ops);
+ }
+ }
+}
/*
* The following helpers are to help process and validate individual on-disk
@@ -976,20 +1230,27 @@ scan_single_ino_chunk(
agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
state = get_bmap(agno, agbno);
- if (state == XR_E_UNKNOWN) {
- set_bmap(agno, agbno, XR_E_INO);
- } else if (state == XR_E_INUSE_FS && agno == 0 &&
- ino + j >= first_prealloc_ino &&
- ino + j < last_prealloc_ino) {
+ switch (state) {
+ case XR_E_INO:
+ break;
+ case XR_E_UNKNOWN:
+ case XR_E_INO1: /* seen by rmap */
set_bmap(agno, agbno, XR_E_INO);
- } else {
+ break;
+ case XR_E_INUSE_FS:
+ case XR_E_INUSE_FS1:
+ if (agno == 0 &&
+ ino + j >= first_prealloc_ino &&
+ ino + j < last_prealloc_ino) {
+ set_bmap(agno, agbno, XR_E_INO);
+ break;
+ }
+ /* fall through */
+ default:
+ /* XXX - maybe should mark block a duplicate */
do_warn(
_("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
agno, agbno, mp->m_sb.sb_inopblock);
- /*
- * XXX - maybe should mark
- * block a duplicate
- */
return ++suspect;
}
}
@@ -1099,19 +1360,35 @@ _("sparse inode chunk claims inode block, finobt block - agno %d, bno %d, inopb
continue;
}
- if (state == XR_E_INO) {
- continue;
- } else if ((state == XR_E_UNKNOWN) ||
- (state == XR_E_INUSE_FS && agno == 0 &&
- ino + j >= first_prealloc_ino &&
- ino + j < last_prealloc_ino)) {
+ switch (state) {
+ case XR_E_INO:
+ break;
+ case XR_E_INO1: /* seen by rmap */
+ set_bmap(agno, agbno, XR_E_INO);
+ break;
+ case XR_E_UNKNOWN:
do_warn(
_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
agno, agbno, mp->m_sb.sb_inopblock);
set_bmap(agno, agbno, XR_E_INO);
suspect++;
- } else {
+ break;
+ case XR_E_INUSE_FS:
+ case XR_E_INUSE_FS1:
+ if (agno == 0 &&
+ ino + j >= first_prealloc_ino &&
+ ino + j < last_prealloc_ino) {
+ do_warn(
+_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
+ agno, agbno, mp->m_sb.sb_inopblock);
+
+ set_bmap(agno, agbno, XR_E_INO);
+ suspect++;
+ break;
+ }
+ /* fall through */
+ default:
do_warn(
_("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"),
agno, agbno, mp->m_sb.sb_inopblock);
@@ -1280,6 +1557,7 @@ scan_inobt(
*/
state = get_bmap(agno, bno);
switch (state) {
+ case XR_E_FS_MAP1: /* already been seen by an rmap scan */
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
@@ -1420,7 +1698,7 @@ scan_freelist(
if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
- set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
+ set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_INUSE_FS);
if (be32_to_cpu(agf->agf_flcount) == 0)
return;
@@ -1505,6 +1783,19 @@ validate_agf(
bno, agno);
}
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
+ if (bno != 0 && verify_agbno(mp, agno, bno)) {
+ scan_sbtree(bno,
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]),
+ agno, 0, scan_rmapbt, 1, XFS_RMAP_CRC_MAGIC,
+ agcnts, &xfs_rmapbt_buf_ops);
+ } else {
+ do_warn(_("bad agbno %u for rmapbt root, agno %d\n"),
+ bno, agno);
+ }
+ }
+
if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
@@ -1520,6 +1811,7 @@ validate_agf(
do_warn(_("agf_btreeblks %u, counted %" PRIu64 " in ag %u\n"),
be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno);
}
+
}
static void
@@ -1759,6 +2051,7 @@ scan_ags(
__uint64_t fdblocks = 0;
__uint64_t icount = 0;
__uint64_t ifreecount = 0;
+ __uint64_t usedblocks = 0;
xfs_agnumber_t i;
work_queue_t wq;
@@ -1781,6 +2074,7 @@ scan_ags(
fdblocks += agcnts[i].fdblocks;
icount += agcnts[i].agicount;
ifreecount += agcnts[i].ifreecount;
+ usedblocks += agcnts[i].usedblocks;
}
free(agcnts);
@@ -1802,5 +2096,11 @@ scan_ags(
do_warn(_("sb_fdblocks %" PRIu64 ", counted %" PRIu64 "\n"),
mp->m_sb.sb_fdblocks, fdblocks);
}
+
+ if (usedblocks &&
+ usedblocks != mp->m_sb.sb_dblocks - fdblocks) {
+ do_warn(_("used blocks %" PRIu64 ", counted %" PRIu64 "\n"),
+ mp->m_sb.sb_dblocks - fdblocks, usedblocks);
+ }
}
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 1aeac5b..2fe2e4e 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -416,6 +416,8 @@ calc_mkfs(xfs_mount_t *mp)
fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
if (xfs_sb_version_hasfinobt(&mp->m_sb))
fino_bno++;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ fino_bno++;
/*
* If the log is allocated in the first allocation group we need to
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 26/53] xfs_repair: mask off length appropriately
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (24 preceding siblings ...)
2015-12-19 9:07 ` [PATCH 25/53] xfs_repair: use rmap btree data to check block types Darrick J. Wong
@ 2015-12-19 9:07 ` Darrick J. Wong
2015-12-19 9:07 ` [PATCH 27/53] xfs_repair: fix fino_bno calculation when rmapbt is enabled Darrick J. Wong
` (26 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:07 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
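Ensure that we remove the flag bits from blockcount before using the
length field. While here, warn about rmap records whose owner is out
of range, and include the owner and offset in the out-of-order record
warning.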
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/scan.c | 32 ++++++++++++++++++++++++--------
1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/repair/scan.c b/repair/scan.c
index c1ab6df..1ade344 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -792,6 +792,8 @@ scan_rmapbt(
int numrecs;
int state;
xfs_agblock_t lastblock = 0;
+ int64_t lastowner = 0;
+ int64_t lastoffset = 0;
if (magic != XFS_RMAP_CRC_MAGIC) {
name = "(unknown)";
@@ -859,11 +861,12 @@ _("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
for (i = 0; i < numrecs; i++) {
xfs_agblock_t b, end;
xfs_extlen_t len, blen;
- int64_t owner;
+ int64_t owner, offset;
b = be32_to_cpu(rp[i].rm_startblock);
- len = be32_to_cpu(rp[i].rm_blockcount);
+ len = XFS_RMAP_LEN(be32_to_cpu(rp[i].rm_blockcount));
owner = be64_to_cpu(rp[i].rm_owner);
+ offset = be64_to_cpu(rp[i].rm_offset);
end = b + len;
if (!verify_agbno(mp, agno, b)) {
@@ -879,14 +882,27 @@ _("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
continue;
}
- /* XXX: range check owner */
+ if (!(owner > 0 || (owner > XFS_RMAP_OWN_MIN &&
+ owner <= XFS_RMAP_OWN_FS)))
+ do_warn(
+ _("invalid owner in rmap btree record %d (%"PRId64" %u) block %u/%u\n"),
+ i, owner, len, agno, bno);
- if (b && b <= lastblock) {
- do_warn(_(
- "out-of-order rmap btree record %d (%u %u) block %u/%u\n"),
- i, b, len, agno, bno);
- } else {
+ if (i == 0) {
+advance:
lastblock = b;
+ lastowner = owner;
+ lastoffset = offset;
+ } else {
+ bool bad;
+
+ bad = b <= lastblock;
+ if (bad)
+ do_warn(
+ _("out-of-order rmap btree record %d (%u %"PRId64" %"PRIx64" %u) block %u/%u\n"),
+ i, b, owner, offset, len, agno, bno);
+ else
+ goto advance;
}
for ( ; b < end; b += blen) {
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 27/53] xfs_repair: fix fino_bno calculation when rmapbt is enabled
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (25 preceding siblings ...)
2015-12-19 9:07 ` [PATCH 26/53] xfs_repair: mask off length appropriately Darrick J. Wong
@ 2015-12-19 9:07 ` Darrick J. Wong
2015-12-19 9:07 ` [PATCH 28/53] xfs_repair: create a slab API for allocating arrays in large chunks Darrick J. Wong
` (25 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:07 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
In xfs_repair, we calculate where we think mkfs put the root inode
block. However, the rmapbt part of that calculation doesn't account
for the fact that mkfs reserved 2 AGFL blocks for the rmapbt, so the
computed block number is too low. This leads to xfs_repair complaining
(incorrectly) about the root inode block being in the wrong place and
blowing up.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/xfs_repair.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 2fe2e4e..3710636 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -416,8 +416,10 @@ calc_mkfs(xfs_mount_t *mp)
fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1;
if (xfs_sb_version_hasfinobt(&mp->m_sb))
fino_bno++;
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ fino_bno += min(2, mp->m_ag_maxlevels);
fino_bno++;
+ }
/*
* If the log is allocated in the first allocation group we need to
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 28/53] xfs_repair: create a slab API for allocating arrays in large chunks
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (26 preceding siblings ...)
2015-12-19 9:07 ` [PATCH 27/53] xfs_repair: fix fino_bno calculation when rmapbt is enabled Darrick J. Wong
@ 2015-12-19 9:07 ` Darrick J. Wong
2015-12-19 9:08 ` [PATCH 29/53] xfs_repair: collect reverse-mapping data for refcount/rmap tree rebuilding Darrick J. Wong
` (24 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:07 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Create a slab-based array and a bag-of-pointers data structure to
facilitate rapid linear scans of reverse-mapping data for later
reconstruction of the refcount and rmap btrees.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/Makefile | 4
repair/slab.c | 473 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/slab.h | 58 +++++++
3 files changed, 533 insertions(+), 2 deletions(-)
create mode 100644 repair/slab.c
create mode 100644 repair/slab.h
diff --git a/repair/Makefile b/repair/Makefile
index 251722b..756ba95 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -11,13 +11,13 @@ LTCOMMAND = xfs_repair
HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h btree.h \
da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
- rt.h progress.h scan.h versions.h prefetch.h threads.h
+ rt.h progress.h scan.h versions.h prefetch.h slab.h threads.h
CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c btree.c \
da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
- progress.c prefetch.c rt.c sb.c scan.c threads.c \
+ progress.c prefetch.c rt.c sb.c scan.c slab.c threads.c \
versions.c xfs_repair.c
LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD)
diff --git a/repair/slab.c b/repair/slab.c
new file mode 100644
index 0000000..bb7019a
--- /dev/null
+++ b/repair/slab.c
@@ -0,0 +1,473 @@
+/*
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <libxfs.h>
+#include "slab.h"
+
+#undef SLAB_DEBUG
+
+#ifdef SLAB_DEBUG
+# define dbg_printf(f, a...) do {printf(f, ## a); fflush(stdout); } while (0)
+#else
+# define dbg_printf(f, a...)
+#endif
+
+/*
+ * Slab Arrays and Bags
+ *
+ * The slab array is a dynamically growable linear array. Internally it
+ * maintains a list of slabs of increasing size; when a slab fills up, another
+ * is allocated. Each slab is sorted individually, which means that one must
+ * use an iterator to walk the entire logical array, sorted order or otherwise.
+ * Array items can neither be removed nor accessed randomly, since (at the
+ * moment) the only user of them (storing reverse mappings) doesn't need either
+ * piece. Pointers are not stable across sort operations.
+ *
+ * A bag is a collection of pointers. The bag can be added to or removed from
+ * arbitrarily, and the bag items can be iterated. Bags are used to process
+ * rmaps into refcount btree entries.
+ */
+
+/*
+ * Slabs -- each slab_hdr holds an array of items; when a slab_hdr fills up, we
+ * allocate a new one and add to that one. The slab object coordinates the
+ * slab_hdrs.
+ */
+
+/* Each slab holds at least 4096 items */
+#define MIN_SLAB_NR 4096
+/* and cannot be larger than 128M */
+#define MAX_SLAB_SIZE (128 * 1048576)
+struct xfs_slab_hdr {
+ size_t sh_nr;
+ size_t sh_inuse; /* items in use */
+ struct xfs_slab_hdr *sh_next; /* next slab hdr */
+ /* objects follow */
+};
+
+struct xfs_slab {
+ size_t s_item_sz; /* item size */
+ size_t s_nr_slabs; /* # of slabs */
+ size_t s_nr_items; /* # of items */
+ struct xfs_slab_hdr *s_first; /* first slab header */
+ struct xfs_slab_hdr *s_last; /* last slab header */
+};
+
+/*
+ * Slab cursors -- each slab_hdr_cursor tracks a slab_hdr; the slab_cursor
+ * tracks the slab_hdr_cursors. If a compare_fn is specified, the cursor
+ * returns objects in increasing order (if you've previously sorted the
+ * slabs with qsort_slab()). If compare_fn == NULL, it returns slab items
+ * in order.
+ */
+struct xfs_slab_hdr_cursor {
+ struct xfs_slab_hdr *hdr; /* a slab header */
+ size_t loc; /* where we are in the slab */
+};
+
+struct xfs_slab_cursor {
+ size_t nr; /* # of per-slab cursors */
+ struct xfs_slab *slab; /* pointer to the slab */
+ struct xfs_slab_hdr_cursor *last_hcur; /* last header we took from */
+ int (*compare_fn)(const void *, const void *); /* compare function */
+ struct xfs_slab_hdr_cursor hcur[0]; /* per-slab cursor */
+};
+
+/*
+ * Bags -- each bag is an array of pointers to items; when a bag fills up, we
+ * resize it.
+ */
+#define MIN_BAG_SIZE 4096
+struct xfs_bag {
+ size_t bg_nr; /* number of pointers */
+ size_t bg_inuse; /* number of slots in use */
+ void **bg_ptrs; /* pointers */
+};
+#define BAG_SIZE(nr) (sizeof(struct xfs_bag) + ((nr) * sizeof(void *)))
+#define BAG_END(bag) (&(bag)->bg_ptrs[(bag)->bg_nr])
+
+/**
+ * init_slab() -- Create a slab to hold some objects.
+ *
+ * @slab: The slab.
+ * @item_size: Create items of this size.
+ */
+int
+init_slab(
+ struct xfs_slab **slab,
+ size_t item_size)
+{
+ struct xfs_slab *ptr;
+
+ ptr = calloc(1, sizeof(struct xfs_slab));
+ if (!ptr)
+ return -ENOMEM;
+ ptr->s_item_sz = item_size;
+ ptr->s_last = NULL;
+ *slab = ptr;
+
+ return 0;
+}
+
+/**
+ * free_slab() -- Frees a slab.
+ */
+void
+free_slab(
+ struct xfs_slab **slab)
+{
+ struct xfs_slab *ptr;
+ struct xfs_slab_hdr *hdr;
+ struct xfs_slab_hdr *nhdr;
+
+ ptr = *slab;
+ if (!ptr)
+ return;
+ hdr = ptr->s_first;
+ while (hdr) {
+ nhdr = hdr->sh_next;
+ free(hdr);
+ hdr = nhdr;
+ }
+ free(ptr);
+ *slab = NULL;
+}
+
+static void *
+slab_ptr(
+ struct xfs_slab *slab,
+ struct xfs_slab_hdr *hdr,
+ size_t idx)
+{
+ char *p;
+
+ ASSERT(idx < hdr->sh_inuse);
+ p = (char *)(hdr + 1);
+ p += slab->s_item_sz * idx;
+ return p;
+}
+
+/**
+ * slab_add() -- Add an item to the slab.
+ */
+int
+slab_add(
+ struct xfs_slab *slab,
+ void *item)
+{
+ struct xfs_slab_hdr *hdr;
+ void *p;
+
+ hdr = slab->s_last;
+ if (!hdr || hdr->sh_inuse == hdr->sh_nr) {
+ size_t n;
+
+ n = (hdr ? hdr->sh_nr * 2 : MIN_SLAB_NR);
+ if (n * slab->s_item_sz > MAX_SLAB_SIZE)
+ n = MAX_SLAB_SIZE / slab->s_item_sz;
+ hdr = malloc(sizeof(struct xfs_slab_hdr) + (n * slab->s_item_sz));
+ if (!hdr)
+ return -ENOMEM;
+ hdr->sh_nr = n;
+ hdr->sh_inuse = 0;
+ hdr->sh_next = NULL;
+ if (slab->s_last)
+ slab->s_last->sh_next = hdr;
+ if (!slab->s_first)
+ slab->s_first = hdr;
+ slab->s_last = hdr;
+ slab->s_nr_slabs++;
+ }
+ hdr->sh_inuse++;
+ p = slab_ptr(slab, hdr, hdr->sh_inuse - 1);
+ memcpy(p, item, slab->s_item_sz);
+ slab->s_nr_items++;
+
+ return 0;
+}
+
+/**
+ * qsort_slab() -- Sort the items in the slab. Do not run this method
+ * if there are any cursors holding on to the slab.
+ */
+void
+qsort_slab(
+ struct xfs_slab *slab,
+ int (*compare_fn)(const void *, const void *))
+{
+ struct xfs_slab_hdr *hdr;
+
+ hdr = slab->s_first;
+ while (hdr) {
+ qsort(slab_ptr(slab, hdr, 0), hdr->sh_inuse, slab->s_item_sz,
+ compare_fn);
+ hdr = hdr->sh_next;
+ }
+}
+
+/*
+ * init_slab_cursor() -- Create a slab cursor to iterate the slab items.
+ *
+ * @slab: The slab.
+ * @compare_fn: If specified, use this function to return items in ascending order.
+ * @cur: The new cursor.
+ */
+int
+init_slab_cursor(
+ struct xfs_slab *slab,
+ int (*compare_fn)(const void *, const void *),
+ struct xfs_slab_cursor **cur)
+{
+ struct xfs_slab_cursor *c;
+ struct xfs_slab_hdr_cursor *hcur;
+ struct xfs_slab_hdr *hdr;
+
+ c = malloc(sizeof(struct xfs_slab_cursor) +
+ (sizeof(struct xfs_slab_hdr_cursor) * slab->s_nr_slabs));
+ if (!c)
+ return -ENOMEM;
+ c->nr = slab->s_nr_slabs;
+ c->slab = slab;
+ c->compare_fn = compare_fn;
+ c->last_hcur = NULL;
+ hcur = (struct xfs_slab_hdr_cursor *)(c + 1);
+ hdr = slab->s_first;
+ while (hdr) {
+ hcur->hdr = hdr;
+ hcur->loc = 0;
+ hcur++;
+ hdr = hdr->sh_next;
+ }
+ *cur = c;
+ return 0;
+}
+
+/**
+ * free_slab_cursor() -- Free the slab cursor.
+ */
+void
+free_slab_cursor(
+ struct xfs_slab_cursor **cur)
+{
+ if (!*cur)
+ return;
+ free(*cur);
+ *cur = NULL;
+}
+
+/**
+ * peek_slab_cursor() -- Return the smallest item in the slab, without
+ * advancing the iterator. The slabs must be sorted prior to the creation
+ * of the cursor.
+ */
+void *
+peek_slab_cursor(
+ struct xfs_slab_cursor *cur)
+{
+ struct xfs_slab_hdr_cursor *hcur;
+ void *p = NULL;
+ void *q;
+ size_t i;
+
+ cur->last_hcur = NULL;
+
+ /* no compare function; inorder traversal */
+ if (!cur->compare_fn) {
+ if (!cur->last_hcur)
+ cur->last_hcur = &cur->hcur[0];
+ hcur = cur->last_hcur;
+ while (hcur < &cur->hcur[cur->nr] &&
+ hcur->loc >= hcur->hdr->sh_inuse)
+ hcur++;
+ if (hcur == &cur->hcur[cur->nr])
+ return NULL;
+ p = slab_ptr(cur->slab, hcur->hdr, hcur->loc);
+ cur->last_hcur = hcur;
+ return p;
+ }
+
+ /* otherwise return things in increasing order */
+ for (i = 0, hcur = &cur->hcur[i]; i < cur->nr; i++, hcur++) {
+ if (hcur->loc >= hcur->hdr->sh_inuse)
+ continue;
+ q = slab_ptr(cur->slab, hcur->hdr, hcur->loc);
+ if (!p || cur->compare_fn(p, q) > 0) {
+ p = q;
+ cur->last_hcur = hcur;
+ }
+ }
+
+ return p;
+}
+
+/**
+ * advance_slab_cursor() -- After a peek operation, advance the cursor.
+ */
+void
+advance_slab_cursor(
+ struct xfs_slab_cursor *cur)
+{
+ ASSERT(cur->last_hcur);
+ cur->last_hcur->loc++;
+}
+
+/**
+ * pop_slab_cursor() -- Retrieve the next item in the slab and advance the
+ * cursor.
+ */
+void *
+pop_slab_cursor(
+ struct xfs_slab_cursor *cur)
+{
+ void *p;
+
+ p = peek_slab_cursor(cur);
+ if (p)
+ advance_slab_cursor(cur);
+ return p;
+}
+
+/**
+ * slab_count() -- Return the number of items in the slab.
+ */
+size_t
+slab_count(
+ struct xfs_slab *slab)
+{
+ return slab->s_nr_items;
+}
+
+/**
+ * init_bag() -- Create a bag to point to some objects.
+ *
+ * @bag: The bag.
+ */
+int
+init_bag(
+ struct xfs_bag **bag)
+{
+ struct xfs_bag *ptr;
+
+ ptr = calloc(1, sizeof(struct xfs_bag));
+ if (!ptr)
+ return -ENOMEM;
+ ptr->bg_ptrs = calloc(MIN_BAG_SIZE, sizeof(void *));
+ if (!ptr->bg_ptrs) {
+ free(ptr);
+ return -ENOMEM;
+ }
+ ptr->bg_nr = MIN_BAG_SIZE;
+ *bag = ptr;
+ return 0;
+}
+
+/**
+ * free_bag() - Free a bag of pointers.
+ *
+ * @bag: The bag to free.
+ */
+void
+free_bag(
+ struct xfs_bag **bag)
+{
+ struct xfs_bag *ptr;
+
+ ptr = *bag;
+ if (!ptr)
+ return;
+ free(ptr->bg_ptrs);
+ free(ptr);
+ *bag = NULL;
+}
+
+/**
+ * bag_add() - Add an object to the pointer bag.
+ *
+ * @bag: The bag.
+ * @ptr: The pointer to add to the bag.
+ */
+int
+bag_add(
+ struct xfs_bag *bag,
+ void *ptr)
+{
+ void **p, **x;
+
+ p = &bag->bg_ptrs[bag->bg_inuse];
+ if (p == BAG_END(bag)) {
+ /* No free space, alloc more pointers */
+ size_t nr;
+
+ nr = bag->bg_nr * 2;
+ x = realloc(bag->bg_ptrs, nr * sizeof(void *));
+ if (!x)
+ return -ENOMEM;
+ bag->bg_ptrs = x;
+ memset(BAG_END(bag), 0, bag->bg_nr * sizeof(void *));
+ bag->bg_nr = nr;
+ }
+ bag->bg_ptrs[bag->bg_inuse] = ptr;
+ bag->bg_inuse++;
+ return 0;
+}
+
+/**
+ * bag_remove() - Remove a pointer from a bag.
+ *
+ * @bag: The bag.
+ * @nr: The index of the pointer to remove; must be less than bag_count().
+ */
+int
+bag_remove(
+ struct xfs_bag *bag,
+ size_t nr)
+{
+ ASSERT(nr < bag->bg_inuse);
+ memmove(&bag->bg_ptrs[nr], &bag->bg_ptrs[nr + 1],
+ (bag->bg_inuse - nr - 1) * sizeof(void *)); /* only the tail after nr */
+ bag->bg_inuse--;
+ return 0;
+}
+
+/**
+ * bag_count() - Return the number of items in a bag.
+ *
+ * @bag: The bag.
+ */
+size_t
+bag_count(
+ struct xfs_bag *bag)
+{
+ return bag->bg_inuse;
+}
+
+/**
+ * bag_item() - Return the nth item in a bag.
+ *
+ * @bag: The bag.
+ * @nr: The item number.
+ */
+void *
+bag_item(
+ struct xfs_bag *bag,
+ size_t nr)
+{
+ if (nr >= bag->bg_inuse)
+ return NULL;
+ return bag->bg_ptrs[nr];
+}
diff --git a/repair/slab.h b/repair/slab.h
new file mode 100644
index 0000000..8142914
--- /dev/null
+++ b/repair/slab.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef SLAB_H_
+#define SLAB_H_
+
+struct xfs_slab;
+struct xfs_slab_cursor;
+
+extern int init_slab(struct xfs_slab **, size_t);
+extern void free_slab(struct xfs_slab **);
+
+extern int slab_add(struct xfs_slab *, void *);
+extern void qsort_slab(struct xfs_slab *, int (*)(const void *, const void *));
+extern size_t slab_count(struct xfs_slab *);
+
+extern int init_slab_cursor(struct xfs_slab *,
+ int (*)(const void *, const void *), struct xfs_slab_cursor **);
+extern void free_slab_cursor(struct xfs_slab_cursor **);
+
+extern void *peek_slab_cursor(struct xfs_slab_cursor *);
+extern void advance_slab_cursor(struct xfs_slab_cursor *);
+extern void *pop_slab_cursor(struct xfs_slab_cursor *);
+
+struct xfs_bag;
+
+extern int init_bag(struct xfs_bag **);
+extern void free_bag(struct xfs_bag **);
+extern int bag_add(struct xfs_bag *, void *);
+extern int bag_remove(struct xfs_bag *, size_t);
+extern size_t bag_count(struct xfs_bag *);
+extern void *bag_item(struct xfs_bag *, size_t);
+
+#define foreach_bag_ptr(bag, idx, ptr) \
+ for ((idx) = 0, (ptr) = bag_item((bag), (idx)); \
+ (idx) < bag_count(bag); \
+ (idx)++, (ptr) = bag_item((bag), (idx)))
+
+#define foreach_bag_ptr_reverse(bag, idx, ptr) \
+ for ((idx) = bag_count(bag) - 1, (ptr) = bag_item((bag), (idx)); \
+ (idx) >= 0 && (ptr) != NULL; \
+ (idx)--, (ptr) = bag_item((bag), (idx)))
+
+#endif /* SLAB_H_ */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 29/53] xfs_repair: collect reverse-mapping data for refcount/rmap tree rebuilding
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (27 preceding siblings ...)
2015-12-19 9:07 ` [PATCH 28/53] xfs_repair: create a slab API for allocating arrays in large chunks Darrick J. Wong
@ 2015-12-19 9:08 ` Darrick J. Wong
2015-12-19 9:08 ` [PATCH 30/53] xfs_repair: record and merge raw rmap data Darrick J. Wong
` (23 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:08 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Collect reverse-mapping data for the entire filesystem so that we can
later check and rebuild the reference count tree and the reverse mapping
tree.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/Makefile | 4 +
repair/dinode.c | 9 ++
repair/phase4.c | 5 +
repair/rmap.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 30 ++++++++
repair/xfs_repair.c | 4 +
6 files changed, 245 insertions(+), 2 deletions(-)
create mode 100644 repair/rmap.c
create mode 100644 repair/rmap.h
diff --git a/repair/Makefile b/repair/Makefile
index 756ba95..81c2b9f 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -11,13 +11,13 @@ LTCOMMAND = xfs_repair
HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h btree.h \
da_util.h dinode.h dir2.h err_protos.h globals.h incore.h protos.h \
- rt.h progress.h scan.h versions.h prefetch.h slab.h threads.h
+ rt.h progress.h scan.h versions.h prefetch.h rmap.h slab.h threads.h
CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c btree.c \
da_util.c dino_chunks.c dinode.c dir2.c globals.c incore.c \
incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
- progress.c prefetch.c rt.c sb.c scan.c slab.c threads.c \
+ progress.c prefetch.c rmap.c rt.c sb.c scan.c slab.c threads.c \
versions.c xfs_repair.c
LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD)
diff --git a/repair/dinode.c b/repair/dinode.c
index caa4c1b..7766dea 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -30,6 +30,8 @@
#include "attr_repair.h"
#include "bmap.h"
#include "threads.h"
+#include "slab.h"
+#include "rmap.h"
/*
* gettext lookups for translations of strings use mutexes internally to
@@ -779,6 +781,13 @@ _("illegal state %d in block map %" PRIu64 "\n"),
state, b);
}
}
+ if (collect_rmaps) { /* && !check_dups */
+ error = add_rmap(mp, ino, whichfork, &irec);
+ if (error)
+ do_error(
+_("couldn't add reverse mapping\n")
+ );
+ }
*tot += irec.br_blockcount;
}
error = 0;
diff --git a/repair/phase4.c b/repair/phase4.c
index 1a7d7b5..bc43cd8 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -30,7 +30,10 @@
#include "versions.h"
#include "dir2.h"
#include "progress.h"
+#include "slab.h"
+#include "rmap.h"
+bool collect_rmaps = false;
/*
* null out quota inode fields in sb if they point to non-existent inodes.
@@ -170,6 +173,8 @@ phase4(xfs_mount_t *mp)
int ag_hdr_block;
int bstate;
+ if (needs_rmap_work(mp))
+ collect_rmaps = true;
ag_hdr_block = howmany(ag_hdr_len, mp->m_sb.sb_blocksize);
do_log(_("Phase 4 - check for duplicate blocks...\n"));
diff --git a/repair/rmap.c b/repair/rmap.c
new file mode 100644
index 0000000..1a73dbb
--- /dev/null
+++ b/repair/rmap.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <libxfs.h>
+#include "btree.h"
+#include "err_protos.h"
+#include "libxlog.h"
+#include "incore.h"
+#include "globals.h"
+#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
+
+#undef RMAP_DEBUG
+
+#ifdef RMAP_DEBUG
+# define dbg_printf(f, a...) do {printf(f, ## a); fflush(stdout); } while (0)
+#else
+# define dbg_printf(f, a...)
+#endif
+
+/* per-AG rmap object anchor */
+struct xfs_ag_rmap {
+ struct xfs_slab *ar_rmaps; /* rmap observations, p4 */
+};
+
+static struct xfs_ag_rmap *ag_rmaps;
+
+/*
+ * Compare rmap observations for array sorting.
+ */
+static int
+rmap_compare(
+ const void *a,
+ const void *b)
+{
+ const struct xfs_rmap_irec *pa;
+ const struct xfs_rmap_irec *pb;
+
+ pa = a; pb = b;
+ if (pa->rm_startblock < pb->rm_startblock)
+ return -1;
+ else if (pa->rm_startblock > pb->rm_startblock)
+ return 1;
+ else if (pa->rm_owner < pb->rm_owner)
+ return -1;
+ else if (pa->rm_owner > pb->rm_owner)
+ return 1;
+ else if (pa->rm_offset < pb->rm_offset)
+ return -1;
+ else if (pa->rm_offset > pb->rm_offset)
+ return 1;
+ else
+ return 0;
+}
+
+/**
+ * needs_rmap_work() -- Return true if we must reconstruct either the
+ * reference count or reverse mapping trees.
+ *
+ * @mp: XFS mount object
+ */
+bool
+needs_rmap_work(
+ struct xfs_mount *mp)
+{
+ return xfs_sb_version_hasrmapbt(&mp->m_sb);
+}
+
+/**
+ * init_rmaps() -- Initialize per-AG reverse map data.
+ *
+ * @mp: XFS mount object
+ */
+void
+init_rmaps(
+ struct xfs_mount *mp)
+{
+ xfs_agnumber_t i;
+ int error;
+
+ if (!needs_rmap_work(mp))
+ return;
+
+ ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(struct xfs_ag_rmap));
+ if (!ag_rmaps)
+ do_error(_("couldn't allocate per-AG reverse map roots\n"));
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ error = init_slab(&ag_rmaps[i].ar_rmaps,
+ sizeof(struct xfs_rmap_irec));
+ if (error)
+ do_error(
+_("Insufficient memory while allocating reverse mapping slabs."));
+ }
+}
+
+/**
+ * free_rmaps() -- Free the per-AG reverse-mapping data.
+ *
+ * @mp: XFS mount object
+ */
+void
+free_rmaps(
+ struct xfs_mount *mp)
+{
+ xfs_agnumber_t i;
+
+ if (!needs_rmap_work(mp))
+ return;
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ free_slab(&ag_rmaps[i].ar_rmaps);
+ }
+ free(ag_rmaps);
+ ag_rmaps = NULL;
+}
+
+/**
+ * add_rmap() -- Add an observation about a physical block mapping for later
+ * btree reconstruction.
+ *
+ * @mp: XFS mount object.
+ * @ino: The inode number associated with the extent mapping.
+ * @whichfork: Data or attribute fork?
+ * @irec: The extent mapping to record.
+ */
+int
+add_rmap(
+ struct xfs_mount *mp,
+ xfs_ino_t ino,
+ int whichfork,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_slab *rmaps;
+ struct xfs_rmap_irec rmap;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+
+ if (!needs_rmap_work(mp))
+ return 0;
+
+ agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
+ agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
+ ASSERT(agno != NULLAGNUMBER);
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
+ ASSERT(ino != NULLFSINO);
+ ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
+
+ rmaps = ag_rmaps[agno].ar_rmaps;
+ rmap.rm_owner = ino;
+ rmap.rm_offset = irec->br_startoff;
+ if (whichfork == XFS_ATTR_FORK)
+ rmap.rm_offset |= XFS_RMAP_OFF_ATTR;
+ rmap.rm_startblock = agbno;
+ rmap.rm_blockcount = irec->br_blockcount;
+ if (irec->br_state == XFS_EXT_UNWRITTEN)
+ rmap.rm_blockcount |= XFS_RMAP_LEN_UNWRITTEN;
+ return slab_add(rmaps, &rmap);
+}
+
+#ifdef RMAP_DEBUG
+static void
+dump_rmap(
+ const char *msg,
+ xfs_agnumber_t agno,
+ struct xfs_rmap_irec *rmap)
+{
+ printf("%s: %p agno=%u pblk=%llu ino=%llu lblk=%llu len=%u\n", msg,
+ rmap,
+ (unsigned)agno,
+ (unsigned long long)rmap->rm_startblock,
+ (unsigned long long)rmap->rm_owner,
+ (unsigned long long)rmap->rm_offset,
+ (unsigned)rmap->rm_blockcount);
+}
+#else
+# define dump_rmap(m, a, r)
+#endif
diff --git a/repair/rmap.h b/repair/rmap.h
new file mode 100644
index 0000000..be3d357
--- /dev/null
+++ b/repair/rmap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef RMAP_H_
+#define RMAP_H_
+
+extern bool collect_rmaps;
+
+extern bool needs_rmap_work(struct xfs_mount *);
+
+extern void init_rmaps(struct xfs_mount *);
+extern void free_rmaps(struct xfs_mount *);
+
+extern int add_rmap(struct xfs_mount *, xfs_ino_t, int, struct xfs_bmbt_irec *);
+
+#endif /* RMAP_H_ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 3710636..8fc6fd5 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -31,6 +31,8 @@
#include "threads.h"
#include "progress.h"
#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
#define rounddown(x, y) (((x)/(y))*(y))
@@ -858,6 +860,7 @@ main(int argc, char **argv)
init_bmaps(mp);
incore_ino_init(mp);
incore_ext_init(mp);
+ init_rmaps(mp);
/* initialize random globals now that we know the fs geometry */
inodes_per_block = mp->m_sb.sb_inopblock;
@@ -891,6 +894,7 @@ main(int argc, char **argv)
/*
* Done with the block usage maps, toss them...
*/
+ free_rmaps(mp);
free_bmaps(mp);
if (!bad_ino_btree) {
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 30/53] xfs_repair: record and merge raw rmap data
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (28 preceding siblings ...)
2015-12-19 9:08 ` [PATCH 29/53] xfs_repair: collect reverse-mapping data for refcount/rmap tree rebuilding Darrick J. Wong
@ 2015-12-19 9:08 ` Darrick J. Wong
2015-12-19 9:08 ` [PATCH 31/53] xfs_repair: add inode bmbt block rmaps Darrick J. Wong
` (22 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:08 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Since we still allow merging of BMBT block, AG metadata, and AG btree
block rmaps, provide a facility to collect these raw observations and
merge them (with maximal length) into the main rmap list.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/rmap.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 3 +
2 files changed, 140 insertions(+)
diff --git a/repair/rmap.c b/repair/rmap.c
index 1a73dbb..f3363f7 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -37,6 +37,7 @@
/* per-AG rmap object anchor */
struct xfs_ag_rmap {
struct xfs_slab *ar_rmaps; /* rmap observations, p4 */
+ struct xfs_slab *ar_raw_rmaps; /* unmerged rmaps */
};
static struct xfs_ag_rmap *ag_rmaps;
@@ -107,6 +108,11 @@ init_rmaps(
if (error)
do_error(
_("Insufficient memory while allocating reverse mapping slabs."));
+ error = init_slab(&ag_rmaps[i].ar_raw_rmaps,
+ sizeof(struct xfs_rmap_irec));
+ if (error)
+ do_error(
+_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
}
}
@@ -126,6 +132,7 @@ free_rmaps(
for (i = 0; i < mp->m_sb.sb_agcount; i++) {
free_slab(&ag_rmaps[i].ar_rmaps);
+ free_slab(&ag_rmaps[i].ar_raw_rmaps);
}
free(ag_rmaps);
ag_rmaps = NULL;
@@ -175,6 +182,136 @@ add_rmap(
return slab_add(rmaps, &rmap);
}
+/* add a raw rmap; these will be merged later */
+static int
+__add_raw_rmap(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ uint64_t owner,
+ bool is_attr,
+ bool is_bmbt)
+{
+ struct xfs_rmap_irec rmap;
+
+ rmap.rm_owner = owner;
+ rmap.rm_offset = 0;
+ if (is_attr)
+ rmap.rm_offset |= XFS_RMAP_OFF_ATTR;
+ if (is_bmbt)
+ rmap.rm_offset |= XFS_RMAP_OFF_BMBT;
+ rmap.rm_startblock = agbno;
+ rmap.rm_blockcount = len;
+ return slab_add(ag_rmaps[agno].ar_raw_rmaps, &rmap);
+}
+
+/**
+ * add_ag_rmap() -- Add an reverse mapping for a per-AG fixed metadata object.
+ *
+ * @mp: XFS mount object.
+ * @agno: The AG number.
+ * @agbno: The block within the AG.
+ * @len: The length of the extent.
+ * @owner: The owner of the block.
+ */
+int
+add_ag_rmap(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ uint64_t owner)
+{
+ if (!needs_rmap_work(mp))
+ return 0;
+
+ ASSERT(agno != NULLAGNUMBER);
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
+
+ return __add_raw_rmap(mp, agno, agbno, len, owner, false, false);
+}
+
+static bool
+mergeable_rmaps(
+ struct xfs_rmap_irec *r1,
+ struct xfs_rmap_irec *r2)
+{
+ if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
+ return false;
+ if (r1->rm_owner != r2->rm_owner)
+ return false;
+ if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
+ return true;
+ /* must be an inode owner */
+ if (XFS_RMAP_IS_ATTR_FORK(r1->rm_offset) ^
+ XFS_RMAP_IS_ATTR_FORK(r2->rm_offset))
+ return false;
+ if (XFS_RMAP_IS_BMBT(r1->rm_offset) || XFS_RMAP_IS_BMBT(r2->rm_offset))
+ return XFS_RMAP_IS_BMBT(r1->rm_offset) &&
+ XFS_RMAP_IS_BMBT(r2->rm_offset);
+ return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
+}
+
+/**
+ * fold_raw_rmaps() - Merge adjacent raw rmaps and add them to the main
+ * rmap list. Returns 0 on success or a negative errno on failure.
+ * @mp: XFS mount.
+ * @agno: AG number.
+ */
+int
+fold_raw_rmaps(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_slab_cursor *cur = NULL;
+ struct xfs_rmap_irec *prev, *rec;
+ size_t old_sz;
+ int error = 0; /* nothing to fold is not an error */
+
+ old_sz = slab_count(ag_rmaps[agno].ar_rmaps);
+ if (slab_count(ag_rmaps[agno].ar_raw_rmaps) == 0)
+ goto no_raw;
+ qsort_slab(ag_rmaps[agno].ar_raw_rmaps, rmap_compare);
+ error = init_slab_cursor(ag_rmaps[agno].ar_raw_rmaps, rmap_compare,
+ &cur);
+ if (error)
+ goto err;
+
+ prev = pop_slab_cursor(cur);
+ rec = pop_slab_cursor(cur);
+ while (rec) {
+ if (mergeable_rmaps(prev, rec)) {
+ prev->rm_blockcount += rec->rm_blockcount;
+ rec = pop_slab_cursor(cur);
+ continue;
+ }
+ error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
+ if (error)
+ goto err;
+ prev = rec;
+ rec = pop_slab_cursor(cur);
+ }
+ if (prev) {
+ error = slab_add(ag_rmaps[agno].ar_rmaps, prev);
+ if (error)
+ goto err;
+ }
+ free_slab(&ag_rmaps[agno].ar_raw_rmaps);
+ error = init_slab(&ag_rmaps[agno].ar_raw_rmaps,
+ sizeof(struct xfs_rmap_irec));
+ if (error)
+ do_error(
+_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
+no_raw:
+ if (old_sz)
+ qsort_slab(ag_rmaps[agno].ar_rmaps, rmap_compare);
+err:
+ free_slab_cursor(&cur);
+ return error;
+}
+
#ifdef RMAP_DEBUG
static void
dump_rmap(
diff --git a/repair/rmap.h b/repair/rmap.h
index be3d357..51e916b 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -26,5 +26,8 @@ extern void init_rmaps(struct xfs_mount *);
extern void free_rmaps(struct xfs_mount *);
extern int add_rmap(struct xfs_mount *, xfs_ino_t, int, struct xfs_bmbt_irec *);
+extern int add_ag_rmap(struct xfs_mount *, xfs_agnumber_t agno,
+ xfs_agblock_t agbno, xfs_extlen_t len, uint64_t owner);
+extern int fold_raw_rmaps(struct xfs_mount *mp, xfs_agnumber_t agno);
#endif /* RMAP_H_ */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 31/53] xfs_repair: add inode bmbt block rmaps
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (29 preceding siblings ...)
2015-12-19 9:08 ` [PATCH 30/53] xfs_repair: record and merge raw rmap data Darrick J. Wong
@ 2015-12-19 9:08 ` Darrick J. Wong
2015-12-19 9:08 ` [PATCH 32/53] xfs_repair: add fixed-location per-AG rmaps Darrick J. Wong
` (21 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:08 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Record BMBT blocks in the raw rmap list.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/rmap.c | 32 ++++++++++++++++++++++++++++++++
repair/rmap.h | 1 +
repair/scan.c | 11 +++++++++++
3 files changed, 44 insertions(+)
diff --git a/repair/rmap.c b/repair/rmap.c
index f3363f7..40bdae3 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -207,6 +207,38 @@ __add_raw_rmap(
}
/**
+ * add_bmbt_rmap() -- Add an observation about a bmbt block for later
+ * btree reconstruction.
+ *
+ * @mp: XFS mount object.
+ * @ino: The inode number associated with the extent mapping.
+ * @whichfork: Data or attribute fork?
+ * @fsbno: fsblock number of the bmbt block
+ */
+int
+add_bmbt_rmap(
+ struct xfs_mount *mp,
+ xfs_ino_t ino,
+ int whichfork,
+ xfs_fsblock_t fsbno)
+{
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+
+ if (!needs_rmap_work(mp))
+ return 0;
+
+ agno = XFS_FSB_TO_AGNO(mp, fsbno);
+ agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
+ ASSERT(agno != NULLAGNUMBER);
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(agbno + 1 <= mp->m_sb.sb_agblocks);
+
+ return __add_raw_rmap(mp, agno, agbno, 1, ino,
+ whichfork == XFS_ATTR_FORK, true);
+}
+
+/**
* add_ag_rmap() -- Add an reverse mapping for a per-AG fixed metadata object.
*
* @mp: XFS mount object.
diff --git a/repair/rmap.h b/repair/rmap.h
index 51e916b..57d56a0 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -28,6 +28,7 @@ extern void free_rmaps(struct xfs_mount *);
extern int add_rmap(struct xfs_mount *, xfs_ino_t, int, struct xfs_bmbt_irec *);
extern int add_ag_rmap(struct xfs_mount *, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len, uint64_t owner);
+extern int add_bmbt_rmap(struct xfs_mount *, xfs_ino_t, int, xfs_fsblock_t);
extern int fold_raw_rmaps(struct xfs_mount *mp, xfs_agnumber_t agno);
#endif /* RMAP_H_ */
diff --git a/repair/scan.c b/repair/scan.c
index 1ade344..db9e131 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -29,6 +29,7 @@
#include "bmap.h"
#include "progress.h"
#include "threads.h"
+#include "rmap.h"
static xfs_mount_t *mp = NULL;
@@ -197,6 +198,7 @@ scan_bmapbt(
xfs_agnumber_t agno;
xfs_agblock_t agbno;
int state;
+ int error;
/*
* unlike the ag freeblock btrees, if anything looks wrong
@@ -378,6 +380,15 @@ _("bad state %d, inode %" PRIu64 " bmap block 0x%" PRIx64 "\n"),
(*tot)++;
numrecs = be16_to_cpu(block->bb_numrecs);
+ /* Record BMBT blocks in the reverse-mapping data. */
+ if (check_dups && collect_rmaps) {
+ error = add_bmbt_rmap(mp, ino, whichfork, bno);
+ if (error)
+ do_error(
+_("couldn't add inode %"PRIu64" bmbt block %"PRIu64" reverse-mapping data."),
+ ino, bno);
+ }
+
if (level == 0) {
if (numrecs > mp->m_bmap_dmxr[0] || (isroot == 0 && numrecs <
mp->m_bmap_dmnr[0])) {
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 32/53] xfs_repair: add fixed-location per-AG rmaps
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (30 preceding siblings ...)
2015-12-19 9:08 ` [PATCH 31/53] xfs_repair: add inode bmbt block rmaps Darrick J. Wong
@ 2015-12-19 9:08 ` Darrick J. Wong
2015-12-19 9:08 ` [PATCH 33/53] xfs_repair: check existing rmapbt entries against observed rmaps Darrick J. Wong
` (20 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:08 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Add reverse-mappings for fixed-location per-AG metadata such as inode
chunks, superblocks, and the log to the raw rmap list, then merge the
raw rmap data (which also has the BMBT data) into the main rmap list.
v2: Support sparse inode chunks.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/phase4.c | 41 +++++++++++++++++++++++++
repair/rmap.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 2 +
3 files changed, 134 insertions(+)
diff --git a/repair/phase4.c b/repair/phase4.c
index bc43cd8..cbdb92e 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -157,6 +157,40 @@ process_ags(
do_inode_prefetch(mp, ag_stride, process_ag_func, true, false);
}
+static void
+check_rmap_btrees(
+ work_queue_t *wq,
+ xfs_agnumber_t agno,
+ void *arg)
+{
+ int error;
+
+ error = add_fixed_ag_rmap_data(wq->mp, agno);
+ if (error)
+ do_error(
+_("unable to add AG %u metadata reverse-mapping data.\n"), agno);
+
+ error = fold_raw_rmaps(wq->mp, agno);
+ if (error)
+ do_error(
+_("unable to merge AG %u metadata reverse-mapping data.\n"), agno);
+}
+
+static void
+process_rmap_data(
+ struct xfs_mount *mp)
+{
+ struct work_queue wq;
+ xfs_agnumber_t i;
+
+ if (!needs_rmap_work(mp))
+ return;
+
+ create_work_queue(&wq, mp, libxfs_nproc());
+ for (i = 0; i < mp->m_sb.sb_agcount; i++)
+ queue_work(&wq, check_rmap_btrees, i, NULL);
+ destroy_work_queue(&wq);
+}
void
phase4(xfs_mount_t *mp)
@@ -306,6 +340,13 @@ phase4(xfs_mount_t *mp)
* already in phase 3.
*/
process_ags(mp);
+
+ /*
+ * Process all the reverse-mapping data that we collected. This
+ * involves checking the rmap data against the btree.
+ */
+ process_rmap_data(mp);
+
print_final_rpt();
/*
diff --git a/repair/rmap.c b/repair/rmap.c
index 40bdae3..e13fc53 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -344,6 +344,97 @@ err:
return error;
}
+/* Return the index of the lowest clear bit in @mask (64 if every bit is set). */
+static int
+find_first_zero_bit(
+	__uint64_t	mask)
+{
+	int		bit;
+
+	for (bit = 0; bit < sizeof(mask) * NBBY && (mask & 1); bit++)
+		mask >>= 1;
+
+	return bit;
+}
+
+/* Count the number of bits set in @mask. */
+static int
+popcnt(
+	__uint64_t	mask)
+{
+	int		nr_set = 0;
+
+	/*
+	 * Clearing the lowest set bit on every pass means we iterate once
+	 * per set bit and fall straight through when the mask is zero.
+	 */
+	for (; mask != 0; mask &= mask - 1)
+		nr_set++;
+
+	return nr_set;
+}
+
+/**
+ * add_fixed_ag_rmap_data() - Add fixed per-AG metadata to the rmap list.
+ * This includes sb/agi/agf/agfl headers, inode
+ * chunks, and the log.
+ *
+ * @mp: XFS mountpoint.
+ * @agno: AG number.
+ */
+int
+add_fixed_ag_rmap_data(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ xfs_fsblock_t fsbno;
+ xfs_agblock_t agbno;
+ ino_tree_node_t *ino_rec;
+ xfs_agino_t agino;
+ int error;
+ int startidx;
+ int nr;
+
+ if (!needs_rmap_work(mp))
+ return 0;
+
+ /* sb/agi/agf/agfl headers */
+ error = add_ag_rmap(mp, agno, 0, XFS_BNO_BLOCK(mp),
+ XFS_RMAP_OWN_FS);
+ if (error)
+ goto out;
+
+ /* inodes */
+ ino_rec = findfirst_inode_rec(agno);
+ for (; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
+ if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
+ startidx = find_first_zero_bit(ino_rec->ir_sparse);
+ nr = XFS_INODES_PER_CHUNK - popcnt(ino_rec->ir_sparse);
+ } else {
+ startidx = 0;
+ nr = XFS_INODES_PER_CHUNK;
+ }
+ agino = ino_rec->ino_startnum + startidx;
+ agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+ error = add_ag_rmap(mp, agno, agbno, nr / mp->m_sb.sb_inopblock,
+ XFS_RMAP_OWN_INODES);
+ if (error)
+ goto out;
+ }
+
+ /* log */
+ fsbno = mp->m_sb.sb_logstart;
+ if (fsbno && XFS_FSB_TO_AGNO(mp, fsbno) == agno) {
+ agbno = XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart);
+ error = add_ag_rmap(mp, agno, agbno, mp->m_sb.sb_logblocks,
+ XFS_RMAP_OWN_LOG);
+ if (error)
+ goto out;
+ }
+out:
+ return error;
+}
+
#ifdef RMAP_DEBUG
static void
dump_rmap(
diff --git a/repair/rmap.h b/repair/rmap.h
index 57d56a0..7bab450 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -31,4 +31,6 @@ extern int add_ag_rmap(struct xfs_mount *, xfs_agnumber_t agno,
extern int add_bmbt_rmap(struct xfs_mount *, xfs_ino_t, int, xfs_fsblock_t);
extern int fold_raw_rmaps(struct xfs_mount *mp, xfs_agnumber_t agno);
+extern int add_fixed_ag_rmap_data(struct xfs_mount *, xfs_agnumber_t);
+
#endif /* RMAP_H_ */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 33/53] xfs_repair: check existing rmapbt entries against observed rmaps
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (31 preceding siblings ...)
2015-12-19 9:08 ` [PATCH 32/53] xfs_repair: add fixed-location per-AG rmaps Darrick J. Wong
@ 2015-12-19 9:08 ` Darrick J. Wong
2015-12-19 9:08 ` [PATCH 34/53] xfs_repair: rebuild reverse-mapping btree Darrick J. Wong
` (19 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:08 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
If we're running in -n mode, check the rmaps that we observe against
what's in the rmap btree and complain if there's a mismatch.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
include/libxfs.h | 1
repair/phase4.c | 6 ++
repair/rmap.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 5 ++
repair/scan.c | 19 ++++--
5 files changed, 198 insertions(+), 6 deletions(-)
diff --git a/include/libxfs.h b/include/libxfs.h
index 2357aec..5382191 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -77,6 +77,7 @@ extern uint32_t crc32c_le(uint32_t crc, unsigned char const *p, size_t len);
#include "xfs_bmap.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
+#include "xfs_rmap_btree.h"
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/repair/phase4.c b/repair/phase4.c
index cbdb92e..98aab35 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -174,6 +174,12 @@ _("unable to add AG %u metadata reverse-mapping data.\n"), agno);
if (error)
do_error(
_("unable to merge AG %u metadata reverse-mapping data.\n"), agno);
+
+ error = check_rmaps(wq->mp, agno);
+ if (error)
+ do_error(
+_("%s while checking reverse-mappings"),
+ strerror(-error));
}
static void
diff --git a/repair/rmap.c b/repair/rmap.c
index e13fc53..bb1206e 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -41,6 +41,7 @@ struct xfs_ag_rmap {
};
static struct xfs_ag_rmap *ag_rmaps;
+static bool rmapbt_suspect;
/*
* Compare rmap observations for array sorting.
@@ -453,3 +454,175 @@ dump_rmap(
#else
# define dump_rmap(m, a, r)
#endif
+
+/**
+ * rmap_record_count() -- Return the number of rmap objects for an AG.
+ *
+ * @mp: XFS mount object
+ * @agno: AG number
+ */
+size_t
+rmap_record_count(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ return slab_count(ag_rmaps[agno].ar_rmaps);
+}
+
+/**
+ * init_rmap_cursor() -- Return a slab cursor that will return rmap
+ * objects in order.
+ * @agno: AG number.
+ * @cur: The new cursor.
+ */
+int
+init_rmap_cursor(
+ xfs_agnumber_t agno,
+ struct xfs_slab_cursor **cur)
+{
+ return init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare, cur);
+}
+
+/**
+ * rmap_avoid_check() -- Mark the rmap btree suspect so check_rmaps() skips it.
+ */
+void
+rmap_avoid_check(void)
+{
+ rmapbt_suspect = true;
+}
+
+/**
+ * check_rmaps() -- Warn about observed reverse mappings that are missing from
+ *		    or differ in the AG's rmap btree; returns 0 or an error code.
+ * @mp: XFS mount object
+ * @agno: AG number
+ */
+int
+check_rmaps(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno)
+{
+	struct xfs_slab_cursor	*rm_cur;
+	struct xfs_btree_cur	*bt_cur = NULL;
+	int			error;
+	int			have;
+	int			i;
+	struct xfs_buf		*agbp = NULL;
+	struct xfs_rmap_irec	*rm_rec;
+	struct xfs_rmap_irec	tmp;
+	struct xfs_perag	*pag;		/* per allocation group data */
+
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return 0;
+	if (rmapbt_suspect) {
+		if (no_modify && agno == 0)
+			do_warn(_("would rebuild corrupt rmap btrees.\n"));
+		return 0;
+	}
+
+	/* Create cursors to the observed rmaps and the on-disk rmap btree */
+	error = init_rmap_cursor(agno, &rm_cur);
+	if (error)
+		return error;
+
+	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	if (error)
+		goto err;
+
+	/* Leave the per-ag data "uninitialized" since we rewrite it later */
+	pag = xfs_perag_get(mp, agno);
+	pag->pagf_init = 0;
+	xfs_perag_put(pag);
+
+	bt_cur = xfs_rmapbt_init_cursor(mp, NULL, agbp, agno);
+	if (!bt_cur) {
+		error = -ENOMEM;
+		goto err;
+	}
+
+	rm_rec = pop_slab_cursor(rm_cur);
+	while (rm_rec) {
+		/* Look for a rmap record in the btree */
+		error = xfs_rmap_lookup_eq(bt_cur, rm_rec->rm_startblock,
+				rm_rec->rm_blockcount, rm_rec->rm_owner,
+				rm_rec->rm_offset, &have);
+		if (error)
+			goto err;
+		if (!have) {
+			do_warn(
+_("Missing reverse-mapping record for (%u/%u) %slen %u owner %"PRIx64" \
+%s%soff %"PRIx64"\n"),
+				agno, rm_rec->rm_startblock,
+				XFS_RMAP_IS_UNWRITTEN(rm_rec->rm_blockcount) ?
+					_("unwritten ") : "",
+				XFS_RMAP_LEN(rm_rec->rm_blockcount),
+				rm_rec->rm_owner,
+				XFS_RMAP_IS_ATTR_FORK(rm_rec->rm_offset) ?
+					_("attr ") : "",
+				XFS_RMAP_IS_BMBT(rm_rec->rm_offset) ?
+					_("bmbt ") : "",
+				XFS_RMAP_OFF(rm_rec->rm_offset));
+			goto next_loop;
+		}
+
+		error = xfs_rmap_get_rec(bt_cur, &tmp, &i);
+		if (error)
+			goto err;
+		if (!i) {
+			do_warn(
+_("Unretrievable reverse-mapping record for (%u/%u) %slen %u owner %"PRIx64" \
+%s%soff %"PRIx64"\n"),
+				agno, rm_rec->rm_startblock,
+				XFS_RMAP_IS_UNWRITTEN(rm_rec->rm_blockcount) ?
+					_("unwritten ") : "",
+				XFS_RMAP_LEN(rm_rec->rm_blockcount),
+				rm_rec->rm_owner,
+				XFS_RMAP_IS_ATTR_FORK(rm_rec->rm_offset) ?
+					_("attr ") : "",
+				XFS_RMAP_IS_BMBT(rm_rec->rm_offset) ?
+					_("bmbt ") : "",
+				XFS_RMAP_OFF(rm_rec->rm_offset));
+			goto next_loop;
+		}
+
+		/* Compare each rmap observation against the btree's record */
+		if (tmp.rm_startblock != rm_rec->rm_startblock ||
+		    tmp.rm_blockcount != rm_rec->rm_blockcount ||
+		    tmp.rm_owner != rm_rec->rm_owner ||
+		    tmp.rm_offset != rm_rec->rm_offset)
+			do_warn(
+_("Incorrect reverse-mapping: saw (%u/%u) %slen %u owner %"PRIx64" %s%soff \
+%"PRIx64"; should be (%u/%u) %slen %u owner %"PRIx64" %s%soff %"PRIx64"\n"),
+				agno, tmp.rm_startblock,
+				XFS_RMAP_IS_UNWRITTEN(tmp.rm_blockcount) ?
+					_("unwritten ") : "",
+				XFS_RMAP_LEN(tmp.rm_blockcount),
+				tmp.rm_owner,
+				XFS_RMAP_IS_ATTR_FORK(tmp.rm_offset) ?
+					_("attr ") : "",
+				XFS_RMAP_IS_BMBT(tmp.rm_offset) ?
+					_("bmbt ") : "",
+				XFS_RMAP_OFF(tmp.rm_offset),
+				agno, rm_rec->rm_startblock,
+				XFS_RMAP_IS_UNWRITTEN(rm_rec->rm_blockcount) ?
+					_("unwritten ") : "",
+				XFS_RMAP_LEN(rm_rec->rm_blockcount),
+				rm_rec->rm_owner,
+				XFS_RMAP_IS_ATTR_FORK(rm_rec->rm_offset) ?
+					_("attr ") : "",
+				XFS_RMAP_IS_BMBT(rm_rec->rm_offset) ?
+					_("bmbt ") : "",
+				XFS_RMAP_OFF(rm_rec->rm_offset));
+next_loop:
+		rm_rec = pop_slab_cursor(rm_cur);
+	}
+
+err:
+	if (bt_cur)
+		xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+	if (agbp)
+		libxfs_putbuf(agbp);
+	free_slab_cursor(&rm_cur);
+	return error;	/* 0 after a full walk; mismatches are only warned about */
+}
diff --git a/repair/rmap.h b/repair/rmap.h
index 7bab450..f3f3331 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -33,4 +33,9 @@ extern int fold_raw_rmaps(struct xfs_mount *mp, xfs_agnumber_t agno);
extern int add_fixed_ag_rmap_data(struct xfs_mount *, xfs_agnumber_t);
+extern size_t rmap_record_count(struct xfs_mount *, xfs_agnumber_t);
+extern int init_rmap_cursor(xfs_agnumber_t, struct xfs_slab_cursor **);
+extern void rmap_avoid_check(void);
+extern int check_rmaps(struct xfs_mount *, xfs_agnumber_t);
+
#endif /* RMAP_H_ */
diff --git a/repair/scan.c b/repair/scan.c
index db9e131..823401b 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -29,6 +29,7 @@
#include "bmap.h"
#include "progress.h"
#include "threads.h"
+#include "slab.h"
#include "rmap.h"
static xfs_mount_t *mp = NULL;
@@ -808,7 +809,9 @@ scan_rmapbt(
if (magic != XFS_RMAP_CRC_MAGIC) {
name = "(unknown)";
- assert(0);
+ hdr_errors++;
+ suspect++;
+ goto out;
}
if (be32_to_cpu(block->bb_magic) != magic) {
@@ -816,7 +819,7 @@ scan_rmapbt(
be32_to_cpu(block->bb_magic), name, agno, bno);
hdr_errors++;
if (suspect)
- return;
+ goto out;
}
/*
@@ -834,7 +837,7 @@ scan_rmapbt(
level, be16_to_cpu(block->bb_level), name, agno, bno);
hdr_errors++;
if (suspect)
- return;
+ goto out;
}
/* check for btree blocks multiply claimed */
@@ -844,7 +847,7 @@ scan_rmapbt(
do_warn(
_("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
name, state, agno, bno, suspect);
- return;
+ goto out;
}
set_bmap(agno, bno, XR_E_FS_MAP);
@@ -992,7 +995,7 @@ _("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
}
}
}
- return;
+ goto out;
}
/*
@@ -1020,7 +1023,7 @@ _("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
mp->m_rmap_mnr[1], mp->m_rmap_mxr[1],
name, agno, bno);
if (suspect)
- return;
+ goto out;
suspect++;
} else if (suspect) {
suspect = 0;
@@ -1043,6 +1046,10 @@ _("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
magic, priv, &xfs_rmapbt_buf_ops);
}
}
+
+out:
+ if (suspect)
+ rmap_avoid_check();
}
/*
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 34/53] xfs_repair: rebuild reverse-mapping btree
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (32 preceding siblings ...)
2015-12-19 9:08 ` [PATCH 33/53] xfs_repair: check existing rmapbt entries against observed rmaps Darrick J. Wong
@ 2015-12-19 9:08 ` Darrick J. Wong
2015-12-19 9:08 ` [PATCH 35/53] xfs_repair: add per-AG btree blocks to rmap data and add to rmapbt Darrick J. Wong
` (18 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:08 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Rebuild the reverse-mapping btree with the rmap observations
corresponding to file extents, bmbt blocks, and fixed per-AG
metadata.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/phase5.c | 321 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 316 insertions(+), 5 deletions(-)
diff --git a/repair/phase5.c b/repair/phase5.c
index 109e37b..f37ce6b 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -28,6 +28,8 @@
#include "versions.h"
#include "threads.h"
#include "progress.h"
+#include "slab.h"
+#include "rmap.h"
/*
* we maintain the current slice (path from root to leaf)
@@ -1326,6 +1328,292 @@ nextrec:
}
}
+/* rebuild the rmap tree */
+
+#define XR_RMAPBT_BLOCK_MAXRECS(mp, level) \
+ ((mp)->m_rmap_mxr[(level) != 0])
+
+/*
+ * we don't have to worry here about how chewing up free extents
+ * may perturb things because rmap tree building happens before
+ * freespace tree building.
+ */
+static void
+init_rmapbt_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs)
+{
+ size_t num_recs;
+ int level;
+ bt_stat_level_t *lptr;
+ bt_stat_level_t *p_lptr;
+ xfs_extlen_t blocks_allocated;
+
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ memset(btree_curs, 0, sizeof(bt_status_t));
+ return;
+ }
+
+ lptr = &btree_curs->level[0];
+ btree_curs->init = 1;
+
+ /*
+ * build up statistics
+ */
+ num_recs = rmap_record_count(mp, agno);
+ if (num_recs == 0) {
+ /*
+ * easy corner-case -- no rmap records
+ */
+ lptr->num_blocks = 1;
+ lptr->modulo = 0;
+ lptr->num_recs_pb = 0;
+ lptr->num_recs_tot = 0;
+
+ btree_curs->num_levels = 1;
+ btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
+
+ setup_cursor(mp, agno, btree_curs);
+
+ return;
+ }
+
+ blocks_allocated = lptr->num_blocks = howmany(num_recs,
+ XR_RMAPBT_BLOCK_MAXRECS(mp, 0));
+
+ lptr->modulo = num_recs % lptr->num_blocks;
+ lptr->num_recs_pb = num_recs / lptr->num_blocks;
+ lptr->num_recs_tot = num_recs;
+ level = 1;
+
+ if (lptr->num_blocks > 1) {
+ for (; btree_curs->level[level-1].num_blocks > 1
+ && level < XFS_BTREE_MAXLEVELS;
+ level++) {
+ lptr = &btree_curs->level[level];
+ p_lptr = &btree_curs->level[level - 1];
+ lptr->num_blocks = howmany(p_lptr->num_blocks,
+ XR_RMAPBT_BLOCK_MAXRECS(mp, level));
+ lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
+ lptr->num_recs_pb = p_lptr->num_blocks
+ / lptr->num_blocks;
+ lptr->num_recs_tot = p_lptr->num_blocks;
+
+ blocks_allocated += lptr->num_blocks;
+ }
+ }
+ ASSERT(lptr->num_blocks == 1);
+ btree_curs->num_levels = level;
+
+ btree_curs->num_tot_blocks = btree_curs->num_free_blocks
+ = blocks_allocated;
+
+ setup_cursor(mp, agno, btree_curs);
+}
+
+static void
+prop_rmap_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
+ struct xfs_rmap_irec *rm_rec, int level)
+{
+ struct xfs_btree_block *bt_hdr;
+ struct xfs_rmap_key *bt_key;
+ xfs_rmap_ptr_t *bt_ptr;
+ xfs_agblock_t agbno;
+ bt_stat_level_t *lptr;
+
+ level++;
+
+ if (level >= btree_curs->num_levels)
+ return;
+
+ lptr = &btree_curs->level[level];
+ bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+
+ if (be16_to_cpu(bt_hdr->bb_numrecs) == 0) {
+ /*
+ * this only happens once to initialize the
+ * first path up the left side of the tree
+ * where the agbno's are already set up
+ */
+ prop_rmap_cursor(mp, agno, btree_curs, rm_rec, level);
+ }
+
+ if (be16_to_cpu(bt_hdr->bb_numrecs) ==
+ lptr->num_recs_pb + (lptr->modulo > 0)) {
+ /*
+ * write out current prev block, grab us a new block,
+ * and set the rightsib pointer of current block
+ */
+#ifdef XR_BLD_INO_TRACE
+ fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
+#endif
+ if (lptr->prev_agbno != NULLAGBLOCK) {
+ ASSERT(lptr->prev_buf_p != NULL);
+ libxfs_writebuf(lptr->prev_buf_p, 0);
+ }
+ lptr->prev_agbno = lptr->agbno;
+ lptr->prev_buf_p = lptr->buf_p;
+ agbno = get_next_blockaddr(agno, level, btree_curs);
+
+ bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
+
+ lptr->buf_p = libxfs_getbuf(mp->m_dev,
+ XFS_AGB_TO_DADDR(mp, agno, agbno),
+ XFS_FSB_TO_BB(mp, 1));
+ lptr->agbno = agbno;
+
+ if (lptr->modulo)
+ lptr->modulo--;
+
+ /*
+ * initialize block header
+ */
+ lptr->buf_p->b_ops = &xfs_rmapbt_buf_ops;
+ bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+ memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
+ xfs_btree_init_block(mp, lptr->buf_p, XFS_RMAP_CRC_MAGIC,
+ level, 0, agno,
+ XFS_BTREE_CRC_BLOCKS);
+
+ bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+
+ /*
+ * propagate extent record for first extent in new block up
+ */
+ prop_rmap_cursor(mp, agno, btree_curs, rm_rec, level);
+ }
+ /*
+ * add rmap key and child pointer to current block
+ */
+ be16_add_cpu(&bt_hdr->bb_numrecs, 1);
+
+ bt_key = XFS_RMAP_KEY_ADDR(bt_hdr,
+ be16_to_cpu(bt_hdr->bb_numrecs));
+ bt_ptr = XFS_RMAP_PTR_ADDR(bt_hdr,
+ be16_to_cpu(bt_hdr->bb_numrecs),
+ mp->m_rmap_mxr[1]);
+
+ bt_key->rm_startblock = cpu_to_be32(rm_rec->rm_startblock);
+ bt_key->rm_owner = cpu_to_be64(rm_rec->rm_owner);
+ bt_key->rm_offset = cpu_to_be64(rm_rec->rm_offset);
+
+ *bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+}
+
+/*
+ * rebuilds a rmap btree given a cursor.
+ */
+static void
+build_rmap_tree(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs)
+{
+ xfs_agnumber_t i;
+ xfs_agblock_t j;
+ xfs_agblock_t agbno;
+ struct xfs_btree_block *bt_hdr;
+ struct xfs_rmap_irec *rm_rec;
+ struct xfs_slab_cursor *rmap_cur;
+ struct xfs_rmap_rec *bt_rec;
+ struct bt_stat_level *lptr;
+ int level = btree_curs->num_levels;
+ int error;
+
+ for (i = 0; i < level; i++) {
+ lptr = &btree_curs->level[i];
+
+ agbno = get_next_blockaddr(agno, i, btree_curs);
+ lptr->buf_p = libxfs_getbuf(mp->m_dev,
+ XFS_AGB_TO_DADDR(mp, agno, agbno),
+ XFS_FSB_TO_BB(mp, 1));
+
+ if (i == btree_curs->num_levels - 1)
+ btree_curs->root = agbno;
+
+ lptr->agbno = agbno;
+ lptr->prev_agbno = NULLAGBLOCK;
+ lptr->prev_buf_p = NULL;
+ /*
+ * initialize block header
+ */
+
+ lptr->buf_p->b_ops = &xfs_rmapbt_buf_ops;
+ bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+ memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
+ xfs_btree_init_block(mp, lptr->buf_p, XFS_RMAP_CRC_MAGIC,
+ i, 0, agno,
+ XFS_BTREE_CRC_BLOCKS);
+ }
+
+ /*
+ * run along leaf, setting up records. as we have to switch
+ * blocks, call the prop_rmap_cursor routine to set up the new
+ * pointers for the parent. that can recurse up to the root
+ * if required. set the sibling pointers for leaf level here.
+ */
+ error = init_rmap_cursor(agno, &rmap_cur);
+ if (error)
+ do_error(
+_("Insufficient memory to construct reverse-map cursor."));
+ rm_rec = pop_slab_cursor(rmap_cur);
+ lptr = &btree_curs->level[0];
+
+ for (i = 0; i < lptr->num_blocks; i++) {
+ /*
+ * block initialization, lay in block header
+ */
+ lptr->buf_p->b_ops = &xfs_rmapbt_buf_ops;
+ bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+ memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
+ xfs_btree_init_block(mp, lptr->buf_p, XFS_RMAP_CRC_MAGIC,
+ 0, 0, agno,
+ XFS_BTREE_CRC_BLOCKS);
+
+ bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+ bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
+ (lptr->modulo > 0));
+
+ if (lptr->modulo > 0)
+ lptr->modulo--;
+
+ if (lptr->num_recs_pb > 0)
+ prop_rmap_cursor(mp, agno, btree_curs, rm_rec, 0);
+
+ bt_rec = (struct xfs_rmap_rec *)
+ ((char *)bt_hdr + XFS_RMAP_BLOCK_LEN);
+ for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
+ ASSERT(rm_rec != NULL);
+ bt_rec[j].rm_startblock =
+ cpu_to_be32(rm_rec->rm_startblock);
+ bt_rec[j].rm_blockcount =
+ cpu_to_be32(rm_rec->rm_blockcount);
+ bt_rec[j].rm_owner = cpu_to_be64(rm_rec->rm_owner);
+ bt_rec[j].rm_offset = cpu_to_be64(rm_rec->rm_offset);
+
+ rm_rec = pop_slab_cursor(rmap_cur);
+ }
+
+ if (rm_rec != NULL) {
+ /*
+ * get next leaf level block
+ */
+ if (lptr->prev_buf_p != NULL) {
+#ifdef XR_BLD_RL_TRACE
+ fprintf(stderr, "writing rmapbt agbno %u\n",
+ lptr->prev_agbno);
+#endif
+ ASSERT(lptr->prev_agbno != NULLAGBLOCK);
+ libxfs_writebuf(lptr->prev_buf_p, 0);
+ }
+ lptr->prev_buf_p = lptr->buf_p;
+ lptr->prev_agbno = lptr->agbno;
+ lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
+ bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
+
+ lptr->buf_p = libxfs_getbuf(mp->m_dev,
+ XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
+ XFS_FSB_TO_BB(mp, 1));
+ }
+ }
+ free_slab_cursor(&rmap_cur);
+}
+
/*
* build both the agf and the agfl for an agno given both
* btree cursors.
@@ -1338,7 +1626,8 @@ build_agf_agfl(xfs_mount_t *mp,
bt_status_t *bno_bt,
bt_status_t *bcnt_bt,
xfs_extlen_t freeblks, /* # free blocks in tree */
- int lostblocks) /* # blocks that will be lost */
+ int lostblocks, /* # blocks that will be lost */
+ bt_status_t *rmap_bt)
{
extent_tree_node_t *ext_ptr;
xfs_buf_t *agf_buf, *agfl_buf;
@@ -1377,20 +1666,25 @@ build_agf_agfl(xfs_mount_t *mp,
agf->agf_levels[XFS_BTNUM_BNO] = cpu_to_be32(bno_bt->num_levels);
agf->agf_roots[XFS_BTNUM_CNT] = cpu_to_be32(bcnt_bt->root);
agf->agf_levels[XFS_BTNUM_CNT] = cpu_to_be32(bcnt_bt->num_levels);
+ agf->agf_roots[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->root);
+ agf->agf_levels[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->num_levels);
agf->agf_freeblks = cpu_to_be32(freeblks);
/*
* Count and record the number of btree blocks consumed if required.
*/
if (xfs_sb_version_haslazysbcount(&mp->m_sb)) {
+ unsigned int blks;
/*
* Don't count the root blocks as they are already
* accounted for.
*/
- agf->agf_btreeblks = cpu_to_be32(
- (bno_bt->num_tot_blocks - bno_bt->num_free_blocks) +
+ blks = (bno_bt->num_tot_blocks - bno_bt->num_free_blocks) +
(bcnt_bt->num_tot_blocks - bcnt_bt->num_free_blocks) -
- 2);
+ 2;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ blks += rmap_bt->num_tot_blocks - rmap_bt->num_free_blocks - 1;
+ agf->agf_btreeblks = cpu_to_be32(blks);
#ifdef XR_BLD_FREE_TRACE
fprintf(stderr, "agf->agf_btreeblks = %u\n",
be32_to_cpu(agf->agf_btreeblks));
@@ -1581,6 +1875,7 @@ phase5_func(
bt_status_t bcnt_btree_curs;
bt_status_t ino_btree_curs;
bt_status_t fino_btree_curs;
+ bt_status_t rmap_btree_curs;
int extra_blocks = 0;
uint num_freeblocks;
xfs_extlen_t freeblks1;
@@ -1636,6 +1931,12 @@ phase5_func(
sb_icount_ag[agno] += num_inos;
sb_ifree_ag[agno] += num_free_inos;
+ /*
+ * Set up the btree cursors for the on-disk rmap btrees,
+ * which includes pre-allocating all required blocks.
+ */
+ init_rmapbt_cursor(mp, agno, &rmap_btree_curs);
+
num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
/*
* lose two blocks per AG -- the space tree roots
@@ -1720,11 +2021,19 @@ phase5_func(
ASSERT(freeblks1 == freeblks2);
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ build_rmap_tree(mp, agno, &rmap_btree_curs);
+ write_cursor(&rmap_btree_curs);
+ sb_fdblocks_ag[agno] += (rmap_btree_curs.num_tot_blocks -
+ rmap_btree_curs.num_free_blocks) - 1;
+ }
+
/*
* set up agf and agfl
*/
build_agf_agfl(mp, agno, &bno_btree_curs,
- &bcnt_btree_curs, freeblks1, extra_blocks);
+ &bcnt_btree_curs, freeblks1, extra_blocks,
+ &rmap_btree_curs);
/*
* build inode allocation tree.
*/
@@ -1753,6 +2062,8 @@ phase5_func(
*/
finish_cursor(&bno_btree_curs);
finish_cursor(&ino_btree_curs);
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ finish_cursor(&rmap_btree_curs);
if (xfs_sb_version_hasfinobt(&mp->m_sb))
finish_cursor(&fino_btree_curs);
finish_cursor(&bcnt_btree_curs);
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 35/53] xfs_repair: add per-AG btree blocks to rmap data and add to rmapbt
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (33 preceding siblings ...)
2015-12-19 9:08 ` [PATCH 34/53] xfs_repair: rebuild reverse-mapping btree Darrick J. Wong
@ 2015-12-19 9:08 ` Darrick J. Wong
2015-12-19 9:08 ` [PATCH 36/53] mkfs.xfs: Create rmapbt filesystems Darrick J. Wong
` (17 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:08 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Since we can't know the location of the new per-AG btree blocks prior
to constructing the rmapbt, we must record raw reverse-mapping data for
btree blocks while the new btrees are under construction. After the
rmapbt has been rebuilt, merge the btree rmap entries into the rmapbt
with the libxfs code.
Also refactor the freelist fixing code since we need it to tidy up
the AGFL after each rmapbt allocation.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/phase5.c | 47 ++++++------
repair/rmap.c | 220 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 4 +
3 files changed, 248 insertions(+), 23 deletions(-)
diff --git a/repair/phase5.c b/repair/phase5.c
index f37ce6b..734291a 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -74,6 +74,7 @@ typedef struct bt_status {
* per-level status info
*/
bt_stat_level_t level[XFS_BTREE_MAXLEVELS];
+ uint64_t owner; /* owner */
} bt_status_t;
/*
@@ -205,6 +206,7 @@ setup_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *curs)
extent_tree_node_t *bno_ext_ptr;
xfs_extlen_t blocks_allocated;
xfs_agblock_t *agb_ptr;
+ int error;
/*
* get the number of blocks we need to allocate, then
@@ -249,6 +251,12 @@ setup_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *curs)
blocks_allocated++;
}
+ error = add_ag_rmap(mp, agno, ext_ptr->ex_startblock, u,
+ curs->owner);
+ if (error)
+ do_error(_("could not set up btree rmaps: %s\n"),
+ strerror(-error));
+
/*
* if we only used part of this last extent, then we
* need only to reset the extent in the extent
@@ -916,6 +924,7 @@ init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
lptr = &btree_curs->level[0];
btree_curs->init = 1;
+ btree_curs->owner = XFS_RMAP_OWN_INOBT;
/*
* build up statistics
@@ -1354,6 +1363,7 @@ init_rmapbt_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs
lptr = &btree_curs->level[0];
btree_curs->init = 1;
+ btree_curs->owner = XFS_RMAP_OWN_AG;
/*
* build up statistics
@@ -1766,6 +1776,7 @@ build_agf_agfl(xfs_mount_t *mp,
agf->agf_flfirst = 0;
agf->agf_fllast = cpu_to_be32(i - 1);
agf->agf_flcount = cpu_to_be32(i);
+ rmap_store_agflcount(mp, agno, i);
#ifdef XR_BLD_FREE_TRACE
fprintf(stderr, "writing agfl for ag %u\n", agno);
@@ -1790,30 +1801,8 @@ build_agf_agfl(xfs_mount_t *mp,
/*
* now fix up the free list appropriately
- * XXX: code lifted from mkfs, should be shared.
*/
- {
- xfs_alloc_arg_t args;
- xfs_trans_t *tp;
- struct xfs_trans_res tres = {0};
- int error;
-
- memset(&args, 0, sizeof(args));
- args.tp = tp = libxfs_trans_alloc(mp, 0);
- args.mp = mp;
- args.agno = agno;
- args.alignment = 1;
- args.pag = xfs_perag_get(mp,agno);
- libxfs_trans_reserve(tp, &tres,
- xfs_alloc_min_freelist(mp, args.pag), 0);
- error = libxfs_alloc_fix_freelist(&args, 0);
- xfs_perag_put(args.pag);
- if (error) {
- do_error(_("failed to fix AGFL on AG %d, error %d\n"),
- agno, error);
- }
- libxfs_trans_commit(tp);
- }
+ fix_freelist(mp, agno, true);
#ifdef XR_BLD_FREE_TRACE
fprintf(stderr, "wrote agf for ag %u\n", agno);
@@ -1885,6 +1874,7 @@ phase5_func(
xfs_agblock_t num_extents;
__uint32_t magic;
struct agi_stat agi_stat = {0,};
+ int error;
if (verbose)
do_log(_(" - agno = %d\n"), agno);
@@ -1990,6 +1980,8 @@ phase5_func(
bcnt_btree_curs = bno_btree_curs;
+ bno_btree_curs.owner = XFS_RMAP_OWN_AG;
+ bcnt_btree_curs.owner = XFS_RMAP_OWN_AG;
setup_cursor(mp, agno, &bno_btree_curs);
setup_cursor(mp, agno, &bcnt_btree_curs);
@@ -2067,6 +2059,15 @@ phase5_func(
if (xfs_sb_version_hasfinobt(&mp->m_sb))
finish_cursor(&fino_btree_curs);
finish_cursor(&bcnt_btree_curs);
+
+ /*
+ * Put the per-AG btree rmap data into the rmapbt
+ */
+ error = store_ag_btree_rmap_data(mp, agno);
+ if (error)
+ do_error(
+_("unable to add AG %u reverse-mapping data to btree.\n"), agno);
+
/*
* release the incore per-AG bno/bcnt trees so
* the extent nodes can be recycled
diff --git a/repair/rmap.c b/repair/rmap.c
index bb1206e..5d49eef 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -38,6 +38,8 @@
struct xfs_ag_rmap {
struct xfs_slab *ar_rmaps; /* rmap observations, p4 */
struct xfs_slab *ar_raw_rmaps; /* unmerged rmaps */
+ int ar_flcount; /* agfl entries from leftover */
+ /* agbt allocations */
};
static struct xfs_ag_rmap *ag_rmaps;
@@ -436,6 +438,144 @@ out:
return error;
}
+/**
+ * store_ag_btree_rmap_data() - Copy the per-AG btree reverse-mapping data
+ * into the rmapbt.
+ *
+ * At rmapbt reconstruction time, the rmapbt will be populated _only_ with
+ * rmaps for file extents, inode chunks, AG headers, and bmbt blocks. While
+ * building the AG btrees we can record all the blocks allocated for each
+ * btree, but we cannot resolve the conflict between the fact that one has to
+ * finish allocating the space for the rmapbt before building the bnobt and the
+ * fact that allocating blocks for the bnobt requires adding rmapbt entries.
+ * Therefore we record in-core the rmaps for each btree and here use the
+ * libxfs rmap functions to finish building the rmap btree.
+ *
+ * During AGF/AGFL reconstruction in phase 5, rmaps for the AG btrees are
+ * recorded in memory. The rmapbt has not been set up yet, so we need to be
+ * able to "expand" the AGFL without updating the rmapbt. After we've written
+ * out the new AGF header the new rmapbt is available, so this function reads
+ * each AGFL to generate rmap entries. These entries are merged with the AG
+ * btree rmap entries, and then we use libxfs' rmap functions to add them to
+ * the rmapbt, after which it is fully regenerated.
+ *
+ * @mp: XFS mount.
+ * @agno: AG number.
+ */
+int
+store_ag_btree_rmap_data(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_slab_cursor *rm_cur;
+ struct xfs_rmap_irec *rm_rec = NULL;
+ struct xfs_btree_cur *bt_cur = NULL;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_buf *agflbp = NULL;
+ struct xfs_trans *tp;
+ struct xfs_trans_res tres = {0};
+ __be32 *agfl_bno, *b;
+ int error = 0;
+
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return 0;
+
+ /* Release the ar_rmaps; they were put into the rmapbt during p5. */
+ free_slab(&ag_rmaps[agno].ar_rmaps);
+ error = init_slab(&ag_rmaps[agno].ar_rmaps,
+ sizeof(struct xfs_rmap_irec));
+ if (error)
+ goto err;
+
+ /* Add the AGFL blocks past the first ar_flcount entries to the rmap list */
+ error = xfs_trans_read_buf(
+ mp, NULL, mp->m_ddev_targp,
+ XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
+ XFS_FSS_TO_BB(mp, 1), 0, &agflbp, &xfs_agfl_buf_ops);
+ if (error)
+ goto err;
+
+ agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+ b = agfl_bno + ag_rmaps[agno].ar_flcount;
+ while (b - agfl_bno < XFS_AGFL_SIZE(mp) &&
+ *b != cpu_to_be32(NULLAGBLOCK)) {
+ error = add_ag_rmap(mp, agno, be32_to_cpu(*b), 1,
+ XFS_RMAP_OWN_AG);
+ if (error)
+ goto err;
+ b++;
+ }
+ libxfs_putbuf(agflbp);
+ agflbp = NULL;
+
+ /* Merge all the raw rmaps into the main list */
+ error = fold_raw_rmaps(mp, agno);
+ if (error)
+ goto err;
+
+ /* Create a cursor to walk the merged rmap records */
+ error = init_slab_cursor(ag_rmaps[agno].ar_rmaps, rmap_compare,
+ &rm_cur);
+ if (error)
+ goto err;
+
+ /* Insert rmaps into the btree one at a time */
+ rm_rec = pop_slab_cursor(rm_cur);
+ while (rm_rec) {
+ tp = libxfs_trans_alloc(mp, 0);
+ if (!tp) {
+ error = -ENOMEM;
+ goto err_slab;
+ }
+
+ error = -libxfs_trans_reserve(tp, &tres, 16, 0);
+ if (error)
+ goto err_trans;
+
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+ if (error)
+ goto err_trans;
+
+ bt_cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+ if (!bt_cur) {
+ error = -ENOMEM;
+ goto err_agbp;
+ }
+
+ error = xfs_rmapbt_insert(bt_cur, rm_rec->rm_startblock,
+ rm_rec->rm_blockcount, rm_rec->rm_owner,
+ rm_rec->rm_offset);
+ if (error)
+ goto err_rmapcur;
+
+ xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+ error = -libxfs_trans_commit(tp);
+ if (error)
+ goto err_slab;
+
+ fix_freelist(mp, agno, false);
+
+ rm_rec = pop_slab_cursor(rm_cur);
+ }
+
+ free_slab_cursor(&rm_cur);
+ return 0;
+
+err_rmapcur:
+ xfs_btree_del_cursor(bt_cur, XFS_BTREE_ERROR);
+err_agbp:
+ libxfs_putbuf(agbp);
+err_trans:
+ libxfs_trans_cancel(tp);
+err_slab:
+ free_slab_cursor(&rm_cur);
+err:
+ if (agflbp)
+ libxfs_putbuf(agflbp);
+ printf("FAIL err %d\n", error);
+ return error;
+}
+
#ifdef RMAP_DEBUG
static void
dump_rmap(
@@ -626,3 +766,83 @@ err:
free_slab_cursor(&rm_cur);
return 0;
}
+
+/**
+ * fix_freelist() - Regenerate the AGFL, so that we don't run out of it while
+ * rebuilding the rmapbt.
+ * @mp: XFS mount object
+ * @agno: AG number
+ * @skip_rmapbt: If true, also pass XFS_ALLOC_FLAG_NORMAP when fixing the AGFL
+ */
+void
+fix_freelist(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ bool skip_rmapbt)
+{
+ xfs_alloc_arg_t args;
+ xfs_trans_t *tp;
+ struct xfs_trans_res tres = {0};
+ int flags;
+ int error;
+
+ memset(&args, 0, sizeof(args));
+ args.tp = tp = libxfs_trans_alloc(mp, 0);
+ args.mp = mp;
+ args.agno = agno;
+ args.alignment = 1;
+ args.pag = xfs_perag_get(mp, agno);
+ libxfs_trans_reserve(tp, &tres,
+ xfs_alloc_min_freelist(mp, args.pag), 0); /* NOTE(review): result unchecked */
+ /*
+ * Prior to rmapbt, all we had to do to fix the freelist is "expand"
+ * the fresh AGFL header from empty to full. That hasn't changed. For
+ * rmapbt, however, things change a bit.
+ *
+ * When we're stuffing the rmapbt with the AG btree rmaps the tree can
+ * expand, so we need to keep the AGFL well-stocked for the expansion.
+ * However, this expansion can cause the bnobt/cntbt to shrink, which
+ * can make the AGFL eligible for shrinking. Shrinking involves
+ * freeing rmapbt entries, but since we haven't finished loading the
+ * rmapbt with the btree rmaps it's possible for the remove operation
+ * to fail. The AGFL block is large enough at this point to absorb any
+ * blocks freed from the bnobt/cntbt, so we can disable shrinking.
+ *
+ * During the initial AGFL regeneration during AGF generation in phase5
+ * we must also disable rmapbt modifications because the AGF that
+ * libxfs reads does not yet point to the new rmapbt. These initial
+ * AGFL entries are added just prior to adding the AG btree block rmaps
+ * to the rmapbt. It's ok to pass NOSHRINK here too, since the AGFL is
+ * empty and cannot shrink.
+ */
+ flags = XFS_ALLOC_FLAG_NOSHRINK;
+ if (skip_rmapbt)
+ flags |= XFS_ALLOC_FLAG_NORMAP;
+ error = libxfs_alloc_fix_freelist(&args, flags);
+ xfs_perag_put(args.pag);
+ if (error) {
+ do_error(_("failed to fix AGFL on AG %d, error %d\n"),
+ agno, error);
+ }
+ libxfs_trans_commit(tp); /* NOTE(review): result unchecked */
+}
+
+/**
+ * rmap_store_agflcount() - Record how many AGFL entries were left over from
+ * AG btree allocations; those blocks already have
+ * rmap entries.
+ * @mp: XFS mount object.
+ * @agno: AG number.
+ * @count: Number of AGFL entries.
+ */
+void
+rmap_store_agflcount(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ int count)
+{
+ if (needs_rmap_work(mp)) {
+ /* Only remember the count when needs_rmap_work() is true. */
+ ag_rmaps[agno].ar_flcount = count;
+ }
+}
diff --git a/repair/rmap.h b/repair/rmap.h
index f3f3331..0b4e73b 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -32,10 +32,14 @@ extern int add_bmbt_rmap(struct xfs_mount *, xfs_ino_t, int, xfs_fsblock_t);
extern int fold_raw_rmaps(struct xfs_mount *mp, xfs_agnumber_t agno);
extern int add_fixed_ag_rmap_data(struct xfs_mount *, xfs_agnumber_t);
+extern int store_ag_btree_rmap_data(struct xfs_mount *, xfs_agnumber_t);
extern size_t rmap_record_count(struct xfs_mount *, xfs_agnumber_t);
extern int init_rmap_cursor(xfs_agnumber_t, struct xfs_slab_cursor **);
extern void rmap_avoid_check(void);
extern int check_rmaps(struct xfs_mount *, xfs_agnumber_t);
+extern void fix_freelist(struct xfs_mount *, xfs_agnumber_t, bool);
+extern void rmap_store_agflcount(struct xfs_mount *, xfs_agnumber_t, int);
+
#endif /* RMAP_H_ */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 36/53] mkfs.xfs: Create rmapbt filesystems
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (34 preceding siblings ...)
2015-12-19 9:08 ` [PATCH 35/53] xfs_repair: add per-AG btree blocks to rmap data and add to rmapbt Darrick J. Wong
@ 2015-12-19 9:08 ` Darrick J. Wong
2015-12-19 9:09 ` [PATCH 37/53] xfs_mkfs: initialize extra fields during mkfs, add docs Darrick J. Wong
` (16 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:08 UTC (permalink / raw)
To: david, darrick.wong; +Cc: Dave Chinner, xfs
From: Dave Chinner <david@fromorbit.com>
Create v5 filesystems with rmapbt turned on.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
[split patch, add commit message]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
mkfs/xfs_mkfs.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 101 insertions(+), 17 deletions(-)
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index 934d7c0..b2b087f 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -185,6 +185,8 @@ char *mopts[] = {
"finobt",
#define M_UUID 2
"uuid",
+#define M_RMAPBT 3
+ "rmapbt",
NULL
};
@@ -984,6 +986,7 @@ main(
int finobt;
bool finobtflag;
int spinodes;
+ bool rmapbt;
platform_uuid_generate(&uuid);
progname = basename(argv[0]);
@@ -1022,6 +1025,7 @@ main(
finobt = 1;
finobtflag = false;
spinodes = 0;
+ rmapbt = false;
memset(&fsx, 0, sizeof(fsx));
memset(&xi, 0, sizeof(xi));
@@ -1531,6 +1535,15 @@ main(
reqval('m', mopts, M_UUID);
if (platform_uuid_parse(value, &uuid))
illegal(optarg, "m uuid");
+ break;
+ /* rmapbt=0|1: the value is mandatory and must be 0 or 1 */
+ case M_RMAPBT:
+ if (!value || *value == '\0')
+ reqval('m', mopts, M_RMAPBT);
+ c = atoi(value);
+ if (c < 0 || c > 1)
+ illegal(value, "m rmapbt");
+ rmapbt = c;
break;
default:
unknown('m', value);
@@ -1888,6 +1899,12 @@ _("warning: sparse inodes not supported without CRC support, disabled.\n"));
spinodes = 0;
}
+ if (rmapbt && !crcs_enabled) {
+ fprintf(stderr,
+_("warning: rmapbt not supported without CRC support, disabled.\n"));
+ rmapbt = 0;
+ }
+
if (nsflag || nlflag) {
if (dirblocksize < blocksize ||
dirblocksize > XFS_MAX_BLOCKSIZE) {
@@ -2481,7 +2498,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
/*
- * sb_versionnum and finobt flags must be set before we use
+ * sb_versionnum, finobt and rmapbt flags must be set before we use
* xfs_prealloc_blocks().
*/
sbp->sb_features2 = XFS_SB_VERSION2_MKFS(crcs_enabled, lazy_sb_counters,
@@ -2503,6 +2520,8 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
if (finobt)
sbp->sb_features_ro_compat = XFS_SB_FEAT_RO_COMPAT_FINOBT;
+ if (rmapbt)
+ sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT;
if (loginternal) {
/*
@@ -2566,7 +2585,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
printf(_(
"meta-data=%-22s isize=%-6d agcount=%lld, agsize=%lld blks\n"
" =%-22s sectsz=%-5u attr=%u, projid32bit=%u\n"
- " =%-22s crc=%-8u finobt=%u, sparse=%u\n"
+ " =%-22s crc=%-8u finobt=%u, sparse=%u, rmapbt=%u\n"
"data =%-22s bsize=%-6u blocks=%llu, imaxpct=%u\n"
" =%-22s sunit=%-6u swidth=%u blks\n"
"naming =version %-14u bsize=%-6u ascii-ci=%d ftype=%d\n"
@@ -2575,7 +2594,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
"realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n"),
dfile, isize, (long long)agcount, (long long)agsize,
"", sectorsize, attrversion, !projid16bit,
- "", crcs_enabled, finobt, spinodes,
+ "", crcs_enabled, finobt, spinodes, rmapbt,
"", blocksize, (long long)dblocks, imaxpct,
"", dsunit, dswidth,
dirversion, dirblocksize, nci, dirftype,
@@ -2764,6 +2783,12 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
pag->pagf_levels[XFS_BTNUM_BNOi] = 1;
pag->pagf_levels[XFS_BTNUM_CNTi] = 1;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ agf->agf_roots[XFS_BTNUM_RMAPi] =
+ cpu_to_be32(XFS_RMAP_BLOCK(mp));
+ agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+ }
+
agf->agf_flfirst = 0;
agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
agf->agf_flcount = 0;
@@ -2951,24 +2976,83 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
/*
* Free INO btree root block
*/
- if (!finobt) {
- xfs_perag_put(pag);
- continue;
+ if (finobt) {
+ buf = libxfs_getbuf(mp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
+ bsize);
+ buf->b_ops = &xfs_inobt_buf_ops;
+ block = XFS_BUF_TO_BLOCK(buf);
+ memset(block, 0, blocksize);
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ xfs_btree_init_block(mp, buf, XFS_FIBT_CRC_MAGIC, 0, 0,
+ agno, XFS_BTREE_CRC_BLOCKS);
+ else
+ xfs_btree_init_block(mp, buf, XFS_FIBT_MAGIC, 0, 0,
+ agno, 0);
+ libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
}
- buf = libxfs_getbuf(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
+ /* RMAP btree root block */
+ if (rmapbt) {
+ struct xfs_rmap_rec *rrec;
+
+ buf = libxfs_getbuf(mp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
bsize);
- buf->b_ops = &xfs_inobt_buf_ops;
- block = XFS_BUF_TO_BLOCK(buf);
- memset(block, 0, blocksize);
- if (xfs_sb_version_hascrc(&mp->m_sb))
- xfs_btree_init_block(mp, buf, XFS_FIBT_CRC_MAGIC, 0, 0,
+ buf->b_ops = &xfs_rmapbt_buf_ops;
+ block = XFS_BUF_TO_BLOCK(buf);
+ memset(block, 0, blocksize);
+
+ xfs_btree_init_block(mp, buf, XFS_RMAP_CRC_MAGIC, 0, 0,
agno, XFS_BTREE_CRC_BLOCKS);
- else
- xfs_btree_init_block(mp, buf, XFS_FIBT_MAGIC, 0, 0,
- agno, 0);
- libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
+
+ /*
+ * mark the AG header regions as static metadata
+ * The BNO btree block is the first block after the
+ * headers, so its location defines the size of the region
+ * the static metadata consumes.
+ */
+ rrec = XFS_RMAP_REC_ADDR(block, 1);
+ rrec->rm_startblock = 0;
+ rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
+ be16_add_cpu(&block->bb_numrecs, 1);
+
+ /* account freespace btree root blocks */
+ rrec = XFS_RMAP_REC_ADDR(block, 2);
+ rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
+ rrec->rm_blockcount = cpu_to_be32(2);
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+ be16_add_cpu(&block->bb_numrecs, 1);
+
+ /* account inode btree root blocks */
+ rrec = XFS_RMAP_REC_ADDR(block, 3);
+ rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
+ rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
+ XFS_IBT_BLOCK(mp));
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
+ be16_add_cpu(&block->bb_numrecs, 1);
+
+ /* account for rmap btree root */
+ rrec = XFS_RMAP_REC_ADDR(block, 4);
+ rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
+ rrec->rm_blockcount = cpu_to_be32(1);
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+ be16_add_cpu(&block->bb_numrecs, 1);
+
+ /* account for the log space */
+ if (loginternal && agno == logagno) {
+ rrec = XFS_RMAP_REC_ADDR(block, 5);
+ rrec->rm_startblock = cpu_to_be32(
+ XFS_FSB_TO_AGBNO(mp, logstart));
+ rrec->rm_blockcount = cpu_to_be32(logblocks);
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
+ be16_add_cpu(&block->bb_numrecs, 1);
+ }
+
+ libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
+ }
+
xfs_perag_put(pag);
}
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 37/53] xfs_mkfs: initialize extra fields during mkfs, add docs
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (35 preceding siblings ...)
2015-12-19 9:08 ` [PATCH 36/53] mkfs.xfs: Create rmapbt filesystems Darrick J. Wong
@ 2015-12-19 9:09 ` Darrick J. Wong
2015-12-19 9:09 ` [PATCH 38/53] libxfs: add the refcount helper functions from the kernel Darrick J. Wong
` (15 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:09 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Document the rmapbt options to mkfs, and initialize the
extra field we added.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
man/man8/mkfs.xfs.8 | 20 ++++++++++++++++++++
mkfs/xfs_mkfs.c | 7 ++++++-
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8
index 1fe510b..ffef906 100644
--- a/man/man8/mkfs.xfs.8
+++ b/man/man8/mkfs.xfs.8
@@ -173,6 +173,26 @@ is used, the free inode btree feature is not supported and is disabled.
.BI uuid= value
Use the given value as the filesystem UUID for the newly created filesystem.
The default is to generate a random UUID.
+.TP
+.BI rmapbt= value
+This option enables the creation of a reverse-mapping btree index in each
+allocation group. The value is either 0 to disable the feature, or 1 to
+create the btree.
+.IP
+The reverse mapping btree maps filesystem blocks to the owner of the
+filesystem block. Most of the mappings will be to an inode number and an
+offset, though there will also be mappings to filesystem metadata. This
+secondary metadata can be used to validate the primary metadata or to
+pinpoint exactly which data has been lost when a disk error occurs.
+.IP
+By default,
+.B mkfs.xfs
+will not create reverse mapping btrees. This feature is only available
+for filesystems created with the (default)
+.B \-m crc=1
+option set. When the option
+.B \-m crc=0
+is used, the reverse mapping btree feature is not supported and is disabled.
.RE
.TP
.BI \-d " data_section_options"
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index b2b087f..15a3866 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -3016,6 +3016,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
rrec->rm_startblock = 0;
rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
+ rrec->rm_offset = 0;
be16_add_cpu(&block->bb_numrecs, 1);
/* account freespace btree root blocks */
@@ -3023,6 +3024,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
rrec->rm_blockcount = cpu_to_be32(2);
rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+ rrec->rm_offset = 0;
be16_add_cpu(&block->bb_numrecs, 1);
/* account inode btree root blocks */
@@ -3031,6 +3033,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
XFS_IBT_BLOCK(mp));
rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
+ rrec->rm_offset = 0;
be16_add_cpu(&block->bb_numrecs, 1);
/* account for rmap btree root */
@@ -3038,6 +3041,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
rrec->rm_blockcount = cpu_to_be32(1);
rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+ rrec->rm_offset = 0;
be16_add_cpu(&block->bb_numrecs, 1);
/* account for the log space */
@@ -3047,6 +3051,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
XFS_FSB_TO_AGBNO(mp, logstart));
rrec->rm_blockcount = cpu_to_be32(logblocks);
rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
+ rrec->rm_offset = 0;
be16_add_cpu(&block->bb_numrecs, 1);
}
@@ -3290,7 +3295,7 @@ usage( void )
{
fprintf(stderr, _("Usage: %s\n\
/* blocksize */ [-b log=n|size=num]\n\
-/* metadata */ [-m crc=0|1,finobt=0|1,uuid=xxx]\n\
+/* metadata */ [-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1]\n\
/* data subvol */ [-d agcount=n,agsize=n,file,name=xxx,size=num,\n\
(sunit=value,swidth=value|su=num,sw=num|noalign),\n\
sectlog=n|sectsize=num\n\
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 38/53] libxfs: add the refcount helper functions from the kernel
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (36 preceding siblings ...)
2015-12-19 9:09 ` [PATCH 37/53] xfs_mkfs: initialize extra fields during mkfs, add docs Darrick J. Wong
@ 2015-12-19 9:09 ` Darrick J. Wong
2015-12-19 9:09 ` [PATCH 39/53] libxfs: add support for refcount btrees Darrick J. Wong
` (14 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:09 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Import definitions and refcount helper code from the kernel.
This is a separate patch to avoid blowing out the mail server...
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_refcount.c | 1131 +++++++++++++++++++++++++++++++++++++++++++++++++
libxfs/xfs_refcount.h | 45 ++
2 files changed, 1176 insertions(+)
create mode 100644 libxfs/xfs_refcount.c
create mode 100644 libxfs/xfs_refcount.h
diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c
new file mode 100644
index 0000000..59ec846
--- /dev/null
+++ b/libxfs/xfs_refcount.c
@@ -0,0 +1,1131 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+#include "xfs_bit.h"
+#include "xfs_refcount.h"
+
+/**
+ * xfs_refcountbt_lookup_le() -- Look up the first record less than or equal to
+ * [bno, 0] in the btree given by cur.
+ * @cur: refcount btree cursor
+ * @bno: AG block number to look up
+ * @stat: set to 1 if successful, 0 otherwise
+ */
+int
+xfs_refcountbt_lookup_le(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ int *stat)
+{
+ trace_xfs_refcountbt_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
+ XFS_LOOKUP_LE);
+ cur->bc_rec.rc.rc_startblock = bno;
+ cur->bc_rec.rc.rc_blockcount = 0; /* the lookup key carries no length */
+ return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/**
+ * xfs_refcountbt_lookup_ge() -- Look up the first record greater than or equal
+ * to [bno, 0] in the btree given by cur.
+ * @cur: refcount btree cursor
+ * @bno: AG block number to look up
+ * @stat: set to 1 if successful, 0 otherwise
+ */
+int /* error */
+xfs_refcountbt_lookup_ge(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ int *stat) /* success/failure */
+{
+ trace_xfs_refcountbt_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
+ XFS_LOOKUP_GE);
+ cur->bc_rec.rc.rc_startblock = bno;
+ cur->bc_rec.rc.rc_blockcount = 0; /* the lookup key carries no length */
+ return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/**
+ * xfs_refcountbt_get_rec() -- Get the data from the pointed-to record.
+ *
+ * @cur: refcount btree cursor
+ * @irec: filled in (CPU byte order) only when no error and *stat == 1
+ * @stat: set to 1 if successful, 0 otherwise
+ */
+int
+xfs_refcountbt_get_rec(
+ struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *irec,
+ int *stat)
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (!error && *stat == 1) { /* decode only when a record was returned */
+ irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
+ irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
+ irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
+ trace_xfs_refcountbt_get(cur->bc_mp, cur->bc_private.a.agno,
+ irec);
+ }
+ return error;
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by irec (startblock, blockcount, refcount), converted to big-endian.
+ * Returns whatever xfs_btree_update() returns.
+ */
+STATIC int
+xfs_refcountbt_update(
+ struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *irec)
+{
+ union xfs_btree_rec rec;
+
+ trace_xfs_refcountbt_update(cur->bc_mp, cur->bc_private.a.agno, irec);
+ rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
+ rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
+ rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
+ return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Insert the record described by irec at the position referred to by cur.
+ * The values are staged in cur->bc_rec.rc; i is passed to xfs_btree_insert().
+ * Returns whatever xfs_btree_insert() returns.
+ */
+STATIC int
+xfs_refcountbt_insert(
+ struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *irec,
+ int *i)
+{
+ trace_xfs_refcountbt_insert(cur->bc_mp, cur->bc_private.a.agno, irec);
+ cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
+ cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
+ cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
+ return xfs_btree_insert(cur, i);
+}
+
+/*
+ * Remove the record referred to by cur, then set the pointer to the spot
+ * where the record could be re-inserted, in case we want to increment or
+ * decrement the cursor. *i is set by xfs_btree_delete(). Returns 0, a
+ * btree error, or EFSCORRUPTED if no record was found or deleted.
+ */
+STATIC int
+xfs_refcountbt_delete(
+ struct xfs_btree_cur *cur,
+ int *i)
+{
+ struct xfs_refcount_irec irec;
+ int found_rec;
+ int error;
+
+ error = xfs_refcountbt_get_rec(cur, &irec, &found_rec);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ trace_xfs_refcountbt_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
+ error = xfs_btree_delete(cur, i);
+ if (error) /* check the error first: *i is only meaningful on success */
+ return error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
+ error = xfs_refcountbt_lookup_ge(cur, irec.rc_startblock, &found_rec);
+out_error:
+ return error;
+}
+
+/*
+ * Adjusting the Reference Count
+ *
+ * As stated elsewhere, the reference count btree (refcbt) stores
+ * >1 reference counts for extents of physical blocks. In this
+ * operation, we're either raising or lowering the reference count of
+ * some subrange stored in the tree:
+ *
+ * <------ adjustment range ------>
+ * ----+ +---+-----+ +--+--------+---------
+ * 2 | | 3 | 2 | |17| 55 | 10
+ * ----+ +---+-----+ +--+--------+---------
+ * X axis is physical blocks number;
+ * reference counts are the numbers inside the rectangles
+ *
+ * The first thing we need to do is to ensure that there are no
+ * refcount extents crossing either boundary of the range to be
+ * adjusted. For any extent that does cross a boundary, split it into
+ * two extents so that we can increment the refcount of one of the
+ * pieces later:
+ *
+ * <------ adjustment range ------>
+ * ----+ +---+-----+ +--+--------+----+----
+ * 2 | | 3 | 2 | |17| 55 | 10 | 10
+ * ----+ +---+-----+ +--+--------+----+----
+ *
+ * For this next step, let's assume that all the physical blocks in
+ * the adjustment range are mapped to a file and are therefore in use
+ * at least once. Therefore, we can infer that any gap in the
+ * refcount tree within the adjustment range represents a physical
+ * extent with refcount == 1:
+ *
+ * <------ adjustment range ------>
+ * ----+---+---+-----+-+--+--------+----+----
+ * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10
+ * ----+---+---+-----+-+--+--------+----+----
+ * ^
+ *
+ * For each extent that falls within the interval range, figure out
+ * which extent is to the left or the right of that extent. Now we
+ * have a left, current, and right extent. If the new reference count
+ * of the center extent enables us to merge left, center, and right
+ * into one record covering all three, do so. If the center extent is
+ * at the left end of the range, abuts the left extent, and its new
+ * reference count matches the left extent's record, then merge them.
+ * If the center extent is at the right end of the range, abuts the
+ * right extent, and the reference counts match, merge those. In the
+ * example, we can left merge (assuming an increment operation):
+ *
+ * <------ adjustment range ------>
+ * --------+---+-----+-+--+--------+----+----
+ * 2 | 3 | 2 |1|17| 55 | 10 | 10
+ * --------+---+-----+-+--+--------+----+----
+ * ^
+ *
+ * For all other extents within the range, adjust the reference count
+ * or delete it if the refcount falls below 2. If we were
+ * incrementing, the end result looks like this:
+ *
+ * <------ adjustment range ------>
+ * --------+---+-----+-+--+--------+----+----
+ * 2 | 4 | 3 |2|18| 56 | 11 | 10
+ * --------+---+-----+-+--+--------+----+----
+ *
+ * The result of a decrement operation looks as such:
+ *
+ * <------ adjustment range ------>
+ * ----+ +---+ +--+--------+----+----
+ * 2 | | 2 | |16| 54 | 9 | 10
+ * ----+ +---+ +--+--------+----+----
+ * DDDD 111111DD
+ *
+ * The blocks marked "D" are freed; the blocks marked "1" are only
+ * referenced once and therefore the record is removed from the
+ * refcount btree.
+ */
+
+#define RCNEXT(rc) ((rc).rc_startblock + (rc).rc_blockcount)
+/*
+ * If a refcount record straddles agbno, split it into two records at agbno.
+ */
+STATIC int
+try_split_left_rcextent(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t agbno)
+{
+ struct xfs_refcount_irec left, tmp;
+ int found_rec;
+ int error;
+
+ error = xfs_refcountbt_lookup_le(cur, agbno, &found_rec);
+ if (error)
+ goto out_error;
+ if (!found_rec)
+ return 0; /* lookup found no record; nothing to split */
+
+ error = xfs_refcountbt_get_rec(cur, &left, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (left.rc_startblock >= agbno || RCNEXT(left) <= agbno)
+ return 0; /* record does not straddle agbno */
+
+ trace_xfs_refcount_split_left_extent(cur->bc_mp, cur->bc_private.a.agno,
+ &left, agbno);
+ tmp = left;
+ tmp.rc_blockcount = agbno - left.rc_startblock; /* first piece ends at agbno */
+ error = xfs_refcountbt_update(cur, &tmp);
+ if (error)
+ goto out_error;
+
+ error = xfs_btree_increment(cur, 0, &found_rec);
+ if (error)
+ goto out_error;
+
+ tmp = left;
+ tmp.rc_startblock = agbno;
+ tmp.rc_blockcount -= (agbno - left.rc_startblock); /* second piece: the rest */
+ error = xfs_refcountbt_insert(cur, &tmp, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ return error;
+
+out_error:
+ trace_xfs_refcount_split_left_extent_error(cur->bc_mp,
+ cur->bc_private.a.agno, error, _RET_IP_);
+ return error;
+}
+
+/*
+ * If a refcount record extends past agbnext, split it in two at agbnext.
+ */
+STATIC int
+try_split_right_rcextent(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t agbnext)
+{
+ struct xfs_refcount_irec right, tmp;
+ int found_rec;
+ int error;
+
+ error = xfs_refcountbt_lookup_le(cur, agbnext - 1, &found_rec);
+ if (error)
+ goto out_error;
+ if (!found_rec)
+ return 0; /* lookup found no record; nothing to split */
+
+ error = xfs_refcountbt_get_rec(cur, &right, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (RCNEXT(right) <= agbnext)
+ return 0; /* record ends at or before agbnext */
+
+ trace_xfs_refcount_split_right_extent(cur->bc_mp,
+ cur->bc_private.a.agno, &right, agbnext);
+ tmp = right;
+ tmp.rc_startblock = agbnext;
+ tmp.rc_blockcount -= (agbnext - right.rc_startblock); /* piece from agbnext on */
+ error = xfs_refcountbt_update(cur, &tmp);
+ if (error)
+ goto out_error;
+
+ tmp = right;
+ tmp.rc_blockcount = agbnext - right.rc_startblock; /* piece before agbnext */
+ error = xfs_refcountbt_insert(cur, &tmp, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ return error;
+
+out_error:
+ trace_xfs_refcount_split_right_extent_error(cur->bc_mp,
+ cur->bc_private.a.agno, error, _RET_IP_);
+ return error;
+}
+
+/*
+ * Merge the left, center, and right extents into left; sets *aglen to 0.
+ */
+STATIC int
+merge_center(
+ struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *left,
+ struct xfs_refcount_irec *center,
+ unsigned long long extlen,
+ xfs_agblock_t *agbno, /* not referenced in this function */
+ xfs_extlen_t *aglen)
+{
+ int error;
+ int found_rec;
+
+ error = xfs_refcountbt_lookup_ge(cur, center->rc_startblock,
+ &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+
+ error = xfs_refcountbt_delete(cur, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+
+ if (center->rc_refcount > 1) { /* second delete only in this case */
+ error = xfs_refcountbt_delete(cur, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
+ out_error);
+ }
+
+ error = xfs_refcountbt_lookup_le(cur, left->rc_startblock,
+ &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+
+ left->rc_blockcount = extlen; /* caller-supplied merged length */
+ error = xfs_refcountbt_update(cur, left);
+ if (error)
+ goto out_error;
+
+ *aglen = 0;
+ return error;
+
+out_error:
+ trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
+ cur->bc_private.a.agno, error, _RET_IP_);
+ return error;
+}
+
+/*
+ * Merge with the left extent.
+ */
+STATIC int
+merge_left(
+ struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *left,
+ struct xfs_refcount_irec *cleft,
+ xfs_agblock_t *agbno,
+ xfs_extlen_t *aglen)
+{
+ int error;
+ int found_rec;
+
+ if (cleft->rc_refcount > 1) {
+ error = xfs_refcountbt_lookup_le(cur, cleft->rc_startblock,
+ &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
+ out_error);
+
+ error = xfs_refcountbt_delete(cur, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
+ out_error);
+ }
+
+ error = xfs_refcountbt_lookup_le(cur, left->rc_startblock,
+ &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+
+ left->rc_blockcount += cleft->rc_blockcount;
+ error = xfs_refcountbt_update(cur, left);
+ if (error)
+ goto out_error;
+
+ *agbno += cleft->rc_blockcount;
+ *aglen -= cleft->rc_blockcount;
+ return error;
+
+out_error:
+ trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
+ cur->bc_private.a.agno, error, _RET_IP_);
+ return error;
+}
+
+/*
+ * Merge cright into right: extend right downward over cright, shrink *aglen.
+ */
+STATIC int
+merge_right(
+ struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *right,
+ struct xfs_refcount_irec *cright,
+ xfs_agblock_t *agbno,
+ xfs_extlen_t *aglen)
+{
+ int error;
+ int found_rec;
+
+ if (cright->rc_refcount > 1) {
+ error = xfs_refcountbt_lookup_le(cur, cright->rc_startblock,
+ &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
+ out_error);
+
+ error = xfs_refcountbt_delete(cur, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
+ out_error);
+ }
+
+ error = xfs_refcountbt_lookup_le(cur, right->rc_startblock,
+ &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+
+ right->rc_startblock -= cright->rc_blockcount;
+ right->rc_blockcount += cright->rc_blockcount;
+ error = xfs_refcountbt_update(cur, right);
+ if (error)
+ goto out_error;
+
+ *aglen -= cright->rc_blockcount;
+ return error;
+
+out_error:
+ trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
+ cur->bc_private.a.agno, error, _RET_IP_);
+ return error;
+}
+
+/*
+ * Find the left extent and the one after it (cleft). This function assumes
+ * that we've already split any extent crossing agbno.
+ */
+STATIC int
+find_left_extent(
+ struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *left,
+ struct xfs_refcount_irec *cleft,
+ xfs_agblock_t agbno,
+ xfs_extlen_t aglen)
+{
+ struct xfs_refcount_irec tmp;
+ int error;
+ int found_rec;
+
+ left->rc_blockcount = cleft->rc_blockcount = 0;
+ error = xfs_refcountbt_lookup_le(cur, agbno - 1, &found_rec);
+ if (error)
+ goto out_error;
+ if (!found_rec)
+ return 0;
+
+ error = xfs_refcountbt_get_rec(cur, &tmp, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+
+ if (RCNEXT(tmp) != agbno)
+ return 0;
+ /* We have a left extent; retrieve (or invent) the next right one */
+ *left = tmp;
+
+ error = xfs_btree_increment(cur, 0, &found_rec);
+ if (error)
+ goto out_error;
+ if (found_rec) {
+ error = xfs_refcountbt_get_rec(cur, &tmp, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
+ out_error);
+
+ /* if tmp starts at the start of our range, just use that */
+ if (tmp.rc_startblock == agbno)
+ *cleft = tmp;
+ else {
+ /*
+ * There's a gap in the refcntbt at the start of the
+ * range we're interested in (refcount == 1) so
+ * create the implied extent and pass it back.
+ */
+ cleft->rc_startblock = agbno;
+ cleft->rc_blockcount = min(aglen,
+ tmp.rc_startblock - agbno);
+ cleft->rc_refcount = 1;
+ }
+ } else {
+ /*
+ * No extents, so pretend that there's one covering the whole
+ * range.
+ */
+ cleft->rc_startblock = agbno;
+ cleft->rc_blockcount = aglen;
+ cleft->rc_refcount = 1;
+ }
+ trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno,
+ left, cleft, agbno);
+ return error;
+
+out_error:
+ trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
+ cur->bc_private.a.agno, error, _RET_IP_);
+ return error;
+}
+
+/*
+ * Find the extent starting at agbno + aglen (right) and the one before it
+ * (cright). Assumes we've already split any extents crossing agbno + aglen.
+ */
+STATIC int
+find_right_extent(
+ struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *right,
+ struct xfs_refcount_irec *cright,
+ xfs_agblock_t agbno,
+ xfs_extlen_t aglen)
+{
+ struct xfs_refcount_irec tmp;
+ int error;
+ int found_rec;
+
+ right->rc_blockcount = cright->rc_blockcount = 0;
+ error = xfs_refcountbt_lookup_ge(cur, agbno + aglen, &found_rec);
+ if (error)
+ goto out_error;
+ if (!found_rec)
+ return 0;
+
+ error = xfs_refcountbt_get_rec(cur, &tmp, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+
+ if (tmp.rc_startblock != agbno + aglen)
+ return 0;
+ /* We have a right extent; retrieve (or invent) the next left one */
+ *right = tmp;
+
+ error = xfs_btree_decrement(cur, 0, &found_rec);
+ if (error)
+ goto out_error;
+ if (found_rec) {
+ error = xfs_refcountbt_get_rec(cur, &tmp, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
+ out_error);
+
+ /* if tmp ends at the end of our range, just use that */
+ if (RCNEXT(tmp) == agbno + aglen)
+ *cright = tmp;
+ else {
+ /*
+ * There's a gap in the refcntbt at the end of the
+ * range we're interested in (refcount == 1) so
+ * create the implied extent and pass it back.
+ */
+ cright->rc_startblock = max(agbno, RCNEXT(tmp));
+ cright->rc_blockcount = right->rc_startblock -
+ cright->rc_startblock;
+ cright->rc_refcount = 1;
+ }
+ } else {
+ /*
+ * No extents, so pretend that there's one covering the whole
+ * range.
+ */
+ cright->rc_startblock = agbno;
+ cright->rc_blockcount = aglen;
+ cright->rc_refcount = 1;
+ }
+ trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno,
+ cright, right, agbno + aglen);
+ return error;
+
+out_error:
+ trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
+ cur->bc_private.a.agno, error, _RET_IP_);
+ return error;
+}
+#undef RCNEXT
+
+/*
+ * Merge boundary extents whose refcount will match once adjust is applied.
+ */
+STATIC int
+try_merge_rcextents(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t *agbno,
+ xfs_extlen_t *aglen,
+ int adjust)
+{
+ struct xfs_refcount_irec left = {0}, cleft = {0};
+ struct xfs_refcount_irec cright = {0}, right = {0};
+ int error;
+ unsigned long long ulen;
+ bool cequal;
+
+ /*
+ * Find the extent just below agbno [left], just above agbno [cleft],
+ * just below (agbno + aglen) [cright], and just above (agbno + aglen)
+ * [right].
+ */
+ error = find_left_extent(cur, &left, &cleft, *agbno, *aglen);
+ if (error)
+ return error;
+ error = find_right_extent(cur, &right, &cright, *agbno, *aglen);
+ if (error)
+ return error;
+
+ /* No left or right extent to merge; exit. */
+ if (left.rc_blockcount == 0 && right.rc_blockcount == 0)
+ return 0;
+
+ cequal = (cleft.rc_startblock == cright.rc_startblock) &&
+ (cleft.rc_blockcount == cright.rc_blockcount);
+
+ /* Try to merge left, cleft, and right. cleft must == cright. */
+ ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
+ right.rc_blockcount;
+ if (left.rc_blockcount != 0 && right.rc_blockcount != 0 &&
+ cleft.rc_blockcount != 0 && cright.rc_blockcount != 0 &&
+ cequal &&
+ left.rc_refcount == cleft.rc_refcount + adjust &&
+ right.rc_refcount == cleft.rc_refcount + adjust &&
+ ulen < MAXREFCEXTLEN) {
+ trace_xfs_refcount_merge_center_extents(cur->bc_mp,
+ cur->bc_private.a.agno, &left, &cleft, &right);
+ return merge_center(cur, &left, &cleft, ulen, agbno, aglen);
+ }
+
+ /* Try to merge left and cleft. */
+ ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
+ if (left.rc_blockcount != 0 && cleft.rc_blockcount != 0 &&
+ left.rc_refcount == cleft.rc_refcount + adjust &&
+ ulen < MAXREFCEXTLEN) {
+ trace_xfs_refcount_merge_left_extent(cur->bc_mp,
+ cur->bc_private.a.agno, &left, &cleft);
+ error = merge_left(cur, &left, &cleft, agbno, aglen);
+ if (error)
+ return error;
+
+ /*
+ * If we just merged left + cleft and cleft == cright,
+ * we no longer have a cright to merge with right. We're done.
+ */
+ if (cequal)
+ return 0;
+ }
+
+ /* Try to merge cright and right. */
+ ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
+ if (right.rc_blockcount != 0 && cright.rc_blockcount != 0 &&
+ right.rc_refcount == cright.rc_refcount + adjust &&
+ ulen < MAXREFCEXTLEN) {
+ trace_xfs_refcount_merge_right_extent(cur->bc_mp,
+ cur->bc_private.a.agno, &cright, &right);
+ return merge_right(cur, &right, &cright, agbno, aglen);
+ }
+
+ return error;
+}
+
+/*
+ * Adjust the refcounts of middle extents. At this point we should have
+ * split extents that crossed the adjustment range; merged with adjacent
+ * extents; and updated agbno/aglen to reflect the merges. Therefore,
+ * all we have to do is update the extents inside [agbno, agbno + aglen].
+ */
+STATIC int
+adjust_rcextents(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t agbno,
+ xfs_extlen_t aglen,
+ int adj,
+ struct xfs_bmap_free *flist,
+ struct xfs_owner_info *oinfo)
+{
+ struct xfs_refcount_irec ext, tmp;
+ int error;
+ int found_rec, found_tmp;
+ xfs_fsblock_t fsbno;
+
+ error = xfs_refcountbt_lookup_ge(cur, agbno, &found_rec);
+ if (error)
+ goto out_error;
+
+ while (aglen > 0) {
+ error = xfs_refcountbt_get_rec(cur, &ext, &found_rec);
+ if (error)
+ goto out_error;
+ if (!found_rec) {
+ ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
+ ext.rc_blockcount = 0;
+ ext.rc_refcount = 0;
+ }
+
+ /*
+ * Deal with a hole in the refcount tree; if a file maps to
+ * these blocks and there's no refcountbt record, pretend that
+ * there is one with refcount == 1.
+ */
+ if (ext.rc_startblock != agbno) {
+ tmp.rc_startblock = agbno;
+ tmp.rc_blockcount = min(aglen,
+ ext.rc_startblock - agbno);
+ tmp.rc_refcount = 1 + adj;
+ trace_xfs_refcount_modify_extent(cur->bc_mp,
+ cur->bc_private.a.agno, &tmp);
+
+ /*
+ * Either cover the hole (increment) or
+ * delete the range (decrement).
+ */
+ if (tmp.rc_refcount) {
+ error = xfs_refcountbt_insert(cur, &tmp,
+ &found_tmp);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
+ found_tmp == 1, out_error);
+ } else {
+ fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
+ cur->bc_private.a.agno,
+ tmp.rc_startblock);
+ xfs_bmap_add_free(cur->bc_mp, flist, fsbno,
+ tmp.rc_blockcount, oinfo);
+ }
+
+ agbno += tmp.rc_blockcount;
+ aglen -= tmp.rc_blockcount;
+
+ error = xfs_refcountbt_lookup_ge(cur, agbno,
+ &found_rec);
+ if (error)
+ goto out_error;
+ }
+
+ /* Stop if there's nothing left to modify */
+ if (aglen == 0)
+ break;
+
+ /*
+ * Adjust the reference count and either update the tree
+ * (incr) or free the blocks (decr).
+ */
+ ext.rc_refcount += adj;
+ trace_xfs_refcount_modify_extent(cur->bc_mp,
+ cur->bc_private.a.agno, &ext);
+ if (ext.rc_refcount > 1) {
+ error = xfs_refcountbt_update(cur, &ext);
+ if (error)
+ goto out_error;
+ } else if (ext.rc_refcount == 1) {
+ error = xfs_refcountbt_delete(cur, &found_rec);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
+ found_rec == 1, out_error);
+ goto advloop;
+ } else {
+ fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
+ cur->bc_private.a.agno,
+ ext.rc_startblock);
+ xfs_bmap_add_free(cur->bc_mp, flist, fsbno,
+ ext.rc_blockcount, oinfo);
+ }
+
+ error = xfs_btree_increment(cur, 0, &found_rec);
+ if (error)
+ goto out_error;
+
+advloop:
+ agbno += ext.rc_blockcount;
+ aglen -= ext.rc_blockcount;
+ }
+
+ return error;
+out_error:
+ trace_xfs_refcount_modify_extent_error(cur->bc_mp,
+ cur->bc_private.a.agno, error, _RET_IP_);
+ return error;
+}
+
+/*
+ * Adjust the reference count of a range of AG blocks.
+ *
+ * @mp: XFS mount object
+ * @tp: XFS transaction object
+ * @agbp: Buffer containing the AGF
+ * @agno: AG number
+ * @agbno: Start of range to adjust
+ * @aglen: Length of range to adjust
+ * @adj: +1 to increment, -1 to decrement reference count
+ * @flist: freelist (only required if adj == -1)
+ * @oinfo: owner of the blocks (only required if adj == -1)
+ */
+STATIC int
+xfs_refcountbt_adjust_refcount(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t agbno,
+ xfs_extlen_t aglen,
+ int adj,
+ struct xfs_bmap_free *flist,
+ struct xfs_owner_info *oinfo)
+{
+ struct xfs_btree_cur *cur;
+ int error;
+
+ cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, flist);
+
+ /*
+ * Ensure that no rcextents cross the boundary of the adjustment range.
+ */
+ error = try_split_left_rcextent(cur, agbno);
+ if (error)
+ goto out_error;
+
+ error = try_split_right_rcextent(cur, agbno + aglen);
+ if (error)
+ goto out_error;
+
+ /*
+ * Try to merge with the left or right extents of the range.
+ */
+ error = try_merge_rcextents(cur, &agbno, &aglen, adj);
+ if (error)
+ goto out_error;
+
+ /* Now that we've taken care of the ends, adjust the middle extents */
+ error = adjust_rcextents(cur, agbno, aglen, adj, flist, oinfo);
+ if (error)
+ goto out_error;
+
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ return 0;
+
+out_error:
+ trace_xfs_refcount_adjust_error(mp, agno, error, _RET_IP_);
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+}
+
+/**
+ * Increase the reference count of a range of AG blocks by one.
+ *
+ * @mp: XFS mount object
+ * @tp: XFS transaction object
+ * @agbp: Buffer containing the AGF
+ * @agno: AG number
+ * @agbno: Start of range to adjust
+ * @aglen: Length of range to adjust
+ * @flist: List of blocks to free
+ */
+int
+xfs_refcount_increase(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t agbno,
+ xfs_extlen_t aglen,
+ struct xfs_bmap_free *flist)
+{
+ trace_xfs_refcount_increase(mp, agno, agbno, aglen);
+ return xfs_refcountbt_adjust_refcount(mp, tp, agbp, agno, agbno,
+ aglen, 1, flist, NULL);
+}
+
+/**
+ * Decrease the reference count of a range of AG blocks by one.
+ *
+ * @mp: XFS mount object
+ * @tp: XFS transaction object
+ * @agbp: Buffer containing the AGF
+ * @agno: AG number
+ * @agbno: Start of range to adjust
+ * @aglen: Length of range to adjust
+ * @flist: List of blocks to free
+ * @oinfo: Extent owner
+ */
+int
+xfs_refcount_decrease(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t agbno,
+ xfs_extlen_t aglen,
+ struct xfs_bmap_free *flist,
+ struct xfs_owner_info *oinfo)
+{
+ trace_xfs_refcount_decrease(mp, agno, agbno, aglen);
+ return xfs_refcountbt_adjust_refcount(mp, tp, agbp, agno, agbno,
+ aglen, -1, flist, oinfo);
+}
+
+/**
+ * xfs_refcount_put_extent() - release a range of blocks
+ *
+ * @mp: XFS mount object
+ * @tp: transaction that goes with the free operation
+ * @flist: List of blocks to be freed at the end of the transaction
+ * @fsbno: First fs block of the range to release
+ * @fslen: Length of range
+ * @oinfo: owner of the extent
+ */
+int
+xfs_refcount_put_extent(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_bmap_free *flist,
+ xfs_fsblock_t fsbno,
+ xfs_filblks_t fslen,
+ struct xfs_owner_info *oinfo)
+{
+ int error;
+ struct xfs_buf *agbp;
+ xfs_agnumber_t agno; /* allocation group number */
+ xfs_agblock_t agbno; /* ag start of range to free */
+ xfs_extlen_t aglen; /* ag length of range to free */
+
+ agno = XFS_FSB_TO_AGNO(mp, fsbno);
+ agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
+ aglen = fslen;
+
+ /*
+ * Drop reference counts in the refcount tree.
+ */
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+ if (error)
+ return error;
+
+ error = xfs_refcount_decrease(mp, tp, agbp, agno, agbno, aglen, flist,
+ oinfo);
+ xfs_trans_brelse(tp, agbp);
+ return error;
+}
+
+/**
+ * xfs_refcount_find_shared() -- Given an AG extent, find the lowest-numbered
+ * run of shared blocks within that range.
+ *
+ * @mp: XFS mount.
+ * @agno: AG number.
+ * @agbno: AG block number to start searching.
+ * @aglen: Length of the range to search.
+ * @fbno: Returns the AG block number of the first shared range, or
+ * agbno + aglen if no shared blocks are found.
+ * @flen: Returns the length of the shared range found, or 0 if no shared
+ * blocks are found.
+ * @find_maximal: If true, find the length of the run of shared blocks.
+ * Otherwise, the length of the first refcount extent is found.
+ */
+int
+xfs_refcount_find_shared(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t agbno,
+ xfs_extlen_t aglen,
+ xfs_agblock_t *fbno,
+ xfs_extlen_t *flen,
+ bool find_maximal)
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agbp;
+ struct xfs_refcount_irec tmp;
+ int error;
+ int i, have;
+ int bt_error = XFS_BTREE_ERROR;
+
+ trace_xfs_refcount_find_shared(mp, agno, agbno, aglen);
+
+ error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ if (error)
+ goto out;
+ cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+
+ /* By default, skip the whole range */
+ *fbno = agbno + aglen;
+ *flen = 0;
+
+ /* Try to find a refcount extent that crosses the start */
+ error = xfs_refcountbt_lookup_le(cur, agbno, &have);
+ if (error)
+ goto out_error;
+ if (!have) {
+ /* No left extent, look at the next one */
+ error = xfs_btree_increment(cur, 0, &have);
+ if (error)
+ goto out_error;
+ if (!have)
+ goto done;
+ }
+ error = xfs_refcountbt_get_rec(cur, &tmp, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+ /* If the extent ends before the start, look at the next one */
+ if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
+ error = xfs_btree_increment(cur, 0, &have);
+ if (error)
+ goto out_error;
+ if (!have)
+ goto done;
+ error = xfs_refcountbt_get_rec(cur, &tmp, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ }
+
+ /* If the extent starts at or after the end of the range, bail out */
+ if (tmp.rc_startblock >= agbno + aglen)
+ goto done;
+
+ /* We found the start of a shared extent! */
+ if (tmp.rc_startblock < agbno) {
+ tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
+ tmp.rc_startblock = agbno;
+ }
+
+ *fbno = tmp.rc_startblock;
+ *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
+ if (!find_maximal)
+ goto done;
+
+ /* Otherwise, find the end of this shared extent */
+ while (*fbno + *flen < agbno + aglen) {
+ error = xfs_btree_increment(cur, 0, &have);
+ if (error)
+ goto out_error;
+ if (!have)
+ break;
+ error = xfs_refcountbt_get_rec(cur, &tmp, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (tmp.rc_startblock >= agbno + aglen ||
+ tmp.rc_startblock != *fbno + *flen)
+ break;
+ *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
+ }
+
+done:
+ bt_error = XFS_BTREE_NOERROR;
+ trace_xfs_refcount_find_shared_result(mp, agno, *fbno, *flen);
+
+out_error:
+ xfs_btree_del_cursor(cur, bt_error);
+ xfs_buf_relse(agbp);
+out:
+ if (error)
+ trace_xfs_refcount_find_shared_error(mp, agno, error, _RET_IP_);
+ return error;
+}
diff --git a/libxfs/xfs_refcount.h b/libxfs/xfs_refcount.h
new file mode 100644
index 0000000..e8e0beb
--- /dev/null
+++ b/libxfs/xfs_refcount.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_REFCOUNT_H__
+#define __XFS_REFCOUNT_H__
+
+extern int xfs_refcountbt_lookup_le(struct xfs_btree_cur *cur,
+ xfs_agblock_t bno, int *stat);
+extern int xfs_refcountbt_lookup_ge(struct xfs_btree_cur *cur,
+ xfs_agblock_t bno, int *stat);
+extern int xfs_refcountbt_get_rec(struct xfs_btree_cur *cur,
+ struct xfs_refcount_irec *irec, int *stat);
+
+extern int xfs_refcount_increase(struct xfs_mount *mp, struct xfs_trans *tp,
+ struct xfs_buf *agbp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+ xfs_extlen_t aglen, struct xfs_bmap_free *flist);
+extern int xfs_refcount_decrease(struct xfs_mount *mp, struct xfs_trans *tp,
+ struct xfs_buf *agbp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+ xfs_extlen_t aglen, struct xfs_bmap_free *flist,
+ struct xfs_owner_info *oinfo);
+
+extern int xfs_refcount_put_extent(struct xfs_mount *mp, struct xfs_trans *tp,
+ struct xfs_bmap_free *flist, xfs_fsblock_t fsbno,
+ xfs_filblks_t len, struct xfs_owner_info *oinfo);
+
+extern int xfs_refcount_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
+ xfs_extlen_t *flen, bool find_maximal);
+
+#endif /* __XFS_REFCOUNT_H__ */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 39/53] libxfs: add support for refcount btrees
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (37 preceding siblings ...)
2015-12-19 9:09 ` [PATCH 38/53] libxfs: add the refcount helper functions from the kernel Darrick J. Wong
@ 2015-12-19 9:09 ` Darrick J. Wong
2015-12-19 9:09 ` [PATCH 40/53] xfs_db: dump refcount btree data Darrick J. Wong
` (13 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:09 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Import definitions and refcount btree code from the kernel.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
include/libxfs.h | 2
include/linux.h | 1
include/list.h | 3
include/xfs_inode.h | 8 +
include/xfs_mount.h | 4
include/xfs_trace.h | 43 +++
libxfs/Makefile | 6
libxfs/xfs_alloc.c | 21 ++
libxfs/xfs_bmap.c | 364 ++++++++++++++++++++++++---
libxfs/xfs_bmap.h | 30 ++
libxfs/xfs_bmap_btree.c | 1
libxfs/xfs_btree.c | 8 -
libxfs/xfs_btree.h | 7 +
libxfs/xfs_format.h | 71 +++++
libxfs/xfs_fs.h | 1
libxfs/xfs_inode_fork.c | 72 +++++
libxfs/xfs_inode_fork.h | 28 ++
libxfs/xfs_perag_pool.c | 378 ++++++++++++++++++++++++++++
libxfs/xfs_perag_pool.h | 47 ++++
libxfs/xfs_refcount_btree.c | 576 +++++++++++++++++++++++++++++++++++++++++++
libxfs/xfs_refcount_btree.h | 71 +++++
libxfs/xfs_rmap.c | 2
libxfs/xfs_sb.c | 9 +
libxfs/xfs_shared.h | 2
libxfs/xfs_types.h | 3
25 files changed, 1694 insertions(+), 64 deletions(-)
create mode 100644 libxfs/xfs_perag_pool.c
create mode 100644 libxfs/xfs_perag_pool.h
create mode 100644 libxfs/xfs_refcount_btree.c
create mode 100644 libxfs/xfs_refcount_btree.h
diff --git a/include/libxfs.h b/include/libxfs.h
index 5382191..c7041f5 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -78,6 +78,8 @@ extern uint32_t crc32c_le(uint32_t crc, unsigned char const *p, size_t len);
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_rmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/include/linux.h b/include/linux.h
index 674717c..990d4a3 100644
--- a/include/linux.h
+++ b/include/linux.h
@@ -145,6 +145,7 @@ typedef loff_t xfs_off_t;
typedef __uint64_t xfs_ino_t;
typedef __uint32_t xfs_dev_t;
typedef __int64_t xfs_daddr_t;
+typedef __uint32_t xfs_nlink_t;
/**
* Abstraction of mountpoints.
diff --git a/include/list.h b/include/list.h
index f92faed..c52fc68 100644
--- a/include/list.h
+++ b/include/list.h
@@ -161,4 +161,7 @@ static inline void list_splice_init(struct list_head *list,
&pos->member != (head); \
pos = n, n = list_entry(n->member.next, typeof(*n), member))
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
#endif /* __LIST_H__ */
diff --git a/include/xfs_inode.h b/include/xfs_inode.h
index 71c0fb4..681bc93 100644
--- a/include/xfs_inode.h
+++ b/include/xfs_inode.h
@@ -38,6 +38,7 @@ typedef struct xfs_inode {
struct xfs_imap i_imap; /* location for xfs_imap() */
struct xfs_buftarg i_dev; /* dev for this inode */
struct xfs_ifork *i_afp; /* attribute fork pointer */
+ struct xfs_ifork *i_cowfp; /* copy on write extents */
struct xfs_ifork i_df; /* data fork */
struct xfs_trans *i_transp; /* ptr to owning transaction */
struct xfs_inode_log_item *i_itemp; /* logging information */
@@ -45,6 +46,8 @@ typedef struct xfs_inode {
struct xfs_icdinode i_d; /* most of ondisk inode */
xfs_fsize_t i_size; /* in-memory size */
const struct xfs_dir_ops *d_ops; /* directory ops vector */
+ xfs_extnum_t i_cnextents; /* # of extents in cow fork */
+ unsigned int i_cformat; /* format of cow fork */
} xfs_inode_t;
/*
@@ -81,6 +84,11 @@ xfs_set_projid(struct xfs_icdinode *id, prid_t projid)
id->di_projid_lo = (__uint16_t) (projid & 0xffff);
}
+static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
+{
+ return ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
+}
+
typedef struct cred {
uid_t cr_uid;
gid_t cr_gid;
diff --git a/include/xfs_mount.h b/include/xfs_mount.h
index 390ec77..bf44d69 100644
--- a/include/xfs_mount.h
+++ b/include/xfs_mount.h
@@ -66,6 +66,8 @@ typedef struct xfs_mount {
uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */
uint m_rmap_mxr[2]; /* max rmap btree records */
uint m_rmap_mnr[2]; /* min rmap btree records */
+ uint m_refc_mxr[2]; /* max refc btree records */
+ uint m_refc_mnr[2]; /* min refc btree records */
uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */
@@ -140,6 +142,8 @@ typedef struct xfs_perag {
xfs_agino_t pagl_leftrec;
xfs_agino_t pagl_rightrec;
int pagb_count; /* pagb slots in use */
+ __uint8_t pagf_refcount_level;
+ struct xfs_perag_pool *pagf_refcountbt_pool;
} xfs_perag_t;
#define LIBXFS_MOUNT_DEBUGGER 0x0001
diff --git a/include/xfs_trace.h b/include/xfs_trace.h
index 2c8d34e..da12c36 100644
--- a/include/xfs_trace.h
+++ b/include/xfs_trace.h
@@ -190,4 +190,47 @@
#define trace_xfs_rmap_lcombine(a...) ((void) 0)
#define trace_xfs_rmap_rcombine(a...) ((void) 0)
+#define trace_xfs_refcountbt_lookup(a...) ((void)0)
+#define trace_xfs_refcountbt_get(a...) ((void)0)
+#define trace_xfs_refcountbt_update(a...) ((void)0)
+#define trace_xfs_refcountbt_insert(a...) ((void)0)
+#define trace_xfs_refcountbt_delete(a...) ((void)0)
+#define trace_xfs_refcount_split_left_extent(a...) ((void)0)
+#define trace_xfs_refcount_split_left_extent_error(a...) ((void)0)
+#define trace_xfs_refcount_split_right_extent(a...) ((void)0)
+#define trace_xfs_refcount_split_right_extent_error(a...) ((void)0)
+#define trace_xfs_refcount_merge_center_extents_error(a...) ((void)0)
+#define trace_xfs_refcount_merge_left_extent_error(a...) ((void)0)
+#define trace_xfs_refcount_merge_right_extent_error(a...) ((void)0)
+#define trace_xfs_refcount_find_left_extent(a...) ((void)0)
+#define trace_xfs_refcount_find_left_extent_error(a...) ((void)0)
+#define trace_xfs_refcount_find_right_extent(a...) ((void)0)
+#define trace_xfs_refcount_find_right_extent_error(a...) ((void)0)
+#define trace_xfs_refcount_merge_center_extents(a...) ((void)0)
+#define trace_xfs_refcount_merge_left_extent(a...) ((void)0)
+#define trace_xfs_refcount_merge_right_extent(a...) ((void)0)
+#define trace_xfs_refcount_modify_extent(a...) ((void)0)
+#define trace_xfs_refcount_modify_extent_error(a...) ((void)0)
+#define trace_xfs_refcount_adjust_error(a...) ((void)0)
+#define trace_xfs_refcount_increase(a...) ((void)0)
+#define trace_xfs_refcount_decrease(a...) ((void)0)
+#define trace_xfs_reflink_relink_blocks(a...) ((void)0)
+
+#define trace_xfs_bmap_remap_alloc(a...) ((void)0)
+#define trace_xfs_bmap_remap_alloc_error(a...) ((void)0)
+#define trace_xfs_refcount_find_shared(a...) ((void)0)
+#define trace_xfs_refcount_find_shared_result(a...) ((void)0)
+#define trace_xfs_refcount_find_shared_error(a...) ((void)0)
+#define trace_xfs_perag_pool_free_extent(a...) ((void)0)
+#define trace_xfs_perag_pool_free_error(a...) ((void)0)
+#define trace_xfs_perag_pool_grab_block(a...) ((void)0)
+#define trace_xfs_perag_pool_grab_block_error(a...) ((void)0)
+#define trace_xfs_perag_pool_init(a...) ((void)0)
+#define trace_xfs_perag_pool_init_error(a...) ((void)0)
+#define trace_xfs_perag_pool_alloc_block(a...) ((void)0)
+#define trace_xfs_perag_pool_alloc_block_error(a...) ((void)0)
+#define trace_xfs_perag_pool_free_block(a...) ((void)0)
+#define trace_xfs_perag_pool_ensure_capacity(a...) ((void)0)
+#define trace_xfs_perag_pool_ensure_capacity_error(a...) ((void)0)
+
#endif /* __TRACE_H__ */
diff --git a/libxfs/Makefile b/libxfs/Makefile
index 3255917..70e7e2f 100644
--- a/libxfs/Makefile
+++ b/libxfs/Makefile
@@ -35,7 +35,10 @@ HFILES = \
xfs_inode_buf.h \
xfs_inode_fork.h \
xfs_quota_defs.h \
+ xfs_perag_pool.h \
xfs_rmap_btree.h \
+ xfs_refcount.h \
+ xfs_refcount_btree.h \
xfs_sb.h \
xfs_shared.h \
xfs_trans_resv.h \
@@ -80,6 +83,9 @@ CFILES = cache.c \
xfs_inode_fork.c \
xfs_ialloc_btree.c \
xfs_log_rlimit.c \
+ xfs_perag_pool.c \
+ xfs_refcount.c \
+ xfs_refcount_btree.c \
xfs_rtbitmap.c \
xfs_rmap.c \
xfs_rmap_btree.c \
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index fd0767e..619e06d 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -32,6 +32,7 @@
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
+#include "xfs_refcount_btree.h"
struct workqueue_struct *xfs_alloc_wq;
@@ -46,10 +47,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+unsigned int
+xfs_refc_block(
+ struct xfs_mount *mp)
+{
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return XFS_RMAP_BLOCK(mp) + 1;
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ return XFS_FIBT_BLOCK(mp) + 1;
+ return XFS_IBT_BLOCK(mp) + 1;
+}
+
xfs_extlen_t
xfs_prealloc_blocks(
struct xfs_mount *mp)
{
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ return xfs_refc_block(mp) + 1;
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
return XFS_RMAP_BLOCK(mp) + 1;
if (xfs_sb_version_hasfinobt(&mp->m_sb))
@@ -119,6 +133,8 @@ xfs_alloc_ag_max_usable(struct xfs_mount *mp)
/* rmap root block + full tree split on full AG */
blocks += 1 + (2 * mp->m_ag_maxlevels) - 1;
}
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ blocks += xfs_refcountbt_max_btree_size(mp);
return mp->m_sb.sb_agblocks - blocks;
}
@@ -2409,6 +2425,10 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
return false;
+ if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+ be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)
+ return false;
+
return true;;
}
@@ -2529,6 +2549,7 @@ xfs_alloc_read_agf(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
pag->pagf_levels[XFS_BTNUM_RMAPi] =
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
+ pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
spin_lock_init(&pag->pagb_lock);
pag->pagb_count = 0;
/* XXX: pagb_tree doesn't exist in userspace */
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
index cedb64b..69eb3f0 100644
--- a/libxfs/xfs_bmap.c
+++ b/libxfs/xfs_bmap.c
@@ -37,6 +37,7 @@
#include "xfs_trace.h"
#include "xfs_attr_leaf.h"
#include "xfs_quota_defs.h"
+#include "xfs_refcount.h"
#include "xfs_rmap_btree.h"
@@ -130,7 +131,8 @@ xfs_bmbt_lookup_ge(
*/
static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
{
- return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+ return whichfork != XFS_COW_FORK &&
+ XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(ip, whichfork) >
XFS_IFORK_MAXEXT(ip, whichfork);
}
@@ -140,7 +142,8 @@ static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
*/
static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
{
- return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+ return whichfork != XFS_COW_FORK &&
+ XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
XFS_IFORK_NEXTENTS(ip, whichfork) <=
XFS_IFORK_MAXEXT(ip, whichfork);
}
@@ -662,6 +665,7 @@ xfs_bmap_btree_to_extents(
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
+ ASSERT(whichfork != XFS_COW_FORK);
ASSERT(ifp->if_flags & XFS_IFEXTENTS);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
rblock = ifp->if_broot;
@@ -728,6 +732,7 @@ xfs_bmap_extents_to_btree(
xfs_bmbt_ptr_t *pp; /* root block address pointer */
mp = ip->i_mount;
+ ASSERT(whichfork != XFS_COW_FORK);
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
@@ -859,6 +864,7 @@ xfs_bmap_local_to_extents_empty(
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ ASSERT(whichfork != XFS_COW_FORK);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
ASSERT(ifp->if_bytes == 0);
ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
@@ -1692,7 +1698,8 @@ xfs_bmap_one_block(
*/
STATIC int /* error */
xfs_bmap_add_extent_delay_real(
- struct xfs_bmalloca *bma)
+ struct xfs_bmalloca *bma,
+ int whichfork)
{
struct xfs_bmbt_irec *new = &bma->got;
int diff; /* temp value */
@@ -1711,10 +1718,13 @@ xfs_bmap_add_extent_delay_real(
xfs_filblks_t temp2=0;/* value for da_new calculations */
int tmp_rval; /* partial logging flags */
struct xfs_mount *mp;
- int whichfork = XFS_DATA_FORK;
+ xfs_extnum_t *nextents;
mp = bma->tp ? bma->tp->t_mountp : NULL;
ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+ ASSERT(whichfork != XFS_ATTR_FORK);
+ nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
+ &bma->ip->i_d.di_nextents);
ASSERT(bma->idx >= 0);
ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
@@ -1728,6 +1738,9 @@ xfs_bmap_add_extent_delay_real(
#define RIGHT r[1]
#define PREV r[2]
+ if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;
+
/*
* Set up a bunch of variables to make the tests simpler.
*/
@@ -1814,7 +1827,7 @@ xfs_bmap_add_extent_delay_real(
trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
- bma->ip->i_d.di_nextents--;
+ (*nextents)--;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -1842,7 +1855,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
error = xfs_rmap_combine(mp, bma->rlist, bma->ip->i_ino,
- XFS_DATA_FORK, &LEFT, &RIGHT, &PREV);
+ whichfork, &LEFT, &RIGHT, &PREV);
if (error)
goto done;
break;
@@ -1878,7 +1891,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
- XFS_DATA_FORK, &LEFT, PREV.br_blockcount);
+ whichfork, &LEFT, PREV.br_blockcount);
if (error)
goto done;
break;
@@ -1913,7 +1926,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
- XFS_DATA_FORK, &RIGHT, -PREV.br_blockcount);
+ whichfork, &RIGHT, -PREV.br_blockcount);
if (error)
goto done;
break;
@@ -1928,7 +1941,7 @@ xfs_bmap_add_extent_delay_real(
xfs_bmbt_set_startblock(ep, new->br_startblock);
trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- bma->ip->i_d.di_nextents++;
+ (*nextents)++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -1946,7 +1959,7 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
- XFS_DATA_FORK, new);
+ whichfork, new);
if (error)
goto done;
break;
@@ -1985,7 +1998,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
error = xfs_rmap_resize(mp, bma->rlist, bma->ip->i_ino,
- XFS_DATA_FORK, &LEFT, new->br_blockcount);
+ whichfork, &LEFT, new->br_blockcount);
if (error)
goto done;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
@@ -2006,7 +2019,7 @@ xfs_bmap_add_extent_delay_real(
temp = PREV.br_blockcount - new->br_blockcount;
xfs_bmbt_set_blockcount(ep, temp);
xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
- bma->ip->i_d.di_nextents++;
+ (*nextents)++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2024,7 +2037,7 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
- XFS_DATA_FORK, new);
+ whichfork, new);
if (error)
goto done;
@@ -2076,7 +2089,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
error = xfs_rmap_move(mp, bma->rlist, bma->ip->i_ino,
- XFS_DATA_FORK, &RIGHT, -new->br_blockcount);
+ whichfork, &RIGHT, -new->br_blockcount);
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock));
@@ -2096,7 +2109,7 @@ xfs_bmap_add_extent_delay_real(
trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_blockcount(ep, temp);
xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
- bma->ip->i_d.di_nextents++;
+ (*nextents)++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2114,7 +2127,7 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
- XFS_DATA_FORK, new);
+ whichfork, new);
if (error)
goto done;
@@ -2169,7 +2182,7 @@ xfs_bmap_add_extent_delay_real(
RIGHT.br_blockcount = temp2;
/* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
- bma->ip->i_d.di_nextents++;
+ (*nextents)++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2187,7 +2200,7 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
error = xfs_rmap_insert(mp, bma->rlist, bma->ip->i_ino,
- XFS_DATA_FORK, new);
+ whichfork, new);
if (error)
goto done;
@@ -2266,7 +2279,8 @@ xfs_bmap_add_extent_delay_real(
xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
- bma->logflags |= rval;
+ if (whichfork != XFS_COW_FORK)
+ bma->logflags |= rval;
return error;
#undef LEFT
#undef RIGHT
@@ -2867,6 +2881,7 @@ done:
STATIC void
xfs_bmap_add_extent_hole_delay(
xfs_inode_t *ip, /* incore inode pointer */
+ int whichfork,
xfs_extnum_t *idx, /* extent number to update/insert */
xfs_bmbt_irec_t *new) /* new data to add to file extents */
{
@@ -2878,8 +2893,10 @@ xfs_bmap_add_extent_hole_delay(
int state; /* state bits, accessed thru macros */
xfs_filblks_t temp=0; /* temp for indirect calculations */
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ ifp = XFS_IFORK_PTR(ip, whichfork);
state = 0;
+ if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;
ASSERT(isnullstartblock(new->br_startblock));
/*
@@ -2897,7 +2914,7 @@ xfs_bmap_add_extent_hole_delay(
* Check and set flags if the current (right) segment exists.
* If it doesn't exist, we're converting the hole at end-of-file.
*/
- if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+ if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
@@ -3032,6 +3049,7 @@ xfs_bmap_add_extent_hole_real(
ASSERT(!isnullstartblock(new->br_startblock));
ASSERT(!bma->cur ||
!(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+ ASSERT(whichfork != XFS_COW_FORK);
XFS_STATS_INC(xs_add_exlist);
@@ -3967,7 +3985,8 @@ xfs_bmap_btalloc(
ASSERT(nullfb || fb_agno == args.agno ||
(ap->flist->xbf_low && fb_agno < args.agno));
ap->length = args.len;
- ap->ip->i_d.di_nblocks += args.len;
+ if (!(ap->flags & XFS_BMAPI_COWFORK))
+ ap->ip->i_d.di_nblocks += args.len;
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
if (ap->wasdel)
ap->ip->i_delayed_blks -= args.len;
@@ -3987,6 +4006,54 @@ xfs_bmap_btalloc(
}
/*
+ * For a remap operation, just "allocate" an extent at the address that the
+ * caller passed in, and ensure that the AGFL is the right size. The caller
+ * will then map the "allocated" extent into the file somewhere.
+ */
+STATIC int /* error */
+xfs_bmap_remap_alloc(
+ struct xfs_bmalloca *ap)
+{
+ struct xfs_trans *tp = ap->tp;
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_agblock_t bno; /* NOTE(review): holds *ap->firstblock (an fsblock); confirm this type is wide enough */
+ struct xfs_alloc_arg args;
+ int error;
+
+ /*
+ * validate that the block number is legal - this enables us to detect
+ * and handle a silent filesystem corruption rather than crashing.
+ */
+ memset(&args, 0, sizeof(struct xfs_alloc_arg));
+ args.tp = ap->tp;
+ args.mp = ap->tp->t_mountp;
+ bno = *ap->firstblock;
+ args.agno = XFS_FSB_TO_AGNO(mp, bno);
+ ASSERT(args.agno < mp->m_sb.sb_agcount);
+ args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
+ ASSERT(args.agbno < mp->m_sb.sb_agblocks);
+
+ /* "Allocate" the extent from the range we passed in. */
+ trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
+ ap->blkno = bno;
+ ap->ip->i_d.di_nblocks += ap->length;
+ xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+
+ /* Fix the freelist, like a real allocator does. */
+ args.pag = xfs_perag_get(args.mp, args.agno);
+ ASSERT(args.pag);
+
+ error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
+ if (error)
+ goto error0; /* same path as success: error0 is the next statement */
+error0:
+ xfs_perag_put(args.pag);
+ if (error)
+ trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
+ return error;
+}
+
+/*
* xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
* It figures out where to ask the underlying allocator to put the new extent.
*/
@@ -3994,6 +4061,8 @@ STATIC int
xfs_bmap_alloc(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
{
+ if (ap->flags & XFS_BMAPI_REMAP)
+ return xfs_bmap_remap_alloc(ap);
if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
return xfs_bmap_rtalloc(ap);
return xfs_bmap_btalloc(ap);
@@ -4122,8 +4191,7 @@ xfs_bmapi_read(
int error;
int eof;
int n = 0;
- int whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
+ int whichfork = xfs_bmapi_whichfork(flags);
ASSERT(*nmap >= 1);
ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
@@ -4194,6 +4262,7 @@ xfs_bmapi_read(
STATIC int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
+ int whichfork,
xfs_fileoff_t aoff,
xfs_filblks_t len,
struct xfs_bmbt_irec *got,
@@ -4202,7 +4271,7 @@ xfs_bmapi_reserve_delalloc(
int eof)
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
xfs_extlen_t alen;
xfs_extlen_t indlen;
char rt = XFS_IS_REALTIME_INODE(ip);
@@ -4261,7 +4330,7 @@ xfs_bmapi_reserve_delalloc(
got->br_startblock = nullstartblock(indlen);
got->br_blockcount = alen;
got->br_state = XFS_EXT_NORM;
- xfs_bmap_add_extent_hole_delay(ip, lastx, got);
+ xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
/*
* Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
@@ -4293,6 +4362,7 @@ out_unreserve_quota:
int
xfs_bmapi_delay(
struct xfs_inode *ip, /* incore inode */
+ int whichfork, /* data or cow fork? */
xfs_fileoff_t bno, /* starting file offs. mapped */
xfs_filblks_t len, /* length to map in file */
struct xfs_bmbt_irec *mval, /* output: map values */
@@ -4300,7 +4370,7 @@ xfs_bmapi_delay(
int flags) /* XFS_BMAPI_... */
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec got; /* current file extent record */
struct xfs_bmbt_irec prev; /* previous file extent record */
xfs_fileoff_t obno; /* old block number (offset) */
@@ -4310,14 +4380,15 @@ xfs_bmapi_delay(
int n = 0; /* current extent index */
int error = 0;
+ ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK);
ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (unlikely(XFS_TEST_ERROR(
- (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
+ (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
@@ -4328,19 +4399,20 @@ xfs_bmapi_delay(
XFS_STATS_INC(xs_blk_mapw);
- if (!(ifp->if_flags & XFS_IFEXTENTS)) {
- error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+ if (whichfork == XFS_DATA_FORK && !(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, ip, whichfork);
if (error)
return error;
}
- xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
+ xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
end = bno + len;
obno = bno;
while (bno < end && n < *nmap) {
if (eof || got.br_startoff > bno) {
- error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
+ error = xfs_bmapi_reserve_delalloc(ip, whichfork,
+ bno, len, &got,
&prev, &lastx, eof);
if (error) {
if (n == 0) {
@@ -4376,8 +4448,7 @@ xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
- int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
+ int whichfork = xfs_bmapi_whichfork(bma->flags);
struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
int tmp_logflags = 0;
int error;
@@ -4463,7 +4534,7 @@ xfs_bmapi_allocate(
bma->got.br_state = XFS_EXT_UNWRITTEN;
if (bma->wasdel)
- error = xfs_bmap_add_extent_delay_real(bma);
+ error = xfs_bmap_add_extent_delay_real(bma, whichfork);
else
error = xfs_bmap_add_extent_hole_real(bma, whichfork);
@@ -4493,8 +4564,7 @@ xfs_bmapi_convert_unwritten(
xfs_filblks_t len,
int flags)
{
- int whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
+ int whichfork = xfs_bmapi_whichfork(flags);
struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
int tmp_logflags = 0;
int error;
@@ -4510,6 +4580,8 @@ xfs_bmapi_convert_unwritten(
(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
return 0;
+ ASSERT(whichfork != XFS_COW_FORK);
+
/*
* Modify (by adding) the state flag, if writing.
*/
@@ -4605,8 +4677,7 @@ xfs_bmapi_write(
orig_mval = mval;
orig_nmap = *nmap;
#endif
- whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
+ whichfork = xfs_bmapi_whichfork(flags);
ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
@@ -4615,6 +4686,17 @@ xfs_bmapi_write(
ASSERT(len > 0);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ if (whichfork == XFS_ATTR_FORK)
+ ASSERT(!(flags & XFS_BMAPI_REMAP));
+ if (whichfork == XFS_COW_FORK) {
+ ASSERT(!(flags & XFS_BMAPI_REMAP));
+ ASSERT(!(flags & XFS_BMAPI_PREALLOC));
+ ASSERT(!(flags & XFS_BMAPI_CONVERT));
+ }
+ if (flags & XFS_BMAPI_REMAP) {
+ ASSERT(!(flags & XFS_BMAPI_PREALLOC));
+ ASSERT(!(flags & XFS_BMAPI_CONVERT));
+ }
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -4665,6 +4747,14 @@ xfs_bmapi_write(
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
/*
+ * Make sure we only reflink into a hole.
+ */
+ if (flags & XFS_BMAPI_REMAP)
+ ASSERT(inhole);
+ if (flags & XFS_BMAPI_COWFORK)
+ ASSERT(!inhole);
+
+ /*
* First, deal with the hole before the allocated space
* that we found, if any.
*/
@@ -4827,6 +4917,8 @@ xfs_bmap_del_extent(
if (whichfork == XFS_ATTR_FORK)
state |= BMAP_ATTRFORK;
+ else if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -5103,9 +5195,18 @@ xfs_bmap_del_extent(
/*
* If we need to, add to list of extents to delete.
*/
- if (do_fx)
- xfs_bmap_add_free(mp, flist, del->br_startblock,
- del->br_blockcount, NULL);
+ if (do_fx) {
+ if (xfs_is_reflink_inode(ip)) {
+ error = xfs_refcount_put_extent(mp, tp, flist,
+ del->br_startblock,
+ del->br_blockcount, NULL);
+ if (error)
+ goto done;
+ } else
+ xfs_bmap_add_free(mp, flist, del->br_startblock,
+ del->br_blockcount, NULL);
+ }
+
/*
* Adjust inode # blocks in the file.
*/
@@ -5130,6 +5231,179 @@ done:
}
/*
+ * xfs_bunmapi_cow() -- Remove the relevant parts of the CoW fork.
+ * See xfs_bmap_del_extent.
+ * @ip: XFS inode.
+ * @idx: Extent number to delete.
+ * @del: Extent to remove.
+ */
+int /* error; always 0 in this body */
+xfs_bunmapi_cow(
+ xfs_inode_t *ip,
+ xfs_extnum_t *idx,
+ xfs_bmbt_irec_t *del)
+{
+ xfs_filblks_t da_new; /* new delay-alloc indirect blocks */
+ xfs_filblks_t da_old; /* old delay-alloc indirect blocks */
+ xfs_fsblock_t del_endblock = 0;/* first block past del */
+ xfs_fileoff_t del_endoff; /* first offset past del */
+ int delay; /* current block is delayed allocated */
+ xfs_bmbt_rec_host_t *ep; /* current extent entry pointer */
+ int error; /* error return value */
+ xfs_bmbt_irec_t got; /* current extent entry */
+ xfs_fileoff_t got_endoff; /* first offset past got */
+ xfs_ifork_t *ifp; /* inode fork pointer */
+ xfs_mount_t *mp; /* mount structure */
+ xfs_filblks_t nblks; /* quota/sb block count */
+ xfs_bmbt_irec_t new; /* new record to be inserted */
+ /* REFERENCED */
+ uint qfield; /* quota field to update */
+ xfs_filblks_t temp; /* for indirect length calculations */
+ xfs_filblks_t temp2; /* for indirect length calculations */
+ int state = BMAP_COWFORK;
+
+ mp = ip->i_mount;
+ XFS_STATS_INC(xs_del_exlist);
+
+ ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
+ (uint)sizeof(xfs_bmbt_rec_t)));
+ ASSERT(del->br_blockcount > 0);
+ ep = xfs_iext_get_ext(ifp, *idx);
+ xfs_bmbt_get_all(ep, &got);
+ ASSERT(got.br_startoff <= del->br_startoff);
+ del_endoff = del->br_startoff + del->br_blockcount;
+ got_endoff = got.br_startoff + got.br_blockcount;
+ ASSERT(got_endoff >= del_endoff);
+ delay = isnullstartblock(got.br_startblock);
+ ASSERT(isnullstartblock(del->br_startblock) == delay);
+ qfield = 0;
+ error = 0;
+ /*
+ * If deleting a real allocation, must free up the disk space.
+ */
+ if (!delay) {
+ nblks = del->br_blockcount;
+ qfield = XFS_TRANS_DQ_BCOUNT;
+ /*
+ * Set up del_endblock for later.
+ */
+ del_endblock = del->br_startblock + del->br_blockcount;
+ da_old = da_new = 0;
+ } else {
+ da_old = startblockval(got.br_startblock);
+ da_new = 0;
+ nblks = 0;
+ }
+ qfield = qfield; /* self-assignment; value is not used below (presumably quiets a warning) */
+ nblks = nblks; /* self-assignment; value is not used below (presumably quiets a warning) */
+
+ /*
+ * Set flag value to use in switch statement.
+ * Left-contig is 2, right-contig is 1.
+ */
+ switch (((got.br_startoff == del->br_startoff) << 1) |
+ (got_endoff == del_endoff)) {
+ case 3:
+ /*
+ * Matches the whole extent. Delete the entry.
+ */
+ xfs_iext_remove(ip, *idx, 1, BMAP_COWFORK);
+ --*idx;
+ break;
+
+ case 2:
+ /*
+ * Deleting the first part of the extent.
+ */
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ xfs_bmbt_set_startoff(ep, del_endoff);
+ temp = got.br_blockcount - del->br_blockcount;
+ xfs_bmbt_set_blockcount(ep, temp);
+ if (delay) {
+ temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ da_old);
+ xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ da_new = temp;
+ break;
+ }
+ xfs_bmbt_set_startblock(ep, del_endblock);
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ break;
+
+ case 1:
+ /*
+ * Deleting the last part of the extent.
+ */
+ temp = got.br_blockcount - del->br_blockcount;
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ xfs_bmbt_set_blockcount(ep, temp);
+ if (delay) {
+ temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ da_old);
+ xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ da_new = temp;
+ break;
+ }
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ break;
+
+ case 0:
+ /*
+ * Deleting the middle of the extent.
+ */
+ temp = del->br_startoff - got.br_startoff;
+ trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ xfs_bmbt_set_blockcount(ep, temp);
+ new.br_startoff = del_endoff;
+ temp2 = got_endoff - del_endoff;
+ new.br_blockcount = temp2;
+ new.br_state = got.br_state;
+ if (!delay) {
+ new.br_startblock = del_endblock;
+ } else {
+ temp = xfs_bmap_worst_indlen(ip, temp);
+ xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+ temp2 = xfs_bmap_worst_indlen(ip, temp2);
+ new.br_startblock = nullstartblock((int)temp2);
+ da_new = temp + temp2;
+ while (da_new > da_old) {
+ if (temp) {
+ temp--;
+ da_new--;
+ xfs_bmbt_set_startblock(ep,
+ nullstartblock((int)temp));
+ }
+ if (da_new == da_old)
+ break;
+ if (temp2) {
+ temp2--;
+ da_new--;
+ new.br_startblock =
+ nullstartblock((int)temp2);
+ }
+ }
+ }
+ trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ xfs_iext_insert(ip, *idx + 1, 1, &new, state);
+ ++*idx;
+ break;
+ }
+
+ /*
+ * Account for change in delayed indirect blocks.
+ * Nothing to do for disk quota accounting here.
+ */
+ ASSERT(da_old >= da_new);
+ if (da_old > da_new)
+ xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
+
+ return error;
+}
+
+/*
* Unmap (remove) blocks from a file.
* If nexts is nonzero then the number of extents to remove is limited to
* that value. If not all extents in the block range can be removed then
@@ -5171,8 +5445,8 @@ xfs_bunmapi(
trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
- whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
- XFS_ATTR_FORK : XFS_DATA_FORK;
+ whichfork = xfs_bmapi_whichfork(flags);
+ ASSERT(whichfork != XFS_COW_FORK);
ifp = XFS_IFORK_PTR(ip, whichfork);
if (unlikely(
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
diff --git a/libxfs/xfs_bmap.h b/libxfs/xfs_bmap.h
index 77d8771..9d6d060 100644
--- a/libxfs/xfs_bmap.h
+++ b/libxfs/xfs_bmap.h
@@ -118,6 +118,15 @@ typedef struct xfs_bmap_free
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x040
+/*
+ * Map the inode offset to the block given in ap->firstblock. Primarily
+ * used for reflink. The range must be in a hole, and this flag cannot be
+ * turned on with PREALLOC or CONVERT, and cannot be used on the attr fork.
+ */
+#define XFS_BMAPI_REMAP 0x100
+
+/* Map something in the CoW fork. */
+#define XFS_BMAPI_COWFORK 0x200
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -126,7 +135,9 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
- { XFS_BMAPI_CONVERT, "CONVERT" }
+ { XFS_BMAPI_CONVERT, "CONVERT" }, \
+ { XFS_BMAPI_REMAP, "REMAP" }, \
+ { XFS_BMAPI_COWFORK, "COWFORK" }
static inline int xfs_bmapi_aflag(int w)
@@ -134,6 +145,15 @@ static inline int xfs_bmapi_aflag(int w)
return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0);
}
+static inline int xfs_bmapi_whichfork(int bmapi_flags)
+{
+ if (bmapi_flags & XFS_BMAPI_COWFORK) /* checked first: wins over ATTRFORK */
+ return XFS_COW_FORK;
+ else if (bmapi_flags & XFS_BMAPI_ATTRFORK)
+ return XFS_ATTR_FORK;
+ return XFS_DATA_FORK; /* neither fork flag set */
+}
+
/*
* Special values for xfs_bmbt_irec_t br_startblock field.
*/
@@ -160,13 +180,15 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
#define BMAP_LEFT_VALID (1 << 6)
#define BMAP_RIGHT_VALID (1 << 7)
#define BMAP_ATTRFORK (1 << 8)
+#define BMAP_COWFORK (1 << 9)
#define XFS_BMAP_EXT_FLAGS \
{ BMAP_LEFT_CONTIG, "LC" }, \
{ BMAP_RIGHT_CONTIG, "RC" }, \
{ BMAP_LEFT_FILLING, "LF" }, \
{ BMAP_RIGHT_FILLING, "RF" }, \
- { BMAP_ATTRFORK, "ATTR" }
+ { BMAP_ATTRFORK, "ATTR" }, \
+ { BMAP_COWFORK, "COW" }
/*
@@ -213,7 +235,7 @@ int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
-int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno,
+int xfs_bmapi_delay(struct xfs_inode *ip, int whichfork, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
@@ -221,6 +243,8 @@ int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fsblock_t *firstblock, xfs_extlen_t total,
struct xfs_bmbt_irec *mval, int *nmap,
struct xfs_bmap_free *flist);
+int xfs_bunmapi_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
+ struct xfs_bmbt_irec *del);
int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c
index bc09b2b..dc3152b 100644
--- a/libxfs/xfs_bmap_btree.c
+++ b/libxfs/xfs_bmap_btree.c
@@ -785,6 +785,7 @@ xfs_bmbt_init_cursor(
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_btree_cur *cur;
+ ASSERT(whichfork != XFS_COW_FORK);
cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
index 1622ddd..f325adc 100644
--- a/libxfs/xfs_btree.c
+++ b/libxfs/xfs_btree.c
@@ -41,9 +41,10 @@ kmem_zone_t *xfs_btree_cur_zone;
*/
static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
- XFS_FIBT_MAGIC },
+ XFS_FIBT_MAGIC, 0 },
{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
- XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
+ XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
+ XFS_REFC_CRC_MAGIC }
};
#define xfs_btree_magic(cur) \
xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
@@ -1129,6 +1130,9 @@ xfs_btree_set_refs(
case XFS_BTNUM_RMAP:
xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
break;
+ case XFS_BTNUM_REFC:
+ xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
+ break;
default:
ASSERT(0);
}
diff --git a/libxfs/xfs_btree.h b/libxfs/xfs_btree.h
index dd29d15..94848a1 100644
--- a/libxfs/xfs_btree.h
+++ b/libxfs/xfs_btree.h
@@ -43,6 +43,7 @@ union xfs_btree_key {
xfs_alloc_key_t alloc;
struct xfs_inobt_key inobt;
struct xfs_rmap_key rmap;
+ struct xfs_refcount_key refc;
};
union xfs_btree_rec {
@@ -51,6 +52,7 @@ union xfs_btree_rec {
struct xfs_alloc_rec alloc;
struct xfs_inobt_rec inobt;
struct xfs_rmap_rec rmap;
+ struct xfs_refcount_rec refc;
};
/*
@@ -66,6 +68,7 @@ union xfs_btree_rec {
#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
+#define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi)
/*
* For logging record fields.
@@ -98,6 +101,7 @@ do { \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(rmap, stat); break; \
+ case XFS_BTNUM_REFC: __XFS_BTREE_STATS_INC(refcbt, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -113,6 +117,7 @@ do { \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_ADD(rmap, stat, val); break; \
+ case XFS_BTNUM_REFC: __XFS_BTREE_STATS_ADD(refcbt, stat, val); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -205,6 +210,7 @@ typedef struct xfs_btree_cur
xfs_bmbt_irec_t b;
xfs_inobt_rec_incore_t i;
struct xfs_rmap_irec r;
+ struct xfs_refcount_irec rc;
} bc_rec; /* current insert/search record value */
struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
@@ -217,6 +223,7 @@ typedef struct xfs_btree_cur
union {
struct { /* needed for BNO, CNT, INO */
struct xfs_buf *agbp; /* agf/agi buffer pointer */
+ struct xfs_bmap_free *flist; /* list to free after */
xfs_agnumber_t agno; /* ag number */
} a;
struct { /* needed for BMAP */
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
index 94bd2f9..7876c98 100644
--- a/libxfs/xfs_format.h
+++ b/libxfs/xfs_format.h
@@ -456,9 +456,11 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */
#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
+#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */
#define XFS_SB_FEAT_RO_COMPAT_ALL \
(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
- XFS_SB_FEAT_RO_COMPAT_RMAPBT)
+ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
+ XFS_SB_FEAT_RO_COMPAT_REFLINK)
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
@@ -529,6 +531,12 @@ static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
}
+static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp)
+{
+ return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && /* v5 superblock... */
+ (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK); /* ...with the REFLINK ro-compat bit */
+}
+
static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
@@ -641,12 +649,15 @@ typedef struct xfs_agf {
__be32 agf_btreeblks; /* # of blocks held in AGF btrees */
uuid_t agf_uuid; /* uuid of filesystem */
+ __be32 agf_refcount_root; /* refcount tree root block */
+ __be32 agf_refcount_level; /* refcount btree levels */
+
/*
* reserve some contiguous space for future logged fields before we add
* the unlogged fields. This makes the range logging via flags and
* structure offsets much simpler.
*/
- __be64 agf_spare64[16];
+ __be64 agf_spare64[15];
/* unlogged fields, written during buffer writeback. */
__be64 agf_lsn; /* last write sequence */
@@ -1032,6 +1043,18 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
/*
+ * Values for di_flags2
+ * There should be a one-to-one correspondence between these flags and the
+ * XFS_XFLAG_s.
+ */
+#define XFS_DIFLAG2_REFLINK_BIT 0 /* file's blocks may be reflinked */
+#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
+
+#define XFS_DIFLAG2_ANY \
+ (XFS_DIFLAG2_REFLINK)
+
+
+/*
* Inode number format:
* low inopblog bits - offset in block
* next agblklog bits - block number in ag
@@ -1376,7 +1399,8 @@ XFS_RMAP_INO_OWNER(
#define XFS_RMAP_OWN_AG (-5ULL) /* AG freespace btree blocks */
#define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */
#define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */
-#define XFS_RMAP_OWN_MIN (-8ULL) /* guard */
+#define XFS_RMAP_OWN_REFC (-8ULL) /* refcount tree */
+#define XFS_RMAP_OWN_MIN (-9ULL) /* guard */
#define XFS_RMAP_NON_INODE_OWNER(owner) (!!((owner) & (1ULL << 63)))
@@ -1479,6 +1503,47 @@ xfs_owner_info_pack(
}
/*
+ * Reference Count Btree format definitions
+ *
+ */
+#define XFS_REFC_CRC_MAGIC 0x52334643 /* 'R3FC' */
+
+unsigned int xfs_refc_block(struct xfs_mount *mp);
+
+/*
+ * Data record/key structure
+ *
+ * Each record associates a range of physical blocks (starting at
+ * rc_startblock and ending rc_blockcount blocks later) with a
+ * reference count (rc_refcount). A record is only stored in the
+ * btree if the refcount is > 1. An entry in the free block btree
+ * means that the refcount is 0, and no entries anywhere means that
+ * the refcount is 1, as was true in XFS before reflinking.
+ */
+struct xfs_refcount_rec {
+ __be32 rc_startblock; /* starting block number */
+ __be32 rc_blockcount; /* count of blocks */
+ __be32 rc_refcount; /* number of inodes linked here */
+};
+
+struct xfs_refcount_key {
+ __be32 rc_startblock; /* starting block number */
+};
+
+struct xfs_refcount_irec {
+ xfs_agblock_t rc_startblock; /* starting block number */
+ xfs_extlen_t rc_blockcount; /* count of blocks */
+ xfs_nlink_t rc_refcount; /* number of inodes linked here */
+};
+
+#define MAXREFCOUNT ((xfs_nlink_t)~0U)
+#define MAXREFCEXTLEN ((xfs_extlen_t)~0U)
+
+/* btree pointer type */
+typedef __be32 xfs_refcount_ptr_t;
+
+
+/*
* BMAP Btree format definitions
*
* This includes both the root block definition that sits inside an inode fork
diff --git a/libxfs/xfs_fs.h b/libxfs/xfs_fs.h
index 56990eb..3af7747 100644
--- a/libxfs/xfs_fs.h
+++ b/libxfs/xfs_fs.h
@@ -67,6 +67,7 @@ struct fsxattr {
#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
+#define XFS_XFLAG_REFLINK 0x00008000 /* file is reflinked */
#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/*
diff --git a/libxfs/xfs_inode_fork.c b/libxfs/xfs_inode_fork.c
index 96a633e..0c60205 100644
--- a/libxfs/xfs_inode_fork.c
+++ b/libxfs/xfs_inode_fork.c
@@ -117,6 +117,26 @@ xfs_iformat_fork(
return -EFSCORRUPTED;
}
+ if (unlikely(xfs_is_reflink_inode(ip) &&
+ (ip->i_d.di_mode & S_IFMT) != S_IFREG)) {
+ xfs_warn(ip->i_mount,
+ "corrupt dinode %llu, wrong file type for reflink.",
+ ip->i_ino);
+ XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
+ XFS_ERRLEVEL_LOW, ip->i_mount, dip);
+ return -EFSCORRUPTED;
+ }
+
+ if (unlikely(xfs_is_reflink_inode(ip) &&
+ (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) {
+ xfs_warn(ip->i_mount,
+ "corrupt dinode %llu, has reflink+realtime flag set.",
+ ip->i_ino);
+ XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
+ XFS_ERRLEVEL_LOW, ip->i_mount, dip);
+ return -EFSCORRUPTED;
+ }
+
switch (ip->i_d.di_mode & S_IFMT) {
case S_IFIFO:
case S_IFCHR:
@@ -182,9 +202,14 @@ xfs_iformat_fork(
XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
return -EFSCORRUPTED;
}
- if (error) {
+ if (error)
return error;
+
+ if (xfs_is_reflink_inode(ip)) {
+ ASSERT(ip->i_cowfp == NULL);
+ xfs_ifork_init_cow(ip);
}
+
if (!XFS_DFORK_Q(dip))
return 0;
@@ -204,7 +229,8 @@ xfs_iformat_fork(
XFS_CORRUPTION_ERROR("xfs_iformat(8)",
XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
- return -EFSCORRUPTED;
+ error = -EFSCORRUPTED;
+ break;
}
error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
@@ -222,6 +248,9 @@ xfs_iformat_fork(
if (error) {
kmem_zone_free(xfs_ifork_zone, ip->i_afp);
ip->i_afp = NULL;
+ if (ip->i_cowfp)
+ kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
+ ip->i_cowfp = NULL;
xfs_idestroy_fork(ip, XFS_DATA_FORK);
}
return error;
@@ -712,6 +741,9 @@ xfs_idestroy_fork(
if (whichfork == XFS_ATTR_FORK) {
kmem_zone_free(xfs_ifork_zone, ip->i_afp);
ip->i_afp = NULL;
+ } else if (whichfork == XFS_COW_FORK) {
+ kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
+ ip->i_cowfp = NULL;
}
}
@@ -899,6 +931,19 @@ xfs_iext_get_ext(
}
}
+/* XFS_IEXT_STATE_TO_FORK() -- Convert BMAP state flags to an inode fork. */
+xfs_ifork_t *
+XFS_IEXT_STATE_TO_FORK(
+ struct xfs_inode *ip,
+ int state)
+{
+ if (state & BMAP_COWFORK)
+ return ip->i_cowfp;
+ else if (state & BMAP_ATTRFORK)
+ return ip->i_afp;
+ return &ip->i_df;
+}
+
/*
* Insert new item(s) into the extent records for incore inode
* fork 'ifp'. 'count' new items are inserted at index 'idx'.
@@ -911,7 +956,7 @@ xfs_iext_insert(
xfs_bmbt_irec_t *new, /* items to insert */
int state) /* type of extent conversion */
{
- xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
+ xfs_ifork_t *ifp = XFS_IEXT_STATE_TO_FORK(ip, state);
xfs_extnum_t i; /* extent record index */
trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
@@ -1161,7 +1206,7 @@ xfs_iext_remove(
int ext_diff, /* number of extents to remove */
int state) /* type of extent conversion */
{
- xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
+ xfs_ifork_t *ifp = XFS_IEXT_STATE_TO_FORK(ip, state);
xfs_extnum_t nextents; /* number of extents in file */
int new_size; /* size of extents after removal */
@@ -1897,3 +1942,22 @@ xfs_iext_irec_update_extoffs(
ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
}
}
+
+/**
+ * xfs_ifork_init_cow() -- Initialize an inode's copy-on-write fork.
+ *
+ * @ip: XFS inode.
+ */
+void
+xfs_ifork_init_cow(
+ struct xfs_inode *ip)
+{
+ if (ip->i_cowfp)
+ return;
+
+ ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone,
+ KM_SLEEP | KM_NOFS);
+ ip->i_cowfp->if_flags = XFS_IFEXTENTS;
+ ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
+ ip->i_cnextents = 0;
+}
diff --git a/libxfs/xfs_inode_fork.h b/libxfs/xfs_inode_fork.h
index 7d3b1ed..a9f5270 100644
--- a/libxfs/xfs_inode_fork.h
+++ b/libxfs/xfs_inode_fork.h
@@ -92,7 +92,9 @@ typedef struct xfs_ifork {
#define XFS_IFORK_PTR(ip,w) \
((w) == XFS_DATA_FORK ? \
&(ip)->i_df : \
- (ip)->i_afp)
+ ((w) == XFS_ATTR_FORK ? \
+ (ip)->i_afp : \
+ (ip)->i_cowfp))
#define XFS_IFORK_DSIZE(ip) \
(XFS_IFORK_Q(ip) ? \
XFS_IFORK_BOFF(ip) : \
@@ -105,26 +107,38 @@ typedef struct xfs_ifork {
#define XFS_IFORK_SIZE(ip,w) \
((w) == XFS_DATA_FORK ? \
XFS_IFORK_DSIZE(ip) : \
- XFS_IFORK_ASIZE(ip))
+ ((w) == XFS_ATTR_FORK ? \
+ XFS_IFORK_ASIZE(ip) : \
+ 0))
#define XFS_IFORK_FORMAT(ip,w) \
((w) == XFS_DATA_FORK ? \
(ip)->i_d.di_format : \
- (ip)->i_d.di_aformat)
+ ((w) == XFS_ATTR_FORK ? \
+ (ip)->i_d.di_aformat : \
+ (ip)->i_cformat))
#define XFS_IFORK_FMT_SET(ip,w,n) \
((w) == XFS_DATA_FORK ? \
((ip)->i_d.di_format = (n)) : \
- ((ip)->i_d.di_aformat = (n)))
+ ((w) == XFS_ATTR_FORK ? \
+ ((ip)->i_d.di_aformat = (n)) : \
+ ((ip)->i_cformat = (n))))
#define XFS_IFORK_NEXTENTS(ip,w) \
((w) == XFS_DATA_FORK ? \
(ip)->i_d.di_nextents : \
- (ip)->i_d.di_anextents)
+ ((w) == XFS_ATTR_FORK ? \
+ (ip)->i_d.di_anextents : \
+ (ip)->i_cnextents))
#define XFS_IFORK_NEXT_SET(ip,w,n) \
((w) == XFS_DATA_FORK ? \
((ip)->i_d.di_nextents = (n)) : \
- ((ip)->i_d.di_anextents = (n)))
+ ((w) == XFS_ATTR_FORK ? \
+ ((ip)->i_d.di_anextents = (n)) : \
+ ((ip)->i_cnextents = (n))))
#define XFS_IFORK_MAXEXT(ip, w) \
(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
+xfs_ifork_t *XFS_IEXT_STATE_TO_FORK(struct xfs_inode *ip, int state);
+
int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
@@ -168,4 +182,6 @@ void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
extern struct kmem_zone *xfs_ifork_zone;
+extern void xfs_ifork_init_cow(struct xfs_inode *ip);
+
#endif /* __XFS_INODE_FORK_H__ */
diff --git a/libxfs/xfs_perag_pool.c b/libxfs/xfs_perag_pool.c
new file mode 100644
index 0000000..5fdd293
--- /dev/null
+++ b/libxfs/xfs_perag_pool.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_alloc.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_perag_pool.h"
+#include "xfs_trans_space.h"
+
+/**
+ * xfs_perag_pool_free() -- Free a per-AG reserved block pool.
+ * @p: Pool to tear down (NULL is a no-op).  Each remaining extent is freed
+ *     in its own transaction; returns the first error seen, or 0.
+ */
+int
+xfs_perag_pool_free(
+	struct xfs_perag_pool		*p)
+{
+	struct xfs_mount		*mp;
+	struct xfs_perag_pool_entry	*ppe, *n;
+	struct xfs_trans		*tp;
+	xfs_fsblock_t			fsb;
+	struct xfs_bmap_free		freelist;
+	int				committed;
+	int				error = 0, err;
+
+	if (!p)
+		return 0;
+
+	mp = p->pp_mount;
+	list_for_each_entry_safe(ppe, n, &p->pp_entries, ppe_list) {
+		list_del(&ppe->ppe_list);
+		if (XFS_FORCED_SHUTDOWN(mp)) {
+			kmem_free(ppe);
+			continue;
+		}
+
+		/* Set up transaction. */
+		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+		tp->t_flags |= XFS_TRANS_RESERVE;
+		err = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 0, 0);
+		if (err)
+			goto loop_cancel;
+		xfs_bmap_init(&freelist, &fsb);
+		fsb = XFS_AGB_TO_FSB(p->pp_mount, p->pp_agno, ppe->ppe_bno);
+		trace_xfs_perag_pool_free_extent(mp, p->pp_agno, ppe->ppe_bno,
+				ppe->ppe_len, &p->pp_oinfo);
+		/* Free the block. */
+		xfs_bmap_add_free(mp, &freelist, fsb, ppe->ppe_len,
+				&p->pp_oinfo);
+		err = xfs_bmap_finish(&tp, &freelist, &committed, NULL);
+		if (err)
+			goto loop_cancel;
+
+		err = xfs_trans_commit(tp);
+		if (!error)
+			error = err;
+		kmem_free(ppe);
+		continue;
+loop_cancel:
+		if (!error)
+			error = err;
+		xfs_trans_cancel(tp);
+		kmem_free(ppe);
+	}
+
+	/* Trace before freeing the pool: the tracepoint reads p->pp_agno. */
+	if (error)
+		trace_xfs_perag_pool_free_error(mp, p->pp_agno, error,
+				_RET_IP_);
+	kmem_free(p);
+	return error;
+}
+
+/* Allocate a block for the pool. */
+static int
+xfs_perag_pool_grab_block(
+ struct xfs_perag_pool *p,
+ struct xfs_trans *tp,
+ xfs_extlen_t *len)
+{
+ struct xfs_mount *mp;
+ struct xfs_perag_pool_entry *ppe;
+ struct xfs_alloc_arg args;
+ int error;
+
+ mp = p->pp_mount;
+
+ /* Set up the allocation. */
+ memset(&args, 0, sizeof(args));
+ args.mp = mp;
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ args.fsbno = XFS_AGB_TO_FSB(mp, p->pp_agno, p->pp_agbno);
+ args.firstblock = args.fsbno;
+ args.oinfo = p->pp_oinfo;
+ args.minlen = 1;
+
+ /* Allocate blocks. */
+ args.tp = tp;
+ args.maxlen = args.prod = *len;
+ p->pp_allocating = true;
+ error = xfs_alloc_vextent(&args);
+ p->pp_allocating = false;
+ if (error)
+ goto out_error;
+ if (args.fsbno == NULLFSBLOCK) {
+ /* oh well, we're headed towards failure. */
+ error = -ENOSPC;
+ goto out_error;
+ }
+ *len = args.len;
+
+ trace_xfs_perag_pool_grab_block(mp, p->pp_agno, args.agbno, args.len,
+ &p->pp_oinfo);
+
+ /* Add to our list. */
+ ASSERT(args.agno == p->pp_agno);
+ ppe = kmem_alloc(sizeof(struct xfs_perag_pool_entry), KM_SLEEP);
+ ppe->ppe_bno = args.agbno;
+ ppe->ppe_len = args.len;
+ list_add_tail(&ppe->ppe_list, &p->pp_entries);
+ return 0;
+
+out_error:
+ trace_xfs_perag_pool_grab_block_error(mp, p->pp_agno, error, _RET_IP_);
+ return error;
+}
+
+/* Ensure the pool has some capacity. */
+static int
+__xfs_perag_pool_ensure_capacity(
+ struct xfs_perag_pool *p,
+ xfs_extlen_t sz,
+ bool force)
+{
+ struct xfs_mount *mp = p->pp_mount;
+ struct xfs_trans *tp;
+ struct xfs_perag *pag;
+ uint resblks;
+ xfs_extlen_t alloc_len;
+ int error;
+
+ if (sz <= p->pp_len - p->pp_inuse)
+ return 0;
+ sz -= p->pp_len - p->pp_inuse;
+
+ trace_xfs_perag_pool_ensure_capacity(mp, p->pp_agno,
+ p->pp_len - p->pp_inuse, sz, &p->pp_oinfo);
+
+ /* Do we even have enough free blocks? */
+ pag = xfs_perag_get(mp, p->pp_agno);
+ resblks = pag->pagf_freeblks;
+ xfs_perag_put(pag);
+ if (force && resblks < sz)
+ sz = resblks;
+ if (resblks < sz) {
+ error = -ENOSPC;
+ goto out_error;
+ }
+
+ while (sz) {
+ /* Set up a transaction */
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, sz);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
+ if (error)
+ goto out_cancel;
+
+ /* Allocate the blocks */
+ alloc_len = sz;
+ error = xfs_perag_pool_grab_block(p, tp, &alloc_len);
+ if (error)
+ goto out_cancel;
+
+ /* Commit the transaction */
+ error = xfs_trans_commit(tp);
+ if (error)
+ goto out_error;
+
+ p->pp_len += alloc_len;
+ sz -= alloc_len;
+ }
+ return 0;
+
+out_cancel:
+ xfs_trans_cancel(tp);
+out_error:
+ trace_xfs_perag_pool_ensure_capacity_error(mp, p->pp_agno, error,
+ _RET_IP_);
+ return error;
+}
+
+/**
+ * xfs_perag_pool_ensure_capacity() -- Ensure the pool has some capacity.
+ *
+ * @p: per-AG reserved blocks pool.
+ * @sz: Ensure that there are at least this many free blocks.
+ */
+int
+xfs_perag_pool_ensure_capacity(
+ struct xfs_perag_pool *p,
+ xfs_extlen_t sz)
+{
+ if (!p)
+ return 0;
+ return __xfs_perag_pool_ensure_capacity(p, sz, false);
+}
+
+/**
+ * xfs_perag_pool_init() -- Initialize a per-AG reserved block pool.
+ */
+int
+xfs_perag_pool_init(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ xfs_extlen_t inuse,
+ uint64_t owner,
+ struct xfs_perag_pool **pp)
+{
+ struct xfs_perag_pool *p;
+ struct xfs_owner_info oinfo;
+ int error;
+
+ XFS_RMAP_AG_OWNER(&oinfo, owner);
+ trace_xfs_perag_pool_init(mp, agno, agbno, len, &oinfo);
+ trace_xfs_perag_pool_init(mp, agno, agbno, inuse, &oinfo);
+
+ p = kmem_alloc(sizeof(struct xfs_perag_pool), KM_SLEEP);
+ p->pp_mount = mp;
+ p->pp_agno = agno;
+ p->pp_agbno = agbno;
+ p->pp_inuse = p->pp_len = inuse;
+ p->pp_oinfo = oinfo;
+ p->pp_allocating = false;
+ INIT_LIST_HEAD(&p->pp_entries);
+ *pp = p;
+
+ /* Try to reserve some blocks. */
+ error = __xfs_perag_pool_ensure_capacity(p, len - inuse, true);
+ if (error == -ENOSPC)
+ error = 0;
+
+ if (error)
+ trace_xfs_perag_pool_init_error(mp, agno, error, _RET_IP_);
+ return error;
+}
+
+/**
+ * xfs_perag_pool_alloc_block() -- Allocate a block from the pool.
+ * @p: Reserved block pool.
+ * @tp: Transaction to record the allocation.
+ * @bno: (out) The allocated block number.
+ * Returns 0, -EINVAL if @p is NULL or mid-allocation, or a refill error.
+ */
+int
+xfs_perag_pool_alloc_block(
+	struct xfs_perag_pool		*p,
+	struct xfs_trans		*tp,
+	xfs_agblock_t			*bno)
+{
+	struct xfs_mount		*mp;
+	struct xfs_perag_pool_entry	*ppe;
+	xfs_extlen_t			len;
+	int				error;
+
+	if (p == NULL || p->pp_allocating)
+		return -EINVAL;
+
+	mp = p->pp_mount;
+	mp = mp;
+	/* Empty pool? Grab another block. */
+	if (list_empty(&p->pp_entries)) {
+		len = 1;
+		error = xfs_perag_pool_grab_block(p, tp, &len);
+		if (error)
+			goto err;
+		ASSERT(len == 1);
+		if (list_empty(&p->pp_entries)) {
+			error = -ENOSPC;
+			goto err;
+		}
+	}
+
+	/* Find an available block. */
+	ppe = list_first_entry(&p->pp_entries, struct xfs_perag_pool_entry,
+			ppe_list);
+	*bno = ppe->ppe_bno;
+	trace_xfs_perag_pool_alloc_block(mp, p->pp_agno, *bno, 1, &p->pp_oinfo);
+
+	/* Update the accounting; an emptied entry is unlinked and freed. */
+	ppe->ppe_len--;
+	ppe->ppe_bno++;
+	if (ppe->ppe_len == 0) {
+		list_del(&ppe->ppe_list);
+		kmem_free(ppe);
+	}
+	p->pp_inuse++;
+	return 0;
+err:
+	trace_xfs_perag_pool_alloc_block_error(mp, p->pp_agno, error, _RET_IP_);
+	return error;
+}
+
+/**
+ * xfs_perag_pool_free_block() -- Put a block back in the pool.
+ *
+ * @p: Reserved block pool.
+ * @tp: Transaction to record the free operation.
+ * @bno: Block to put back.
+ */
+int
+xfs_perag_pool_free_block(
+ struct xfs_perag_pool *p,
+ struct xfs_trans *tp,
+ xfs_agblock_t bno)
+{
+ struct xfs_mount *mp;
+ struct xfs_perag_pool_entry *ppe;
+
+ if (p == NULL)
+ return -EINVAL;
+
+ mp = p->pp_mount;
+ mp = mp;
+ trace_xfs_perag_pool_free_block(mp, p->pp_agno, bno, 1, &p->pp_oinfo);
+
+ list_for_each_entry(ppe, &p->pp_entries, ppe_list) {
+ if (ppe->ppe_bno - 1 == bno) {
+
+ /* Adjust bookkeeping. */
+ p->pp_inuse--;
+ ppe->ppe_bno--;
+ ppe->ppe_len++;
+ return 0;
+ }
+ if (ppe->ppe_bno + ppe->ppe_len == bno) {
+ p->pp_inuse--;
+ ppe->ppe_len++;
+ return 0;
+ }
+ }
+ ppe = kmem_alloc(sizeof(struct xfs_perag_pool_entry), KM_SLEEP);
+ ppe->ppe_bno = bno;
+ ppe->ppe_len = 1;
+ p->pp_inuse--;
+
+ list_add_tail(&ppe->ppe_list, &p->pp_entries);
+ return 0;
+}
diff --git a/libxfs/xfs_perag_pool.h b/libxfs/xfs_perag_pool.h
new file mode 100644
index 0000000..ecdcd2a
--- /dev/null
+++ b/libxfs/xfs_perag_pool.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+struct xfs_perag_pool_entry {
+ struct list_head ppe_list; /* pool list */
+ xfs_agblock_t ppe_bno; /* AG block number */
+ xfs_extlen_t ppe_len; /* length */
+};
+
+struct xfs_perag_pool {
+ struct xfs_mount *pp_mount; /* XFS mount */
+ xfs_agnumber_t pp_agno; /* AG number */
+ xfs_agblock_t pp_agbno; /* suggested AG block number */
+ xfs_extlen_t pp_len; /* blocks in pool */
+ xfs_extlen_t pp_inuse; /* blocks in use */
+ struct xfs_owner_info pp_oinfo; /* owner */
+ struct list_head pp_entries; /* pool entries */
+ bool pp_allocating; /* are we allocating? */
+};
+
+int xfs_perag_pool_free(struct xfs_perag_pool *p);
+int xfs_perag_pool_init(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agblock_t agbno, xfs_extlen_t len, xfs_extlen_t inuse,
+ uint64_t owner, struct xfs_perag_pool **pp);
+
+int xfs_perag_pool_ensure_capacity(struct xfs_perag_pool *p, xfs_extlen_t sz);
+
+int xfs_perag_pool_alloc_block(struct xfs_perag_pool *p, struct xfs_trans *tp,
+ xfs_agblock_t *bno);
+int xfs_perag_pool_free_block(struct xfs_perag_pool *p, struct xfs_trans *tp,
+ xfs_agblock_t bno);
diff --git a/libxfs/xfs_refcount_btree.c b/libxfs/xfs_refcount_btree.c
new file mode 100644
index 0000000..4ad7cb1
--- /dev/null
+++ b/libxfs/xfs_refcount_btree.c
@@ -0,0 +1,576 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+#include "xfs_bit.h"
+#include "xfs_perag_pool.h"
+
+static struct xfs_btree_cur *
+xfs_refcountbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.a.agbp, cur->bc_private.a.agno,
+ cur->bc_private.a.flist);
+}
+
+STATIC void
+xfs_refcountbt_set_root(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int inc)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
+
+ ASSERT(ptr->s != 0);
+
+ agf->agf_refcount_root = ptr->s;
+ be32_add_cpu(&agf->agf_refcount_level, inc);
+ pag->pagf_refcount_level += inc;
+ xfs_perag_put(pag);
+
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+STATIC int
+xfs_refcountbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ struct xfs_alloc_arg args; /* block allocation args */
+ struct xfs_perag *pag;
+ xfs_agblock_t bno;
+ int error; /* error return value */
+
+ /* First try the per-AG reserve pool. */
+ pag = xfs_perag_get(cur->bc_mp, cur->bc_private.a.agno);
+ error = xfs_perag_pool_alloc_block(pag->pagf_refcountbt_pool,
+ cur->bc_tp, &bno);
+ xfs_perag_put(pag);
+
+ switch (error) {
+ case 0:
+ *stat = 1;
+ new->s = cpu_to_be32(bno);
+ return 0;
+ case -EINVAL:
+ break;
+ case -ENOSPC:
+ error = 0;
+ /* fall through */
+ default:
+ *stat = 0;
+ return error;
+ }
+
+ /* No pool; try a regular allocation. */
+ memset(&args, 0, sizeof(args));
+ args.tp = cur->bc_tp;
+ args.mp = cur->bc_mp;
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
+ xfs_refc_block(args.mp));
+ args.firstblock = args.fsbno;
+ XFS_RMAP_AG_OWNER(&args.oinfo, XFS_RMAP_OWN_REFC);
+ args.minlen = args.maxlen = args.prod = 1;
+
+ error = xfs_alloc_vextent(&args);
+ if (error)
+ goto out_error;
+ if (args.fsbno == NULLFSBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+ ASSERT(args.agno == cur->bc_private.a.agno);
+ ASSERT(args.len == 1);
+
+ new->s = cpu_to_be32(args.agbno);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+
+out_error:
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+}
+
+STATIC int
+xfs_refcountbt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_trans *tp = cur->bc_tp;
+ struct xfs_perag *pag;
+ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+ struct xfs_owner_info oinfo;
+ int error;
+
+ /* Try to give it back to the pool. */
+ pag = xfs_perag_get(cur->bc_mp, cur->bc_private.a.agno);
+ error = xfs_perag_pool_free_block(pag->pagf_refcountbt_pool, cur->bc_tp,
+ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno));
+ xfs_perag_put(pag);
+
+ switch (error) {
+ case 0:
+ return 0;
+ case -EINVAL:
+ break;
+ default:
+ return error;
+ }
+
+ /* Return it to the AG. */
+ XFS_RMAP_AG_OWNER(&oinfo, XFS_RMAP_OWN_REFC);
+ xfs_bmap_add_free(mp, cur->bc_private.a.flist, fsbno, 1,
+ &oinfo);
+ xfs_trans_binval(tp, bp);
+ return 0;
+}
+
+STATIC int
+xfs_refcountbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_refc_mnr[level != 0];
+}
+
+STATIC int
+xfs_refcountbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_refc_mxr[level != 0];
+}
+
+STATIC void
+xfs_refcountbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ ASSERT(rec->refc.rc_startblock != 0);
+
+ key->refc.rc_startblock = rec->refc.rc_startblock;
+}
+
+STATIC void
+xfs_refcountbt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ ASSERT(key->refc.rc_startblock != 0);
+
+ rec->refc.rc_startblock = key->refc.rc_startblock;
+}
+
+STATIC void
+xfs_refcountbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ ASSERT(cur->bc_rec.rc.rc_startblock != 0);
+
+ rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock);
+ rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount);
+ rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount);
+}
+
+STATIC void
+xfs_refcountbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+
+ ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(agf->agf_refcount_root != 0);
+
+ ptr->s = agf->agf_refcount_root;
+}
+
+STATIC __int64_t
+xfs_refcountbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ struct xfs_refcount_irec *rec = &cur->bc_rec.rc;
+ struct xfs_refcount_key *kp = &key->refc;
+
+ return (__int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
+}
+
+STATIC bool
+xfs_refcountbt_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
+ unsigned int level;
+
+ if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC))
+ return false;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return false;
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
+ return false;
+
+ level = be16_to_cpu(block->bb_level);
+ if (pag && pag->pagf_init) {
+ if (level >= pag->pagf_refcount_level)
+ return false;
+ } else if (level >= mp->m_ag_maxlevels)
+ return false;
+
+ return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]);
+}
+
+STATIC void
+xfs_refcountbt_read_verify(
+ struct xfs_buf *bp)
+{
+ if (!xfs_btree_sblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_refcountbt_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_verifier_error(bp);
+ }
+}
+
+STATIC void
+xfs_refcountbt_write_verify(
+ struct xfs_buf *bp)
+{
+ if (!xfs_refcountbt_verify(bp)) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
+ }
+ xfs_btree_sblock_calc_crc(bp);
+
+}
+
+const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
+ .name = "xfs_refcountbt",
+ .verify_read = xfs_refcountbt_read_verify,
+ .verify_write = xfs_refcountbt_write_verify,
+};
+
+#if defined(DEBUG) || defined(XFS_WARN)
+STATIC int
+xfs_refcountbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be32_to_cpu(k1->refc.rc_startblock) <
+ be32_to_cpu(k2->refc.rc_startblock);
+}
+
+STATIC int
+xfs_refcountbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
+{
+ struct xfs_refcount_irec a, b;
+
+ int ret = be32_to_cpu(r1->refc.rc_startblock) +
+ be32_to_cpu(r1->refc.rc_blockcount) <=
+ be32_to_cpu(r2->refc.rc_startblock);
+ if (!ret) {
+ a.rc_startblock = be32_to_cpu(r1->refc.rc_startblock);
+ a.rc_blockcount = be32_to_cpu(r1->refc.rc_blockcount);
+ a.rc_refcount = be32_to_cpu(r1->refc.rc_refcount);
+ b.rc_startblock = be32_to_cpu(r2->refc.rc_startblock);
+ b.rc_blockcount = be32_to_cpu(r2->refc.rc_blockcount);
+ b.rc_refcount = be32_to_cpu(r2->refc.rc_refcount);
+ trace_xfs_refcount_rec_order_error(cur->bc_mp,
+ cur->bc_private.a.agno, &a, &b);
+ }
+
+ return ret;
+}
+#endif /* DEBUG */
+
+static const struct xfs_btree_ops xfs_refcountbt_ops = {
+ .rec_len = sizeof(struct xfs_refcount_rec),
+ .key_len = sizeof(struct xfs_refcount_key),
+
+ .dup_cursor = xfs_refcountbt_dup_cursor,
+ .set_root = xfs_refcountbt_set_root,
+ .alloc_block = xfs_refcountbt_alloc_block,
+ .free_block = xfs_refcountbt_free_block,
+ .get_minrecs = xfs_refcountbt_get_minrecs,
+ .get_maxrecs = xfs_refcountbt_get_maxrecs,
+ .init_key_from_rec = xfs_refcountbt_init_key_from_rec,
+ .init_rec_from_key = xfs_refcountbt_init_rec_from_key,
+ .init_rec_from_cur = xfs_refcountbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_refcountbt_init_ptr_from_cur,
+ .key_diff = xfs_refcountbt_key_diff,
+ .buf_ops = &xfs_refcountbt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+ .keys_inorder = xfs_refcountbt_keys_inorder,
+ .recs_inorder = xfs_refcountbt_recs_inorder,
+#endif
+};
+
+/**
+ * xfs_refcountbt_init_cursor() -- Allocate a new refcount btree cursor.
+ * @mp: XFS mount object
+ * @tp: XFS transaction
+ * @agbp: Buffer containing the AGF
+ * @agno: AG number
+ * @flist: struct xfs_bmap_free list saved in the cursor (bc_private.a.flist)
+ */
+struct xfs_btree_cur *
+xfs_refcountbt_init_cursor(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_agnumber_t agno,
+ struct xfs_bmap_free *flist)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_btree_cur *cur;
+
+ ASSERT(agno != NULLAGNUMBER);
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_btnum = XFS_BTNUM_REFC;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
+ cur->bc_ops = &xfs_refcountbt_ops;
+
+ cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level);
+
+ cur->bc_private.a.agbp = agbp;
+ cur->bc_private.a.agno = agno;
+ cur->bc_private.a.flist = flist;
+ cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
+ return cur;
+}
+
+/**
+ * xfs_refcountbt_maxrecs() -- Calculate number of records in a refcount
+ * btree block.
+ * @mp: XFS mount object
+ * @blocklen: Length of block, in bytes.
+ * @leaf: true if this is a leaf btree block, false otherwise
+ */
+int
+xfs_refcountbt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ bool leaf)
+{
+ blocklen -= XFS_REFCOUNT_BLOCK_LEN;
+
+ if (leaf)
+ return blocklen / sizeof(struct xfs_refcount_rec);
+ return blocklen / (sizeof(struct xfs_refcount_key) +
+ sizeof(xfs_refcount_ptr_t));
+}
+
+DEFINE_BTREE_SIZE_FN(refcountbt, m_refc_mxr, XFS_BTREE_MAXLEVELS);
+
+/**
+ * xfs_refcountbt_max_btree_size() -- Calculate the maximum refcount btree size.
+ */
+unsigned int
+xfs_refcountbt_max_btree_size(
+ struct xfs_mount *mp)
+{
+ /* Bail out if we're uninitialized, which can happen in mkfs. */
+ if (mp->m_refc_mxr[0] == 0)
+ return 0;
+
+ return xfs_refcountbt_calc_btree_size(mp, mp->m_sb.sb_agblocks);
+}
+
+/* Count the level-zero (leaf) blocks of one AG's reference count tree. */
+static int
+xfs_refcountbt_count_tree_blocks(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_extlen_t *tree_len)
+{
+ struct xfs_buf *agfbp;
+ struct xfs_buf *bp = NULL;
+ struct xfs_agf *agfp;
+ struct xfs_btree_block *block = NULL;
+ int level;
+ xfs_agblock_t bno;
+ xfs_fsblock_t fsbno;
+ __be32 *pp;
+ int error;
+ xfs_extlen_t nr_blocks = 0;
+
+ error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agfbp);
+ if (error)
+ goto out;
+ agfp = XFS_BUF_TO_AGF(agfbp);
+ level = be32_to_cpu(agfp->agf_refcount_level);
+ bno = be32_to_cpu(agfp->agf_refcount_root);
+
+ /*
+ * Go down the tree until leaf level is reached, following the first
+ * pointer (leftmost) at each level.
+ */
+ while (level-- > 0) {
+ fsbno = XFS_AGB_TO_FSB(mp, agno, bno);
+ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, fsbno),
+ XFS_FSB_TO_BB(mp, 1), 0, &bp,
+ &xfs_refcountbt_buf_ops);
+ if (error)
+ goto err;
+ block = XFS_BUF_TO_BLOCK(bp);
+ if (level == 0)
+ break;
+ pp = XFS_REFCOUNT_PTR_ADDR(block, 1, mp->m_refc_mxr[1]);
+ bno = be32_to_cpu(*pp);
+ xfs_trans_brelse(NULL, bp);
+ }
+
+ /* Jog rightward through level zero, counting one block per sibling. */
+ while (block) {
+ nr_blocks++;
+ bno = be32_to_cpu(block->bb_u.s.bb_rightsib);
+ if (bno == NULLAGBLOCK)
+ break;
+ fsbno = XFS_AGB_TO_FSB(mp, agno, bno);
+ xfs_trans_brelse(NULL, bp);
+ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, fsbno),
+ XFS_FSB_TO_BB(mp, 1), 0, &bp,
+ &xfs_refcountbt_buf_ops);
+ if (error)
+ goto err;
+ block = XFS_BUF_TO_BLOCK(bp);
+ }
+
+ if (bp)
+ xfs_trans_brelse(NULL, bp);
+
+ /* NOTE(review): only level-zero blocks are counted; upper levels are not added. */
+ *tree_len = nr_blocks;
+err:
+ xfs_trans_brelse(NULL, agfbp);
+out:
+ return error;
+}
+
+/**
+ * xfs_refcountbt_alloc_reserve_pool() -- Create reserved block pools for each
+ * allocation group.
+ */
+int
+xfs_refcountbt_alloc_reserve_pool(
+ struct xfs_mount *mp)
+{
+ xfs_agnumber_t agno;
+ struct xfs_perag *pag;
+ xfs_extlen_t pool_len;
+ xfs_extlen_t tree_len;
+ int error = 0;
+ int err;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return 0;
+
+ pool_len = xfs_refcountbt_max_btree_size(mp);
+
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ pag = xfs_perag_get(mp, agno);
+ if (pag->pagf_refcountbt_pool) {
+ xfs_perag_put(pag);
+ continue;
+ }
+ tree_len = 0;
+ xfs_refcountbt_count_tree_blocks(mp, agno, &tree_len);
+ err = xfs_perag_pool_init(mp, agno,
+ xfs_refc_block(mp),
+ pool_len, tree_len,
+ XFS_RMAP_OWN_REFC,
+ &pag->pagf_refcountbt_pool);
+ xfs_perag_put(pag);
+ if (err && !error)
+ error = err;
+ }
+
+ return error;
+}
+
+/**
+ * xfs_refcountbt_free_reserve_pool() -- Free the reference count btree pools.
+ */
+int
+xfs_refcountbt_free_reserve_pool(
+ struct xfs_mount *mp)
+{
+ xfs_agnumber_t agno;
+ struct xfs_perag *pag;
+ int error = 0;
+ int err;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return 0;
+
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+ pag = xfs_perag_get(mp, agno);
+ err = xfs_perag_pool_free(pag->pagf_refcountbt_pool);
+ pag->pagf_refcountbt_pool = NULL;
+ xfs_perag_put(pag);
+ if (err && !error)
+ error = err;
+ }
+
+ return error;
+}
diff --git a/libxfs/xfs_refcount_btree.h b/libxfs/xfs_refcount_btree.h
new file mode 100644
index 0000000..93eebda
--- /dev/null
+++ b/libxfs/xfs_refcount_btree.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_REFCOUNT_BTREE_H__
+#define __XFS_REFCOUNT_BTREE_H__
+
+/*
+ * Reference Count Btree on-disk structures
+ */
+
+/*
+ * Forward declarations for every struct that is only used through a pointer
+ * in the prototypes below, so this header does not depend on include order.
+ */
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_mount;
+struct xfs_trans;
+struct xfs_bmap_free;
+
+/*
+ * Btree block header size
+ */
+#define XFS_REFCOUNT_BLOCK_LEN	XFS_BTREE_SBLOCK_CRC_LEN
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ *
+ * "index" is 1-based: index 1 addresses the first entry directly after the
+ * block header.  The pointer array is located after room for "maxrecs" keys.
+ */
+#define XFS_REFCOUNT_REC_ADDR(block, index) \
+	((struct xfs_refcount_rec *) \
+		((char *)(block) + \
+		 XFS_REFCOUNT_BLOCK_LEN + \
+		 ((index) - 1) * sizeof(struct xfs_refcount_rec)))
+
+#define XFS_REFCOUNT_KEY_ADDR(block, index) \
+	((struct xfs_refcount_key *) \
+		((char *)(block) + \
+		 XFS_REFCOUNT_BLOCK_LEN + \
+		 ((index) - 1) * sizeof(struct xfs_refcount_key)))
+
+#define XFS_REFCOUNT_PTR_ADDR(block, index, maxrecs) \
+	((xfs_refcount_ptr_t *) \
+		((char *)(block) + \
+		 XFS_REFCOUNT_BLOCK_LEN + \
+		 (maxrecs) * sizeof(struct xfs_refcount_key) + \
+		 ((index) - 1) * sizeof(xfs_refcount_ptr_t)))
+
+/* Set up a btree cursor for the refcount btree of AG "agno". */
+extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp,
+		struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno,
+		struct xfs_bmap_free *flist);
+
+/*
+ * Maximum number of records that fit in a block of "blocklen" bytes; "leaf"
+ * selects between the leaf-block and node-block limits.
+ */
+extern int xfs_refcountbt_maxrecs(struct xfs_mount *mp, int blocklen,
+		bool leaf);
+
+/* NOTE(review): DECLARE_BTREE_SIZE_FN is defined elsewhere -- confirm what it expands to. */
+DECLARE_BTREE_SIZE_FN(refcountbt);
+extern unsigned int xfs_refcountbt_max_btree_size(struct xfs_mount *mp);
+
+/* Create / tear down the per-AG reserved block pools for the refcount btree. */
+extern int xfs_refcountbt_alloc_reserve_pool(struct xfs_mount *mp);
+extern int xfs_refcountbt_free_reserve_pool(struct xfs_mount *mp);
+
+#endif	/* __XFS_REFCOUNT_BTREE_H__ */
diff --git a/libxfs/xfs_rmap.c b/libxfs/xfs_rmap.c
index 5ae4c1e..bbb6c90 100644
--- a/libxfs/xfs_rmap.c
+++ b/libxfs/xfs_rmap.c
@@ -1073,6 +1073,8 @@ __xfs_rmap_add(
if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
return 0;
+ if (ri->ri_whichfork == XFS_COW_FORK)
+ return 0;
new = kmem_zalloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
*new = *ri;
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
index 85ef128..c952c6a 100644
--- a/libxfs/xfs_sb.c
+++ b/libxfs/xfs_sb.c
@@ -34,6 +34,8 @@
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_refcount_btree.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -717,6 +719,13 @@ xfs_sb_mount_common(
mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
+ mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize,
+ true);
+ mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize,
+ false);
+ mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2;
+ mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
+
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock);
diff --git a/libxfs/xfs_shared.h b/libxfs/xfs_shared.h
index fa2bb9b..bffef9e 100644
--- a/libxfs/xfs_shared.h
+++ b/libxfs/xfs_shared.h
@@ -39,6 +39,7 @@ extern const struct xfs_buf_ops xfs_agf_buf_ops;
extern const struct xfs_buf_ops xfs_agfl_buf_ops;
extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
+extern const struct xfs_buf_ops xfs_refcountbt_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
@@ -216,6 +217,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
#define XFS_INO_REF 2
#define XFS_ATTR_BTREE_REF 1
#define XFS_DQUOT_REF 1
+#define XFS_REFC_BTREE_REF 1
/*
* Flags for xfs_trans_ichgtime().
diff --git a/libxfs/xfs_types.h b/libxfs/xfs_types.h
index da87796..cf044c0 100644
--- a/libxfs/xfs_types.h
+++ b/libxfs/xfs_types.h
@@ -93,6 +93,7 @@ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
*/
#define XFS_DATA_FORK 0
#define XFS_ATTR_FORK 1
+#define XFS_COW_FORK 2
/*
* Min numbers of data/attr fork btree root pointers.
@@ -112,7 +113,7 @@ typedef enum {
typedef enum {
XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
- XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
+ XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_REFCi, XFS_BTNUM_MAX
} xfs_btnum_t;
struct xfs_name {
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 40/53] xfs_db: dump refcount btree data
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (38 preceding siblings ...)
2015-12-19 9:09 ` [PATCH 39/53] libxfs: add support for refcount btrees Darrick J. Wong
@ 2015-12-19 9:09 ` Darrick J. Wong
2015-12-19 9:09 ` [PATCH 41/53] xfs_db: add support for checking the refcount btree Darrick J. Wong
` (12 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:09 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Add the ability to walk and dump the refcount btree in xfs_db.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
db/agf.c | 10 ++++++++--
db/btblock.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
db/btblock.h | 5 +++++
db/field.c | 9 +++++++++
db/field.h | 4 ++++
db/inode.c | 3 +++
db/sb.c | 2 ++
db/type.c | 5 +++++
db/type.h | 2 +-
man/man8/xfs_db.8 | 45 ++++++++++++++++++++++++++++++++++++++++++++-
10 files changed, 131 insertions(+), 4 deletions(-)
diff --git a/db/agf.c b/db/agf.c
index f4c4269..86d8929 100644
--- a/db/agf.c
+++ b/db/agf.c
@@ -47,7 +47,7 @@ const field_t agf_flds[] = {
{ "versionnum", FLDT_UINT32D, OI(OFF(versionnum)), C1, 0, TYP_NONE },
{ "seqno", FLDT_AGNUMBER, OI(OFF(seqno)), C1, 0, TYP_NONE },
{ "length", FLDT_AGBLOCK, OI(OFF(length)), C1, 0, TYP_NONE },
- { "roots", FLDT_AGBLOCK, OI(OFF(roots)), CI(XFS_BTNUM_AGF),
+ { "roots", FLDT_AGBLOCK, OI(OFF(roots)), CI(XFS_BTNUM_AGF) + 1,
FLD_ARRAY|FLD_SKIPALL, TYP_NONE },
{ "bnoroot", FLDT_AGBLOCK,
OI(OFF(roots) + XFS_BTNUM_BNO * SZ(roots[XFS_BTNUM_BNO])), C1, 0,
@@ -58,7 +58,10 @@ const field_t agf_flds[] = {
{ "rmaproot", FLDT_AGBLOCKNZ,
OI(OFF(roots) + XFS_BTNUM_RMAP * SZ(roots[XFS_BTNUM_RMAP])), C1, 0,
TYP_RMAPBT },
- { "levels", FLDT_UINT32D, OI(OFF(levels)), CI(XFS_BTNUM_AGF),
+ { "refcntroot", FLDT_AGBLOCKNZ,
+ OI(OFF(refcount_root)), C1, 0,
+ TYP_REFCBT },
+ { "levels", FLDT_UINT32D, OI(OFF(levels)), CI(XFS_BTNUM_AGF) + 1,
FLD_ARRAY|FLD_SKIPALL, TYP_NONE },
{ "bnolevel", FLDT_UINT32D,
OI(OFF(levels) + XFS_BTNUM_BNO * SZ(levels[XFS_BTNUM_BNO])), C1, 0,
@@ -69,6 +72,9 @@ const field_t agf_flds[] = {
{ "rmaplevel", FLDT_UINT32D,
OI(OFF(levels) + XFS_BTNUM_RMAP * SZ(levels[XFS_BTNUM_RMAP])), C1, 0,
TYP_NONE },
+ { "refcntlevel", FLDT_UINT32D,
+ OI(OFF(refcount_level)), C1, 0,
+ TYP_NONE },
{ "flfirst", FLDT_UINT32D, OI(OFF(flfirst)), C1, 0, TYP_NONE },
{ "fllast", FLDT_UINT32D, OI(OFF(fllast)), C1, 0, TYP_NONE },
{ "flcount", FLDT_UINT32D, OI(OFF(flcount)), C1, 0, TYP_NONE },
diff --git a/db/btblock.c b/db/btblock.c
index 430d84f..bdf07b1 100644
--- a/db/btblock.c
+++ b/db/btblock.c
@@ -102,6 +102,12 @@ struct xfs_db_btree {
sizeof(struct xfs_rmap_rec),
sizeof(__be32),
},
+ { XFS_REFC_CRC_MAGIC,
+ XFS_BTREE_SBLOCK_CRC_LEN,
+ sizeof(struct xfs_refcount_key),
+ sizeof(struct xfs_refcount_rec),
+ sizeof(__be32),
+ },
{ 0,
},
};
@@ -675,3 +681,47 @@ const field_t rmapbt_rec_flds[] = {
{ NULL }
};
#undef ROFF
+
+/* refcount btree blocks */
+/* Header field table: a single unnamed field of type FLDT_REFCBT_CRC at offset 0. */
+const field_t refcbt_crc_hfld[] = {
+ { "", FLDT_REFCBT_CRC, OI(0), C1, 0, TYP_NONE },
+ { NULL }
+};
+
+/* OFF(f): bitize()d offsetof() of member bb_<f> within struct xfs_btree_block. */
+#define OFF(f) bitize(offsetof(struct xfs_btree_block, bb_ ## f))
+/*
+ * Fields of a refcount btree block.  The fixed header members come first;
+ * "recs", "keys" and "ptrs" are arrays whose offset and count are supplied by
+ * the btblock_* helper functions rather than by a constant.
+ */
+const field_t refcbt_crc_flds[] = {
+ { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+ { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+ { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+ { "leftsib", FLDT_AGBLOCK, OI(OFF(u.s.bb_leftsib)), C1, 0, TYP_REFCBT },
+ { "rightsib", FLDT_AGBLOCK, OI(OFF(u.s.bb_rightsib)), C1, 0, TYP_REFCBT },
+ { "bno", FLDT_DFSBNO, OI(OFF(u.s.bb_blkno)), C1, 0, TYP_REFCBT },
+ { "lsn", FLDT_UINT64X, OI(OFF(u.s.bb_lsn)), C1, 0, TYP_NONE },
+ { "uuid", FLDT_UUID, OI(OFF(u.s.bb_uuid)), C1, 0, TYP_NONE },
+ { "owner", FLDT_AGNUMBER, OI(OFF(u.s.bb_owner)), C1, 0, TYP_NONE },
+ { "crc", FLDT_CRC, OI(OFF(u.s.bb_crc)), C1, 0, TYP_NONE },
+ /* FLD_ABASE1 presumably makes these arrays 1-based in xfs_db -- TODO confirm */
+ { "recs", FLDT_REFCBTREC, btblock_rec_offset, btblock_rec_count,
+ FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+ { "keys", FLDT_REFCBTKEY, btblock_key_offset, btblock_key_count,
+ FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+ /* ptrs reuses btblock_key_count, i.e. it has the same count as keys */
+ { "ptrs", FLDT_REFCBTPTR, btblock_ptr_offset, btblock_key_count,
+ FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_REFCBT },
+ { NULL }
+};
+#undef OFF
+
+/* KOFF(f): bitize()d offsetof() of member rc_<f> within struct xfs_refcount_key. */
+#define KOFF(f) bitize(offsetof(struct xfs_refcount_key, rc_ ## f))
+/* Fields of one refcount btree key. */
+const field_t refcbt_key_flds[] = {
+ { "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA },
+ { NULL }
+};
+#undef KOFF
+
+/* ROFF(f): bitize()d offsetof() of member rc_<f> within struct xfs_refcount_rec. */
+#define ROFF(f) bitize(offsetof(struct xfs_refcount_rec, rc_ ## f))
+/* Fields of one refcount btree record. */
+const field_t refcbt_rec_flds[] = {
+ { "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA },
+ { "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE },
+ /*
+  * NOTE(review): refcount is a counter, not a block pointer; the TYP_DATA
+  * follow type looks copied from startblock -- confirm whether TYP_NONE
+  * (as used for blockcount) was intended.
+  */
+ { "refcount", FLDT_UINT32D, OI(ROFF(refcount)), C1, 0, TYP_DATA },
+ { NULL }
+};
+#undef ROFF
diff --git a/db/btblock.h b/db/btblock.h
index 35299b4..fead2f1 100644
--- a/db/btblock.h
+++ b/db/btblock.h
@@ -59,4 +59,9 @@ extern const struct field rmapbt_crc_hfld[];
extern const struct field rmapbt_key_flds[];
extern const struct field rmapbt_rec_flds[];
+extern const struct field refcbt_crc_flds[];
+extern const struct field refcbt_crc_hfld[];
+extern const struct field refcbt_key_flds[];
+extern const struct field refcbt_rec_flds[];
+
extern int btblock_size(void *obj, int startoff, int idx);
diff --git a/db/field.c b/db/field.c
index 5664b95..2c956bf 100644
--- a/db/field.c
+++ b/db/field.c
@@ -183,6 +183,15 @@ const ftattr_t ftattrtab[] = {
{ FLDT_RMAPBTREC, "rmapbtrec", fp_sarray, (char *)rmapbt_rec_flds,
SI(bitsz(struct xfs_rmap_rec)), 0, NULL, rmapbt_rec_flds },
+ { FLDT_REFCBT_CRC, "refcntbt", NULL, (char *)refcbt_crc_flds, btblock_size,
+ FTARG_SIZE, NULL, refcbt_crc_flds },
+ { FLDT_REFCBTKEY, "refcntbtkey", fp_sarray, (char *)refcbt_key_flds,
+ SI(bitsz(struct xfs_refcount_key)), 0, NULL, refcbt_key_flds },
+ { FLDT_REFCBTPTR, "refcntbtptr", fp_num, "%u", SI(bitsz(xfs_refcount_ptr_t)),
+ 0, fa_agblock, NULL },
+ { FLDT_REFCBTREC, "refcntbtrec", fp_sarray, (char *)refcbt_rec_flds,
+ SI(bitsz(struct xfs_refcount_rec)), 0, NULL, refcbt_rec_flds },
+
/* CRC field */
{ FLDT_CRC, "crc", fp_crc, "%#x (%s)", SI(bitsz(__uint32_t)),
0, NULL, NULL },
diff --git a/db/field.h b/db/field.h
index 47f562a..ae5f490 100644
--- a/db/field.h
+++ b/db/field.h
@@ -89,6 +89,10 @@ typedef enum fldt {
FLDT_RMAPBTKEY,
FLDT_RMAPBTPTR,
FLDT_RMAPBTREC,
+ FLDT_REFCBT_CRC,
+ FLDT_REFCBTKEY,
+ FLDT_REFCBTPTR,
+ FLDT_REFCBTREC,
/* CRC field type */
FLDT_CRC,
diff --git a/db/inode.c b/db/inode.c
index 64b263b..4f0794a 100644
--- a/db/inode.c
+++ b/db/inode.c
@@ -175,6 +175,9 @@ const field_t inode_v3_flds[] = {
{ "crtime", FLDT_TIMESTAMP, OI(COFF(crtime)), C1, 0, TYP_NONE },
{ "inumber", FLDT_INO, OI(COFF(ino)), C1, 0, TYP_NONE },
{ "uuid", FLDT_UUID, OI(COFF(uuid)), C1, 0, TYP_NONE },
+ { "reflink", FLDT_UINT1,
+ OI(COFF(flags2) + bitsz(__uint64_t) - XFS_DIFLAG2_REFLINK_BIT-1), C1,
+ 0, TYP_NONE },
{ NULL }
};
diff --git a/db/sb.c b/db/sb.c
index 17d446c..b2cdbe6 100644
--- a/db/sb.c
+++ b/db/sb.c
@@ -694,6 +694,8 @@ version_string(
strcat(s, ",SPARSE_INODES");
if (xfs_sb_version_hasmetauuid(sbp))
strcat(s, ",META_UUID");
+ if (xfs_sb_version_hasreflink(sbp))
+ strcat(s, ",REFLINK");
return s;
}
diff --git a/db/type.c b/db/type.c
index f78290e..a84bfd1 100644
--- a/db/type.c
+++ b/db/type.c
@@ -59,6 +59,7 @@ static const typ_t __typtab[] = {
{ TYP_BNOBT, "bnobt", handle_struct, bnobt_hfld, NULL },
{ TYP_CNTBT, "cntbt", handle_struct, cntbt_hfld, NULL },
{ TYP_RMAPBT, NULL },
+ { TYP_REFCBT, NULL },
{ TYP_DATA, "data", handle_block, NULL, NULL },
{ TYP_DIR2, "dir2", handle_struct, dir2_hfld, NULL },
{ TYP_DQBLK, "dqblk", handle_struct, dqblk_hfld, NULL },
@@ -91,6 +92,8 @@ static const typ_t __typtab_crc[] = {
&xfs_allocbt_buf_ops },
{ TYP_RMAPBT, "rmapbt", handle_struct, rmapbt_crc_hfld,
&xfs_rmapbt_buf_ops },
+ { TYP_REFCBT, "refcntbt", handle_struct, refcbt_crc_hfld,
+ &xfs_refcountbt_buf_ops },
{ TYP_DATA, "data", handle_block, NULL, NULL },
{ TYP_DIR2, "dir3", handle_struct, dir3_hfld,
&xfs_dir3_db_buf_ops },
@@ -129,6 +132,8 @@ static const typ_t __typtab_spcrc[] = {
&xfs_allocbt_buf_ops },
{ TYP_RMAPBT, "rmapbt", handle_struct, rmapbt_crc_hfld,
&xfs_rmapbt_buf_ops },
+ { TYP_REFCBT, "refcntbt", handle_struct, refcbt_crc_hfld,
+ &xfs_refcountbt_buf_ops },
{ TYP_DATA, "data", handle_block, NULL, NULL },
{ TYP_DIR2, "dir3", handle_struct, dir3_hfld,
&xfs_dir3_db_buf_ops },
diff --git a/db/type.h b/db/type.h
index 1bef8e6..998f755 100644
--- a/db/type.h
+++ b/db/type.h
@@ -24,7 +24,7 @@ struct field;
typedef enum typnm
{
TYP_AGF, TYP_AGFL, TYP_AGI, TYP_ATTR, TYP_BMAPBTA,
- TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_RMAPBT, TYP_DATA,
+ TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_RMAPBT, TYP_REFCBT, TYP_DATA,
TYP_DIR2, TYP_DQBLK, TYP_INOBT, TYP_INODATA, TYP_INODE,
TYP_LOG, TYP_RTBITMAP, TYP_RTSUMMARY, TYP_SB, TYP_SYMLINK,
TYP_TEXT, TYP_FINOBT, TYP_NONE
diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
index 5745b22..2f61357 100644
--- a/man/man8/xfs_db.8
+++ b/man/man8/xfs_db.8
@@ -673,7 +673,7 @@ If no argument is given, show the current data type.
The possible data types are:
.BR agf ", " agfl ", " agi ", " attr ", " bmapbta ", " bmapbtd ,
.BR bnobt ", " cntbt ", " data ", " dir ", " dir2 ", " dqblk ,
-.BR inobt ", " inode ", " log ", " rtbitmap ", " rtsummary ,
+.BR inobt ", " inode ", " log ", " refcntbt ", " rtbitmap ", " rtsummary ,
.BR sb ", " symlink " and " text .
See the TYPES section below for more information on these data types.
.TP
@@ -1658,6 +1658,49 @@ use
.BR xfs_logprint (8)
instead.
.TP
+.B refcntbt
+There is one set of filesystem blocks forming the reference count Btree for
+each allocation group. The root block of this Btree is designated by the
+.B refcntroot
+field in the corresponding AGF block. The blocks are linked to sibling left
+and right blocks at each level, as well as by pointers from parent to child
+blocks. Each block has the following fields:
+.RS 1.4i
+.PD 0
+.TP 1.2i
+.B magic
+REFC block magic number, 0x52334643 ('R3FC').
+.TP
+.B level
+level number of this block, 0 is a leaf.
+.TP
+.B numrecs
+number of data entries in the block.
+.TP
+.B leftsib
+left (logically lower) sibling block, 0 if none.
+.TP
+.B rightsib
+right (logically higher) sibling block, 0 if none.
+.TP
+.B recs
+[leaf blocks only] array of reference count records. Each record contains
+.BR startblock ,
+.BR blockcount ,
+and
+.BR refcount .
+.TP
+.B keys
+[non-leaf blocks only] array of key records. These are the first value
+of each block in the level below this one. Each record contains
+.B startblock .
+.TP
+.B ptrs
+[non-leaf blocks only] array of child block pointers. Each pointer is a
+block number within the allocation group to the next level in the Btree.
+.PD
+.RE
+.TP
.B rtbitmap
If the filesystem has a realtime subvolume, then the
.B rbmino
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 41/53] xfs_db: add support for checking the refcount btree
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (39 preceding siblings ...)
2015-12-19 9:09 ` [PATCH 40/53] xfs_db: dump refcount btree data Darrick J. Wong
@ 2015-12-19 9:09 ` Darrick J. Wong
2015-12-19 9:09 ` [PATCH 42/53] xfs_db: metadump should copy the refcount btree too Darrick J. Wong
` (11 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:09 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Do some basic checks of the refcount btree. xfs_repair will have to
check that the reference counts match the various bmbt mappings.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
db/check.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 128 insertions(+), 8 deletions(-)
diff --git a/db/check.c b/db/check.c
index 4721834..661ba06 100644
--- a/db/check.c
+++ b/db/check.c
@@ -44,7 +44,8 @@ typedef enum {
DBM_FREE1, DBM_FREE2, DBM_FREELIST, DBM_INODE,
DBM_LOG, DBM_MISSING, DBM_QUOTA, DBM_RTBITMAP,
DBM_RTDATA, DBM_RTFREE, DBM_RTSUM, DBM_SB,
- DBM_SYMLINK, DBM_BTFINO, DBM_BTRMAP,
+ DBM_SYMLINK, DBM_BTFINO, DBM_BTRMAP, DBM_BTREFC,
+ DBM_RLDATA,
DBM_NDBM
} dbm_t;
@@ -52,7 +53,8 @@ typedef struct inodata {
struct inodata *next;
nlink_t link_set;
nlink_t link_add;
- char isdir;
+ char isdir:1;
+ char isreflink:1;
char security;
char ilist;
xfs_ino_t ino;
@@ -172,6 +174,8 @@ static const char *typename[] = {
"symlink",
"btfino",
"btrmap",
+ "btrefcnt",
+ "rldata",
NULL
};
static int verbose;
@@ -229,7 +233,8 @@ static int blocktrash_f(int argc, char **argv);
static int blockuse_f(int argc, char **argv);
static int check_blist(xfs_fsblock_t bno);
static void check_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno,
- xfs_extlen_t len, dbm_t type);
+ xfs_extlen_t len, dbm_t type,
+ int ignore_reflink);
static int check_inomap(xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_extlen_t len, xfs_ino_t c_ino);
static void check_linkcounts(xfs_agnumber_t agno);
@@ -353,6 +358,9 @@ static void scanfunc_fino(struct xfs_btree_block *block, int level,
static void scanfunc_rmap(struct xfs_btree_block *block, int level,
struct xfs_agf *agf, xfs_agblock_t bno,
int isroot);
+static void scanfunc_refcnt(struct xfs_btree_block *block, int level,
+ struct xfs_agf *agf, xfs_agblock_t bno,
+ int isroot);
static void set_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_extlen_t len, dbm_t type,
xfs_agnumber_t c_agno, xfs_agblock_t c_agbno);
@@ -1055,6 +1063,7 @@ blocktrash_f(
(1 << DBM_SYMLINK) |
(1 << DBM_BTFINO) |
(1 << DBM_BTRMAP) |
+ (1 << DBM_BTREFC) |
(1 << DBM_SB);
while ((c = getopt(argc, argv, "0123n:o:s:t:x:y:z")) != EOF) {
switch (c) {
@@ -1291,18 +1300,25 @@ check_dbmap(
xfs_agnumber_t agno,
xfs_agblock_t agbno,
xfs_extlen_t len,
- dbm_t type)
+ dbm_t type,
+ int ignore_reflink)
{
xfs_extlen_t i;
char *p;
+ dbm_t d;
for (i = 0, p = &dbmap[agno][agbno]; i < len; i++, p++) {
+ d = (dbm_t)*p;
+ if (ignore_reflink && (d == DBM_UNKNOWN || d == DBM_DATA ||
+ d == DBM_RLDATA))
+ continue;
if ((dbm_t)*p != type) {
- if (!sflag || CHECK_BLISTA(agno, agbno + i))
+ if (!sflag || CHECK_BLISTA(agno, agbno + i)) {
dbprintf(_("block %u/%u expected type %s got "
"%s\n"),
agno, agbno + i, typename[type],
typename[(dbm_t)*p]);
+ }
error++;
}
}
@@ -1336,7 +1352,7 @@ check_inomap(
return 0;
}
for (i = 0, rval = 1, idp = &inomap[agno][agbno]; i < len; i++, idp++) {
- if (*idp) {
+ if (*idp && !(*idp)->isreflink) {
if (!sflag || (*idp)->ilist ||
CHECK_BLISTA(agno, agbno + i))
dbprintf(_("block %u/%u claimed by inode %lld, "
@@ -1542,6 +1558,26 @@ check_rrange(
return 1;
}
+/*
+ * Reference counts themselves are not verified here (repair can do that);
+ * the only thing enforced is that a data block never overlaps with a non-data
+ * block.
+ *
+ * Hence, when a block is about to be set to data or rldata, it is acceptable
+ * for its current state to be unknown, data, or rldata.  A block must not be
+ * downgraded from rldata to data.
+ *
+ * Returns true iff the filesystem has the reflink feature and type2 is
+ * DBM_DATA or DBM_RLDATA.
+ */
+static bool
+is_reflink(
+	dbm_t	type2)
+{
+	return xfs_sb_version_hasreflink(&mp->m_sb) &&
+	       (type2 == DBM_DATA || type2 == DBM_RLDATA);
+}
+
static void
check_set_dbmap(
xfs_agnumber_t agno,
@@ -1561,10 +1597,15 @@ check_set_dbmap(
agbno, agbno + len - 1, c_agno, c_agbno);
return;
}
- check_dbmap(agno, agbno, len, type1);
+ check_dbmap(agno, agbno, len, type1, is_reflink(type2));
mayprint = verbose | blist_size;
for (i = 0, p = &dbmap[agno][agbno]; i < len; i++, p++) {
- *p = (char)type2;
+ if (*p == DBM_RLDATA && type2 == DBM_DATA)
+ ; /* do nothing */
+ if (*p == DBM_DATA && type2 == DBM_DATA)
+ *p = (char)DBM_RLDATA;
+ else
+ *p = (char)type2;
if (mayprint && (verbose || CHECK_BLISTA(agno, agbno + i)))
dbprintf(_("setting block %u/%u to %s\n"), agno, agbno + i,
typename[type2]);
@@ -2806,6 +2847,7 @@ process_inode(
type = DBM_UNKNOWN;
break;
}
+ id->isreflink = !!(idic.di_flags2 & XFS_DIFLAG2_REFLINK);
if (idic.di_version == 1)
setlink_inode(id, idic.di_onlink, type == DBM_DIR, security);
else {
@@ -3918,6 +3960,12 @@ scan_ag(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]),
1, scanfunc_rmap, TYP_RMAPBT);
}
+ if (agf->agf_refcount_root) {
+ scan_sbtree(agf,
+ be32_to_cpu(agf->agf_refcount_root),
+ be32_to_cpu(agf->agf_refcount_level),
+ 1, scanfunc_refcnt, TYP_REFCBT);
+ }
scan_sbtree(agf,
be32_to_cpu(agi->agi_root),
be32_to_cpu(agi->agi_level),
@@ -4741,6 +4789,78 @@ scanfunc_rmap(
}
+/*
+ * Check one block of an AG's refcount btree and record it in the block map.
+ *
+ * block:  the btree block to examine
+ * level:  the level this block is expected to be at (0 == leaf)
+ * agf:    AGF of the AG being scanned; supplies the AG number
+ * bno:    AG block number of "block"
+ * isroot: nonzero for the root block, which is exempt from the minimum
+ *         record count
+ *
+ * The block itself is marked DBM_BTREFC.  For a leaf, every record's extent
+ * is marked DBM_RLDATA and records must not start before the end of the
+ * previous accepted record.  For a node, each child pointer is followed via
+ * scan_sbtree().  Bad magic, a bad record count and out-of-order records are
+ * counted in serious_error; a level mismatch is counted in error.
+ */
static void
+scanfunc_refcnt(
+	struct xfs_btree_block	*block,
+	int			level,
+	struct xfs_agf		*agf,
+	xfs_agblock_t		bno,
+	int			isroot)
+{
+	xfs_agnumber_t		seqno = be32_to_cpu(agf->agf_seqno);
+	int			numrecs;
+	int			i;
+	xfs_refcount_ptr_t	*pp;
+	struct xfs_refcount_rec	*rp;
+	xfs_agblock_t		lastblock;
+
+	if (be32_to_cpu(block->bb_magic) != XFS_REFC_CRC_MAGIC) {
+		dbprintf(_("bad magic # %#x in refcntbt block %u/%u\n"),
+			be32_to_cpu(block->bb_magic), seqno, bno);
+		serious_error++;
+		return;
+	}
+	if (be16_to_cpu(block->bb_level) != level) {
+		if (!sflag)
+			dbprintf(_("expected level %d got %d in refcntbt block "
+				 "%u/%u\n"),
+				level, be16_to_cpu(block->bb_level), seqno, bno);
+		error++;
+	}
+	set_dbmap(seqno, bno, 1, DBM_BTREFC, seqno, bno);
+
+	numrecs = be16_to_cpu(block->bb_numrecs);
+	if (level == 0) {
+		if (numrecs > mp->m_refc_mxr[0] ||
+		    (isroot == 0 && numrecs < mp->m_refc_mnr[0])) {
+			dbprintf(_("bad btree nrecs (%u, min=%u, max=%u) in "
+				 "refcntbt block %u/%u\n"),
+				numrecs, mp->m_refc_mnr[0],
+				mp->m_refc_mxr[0], seqno, bno);
+			serious_error++;
+			return;
+		}
+		rp = XFS_REFCOUNT_REC_ADDR(block, 1);
+		lastblock = 0;
+		for (i = 0; i < numrecs; i++) {
+			set_dbmap(seqno, be32_to_cpu(rp[i].rc_startblock),
+				be32_to_cpu(rp[i].rc_blockcount), DBM_RLDATA,
+				seqno, bno);
+			if (be32_to_cpu(rp[i].rc_startblock) < lastblock) {
+				/* report (startblock blockcount), not startblock twice */
+				dbprintf(_(
+		"out-of-order refcnt btree record %d (%u %u) block %u/%u\n"),
+					i, be32_to_cpu(rp[i].rc_startblock),
+					be32_to_cpu(rp[i].rc_blockcount),
+					seqno, bno);
+				/* a misordered btree must not pass the check */
+				serious_error++;
+			} else {
+				lastblock = be32_to_cpu(rp[i].rc_startblock) +
+					    be32_to_cpu(rp[i].rc_blockcount);
+			}
+		}
+		return;
+	}
+	if (numrecs > mp->m_refc_mxr[1] ||
+	    (isroot == 0 && numrecs < mp->m_refc_mnr[1])) {
+		dbprintf(_("bad btree nrecs (%u, min=%u, max=%u) in refcntbt "
+			 "block %u/%u\n"),
+			numrecs, mp->m_refc_mnr[1],
+			mp->m_refc_mxr[1], seqno, bno);
+		serious_error++;
+		return;
+	}
+	pp = XFS_REFCOUNT_PTR_ADDR(block, 1, mp->m_refc_mxr[1]);
+	for (i = 0; i < numrecs; i++)
+		scan_sbtree(agf, be32_to_cpu(pp[i]), level, 0, scanfunc_refcnt,
+			    TYP_REFCBT);
+}
+
+static void
set_dbmap(
xfs_agnumber_t agno,
xfs_agblock_t agbno,
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 42/53] xfs_db: metadump should copy the refcount btree too
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (40 preceding siblings ...)
2015-12-19 9:09 ` [PATCH 41/53] xfs_db: add support for checking the refcount btree Darrick J. Wong
@ 2015-12-19 9:09 ` Darrick J. Wong
2015-12-19 9:09 ` [PATCH 43/53] xfs_growfs: report the presence of the reflink feature Darrick J. Wong
` (10 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:09 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Teach metadump to copy the refcount btree.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
db/metadump.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/db/metadump.c b/db/metadump.c
index 94c22a9..56c7ae2 100644
--- a/db/metadump.c
+++ b/db/metadump.c
@@ -596,6 +596,78 @@ copy_rmap_btree(
return scan_btree(agno, root, levels, TYP_RMAPBT, agf, scanfunc_rmapbt);
}
+/*
+ * scan_btree() callback for refcount btree blocks: descend from an interior
+ * block into each of its children.
+ *
+ * Leaf blocks (level 0) need no further work.  A block whose record count
+ * exceeds the node maximum, or a child pointer rejected by valid_bno(), is
+ * warned about (when show_warnings is set) and skipped rather than treated
+ * as fatal.  Returns 0 only when scan_btree() fails on a child, 1 otherwise.
+ */
+static int
+scanfunc_refcntbt(
+	struct xfs_btree_block	*block,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		agbno,
+	int			level,
+	typnm_t			btype,
+	void			*arg)
+{
+	xfs_refcount_ptr_t	*ptrs;
+	xfs_agblock_t		child;
+	int			nrecs;
+	int			idx;
+
+	if (level == 0)
+		return 1;
+
+	nrecs = be16_to_cpu(block->bb_numrecs);
+	if (nrecs > mp->m_refc_mxr[1]) {
+		if (show_warnings)
+			print_warning("invalid numrecs (%u) in %s block %u/%u",
+				nrecs, typtab[btype].name, agno, agbno);
+		return 1;
+	}
+
+	ptrs = XFS_REFCOUNT_PTR_ADDR(block, 1, mp->m_refc_mxr[1]);
+	for (idx = 0; idx < nrecs; idx++) {
+		child = be32_to_cpu(ptrs[idx]);
+
+		if (valid_bno(agno, child)) {
+			if (!scan_btree(agno, child, level, btype, arg,
+					scanfunc_refcntbt))
+				return 0;
+		} else if (show_warnings) {
+			print_warning("invalid block number (%u/%u) "
+					"in %s block %u/%u",
+					agno, child,
+					typtab[btype].name, agno, agbno);
+		}
+	}
+	return 1;
+}
+
+/*
+ * Walk one AG's refcount btree for metadump, starting at the root recorded
+ * in the AGF.
+ *
+ * Returns 1 without scanning when the filesystem lacks the reflink feature,
+ * or when the root block number or level count in the AGF looks invalid (a
+ * warning is printed if show_warnings is set).  Otherwise returns whatever
+ * scan_btree() returns.
+ */
+static int
+copy_refcount_btree(
+	xfs_agnumber_t	agno,
+	struct xfs_agf	*agf)
+{
+	xfs_agblock_t	root;
+	int		levels;
+
+	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+		return 1;
+
+	root = be32_to_cpu(agf->agf_refcount_root);
+	levels = be32_to_cpu(agf->agf_refcount_level);
+
+	/* validate root and levels before processing the tree */
+	if (root != 0 && root <= mp->m_sb.sb_agblocks) {
+		if (levels < XFS_BTREE_MAXLEVELS)
+			return scan_btree(agno, root, levels, TYP_REFCBT, agf,
+					scanfunc_refcntbt);
+		if (show_warnings)
+			print_warning("invalid level (%u) in refcntbt root "
+					"in agf %u", levels, agno);
+	} else if (show_warnings) {
+		print_warning("invalid block number (%u) in refcntbt "
+				"root in agf %u", root, agno);
+	}
+
+	return 1;
+}
+
/* filename and extended attribute obfuscation routines */
struct name_ent {
@@ -2506,6 +2578,8 @@ scan_ag(
goto pop_out;
if (!copy_rmap_btree(agno, agf))
goto pop_out;
+ if (!copy_refcount_btree(agno, agf))
+ goto pop_out;
}
/* copy inode btrees and the inodes and their associated metadata */
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 43/53] xfs_growfs: report the presence of the reflink feature
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (41 preceding siblings ...)
2015-12-19 9:09 ` [PATCH 42/53] xfs_db: metadump should copy the refcount btree too Darrick J. Wong
@ 2015-12-19 9:09 ` Darrick J. Wong
2015-12-19 9:09 ` [PATCH 44/53] xfs_repair: check the existing refcount btree Darrick J. Wong
` (9 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:09 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Report the presence of the reflink feature in xfs_info.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
growfs/xfs_growfs.c | 12 +++++++++---
libxfs/xfs_fs.h | 3 ++-
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/growfs/xfs_growfs.c b/growfs/xfs_growfs.c
index 2b46480..a294e14 100644
--- a/growfs/xfs_growfs.c
+++ b/growfs/xfs_growfs.c
@@ -59,12 +59,14 @@ report_info(
int ftype_enabled,
int finobt_enabled,
int spinodes,
- int rmapbt_enabled)
+ int rmapbt_enabled,
+ int reflink_enabled)
{
printf(_(
"meta-data=%-22s isize=%-6u agcount=%u, agsize=%u blks\n"
" =%-22s sectsz=%-5u attr=%u, projid32bit=%u\n"
" =%-22s crc=%-8u finobt=%u spinodes=%u rmapbt=%u\n"
+ " =%-22s reflink=%u\n"
"data =%-22s bsize=%-6u blocks=%llu, imaxpct=%u\n"
" =%-22s sunit=%-6u swidth=%u blks\n"
"naming =version %-14u bsize=%-6u ascii-ci=%d ftype=%d\n"
@@ -75,6 +77,7 @@ report_info(
mntpoint, geo.inodesize, geo.agcount, geo.agblocks,
"", geo.sectsize, attrversion, projid32bit,
"", crcs_enabled, finobt_enabled, spinodes, rmapbt_enabled,
+ "", reflink_enabled,
"", geo.blocksize, (unsigned long long)geo.datablocks,
geo.imaxpct,
"", geo.sunit, geo.swidth,
@@ -129,6 +132,7 @@ main(int argc, char **argv)
int finobt_enabled; /* free inode btree */
int spinodes;
int rmapbt_enabled;
+ int reflink_enabled;
progname = basename(argv[0]);
setlocale(LC_ALL, "");
@@ -253,12 +257,13 @@ main(int argc, char **argv)
finobt_enabled = geo.flags & XFS_FSOP_GEOM_FLAGS_FINOBT ? 1 : 0;
spinodes = geo.flags & XFS_FSOP_GEOM_FLAGS_SPINODES ? 1 : 0;
rmapbt_enabled = geo.flags & XFS_FSOP_GEOM_FLAGS_RMAPBT ? 1 : 0;
+ reflink_enabled = geo.flags & XFS_FSOP_GEOM_FLAGS_REFLINK ? 1 : 0;
if (nflag) {
report_info(geo, datadev, isint, logdev, rtdev,
lazycount, dirversion, logversion,
attrversion, projid32bit, crcs_enabled, ci,
ftype_enabled, finobt_enabled, spinodes,
- rmapbt_enabled);
+ rmapbt_enabled, reflink_enabled);
exit(0);
}
@@ -296,7 +301,8 @@ main(int argc, char **argv)
report_info(geo, datadev, isint, logdev, rtdev,
lazycount, dirversion, logversion,
attrversion, projid32bit, crcs_enabled, ci, ftype_enabled,
- finobt_enabled, spinodes, rmapbt_enabled);
+ finobt_enabled, spinodes, rmapbt_enabled,
+ reflink_enabled);
ddsize = xi.dsize;
dlsize = ( xi.logBBsize? xi.logBBsize :
diff --git a/libxfs/xfs_fs.h b/libxfs/xfs_fs.h
index 3af7747..7ee757e 100644
--- a/libxfs/xfs_fs.h
+++ b/libxfs/xfs_fs.h
@@ -241,7 +241,8 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */
-#define XFS_FSOP_GEOM_FLAGS_RMAPBT 0x80000 /* reverse-mapping btree */
+#define XFS_FSOP_GEOM_FLAGS_RMAPBT 0x80000 /* reverse mapping btree */
+#define XFS_FSOP_GEOM_FLAGS_REFLINK 0x100000 /* files can share blocks */
/*
* Minimum and maximum sizes need for growth checks.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 44/53] xfs_repair: check the existing refcount btree
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (42 preceding siblings ...)
2015-12-19 9:09 ` [PATCH 43/53] xfs_growfs: report the presence of the reflink feature Darrick J. Wong
@ 2015-12-19 9:09 ` Darrick J. Wong
2015-12-19 9:09 ` [PATCH 45/53] xfs_repair: handle multiple owners of data blocks Darrick J. Wong
` (8 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:09 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Spot-check the refcount btree for obvious errors, and mark the
refcount btree blocks as such.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/incore.h | 3 +
repair/scan.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++
repair/xfs_repair.c | 2 +
3 files changed, 189 insertions(+), 1 deletion(-)
diff --git a/repair/incore.h b/repair/incore.h
index bc0810b..b6c4b4f 100644
--- a/repair/incore.h
+++ b/repair/incore.h
@@ -106,7 +106,8 @@ typedef struct rt_extent_tree_node {
#define XR_E_INUSE_FS1 9 /* used by fs ag header or log (rmap btree) */
#define XR_E_INO1 10 /* used by inodes (marked by rmap btree) */
#define XR_E_FS_MAP1 11 /* used by fs space/inode maps (rmap btree) */
-#define XR_E_BAD_STATE 12
+#define XR_E_REFC 12 /* used by fs ag reference count btree */
+#define XR_E_BAD_STATE 13
/* separate state bit, OR'ed into high (4th) bit of ex_state field */
diff --git a/repair/scan.c b/repair/scan.c
index 823401b..4be02a6 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -935,6 +935,9 @@ advance:
case XFS_RMAP_OWN_INODES:
set_bmap(agno, b, XR_E_INO1);
break;
+ case XFS_RMAP_OWN_REFC:
+ set_bmap(agno, b, XR_E_REFC);
+ break;
case XFS_RMAP_OWN_NULL:
/* still unknown */
break;
@@ -970,6 +973,14 @@ _("inode block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
agno, b, b + blen - 1,
name, state, owner);
break;
+ case XR_E_REFC:
+ if (owner == XFS_RMAP_OWN_REFC)
+ break;
+ do_warn(
+_("AG refcount block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+ agno, b, b + blen - 1,
+ name, state, owner);
+ break;
case XR_E_INUSE:
if (owner >= 0 &&
owner < mp->m_sb.sb_dblocks)
@@ -1052,6 +1063,167 @@ out:
rmap_avoid_check();
}
+static void
+scan_refcbt(
+ struct xfs_btree_block *block,
+ int level,
+ xfs_agblock_t bno,
+ xfs_agnumber_t agno,
+ int suspect,
+ int isroot,
+ __uint32_t magic,
+ void *priv)
+{
+ const char *name = "refcount";
+ int i;
+ xfs_refcount_ptr_t *pp;
+ struct xfs_refcount_rec *rp;
+ int hdr_errors = 0;
+ int numrecs;
+ int state;
+ xfs_agblock_t lastblock = 0;
+
+ if (magic != XFS_REFC_CRC_MAGIC) {
+ name = "(unknown)";
+ hdr_errors++;
+ suspect++;
+ goto out;
+ }
+
+ if (be32_to_cpu(block->bb_magic) != magic) {
+ do_warn(_("bad magic # %#x in %s btree block %d/%d\n"),
+ be32_to_cpu(block->bb_magic), name, agno, bno);
+ hdr_errors++;
+ if (suspect)
+ goto out;
+ }
+
+ if (be16_to_cpu(block->bb_level) != level) {
+ do_warn(_("expected level %d got %d in %s btree block %d/%d\n"),
+ level, be16_to_cpu(block->bb_level), name, agno, bno);
+ hdr_errors++;
+ if (suspect)
+ goto out;
+ }
+
+ /* check for btree blocks multiply claimed */
+ state = get_bmap(agno, bno);
+ if (!(state == XR_E_UNKNOWN || state == XR_E_REFC)) {
+ set_bmap(agno, bno, XR_E_MULT);
+ do_warn(
+_("%s btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
+ name, state, agno, bno, suspect);
+ goto out;
+ }
+ set_bmap(agno, bno, XR_E_FS_MAP);
+
+ numrecs = be16_to_cpu(block->bb_numrecs);
+ if (level == 0) {
+ if (numrecs > mp->m_refc_mxr[0]) {
+ numrecs = mp->m_refc_mxr[0];
+ hdr_errors++;
+ }
+ if (isroot == 0 && numrecs < mp->m_refc_mnr[0]) {
+ numrecs = mp->m_refc_mnr[0];
+ hdr_errors++;
+ }
+
+ if (hdr_errors) {
+ do_warn(
+ _("bad btree nrecs (%u, min=%u, max=%u) in %s btree block %u/%u\n"),
+ be16_to_cpu(block->bb_numrecs),
+ mp->m_refc_mnr[0], mp->m_refc_mxr[0],
+ name, agno, bno);
+ suspect++;
+ }
+
+ rp = XFS_REFCOUNT_REC_ADDR(block, 1);
+ for (i = 0; i < numrecs; i++) {
+ xfs_agblock_t b, end;
+ xfs_extlen_t len;
+ xfs_nlink_t nr;
+
+ b = be32_to_cpu(rp[i].rc_startblock);
+ len = be32_to_cpu(rp[i].rc_blockcount);
+ nr = be32_to_cpu(rp[i].rc_refcount);
+ end = b + len;
+
+ if (!verify_agbno(mp, agno, b)) {
+ do_warn(
+ _("invalid start block %u in record %u of %s btree block %u/%u\n"),
+ b, i, name, agno, bno);
+ continue;
+ }
+ if (len == 0 || !verify_agbno(mp, agno, end - 1)) {
+ do_warn(
+ _("invalid length %u in record %u of %s btree block %u/%u\n"),
+ len, i, name, agno, bno);
+ continue;
+ }
+
+ if (nr < 2 || nr > MAXREFCOUNT) {
+ do_warn(
+ _("invalid reference count %u in record %u of %s btree block %u/%u\n"),
+ nr, i, name, agno, bno);
+ continue;
+ }
+
+ if (b && b <= lastblock) {
+ do_warn(_(
+ "out-of-order %s btree record %d (%u %u) block %u/%u\n"),
+ name, i, b, len, agno, bno);
+ } else {
+ lastblock = b;
+ }
+
+ /* XXX: probably want to mark the reflinked areas? */
+ }
+ goto out;
+ }
+
+ /*
+ * interior record
+ */
+ pp = XFS_REFCOUNT_PTR_ADDR(block, 1, mp->m_refc_mxr[1]);
+
+ if (numrecs > mp->m_refc_mxr[1]) {
+ numrecs = mp->m_refc_mxr[1];
+ hdr_errors++;
+ }
+ if (isroot == 0 && numrecs < mp->m_refc_mnr[1]) {
+ numrecs = mp->m_refc_mnr[1];
+ hdr_errors++;
+ }
+
+ /*
+ * don't pass bogus tree flag down further if this block
+ * looked ok. bail out if two levels in a row look bad.
+ */
+ if (hdr_errors) {
+ do_warn(
+ _("bad btree nrecs (%u, min=%u, max=%u) in %s btree block %u/%u\n"),
+ be16_to_cpu(block->bb_numrecs),
+ mp->m_refc_mnr[1], mp->m_refc_mxr[1],
+ name, agno, bno);
+ if (suspect)
+ goto out;
+ suspect++;
+ } else if (suspect) {
+ suspect = 0;
+ }
+
+ for (i = 0; i < numrecs; i++) {
+ xfs_agblock_t bno = be32_to_cpu(pp[i]);
+
+ if (bno != 0 && verify_agbno(mp, agno, bno)) {
+ scan_sbtree(bno, level, agno, suspect, scan_refcbt, 0,
+ magic, priv, &xfs_refcountbt_buf_ops);
+ }
+ }
+out:
+ return;
+}
+
/*
* The following helpers are to help process and validate individual on-disk
* inode btree records. We have two possible inode btrees with slightly
@@ -1830,6 +2002,19 @@ validate_agf(
}
}
+ if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ bno = be32_to_cpu(agf->agf_refcount_root);
+ if (bno != 0 && verify_agbno(mp, agno, bno)) {
+ scan_sbtree(bno,
+ be32_to_cpu(agf->agf_refcount_level),
+ agno, 0, scan_refcbt, 1, XFS_REFC_CRC_MAGIC,
+ agcnts, &xfs_refcountbt_buf_ops);
+ } else {
+ do_warn(_("bad agbno %u for refcntbt root, agno %d\n"),
+ bno, agno);
+ }
+ }
+
if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 8fc6fd5..1d402a5 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -422,6 +422,8 @@ calc_mkfs(xfs_mount_t *mp)
fino_bno += min(2, mp->m_ag_maxlevels);
fino_bno++;
}
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ fino_bno++;
/*
* If the log is allocated in the first allocation group we need to
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 45/53] xfs_repair: handle multiple owners of data blocks
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (43 preceding siblings ...)
2015-12-19 9:09 ` [PATCH 44/53] xfs_repair: check the existing refcount btree Darrick J. Wong
@ 2015-12-19 9:09 ` Darrick J. Wong
2015-12-19 9:10 ` [PATCH 46/53] xfs_repair: process reverse-mapping data into refcount data Darrick J. Wong
` (7 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:09 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
If reflink is enabled, don't freak out if there are multiple owners of
a given block; that's just a sign that each of those owners is a
reflink file.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/dinode.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/scan.c | 40 ++++++++++++++++++++++++++++++++++++++--
2 files changed, 91 insertions(+), 2 deletions(-)
diff --git a/repair/dinode.c b/repair/dinode.c
index 7766dea..2ff1476 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -722,6 +722,9 @@ _("Fatal error: inode %" PRIu64 " - blkmap_set_ext(): %s\n"
* checking each entry without setting the
* block bitmap
*/
+ if (type == XR_INO_DATA &&
+ xfs_sb_version_hasreflink(&mp->m_sb))
+ goto skip_dup;
if (search_dup_extent(agno, agbno, ebno)) {
do_warn(
_("%s fork in ino %" PRIu64 " claims dup extent, "
@@ -731,6 +734,7 @@ _("%s fork in ino %" PRIu64 " claims dup extent, "
irec.br_blockcount);
goto done;
}
+skip_dup:
*tot += irec.br_blockcount;
continue;
}
@@ -770,6 +774,9 @@ _("%s fork in inode %" PRIu64 " claims metadata block %" PRIu64 "\n"),
case XR_E_INUSE:
case XR_E_MULT:
set_bmap_ext(agno, agbno, blen, XR_E_MULT);
+ if (type == XR_INO_DATA &&
+ xfs_sb_version_hasreflink(&mp->m_sb))
+ break;
do_warn(
_("%s fork in %s inode %" PRIu64 " claims used block %" PRIu64 "\n"),
forkname, ftype, ino, b);
@@ -2460,6 +2467,52 @@ _("bad (negative) size %" PRId64 " on inode %" PRIu64 "\n"),
}
}
+ /*
+ * check that we only have valid flags2 set, and those that are set make
+ * sense.
+ */
+ if (dino->di_version >= 3) {
+ uint16_t flags = be16_to_cpu(dino->di_flags);
+ uint64_t flags2 = be64_to_cpu(dino->di_flags2);
+
+ if (flags2 & ~XFS_DIFLAG2_ANY) {
+ if (!uncertain) {
+ do_warn(
+ _("Bad flags2 set in inode %" PRIu64 "\n"),
+ lino);
+ }
+ flags2 &= XFS_DIFLAG2_ANY;
+ }
+
+ if ((flags2 & XFS_DIFLAG2_REFLINK) &&
+ (flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT))) {
+ if (!uncertain) {
+ do_warn(
+ _("Cannot have a reflinked realtime inode %" PRIu64 "\n"),
+ lino);
+ }
+ goto clear_bad_out;
+ }
+
+ if ((flags2 & XFS_DIFLAG2_REFLINK) &&
+ !xfs_sb_version_hasreflink(&mp->m_sb)) {
+ if (!uncertain) {
+ do_warn(
+ _("inode %" PRIu64 " is marked reflinked but file system does not support reflink\n"),
+ lino);
+ }
+ goto clear_bad_out;
+ }
+ if (!verify_mode && flags2 != be64_to_cpu(dino->di_flags2)) {
+ if (!no_modify) {
+ do_warn(_("fixing bad flags2.\n"));
+ dino->di_flags2 = cpu_to_be64(flags2);
+ *dirty = 1;
+ } else
+ do_warn(_("would fix bad flags2.\n"));
+ }
+ }
+
if (verify_mode)
return retval;
diff --git a/repair/scan.c b/repair/scan.c
index 4be02a6..54b9b68 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -783,7 +783,29 @@ ino_issparse(
return xfs_inobt_is_sparse_disk(rp, offset);
}
-
+
+static bool
+rmap_in_order(
+ xfs_agblock_t b,
+ xfs_agblock_t lastblock,
+ int64_t owner,
+ int64_t lastowner,
+ int64_t offset,
+ int64_t lastoffset)
+{
+ if (b > lastblock)
+ return true;
+ else if (b < lastblock)
+ return false;
+
+ if (owner > lastowner)
+ return true;
+ else if (owner < lastowner)
+ return false;
+
+ return offset > lastoffset;
+}
+
static void
scan_rmapbt(
struct xfs_btree_block *block,
@@ -910,7 +932,12 @@ advance:
} else {
bool bad;
- bad = b <= lastblock;
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ bad = !rmap_in_order(b, lastblock,
+ owner, lastowner,
+ offset, lastoffset);
+ else
+ bad = b <= lastblock;
if (bad)
do_warn(
_("out-of-order rmap btree record %d (%u %"PRId64" %"PRIx64" %u) block %u/%u\n"),
@@ -997,6 +1024,15 @@ _("in use block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
* be caught later.
*/
break;
+ case XR_E_INUSE1:
+ /*
+ * multiple inode owners are ok with
+ * reflink enabled
+ */
+ if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+ !XFS_RMAP_NON_INODE_OWNER(owner))
+ break;
+ /* fall through */
default:
do_warn(
_("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 46/53] xfs_repair: process reverse-mapping data into refcount data
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (44 preceding siblings ...)
2015-12-19 9:09 ` [PATCH 45/53] xfs_repair: handle multiple owners of data blocks Darrick J. Wong
@ 2015-12-19 9:10 ` Darrick J. Wong
2015-12-19 9:10 ` [PATCH 47/53] xfs_repair: record reflink inode state Darrick J. Wong
` (6 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:10 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Take all the reverse-mapping data we've acquired and use it to generate
reference count data. This data is used in phase 5 to rebuild the
refcount btree.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/phase4.c | 27 ++++++
repair/rmap.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 2
3 files changed, 263 insertions(+), 2 deletions(-)
diff --git a/repair/phase4.c b/repair/phase4.c
index 98aab35..0be8579 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -183,6 +183,21 @@ _("%s while checking reverse-mappings"),
}
static void
+compute_ag_refcounts(
+ work_queue_t *wq,
+ xfs_agnumber_t agno,
+ void *arg)
+{
+ int error;
+
+ error = compute_refcounts(wq->mp, agno);
+ if (error)
+ do_error(
+_("%s while computing reference count records.\n"),
+ strerror(-error));
+}
+
+static void
process_rmap_data(
struct xfs_mount *mp)
{
@@ -196,6 +211,14 @@ process_rmap_data(
for (i = 0; i < mp->m_sb.sb_agcount; i++)
queue_work(&wq, check_rmap_btrees, i, NULL);
destroy_work_queue(&wq);
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return;
+
+ create_work_queue(&wq, mp, libxfs_nproc());
+ for (i = 0; i < mp->m_sb.sb_agcount; i++)
+ queue_work(&wq, compute_ag_refcounts, i, NULL);
+ destroy_work_queue(&wq);
}
void
@@ -349,7 +372,9 @@ phase4(xfs_mount_t *mp)
/*
* Process all the reverse-mapping data that we collected. This
- * involves checking the rmap data against the btree.
+ * involves checking the rmap data against the btree, computing
+ * reference counts based on the rmap data, and checking the counts
+ * against the refcount btree.
*/
process_rmap_data(mp);
diff --git a/repair/rmap.c b/repair/rmap.c
index 5d49eef..677de14 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -40,6 +40,7 @@ struct xfs_ag_rmap {
struct xfs_slab *ar_raw_rmaps; /* unmerged rmaps */
int ar_flcount; /* agfl entries from leftover */
/* agbt allocations */
+ struct xfs_slab *ar_refcount_items; /* refcount items, p4-5 */
};
static struct xfs_ag_rmap *ag_rmaps;
@@ -83,7 +84,8 @@ bool
needs_rmap_work(
struct xfs_mount *mp)
{
- return xfs_sb_version_hasrmapbt(&mp->m_sb);
+ return xfs_sb_version_hasreflink(&mp->m_sb) ||
+ xfs_sb_version_hasrmapbt(&mp->m_sb);
}
/**
@@ -116,6 +118,11 @@ _("Insufficient memory while allocating reverse mapping slabs."));
if (error)
do_error(
_("Insufficient memory while allocating raw metadata reverse mapping slabs."));
+ error = init_slab(&ag_rmaps[i].ar_refcount_items,
+ sizeof(struct xfs_refcount_irec));
+ if (error)
+ do_error(
+_("Insufficient memory while allocating refcount item slabs."));
}
}
@@ -136,6 +143,7 @@ free_rmaps(
for (i = 0; i < mp->m_sb.sb_agcount; i++) {
free_slab(&ag_rmaps[i].ar_rmaps);
free_slab(&ag_rmaps[i].ar_raw_rmaps);
+ free_slab(&ag_rmaps[i].ar_refcount_items);
}
free(ag_rmaps);
ag_rmaps = NULL;
@@ -595,6 +603,232 @@ dump_rmap(
# define dump_rmap(m, a, r)
#endif
+/*
+ * Rebuilding the Reference Count & Reverse Mapping Btrees
+ *
+ * The reference count (refcnt) and reverse mapping (rmap) btrees are rebuilt
+ * during phase 5, like all other AG btrees. Therefore, reverse mappings must
+ * be processed into reference counts at the end of phase 4, and the rmaps must
+ * be recorded during phase 4. There is a need to access the rmaps in physical
+ * block order, but no particular need for random access, so the slab.c code
+ * provides a big logical array (consisting of smaller slabs) and some inorder
+ * iterator functions.
+ *
+ * Once we've recorded all the reverse mappings, we're ready to translate the
+ * rmaps into refcount entries. Imagine the rmap entries as rectangles
+ * representing extents of physical blocks, and that the rectangles can be laid
+ * down to allow them to overlap each other; then we know that we must emit
+ * a refcnt btree entry wherever the amount of overlap changes, i.e. the
+ * emission stimulus is level-triggered:
+ *
+ * - ---
+ * -- ----- ---- --- ------
+ * -- ---- ----------- ---- ---------
+ * -------------------------------- -----------
+ * ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^
+ * 2 1 23 21 3 43 234 2123 1 01 2 3 0
+ *
+ * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
+ *
+ * Note that in the actual refcnt btree we don't store the refcount < 2 cases
+ * because the bnobt tells us which blocks are free; single-use blocks aren't
+ * recorded in the bnobt or the refcntbt. If the rmapbt supports storing
+ * multiple entries covering a given block we could theoretically dispense with
+ * the refcntbt and simply count rmaps, but that's inefficient in the (hot)
+ * write path, so we'll take the cost of the extra tree to save time. Also
+ * there's no guarantee that rmap will be enabled.
+ *
+ * Given an array of rmaps sorted by physical block number, a starting physical
+ * block (sp), a bag to hold rmaps that cover sp, and the next physical
+ * block where the level changes (np), we can reconstruct the refcount
+ * btree as follows:
+ *
+ * While there are still unprocessed rmaps in the array,
+ * - Set sp to the physical block (pblk) of the next unprocessed rmap.
+ * - Add to the bag all rmaps in the array where startblock == sp.
+ * - Set np to the physical block where the bag size will change.
+ * This is the minimum of (the pblk of the next unprocessed rmap) and
+ * (startblock + len of each rmap in the bag).
+ * - Record the bag size as old_bag_size.
+ *
+ * - While the bag isn't empty,
+ * - Remove from the bag all rmaps where startblock + len == np.
+ * - Add to the bag all rmaps in the array where startblock == np.
+ * - If the bag size isn't old_bag_size, store the refcount entry
+ * (sp, np - sp, old_bag_size) in the refcnt btree.
+ * - If the bag is empty, break out of the inner loop.
+ * - Set old_bag_size to the bag size
+ * - Set sp = np.
+ * - Set np to the physical block where the bag size will change.
+ * This is the minimum of (the pblk of the next unprocessed rmap) and
+ * (startblock + len of each rmap in the bag).
+ *
+ * An implementation detail is that because this processing happens during
+ * phase 4, the refcount entries are stored in an array so that phase 5 can
+ * load them into the refcount btree. The rmaps can be loaded directly into
+ * the rmap btree during phase 5 as well.
+ */
+
+/*
+ * Emit a refcount object for refcntbt reconstruction during phase 5.
+ */
+#define REFCOUNT_CLAMP(nr) ((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
+static void
+refcount_emit(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ size_t nr_rmaps)
+{
+ struct xfs_refcount_irec rlrec;
+ int error;
+ struct xfs_slab *rlslab;
+
+ rlslab = ag_rmaps[agno].ar_refcount_items;
+ ASSERT(nr_rmaps > 0);
+
+ dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
+ agno, agbno, len, nr_rmaps);
+ rlrec.rc_startblock = agbno;
+ rlrec.rc_blockcount = len;
+ rlrec.rc_refcount = REFCOUNT_CLAMP(nr_rmaps);
+ error = slab_add(rlslab, &rlrec);
+ if (error)
+ do_error(
+_("Insufficient memory while recreating refcount tree."));
+}
+#undef REFCOUNT_CLAMP
+
+/**
+ * compute_refcounts() - Transform a pile of physical block mapping
+ * observations into refcount data for eventual
+ * rebuilding of the btrees.
+ *
+ * @mp: XFS mount object.
+ * @agno: AG number.
+ */
+#define RMAP_END(r) ((r)->rm_startblock + XFS_RMAP_LEN((r)->rm_blockcount))
+int
+compute_refcounts(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_bag *stack_top = NULL;
+ struct xfs_slab *rmaps;
+ struct xfs_slab_cursor *rmaps_cur;
+ struct xfs_rmap_irec *array_cur;
+ struct xfs_rmap_irec *rmap;
+ xfs_agblock_t sbno; /* first bno of this rmap set */
+ xfs_agblock_t cbno; /* first bno of this refcount set */
+ xfs_agblock_t nbno; /* next bno where rmap set changes */
+ size_t n, idx;
+ size_t old_stack_nr;
+ int error;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return 0;
+
+ rmaps = ag_rmaps[agno].ar_rmaps;
+
+ error = init_slab_cursor(rmaps, rmap_compare, &rmaps_cur);
+ if (error)
+ return error;
+
+ error = init_bag(&stack_top);
+ if (error)
+ goto err;
+
+ /* While there are rmaps to be processed... */
+ n = 0;
+ while (n < slab_count(rmaps)) {
+ array_cur = peek_slab_cursor(rmaps_cur);
+ sbno = cbno = array_cur->rm_startblock;
+ /* Push all rmaps with pblk == sbno onto the stack */
+ for (;
+ array_cur && array_cur->rm_startblock == sbno;
+ array_cur = peek_slab_cursor(rmaps_cur)) {
+ advance_slab_cursor(rmaps_cur); n++;
+ dump_rmap("push0", agno, array_cur);
+ error = bag_add(stack_top, array_cur);
+ if (error)
+ goto err;
+ }
+
+ /* Set nbno to the bno of the next refcount change */
+ if (n < slab_count(rmaps))
+ nbno = array_cur->rm_startblock;
+ else
+ nbno = NULLAGBLOCK;
+ foreach_bag_ptr(stack_top, idx, rmap) {
+ nbno = min(nbno, RMAP_END(rmap));
+ }
+
+ /* Sanity check: next change point must be past sbno */
+ ASSERT(nbno > sbno);
+ old_stack_nr = bag_count(stack_top);
+
+ /* While stack isn't empty... */
+ while (bag_count(stack_top)) {
+ /* Pop all rmaps that end at nbno */
+ foreach_bag_ptr_reverse(stack_top, idx, rmap) {
+ if (RMAP_END(rmap) != nbno)
+ continue;
+ dump_rmap("pop", agno, rmap);
+ error = bag_remove(stack_top, idx);
+ if (error)
+ goto err;
+ }
+
+ /* Push array items that start at nbno */
+ for (;
+ array_cur && array_cur->rm_startblock == nbno;
+ array_cur = peek_slab_cursor(rmaps_cur)) {
+ advance_slab_cursor(rmaps_cur); n++;
+ dump_rmap("push1", agno, array_cur);
+ error = bag_add(stack_top, array_cur);
+ if (error)
+ goto err;
+ }
+
+ /* Emit refcount if necessary */
+ ASSERT(nbno > cbno);
+ if (bag_count(stack_top) != old_stack_nr) {
+ if (old_stack_nr > 1) {
+ refcount_emit(mp, agno, cbno,
+ nbno - cbno,
+ old_stack_nr);
+ }
+ cbno = nbno;
+ }
+
+ /* Stack empty, go find the next rmap */
+ if (bag_count(stack_top) == 0)
+ break;
+ old_stack_nr = bag_count(stack_top);
+ sbno = nbno;
+
+ /* Set nbno to the bno of the next refcount change */
+ if (n < slab_count(rmaps))
+ nbno = array_cur->rm_startblock;
+ else
+ nbno = NULLAGBLOCK;
+ foreach_bag_ptr(stack_top, idx, rmap) {
+ nbno = min(nbno, RMAP_END(rmap));
+ }
+
+ /* Sanity check: next change point must be past sbno */
+ ASSERT(nbno > sbno);
+ }
+ }
+err:
+ free_bag(&stack_top);
+ free_slab_cursor(&rmaps_cur);
+
+ return error;
+}
+#undef RMAP_END
+
/**
* rmap_record_count() -- Return the number of rmap objects for an AG.
*
diff --git a/repair/rmap.h b/repair/rmap.h
index 0b4e73b..13df5d6 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -39,6 +39,8 @@ extern int init_rmap_cursor(xfs_agnumber_t, struct xfs_slab_cursor **);
extern void rmap_avoid_check(void);
extern int check_rmaps(struct xfs_mount *, xfs_agnumber_t);
+extern int compute_refcounts(struct xfs_mount *, xfs_agnumber_t);
+
extern void fix_freelist(struct xfs_mount *, xfs_agnumber_t, bool);
extern void rmap_store_agflcount(struct xfs_mount *, xfs_agnumber_t, int);
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 47/53] xfs_repair: record reflink inode state
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (45 preceding siblings ...)
2015-12-19 9:10 ` [PATCH 46/53] xfs_repair: process reverse-mapping data into refcount data Darrick J. Wong
@ 2015-12-19 9:10 ` Darrick J. Wong
2015-12-19 9:10 ` [PATCH 48/53] xfs_repair: fix inode reflink flags Darrick J. Wong
` (5 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:10 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Record the state of the per-inode reflink flag, so that we can
compare against the rmap data and update the flags accordingly.
Clear the (reflink) state if we clear the inode.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/dino_chunks.c | 1 +
repair/dinode.c | 6 ++++++
repair/incore.h | 38 ++++++++++++++++++++++++++++++++++++++
repair/incore_ino.c | 2 ++
repair/rmap.c | 32 ++++++++++++++++++++++++++++++++
repair/rmap.h | 3 +++
6 files changed, 82 insertions(+)
diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c
index 7dbaca6..4db9512 100644
--- a/repair/dino_chunks.c
+++ b/repair/dino_chunks.c
@@ -931,6 +931,7 @@ next_readbuf:
do_warn(_("would have cleared inode %" PRIu64 "\n"),
ino);
}
+ clear_inode_was_rl(ino_rec, irec_offset);
}
process_next:
diff --git a/repair/dinode.c b/repair/dinode.c
index 2ff1476..6de3720 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -2608,6 +2608,12 @@ _("bad non-zero extent size %u for non-realtime/extsize inode %" PRIu64 ", "),
goto clear_bad_out;
/*
+ * record the state of the reflink flag
+ */
+ if (collect_rmaps)
+ record_inode_reflink_flag(mp, dino, agno, ino, lino);
+
+ /*
* check data fork -- if it's bad, clear the inode
*/
if (process_inode_data_fork(mp, agno, ino, dino, type, dirty,
diff --git a/repair/incore.h b/repair/incore.h
index b6c4b4f..bcd2f4b 100644
--- a/repair/incore.h
+++ b/repair/incore.h
@@ -283,6 +283,8 @@ typedef struct ino_tree_node {
__uint64_t ir_sparse; /* sparse inode bitmask */
__uint64_t ino_confirmed; /* confirmed bitmask */
__uint64_t ino_isa_dir; /* bit == 1 if a directory */
+ __uint64_t ino_was_rl; /* bit == 1 if reflink flag set */
+ __uint64_t ino_is_rl; /* bit == 1 if reflink flag should be set */
__uint8_t nlink_size;
union ino_nlink disk_nlinks; /* on-disk nlinks, set in P3 */
union {
@@ -494,6 +496,42 @@ static inline bool is_inode_sparse(struct ino_tree_node *irec, int offset)
}
/*
+ * set/clear/test was inode marked as reflinked
+ */
+static inline void set_inode_was_rl(struct ino_tree_node *irec, int offset)
+{
+ irec->ino_was_rl |= IREC_MASK(offset);
+}
+
+static inline void clear_inode_was_rl(struct ino_tree_node *irec, int offset)
+{
+ irec->ino_was_rl &= ~IREC_MASK(offset);
+}
+
+static inline int inode_was_rl(struct ino_tree_node *irec, int offset)
+{
+ return (irec->ino_was_rl & IREC_MASK(offset)) != 0;
+}
+
+/*
+ * set/clear/test should inode be marked as reflinked
+ */
+static inline void set_inode_is_rl(struct ino_tree_node *irec, int offset)
+{
+ irec->ino_is_rl |= IREC_MASK(offset);
+}
+
+static inline void clear_inode_is_rl(struct ino_tree_node *irec, int offset)
+{
+ irec->ino_is_rl &= ~IREC_MASK(offset);
+}
+
+static inline int inode_is_rl(struct ino_tree_node *irec, int offset)
+{
+ return (irec->ino_is_rl & IREC_MASK(offset)) != 0;
+}
+
+/*
* add_inode_reached() is set on inode I only if I has been reached
* by an inode P claiming to be the parent and if I is a directory,
* the .. link in the I says that P is I's parent.
diff --git a/repair/incore_ino.c b/repair/incore_ino.c
index 1898257..2ec1765 100644
--- a/repair/incore_ino.c
+++ b/repair/incore_ino.c
@@ -257,6 +257,8 @@ alloc_ino_node(
irec->ino_startnum = starting_ino;
irec->ino_confirmed = 0;
irec->ino_isa_dir = 0;
+ irec->ino_was_rl = 0;
+ irec->ino_is_rl = 0;
irec->ir_free = (xfs_inofree_t) - 1;
irec->ir_sparse = 0;
irec->ino_un.ex_data = NULL;
diff --git a/repair/rmap.c b/repair/rmap.c
index 677de14..54d478e 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -1001,6 +1001,38 @@ err:
return 0;
}
+/*
+ * record_inode_reflink_flag() -- Record that an inode had the reflink flag
+ * set when repair started. The inode reflink
+ * flag will be adjusted as necessary.
+ * @mp: XFS mount object.
+ * @dino: On-disk inode.
+ * @agno: AG number of the inode.
+ * @ino: AG inode number.
+ * @lino: Full inode number.
+ */
+void
+record_inode_reflink_flag(
+ struct xfs_mount *mp,
+ struct xfs_dinode *dino,
+ xfs_agnumber_t agno,
+ xfs_agino_t ino,
+ xfs_ino_t lino)
+{
+ struct ino_tree_node *irec;
+ int off;
+
+ ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
+ if (!(be64_to_cpu(dino->di_flags2) & XFS_DIFLAG2_REFLINK))
+ return;
+ irec = find_inode_rec(mp, agno, ino);
+ off = get_inode_offset(mp, lino, irec);
+ ASSERT(!inode_was_rl(irec, off));
+ set_inode_was_rl(irec, off);
+ dbg_printf("set was_rl lino=%llu was=0x%llx\n",
+ (unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
+}
+
/**
* fix_freelist() - Regenerate the AGFL, so that we don't run out of it while
* rebuilding the rmapbt.
diff --git a/repair/rmap.h b/repair/rmap.h
index 13df5d6..b404c59 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -41,6 +41,9 @@ extern int check_rmaps(struct xfs_mount *, xfs_agnumber_t);
extern int compute_refcounts(struct xfs_mount *, xfs_agnumber_t);
+extern void record_inode_reflink_flag(struct xfs_mount *, struct xfs_dinode *,
+ xfs_agnumber_t, xfs_agino_t, xfs_ino_t);
+
extern void fix_freelist(struct xfs_mount *, xfs_agnumber_t, bool);
extern void rmap_store_agflcount(struct xfs_mount *, xfs_agnumber_t, int);
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 48/53] xfs_repair: fix inode reflink flags
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (46 preceding siblings ...)
2015-12-19 9:10 ` [PATCH 47/53] xfs_repair: record reflink inode state Darrick J. Wong
@ 2015-12-19 9:10 ` Darrick J. Wong
2015-12-19 9:10 ` [PATCH 49/53] xfs_repair: check the refcount btree against our observed reference counts when -n Darrick J. Wong
` (4 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:10 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
While we're computing reference counts, record which inodes actually
share blocks with other files and fix the flags as necessary.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/phase4.c | 20 ++++++++
repair/rmap.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 1
3 files changed, 157 insertions(+)
diff --git a/repair/phase4.c b/repair/phase4.c
index 0be8579..caa4221 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -198,6 +198,21 @@ _("%s while computing reference count records.\n"),
}
static void
+process_inode_reflink_flags(
+ struct work_queue *wq,
+ xfs_agnumber_t agno,
+ void *arg)
+{
+ int error;
+
+ error = fix_inode_reflink_flags(wq->mp, agno);
+ if (error)
+ do_error(
+_("%s while fixing inode reflink flags.\n"),
+ strerror(-error));
+}
+
+static void
process_rmap_data(
struct xfs_mount *mp)
{
@@ -219,6 +234,11 @@ process_rmap_data(
for (i = 0; i < mp->m_sb.sb_agcount; i++)
queue_work(&wq, compute_ag_refcounts, i, NULL);
destroy_work_queue(&wq);
+
+ create_work_queue(&wq, mp, libxfs_nproc());
+ for (i = 0; i < mp->m_sb.sb_agcount; i++)
+ queue_work(&wq, process_inode_reflink_flags, i, NULL);
+ destroy_work_queue(&wq);
}
void
diff --git a/repair/rmap.c b/repair/rmap.c
index 54d478e..05e3c98 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -670,6 +670,39 @@ dump_rmap(
*/
/*
+ * Mark all inodes in the reverse-mapping observation stack as requiring the
+ * reflink inode flag, if the stack depth is greater than 1.
+ */
+static void
+mark_inode_rl(
+ struct xfs_mount *mp,
+ struct xfs_bag *rmaps)
+{
+ xfs_agnumber_t iagno;
+ struct xfs_rmap_irec *rmap;
+ struct ino_tree_node *irec;
+ int off;
+ size_t idx;
+ xfs_agino_t ino;
+
+ if (bag_count(rmaps) < 2)
+ return;
+
+ /* Reflink flag accounting */
+ foreach_bag_ptr(rmaps, idx, rmap) {
+ ASSERT(!XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner));
+ iagno = XFS_INO_TO_AGNO(mp, rmap->rm_owner);
+ ino = XFS_INO_TO_AGINO(mp, rmap->rm_owner);
+ pthread_mutex_lock(&ag_locks[iagno].lock);
+ irec = find_inode_rec(mp, iagno, ino);
+ off = get_inode_offset(mp, rmap->rm_owner, irec);
+ /* lock here because we might go outside this ag */
+ set_inode_is_rl(irec, off);
+ pthread_mutex_unlock(&ag_locks[iagno].lock);
+ }
+}
+
+/*
* Emit a refcount object for refcntbt reconstruction during phase 5.
*/
#define REFCOUNT_CLAMP(nr) ((nr) > MAXREFCOUNT ? MAXREFCOUNT : (nr))
@@ -754,6 +787,7 @@ compute_refcounts(
if (error)
goto err;
}
+ mark_inode_rl(mp, stack_top);
/* Set nbno to the bno of the next refcount change */
if (n < slab_count(rmaps))
@@ -790,6 +824,7 @@ compute_refcounts(
if (error)
goto err;
}
+ mark_inode_rl(mp, stack_top);
/* Emit refcount if necessary */
ASSERT(nbno > cbno);
@@ -1033,6 +1068,107 @@ record_inode_reflink_flag(
(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}
+/*
+ * Fix an inode's reflink flag.
+ */
+static int
+fix_inode_reflink_flag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_agino_t agino,
+ bool set)
+{
+ struct xfs_dinode *dino;
+ struct xfs_buf *buf;
+
+ if (set)
+ do_warn(
+_("setting reflink flag on inode %"PRIu64"\n"),
+ XFS_AGINO_TO_INO(mp, agno, agino));
+ else if (!no_modify) /* && !set */
+ do_warn(
+_("clearing reflink flag on inode %"PRIu64"\n"),
+ XFS_AGINO_TO_INO(mp, agno, agino));
+ if (no_modify)
+ return 0;
+
+ buf = get_agino_buf(mp, agno, agino, &dino);
+ if (!buf)
+ return 1;
+ ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dino->di_ino));
+ if (set)
+ dino->di_flags2 |= cpu_to_be64(XFS_DIFLAG2_REFLINK);
+ else
+ dino->di_flags2 &= cpu_to_be64(~XFS_DIFLAG2_REFLINK);
+ libxfs_dinode_calc_crc(mp, dino);
+ libxfs_writebuf(buf, 0);
+
+ return 0;
+}
+
+/**
+ * fix_inode_reflink_flags() -- Fix discrepancies between the state of the
+ * inode reflink flag and our observations as to
+ * whether or not the inode really needs it.
+ * @mp: XFS mountpoint.
+ * @agno: AG number.
+ *
+ * Returns the result of the last fix_inode_reflink_flag() call, or zero if
+ * nothing needed fixing.
+ */
+int
+fix_inode_reflink_flags(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct ino_tree_node *irec;
+ int bit;
+ __uint64_t was;
+ __uint64_t is;
+ __uint64_t diff;
+ __uint64_t mask;
+ int error = 0;
+ xfs_agino_t agino;
+
+ /*
+ * Update the reflink flag for any inode where there's a discrepancy
+ * between the inode flag and whether or not we found any reflinked
+ * extents.
+ */
+ for (irec = findfirst_inode_rec(agno);
+ irec != NULL;
+ irec = next_ino_rec(irec)) {
+ ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
+ ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
+ was = irec->ino_was_rl;
+ is = irec->ino_is_rl;
+ if (was == is)
+ continue;
+ /* One bit per inode in this record; set bits mark mismatches. */
+ diff = was ^ is;
+ /* Cast explicitly so the format matches on every word size. */
+ dbg_printf("mismatch ino=%llu was=0x%llx is=0x%llx dif=0x%llx\n",
+ (unsigned long long)XFS_AGINO_TO_INO(mp, agno,
+ irec->ino_startnum),
+ (unsigned long long)was, (unsigned long long)is,
+ (unsigned long long)diff);
+
+ for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
+ agino = bit + irec->ino_startnum;
+ if (!(diff & mask))
+ continue;
+ else if (was & mask)
+ error = fix_inode_reflink_flag(mp, agno, agino,
+ false);
+ else if (is & mask)
+ error = fix_inode_reflink_flag(mp, agno, agino,
+ true);
+ else
+ ASSERT(0);
+ if (error)
+ do_error(
+_("Unable to fix reflink flag on inode %"PRIu64".\n"),
+ XFS_AGINO_TO_INO(mp, agno, agino));
+ }
+ }
+
+ return error;
+}
+
/**
* fix_freelist() - Regenerate the AGFL, so that we don't run out of it while
* rebuilding the rmapbt.
diff --git a/repair/rmap.h b/repair/rmap.h
index b404c59..d0bcde1 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -43,6 +43,7 @@ extern int compute_refcounts(struct xfs_mount *, xfs_agnumber_t);
extern void record_inode_reflink_flag(struct xfs_mount *, struct xfs_dinode *,
xfs_agnumber_t, xfs_agino_t, xfs_ino_t);
+extern int fix_inode_reflink_flags(struct xfs_mount *, xfs_agnumber_t);
extern void fix_freelist(struct xfs_mount *, xfs_agnumber_t, bool);
extern void rmap_store_agflcount(struct xfs_mount *, xfs_agnumber_t, int);
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 49/53] xfs_repair: check the refcount btree against our observed reference counts when -n
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (47 preceding siblings ...)
2015-12-19 9:10 ` [PATCH 48/53] xfs_repair: fix inode reflink flags Darrick J. Wong
@ 2015-12-19 9:10 ` Darrick J. Wong
2015-12-19 9:10 ` [PATCH 50/53] xfs_repair: rebuild the refcount btree Darrick J. Wong
` (3 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:10 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Check the observed reference counts against whatever's in the refcount
btree for discrepancies.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/phase4.c | 20 ++++++++
repair/rmap.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 4 ++
repair/scan.c | 2 +
4 files changed, 161 insertions(+)
diff --git a/repair/phase4.c b/repair/phase4.c
index caa4221..dd03ca4 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -213,6 +213,21 @@ _("%s while fixing inode reflink flags.\n"),
}
static void
+check_refcount_btrees(
+ work_queue_t *wq,
+ xfs_agnumber_t agno,
+ void *arg)
+{
+ int error;
+
+ /* Work queue callback: check one AG's refcount btree. */
+ error = check_refcounts(wq->mp, agno);
+ if (error)
+ do_error(
+_("%s while checking reference counts.\n"),
+ strerror(-error));
+}
+
+static void
process_rmap_data(
struct xfs_mount *mp)
{
@@ -239,6 +254,11 @@ process_rmap_data(
for (i = 0; i < mp->m_sb.sb_agcount; i++)
queue_work(&wq, process_inode_reflink_flags, i, NULL);
destroy_work_queue(&wq);
+
+ create_work_queue(&wq, mp, libxfs_nproc());
+ for (i = 0; i < mp->m_sb.sb_agcount; i++)
+ queue_work(&wq, check_refcount_btrees, i, NULL);
+ destroy_work_queue(&wq);
}
void
diff --git a/repair/rmap.c b/repair/rmap.c
index 05e3c98..0fa2c33 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -45,6 +45,7 @@ struct xfs_ag_rmap {
static struct xfs_ag_rmap *ag_rmaps;
static bool rmapbt_suspect;
+static bool refcbt_suspect;
/*
* Compare rmap observations for array sorting.
@@ -1170,6 +1171,140 @@ _("Unable to fix reflink flag on inode %"PRIu64".\n"),
}
/**
+ * refcount_record_count() -- Return the number of refcount objects for an AG.
+ *
+ * @mp: XFS mount object
+ * @agno: AG number
+ */
+size_t
+refcount_record_count(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ return slab_count(ag_rmaps[agno].ar_refcount_items);
+}
+
+/**
+ * init_refcount_cursor() -- Return a slab cursor that will return refcount
+ * objects in order.
+ * @agno: AG number.
+ * @cur: The new cursor.
+ */
+int
+init_refcount_cursor(
+ xfs_agnumber_t agno,
+ struct xfs_slab_cursor **cur)
+{
+ return init_slab_cursor(ag_rmaps[agno].ar_refcount_items, NULL, cur);
+}
+
+/**
+ * refcount_avoid_check() -- Disable the refcount btree check.
+ */
+void
+refcount_avoid_check(void)
+{
+ refcbt_suspect = true;
+}
+
+/**
+ * check_refcounts() -- Compare the observed reference counts against
+ * what's in the ag btree.
+ * @mp: XFS mount object
+ * @agno: AG number
+ *
+ * Mismatches are reported with do_warn(). Returns nonzero only if the
+ * slab cursor cannot be set up; any later failure is warned about and
+ * the rest of the check for this AG is skipped.
+ */
+int
+check_refcounts(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_slab_cursor *rl_cur;
+ struct xfs_btree_cur *bt_cur = NULL;
+ int error;
+ int have;
+ int i;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_refcount_irec *rl_rec;
+ struct xfs_refcount_irec tmp;
+ struct xfs_perag *pag; /* per allocation group data */
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return 0;
+ if (refcbt_suspect) {
+ if (no_modify && agno == 0)
+ do_warn(_("would rebuild corrupt refcount btrees.\n"));
+ return 0;
+ }
+
+ /* Create cursors to refcount structures */
+ error = init_refcount_cursor(agno, &rl_cur);
+ if (error)
+ return error;
+
+ error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ if (error)
+ goto err;
+
+ /* Leave the per-ag data "uninitialized" since we rewrite it later */
+ pag = xfs_perag_get(mp, agno);
+ pag->pagf_init = 0;
+ xfs_perag_put(pag);
+
+ bt_cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+ if (!bt_cur) {
+ error = -ENOMEM;
+ goto err;
+ }
+
+ rl_rec = pop_slab_cursor(rl_cur);
+ while (rl_rec) {
+ /* Look for a refcount record in the btree */
+ error = xfs_refcountbt_lookup_le(bt_cur,
+ rl_rec->rc_startblock, &have);
+ if (error)
+ goto err;
+ if (!have) {
+ do_warn(
+_("Missing reference count record for (%u/%u) len %u count %u\n"),
+ agno, rl_rec->rc_startblock,
+ rl_rec->rc_blockcount, rl_rec->rc_refcount);
+ goto next_loop;
+ }
+
+ error = xfs_refcountbt_get_rec(bt_cur, &tmp, &i);
+ if (error)
+ goto err;
+ if (!i) {
+ do_warn(
+_("Missing reference count record for (%u/%u) len %u count %u\n"),
+ agno, rl_rec->rc_startblock,
+ rl_rec->rc_blockcount, rl_rec->rc_refcount);
+ goto next_loop;
+ }
+
+ /* Compare each refcount observation against the btree's */
+ if (tmp.rc_startblock != rl_rec->rc_startblock ||
+ tmp.rc_blockcount < rl_rec->rc_blockcount ||
+ tmp.rc_refcount < rl_rec->rc_refcount)
+ do_warn(
+_("Incorrect reference count: saw (%u/%u) len %u nlinks %u; should be (%u/%u) len %u nlinks %u\n"),
+ agno, tmp.rc_startblock, tmp.rc_blockcount,
+ tmp.rc_refcount, agno, rl_rec->rc_startblock,
+ rl_rec->rc_blockcount, rl_rec->rc_refcount);
+next_loop:
+ rl_rec = pop_slab_cursor(rl_cur);
+ }
+
+err:
+ /* Don't drop the failure silently; the check is best-effort, so warn. */
+ if (error)
+ do_warn(
+_("Error %d while checking refcount btree in AG %u.\n"),
+ error, agno);
+ if (bt_cur)
+ xfs_btree_del_cursor(bt_cur,
+ error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ if (agbp)
+ libxfs_putbuf(agbp);
+ free_slab_cursor(&rl_cur);
+ return 0;
+}
+
+/**
* fix_freelist() - Regenerate the AGFL, so that we don't run out of it while
* rebuilding the rmapbt.
* @mp: XFS mount object
diff --git a/repair/rmap.h b/repair/rmap.h
index d0bcde1..df7d489 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -40,6 +40,10 @@ extern void rmap_avoid_check(void);
extern int check_rmaps(struct xfs_mount *, xfs_agnumber_t);
extern int compute_refcounts(struct xfs_mount *, xfs_agnumber_t);
+extern size_t refcount_record_count(struct xfs_mount *, xfs_agnumber_t);
+extern int init_refcount_cursor(xfs_agnumber_t, struct xfs_slab_cursor **);
+extern void refcount_avoid_check(void);
+extern int check_refcounts(struct xfs_mount *, xfs_agnumber_t);
extern void record_inode_reflink_flag(struct xfs_mount *, struct xfs_dinode *,
xfs_agnumber_t, xfs_agino_t, xfs_ino_t);
diff --git a/repair/scan.c b/repair/scan.c
index 54b9b68..3e8633c 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -1257,6 +1257,8 @@ _("%s btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
}
}
out:
+ if (suspect)
+ refcount_avoid_check();
return;
}
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 50/53] xfs_repair: rebuild the refcount btree
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (48 preceding siblings ...)
2015-12-19 9:10 ` [PATCH 49/53] xfs_repair: check the refcount btree against our observed reference counts when -n Darrick J. Wong
@ 2015-12-19 9:10 ` Darrick J. Wong
2015-12-19 9:10 ` [PATCH 51/53] mkfs.xfs: format reflink enabled filesystems Darrick J. Wong
` (2 subsequent siblings)
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:10 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Rebuild the refcount btree with the reference count data we assembled
during phase 4.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
repair/phase5.c | 309 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 307 insertions(+), 2 deletions(-)
diff --git a/repair/phase5.c b/repair/phase5.c
index 734291a..8a4ec43 100644
--- a/repair/phase5.c
+++ b/repair/phase5.c
@@ -1624,6 +1624,290 @@ _("Insufficient memory to construct reverse-map cursor."));
free_slab_cursor(&rmap_cur);
}
+/* rebuild the refcount tree */
+
+#define XR_REFCOUNTBT_BLOCK_MAXRECS(mp, level) \
+ ((mp)->m_refc_mxr[(level) != 0])
+
+/*
+ * we don't have to worry here about how chewing up free extents
+ * may perturb things because reflink tree building happens before
+ * freespace tree building.
+ */
+static void
+init_refc_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs)
+{
+ size_t num_recs;
+ int level;
+ bt_stat_level_t *lptr;
+ bt_stat_level_t *p_lptr;
+ xfs_extlen_t blocks_allocated;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb)) {
+ memset(btree_curs, 0, sizeof(bt_status_t));
+ return;
+ }
+
+ lptr = &btree_curs->level[0];
+ btree_curs->init = 1;
+ btree_curs->owner = XFS_RMAP_OWN_REFC;
+
+ /*
+ * build up statistics
+ */
+ num_recs = refcount_record_count(mp, agno);
+ if (num_recs == 0) {
+ /*
+ * easy corner-case -- no refcount records
+ */
+ lptr->num_blocks = 1;
+ lptr->modulo = 0;
+ lptr->num_recs_pb = 0;
+ lptr->num_recs_tot = 0;
+
+ btree_curs->num_levels = 1;
+ btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;
+
+ setup_cursor(mp, agno, btree_curs);
+
+ return;
+ }
+
+ blocks_allocated = lptr->num_blocks = howmany(num_recs,
+ XR_REFCOUNTBT_BLOCK_MAXRECS(mp, 0));
+
+ lptr->modulo = num_recs % lptr->num_blocks;
+ lptr->num_recs_pb = num_recs / lptr->num_blocks;
+ lptr->num_recs_tot = num_recs;
+ level = 1;
+
+ if (lptr->num_blocks > 1) {
+ for (; btree_curs->level[level-1].num_blocks > 1
+ && level < XFS_BTREE_MAXLEVELS;
+ level++) {
+ lptr = &btree_curs->level[level];
+ p_lptr = &btree_curs->level[level - 1];
+ lptr->num_blocks = howmany(p_lptr->num_blocks,
+ XR_REFCOUNTBT_BLOCK_MAXRECS(mp, level));
+ lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
+ lptr->num_recs_pb = p_lptr->num_blocks
+ / lptr->num_blocks;
+ lptr->num_recs_tot = p_lptr->num_blocks;
+
+ blocks_allocated += lptr->num_blocks;
+ }
+ }
+ ASSERT(lptr->num_blocks == 1);
+ btree_curs->num_levels = level;
+
+ btree_curs->num_tot_blocks = btree_curs->num_free_blocks
+ = blocks_allocated;
+
+ setup_cursor(mp, agno, btree_curs);
+}
+
+/*
+ * Add a key/pointer entry for the level below (keyed on startbno) to the
+ * node block one level above "level", starting a new node block and
+ * recursing further up the tree when the current one is full.
+ */
+static void
+prop_refc_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
+ xfs_agblock_t startbno, int level)
+{
+ struct xfs_btree_block *bt_hdr;
+ struct xfs_refcount_key *bt_key;
+ xfs_refcount_ptr_t *bt_ptr;
+ xfs_agblock_t agbno;
+ bt_stat_level_t *lptr;
+
+ level++;
+
+ if (level >= btree_curs->num_levels)
+ return;
+
+ lptr = &btree_curs->level[level];
+ bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+
+ if (be16_to_cpu(bt_hdr->bb_numrecs) == 0) {
+ /*
+ * this only happens once to initialize the
+ * first path up the left side of the tree
+ * where the agbno's are already set up
+ */
+ prop_refc_cursor(mp, agno, btree_curs, startbno, level);
+ }
+
+ if (be16_to_cpu(bt_hdr->bb_numrecs) ==
+ lptr->num_recs_pb + (lptr->modulo > 0)) {
+ /*
+ * write out current prev block, grab us a new block,
+ * and set the rightsib pointer of current block
+ */
+#ifdef XR_BLD_RL_TRACE
+ fprintf(stderr, " refc prop agbno %u ", lptr->prev_agbno);
+#endif
+ if (lptr->prev_agbno != NULLAGBLOCK) {
+ ASSERT(lptr->prev_buf_p != NULL);
+ libxfs_writebuf(lptr->prev_buf_p, 0);
+ }
+ lptr->prev_agbno = lptr->agbno;
+ lptr->prev_buf_p = lptr->buf_p;
+ agbno = get_next_blockaddr(agno, level, btree_curs);
+
+ bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);
+
+ lptr->buf_p = libxfs_getbuf(mp->m_dev,
+ XFS_AGB_TO_DADDR(mp, agno, agbno),
+ XFS_FSB_TO_BB(mp, 1));
+ lptr->agbno = agbno;
+
+ if (lptr->modulo)
+ lptr->modulo--;
+
+ /*
+ * initialize block header
+ */
+ lptr->buf_p->b_ops = &xfs_refcountbt_buf_ops;
+ bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+ memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
+ xfs_btree_init_block(mp, lptr->buf_p, XFS_REFC_CRC_MAGIC,
+ level, 0, agno,
+ XFS_BTREE_CRC_BLOCKS);
+
+ bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+
+ /*
+ * propagate the key for the first record in the new block up
+ */
+ prop_refc_cursor(mp, agno, btree_curs, startbno, level);
+ }
+ /*
+ * add refcount key and child pointer to current block
+ */
+ be16_add_cpu(&bt_hdr->bb_numrecs, 1);
+
+ bt_key = XFS_REFCOUNT_KEY_ADDR(bt_hdr,
+ be16_to_cpu(bt_hdr->bb_numrecs));
+ bt_ptr = XFS_REFCOUNT_PTR_ADDR(bt_hdr,
+ be16_to_cpu(bt_hdr->bb_numrecs),
+ mp->m_refc_mxr[1]);
+
+ bt_key->rc_startblock = cpu_to_be32(startbno);
+ *bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
+}
+
+/*
+ * rebuilds a refcount btree given a cursor.
+ */
+static void
+build_refcount_tree(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs)
+{
+ xfs_agnumber_t i;
+ xfs_agblock_t j;
+ xfs_agblock_t agbno;
+ struct xfs_btree_block *bt_hdr;
+ struct xfs_refcount_irec *refc_rec;
+ struct xfs_slab_cursor *refc_cur;
+ struct xfs_refcount_rec *bt_rec;
+ struct bt_stat_level *lptr;
+ int level = btree_curs->num_levels;
+ int error;
+
+ for (i = 0; i < level; i++) {
+ lptr = &btree_curs->level[i];
+
+ agbno = get_next_blockaddr(agno, i, btree_curs);
+ lptr->buf_p = libxfs_getbuf(mp->m_dev,
+ XFS_AGB_TO_DADDR(mp, agno, agbno),
+ XFS_FSB_TO_BB(mp, 1));
+
+ if (i == btree_curs->num_levels - 1)
+ btree_curs->root = agbno;
+
+ lptr->agbno = agbno;
+ lptr->prev_agbno = NULLAGBLOCK;
+ lptr->prev_buf_p = NULL;
+ /*
+ * initialize block header
+ */
+
+ lptr->buf_p->b_ops = &xfs_refcountbt_buf_ops;
+ bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+ memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
+ xfs_btree_init_block(mp, lptr->buf_p, XFS_REFC_CRC_MAGIC,
+ i, 0, agno,
+ XFS_BTREE_CRC_BLOCKS);
+ }
+
+ /*
+ * run along leaf, setting up records. as we have to switch
+ * blocks, call the prop_refc_cursor routine to set up the new
+ * pointers for the parent. that can recurse up to the root
+ * if required. set the sibling pointers for leaf level here.
+ */
+ error = init_refcount_cursor(agno, &refc_cur);
+ if (error)
+ do_error(
+_("Insufficient memory to construct refcount cursor."));
+ refc_rec = pop_slab_cursor(refc_cur);
+ lptr = &btree_curs->level[0];
+
+ for (i = 0; i < lptr->num_blocks; i++) {
+ /*
+ * block initialization, lay in block header
+ */
+ lptr->buf_p->b_ops = &xfs_refcountbt_buf_ops;
+ bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
+ memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
+ xfs_btree_init_block(mp, lptr->buf_p, XFS_REFC_CRC_MAGIC,
+ 0, 0, agno,
+ XFS_BTREE_CRC_BLOCKS);
+
+ bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
+ bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
+ (lptr->modulo > 0));
+
+ if (lptr->modulo > 0)
+ lptr->modulo--;
+
+ if (lptr->num_recs_pb > 0)
+ prop_refc_cursor(mp, agno, btree_curs,
+ refc_rec->rc_startblock, 0);
+
+ bt_rec = (struct xfs_refcount_rec *)
+ ((char *)bt_hdr + XFS_REFCOUNT_BLOCK_LEN);
+ for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
+ ASSERT(refc_rec != NULL);
+ bt_rec[j].rc_startblock =
+ cpu_to_be32(refc_rec->rc_startblock);
+ bt_rec[j].rc_blockcount =
+ cpu_to_be32(refc_rec->rc_blockcount);
+ bt_rec[j].rc_refcount = cpu_to_be32(refc_rec->rc_refcount);
+
+ refc_rec = pop_slab_cursor(refc_cur);
+ }
+
+ if (refc_rec != NULL) {
+ /*
+ * get next leaf level block
+ */
+ if (lptr->prev_buf_p != NULL) {
+#ifdef XR_BLD_RL_TRACE
+ fprintf(stderr, "writing refcntbt agbno %u\n",
+ lptr->prev_agbno);
+#endif
+ ASSERT(lptr->prev_agbno != NULLAGBLOCK);
+ libxfs_writebuf(lptr->prev_buf_p, 0);
+ }
+ lptr->prev_buf_p = lptr->buf_p;
+ lptr->prev_agbno = lptr->agbno;
+ lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
+ bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);
+
+ lptr->buf_p = libxfs_getbuf(mp->m_dev,
+ XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
+ XFS_FSB_TO_BB(mp, 1));
+ }
+ }
+ free_slab_cursor(&refc_cur);
+}
+
/*
* build both the agf and the agfl for an agno given both
* btree cursors.
@@ -1637,7 +1921,8 @@ build_agf_agfl(xfs_mount_t *mp,
bt_status_t *bcnt_bt,
xfs_extlen_t freeblks, /* # free blocks in tree */
int lostblocks, /* # blocks that will be lost */
- bt_status_t *rmap_bt)
+ bt_status_t *rmap_bt,
+ bt_status_t *refcnt_bt)
{
extent_tree_node_t *ext_ptr;
xfs_buf_t *agf_buf, *agfl_buf;
@@ -1679,6 +1964,8 @@ build_agf_agfl(xfs_mount_t *mp,
agf->agf_roots[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->root);
agf->agf_levels[XFS_BTNUM_RMAP] = cpu_to_be32(rmap_bt->num_levels);
agf->agf_freeblks = cpu_to_be32(freeblks);
+ agf->agf_refcount_root = cpu_to_be32(refcnt_bt->root);
+ agf->agf_refcount_level = cpu_to_be32(refcnt_bt->num_levels);
/*
* Count and record the number of btree blocks consumed if required.
@@ -1796,6 +2083,10 @@ build_agf_agfl(xfs_mount_t *mp,
ASSERT(be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNOi]) !=
be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNTi]));
+ ASSERT(be32_to_cpu(agf->agf_refcount_root) !=
+ be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNOi]));
+ ASSERT(be32_to_cpu(agf->agf_refcount_root) !=
+ be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNTi]));
libxfs_writebuf(agf_buf, 0);
@@ -1865,6 +2156,7 @@ phase5_func(
bt_status_t ino_btree_curs;
bt_status_t fino_btree_curs;
bt_status_t rmap_btree_curs;
+ bt_status_t refcnt_btree_curs;
int extra_blocks = 0;
uint num_freeblocks;
xfs_extlen_t freeblks1;
@@ -1927,6 +2219,12 @@ phase5_func(
*/
init_rmapbt_cursor(mp, agno, &rmap_btree_curs);
+ /*
+ * Set up the btree cursors for the on-disk refcount btrees,
+ * which includes pre-allocating all required blocks.
+ */
+ init_refc_cursor(mp, agno, &refcnt_btree_curs);
+
num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
/*
* lose two blocks per AG -- the space tree roots
@@ -2020,12 +2318,17 @@ phase5_func(
rmap_btree_curs.num_free_blocks) - 1;
}
+ if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ build_refcount_tree(mp, agno, &refcnt_btree_curs);
+ write_cursor(&refcnt_btree_curs);
+ }
+
/*
* set up agf and agfl
*/
build_agf_agfl(mp, agno, &bno_btree_curs,
&bcnt_btree_curs, freeblks1, extra_blocks,
- &rmap_btree_curs);
+ &rmap_btree_curs, &refcnt_btree_curs);
/*
* build inode allocation tree.
*/
@@ -2056,6 +2359,8 @@ phase5_func(
finish_cursor(&ino_btree_curs);
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
finish_cursor(&rmap_btree_curs);
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ finish_cursor(&refcnt_btree_curs);
if (xfs_sb_version_hasfinobt(&mp->m_sb))
finish_cursor(&fino_btree_curs);
finish_cursor(&bcnt_btree_curs);
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 51/53] mkfs.xfs: format reflink enabled filesystems
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (49 preceding siblings ...)
2015-12-19 9:10 ` [PATCH 50/53] xfs_repair: rebuild the refcount btree Darrick J. Wong
@ 2015-12-19 9:10 ` Darrick J. Wong
2015-12-19 9:10 ` [PATCH 52/53] libxfs: try to prevent failed rmap btree expansion during cow Darrick J. Wong
2015-12-19 9:10 ` [PATCH 53/53] mkfs: hack around not having enough log blocks Darrick J. Wong
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:10 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Create the refcount btree at mkfs time and set the feature flag.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
man/man8/mkfs.xfs.8 | 28 +++++++++++++++++++++++
mkfs/xfs_mkfs.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 85 insertions(+), 5 deletions(-)
diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8
index ffef906..56c14e8 100644
--- a/man/man8/mkfs.xfs.8
+++ b/man/man8/mkfs.xfs.8
@@ -193,6 +193,34 @@ for filesystems created with the (default)
option set. When the option
.B \-m crc=0
is used, the reverse mapping btree feature is not supported and is disabled.
+.TP
+.BI reflink= value
+This option enables the use of a separate reference count btree index in each
+allocation group. The value is either 0 to disable the feature, or 1 to create
+a reference count btree in each allocation group.
+.IP
+The reference count btree enables the sharing of physical extents between
+the data forks of different files, which is commonly known as "reflink".
+Unlike traditional Unix filesystems which assume that every inode and
+logical block pair maps to a unique physical block, a reflink-capable
+XFS filesystem removes the uniqueness requirement, allowing up to four
+billion arbitrary inode/logical block pairs to map to a physical block.
+If a program tries to write to a multiply-referenced block in a file, the write
+will be redirected to a new block, and that file's logical-to-physical
+mapping will be changed to the new block ("copy on write"). This feature
+enables the creation of per-file snapshots and deduplication. It is only
+available for the data forks of regular files.
+.IP
+By default,
+.B mkfs.xfs
+will not create reference count btrees and therefore will not enable the
+reflink feature. This feature is only available for filesystems created with
+the (default)
+.B \-m crc=1
+option set. When the option
+.B \-m crc=0
+is used, the reference count btree feature is not supported and reflink is
+disabled.
.RE
.TP
.BI \-d " data_section_options"
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index 15a3866..a65e2bf 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -187,6 +187,8 @@ char *mopts[] = {
"uuid",
#define M_RMAPBT 3
"rmapbt",
+#define M_REFLINK 4
+ "reflink",
NULL
};
@@ -987,6 +989,7 @@ main(
bool finobtflag;
int spinodes;
bool rmapbt;
+ bool reflink;
platform_uuid_generate(&uuid);
progname = basename(argv[0]);
@@ -1026,6 +1029,7 @@ main(
finobtflag = false;
spinodes = 0;
rmapbt = false;
+ reflink = false;
memset(&fsx, 0, sizeof(fsx));
memset(&xi, 0, sizeof(xi));
@@ -1543,6 +1547,14 @@ main(
illegal(value, "m rmapbt");
rmapbt = c;
break;
+ case M_REFLINK:
+ /* reflink= takes a mandatory 0|1 value */
+ if (!value || *value == '\0')
+ reqval('m', mopts, M_REFLINK);
+ c = atoi(value);
+ if (c < 0 || c > 1)
+ illegal(value, "m reflink");
+ reflink = c;
+ break;
default:
unknown('m', value);
}
@@ -1905,6 +1917,12 @@ _("warning: rmapbt not supported without CRC support, disabled.\n"));
rmapbt = 0;
}
+ if (reflink && !crcs_enabled) {
+ fprintf(stderr,
+_("warning: reflink not supported without CRC support, disabled.\n"));
+ reflink = false;
+ }
+
if (nsflag || nlflag) {
if (dirblocksize < blocksize ||
dirblocksize > XFS_MAX_BLOCKSIZE) {
@@ -2522,6 +2540,8 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
sbp->sb_features_ro_compat = XFS_SB_FEAT_RO_COMPAT_FINOBT;
if (rmapbt)
sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT;
+ if (reflink)
+ sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_REFLINK;
if (loginternal) {
/*
@@ -2585,7 +2605,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
printf(_(
"meta-data=%-22s isize=%-6d agcount=%lld, agsize=%lld blks\n"
" =%-22s sectsz=%-5u attr=%u, projid32bit=%u\n"
- " =%-22s crc=%-8u finobt=%u, sparse=%u, rmapbt=%u\n"
+ " =%-22s crc=%-8u finobt=%u, sparse=%u, rmapbt=%u, reflink=%u\n"
"data =%-22s bsize=%-6u blocks=%llu, imaxpct=%u\n"
" =%-22s sunit=%-6u swidth=%u blks\n"
"naming =version %-14u bsize=%-6u ascii-ci=%d ftype=%d\n"
@@ -2594,7 +2614,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
"realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n"),
dfile, isize, (long long)agcount, (long long)agsize,
"", sectorsize, attrversion, !projid16bit,
- "", crcs_enabled, finobt, spinodes, rmapbt,
+ "", crcs_enabled, finobt, spinodes, rmapbt, reflink,
"", blocksize, (long long)dblocks, imaxpct,
"", dsunit, dswidth,
dirversion, dirblocksize, nci, dirftype,
@@ -2788,7 +2808,10 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
cpu_to_be32(XFS_RMAP_BLOCK(mp));
agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
}
-
+ if (reflink) {
+ agf->agf_refcount_root = cpu_to_be32(xfs_refc_block(mp));
+ agf->agf_refcount_level = cpu_to_be32(1);
+ }
agf->agf_flfirst = 0;
agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
agf->agf_flcount = 0;
@@ -2957,6 +2980,23 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
/*
+ * refcount btree root block
+ */
+ if (reflink) {
+ buf = libxfs_getbuf(mp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(mp, agno, xfs_refc_block(mp)),
+ bsize);
+ buf->b_ops = &xfs_refcountbt_buf_ops;
+
+ block = XFS_BUF_TO_BLOCK(buf);
+ memset(block, 0, blocksize);
+ xfs_btree_init_block(mp, buf, XFS_REFC_CRC_MAGIC, 0, 0,
+ agno, XFS_BTREE_CRC_BLOCKS);
+
+ libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
+ }
+
+ /*
* INO btree root block
*/
buf = libxfs_getbuf(mp->m_ddev_targp,
@@ -3044,9 +3084,21 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
rrec->rm_offset = 0;
be16_add_cpu(&block->bb_numrecs, 1);
+ /* account for refcount btree root */
+ if (reflink) {
+ rrec = XFS_RMAP_REC_ADDR(block, 5);
+ rrec->rm_startblock = cpu_to_be32(
+ xfs_refc_block(mp));
+ rrec->rm_blockcount = cpu_to_be32(1);
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
+ rrec->rm_offset = 0;
+ be16_add_cpu(&block->bb_numrecs, 1);
+ }
+
/* account for the log space */
if (loginternal && agno == logagno) {
- rrec = XFS_RMAP_REC_ADDR(block, 5);
+ rrec = XFS_RMAP_REC_ADDR(block,
+ be16_to_cpu(block->bb_numrecs) + 1);
rrec->rm_startblock = cpu_to_be32(
XFS_FSB_TO_AGBNO(mp, logstart));
rrec->rm_blockcount = cpu_to_be32(logblocks);
@@ -3295,7 +3347,7 @@ usage( void )
{
fprintf(stderr, _("Usage: %s\n\
/* blocksize */ [-b log=n|size=num]\n\
-/* metadata */ [-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1]\n\
+/* metadata */ [-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1,reflink=0|1]\n\
/* data subvol */ [-d agcount=n,agsize=n,file,name=xxx,size=num,\n\
(sunit=value,swidth=value|su=num,sw=num|noalign),\n\
sectlog=n|sectsize=num\n\
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 52/53] libxfs: try to prevent failed rmap btree expansion during cow
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (50 preceding siblings ...)
2015-12-19 9:10 ` [PATCH 51/53] mkfs.xfs: format reflink enabled filesystems Darrick J. Wong
@ 2015-12-19 9:10 ` Darrick J. Wong
2015-12-19 9:10 ` [PATCH 53/53] mkfs: hack around not having enough log blocks Darrick J. Wong
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:10 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
It's possible, if reflink and rmap are both enabled, for CoW to create
a lot of small mappings that cause an rmap btree expansion to run out
of space. If both are enabled, keep the AGFL fully stocked at all
times, refuse to CoW if we start to run out of space in the AGFL, and
hope that's good enough.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
libxfs/xfs_alloc.c | 8 ++++++++
repair/xfs_repair.c | 5 +++++
2 files changed, 13 insertions(+)
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
index 619e06d..86bc620 100644
--- a/libxfs/xfs_alloc.c
+++ b/libxfs/xfs_alloc.c
@@ -1997,6 +1997,14 @@ xfs_alloc_min_freelist(
min_free += min_t(unsigned int,
pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
mp->m_ag_maxlevels);
+ /*
+ * The rmapbt can explode if we have reflink enabled and someone
+ * creates a lot of small mappings... so max out the AGFL to try
+ * to prevent that.
+ */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+ xfs_sb_version_hasreflink(&mp->m_sb))
+ min_free = XFS_AGFL_SIZE(mp) - min_free;
return min_free;
}
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 1d402a5..372c8f8 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -424,6 +424,11 @@ calc_mkfs(xfs_mount_t *mp)
}
if (xfs_sb_version_hasreflink(&mp->m_sb))
fino_bno++;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+ xfs_sb_version_hasreflink(&mp->m_sb)) {
+ fino_bno = XFS_AGFL_SIZE(mp) + (fino_bno -
+ (6 * min(2, mp->m_ag_maxlevels)));
+ }
/*
* If the log is allocated in the first allocation group we need to
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 53/53] mkfs: hack around not having enough log blocks
2015-12-19 9:04 [RFCv4 00/53] xfsprogs: add reverse-mapping, reflink, and dedupe support Darrick J. Wong
` (51 preceding siblings ...)
2015-12-19 9:10 ` [PATCH 52/53] libxfs: try to prevent failed rmap btree expansion during cow Darrick J. Wong
@ 2015-12-19 9:10 ` Darrick J. Wong
52 siblings, 0 replies; 54+ messages in thread
From: Darrick J. Wong @ 2015-12-19 9:10 UTC (permalink / raw)
To: david, darrick.wong; +Cc: xfs
Under some circumstances I don't yet understand, mkfs underestimates
the minimum log size and the kernel refuses to mount. Put in a crude
hack so that this doesn't happen.
DO NOT APPLY!!!
Singed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
mkfs/xfs_mkfs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index a65e2bf..547b9b9 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -2444,6 +2444,7 @@ an AG size that is one stripe unit smaller, for example %llu.\n"),
logversion, lsunit);
ASSERT(min_logblocks);
min_logblocks = MAX(XFS_MIN_LOG_BLOCKS, min_logblocks);
+if (min_logblocks < 860) min_logblocks = 860;
if (!logsize && dblocks >= (1024*1024*1024) >> blocklog)
min_logblocks = MAX(min_logblocks, XFS_MIN_LOG_BYTES>>blocklog);
if (logsize && xi.logBBsize > 0 && logblocks > DTOBT(xi.logBBsize)) {
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 54+ messages in thread