All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
@ 2018-02-14 18:12 Darrick J. Wong
  2018-02-14 18:12 ` [PATCH 1/4] xfs: convert XFS_AGFL_SIZE to a helper function Darrick J. Wong
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-14 18:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

Hi all,

Here's a bunch of patches fixing the AGFL padding problem once and for
all.  When the v5 disk format was rolled out, the AGFL header definition
had a different padding size on 32-bit vs 64-bit systems, with the
result that XFS_AGFL_SIZE reports different maximum lengths depending on
the compiler.  In Linux 4.5 we fixed the structure definition, but this
has lead to sporadic corruption reports on filesystems that were
unmounted with a pre-4.5 kernel and a wrapped AGFL and then remounted on
a 4.5+ kernel.

To deal with these corruption problems, we introduce a new ROCOMPAT
feature bit to indicate that the AGFL has been scanned and guaranteed
not to wrap.  We then amend the mounting code to find broken wrapping,
fix the wrapping, and if we had to fix anything, set the new ROCOMPAT
flag.  The ROCOMPAT flag prevents re-mounting on unpatched kernels, so
this series will likely have to be backported.

--D

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/4] xfs: convert XFS_AGFL_SIZE to a helper function
  2018-02-14 18:12 [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Darrick J. Wong
@ 2018-02-14 18:12 ` Darrick J. Wong
  2018-02-14 18:12 ` [PATCH 2/4] xfs: introduce 'fixed agfl' feature Darrick J. Wong
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-14 18:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, Dave Chinner

From: Dave Chinner <dchinner@redhat.com>

The AGFL size calculation is about to get more complex, so lets turn
the macro into a function first and remove the macro.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
[darrick: forward port to newer kernel, simplify the helper]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c  |   31 ++++++++++++++++++++++++-------
 fs/xfs/libxfs/xfs_alloc.h  |    2 ++
 fs/xfs/libxfs/xfs_format.h |   13 +------------
 fs/xfs/scrub/agheader.c    |    6 +++---
 fs/xfs/xfs_fsops.c         |    2 +-
 5 files changed, 31 insertions(+), 23 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c02781a..36101e5 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -53,6 +53,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
 		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
 
+/*
+ * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
+ * the beginning of the block for a proper header with the location information
+ * and CRC.
+ */
+unsigned int
+xfs_agfl_size(
+	struct xfs_mount	*mp)
+{
+	unsigned int		size = mp->m_sb.sb_sectsize;
+
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		size -= sizeof(struct xfs_agfl);
+
+	return size / sizeof(xfs_agblock_t);
+}
+
 unsigned int
 xfs_refc_block(
 	struct xfs_mount	*mp)
@@ -550,7 +567,7 @@ xfs_agfl_verify(
 	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 		return __this_address;
 
-	for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
+	for (i = 0; i < xfs_agfl_size(mp); i++) {
 		if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 		    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 			return __this_address;
@@ -2266,7 +2283,7 @@ xfs_alloc_get_freelist(
 	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
 	be32_add_cpu(&agf->agf_flfirst, 1);
 	xfs_trans_brelse(tp, agflbp);
-	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
+	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
 		agf->agf_flfirst = 0;
 
 	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
@@ -2377,7 +2394,7 @@ xfs_alloc_put_freelist(
 			be32_to_cpu(agf->agf_seqno), &agflbp)))
 		return error;
 	be32_add_cpu(&agf->agf_fllast, 1);
-	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
+	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
 		agf->agf_fllast = 0;
 
 	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
@@ -2395,7 +2412,7 @@ xfs_alloc_put_freelist(
 
 	xfs_alloc_log_agf(tp, agbp, logflags);
 
-	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
+	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
 
 	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
 	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
@@ -2428,9 +2445,9 @@ xfs_agf_verify(
 	if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
 	      XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
 	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
-	      be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
-	      be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
-	      be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
+	      be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
+	      be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
+	      be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
 		return __this_address;
 
 	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 65a0caf..a311a24 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -26,6 +26,8 @@ struct xfs_trans;
 
 extern struct workqueue_struct *xfs_alloc_wq;
 
+unsigned int xfs_agfl_size(struct xfs_mount *mp);
+
 /*
  * Freespace allocation types.  Argument to xfs_alloc_[v]extent.
  */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 1acb584..42956d8 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -803,24 +803,13 @@ typedef struct xfs_agi {
 		&(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
 		(__be32 *)(bp)->b_addr)
 
-/*
- * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of
- * slots in the beginning of the block for a proper header with the
- * location information and CRC.
- */
-#define XFS_AGFL_SIZE(mp) \
-	(((mp)->m_sb.sb_sectsize - \
-	 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
-		sizeof(struct xfs_agfl) : 0)) / \
-	  sizeof(xfs_agblock_t))
-
 typedef struct xfs_agfl {
 	__be32		agfl_magicnum;
 	__be32		agfl_seqno;
 	uuid_t		agfl_uuid;
 	__be64		agfl_lsn;
 	__be32		agfl_crc;
-	__be32		agfl_bno[];	/* actually XFS_AGFL_SIZE(mp) */
+	__be32		agfl_bno[];	/* actually xfs_agfl_size(mp) */
 } __attribute__((packed)) xfs_agfl_t;
 
 #define XFS_AGFL_CRC_OFF	offsetof(struct xfs_agfl, agfl_crc)
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index fd97552..fd383c5 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -80,7 +80,7 @@ xfs_scrub_walk_agfl(
 	}
 
 	/* first to the end */
-	for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) {
+	for (i = flfirst; i < xfs_agfl_size(mp); i++) {
 		error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
 		if (error)
 			return error;
@@ -664,7 +664,7 @@ xfs_scrub_agf(
 	if (agfl_last > agfl_first)
 		fl_count = agfl_last - agfl_first + 1;
 	else
-		fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1;
+		fl_count = xfs_agfl_size(mp) - agfl_first + agfl_last + 1;
 	if (agfl_count != 0 && fl_count != agfl_count)
 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
 
@@ -791,7 +791,7 @@ xfs_scrub_agfl(
 	/* Allocate buffer to ensure uniqueness of AGFL entries. */
 	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 	agflcount = be32_to_cpu(agf->agf_flcount);
-	if (agflcount > XFS_AGFL_SIZE(sc->mp)) {
+	if (agflcount > xfs_agfl_size(sc->mp)) {
 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
 		goto out;
 	}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8b45456..5237927 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -217,7 +217,7 @@ xfs_growfs_data_private(
 		}
 
 		agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
-		for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
+		for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
 			agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
 
 		error = xfs_bwrite(bp);


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/4] xfs: introduce 'fixed agfl' feature
  2018-02-14 18:12 [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Darrick J. Wong
  2018-02-14 18:12 ` [PATCH 1/4] xfs: convert XFS_AGFL_SIZE to a helper function Darrick J. Wong
@ 2018-02-14 18:12 ` Darrick J. Wong
  2018-02-14 18:12 ` [PATCH 3/4] xfs: fix agfl wrapping on v5 filesystems Darrick J. Wong
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-14 18:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Introduce a new rocompat flag "FIXED_AGFL" to indicate that we've
scanned and verified that the AGFL does not hit the troublesome last
element of the AGFL array, and update xfs_agfl_size to its new
definition based on this feature.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h |    7 +++++++
 1 file changed, 7 insertions(+)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 42956d8..4f6f41b 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -462,6 +462,7 @@ xfs_sb_has_compat_feature(
 #define XFS_SB_FEAT_RO_COMPAT_FINOBT   (1 << 0)		/* free inode btree */
 #define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)		/* reverse map btree */
 #define XFS_SB_FEAT_RO_COMPAT_REFLINK  (1 << 2)		/* reflinked files */
+#define XFS_SB_FEAT_RO_COMPAT_FIXED_AGFL (1 << 3)	/* fixed agfl */
 #define XFS_SB_FEAT_RO_COMPAT_ALL \
 		(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
 		 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
@@ -559,6 +560,12 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp)
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
 }
 
+static inline bool xfs_sb_version_hasfixedagfl(struct xfs_sb *sbp)
+{
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FIXED_AGFL);
+}
+
 /*
  * end of superblock version macros
  */


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/4] xfs: fix agfl wrapping on v5 filesystems
  2018-02-14 18:12 [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Darrick J. Wong
  2018-02-14 18:12 ` [PATCH 1/4] xfs: convert XFS_AGFL_SIZE to a helper function Darrick J. Wong
  2018-02-14 18:12 ` [PATCH 2/4] xfs: introduce 'fixed agfl' feature Darrick J. Wong
@ 2018-02-14 18:12 ` Darrick J. Wong
  2018-02-14 18:12 ` [PATCH 4/4] xfs: enable fixed agfl feature Darrick J. Wong
  2018-02-14 19:56 ` [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Brian Foster
  4 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-14 18:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Introduce helper functions to scan and fix the AGFLs at mount time.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_alloc.c  |  169 ++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_alloc.h  |    3 +
 fs/xfs/libxfs/xfs_format.h |    5 +
 fs/xfs/xfs_mount.c         |   10 +++
 fs/xfs/xfs_super.c         |   10 +++
 5 files changed, 197 insertions(+)


diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 36101e5..8e4e1ae 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -70,6 +70,175 @@ xfs_agfl_size(
 	return size / sizeof(xfs_agblock_t);
 }
 
+/*
+ * Detect and fixup a screwup in the struct xfs_agfl definition that results in
+ * different free list sizes depending on the compiler padding added to the
+ * xfs_agfl. This will only matter on v5 filesystems that have the freelist
+ * wrapping around the end of the AGFL.  If we fix the problem, we log the new
+ * AGF immediately; if the caller fixes any AGFLs, then it should set the
+ * superblock flag to say we've fixed everything.
+ *
+ * This function returns 1 if something was fixed, 0 if nothing needed fixing,
+ * or the usual negative error code.
+ */
+int
+xfs_agf_fixup_freelist_count(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agfbp,
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_agf		*agf;
+	struct xfs_buf		*agflbp;
+	__be32			*agfl_bno;
+	xfs_agnumber_t		agno;
+	uint32_t		agfl_size;
+	uint32_t		flfirst;
+	uint32_t		fllast;
+	int32_t			active;
+	int			offset;
+	int			error;
+
+	/* v4 or fixed-agfl fs doesn't need fixing */
+	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
+	    xfs_sb_version_hasfixedagfl(&mp->m_sb))
+		return 0;
+
+	agfl_size = xfs_agfl_size(mp);
+
+	agf = XFS_BUF_TO_AGF(agfbp);
+	agno = be32_to_cpu(agf->agf_seqno);
+	flfirst = be32_to_cpu(agf->agf_flfirst);
+	fllast = be32_to_cpu(agf->agf_fllast);
+
+	/*
+	 * In the original v5 code, we had a structure padding error in struct
+	 * xfs_agfl such that XFS_AGFL_SIZE thought the structure size was 36
+	 * on 32-bit and 40 on 64-bit.  On a 512b sector fs this lead to
+	 * ((512 - 36) / 4) = 119 agfl entries on 32bit, whereas on 64-bit
+	 * ((512 - 40) / 4) = 118 agfl entries.  This could lead to problems
+	 * if you migrated filesystems between 32bit and 64bit machines.
+	 *
+	 * In Linux 4.5 we fixed the padding problem such that the structure
+	 * size is always 36 bytes (119 entries in the above example), but
+	 * this still leads to verifier problems because flfirst/fllast have
+	 * to match flcount even if they wrap around the end.  This was also
+	 * problematic because there were v5 filesystems already deployed, and
+	 * some of them have the 40-byte padding (i.e. 118 entries).
+	 *
+	 * Now we're introducing a FIXED_AGFL feature that means that we've
+	 * fixed any wrapping problems and we'll never do that again.  Better
+	 * yet, old kernels won't be touching the AGFL because it's a rocompat
+	 * feature.  We retain the 36-byte padding.
+	 */
+
+	/* Empty AGFL?  Make sure we aren't pointing at the end. */
+	if (pag->pagf_flcount == 0) {
+		if (flfirst >= agfl_size || fllast >= agfl_size) {
+			agf->agf_flfirst = cpu_to_be32(1);
+			agf->agf_fllast = 0;
+			xfs_alloc_log_agf(tp, agfbp,
+					XFS_AGF_FLFIRST | XFS_AGF_FLLAST);
+			return 1;
+		}
+		return 0;
+	}
+
+	/* Make sure we're either spot on or off by 1. */
+	active = fllast - flfirst + 1;
+	if (active <= 0)
+		active += agfl_size;
+	if (active == pag->pagf_flcount)
+		return 0;
+	else if (active != pag->pagf_flcount + 1)
+		return -EFSCORRUPTED;
+
+	/* Would this have even passed muster on an old system? */
+	if (flfirst >= agfl_size - 1 || fllast >= agfl_size - 1 ||
+	    pag->pagf_flcount > agfl_size - 1)
+		return -EFSCORRUPTED;
+
+	/*
+	 * Convert a 40-byte-padded agfl into a 36-byte-padded AGFL.
+	 * Therefore, we need to move the AGFL blocks
+	 * bno[flfirst..agfl_size - 2] to bno[flfirst + 1...agfl_size - 1].
+	 *
+	 * Reusing the example above, if we had flfirst == 116, we need
+	 * to move bno[116] and bno[117] into bno[117] and bno[118],
+	 * respectively, and then increment flfirst.
+	 */
+	error = xfs_alloc_read_agfl(mp, tp, agno, &agflbp);
+	if (error)
+		return error;
+	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+	memmove(&agfl_bno[flfirst + 1], &agfl_bno[flfirst],
+			(agfl_size - flfirst - 1) * sizeof(xfs_agblock_t));
+	agfl_bno[flfirst] = cpu_to_be32(NULLAGBLOCK);
+	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
+	offset = (char *)&agfl_bno[flfirst] - (char *)agflbp->b_addr;
+	xfs_trans_log_buf(tp, agflbp, offset, BBTOB(agflbp->b_length) - 1);
+	xfs_trans_brelse(tp, agflbp);
+	agflbp = NULL;
+
+	be32_add_cpu(&agf->agf_flfirst, 1);
+	xfs_alloc_log_agf(tp, agfbp, XFS_AGF_FLFIRST);
+
+	return 1;
+}
+
+/* Fix all the AGFL counters. */
+int
+xfs_agf_fixup_freelist_counts(
+	struct xfs_mount	*mp)
+{
+	struct xfs_trans	*tp;
+	struct xfs_perag	*pag;
+	struct xfs_buf		*agfbp;
+	xfs_agnumber_t		agno;
+	bool			needed_fixing = false;
+	int			error;
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb) ||
+	    xfs_sb_version_hasfixedagfl(&mp->m_sb))
+		return 0;
+
+	/* Need to be able to log all AGFs + primary super */
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 0, 0, 0, &tp);
+	if (error)
+		return error;
+
+	/* Make sure all the AGFLs do not go off the end of the array. */
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		error = xfs_alloc_read_agf(mp, tp, agno, 0, &agfbp);
+		if (error)
+			goto cancel;
+		if (!agfbp) {
+			error = -ENOMEM;
+			goto cancel;
+		}
+		pag = xfs_perag_get(mp, agno);
+		error = xfs_agf_fixup_freelist_count(tp, agfbp, pag);
+		xfs_perag_put(pag);
+		xfs_trans_brelse(tp, agfbp);
+		if (error == 1)
+			needed_fixing = true;
+		else if (error < 0)
+			goto cancel;
+	}
+
+	if (!needed_fixing)
+		goto cancel;
+
+	/* Add the feature flag so we don't have to do this again. */
+	xfs_sb_version_addfixedagfl(&mp->m_sb);
+	xfs_log_sb(tp);
+	return xfs_trans_commit(tp);
+
+cancel:
+	xfs_trans_cancel(tp);
+	return error;
+}
+
 unsigned int
 xfs_refc_block(
 	struct xfs_mount	*mp)
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index a311a24..2c79ea4e 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -27,6 +27,9 @@ struct xfs_trans;
 extern struct workqueue_struct *xfs_alloc_wq;
 
 unsigned int xfs_agfl_size(struct xfs_mount *mp);
+int xfs_agf_fixup_freelist_count(struct xfs_trans *tp, struct xfs_buf *agfbp,
+		struct xfs_perag *pag);
+int xfs_agf_fixup_freelist_counts(struct xfs_mount *mp);
 
 /*
  * Freespace allocation types.  Argument to xfs_alloc_[v]extent.
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 4f6f41b..6a7d65f 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -566,6 +566,11 @@ static inline bool xfs_sb_version_hasfixedagfl(struct xfs_sb *sbp)
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FIXED_AGFL);
 }
 
+static inline void xfs_sb_version_addfixedagfl(struct xfs_sb *sbp)
+{
+	sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_FIXED_AGFL;
+}
+
 /*
  * end of superblock version macros
  */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 98fd41c..62865aa 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -875,6 +875,16 @@ xfs_mountfs(
 	}
 
 	/*
+	 * Make sure our AGFL counters do not wrap the end of the block in a
+	 * troublesome manner.  Don't bother if we're readonly.
+	 */
+	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+		error = xfs_agf_fixup_freelist_counts(mp);
+		if (error)
+			goto out_log_dealloc;
+	}
+
+	/*
 	 * Get and sanity-check the root inode.
 	 * Save the pointer to it in the mount structure.
 	 */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 7aba628..5fa0501 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1353,6 +1353,16 @@ xfs_fs_remount(
 		}
 
 		/*
+		 * Make sure our AGFL counters do not wrap the end of the block
+		 * in a troublesome manner.
+		 */
+		error = xfs_agf_fixup_freelist_counts(mp);
+		if (error) {
+			xfs_warn(mp, "failed to set FIXED_AGFL feature");
+			return error;
+		}
+
+		/*
 		 * Fill out the reserve pool if it is empty. Use the stashed
 		 * value if it is non-zero, otherwise go with the default.
 		 */


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/4] xfs: enable fixed agfl feature
  2018-02-14 18:12 [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Darrick J. Wong
                   ` (2 preceding siblings ...)
  2018-02-14 18:12 ` [PATCH 3/4] xfs: fix agfl wrapping on v5 filesystems Darrick J. Wong
@ 2018-02-14 18:12 ` Darrick J. Wong
  2018-02-14 19:56 ` [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Brian Foster
  4 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-14 18:12 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Now that we have all the FIXED AGFL bits ready, enable the feature flag.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 6a7d65f..8c1425d 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -466,7 +466,8 @@ xfs_sb_has_compat_feature(
 #define XFS_SB_FEAT_RO_COMPAT_ALL \
 		(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
 		 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
-		 XFS_SB_FEAT_RO_COMPAT_REFLINK)
+		 XFS_SB_FEAT_RO_COMPAT_REFLINK | \
+		 XFS_SB_FEAT_RO_COMPAT_FIXED_AGFL)
 #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
 static inline bool
 xfs_sb_has_ro_compat_feature(


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-14 18:12 [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Darrick J. Wong
                   ` (3 preceding siblings ...)
  2018-02-14 18:12 ` [PATCH 4/4] xfs: enable fixed agfl feature Darrick J. Wong
@ 2018-02-14 19:56 ` Brian Foster
  2018-02-15  2:05   ` Darrick J. Wong
  4 siblings, 1 reply; 18+ messages in thread
From: Brian Foster @ 2018-02-14 19:56 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Wed, Feb 14, 2018 at 10:12:00AM -0800, Darrick J. Wong wrote:
> Hi all,
> 
> Here's a bunch of patches fixing the AGFL padding problem once and for
> all.  When the v5 disk format was rolled out, the AGFL header definition
> had a different padding size on 32-bit vs 64-bit systems, with the
> result that XFS_AGFL_SIZE reports different maximum lengths depending on
> the compiler.  In Linux 4.5 we fixed the structure definition, but this
> has lead to sporadic corruption reports on filesystems that were
> unmounted with a pre-4.5 kernel and a wrapped AGFL and then remounted on
> a 4.5+ kernel.
> 
> To deal with these corruption problems, we introduce a new ROCOMPAT
> feature bit to indicate that the AGFL has been scanned and guaranteed
> not to wrap.  We then amend the mounting code to find broken wrapping,
> fix the wrapping, and if we had to fix anything, set the new ROCOMPAT
> flag.  The ROCOMPAT flag prevents re-mounting on unpatched kernels, so
> this series will likely have to be backported.
> 

I haven't taken a close enough look at the code yet, but I'm more
curious about the high level approach before getting into that.. IIUC,
we'll set the rocompat bit iff we mount on a fixed kernel, found the
agfl wrapped and detected the agfl size mismatch problem. So we
essentially only restrict mounts on older kernels if we happen to detect
the size mismatch conditions on the newer kernel.

IOW, a user can mount back and forth between good/bad kernels so long as
the agfl isn't wrapped during a mount on the good kernel, and then at
some seemingly random point in the future said user might be permanently
restricted from rw mounts on the older kernel based on the issue being
detected/cleared in the new kernel. That seems a bit.. heavy-handed for
such an unpredictable condition. Why is a warning that the user has a
busted/old/in-need-of-upgrade kernel in the mix not sufficient?

I guess I'm just not really seeing the value in using the feature bit
for this as opposed to doing something more simple like detect an agfl
with a size mismatch with respect to the current kernel and forcing the
read-only mount in that instance. E.g., similar to how we handle log
recovery byte order mismatches:

  "dirty log written in incompatible format - can't recover"

But in this case it would be more something like "agfl in invalid state,
mounting read-only, please run xfs_repair and ditch that old kernel that
caused this in the first place ..." Then we write a FAQ entry that
elaborates on how/why a user hit that problem and backport to stable
kernels as far and wide as possible. Hm?

Brian

> --D
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-14 19:56 ` [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Brian Foster
@ 2018-02-15  2:05   ` Darrick J. Wong
  2018-02-15 13:16     ` Brian Foster
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-15  2:05 UTC (permalink / raw)
  To: Brian Foster; +Cc: linux-xfs

On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> On Wed, Feb 14, 2018 at 10:12:00AM -0800, Darrick J. Wong wrote:
> > Hi all,
> > 
> > Here's a bunch of patches fixing the AGFL padding problem once and for
> > all.  When the v5 disk format was rolled out, the AGFL header definition
> > had a different padding size on 32-bit vs 64-bit systems, with the
> > result that XFS_AGFL_SIZE reports different maximum lengths depending on
> > the compiler.  In Linux 4.5 we fixed the structure definition, but this
> > has lead to sporadic corruption reports on filesystems that were
> > unmounted with a pre-4.5 kernel and a wrapped AGFL and then remounted on
> > a 4.5+ kernel.
> > 
> > To deal with these corruption problems, we introduce a new ROCOMPAT
> > feature bit to indicate that the AGFL has been scanned and guaranteed
> > not to wrap.  We then amend the mounting code to find broken wrapping,
> > fix the wrapping, and if we had to fix anything, set the new ROCOMPAT
> > flag.  The ROCOMPAT flag prevents re-mounting on unpatched kernels, so
> > this series will likely have to be backported.
> > 
> 
> I haven't taken a close enough look at the code yet, but I'm more
> curious about the high level approach before getting into that.. IIUC,
> we'll set the rocompat bit iff we mount on a fixed kernel, found the
> agfl wrapped and detected the agfl size mismatch problem. So we
> essentially only restrict mounts on older kernels if we happen to detect
> the size mismatch conditions on the newer kernel.

Correct.

> IOW, a user can mount back and forth between good/bad kernels so long as
> the agfl isn't wrapped during a mount on the good kernel, and then at
> some seemingly random point in the future said user might be permanently
> restricted from rw mounts on the older kernel based on the issue being
> detected/cleared in the new kernel. That seems a bit.. heavy-handed for
> such an unpredictable condition. Why is a warning that the user has a
> busted/old/in-need-of-upgrade kernel in the mix not sufficient?

I like the rocompat approach because we can prevent admins from mounting
filesystems on old kernels, rather than letting the mount proceed and
then blowing up in the middle of a transaction some time later when
something actually hits the badly wrapped AGFL.  If we backport this
series to the relevant distro kernels (ha!) then they will work without
incident.

The thing I dislike about this approach is that now we have this
rocompat bit that has to be backported everywhere, and it only triggers
in the specific case that (a) you have a v5 filesystem that (b) you run
a mixed environment with 4.5+ and pre-4.5 kernels and (c) happened to
land on a badly wrapped AGFL.  It's a lot of work for us (and the distro
partners) but it's the least amount of work for admins -- so long as
they keep their environments up to date.

A second solution I considered was fixing the AGFL at mount/remount-rw
time like we do in this series and adding code to move the AGFL to the
start at freeze/remount-ro/umount time.  For the common case where we
don't crash, we handle the mixed kernel environment seamlessly.  This
would be my primary choice except that it'll still blow up if we wrap
the AGFL, crash, and reboot with an old kernel.  Here too it won't fail
at mount time, it'll fail some time after mount when something tries to
modify an AGFL.

One solution involves a fair amount of backport and sw redeployment but
makes it obvious when things are broken; the other solution handles it
*almost* seamlessly... until it doesn't.

> I guess I'm just not really seeing the value in using the feature bit
> for this as opposed to doing something more simple like detect an agfl
> with a size mismatch with respect to the current kernel and forcing the
> read-only mount in that instance. E.g., similar to how we handle log
> recovery byte order mismatches:
> 
>   "dirty log written in incompatible format - can't recover"
> 
> But in this case it would be more something like "agfl in invalid state,
> mounting read-only, please run xfs_repair and ditch that old kernel that

Good point, third option: if the agfl wraps badly, printk that the agfl
is in an invalid state and that the user must mount with -o fixagfl
(which will fix the problem and set the rocompat flag) and upgrade the
kernel.  Seems kinda clunky... but all of these solutions have a wart of
some kind.

> caused this in the first place ..." Then we write a FAQ entry that
> elaborates on how/why a user hit that problem and backport to stable
> kernels as far and wide as possible. Hm?

I /do/ like the part about writing FAQ to update the kernel.

--D

> Brian
> 
> > --D
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-15  2:05   ` Darrick J. Wong
@ 2018-02-15 13:16     ` Brian Foster
  2018-02-16 17:57       ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Brian Foster @ 2018-02-15 13:16 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> > On Wed, Feb 14, 2018 at 10:12:00AM -0800, Darrick J. Wong wrote:
> > > Hi all,
> > > 
> > > Here's a bunch of patches fixing the AGFL padding problem once and for
> > > all.  When the v5 disk format was rolled out, the AGFL header definition
> > > had a different padding size on 32-bit vs 64-bit systems, with the
> > > result that XFS_AGFL_SIZE reports different maximum lengths depending on
> > > the compiler.  In Linux 4.5 we fixed the structure definition, but this
> > > has lead to sporadic corruption reports on filesystems that were
> > > unmounted with a pre-4.5 kernel and a wrapped AGFL and then remounted on
> > > a 4.5+ kernel.
> > > 
> > > To deal with these corruption problems, we introduce a new ROCOMPAT
> > > feature bit to indicate that the AGFL has been scanned and guaranteed
> > > not to wrap.  We then amend the mounting code to find broken wrapping,
> > > fix the wrapping, and if we had to fix anything, set the new ROCOMPAT
> > > flag.  The ROCOMPAT flag prevents re-mounting on unpatched kernels, so
> > > this series will likely have to be backported.
> > > 
> > 
> > I haven't taken a close enough look at the code yet, but I'm more
> > curious about the high level approach before getting into that.. IIUC,
> > we'll set the rocompat bit iff we mount on a fixed kernel, found the
> > agfl wrapped and detected the agfl size mismatch problem. So we
> > essentially only restrict mounts on older kernels if we happen to detect
> > the size mismatch conditions on the newer kernel.
> 
> Correct.
> 
> > IOW, a user can mount back and forth between good/bad kernels so long as
> > the agfl isn't wrapped during a mount on the good kernel, and then at
> > some seemingly random point in the future said user might be permanently
> > restricted from rw mounts on the older kernel based on the issue being
> > detected/cleared in the new kernel. That seems a bit.. heavy-handed for
> > such an unpredictable condition. Why is a warning that the user has a
> > busted/old/in-need-of-upgrade kernel in the mix not sufficient?
> 
> I like the rocompat approach because we can prevent admins from mounting
> filesystems on old kernels, rather than letting the mount proceed and
> then blowing up in the middle of a transaction some time later when
> something actually hits the badly wrapped AGFL.  If we backport this
> series to the relevant distro kernels (ha!) then they will work without
> incident.
> 

Ok, so the intent is to "protect" those old kernels from a filesystem
that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
filesystem that has a wrapped agfl from a fixed kernel. That sounds
reasonable.. I guess what concerns me is that the bit doesn't seem to be
used in that manner. A user can always run a fixed kernel, wrap the
agfl, then go back to the bad one and cry when it explodes.

IOW, if the goal is this kind of retroactive protection, I'd expect the
bit to be enabled any time the filesystem is in a state that would cause
problems on the old kernel. Technically, that could mean doing something
like setting the bit any time an AGFL is in a wrapped state and clearing
it when that state clears. Or perhaps setting it unconditionally on
mount and leaving it permanently would be another way to satisfy the
requirement, though certainly more heavy handed.

Note that what I dislike about other possible combinations of the above,
such as setting the bit internally/conditionally and never clearing it
(which is what I think this series currently does), is that can
potentially fool a user who actually does due diligence to test a
filesystem against two kernels (for whatever, probably odd, reason). For
example:

- mount fs on good kernel, scribble on it
- test my (bad) back up kernel, mount, scribble some more
- back to my primary kernels, everything still works, deploy!

So the whole "set the bit unconditionally" thing might be heavy-handed,
but at least that is predictable. The set/clear approach is less
predictable, but the advantage of that approach is that it is
recoverable without putting a user in a bind with no other option but to
upgrade.

> The thing I dislike about this approach is that now we have this
> rocompat bit that has to be backported everywhere, and it only triggers
> in the specific case that (a) you have a v5 filesystem that (b) you run
> a mixed environment with 4.5+ and pre-4.5 kernels and (c) happened to
> land on a badly wrapped AGFL.  It's a lot of work for us (and the distro
> partners) but it's the least amount of work for admins -- so long as
> they keep their environments up to date.
> 

We should have already backported fixes to stable kernels for the AGFL
size problem itself, right? IOW, it really should be a minimal set of
users affected by this who haven't updated their old, broken kernels
that should have stable updates available by now.

If so, then I agree.. Technicalities aside, I'm not a huge fan of
propagating a feature bit all over just to try and isolate those users.
Plus with some of the other ideas I'm throwing out above, we'd have to
start also thinking about users who might end up using two old kernels
that cross the boundary where the feature bit was introduced. Ugh. :P

Yet another way to look at it.. if we have enough users out there using
two kernels where one of them happens to be a stale/broken kernel
because they haven't upgraded to the agfl fix, what evidence do we have
that those users would actually update their variant of the "good agfl"
kernel to one that handles the agfl good/bad transition and sets a
feature bit? IOW, it seems to me that on some level the ship has sailed.

> A second solution I considered was fixing the AGFL at mount/remount-rw
> time like we do in this series and adding code to move the AGFL to the
> start at freeze/remount-ro/umount time.  For the common case where we
> don't crash, we handle the mixed kernel environment seamlessly.  This
> would be my primary choice except that it'll still blow up if we wrap
> the AGFL, crash, and reboot with an old kernel.  Here too it won't fail
> at mount time, it'll fail some time after mount when something tries to
> modify an AGFL.
> 

Interesting idea, but I also think it would be unfortunate to alter our
normal/common runtime behavior to pacify some old, broken kernel that
also has stable fixes. To get around the crash thing, we'd probably have
to shuffle the AGFL locations on the fly, and that would be even worse
IMO.

> One solution involves a fair amount of backport and sw redeployment but
> makes it obvious when things are broken; the other solution handles it
> *almost* seamlessly... until it doesn't.
> 
> > I guess I'm just not really seeing the value in using the feature bit
> > for this as opposed to doing something more simple like detect an agfl
> > with a size mismatch with respect to the current kernel and forcing the
> > read-only mount in that instance. E.g., similar to how we handle log
> > recovery byte order mismatches:
> > 
> >   "dirty log written in incompatible format - can't recover"
> > 
> > But in this case it would be more something like "agfl in invalid state,
> > mounting read-only, please run xfs_repair and ditch that old kernel that
> 
> Good point, third option: if the agfl wraps badly, printk that the agfl
> is in an invalid state and that the user must mount with -o fixagfl
> (which will fix the problem and set the rocompat flag) and upgrade the
> kernel.  Seems kinda clunky... but all of these solutions have a wart of
> some kind.
> 

Hmm, I still think my preferred approach is to mount time detect,
enforce read-only and tell the user to xfs_repair[1]. It's the most
simple implementation as we don't have to carry fixup code in the kernel
for such an uncommon situation (which facilitates a broad backport
strategy). It protects the users on stable kernels who pull in the
latest bug fixes, including the guy who goes from a bad kernel to a good
one, and all kernels going forward without having to use a feature bit.
We don't do anything for the guy who wraps the agfl on newer kernels and
goes back to the bad one, but at some point he just needs to update the
broken kernel.

[1] I guess doing a mount time fix up vs. a read-only mount + warning is
a separate question from the feature bit. I'd prefer the latter for
simplicity, but perhaps there are use cases where the benefit outweighs
the risk.

Anyways, that's just my .02. There is no real great option here, I
guess. I just think that at some point maybe it's best to fix all the
kernels we can to handle the size mismatch, live with the fact that some
users might have those old, unfixed kernels and bonk them on the head to
upgrade when they (hopefully decreasingly) pop up. Perhaps others can
chime in with more thoughts..

Brian

> > caused this in the first place ..." Then we write a FAQ entry that
> > elaborates on how/why a user hit that problem and backport to stable
> > kernels as far and wide as possible. Hm?
> 
> I /do/ like the part about writing FAQ to update the kernel.
> 
> --D
> 
> > Brian
> > 
> > > --D
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-15 13:16     ` Brian Foster
@ 2018-02-16 17:57       ` Darrick J. Wong
  2018-02-19 13:54         ` Brian Foster
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-16 17:57 UTC (permalink / raw)
  To: Brian Foster; +Cc: linux-xfs

On Thu, Feb 15, 2018 at 08:16:30AM -0500, Brian Foster wrote:
> On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> > On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> > > On Wed, Feb 14, 2018 at 10:12:00AM -0800, Darrick J. Wong wrote:
> > > > Hi all,
> > > > 
> > > > Here's a bunch of patches fixing the AGFL padding problem once and for
> > > > all.  When the v5 disk format was rolled out, the AGFL header definition
> > > > had a different padding size on 32-bit vs 64-bit systems, with the
> > > > result that XFS_AGFL_SIZE reports different maximum lengths depending on
> > > > the compiler.  In Linux 4.5 we fixed the structure definition, but this
> > > > has lead to sporadic corruption reports on filesystems that were
> > > > unmounted with a pre-4.5 kernel and a wrapped AGFL and then remounted on
> > > > a 4.5+ kernel.
> > > > 
> > > > To deal with these corruption problems, we introduce a new ROCOMPAT
> > > > feature bit to indicate that the AGFL has been scanned and guaranteed
> > > > not to wrap.  We then amend the mounting code to find broken wrapping,
> > > > fix the wrapping, and if we had to fix anything, set the new ROCOMPAT
> > > > flag.  The ROCOMPAT flag prevents re-mounting on unpatched kernels, so
> > > > this series will likely have to be backported.
> > > > 
> > > 
> > > I haven't taken a close enough look at the code yet, but I'm more
> > > curious about the high level approach before getting into that.. IIUC,
> > > we'll set the rocompat bit iff we mount on a fixed kernel, found the
> > > agfl wrapped and detected the agfl size mismatch problem. So we
> > > essentially only restrict mounts on older kernels if we happen to detect
> > > the size mismatch conditions on the newer kernel.
> > 
> > Correct.
> > 
> > > IOW, a user can mount back and forth between good/bad kernels so long as
> > > the agfl isn't wrapped during a mount on the good kernel, and then at
> > > some seemingly random point in the future said user might be permanently
> > > restricted from rw mounts on the older kernel based on the issue being
> > > detected/cleared in the new kernel. That seems a bit.. heavy-handed for
> > > such an unpredictable condition. Why is a warning that the user has a
> > > busted/old/in-need-of-upgrade kernel in the mix not sufficient?
> > 
> > I like the rocompat approach because we can prevent admins from mounting
> > filesystems on old kernels, rather than letting the mount proceed and
> > then blowing up in the middle of a transaction some time later when
> > something actually hits the badly wrapped AGFL.  If we backport this
> > series to the relevant distro kernels (ha!) then they will work without
> > incident.
> > 
> 
> Ok, so the intent is to "protect" those old kernels from a filesystem
> that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
> filesystem that has a wrapped agfl from a fixed kernel. That sounds
> reasonable.. I guess what concerns me is that the bit doesn't seem to be
> used in that manner. A user can always run a fixed kernel, wrap the
> agfl, then go back to the bad one and cry when it explodes.
> 
> IOW, if the goal is this kind of retroactive protection, I'd expect the
> bit to be enabled any time the filesystem is in a state that would cause
> problems on the old kernel. Technically, that could mean doing something
> like setting the bit any time an AGFL is in a wrapped state and clearing
> it when that state clears. Or perhaps setting it unconditionally on
> mount and leaving it permanently would be another way to satisfy the
> requirement, though certainly more heavy handed.

The previous version of this series did that, though Dave suggested I
change it to set the bit only if we actually touched an agfl.  I
probably should have pushed back harder, but it was only rfc at the
time.

> Note that what I dislike about other possible combinations of the above,
> such as setting the bit internally/conditionally and never clearing it
> (which is what I think this series currently does), is that can
> potentially fool a user who actually does due diligence to test a
> filesystem against two kernels (for whatever, probably odd, reason). For
> example:
> 
> - mount fs on good kernel, scribble on it
> - test my (bad) back up kernel, mount, scribble some more
> - back to my primary kernels, everything still works, deploy!
> 
> So the whole "set the bit unconditionally" thing might be heavy-handed,
> but at least that is predictable. The set/clear approach is less
> predictable, but the advantage of that approach is that it is
> recoverable without putting a user in a bind with no other option but to
> upgrade.
> 
> > The thing I dislike about this approach is that now we have this
> > rocompat bit that has to be backported everywhere, and it only triggers
> > in the specific case that (a) you have a v5 filesystem that (b) you run
> > a mixed environment with 4.5+ and pre-4.5 kernels and (c) happened to
> > land on a badly wrapped AGFL.  It's a lot of work for us (and the distro
> > partners) but it's the least amount of work for admins -- so long as
> > they keep their environments up to date.
> > 
> 
> We should have already backported fixes to stable kernels for the AGFL
> size problem itself, right? IOW, it really should be a minimal set of
> users affected by this who haven't updated their old, broken kernels
> that should have stable updates available by now.

Eric tells me RHEL xfsprogs/kernels ship with the agfl fix reverted /
not applied, so there are potentially a lot of people who haven't
updated... :/

(I also find it weird that RHEL7 changed the mkfs defaults post-GA to
enable v5 by default, doesn't that create compatibility problems?)

> If so, then I agree.. Technicalities aside, I'm not a huge fan of
> propagating a feature bit all over just to try and isolate those users.
> Plus with some of the other ideas I'm throwing out above, we'd have to
> start also thinking about users who might end up using two old kernels
> that cross the boundary where the feature bit was introduced. Ugh. :P
> 
> Yet another way to look at it.. if we have enough users out there using
> two kernels where one of them happens to be a stale/broken kernel
> because they haven't upgraded to the agfl fix, what evidence do we have
> that those users would actually update their variant of the "good agfl"
> kernel to one that handles the agfl good/bad transition and sets a
> feature bit? IOW, it seems to me that on some level the ship has sailed.

That's difficult to know.  Some customers have certified configurations
and never update (and never run mixed environments), others are fairly
good at pulling down the quarterly updates, and others do risky things
like run mixed $distrokernel series, because mixing 2.6.39 and 4.1 is
fun.

> > A second solution I considered was fixing the AGFL at mount/remount-rw
> > time like we do in this series and adding code to move the AGFL to the
> > start at freeze/remount-ro/umount time.  For the common case where we
> > don't crash, we handle the mixed kernel environment seamlessly.  This
> > would be my primary choice except that it'll still blow up if we wrap
> > the AGFL, crash, and reboot with an old kernel.  Here too it won't fail
> > at mount time, it'll fail some time after mount when something tries to
> > modify an AGFL.
> > 
> 
> Interesting idea, but I also think it would be unfortunate to alter our
> normal/common runtime behavior to pacify some old, broken kernel that
> also has stable fixes. To get around the crash thing, we'd probably have
> to shuffle the AGFL locations on the fly, and that would be even worse
> IMO.

Agreed, in the long run everyone will be post-4.5 so we should minimize
the clutter and pain.

> > One solution involves a fair amount of backport and sw redeployment but
> > makes it obvious when things are broken; the other solution handles it
> > *almost* seamlessly... until it doesn't.
> > 
> > > I guess I'm just not really seeing the value in using the feature bit
> > > for this as opposed to doing something more simple like detect an agfl
> > > with a size mismatch with respect to the current kernel and forcing the
> > > read-only mount in that instance. E.g., similar to how we handle log
> > > recovery byte order mismatches:
> > > 
> > >   "dirty log written in incompatible format - can't recover"
> > > 
> > > But in this case it would be more something like "agfl in invalid state,
> > > mounting read-only, please run xfs_repair and ditch that old kernel that
> > 
> > Good point, third option: if the agfl wraps badly, printk that the agfl
> > is in an invalid state and that the user must mount with -o fixagfl
> > (which will fix the problem and set the rocompat flag) and upgrade the
> > kernel.  Seems kinda clunky... but all of these solutions have a wart of
> > some kind.
> > 
> 
> Hmm, I still think my preferred approach is to mount time detect,
> enforce read-only and tell the user to xfs_repair[1]. It's the most

I think this is difficult to do for the root fs -- in general I don't
think initrds ship with xfs_repair and the logic to detect the "refuses
to mount rw" condition and react to that with xfs_repair and a second
mount attempt.

Also I think this doesn't work if we do an initial ro mount and then try
to remount-rw...

> simple implementation as we don't have to carry fixup code in the kernel
> for such an uncommon situation (which facilitates a broad backport
> strategy). It protects the users on stable kernels who pull in the
> latest bug fixes, including the guy who goes from a bad kernel to a good
> one, and all kernels going forward without having to use a feature bit.
> We don't do anything for the guy who wraps the agfl on newer kernels and
> goes back to the bad one, but at some point he just needs to update the
> broken kernel.
> 
> [1] I guess doing a mount time fix up vs. a read-only mount + warning is
> a separate question from the feature bit. I'd prefer the latter for
> simplicity, but perhaps there are use cases where the benefit outweighs
> the risk.
> 
> Anyways, that's just my .02. There is no real great option here, I
> guess. I just think that at some point maybe it's best to fix all the
> kernels we can to handle the size mismatch, live with the fact that some
> users might have those old, unfixed kernels and bonk them on the head to
> upgrade when they (hopefully decreasingly) pop up. Perhaps others can
> chime in with more thoughts..

Yeah, we could do that too (make all the next releases of
rhel/ol/debian/ubuntu/sles/whatever quietly fix the bad-wrap agfls and
skip the feature bit) and just let the unpatched kernels rot.  It keeps
coming up, though...

--D

> 
> Brian
> 
> > > caused this in the first place ..." Then we write a FAQ entry that
> > > elaborates on how/why a user hit that problem and backport to stable
> > > kernels as far and wide as possible. Hm?
> > 
> > I /do/ like the part about writing FAQ to update the kernel.
> > 
> > --D
> > 
> > > Brian
> > > 
> > > > --D
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-16 17:57       ` Darrick J. Wong
@ 2018-02-19 13:54         ` Brian Foster
  2018-02-20 17:08           ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Brian Foster @ 2018-02-19 13:54 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Fri, Feb 16, 2018 at 09:57:01AM -0800, Darrick J. Wong wrote:
> On Thu, Feb 15, 2018 at 08:16:30AM -0500, Brian Foster wrote:
> > On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> > > On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> > > > On Wed, Feb 14, 2018 at 10:12:00AM -0800, Darrick J. Wong wrote:
> > > > > Hi all,
> > > > > 
> > > > > Here's a bunch of patches fixing the AGFL padding problem once and for
> > > > > all.  When the v5 disk format was rolled out, the AGFL header definition
> > > > > had a different padding size on 32-bit vs 64-bit systems, with the
> > > > > result that XFS_AGFL_SIZE reports different maximum lengths depending on
> > > > > the compiler.  In Linux 4.5 we fixed the structure definition, but this
> > > > > has lead to sporadic corruption reports on filesystems that were
> > > > > unmounted with a pre-4.5 kernel and a wrapped AGFL and then remounted on
> > > > > a 4.5+ kernel.
> > > > > 
> > > > > To deal with these corruption problems, we introduce a new ROCOMPAT
> > > > > feature bit to indicate that the AGFL has been scanned and guaranteed
> > > > > not to wrap.  We then amend the mounting code to find broken wrapping,
> > > > > fix the wrapping, and if we had to fix anything, set the new ROCOMPAT
> > > > > flag.  The ROCOMPAT flag prevents re-mounting on unpatched kernels, so
> > > > > this series will likely have to be backported.
> > > > > 
> > > > 
> > > > I haven't taken a close enough look at the code yet, but I'm more
> > > > curious about the high level approach before getting into that.. IIUC,
> > > > we'll set the rocompat bit iff we mount on a fixed kernel, found the
> > > > agfl wrapped and detected the agfl size mismatch problem. So we
> > > > essentially only restrict mounts on older kernels if we happen to detect
> > > > the size mismatch conditions on the newer kernel.
> > > 
> > > Correct.
> > > 
> > > > IOW, a user can mount back and forth between good/bad kernels so long as
> > > > the agfl isn't wrapped during a mount on the good kernel, and then at
> > > > some seemingly random point in the future said user might be permanently
> > > > restricted from rw mounts on the older kernel based on the issue being
> > > > detected/cleared in the new kernel. That seems a bit.. heavy-handed for
> > > > such an unpredictable condition. Why is a warning that the user has a
> > > > busted/old/in-need-of-upgrade kernel in the mix not sufficient?
> > > 
> > > I like the rocompat approach because we can prevent admins from mounting
> > > filesystems on old kernels, rather than letting the mount proceed and
> > > then blowing up in the middle of a transaction some time later when
> > > something actually hits the badly wrapped AGFL.  If we backport this
> > > series to the relevant distro kernels (ha!) then they will work without
> > > incident.
> > > 
> > 
> > Ok, so the intent is to "protect" those old kernels from a filesystem
> > that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
> > filesystem that has a wrapped agfl from a fixed kernel. That sounds
> > reasonable.. I guess what concerns me is that the bit doesn't seem to be
> > used in that manner. A user can always run a fixed kernel, wrap the
> > agfl, then go back to the bad one and cry when it explodes.
> > 
> > IOW, if the goal is this kind of retroactive protection, I'd expect the
> > bit to be enabled any time the filesystem is in a state that would cause
> > problems on the old kernel. Technically, that could mean doing something
> > like setting the bit any time an AGFL is in a wrapped state and clearing
> > it when that state clears. Or perhaps setting it unconditionally on
> > mount and leaving it permanently would be another way to satisfy the
> > requirement, though certainly more heavy handed.
> 
> The previous version of this series did that, though Dave suggested I
> change it to set the bit only if we actually touched an agfl.  I
> probably should have pushed back harder, but it was only rfc at the
> time.
> 

It's not clear which of the above options you're referring to. FWIW, the
former seems notably more usable to me than the latter.

> > Note that what I dislike about other possible combinations of the above,
> > such as setting the bit internally/conditionally and never clearing it
> > (which is what I think this series currently does), is that can
> > potentially fool a user who actually does due diligence to test a
> > filesystem against two kernels (for whatever, probably odd, reason). For
> > example:
> > 
> > - mount fs on good kernel, scribble on it
> > - test my (bad) back up kernel, mount, scribble some more
> > - back to my primary kernels, everything still works, deploy!
> > 
> > So the whole "set the bit unconditionally" thing might be heavy-handed,
> > but at least that is predictable. The set/clear approach is less
> > predictable, but the advantage of that approach is that it is
> > recoverable without putting a user in a bind with no other option but to
> > upgrade.
> > 
> > > The thing I dislike about this approach is that now we have this
> > > rocompat bit that has to be backported everywhere, and it only triggers
> > > in the specific case that (a) you have a v5 filesystem that (b) you run
> > > a mixed environment with 4.5+ and pre-4.5 kernels and (c) happened to
> > > land on a badly wrapped AGFL.  It's a lot of work for us (and the distro
> > > partners) but it's the least amount of work for admins -- so long as
> > > they keep their environments up to date.
> > > 
> > 
> > We should have already backported fixes to stable kernels for the AGFL
> > size problem itself, right? IOW, it really should be a minimal set of
> > users affected by this who haven't updated their old, broken kernels
> > that should have stable updates available by now.
> 
> Eric tells me RHEL xfsprogs/kernels ship with the agfl fix reverted /
> not applied, so there are potentially a lot of people who haven't
> updated... :/
> 

Eh, I suppose that could be another problem. I guess that is "our
problem" though, and not something we should allow to factor into
upstream decisions.

> (I also find it weird that RHEL7 changed the mkfs defaults post-GA to
> enable v5 by default, doesn't that create compatibility problems?)
> 

I don't recall the reasoning behind that decision. It may have just been
early enough for the benefit to outweigh the risk.

> > If so, then I agree.. Technicalities aside, I'm not a huge fan of
> > propagating a feature bit all over just to try and isolate those users.
> > Plus with some of the other ideas I'm throwing out above, we'd have to
> > start also thinking about users who might end up using two old kernels
> > that cross the boundary where the feature bit was introduced. Ugh. :P
> > 
> > Yet another way to look at it.. if we have enough users out there using
> > two kernels where one of them happens to be a stale/broken kernel
> > because they haven't upgraded to the agfl fix, what evidence do we have
> > that those users would actually update their variant of the "good agfl"
> > kernel to one that handles the agfl good/bad transition and sets a
> > feature bit? IOW, it seems to me that on some level the ship has sailed.
> 
> That's difficult to know.  Some customers have certified configurations
> and never update (and never run mixed environments), others are fairly
> good at pulling down the quarterly updates, and others do risky things
> like run mixed $distrokernel series, because mixing 2.6.39 and 4.1 is
> fun.
> 

Heh, indeed. I guess this is part of the reason why I wonder whether a
feature bit could create as many user issues as it resolves. In a sense,
we're floating another compatibility obstacle downstream (by that I mean
upstream -stable) that users have to navigate around.

> > > A second solution I considered was fixing the AGFL at mount/remount-rw
> > > time like we do in this series and adding code to move the AGFL to the
> > > start at freeze/remount-ro/umount time.  For the common case where we
> > > don't crash, we handle the mixed kernel environment seamlessly.  This
> > > would be my primary choice except that it'll still blow up if we wrap
> > > the AGFL, crash, and reboot with an old kernel.  Here too it won't fail
> > > at mount time, it'll fail some time after mount when something tries to
> > > modify an AGFL.
> > > 
> > 
> > Interesting idea, but I also think it would be unfortunate to alter our
> > normal/common runtime behavior to pacify some old, broken kernel that
> > also has stable fixes. To get around the crash thing, we'd probably have
> > to shuffle the AGFL locations on the fly, and that would be even worse
> > IMO.
> 
> Agreed, in the long run everyone will be post-4.5 so we should minimize
> the clutter and pain.
> 
> > > One solution involves a fair amount of backport and sw redeployment but
> > > makes it obvious when things are broken; the other solution handles it
> > > *almost* seamlessly... until it doesn't.
> > > 
> > > > I guess I'm just not really seeing the value in using the feature bit
> > > > for this as opposed to doing something more simple like detect an agfl
> > > > with a size mismatch with respect to the current kernel and forcing the
> > > > read-only mount in that instance. E.g., similar to how we handle log
> > > > recovery byte order mismatches:
> > > > 
> > > >   "dirty log written in incompatible format - can't recover"
> > > > 
> > > > But in this case it would be more something like "agfl in invalid state,
> > > > mounting read-only, please run xfs_repair and ditch that old kernel that
> > > 
> > > Good point, third option: if the agfl wraps badly, printk that the agfl
> > > is in an invalid state and that the user must mount with -o fixagfl
> > > (which will fix the problem and set the rocompat flag) and upgrade the
> > > kernel.  Seems kinda clunky... but all of these solutions have a wart of
> > > some kind.
> > > 
> > 
> > Hmm, I still think my preferred approach is to mount time detect,
> > enforce read-only and tell the user to xfs_repair[1]. It's the most
> 
> I think this is difficult to do for the root fs -- in general I don't
> think initrds ship with xfs_repair and the logic to detect the "refuses
> to mount rw" condition and react to that with xfs_repair and a second
> mount attempt.
> 

Good point. I guess what concerns me is leaving around such highly
specialized code. I figured an error check is less risk than a function
that modifies the fs. Do we have a test case that would exercise this
codepath going forward, at least?

> Also I think this doesn't work if we do an initial ro mount and then try
> to remount-rw...
> 

That one seems like it would just require a bit more code. E.g., we'd
have to cache or re-check state on ro -> rw transitions. That's
secondary to the rootfs use case justification, though..

> > simple implementation as we don't have to carry fixup code in the kernel
> > for such an uncommon situation (which facilitates a broad backport
> > strategy). It protects the users on stable kernels who pull in the
> > latest bug fixes, including the guy who goes from a bad kernel to a good
> > one, and all kernels going forward without having to use a feature bit.
> > We don't do anything for the guy who wraps the agfl on newer kernels and
> > goes back to the bad one, but at some point he just needs to update the
> > broken kernel.
> > 
> > [1] I guess doing a mount time fix up vs. a read-only mount + warning is
> > a separate question from the feature bit. I'd prefer the latter for
> > simplicity, but perhaps there are use cases where the benefit outweighs
> > the risk.
> > 
> > Anyways, that's just my .02. There is no real great option here, I
> > guess. I just think that at some point maybe it's best to fix all the
> > kernels we can to handle the size mismatch, live with the fact that some
> > users might have those old, unfixed kernels and bonk them on the head to
> > upgrade when they (hopefully decreasingly) pop up. Perhaps others can
> > chime in with more thoughts..
> 
> Yeah, we could do that too (make all the next releases of
> rhel/ol/debian/ubuntu/sles/whatever quietly fix the bad-wrap agfls and
> skip the feature bit) and just let the unpatched kernels rot.  It keeps
> coming up, though...
> 

I guess I haven't really noticed it enough to consider it an ongoing
issue (which doesn't mean it isn't :P). Any pattern to the use cache
behind these continued reports..?

>From an upstream perspective, RHEL continuing to operate in this mode
certainly does create an ongoing situation where a "current" distro
has/causes compatibility issues, particularly since some other
downstream distros may have kernels that use the upstream/fixed format.

I'll reiterate that in principle I don't think downstream decisions
should prohibit doing the right thing upstream, but for somebody on a
downstream distro who happens to test an upstream kernel (say as part of
a regression test/investigation), having the upstream kernel decide the
downstream kernel is no longer allowed to mount the fs seems kind of
mean. :P

Brian

> --D
> 
> > 
> > Brian
> > 
> > > > caused this in the first place ..." Then we write a FAQ entry that
> > > > elaborates on how/why a user hit that problem and backport to stable
> > > > kernels as far and wide as possible. Hm?
> > > 
> > > I /do/ like the part about writing FAQ to update the kernel.
> > > 
> > > --D
> > > 
> > > > Brian
> > > > 
> > > > > --D
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > the body of a message to majordomo@vger.kernel.org
> > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-19 13:54         ` Brian Foster
@ 2018-02-20 17:08           ` Darrick J. Wong
  2018-02-21 13:39             ` Brian Foster
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-20 17:08 UTC (permalink / raw)
  To: Brian Foster; +Cc: linux-xfs

On Mon, Feb 19, 2018 at 08:54:57AM -0500, Brian Foster wrote:
> On Fri, Feb 16, 2018 at 09:57:01AM -0800, Darrick J. Wong wrote:
> > On Thu, Feb 15, 2018 at 08:16:30AM -0500, Brian Foster wrote:
> > > On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> > > > On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> > > > > On Wed, Feb 14, 2018 at 10:12:00AM -0800, Darrick J. Wong wrote:
> > > > > > Hi all,
> > > > > > 
> > > > > > Here's a bunch of patches fixing the AGFL padding problem once and for
> > > > > > all.  When the v5 disk format was rolled out, the AGFL header definition
> > > > > > had a different padding size on 32-bit vs 64-bit systems, with the
> > > > > > result that XFS_AGFL_SIZE reports different maximum lengths depending on
> > > > > > the compiler.  In Linux 4.5 we fixed the structure definition, but this
> > > > > > has lead to sporadic corruption reports on filesystems that were
> > > > > > unmounted with a pre-4.5 kernel and a wrapped AGFL and then remounted on
> > > > > > a 4.5+ kernel.
> > > > > > 
> > > > > > To deal with these corruption problems, we introduce a new ROCOMPAT
> > > > > > feature bit to indicate that the AGFL has been scanned and guaranteed
> > > > > > not to wrap.  We then amend the mounting code to find broken wrapping,
> > > > > > fix the wrapping, and if we had to fix anything, set the new ROCOMPAT
> > > > > > flag.  The ROCOMPAT flag prevents re-mounting on unpatched kernels, so
> > > > > > this series will likely have to be backported.
> > > > > > 
> > > > > 
> > > > > I haven't taken a close enough look at the code yet, but I'm more
> > > > > curious about the high level approach before getting into that.. IIUC,
> > > > > we'll set the rocompat bit iff we mount on a fixed kernel, found the
> > > > > agfl wrapped and detected the agfl size mismatch problem. So we
> > > > > essentially only restrict mounts on older kernels if we happen to detect
> > > > > the size mismatch conditions on the newer kernel.
> > > > 
> > > > Correct.
> > > > 
> > > > > IOW, a user can mount back and forth between good/bad kernels so long as
> > > > > the agfl isn't wrapped during a mount on the good kernel, and then at
> > > > > some seemingly random point in the future said user might be permanently
> > > > > restricted from rw mounts on the older kernel based on the issue being
> > > > > detected/cleared in the new kernel. That seems a bit.. heavy-handed for
> > > > > such an unpredictable condition. Why is a warning that the user has a
> > > > > busted/old/in-need-of-upgrade kernel in the mix not sufficient?
> > > > 
> > > > I like the rocompat approach because we can prevent admins from mounting
> > > > filesystems on old kernels, rather than letting the mount proceed and
> > > > then blowing up in the middle of a transaction some time later when
> > > > something actually hits the badly wrapped AGFL.  If we backport this
> > > > series to the relevant distro kernels (ha!) then they will work without
> > > > incident.
> > > > 
> > > 
> > > Ok, so the intent is to "protect" those old kernels from a filesystem
> > > that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
> > > filesystem that has a wrapped agfl from a fixed kernel. That sounds
> > > reasonable.. I guess what concerns me is that the bit doesn't seem to be
> > > used in that manner. A user can always run a fixed kernel, wrap the
> > > agfl, then go back to the bad one and cry when it explodes.
> > > 
> > > IOW, if the goal is this kind of retroactive protection, I'd expect the
> > > bit to be enabled any time the filesystem is in a state that would cause
> > > problems on the old kernel. Technically, that could mean doing something
> > > like setting the bit any time an AGFL is in a wrapped state and clearing
> > > it when that state clears. Or perhaps setting it unconditionally on
> > > mount and leaving it permanently would be another way to satisfy the
> > > requirement, though certainly more heavy handed.
> > 
> > The previous version of this series did that, though Dave suggested I
> > change it to set the bit only if we actually touched an agfl.  I
> > probably should have pushed back harder, but it was only rfc at the
> > time.
> > 
> 
> It's not clear which of the above options you're referring to. FWIW, the
> former seems notably more usable to me than the latter.

Sorry about that.  The previous iteration would do:

if (!has_fixedagfl) {
	fix_agfls();
	set_fixedagfl();
}

Which probably doesn't satisfy your usability test. :)

Hmm... another possible strategy would be to fix the agfls and set the
fixedagfl bit at mount time, then at unmount/freeze/remount-ro we fix
the agfls again and clear the fixedagfl bit.  Then the only way an
unpatched kernel hits this is if we crash with a patched kernel and the
recovery mount is an unpatched kernel.

What does everyone think of this strategy?  I think I like it the most
of all the things we've put forth because the only time anyone will trip
over this rocompat bit is this one specific scenario.

(Apologies if we've already talked about this, I've gotten lost in this
thread.)

> > > Note that what I dislike about other possible combinations of the above,
> > > such as setting the bit internally/conditionally and never clearing it
> > > (which is what I think this series currently does), is that can
> > > potentially fool a user who actually does due diligence to test a
> > > filesystem against two kernels (for whatever, probably odd, reason). For
> > > example:
> > > 
> > > - mount fs on good kernel, scribble on it
> > > - test my (bad) back up kernel, mount, scribble some more
> > > - back to my primary kernels, everything still works, deploy!
> > > 
> > > So the whole "set the bit unconditionally" thing might be heavy-handed,
> > > but at least that is predictable. The set/clear approach is less
> > > predictable, but the advantage of that approach is that it is
> > > recoverable without putting a user in a bind with no other option but to
> > > upgrade.
> > > 
> > > > The thing I dislike about this approach is that now we have this
> > > > rocompat bit that has to be backported everywhere, and it only triggers
> > > > in the specific case that (a) you have a v5 filesystem that (b) you run
> > > > a mixed environment with 4.5+ and pre-4.5 kernels and (c) happened to
> > > > land on a badly wrapped AGFL.  It's a lot of work for us (and the distro
> > > > partners) but it's the least amount of work for admins -- so long as
> > > > they keep their environments up to date.
> > > > 
> > > 
> > > We should have already backported fixes to stable kernels for the AGFL
> > > size problem itself, right? IOW, it really should be a minimal set of
> > > users affected by this who haven't updated their old, broken kernels
> > > that should have stable updates available by now.
> > 
> > Eric tells me RHEL xfsprogs/kernels ship with the agfl fix reverted /
> > not applied, so there are potentially a lot of people who haven't
> > updated... :/
> > 
> 
> Eh, I suppose that could be another problem. I guess that is "our
> problem" though, and not something we should allow to factor into
> upstream decisions.

Ideally, yes, but OTOH I get the feeling that most of us upstream
developers are also downstream developers, so we ought to try to roll
this out in a coordinated and least-clumsy fashion.

> > (I also find it weird that RHEL7 changed the mkfs defaults post-GA to
> > enable v5 by default, doesn't that create compatibility problems?)
> > 
> 
> I don't recall the reasoning behind that decision. It may have just been
> early enough for the benefit to outweigh the risk.
> 
> > > If so, then I agree.. Technicalities aside, I'm not a huge fan of
> > > propagating a feature bit all over just to try and isolate those users.
> > > Plus with some of the other ideas I'm throwing out above, we'd have to
> > > start also thinking about users who might end up using two old kernels
> > > that cross the boundary where the feature bit was introduced. Ugh. :P
> > > 
> > > Yet another way to look at it.. if we have enough users out there using
> > > two kernels where one of them happens to be a stale/broken kernel
> > > because they haven't upgraded to the agfl fix, what evidence do we have
> > > that those users would actually update their variant of the "good agfl"
> > > kernel to one that handles the agfl good/bad transition and sets a
> > > feature bit? IOW, it seems to me that on some level the ship has sailed.
> > 
> > That's difficult to know.  Some customers have certified configurations
> > and never update (and never run mixed environments), others are fairly
> > good at pulling down the quarterly updates, and others do risky things
> > like run mixed $distrokernel series, because mixing 2.6.39 and 4.1 is
> > fun.
> > 
> 
> Heh, indeed. I guess this is part of the reason why I wonder whether a
> feature bit could create as many user issues as it resolves. In a sense,
> we're floating another compatibility obstacle downstream (by that I mean
> upstream -stable) that users have to navigate around.

Yeah... the obstacle is annoying but so is crashing on a wrapped agfl.

> > > > A second solution I considered was fixing the AGFL at mount/remount-rw
> > > > time like we do in this series and adding code to move the AGFL to the
> > > > start at freeze/remount-ro/umount time.  For the common case where we
> > > > don't crash, we handle the mixed kernel environment seamlessly.  This
> > > > would be my primary choice except that it'll still blow up if we wrap
> > > > the AGFL, crash, and reboot with an old kernel.  Here too it won't fail
> > > > at mount time, it'll fail some time after mount when something tries to
> > > > modify an AGFL.
> > > > 
> > > 
> > > Interesting idea, but I also think it would be unfortunate to alter our
> > > normal/common runtime behavior to pacify some old, broken kernel that
> > > also has stable fixes. To get around the crash thing, we'd probably have
> > > to shuffle the AGFL locations on the fly, and that would be even worse
> > > IMO.
> > 
> > Agreed, in the long run everyone will be post-4.5 so we should minimize
> > the clutter and pain.
> > 
> > > > One solution involves a fair amount of backport and sw redeployment but
> > > > makes it obvious when things are broken; the other solution handles it
> > > > *almost* seamlessly... until it doesn't.
> > > > 
> > > > > I guess I'm just not really seeing the value in using the feature bit
> > > > > for this as opposed to doing something more simple like detect an agfl
> > > > > with a size mismatch with respect to the current kernel and forcing the
> > > > > read-only mount in that instance. E.g., similar to how we handle log
> > > > > recovery byte order mismatches:
> > > > > 
> > > > >   "dirty log written in incompatible format - can't recover"
> > > > > 
> > > > > But in this case it would be more something like "agfl in invalid state,
> > > > > mounting read-only, please run xfs_repair and ditch that old kernel that
> > > > 
> > > > Good point, third option: if the agfl wraps badly, printk that the agfl
> > > > is in an invalid state and that the user must mount with -o fixagfl
> > > > (which will fix the problem and set the rocompat flag) and upgrade the
> > > > kernel.  Seems kinda clunky... but all of these solutions have a wart of
> > > > some kind.
> > > > 
> > > 
> > > Hmm, I still think my preferred approach is to mount time detect,
> > > enforce read-only and tell the user to xfs_repair[1]. It's the most
> > 
> > I think this is difficult to do for the root fs -- in general I don't
> > think initrds ship with xfs_repair and the logic to detect the "refuses
> > to mount rw" condition and react to that with xfs_repair and a second
> > mount attempt.
> > 
> 
> Good point. I guess what concerns me is leaving around such highly
> specialized code. I figured an error check is less risk than a function
> that modifies the fs. Do we have a test case that would exercise this
> codepath going forward, at least?

I did write xfstests for the general case, though apparently I've been
lagging xfstests and haven't sent it.  :/

Oh, right, we're still discussing semantics & existence of an rocompat
bit, which affects the golden output.

> > Also I think this doesn't work if we do an initial ro mount and then try
> > to remount-rw...
> > 
> 
> That one seems like it would just require a bit more code. E.g., we'd
> have to cache or re-check state on ro -> rw transitions. That's
> secondary to the rootfs use case justification, though..

<nod>

> > > simple implementation as we don't have to carry fixup code in the kernel
> > > for such an uncommon situation (which facilitates a broad backport
> > > strategy). It protects the users on stable kernels who pull in the
> > > latest bug fixes, including the guy who goes from a bad kernel to a good
> > > one, and all kernels going forward without having to use a feature bit.
> > > We don't do anything for the guy who wraps the agfl on newer kernels and
> > > goes back to the bad one, but at some point he just needs to update the
> > > broken kernel.
> > > 
> > > [1] I guess doing a mount time fix up vs. a read-only mount + warning is
> > > a separate question from the feature bit. I'd prefer the latter for
> > > simplicity, but perhaps there are use cases where the benefit outweighs
> > > the risk.
> > > 
> > > Anyways, that's just my .02. There is no real great option here, I
> > > guess. I just think that at some point maybe it's best to fix all the
> > > kernels we can to handle the size mismatch, live with the fact that some
> > > users might have those old, unfixed kernels and bonk them on the head to
> > > upgrade when they (hopefully decreasingly) pop up. Perhaps others can
> > > chime in with more thoughts..
> > 
> > Yeah, we could do that too (make all the next releases of
> > rhel/ol/debian/ubuntu/sles/whatever quietly fix the bad-wrap agfls and
> > skip the feature bit) and just let the unpatched kernels rot.  It keeps
> > coming up, though...
> > 
> 
> I guess I haven't really noticed it enough to consider it an ongoing
> issue (which doesn't mean it isn't :P). Any pattern to the use cache
> behind these continued reports..?

No particular pattern, other than using/freeing agfl blocks.

> From an upstream perspective, RHEL continuing to operate in this mode
> certainly does create an ongoing situation where a "current" distro
> has/causes compatibility issues, particularly since some other
> downstream distros may have kernels that use the upstream/fixed format.
> 
> I'll reiterate that in principle I don't think downstream decisions
> should prohibit doing the right thing upstream, but for somebody on a
> downstream distro who happens to test an upstream kernel (say as part of
> a regression test/investigation), having the upstream kernel decide the
> downstream kernel is no longer allowed to mount the fs seems kind of
> mean. :P

Agreed, though agfl-related crashes are also mean. :/

--D

> Brian
> 
> > --D
> > 
> > > 
> > > Brian
> > > 
> > > > > caused this in the first place ..." Then we write a FAQ entry that
> > > > > elaborates on how/why a user hit that problem and backport to stable
> > > > > kernels as far and wide as possible. Hm?
> > > > 
> > > > I /do/ like the part about writing FAQ to update the kernel.
> > > > 
> > > > --D
> > > > 
> > > > > Brian
> > > > > 
> > > > > > --D
> > > > > > --
> > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > the body of a message to majordomo@vger.kernel.org
> > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-20 17:08           ` Darrick J. Wong
@ 2018-02-21 13:39             ` Brian Foster
  2018-02-21 17:35               ` Darrick J. Wong
  2018-02-21 21:16               ` Dave Chinner
  0 siblings, 2 replies; 18+ messages in thread
From: Brian Foster @ 2018-02-21 13:39 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Tue, Feb 20, 2018 at 09:08:30AM -0800, Darrick J. Wong wrote:
> On Mon, Feb 19, 2018 at 08:54:57AM -0500, Brian Foster wrote:
> > On Fri, Feb 16, 2018 at 09:57:01AM -0800, Darrick J. Wong wrote:
> > > On Thu, Feb 15, 2018 at 08:16:30AM -0500, Brian Foster wrote:
> > > > On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> > > > > On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
...
> > > > 
> > > > Ok, so the intent is to "protect" those old kernels from a filesystem
> > > > that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
> > > > filesystem that has a wrapped agfl from a fixed kernel. That sounds
> > > > reasonable.. I guess what concerns me is that the bit doesn't seem to be
> > > > used in that manner. A user can always run a fixed kernel, wrap the
> > > > agfl, then go back to the bad one and cry when it explodes.
> > > > 
> > > > IOW, if the goal is this kind of retroactive protection, I'd expect the
> > > > bit to be enabled any time the filesystem is in a state that would cause
> > > > problems on the old kernel. Technically, that could mean doing something
> > > > like setting the bit any time an AGFL is in a wrapped state and clearing
> > > > it when that state clears. Or perhaps setting it unconditionally on
> > > > mount and leaving it permanently would be another way to satisfy the
> > > > requirement, though certainly more heavy handed.
> > > 
> > > The previous version of this series did that, though Dave suggested I
> > > change it to set the bit only if we actually touched an agfl.  I
> > > probably should have pushed back harder, but it was only rfc at the
> > > time.
> > > 
> > 
> > It's not clear which of the above options you're referring to. FWIW, the
> > former seems notably more usable to me than the latter.
> 
> Sorry about that.  The previous iteration would do:
> 
> if (!has_fixedagfl) {
> 	fix_agfls();
> 	set_fixedagfl();
> }
> 
> Which probably doesn't satisfy your usability test. :)
> 
> Hmm... another possible strategy would be to fix the agfls and set the
> fixedagfl bit at mount time, then at unmount/freeze/remount-ro we fix
> the agfls again and clear the fixedagfl bit.  Then the only way an
> unpatched kernel hits this is if we crash with a patched kernel and the
> recovery mount is an unpatched kernel.
> 

What's the purpose of "fixing the agfls again" at unmount time? Hasn't
the patched kernel already addressed the size mismatch at mount time?

> What does everyone think of this strategy?  I think I like it the most
> of all the things we've put forth because the only time anyone will trip
> over this rocompat bit is this one specific scenario.
> 
> (Apologies if we've already talked about this, I've gotten lost in this
> thread.)
> 
...
> > > > 
> > > > Hmm, I still think my preferred approach is to mount time detect,
> > > > enforce read-only and tell the user to xfs_repair[1]. It's the most
> > > 
> > > I think this is difficult to do for the root fs -- in general I don't
> > > think initrds ship with xfs_repair and the logic to detect the "refuses
> > > to mount rw" condition and react to that with xfs_repair and a second
> > > mount attempt.
> > > 
> > 
> > Good point. I guess what concerns me is leaving around such highly
> > specialized code. I figured an error check is less risk than a function
> > that modifies the fs. Do we have a test case that would exercise this
> > codepath going forward, at least?
> 
> I did write xfstests for the general case, though apparently I've been
> lagging xfstests and haven't sent it.  :/
> 
> Oh, right, we're still discussing semantics & existence of an rocompat
> bit, which affects the golden output.
> 

Ok..

> > > Also I think this doesn't work if we do an initial ro mount and then try
> > > to remount-rw...
> > > 
> > 
> > That one seems like it would just require a bit more code. E.g., we'd
> > have to cache or re-check state on ro -> rw transitions. That's
> > secondary to the rootfs use case justification, though..
> 
> <nod>
> 
> > > > simple implementation as we don't have to carry fixup code in the kernel
> > > > for such an uncommon situation (which facilitates a broad backport
> > > > strategy). It protects the users on stable kernels who pull in the
> > > > latest bug fixes, including the guy who goes from a bad kernel to a good
> > > > one, and all kernels going forward without having to use a feature bit.
> > > > We don't do anything for the guy who wraps the agfl on newer kernels and
> > > > goes back to the bad one, but at some point he just needs to update the
> > > > broken kernel.
> > > > 
> > > > [1] I guess doing a mount time fix up vs. a read-only mount + warning is
> > > > a separate question from the feature bit. I'd prefer the latter for
> > > > simplicity, but perhaps there are use cases where the benefit outweighs
> > > > the risk.
> > > > 
> > > > Anyways, that's just my .02. There is no real great option here, I
> > > > guess. I just think that at some point maybe it's best to fix all the
> > > > kernels we can to handle the size mismatch, live with the fact that some
> > > > users might have those old, unfixed kernels and bonk them on the head to
> > > > upgrade when they (hopefully decreasingly) pop up. Perhaps others can
> > > > chime in with more thoughts..
> > > 
> > > Yeah, we could do that too (make all the next releases of
> > > rhel/ol/debian/ubuntu/sles/whatever quietly fix the bad-wrap agfls and
> > > skip the feature bit) and just let the unpatched kernels rot.  It keeps
> > > coming up, though...
> > > 
> > 
> > I guess I haven't really noticed it enough to consider it an ongoing
> > issue (which doesn't mean it isn't :P). Any pattern to the use cache
> > behind these continued reports..?
> 
> No particular pattern, other than using/freeing agfl blocks.
> 

I'm more curious about the use case than the workload. E.g., what's the
purpose for using two kernels? Was it bad luck on an upgrade or
something that implies the problematic state could reoccur (i.e., active
switching between good/bad kernels)?

IOW, if the "it keeps coming up" situation is simply because the
existing userbase hasn't fully upgraded yet, then a simple detect/repair
forward-fix patch is going to be sufficient (perhaps with a warning not
to revert to $previous kernel) to resolve the problem for the remaining
set of users who are susceptible to the problem.

If the problem is instead a set of users that are switching back and
forth for whatever reason, then perhaps a feature bit policy is more
justified.

> > From an upstream perspective, RHEL continuing to operate in this mode
> > certainly does create an ongoing situation where a "current" distro
> > has/causes compatibility issues, particularly since some other
> > downstream distros may have kernels that use the upstream/fixed format.
> > 
> > I'll reiterate that in principle I don't think downstream decisions
> > should prohibit doing the right thing upstream, but for somebody on a
> > downstream distro who happens to test an upstream kernel (say as part of
> > a regression test/investigation), having the upstream kernel decide the
> > downstream kernel is no longer allowed to mount the fs seems kind of
> > mean. :P
> 
> Agreed, though agfl-related crashes are also mean. :/
> 

Yeah, but you can recover from that. In the use case above, the feature
bit has permanently prevented the user from going back to the distro
kernel. So this is one semi-realistic "switching between kernels" use
case where I think this feature bit actually hurts more than it helps.
If it were a big enough problem, I'd much rather have the downstream
kernel implement the feature bit to indicate that the upstream kernel
shouldn't/can't mount this filesystem than retroactively doing the
opposite. Even then, I still think I'd prefer the downstream kernel just
detect, fail to mount and encourage repair so we don't have to support
the strange transition from a clearly unsupported sequence (but this is
all downstream/distro discussion).

FWIW, if you actually want to make progress on this in absense of any
other feedback, I think the best approach is to refactor this series to
separate the detect/repair mechanism from all of this feature bit policy
stuff. The former all seems reasonable/justified to me based on your
explanations, so I'd be happy to review those portions of it without the
feature bit (which could still be tacked on for further review,
reordered appropriately on merge if ultimately necessary, etc.) and
consider the feature bit separately based on justification for added
benefit over the former.

Brian

> --D
> 
> > Brian
> > 
> > > --D
> > > 
> > > > 
> > > > Brian
> > > > 
> > > > > > caused this in the first place ..." Then we write a FAQ entry that
> > > > > > elaborates on how/why a user hit that problem and backport to stable
> > > > > > kernels as far and wide as possible. Hm?
> > > > > 
> > > > > I /do/ like the part about writing FAQ to update the kernel.
> > > > > 
> > > > > --D
> > > > > 
> > > > > > Brian
> > > > > > 
> > > > > > > --D
> > > > > > > --
> > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > --
> > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > the body of a message to majordomo@vger.kernel.org
> > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-21 13:39             ` Brian Foster
@ 2018-02-21 17:35               ` Darrick J. Wong
  2018-02-22 11:55                 ` Brian Foster
  2018-02-21 21:16               ` Dave Chinner
  1 sibling, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-21 17:35 UTC (permalink / raw)
  To: Brian Foster; +Cc: linux-xfs

On Wed, Feb 21, 2018 at 08:39:16AM -0500, Brian Foster wrote:
> On Tue, Feb 20, 2018 at 09:08:30AM -0800, Darrick J. Wong wrote:
> > On Mon, Feb 19, 2018 at 08:54:57AM -0500, Brian Foster wrote:
> > > On Fri, Feb 16, 2018 at 09:57:01AM -0800, Darrick J. Wong wrote:
> > > > On Thu, Feb 15, 2018 at 08:16:30AM -0500, Brian Foster wrote:
> > > > > On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> > > > > > On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> ...
> > > > > 
> > > > > Ok, so the intent is to "protect" those old kernels from a filesystem
> > > > > that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
> > > > > filesystem that has a wrapped agfl from a fixed kernel. That sounds
> > > > > reasonable.. I guess what concerns me is that the bit doesn't seem to be
> > > > > used in that manner. A user can always run a fixed kernel, wrap the
> > > > > agfl, then go back to the bad one and cry when it explodes.
> > > > > 
> > > > > IOW, if the goal is this kind of retroactive protection, I'd expect the
> > > > > bit to be enabled any time the filesystem is in a state that would cause
> > > > > problems on the old kernel. Technically, that could mean doing something
> > > > > like setting the bit any time an AGFL is in a wrapped state and clearing
> > > > > it when that state clears. Or perhaps setting it unconditionally on
> > > > > mount and leaving it permanently would be another way to satisfy the
> > > > > requirement, though certainly more heavy handed.
> > > > 
> > > > The previous version of this series did that, though Dave suggested I
> > > > change it to set the bit only if we actually touched an agfl.  I
> > > > probably should have pushed back harder, but it was only rfc at the
> > > > time.
> > > > 
> > > 
> > > It's not clear which of the above options you're referring to. FWIW, the
> > > former seems notably more usable to me than the latter.
> > 
> > Sorry about that.  The previous iteration would do:
> > 
> > if (!has_fixedagfl) {
> > 	fix_agfls();
> > 	set_fixedagfl();
> > }
> > 
> > Which probably doesn't satisfy your usability test. :)
> > 
> > Hmm... another possible strategy would be to fix the agfls and set the
> > fixedagfl bit at mount time, then at unmount/freeze/remount-ro we fix
> > the agfls again and clear the fixedagfl bit.  Then the only way an
> > unpatched kernel hits this is if we crash with a patched kernel and the
> > recovery mount is an unpatched kernel.
> > 
> 
> What's the purpose of "fixing the agfls again" at unmount time? Hasn't
> the patched kernel already addressed the size mismatch at mount time?

Sorry, my wording there didn't match what was in my head.  I'll try
again:

At mount/remount-rw time:
if agfl list is wrapped assuming the shorter agfl length:
	fix wrapping to fit the longer 4.5+ agfl length
set fixedagfl bit

At unmount/freeze/remount-ro time:
if agfl list touches the last agfl item:
	reset agfl list to the start of the agfl
clear fixedagfl bit

This way, we're only using the rocompat bit to prevent unpatched kernels
from *recovering* filesystems that might have an agfl wrap that it
doesn't understand.

> > What does everyone think of this strategy?  I think I like it the most
> > of all the things we've put forth because the only time anyone will trip
> > over this rocompat bit is this one specific scenario.
> > 
> > (Apologies if we've already talked about this, I've gotten lost in this
> > thread.)
> > 
> ...
> > > > > 
> > > > > Hmm, I still think my preferred approach is to mount time detect,
> > > > > enforce read-only and tell the user to xfs_repair[1]. It's the most
> > > > 
> > > > I think this is difficult to do for the root fs -- in general I don't
> > > > think initrds ship with xfs_repair and the logic to detect the "refuses
> > > > to mount rw" condition and react to that with xfs_repair and a second
> > > > mount attempt.
> > > > 
> > > 
> > > Good point. I guess what concerns me is leaving around such highly
> > > specialized code. I figured an error check is less risk than a function
> > > that modifies the fs. Do we have a test case that would exercise this
> > > codepath going forward, at least?
> > 
> > I did write xfstests for the general case, though apparently I've been
> > lagging xfstests and haven't sent it.  :/
> > 
> > Oh, right, we're still discussing semantics & existence of an rocompat
> > bit, which affects the golden output.
> > 
> 
> Ok..
> 
> > > > Also I think this doesn't work if we do an initial ro mount and then try
> > > > to remount-rw...
> > > > 
> > > 
> > > That one seems like it would just require a bit more code. E.g., we'd
> > > have to cache or re-check state on ro -> rw transitions. That's
> > > secondary to the rootfs use case justification, though..
> > 
> > <nod>
> > 
> > > > > simple implementation as we don't have to carry fixup code in the kernel
> > > > > for such an uncommon situation (which facilitates a broad backport
> > > > > strategy). It protects the users on stable kernels who pull in the
> > > > > latest bug fixes, including the guy who goes from a bad kernel to a good
> > > > > one, and all kernels going forward without having to use a feature bit.
> > > > > We don't do anything for the guy who wraps the agfl on newer kernels and
> > > > > goes back to the bad one, but at some point he just needs to update the
> > > > > broken kernel.
> > > > > 
> > > > > [1] I guess doing a mount time fix up vs. a read-only mount + warning is
> > > > > a separate question from the feature bit. I'd prefer the latter for
> > > > > simplicity, but perhaps there are use cases where the benefit outweighs
> > > > > the risk.
> > > > > 
> > > > > Anyways, that's just my .02. There is no real great option here, I
> > > > > guess. I just think that at some point maybe it's best to fix all the
> > > > > kernels we can to handle the size mismatch, live with the fact that some
> > > > > users might have those old, unfixed kernels and bonk them on the head to
> > > > > upgrade when they (hopefully decreasingly) pop up. Perhaps others can
> > > > > chime in with more thoughts..
> > > > 
> > > > Yeah, we could do that too (make all the next releases of
> > > > rhel/ol/debian/ubuntu/sles/whatever quietly fix the bad-wrap agfls and
> > > > skip the feature bit) and just let the unpatched kernels rot.  It keeps
> > > > coming up, though...
> > > > 
> > > 
> > > I guess I haven't really noticed it enough to consider it an ongoing
> > > issue (which doesn't mean it isn't :P). Any pattern to the use cache
> > > behind these continued reports..?
> > 
> > No particular pattern, other than using/freeing agfl blocks.
> > 
> 
> I'm more curious about the use case than the workload. E.g., what's the
> purpose for using two kernels? Was it bad luck on an upgrade or
> something that implies the problematic state could reoccur (i.e., active
> switching between good/bad kernels)?
> 
> IOW, if the "it keeps coming up" situation is simply because the
> existing userbase hasn't fully upgraded yet, then a simple detect/repair
> forward-fix patch is going to be sufficient (perhaps with a warning not
> to revert to $previous kernel) to resolve the problem for the remaining
> set of users who are susceptible to the problem.
> 
> If the problem is instead a set of users that are switching back and
> forth for whatever reason, then perhaps a feature bit policy is more
> justified.

$employer has customers that want or have to run mixed environments.

Not many, though.

> > > From an upstream perspective, RHEL continuing to operate in this mode
> > > certainly does create an ongoing situation where a "current" distro
> > > has/causes compatibility issues, particularly since some other
> > > downstream distros may have kernels that use the upstream/fixed format.
> > > 
> > > I'll reiterate that in principle I don't think downstream decisions
> > > should prohibit doing the right thing upstream, but for somebody on a
> > > downstream distro who happens to test an upstream kernel (say as part of
> > > a regression test/investigation), having the upstream kernel decide the
> > > downstream kernel is no longer allowed to mount the fs seems kind of
> > > mean. :P
> > 
> > Agreed, though agfl-related crashes are also mean. :/
> > 
> 
> Yeah, but you can recover from that. In the use case above, the feature
> bit has permanently prevented the user from going back to the distro
> kernel. So this is one semi-realistic "switching between kernels" use
> case where I think this feature bit actually hurts more than it helps.
> If it were a big enough problem, I'd much rather have the downstream
> kernel implement the feature bit to indicate that the upstream kernel
> shouldn't/can't mount this filesystem than retroactively doing the
> opposite. Even then, I still think I'd prefer the downstream kernel just
> detect, fail to mount and encourage repair so we don't have to support
> the strange transition from a clearly unsupported sequence (but this is
> all downstream/distro discussion).
> 
> FWIW, if you actually want to make progress on this in absense of any
> other feedback, I think the best approach is to refactor this series to
> separate the detect/repair mechanism from all of this feature bit policy
> stuff. The former all seems reasonable/justified to me based on your
> explanations, so I'd be happy to review those portions of it without the
> feature bit (which could still be tacked on for further review,
> reordered appropriately on merge if ultimately necessary, etc.) and
> consider the feature bit separately based on justification for added
> benefit over the former.

Ok, I'll split out the part that fixes the AGFL and we can review those
parts separately from the feature bit stuff.

--D

> 
> Brian
> 
> > --D
> > 
> > > Brian
> > > 
> > > > --D
> > > > 
> > > > > 
> > > > > Brian
> > > > > 
> > > > > > > caused this in the first place ..." Then we write a FAQ entry that
> > > > > > > elaborates on how/why a user hit that problem and backport to stable
> > > > > > > kernels as far and wide as possible. Hm?
> > > > > > 
> > > > > > I /do/ like the part about writing FAQ to update the kernel.
> > > > > > 
> > > > > > --D
> > > > > > 
> > > > > > > Brian
> > > > > > > 
> > > > > > > > --D
> > > > > > > > --
> > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > --
> > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > --
> > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > the body of a message to majordomo@vger.kernel.org
> > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-21 13:39             ` Brian Foster
  2018-02-21 17:35               ` Darrick J. Wong
@ 2018-02-21 21:16               ` Dave Chinner
  1 sibling, 0 replies; 18+ messages in thread
From: Dave Chinner @ 2018-02-21 21:16 UTC (permalink / raw)
  To: Brian Foster; +Cc: Darrick J. Wong, linux-xfs

On Wed, Feb 21, 2018 at 08:39:16AM -0500, Brian Foster wrote:
> On Tue, Feb 20, 2018 at 09:08:30AM -0800, Darrick J. Wong wrote:
> > On Mon, Feb 19, 2018 at 08:54:57AM -0500, Brian Foster wrote:
> > > On Fri, Feb 16, 2018 at 09:57:01AM -0800, Darrick J. Wong wrote:
> > > I guess I haven't really noticed it enough to consider it an ongoing
> > > issue (which doesn't mean it isn't :P). Any pattern to the use cache
> > > behind these continued reports..?
> > 
> > No particular pattern, other than using/freeing agfl blocks.
> > 
> 
> I'm more curious about the use case than the workload. E.g., what's the
> purpose for using two kernels? Was it bad luck on an upgrade or
> something that implies the problematic state could reoccur (i.e., active
> switching between good/bad kernels)?

It's the absurb cloud image creation/deployment hoops that seem
popular these days, where an image is created on one machine,
it's mounted, modified and made ready for production on another,
and then deployed to a third machine.

We've seen this quite a few times now where the original image and
deployment targets are running the same kernel, but the machine that
does the "make ready for production" step runs a different kernel.
I've seen several setups where RHEL7 kernels were used for steps 1&3,
and a near TOT stable kernel was running step 2....

That's the problem case we have to care about here. People are
moving filesystem images from machine to machine, backwards and
forwards, because they don't have any idea that different machines
in the cloud workflow are running different kernels.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-21 17:35               ` Darrick J. Wong
@ 2018-02-22 11:55                 ` Brian Foster
  2018-02-22 18:06                   ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Brian Foster @ 2018-02-22 11:55 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Wed, Feb 21, 2018 at 09:35:46AM -0800, Darrick J. Wong wrote:
> On Wed, Feb 21, 2018 at 08:39:16AM -0500, Brian Foster wrote:
> > On Tue, Feb 20, 2018 at 09:08:30AM -0800, Darrick J. Wong wrote:
> > > On Mon, Feb 19, 2018 at 08:54:57AM -0500, Brian Foster wrote:
> > > > On Fri, Feb 16, 2018 at 09:57:01AM -0800, Darrick J. Wong wrote:
> > > > > On Thu, Feb 15, 2018 at 08:16:30AM -0500, Brian Foster wrote:
> > > > > > On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> > > > > > > On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> > ...
> > > > > > 
> > > > > > Ok, so the intent is to "protect" those old kernels from a filesystem
> > > > > > that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
> > > > > > filesystem that has a wrapped agfl from a fixed kernel. That sounds
> > > > > > reasonable.. I guess what concerns me is that the bit doesn't seem to be
> > > > > > used in that manner. A user can always run a fixed kernel, wrap the
> > > > > > agfl, then go back to the bad one and cry when it explodes.
> > > > > > 
> > > > > > IOW, if the goal is this kind of retroactive protection, I'd expect the
> > > > > > bit to be enabled any time the filesystem is in a state that would cause
> > > > > > problems on the old kernel. Technically, that could mean doing something
> > > > > > like setting the bit any time an AGFL is in a wrapped state and clearing
> > > > > > it when that state clears. Or perhaps setting it unconditionally on
> > > > > > mount and leaving it permanently would be another way to satisfy the
> > > > > > requirement, though certainly more heavy handed.
> > > > > 
> > > > > The previous version of this series did that, though Dave suggested I
> > > > > change it to set the bit only if we actually touched an agfl.  I
> > > > > probably should have pushed back harder, but it was only rfc at the
> > > > > time.
> > > > > 
> > > > 
> > > > It's not clear which of the above options you're referring to. FWIW, the
> > > > former seems notably more usable to me than the latter.
> > > 
> > > Sorry about that.  The previous iteration would do:
> > > 
> > > if (!has_fixedagfl) {
> > > 	fix_agfls();
> > > 	set_fixedagfl();
> > > }
> > > 
> > > Which probably doesn't satisfy your usability test. :)
> > > 
> > > Hmm... another possible strategy would be to fix the agfls and set the
> > > fixedagfl bit at mount time, then at unmount/freeze/remount-ro we fix
> > > the agfls again and clear the fixedagfl bit.  Then the only way an
> > > unpatched kernel hits this is if we crash with a patched kernel and the
> > > recovery mount is an unpatched kernel.
> > > 
> > 
> > What's the purpose of "fixing the agfls again" at unmount time? Hasn't
> > the patched kernel already addressed the size mismatch at mount time?
> 
> Sorry, my wording there didn't match what was in my head.  I'll try
> again:
> 
> At mount/remount-rw time:
> if agfl list is wrapped assuming the shorter agfl length:
> 	fix wrapping to fit the longer 4.5+ agfl length
> set fixedagfl bit
> 
> At unmount/freeze/remount-ro time:
> if agfl list touches the last agfl item:
> 	reset agfl list to the start of the agfl
> clear fixedagfl bit
> 

Hmm, that doesn't seem so bad. I was hesitant to change agfl/allocator
behavior going forward for such a rare case, but doing one-off fixups at
mount/unmount time is much less intrusive. The reduced scope of rocompat
enforcement also eliminates permanent breakage for unsuspecting users.

Of course it still suffers from the general drawbacks associated with
using a feature bit. IMO, it's not worth creating a new set of backwards
incompat kernels (i.e., those with the feature bit vs. those without,
where both kernels are essentially "fixed" already with regard to the
agfl) just to provide nicer behavior for users still on the broken
kernel. E.g., if they're still on the broken kernel, I don't see them
upgrading to the feature enabled non-broken kernel designed to fix the
broken kernel. Further, if the rhel case ends up using a similar
"backwards detection" (if that is even possible, I have no idea)
approach to address functionality, I suspect we'd have to support the
feature bit on a kernel that doesn't technically have the fixed agfl.

The detection patch fixes breakage going forward. AFAICT all the feature
bit does is create a softer landing for users going back to a broken
kernel. In the end, this doesn't actually fix the broken environment, so
the next steps are the same either way: the user needs to upgrade the
broken kernel. This all sums up to what seems like a superfluous change
to me. All that said, if others feel like we really need this for some
reason then I'd prefer this policy over anything we've discussed so far
wrt the feature bit.

Brian

> This way, we're only using the rocompat bit to prevent unpatched kernels
> from *recovering* filesystems that might have an agfl wrap that it
> doesn't understand.
> 
> > > What does everyone think of this strategy?  I think I like it the most
> > > of all the things we've put forth because the only time anyone will trip
> > > over this rocompat bit is this one specific scenario.
> > > 
> > > (Apologies if we've already talked about this, I've gotten lost in this
> > > thread.)
> > > 
> > ...
> > > > > > 
> > > > > > Hmm, I still think my preferred approach is to mount time detect,
> > > > > > enforce read-only and tell the user to xfs_repair[1]. It's the most
> > > > > 
> > > > > I think this is difficult to do for the root fs -- in general I don't
> > > > > think initrds ship with xfs_repair and the logic to detect the "refuses
> > > > > to mount rw" condition and react to that with xfs_repair and a second
> > > > > mount attempt.
> > > > > 
> > > > 
> > > > Good point. I guess what concerns me is leaving around such highly
> > > > specialized code. I figured an error check is less risk than a function
> > > > that modifies the fs. Do we have a test case that would exercise this
> > > > codepath going forward, at least?
> > > 
> > > I did write xfstests for the general case, though apparently I've been
> > > lagging xfstests and haven't sent it.  :/
> > > 
> > > Oh, right, we're still discussing semantics & existence of an rocompat
> > > bit, which affects the golden output.
> > > 
> > 
> > Ok..
> > 
> > > > > Also I think this doesn't work if we do an initial ro mount and then try
> > > > > to remount-rw...
> > > > > 
> > > > 
> > > > That one seems like it would just require a bit more code. E.g., we'd
> > > > have to cache or re-check state on ro -> rw transitions. That's
> > > > secondary to the rootfs use case justification, though..
> > > 
> > > <nod>
> > > 
> > > > > > simple implementation as we don't have to carry fixup code in the kernel
> > > > > > for such an uncommon situation (which facilitates a broad backport
> > > > > > strategy). It protects the users on stable kernels who pull in the
> > > > > > latest bug fixes, including the guy who goes from a bad kernel to a good
> > > > > > one, and all kernels going forward without having to use a feature bit.
> > > > > > We don't do anything for the guy who wraps the agfl on newer kernels and
> > > > > > goes back to the bad one, but at some point he just needs to update the
> > > > > > broken kernel.
> > > > > > 
> > > > > > [1] I guess doing a mount time fix up vs. a read-only mount + warning is
> > > > > > a separate question from the feature bit. I'd prefer the latter for
> > > > > > simplicity, but perhaps there are use cases where the benefit outweighs
> > > > > > the risk.
> > > > > > 
> > > > > > Anyways, that's just my .02. There is no real great option here, I
> > > > > > guess. I just think that at some point maybe it's best to fix all the
> > > > > > kernels we can to handle the size mismatch, live with the fact that some
> > > > > > users might have those old, unfixed kernels and bonk them on the head to
> > > > > > upgrade when they (hopefully decreasingly) pop up. Perhaps others can
> > > > > > chime in with more thoughts..
> > > > > 
> > > > > Yeah, we could do that too (make all the next releases of
> > > > > rhel/ol/debian/ubuntu/sles/whatever quietly fix the bad-wrap agfls and
> > > > > skip the feature bit) and just let the unpatched kernels rot.  It keeps
> > > > > coming up, though...
> > > > > 
> > > > 
> > > > I guess I haven't really noticed it enough to consider it an ongoing
> > > > issue (which doesn't mean it isn't :P). Any pattern to the use cache
> > > > behind these continued reports..?
> > > 
> > > No particular pattern, other than using/freeing agfl blocks.
> > > 
> > 
> > I'm more curious about the use case than the workload. E.g., what's the
> > purpose for using two kernels? Was it bad luck on an upgrade or
> > something that implies the problematic state could reoccur (i.e., active
> > switching between good/bad kernels)?
> > 
> > IOW, if the "it keeps coming up" situation is simply because the
> > existing userbase hasn't fully upgraded yet, then a simple detect/repair
> > forward-fix patch is going to be sufficient (perhaps with a warning not
> > to revert to $previous kernel) to resolve the problem for the remaining
> > set of users who are susceptible to the problem.
> > 
> > If the problem is instead a set of users that are switching back and
> > forth for whatever reason, then perhaps a feature bit policy is more
> > justified.
> 
> $employer has customers that want or have to run mixed environments.
> 
> Not many, though.
> 
> > > > From an upstream perspective, RHEL continuing to operate in this mode
> > > > certainly does create an ongoing situation where a "current" distro
> > > > has/causes compatibility issues, particularly since some other
> > > > downstream distros may have kernels that use the upstream/fixed format.
> > > > 
> > > > I'll reiterate that in principle I don't think downstream decisions
> > > > should prohibit doing the right thing upstream, but for somebody on a
> > > > downstream distro who happens to test an upstream kernel (say as part of
> > > > a regression test/investigation), having the upstream kernel decide the
> > > > downstream kernel is no longer allowed to mount the fs seems kind of
> > > > mean. :P
> > > 
> > > Agreed, though agfl-related crashes are also mean. :/
> > > 
> > 
> > Yeah, but you can recover from that. In the use case above, the feature
> > bit has permanently prevented the user from going back to the distro
> > kernel. So this is one semi-realistic "switching between kernels" use
> > case where I think this feature bit actually hurts more than it helps.
> > If it were a big enough problem, I'd much rather have the downstream
> > kernel implement the feature bit to indicate that the upstream kernel
> > shouldn't/can't mount this filesystem than retroactively doing the
> > opposite. Even then, I still think I'd prefer the downstream kernel just
> > detect, fail to mount and encourage repair so we don't have to support
> > the strange transition from a clearly unsupported sequence (but this is
> > all downstream/distro discussion).
> > 
> > FWIW, if you actually want to make progress on this in absense of any
> > other feedback, I think the best approach is to refactor this series to
> > separate the detect/repair mechanism from all of this feature bit policy
> > stuff. The former all seems reasonable/justified to me based on your
> > explanations, so I'd be happy to review those portions of it without the
> > feature bit (which could still be tacked on for further review,
> > reordered appropriately on merge if ultimately necessary, etc.) and
> > consider the feature bit separately based on justification for added
> > benefit over the former.
> 
> Ok, I'll split out the part that fixes the AGFL and we can review those
> parts separately from the feature bit stuff.
> 
> --D
> 
> > 
> > Brian
> > 
> > > --D
> > > 
> > > > Brian
> > > > 
> > > > > --D
> > > > > 
> > > > > > 
> > > > > > Brian
> > > > > > 
> > > > > > > > caused this in the first place ..." Then we write a FAQ entry that
> > > > > > > > elaborates on how/why a user hit that problem and backport to stable
> > > > > > > > kernels as far and wide as possible. Hm?
> > > > > > > 
> > > > > > > I /do/ like the part about writing FAQ to update the kernel.
> > > > > > > 
> > > > > > > --D
> > > > > > > 
> > > > > > > > Brian
> > > > > > > > 
> > > > > > > > > --D
> > > > > > > > > --
> > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > --
> > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > --
> > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > --
> > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > the body of a message to majordomo@vger.kernel.org
> > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-22 11:55                 ` Brian Foster
@ 2018-02-22 18:06                   ` Darrick J. Wong
  2018-02-22 18:11                     ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-22 18:06 UTC (permalink / raw)
  To: Brian Foster; +Cc: linux-xfs

On Thu, Feb 22, 2018 at 06:55:03AM -0500, Brian Foster wrote:
> On Wed, Feb 21, 2018 at 09:35:46AM -0800, Darrick J. Wong wrote:
> > On Wed, Feb 21, 2018 at 08:39:16AM -0500, Brian Foster wrote:
> > > On Tue, Feb 20, 2018 at 09:08:30AM -0800, Darrick J. Wong wrote:
> > > > On Mon, Feb 19, 2018 at 08:54:57AM -0500, Brian Foster wrote:
> > > > > On Fri, Feb 16, 2018 at 09:57:01AM -0800, Darrick J. Wong wrote:
> > > > > > On Thu, Feb 15, 2018 at 08:16:30AM -0500, Brian Foster wrote:
> > > > > > > On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> > > > > > > > On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> > > ...
> > > > > > > 
> > > > > > > Ok, so the intent is to "protect" those old kernels from a filesystem
> > > > > > > that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
> > > > > > > filesystem that has a wrapped agfl from a fixed kernel. That sounds
> > > > > > > reasonable.. I guess what concerns me is that the bit doesn't seem to be
> > > > > > > used in that manner. A user can always run a fixed kernel, wrap the
> > > > > > > agfl, then go back to the bad one and cry when it explodes.
> > > > > > > 
> > > > > > > IOW, if the goal is this kind of retroactive protection, I'd expect the
> > > > > > > bit to be enabled any time the filesystem is in a state that would cause
> > > > > > > problems on the old kernel. Technically, that could mean doing something
> > > > > > > like setting the bit any time an AGFL is in a wrapped state and clearing
> > > > > > > it when that state clears. Or perhaps setting it unconditionally on
> > > > > > > mount and leaving it permanently would be another way to satisfy the
> > > > > > > requirement, though certainly more heavy handed.
> > > > > > 
> > > > > > The previous version of this series did that, though Dave suggested I
> > > > > > change it to set the bit only if we actually touched an agfl.  I
> > > > > > probably should have pushed back harder, but it was only rfc at the
> > > > > > time.
> > > > > > 
> > > > > 
> > > > > It's not clear which of the above options you're referring to. FWIW, the
> > > > > former seems notably more usable to me than the latter.
> > > > 
> > > > Sorry about that.  The previous iteration would do:
> > > > 
> > > > if (!has_fixedagfl) {
> > > > 	fix_agfls();
> > > > 	set_fixedagfl();
> > > > }
> > > > 
> > > > Which probably doesn't satisfy your usability test. :)
> > > > 
> > > > Hmm... another possible strategy would be to fix the agfls and set the
> > > > fixedagfl bit at mount time, then at unmount/freeze/remount-ro we fix
> > > > the agfls again and clear the fixedagfl bit.  Then the only way an
> > > > unpatched kernel hits this is if we crash with a patched kernel and the
> > > > recovery mount is an unpatched kernel.
> > > > 
> > > 
> > > What's the purpose of "fixing the agfls again" at unmount time? Hasn't
> > > the patched kernel already addressed the size mismatch at mount time?
> > 
> > Sorry, my wording there didn't match what was in my head.  I'll try
> > again:
> > 
> > At mount/remount-rw time:
> > if agfl list is wrapped assuming the shorter agfl length:
> > 	fix wrapping to fit the longer 4.5+ agfl length
> > set fixedagfl bit
> > 
> > At unmount/freeze/remount-ro time:
> > if agfl list touches the last agfl item:
> > 	reset agfl list to the start of the agfl
> > clear fixedagfl bit
> > 
> 
> Hmm, that doesn't seem so bad. I was hesitant to change agfl/allocator
> behavior going forward for such a rare case, but doing one-off fixups at
> mount/unmount time is much less intrusive. The reduced scope of rocompat
> enforcement also eliminates permanent breakage for unsuspecting users.
> 
> Of course it still suffers from the general drawbacks associated with
> using a feature bit. IMO, it's not worth creating a new set of backwards
> incompat kernels (i.e., those with the feature bit vs. those without,
> where both kernels are essentially "fixed" already with regard to the
> agfl) just to provide nicer behavior for users still on the broken
> kernel. E.g., if they're still on the broken kernel, I don't see them
> upgrading to the feature enabled non-broken kernel designed to fix the
> broken kernel. Further, if the rhel case ends up using a similar
> "backwards detection" (if that is even possible, I have no idea)
> approach to address functionality, I suspect we'd have to support the
> feature bit on a kernel that doesn't technically have the fixed agfl.
> 
> The detection patch fixes breakage going forward. AFAICT all the feature
> bit does is create a softer landing for users going back to a broken
> kernel. In the end, this doesn't actually fix the broken environment, so
> the next steps are the same either way: the user needs to upgrade the
> broken kernel. This all sums up to what seems like a superfluous change
> to me. All that said, if others feel like we really need this for some
> reason then I'd prefer this policy over anything we've discussed so far
> wrt the feature bit.

It occurred to me over breakfast that we also have rocompat features
that were introduced after 4.5, which narrows further the number of
filesystems requiring fixups.

rmapbt and reflink were introduced in 4.8 and 4.9, which means that any
fs with either of those features enabled has always the longer agfl size
and cannot be write-mounted on an unpatched kernel.  As time goes by,
the percentage of v5 filesystems without either feature will shrink.
Therefore, we can skip both agfl fixups if either feature is enabled:

At mount/remount-rw time:
if !reflink && !rmap && agfl list is wrapped assuming the shorter agfl length:
	fix wrapping to fit the longer 4.5+ agfl length

At unmount/freeze/remount-ro time:
if !reflink && !rmap && agfl list touches the last agfl item:
	reset agfl list to the start of the agfl

The only way we're vulnerable to agfl wrapping problems is if someone
(a) uses an unpatched kernel to (b) recover a dirty filesystem that (c)
has a wrapped agfl and (d) doesn't have rmap or reflink enabled.

I think that's narrow enough that we can skip the rocompat bit.

--D

> Brian
> 
> > This way, we're only using the rocompat bit to prevent unpatched kernels
> > from *recovering* filesystems that might have an agfl wrap that it
> > doesn't understand.
> > 
> > > > What does everyone think of this strategy?  I think I like it the most
> > > > of all the things we've put forth because the only time anyone will trip
> > > > over this rocompat bit is this one specific scenario.
> > > > 
> > > > (Apologies if we've already talked about this, I've gotten lost in this
> > > > thread.)
> > > > 
> > > ...
> > > > > > > 
> > > > > > > Hmm, I still think my preferred approach is to mount time detect,
> > > > > > > enforce read-only and tell the user to xfs_repair[1]. It's the most
> > > > > > 
> > > > > > I think this is difficult to do for the root fs -- in general I don't
> > > > > > think initrds ship with xfs_repair and the logic to detect the "refuses
> > > > > > to mount rw" condition and react to that with xfs_repair and a second
> > > > > > mount attempt.
> > > > > > 
> > > > > 
> > > > > Good point. I guess what concerns me is leaving around such highly
> > > > > specialized code. I figured an error check is less risk than a function
> > > > > that modifies the fs. Do we have a test case that would exercise this
> > > > > codepath going forward, at least?
> > > > 
> > > > I did write xfstests for the general case, though apparently I've been
> > > > lagging xfstests and haven't sent it.  :/
> > > > 
> > > > Oh, right, we're still discussing semantics & existence of an rocompat
> > > > bit, which affects the golden output.
> > > > 
> > > 
> > > Ok..
> > > 
> > > > > > Also I think this doesn't work if we do an initial ro mount and then try
> > > > > > to remount-rw...
> > > > > > 
> > > > > 
> > > > > That one seems like it would just require a bit more code. E.g., we'd
> > > > > have to cache or re-check state on ro -> rw transitions. That's
> > > > > secondary to the rootfs use case justification, though..
> > > > 
> > > > <nod>
> > > > 
> > > > > > > simple implementation as we don't have to carry fixup code in the kernel
> > > > > > > for such an uncommon situation (which facilitates a broad backport
> > > > > > > strategy). It protects the users on stable kernels who pull in the
> > > > > > > latest bug fixes, including the guy who goes from a bad kernel to a good
> > > > > > > one, and all kernels going forward without having to use a feature bit.
> > > > > > > We don't do anything for the guy who wraps the agfl on newer kernels and
> > > > > > > goes back to the bad one, but at some point he just needs to update the
> > > > > > > broken kernel.
> > > > > > > 
> > > > > > > [1] I guess doing a mount time fix up vs. a read-only mount + warning is
> > > > > > > a separate question from the feature bit. I'd prefer the latter for
> > > > > > > simplicity, but perhaps there are use cases where the benefit outweighs
> > > > > > > the risk.
> > > > > > > 
> > > > > > > Anyways, that's just my .02. There is no real great option here, I
> > > > > > > guess. I just think that at some point maybe it's best to fix all the
> > > > > > > kernels we can to handle the size mismatch, live with the fact that some
> > > > > > > users might have those old, unfixed kernels and bonk them on the head to
> > > > > > > upgrade when they (hopefully decreasingly) pop up. Perhaps others can
> > > > > > > chime in with more thoughts..
> > > > > > 
> > > > > > Yeah, we could do that too (make all the next releases of
> > > > > > rhel/ol/debian/ubuntu/sles/whatever quietly fix the bad-wrap agfls and
> > > > > > skip the feature bit) and just let the unpatched kernels rot.  It keeps
> > > > > > coming up, though...
> > > > > > 
> > > > > 
> > > > > I guess I haven't really noticed it enough to consider it an ongoing
> > > > > issue (which doesn't mean it isn't :P). Any pattern to the use cache
> > > > > behind these continued reports..?
> > > > 
> > > > No particular pattern, other than using/freeing agfl blocks.
> > > > 
> > > 
> > > I'm more curious about the use case than the workload. E.g., what's the
> > > purpose for using two kernels? Was it bad luck on an upgrade or
> > > something that implies the problematic state could reoccur (i.e., active
> > > switching between good/bad kernels)?
> > > 
> > > IOW, if the "it keeps coming up" situation is simply because the
> > > existing userbase hasn't fully upgraded yet, then a simple detect/repair
> > > forward-fix patch is going to be sufficient (perhaps with a warning not
> > > to revert to $previous kernel) to resolve the problem for the remaining
> > > set of users who are susceptible to the problem.
> > > 
> > > If the problem is instead a set of users that are switching back and
> > > forth for whatever reason, then perhaps a feature bit policy is more
> > > justified.
> > 
> > $employer has customers that want or have to run mixed environments.
> > 
> > Not many, though.
> > 
> > > > > From an upstream perspective, RHEL continuing to operate in this mode
> > > > > certainly does create an ongoing situation where a "current" distro
> > > > > has/causes compatibility issues, particularly since some other
> > > > > downstream distros may have kernels that use the upstream/fixed format.
> > > > > 
> > > > > I'll reiterate that in principle I don't think downstream decisions
> > > > > should prohibit doing the right thing upstream, but for somebody on a
> > > > > downstream distro who happens to test an upstream kernel (say as part of
> > > > > a regression test/investigation), having the upstream kernel decide the
> > > > > downstream kernel is no longer allowed to mount the fs seems kind of
> > > > > mean. :P
> > > > 
> > > > Agreed, though agfl-related crashes are also mean. :/
> > > > 
> > > 
> > > Yeah, but you can recover from that. In the use case above, the feature
> > > bit has permanently prevented the user from going back to the distro
> > > kernel. So this is one semi-realistic "switching between kernels" use
> > > case where I think this feature bit actually hurts more than it helps.
> > > If it were a big enough problem, I'd much rather have the downstream
> > > kernel implement the feature bit to indicate that the upstream kernel
> > > shouldn't/can't mount this filesystem than retroactively doing the
> > > opposite. Even then, I still think I'd prefer the downstream kernel just
> > > detect, fail to mount and encourage repair so we don't have to support
> > > the strange transition from a clearly unsupported sequence (but this is
> > > all downstream/distro discussion).
> > > 
> > > FWIW, if you actually want to make progress on this in absense of any
> > > other feedback, I think the best approach is to refactor this series to
> > > separate the detect/repair mechanism from all of this feature bit policy
> > > stuff. The former all seems reasonable/justified to me based on your
> > > explanations, so I'd be happy to review those portions of it without the
> > > feature bit (which could still be tacked on for further review,
> > > reordered appropriately on merge if ultimately necessary, etc.) and
> > > consider the feature bit separately based on justification for added
> > > benefit over the former.
> > 
> > Ok, I'll split out the part that fixes the AGFL and we can review those
> > parts separately from the feature bit stuff.
> > 
> > --D
> > 
> > > 
> > > Brian
> > > 
> > > > --D
> > > > 
> > > > > Brian
> > > > > 
> > > > > > --D
> > > > > > 
> > > > > > > 
> > > > > > > Brian
> > > > > > > 
> > > > > > > > > caused this in the first place ..." Then we write a FAQ entry that
> > > > > > > > > elaborates on how/why a user hit that problem and backport to stable
> > > > > > > > > kernels as far and wide as possible. Hm?
> > > > > > > > 
> > > > > > > > I /do/ like the part about writing FAQ to update the kernel.
> > > > > > > > 
> > > > > > > > --D
> > > > > > > > 
> > > > > > > > > Brian
> > > > > > > > > 
> > > > > > > > > > --D
> > > > > > > > > > --
> > > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > > --
> > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > --
> > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > --
> > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > --
> > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > the body of a message to majordomo@vger.kernel.org
> > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-22 18:06                   ` Darrick J. Wong
@ 2018-02-22 18:11                     ` Darrick J. Wong
  2018-02-22 18:28                       ` Brian Foster
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2018-02-22 18:11 UTC (permalink / raw)
  To: Brian Foster; +Cc: linux-xfs

On Thu, Feb 22, 2018 at 10:06:28AM -0800, Darrick J. Wong wrote:
> On Thu, Feb 22, 2018 at 06:55:03AM -0500, Brian Foster wrote:
> > On Wed, Feb 21, 2018 at 09:35:46AM -0800, Darrick J. Wong wrote:
> > > On Wed, Feb 21, 2018 at 08:39:16AM -0500, Brian Foster wrote:
> > > > On Tue, Feb 20, 2018 at 09:08:30AM -0800, Darrick J. Wong wrote:
> > > > > On Mon, Feb 19, 2018 at 08:54:57AM -0500, Brian Foster wrote:
> > > > > > On Fri, Feb 16, 2018 at 09:57:01AM -0800, Darrick J. Wong wrote:
> > > > > > > On Thu, Feb 15, 2018 at 08:16:30AM -0500, Brian Foster wrote:
> > > > > > > > On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> > > > > > > > > On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> > > > ...
> > > > > > > > 
> > > > > > > > Ok, so the intent is to "protect" those old kernels from a filesystem
> > > > > > > > that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
> > > > > > > > filesystem that has a wrapped agfl from a fixed kernel. That sounds
> > > > > > > > reasonable.. I guess what concerns me is that the bit doesn't seem to be
> > > > > > > > used in that manner. A user can always run a fixed kernel, wrap the
> > > > > > > > agfl, then go back to the bad one and cry when it explodes.
> > > > > > > > 
> > > > > > > > IOW, if the goal is this kind of retroactive protection, I'd expect the
> > > > > > > > bit to be enabled any time the filesystem is in a state that would cause
> > > > > > > > problems on the old kernel. Technically, that could mean doing something
> > > > > > > > like setting the bit any time an AGFL is in a wrapped state and clearing
> > > > > > > > it when that state clears. Or perhaps setting it unconditionally on
> > > > > > > > mount and leaving it permanently would be another way to satisfy the
> > > > > > > > requirement, though certainly more heavy handed.
> > > > > > > 
> > > > > > > The previous version of this series did that, though Dave suggested I
> > > > > > > change it to set the bit only if we actually touched an agfl.  I
> > > > > > > probably should have pushed back harder, but it was only rfc at the
> > > > > > > time.
> > > > > > > 
> > > > > > 
> > > > > > It's not clear which of the above options you're referring to. FWIW, the
> > > > > > former seems notably more usable to me than the latter.
> > > > > 
> > > > > Sorry about that.  The previous iteration would do:
> > > > > 
> > > > > if (!has_fixedagfl) {
> > > > > 	fix_agfls();
> > > > > 	set_fixedagfl();
> > > > > }
> > > > > 
> > > > > Which probably doesn't satisfy your usability test. :)
> > > > > 
> > > > > Hmm... another possible strategy would be to fix the agfls and set the
> > > > > fixedagfl bit at mount time, then at unmount/freeze/remount-ro we fix
> > > > > the agfls again and clear the fixedagfl bit.  Then the only way an
> > > > > unpatched kernel hits this is if we crash with a patched kernel and the
> > > > > recovery mount is an unpatched kernel.
> > > > > 
> > > > 
> > > > What's the purpose of "fixing the agfls again" at unmount time? Hasn't
> > > > the patched kernel already addressed the size mismatch at mount time?
> > > 
> > > Sorry, my wording there didn't match what was in my head.  I'll try
> > > again:
> > > 
> > > At mount/remount-rw time:
> > > if agfl list is wrapped assuming the shorter agfl length:
> > > 	fix wrapping to fit the longer 4.5+ agfl length
> > > set fixedagfl bit
> > > 
> > > At unmount/freeze/remount-ro time:
> > > if agfl list touches the last agfl item:
> > > 	reset agfl list to the start of the agfl
> > > clear fixedagfl bit
> > > 
> > 
> > Hmm, that doesn't seem so bad. I was hesitant to change agfl/allocator
> > behavior going forward for such a rare case, but doing one-off fixups at
> > mount/unmount time is much less intrusive. The reduced scope of rocompat
> > enforcement also eliminates permanent breakage for unsuspecting users.
> > 
> > Of course it still suffers from the general drawbacks associated with
> > using a feature bit. IMO, it's not worth creating a new set of backwards
> > incompat kernels (i.e., those with the feature bit vs. those without,
> > where both kernels are essentially "fixed" already with regard to the
> > agfl) just to provide nicer behavior for users still on the broken
> > kernel. E.g., if they're still on the broken kernel, I don't see them
> > upgrading to the feature enabled non-broken kernel designed to fix the
> > broken kernel. Further, if the rhel case ends up using a similar
> > "backwards detection" (if that is even possible, I have no idea)
> > approach to address functionality, I suspect we'd have to support the
> > feature bit on a kernel that doesn't technically have the fixed agfl.
> > 
> > The detection patch fixes breakage going forward. AFAICT all the feature
> > bit does is create a softer landing for users going back to a broken
> > kernel. In the end, this doesn't actually fix the broken environment, so
> > the next steps are the same either way: the user needs to upgrade the
> > broken kernel. This all sums up to what seems like a superfluous change
> > to me. All that said, if others feel like we really need this for some
> > reason then I'd prefer this policy over anything we've discussed so far
> > wrt the feature bit.
> 
> It occurred to me over breakfast that we also have rocompat features
> that were introduced after 4.5, which narrows further the number of
> filesystems requiring fixups.
> 
> rmapbt and reflink were introduced in 4.8 and 4.9, which means that any
> fs with either of those features enabled has always the longer agfl size
> and cannot be write-mounted on an unpatched kernel.  As time goes by,
> the percentage of v5 filesystems without either feature will shrink.
> Therefore, we can skip both agfl fixups if either feature is enabled:
> 
> At mount/remount-rw time:
> if !reflink && !rmap && agfl list is wrapped assuming the shorter agfl length:
> 	fix wrapping to fit the longer 4.5+ agfl length
> 
> At unmount/freeze/remount-ro time:
> if !reflink && !rmap && agfl list touches the last agfl item:
> 	reset agfl list to the start of the agfl

<sigh> pseudocode bug; try again:

At mount/remount-rw time:
if v5fs && !reflink && !rmap && agfl list is wrapped assuming the shorter agfl length:
	fix wrapping to fit the longer 4.5+ agfl length

At unmount/freeze/remount-ro time:
if v5fs && !reflink && !rmap && agfl list touches the last agfl item:
	reset agfl list to the start of the agfl

Those first three predicates should be a helper function that does:

if v5 && (no v5 feature bits are set that weren't defined in 4.5)

--D

> 
> The only way we're vulnerable to agfl wrapping problems is if someone
> (a) uses an unpatched kernel to (b) recover a dirty filesystem that (c)
> has a wrapped agfl and (d) doesn't have rmap or reflink enabled.
> 
> I think that's narrow enough that we can skip the rocompat bit.
> 
> --D
> 
> > Brian
> > 
> > > This way, we're only using the rocompat bit to prevent unpatched kernels
> > > from *recovering* filesystems that might have an agfl wrap that it
> > > doesn't understand.
> > > 
> > > > > What does everyone think of this strategy?  I think I like it the most
> > > > > of all the things we've put forth because the only time anyone will trip
> > > > > over this rocompat bit is this one specific scenario.
> > > > > 
> > > > > (Apologies if we've already talked about this, I've gotten lost in this
> > > > > thread.)
> > > > > 
> > > > ...
> > > > > > > > 
> > > > > > > > Hmm, I still think my preferred approach is to mount time detect,
> > > > > > > > enforce read-only and tell the user to xfs_repair[1]. It's the most
> > > > > > > 
> > > > > > > I think this is difficult to do for the root fs -- in general I don't
> > > > > > > think initrds ship with xfs_repair and the logic to detect the "refuses
> > > > > > > to mount rw" condition and react to that with xfs_repair and a second
> > > > > > > mount attempt.
> > > > > > > 
> > > > > > 
> > > > > > Good point. I guess what concerns me is leaving around such highly
> > > > > > specialized code. I figured an error check is less risk than a function
> > > > > > that modifies the fs. Do we have a test case that would exercise this
> > > > > > codepath going forward, at least?
> > > > > 
> > > > > I did write xfstests for the general case, though apparently I've been
> > > > > lagging xfstests and haven't sent it.  :/
> > > > > 
> > > > > Oh, right, we're still discussing semantics & existence of an rocompat
> > > > > bit, which affects the golden output.
> > > > > 
> > > > 
> > > > Ok..
> > > > 
> > > > > > > Also I think this doesn't work if we do an initial ro mount and then try
> > > > > > > to remount-rw...
> > > > > > > 
> > > > > > 
> > > > > > That one seems like it would just require a bit more code. E.g., we'd
> > > > > > have to cache or re-check state on ro -> rw transitions. That's
> > > > > > secondary to the rootfs use case justification, though..
> > > > > 
> > > > > <nod>
> > > > > 
> > > > > > > > simple implementation as we don't have to carry fixup code in the kernel
> > > > > > > > for such an uncommon situation (which facilitates a broad backport
> > > > > > > > strategy). It protects the users on stable kernels who pull in the
> > > > > > > > latest bug fixes, including the guy who goes from a bad kernel to a good
> > > > > > > > one, and all kernels going forward without having to use a feature bit.
> > > > > > > > We don't do anything for the guy who wraps the agfl on newer kernels and
> > > > > > > > goes back to the bad one, but at some point he just needs to update the
> > > > > > > > broken kernel.
> > > > > > > > 
> > > > > > > > [1] I guess doing a mount time fix up vs. a read-only mount + warning is
> > > > > > > > a separate question from the feature bit. I'd prefer the latter for
> > > > > > > > simplicity, but perhaps there are use cases where the benefit outweighs
> > > > > > > > the risk.
> > > > > > > > 
> > > > > > > > Anyways, that's just my .02. There is no real great option here, I
> > > > > > > > guess. I just think that at some point maybe it's best to fix all the
> > > > > > > > kernels we can to handle the size mismatch, live with the fact that some
> > > > > > > > users might have those old, unfixed kernels and bonk them on the head to
> > > > > > > > upgrade when they (hopefully decreasingly) pop up. Perhaps others can
> > > > > > > > chime in with more thoughts..
> > > > > > > 
> > > > > > > Yeah, we could do that too (make all the next releases of
> > > > > > > rhel/ol/debian/ubuntu/sles/whatever quietly fix the bad-wrap agfls and
> > > > > > > skip the feature bit) and just let the unpatched kernels rot.  It keeps
> > > > > > > coming up, though...
> > > > > > > 
> > > > > > 
> > > > > > I guess I haven't really noticed it enough to consider it an ongoing
> > > > > > issue (which doesn't mean it isn't :P). Any pattern to the use cache
> > > > > > behind these continued reports..?
> > > > > 
> > > > > No particular pattern, other than using/freeing agfl blocks.
> > > > > 
> > > > 
> > > > I'm more curious about the use case than the workload. E.g., what's the
> > > > purpose for using two kernels? Was it bad luck on an upgrade or
> > > > something that implies the problematic state could reoccur (i.e., active
> > > > switching between good/bad kernels)?
> > > > 
> > > > IOW, if the "it keeps coming up" situation is simply because the
> > > > existing userbase hasn't fully upgraded yet, then a simple detect/repair
> > > > forward-fix patch is going to be sufficient (perhaps with a warning not
> > > > to revert to $previous kernel) to resolve the problem for the remaining
> > > > set of users who are susceptible to the problem.
> > > > 
> > > > If the problem is instead a set of users that are switching back and
> > > > forth for whatever reason, then perhaps a feature bit policy is more
> > > > justified.
> > > 
> > > $employer has customers that want or have to run mixed environments.
> > > 
> > > Not many, though.
> > > 
> > > > > > From an upstream perspective, RHEL continuing to operate in this mode
> > > > > > certainly does create an ongoing situation where a "current" distro
> > > > > > has/causes compatibility issues, particularly since some other
> > > > > > downstream distros may have kernels that use the upstream/fixed format.
> > > > > > 
> > > > > > I'll reiterate that in principle I don't think downstream decisions
> > > > > > should prohibit doing the right thing upstream, but for somebody on a
> > > > > > downstream distro who happens to test an upstream kernel (say as part of
> > > > > > a regression test/investigation), having the upstream kernel decide the
> > > > > > downstream kernel is no longer allowed to mount the fs seems kind of
> > > > > > mean. :P
> > > > > 
> > > > > Agreed, though agfl-related crashes are also mean. :/
> > > > > 
> > > > 
> > > > Yeah, but you can recover from that. In the use case above, the feature
> > > > bit has permanently prevented the user from going back to the distro
> > > > kernel. So this is one semi-realistic "switching between kernels" use
> > > > case where I think this feature bit actually hurts more than it helps.
> > > > If it were a big enough problem, I'd much rather have the downstream
> > > > kernel implement the feature bit to indicate that the upstream kernel
> > > > shouldn't/can't mount this filesystem than retroactively doing the
> > > > opposite. Even then, I still think I'd prefer the downstream kernel just
> > > > detect, fail to mount and encourage repair so we don't have to support
> > > > the strange transition from a clearly unsupported sequence (but this is
> > > > all downstream/distro discussion).
> > > > 
> > > > FWIW, if you actually want to make progress on this in absense of any
> > > > other feedback, I think the best approach is to refactor this series to
> > > > separate the detect/repair mechanism from all of this feature bit policy
> > > > stuff. The former all seems reasonable/justified to me based on your
> > > > explanations, so I'd be happy to review those portions of it without the
> > > > feature bit (which could still be tacked on for further review,
> > > > reordered appropriately on merge if ultimately necessary, etc.) and
> > > > consider the feature bit separately based on justification for added
> > > > benefit over the former.
> > > 
> > > Ok, I'll split out the part that fixes the AGFL and we can review those
> > > parts separately from the feature bit stuff.
> > > 
> > > --D
> > > 
> > > > 
> > > > Brian
> > > > 
> > > > > --D
> > > > > 
> > > > > > Brian
> > > > > > 
> > > > > > > --D
> > > > > > > 
> > > > > > > > 
> > > > > > > > Brian
> > > > > > > > 
> > > > > > > > > > caused this in the first place ..." Then we write a FAQ entry that
> > > > > > > > > > elaborates on how/why a user hit that problem and backport to stable
> > > > > > > > > > kernels as far and wide as possible. Hm?
> > > > > > > > > 
> > > > > > > > > I /do/ like the part about writing FAQ to update the kernel.
> > > > > > > > > 
> > > > > > > > > --D
> > > > > > > > > 
> > > > > > > > > > Brian
> > > > > > > > > > 
> > > > > > > > > > > --D
> > > > > > > > > > > --
> > > > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > > > --
> > > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > > --
> > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > --
> > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > --
> > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > --
> > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > the body of a message to majordomo@vger.kernel.org
> > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 0/4] xfs: fix v5 AGFL wrapping
  2018-02-22 18:11                     ` Darrick J. Wong
@ 2018-02-22 18:28                       ` Brian Foster
  0 siblings, 0 replies; 18+ messages in thread
From: Brian Foster @ 2018-02-22 18:28 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

On Thu, Feb 22, 2018 at 10:11:30AM -0800, Darrick J. Wong wrote:
> On Thu, Feb 22, 2018 at 10:06:28AM -0800, Darrick J. Wong wrote:
> > On Thu, Feb 22, 2018 at 06:55:03AM -0500, Brian Foster wrote:
> > > On Wed, Feb 21, 2018 at 09:35:46AM -0800, Darrick J. Wong wrote:
> > > > On Wed, Feb 21, 2018 at 08:39:16AM -0500, Brian Foster wrote:
> > > > > On Tue, Feb 20, 2018 at 09:08:30AM -0800, Darrick J. Wong wrote:
> > > > > > On Mon, Feb 19, 2018 at 08:54:57AM -0500, Brian Foster wrote:
> > > > > > > On Fri, Feb 16, 2018 at 09:57:01AM -0800, Darrick J. Wong wrote:
> > > > > > > > On Thu, Feb 15, 2018 at 08:16:30AM -0500, Brian Foster wrote:
> > > > > > > > > On Wed, Feb 14, 2018 at 06:05:48PM -0800, Darrick J. Wong wrote:
> > > > > > > > > > On Wed, Feb 14, 2018 at 02:56:51PM -0500, Brian Foster wrote:
> > > > > ...
> > > > > > > > > 
> > > > > > > > > Ok, so the intent is to "protect" those old kernels from a filesystem
> > > > > > > > > that has seen a "fixed" kernel so the _old_ kernel doesn't explode on a
> > > > > > > > > filesystem that has a wrapped agfl from a fixed kernel. That sounds
> > > > > > > > > reasonable.. I guess what concerns me is that the bit doesn't seem to be
> > > > > > > > > used in that manner. A user can always run a fixed kernel, wrap the
> > > > > > > > > agfl, then go back to the bad one and cry when it explodes.
> > > > > > > > > 
> > > > > > > > > IOW, if the goal is this kind of retroactive protection, I'd expect the
> > > > > > > > > bit to be enabled any time the filesystem is in a state that would cause
> > > > > > > > > problems on the old kernel. Technically, that could mean doing something
> > > > > > > > > like setting the bit any time an AGFL is in a wrapped state and clearing
> > > > > > > > > it when that state clears. Or perhaps setting it unconditionally on
> > > > > > > > > mount and leaving it permanently would be another way to satisfy the
> > > > > > > > > requirement, though certainly more heavy handed.
> > > > > > > > 
> > > > > > > > The previous version of this series did that, though Dave suggested I
> > > > > > > > change it to set the bit only if we actually touched an agfl.  I
> > > > > > > > probably should have pushed back harder, but it was only rfc at the
> > > > > > > > time.
> > > > > > > > 
> > > > > > > 
> > > > > > > It's not clear which of the above options you're referring to. FWIW, the
> > > > > > > former seems notably more usable to me than the latter.
> > > > > > 
> > > > > > Sorry about that.  The previous iteration would do:
> > > > > > 
> > > > > > if (!has_fixedagfl) {
> > > > > > 	fix_agfls();
> > > > > > 	set_fixedagfl();
> > > > > > }
> > > > > > 
> > > > > > Which probably doesn't satisfy your usability test. :)
> > > > > > 
> > > > > > Hmm... another possible strategy would be to fix the agfls and set the
> > > > > > fixedagfl bit at mount time, then at unmount/freeze/remount-ro we fix
> > > > > > the agfls again and clear the fixedagfl bit.  Then the only way an
> > > > > > unpatched kernel hits this is if we crash with a patched kernel and the
> > > > > > recovery mount is an unpatched kernel.
> > > > > > 
> > > > > 
> > > > > What's the purpose of "fixing the agfls again" at unmount time? Hasn't
> > > > > the patched kernel already addressed the size mismatch at mount time?
> > > > 
> > > > Sorry, my wording there didn't match what was in my head.  I'll try
> > > > again:
> > > > 
> > > > At mount/remount-rw time:
> > > > if agfl list is wrapped assuming the shorter agfl length:
> > > > 	fix wrapping to fit the longer 4.5+ agfl length
> > > > set fixedagfl bit
> > > > 
> > > > At unmount/freeze/remount-ro time:
> > > > if agfl list touches the last agfl item:
> > > > 	reset agfl list to the start of the agfl
> > > > clear fixedagfl bit
> > > > 
> > > 
> > > Hmm, that doesn't seem so bad. I was hesitant to change agfl/allocator
> > > behavior going forward for such a rare case, but doing one-off fixups at
> > > mount/unmount time is much less intrusive. The reduced scope of rocompat
> > > enforcement also eliminates permanent breakage for unsuspecting users.
> > > 
> > > Of course it still suffers from the general drawbacks associated with
> > > using a feature bit. IMO, it's not worth creating a new set of backwards
> > > incompat kernels (i.e., those with the feature bit vs. those without,
> > > where both kernels are essentially "fixed" already with regard to the
> > > agfl) just to provide nicer behavior for users still on the broken
> > > kernel. E.g., if they're still on the broken kernel, I don't see them
> > > upgrading to the feature enabled non-broken kernel designed to fix the
> > > broken kernel. Further, if the rhel case ends up using a similar
> > > "backwards detection" (if that is even possible, I have no idea)
> > > approach to address functionality, I suspect we'd have to support the
> > > feature bit on a kernel that doesn't technically have the fixed agfl.
> > > 
> > > The detection patch fixes breakage going forward. AFAICT all the feature
> > > bit does is create a softer landing for users going back to a broken
> > > kernel. In the end, this doesn't actually fix the broken environment, so
> > > the next steps are the same either way: the user needs to upgrade the
> > > broken kernel. This all sums up to what seems like a superfluous change
> > > to me. All that said, if others feel like we really need this for some
> > > reason then I'd prefer this policy over anything we've discussed so far
> > > wrt the feature bit.
> > 
> > It occurred to me over breakfast that we also have rocompat features
> > that were introduced after 4.5, which narrows further the number of
> > filesystems requiring fixups.
> > 
> > rmapbt and reflink were introduced in 4.8 and 4.9, which means that any
> > fs with either of those features enabled has always the longer agfl size
> > and cannot be write-mounted on an unpatched kernel.  As time goes by,
> > the percentage of v5 filesystems without either feature will shrink.
> > Therefore, we can skip both agfl fixups if either feature is enabled:
> > 
> > At mount/remount-rw time:
> > if !reflink && !rmap && agfl list is wrapped assuming the shorter agfl length:
> > 	fix wrapping to fit the longer 4.5+ agfl length
> > 
> > At unmount/freeze/remount-ro time:
> > if !reflink && !rmap && agfl list touches the last agfl item:
> > 	reset agfl list to the start of the agfl
> 
> <sigh> pseudocode bug; try again:
> 
> At mount/remount-rw time:
> if v5fs && !reflink && !rmap && agfl list is wrapped assuming the shorter agfl length:
> 	fix wrapping to fit the longer 4.5+ agfl length
> 
> At unmount/freeze/remount-ro time:
> if v5fs && !reflink && !rmap && agfl list touches the last agfl item:
> 	reset agfl list to the start of the agfl
> 
> Those first three predicates should be a helper function that does:
> 
> if v5 && (no v5 feature bits are set that weren't defined in 4.5)
> 

That sounds reasonable enough to me so long as we have a nice comment
that explains how we're using this combination of feature bits. :) I
also like how it at least puts some kind of life expectancy on the
bandaid itself.

Brian

> --D
> 
> > 
> > The only way we're vulnerable to agfl wrapping problems is if someone
> > (a) uses an unpatched kernel to (b) recover a dirty filesystem that (c)
> > has a wrapped agfl and (d) doesn't have rmap or reflink enabled.
> > 
> > I think that's narrow enough that we can skip the rocompat bit.
> > 
> > --D
> > 
> > > Brian
> > > 
> > > > This way, we're only using the rocompat bit to prevent unpatched kernels
> > > > from *recovering* filesystems that might have an agfl wrap that it
> > > > doesn't understand.
> > > > 
> > > > > > What does everyone think of this strategy?  I think I like it the most
> > > > > > of all the things we've put forth because the only time anyone will trip
> > > > > > over this rocompat bit is this one specific scenario.
> > > > > > 
> > > > > > (Apologies if we've already talked about this, I've gotten lost in this
> > > > > > thread.)
> > > > > > 
> > > > > ...
> > > > > > > > > 
> > > > > > > > > Hmm, I still think my preferred approach is to mount time detect,
> > > > > > > > > enforce read-only and tell the user to xfs_repair[1]. It's the most
> > > > > > > > 
> > > > > > > > I think this is difficult to do for the root fs -- in general I don't
> > > > > > > > think initrds ship with xfs_repair and the logic to detect the "refuses
> > > > > > > > to mount rw" condition and react to that with xfs_repair and a second
> > > > > > > > mount attempt.
> > > > > > > > 
> > > > > > > 
> > > > > > > Good point. I guess what concerns me is leaving around such highly
> > > > > > > specialized code. I figured an error check is less risk than a function
> > > > > > > that modifies the fs. Do we have a test case that would exercise this
> > > > > > > codepath going forward, at least?
> > > > > > 
> > > > > > I did write xfstests for the general case, though apparently I've been
> > > > > > lagging xfstests and haven't sent it.  :/
> > > > > > 
> > > > > > Oh, right, we're still discussing semantics & existence of an rocompat
> > > > > > bit, which affects the golden output.
> > > > > > 
> > > > > 
> > > > > Ok..
> > > > > 
> > > > > > > > Also I think this doesn't work if we do an initial ro mount and then try
> > > > > > > > to remount-rw...
> > > > > > > > 
> > > > > > > 
> > > > > > > That one seems like it would just require a bit more code. E.g., we'd
> > > > > > > have to cache or re-check state on ro -> rw transitions. That's
> > > > > > > secondary to the rootfs use case justification, though..
> > > > > > 
> > > > > > <nod>
> > > > > > 
> > > > > > > > > simple implementation as we don't have to carry fixup code in the kernel
> > > > > > > > > for such an uncommon situation (which facilitates a broad backport
> > > > > > > > > strategy). It protects the users on stable kernels who pull in the
> > > > > > > > > latest bug fixes, including the guy who goes from a bad kernel to a good
> > > > > > > > > one, and all kernels going forward without having to use a feature bit.
> > > > > > > > > We don't do anything for the guy who wraps the agfl on newer kernels and
> > > > > > > > > goes back to the bad one, but at some point he just needs to update the
> > > > > > > > > broken kernel.
> > > > > > > > > 
> > > > > > > > > [1] I guess doing a mount time fix up vs. a read-only mount + warning is
> > > > > > > > > a separate question from the feature bit. I'd prefer the latter for
> > > > > > > > > simplicity, but perhaps there are use cases where the benefit outweighs
> > > > > > > > > the risk.
> > > > > > > > > 
> > > > > > > > > Anyways, that's just my .02. There is no real great option here, I
> > > > > > > > > guess. I just think that at some point maybe it's best to fix all the
> > > > > > > > > kernels we can to handle the size mismatch, live with the fact that some
> > > > > > > > > users might have those old, unfixed kernels and bonk them on the head to
> > > > > > > > > upgrade when they (hopefully decreasingly) pop up. Perhaps others can
> > > > > > > > > chime in with more thoughts..
> > > > > > > > 
> > > > > > > > Yeah, we could do that too (make all the next releases of
> > > > > > > > rhel/ol/debian/ubuntu/sles/whatever quietly fix the bad-wrap agfls and
> > > > > > > > skip the feature bit) and just let the unpatched kernels rot.  It keeps
> > > > > > > > coming up, though...
> > > > > > > > 
> > > > > > > 
> > > > > > > I guess I haven't really noticed it enough to consider it an ongoing
> > > > > > > issue (which doesn't mean it isn't :P). Any pattern to the use cache
> > > > > > > behind these continued reports..?
> > > > > > 
> > > > > > No particular pattern, other than using/freeing agfl blocks.
> > > > > > 
> > > > > 
> > > > > I'm more curious about the use case than the workload. E.g., what's the
> > > > > purpose for using two kernels? Was it bad luck on an upgrade or
> > > > > something that implies the problematic state could reoccur (i.e., active
> > > > > switching between good/bad kernels)?
> > > > > 
> > > > > IOW, if the "it keeps coming up" situation is simply because the
> > > > > existing userbase hasn't fully upgraded yet, then a simple detect/repair
> > > > > forward-fix patch is going to be sufficient (perhaps with a warning not
> > > > > to revert to $previous kernel) to resolve the problem for the remaining
> > > > > set of users who are susceptible to the problem.
> > > > > 
> > > > > If the problem is instead a set of users that are switching back and
> > > > > forth for whatever reason, then perhaps a feature bit policy is more
> > > > > justified.
> > > > 
> > > > $employer has customers that want or have to run mixed environments.
> > > > 
> > > > Not many, though.
> > > > 
> > > > > > > From an upstream perspective, RHEL continuing to operate in this mode
> > > > > > > certainly does create an ongoing situation where a "current" distro
> > > > > > > has/causes compatibility issues, particularly since some other
> > > > > > > downstream distros may have kernels that use the upstream/fixed format.
> > > > > > > 
> > > > > > > I'll reiterate that in principle I don't think downstream decisions
> > > > > > > should prohibit doing the right thing upstream, but for somebody on a
> > > > > > > downstream distro who happens to test an upstream kernel (say as part of
> > > > > > > a regression test/investigation), having the upstream kernel decide the
> > > > > > > downstream kernel is no longer allowed to mount the fs seems kind of
> > > > > > > mean. :P
> > > > > > 
> > > > > > Agreed, though agfl-related crashes are also mean. :/
> > > > > > 
> > > > > 
> > > > > Yeah, but you can recover from that. In the use case above, the feature
> > > > > bit has permanently prevented the user from going back to the distro
> > > > > kernel. So this is one semi-realistic "switching between kernels" use
> > > > > case where I think this feature bit actually hurts more than it helps.
> > > > > If it were a big enough problem, I'd much rather have the downstream
> > > > > kernel implement the feature bit to indicate that the upstream kernel
> > > > > shouldn't/can't mount this filesystem than retroactively doing the
> > > > > opposite. Even then, I still think I'd prefer the downstream kernel just
> > > > > detect, fail to mount and encourage repair so we don't have to support
> > > > > the strange transition from a clearly unsupported sequence (but this is
> > > > > all downstream/distro discussion).
> > > > > 
> > > > > FWIW, if you actually want to make progress on this in absense of any
> > > > > other feedback, I think the best approach is to refactor this series to
> > > > > separate the detect/repair mechanism from all of this feature bit policy
> > > > > stuff. The former all seems reasonable/justified to me based on your
> > > > > explanations, so I'd be happy to review those portions of it without the
> > > > > feature bit (which could still be tacked on for further review,
> > > > > reordered appropriately on merge if ultimately necessary, etc.) and
> > > > > consider the feature bit separately based on justification for added
> > > > > benefit over the former.
> > > > 
> > > > Ok, I'll split out the part that fixes the AGFL and we can review those
> > > > parts separately from the feature bit stuff.
> > > > 
> > > > --D
> > > > 
> > > > > 
> > > > > Brian
> > > > > 
> > > > > > --D
> > > > > > 
> > > > > > > Brian
> > > > > > > 
> > > > > > > > --D
> > > > > > > > 
> > > > > > > > > 
> > > > > > > > > Brian
> > > > > > > > > 
> > > > > > > > > > > caused this in the first place ..." Then we write a FAQ entry that
> > > > > > > > > > > elaborates on how/why a user hit that problem and backport to stable
> > > > > > > > > > > kernels as far and wide as possible. Hm?
> > > > > > > > > > 
> > > > > > > > > > I /do/ like the part about writing FAQ to update the kernel.
> > > > > > > > > > 
> > > > > > > > > > --D
> > > > > > > > > > 
> > > > > > > > > > > Brian
> > > > > > > > > > > 
> > > > > > > > > > > > --D
> > > > > > > > > > > > --
> > > > > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > > > > --
> > > > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > > > --
> > > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > > --
> > > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > > --
> > > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > > --
> > > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > > --
> > > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > > the body of a message to majordomo@vger.kernel.org
> > > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > > --
> > > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > > the body of a message to majordomo@vger.kernel.org
> > > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2018-02-22 18:28 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-14 18:12 [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Darrick J. Wong
2018-02-14 18:12 ` [PATCH 1/4] xfs: convert XFS_AGFL_SIZE to a helper function Darrick J. Wong
2018-02-14 18:12 ` [PATCH 2/4] xfs: introduce 'fixed agfl' feature Darrick J. Wong
2018-02-14 18:12 ` [PATCH 3/4] xfs: fix agfl wrapping on v5 filesystems Darrick J. Wong
2018-02-14 18:12 ` [PATCH 4/4] xfs: enable fixed agfl feature Darrick J. Wong
2018-02-14 19:56 ` [PATCH v2 0/4] xfs: fix v5 AGFL wrapping Brian Foster
2018-02-15  2:05   ` Darrick J. Wong
2018-02-15 13:16     ` Brian Foster
2018-02-16 17:57       ` Darrick J. Wong
2018-02-19 13:54         ` Brian Foster
2018-02-20 17:08           ` Darrick J. Wong
2018-02-21 13:39             ` Brian Foster
2018-02-21 17:35               ` Darrick J. Wong
2018-02-22 11:55                 ` Brian Foster
2018-02-22 18:06                   ` Darrick J. Wong
2018-02-22 18:11                     ` Darrick J. Wong
2018-02-22 18:28                       ` Brian Foster
2018-02-21 21:16               ` Dave Chinner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.