linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 00/11] xfs: widen timestamps to deal with y2038
@ 2020-08-17 22:56 Darrick J. Wong
  2020-08-17 22:56 ` [PATCH 01/11] xfs: explicitly define inode timestamp range Darrick J. Wong
                   ` (11 more replies)
  0 siblings, 12 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:56 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

Hi all,

This series performs some refactoring of our timestamp and inode
encoding functions, then retrofits the timestamp union to handle
timestamps as a 64-bit nanosecond counter.  Next, it adds bit shifting
to the non-root dquot timer fields to boost their effective size to 34
bits.  These two changes enable correct time handling on XFS through the
year 2486.

v2: rebase to 5.9, having landed the quota refactoring

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=bigtime

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=bigtime

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=bigtime
---
 fs/xfs/libxfs/xfs_dquot_buf.c  |   60 +++++++++++++++++
 fs/xfs/libxfs/xfs_format.h     |  139 +++++++++++++++++++++++++++++++++++++---
 fs/xfs/libxfs/xfs_fs.h         |    1 
 fs/xfs/libxfs/xfs_inode_buf.c  |  132 ++++++++++++++++++--------------------
 fs/xfs/libxfs/xfs_inode_buf.h  |    7 +-
 fs/xfs/libxfs/xfs_log_format.h |   21 ++++--
 fs/xfs/libxfs/xfs_quota_defs.h |    9 ++-
 fs/xfs/libxfs/xfs_sb.c         |    2 +
 fs/xfs/scrub/inode.c           |   31 +++++++--
 fs/xfs/scrub/quota.c           |    8 ++
 fs/xfs/xfs_dquot.c             |   66 ++++++++++++++++---
 fs/xfs/xfs_dquot.h             |    4 +
 fs/xfs/xfs_inode.c             |   11 +++
 fs/xfs/xfs_inode_item.c        |   97 ++++++++++++++++++++++++++--
 fs/xfs/xfs_inode_item.h        |    3 +
 fs/xfs/xfs_ioctl.c             |    3 +
 fs/xfs/xfs_ondisk.h            |   30 ++++++++-
 fs/xfs/xfs_qm.c                |    2 +
 fs/xfs/xfs_qm_syscalls.c       |   18 +++--
 fs/xfs/xfs_super.c             |   14 +++-
 20 files changed, 531 insertions(+), 127 deletions(-)


^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH 01/11] xfs: explicitly define inode timestamp range
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
@ 2020-08-17 22:56 ` Darrick J. Wong
  2020-08-18  6:25   ` Amir Goldstein
  2020-08-17 22:57 ` [PATCH 02/11] xfs: refactor quota expiration timer modification Darrick J. Wong
                   ` (10 subsequent siblings)
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:56 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Formally define the inode timestamp ranges that existing filesystems
support, and switch the vfs timetamp ranges to use it.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h |   19 +++++++++++++++++++
 fs/xfs/xfs_ondisk.h        |   12 ++++++++++++
 fs/xfs/xfs_super.c         |    5 +++--
 3 files changed, 34 insertions(+), 2 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index be86fa1a5556..b1b8a5c05cea 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -849,11 +849,30 @@ struct xfs_agfl {
 	    ASSERT(xfs_daddr_to_agno(mp, d) == \
 		   xfs_daddr_to_agno(mp, (d) + (len) - 1)))
 
+/*
+ * XFS Timestamps
+ * ==============
+ *
+ * Inode timestamps consist of signed 32-bit counters for seconds and
+ * nanoseconds; time zero is the Unix epoch, Jan  1 00:00:00 UTC 1970.
+ */
 typedef struct xfs_timestamp {
 	__be32		t_sec;		/* timestamp seconds */
 	__be32		t_nsec;		/* timestamp nanoseconds */
 } xfs_timestamp_t;
 
+/*
+ * Smallest possible timestamp with traditional timestamps, which is
+ * Dec 13 20:45:52 UTC 1901.
+ */
+#define XFS_INO_TIME_MIN	((int64_t)S32_MIN)
+
+/*
+ * Largest possible timestamp with traditional timestamps, which is
+ * Jan 19 03:14:07 UTC 2038.
+ */
+#define XFS_INO_TIME_MAX	((int64_t)S32_MAX)
+
 /*
  * On-disk inode structure.
  *
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index acb9b737fe6b..48a64fa49f91 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -15,6 +15,18 @@
 		"XFS: offsetof(" #structname ", " #member ") is wrong, " \
 		"expected " #off)
 
+#define XFS_CHECK_VALUE(value, expected) \
+	BUILD_BUG_ON_MSG((value) != (expected), \
+		"XFS: value of " #value " is wrong, expected " #expected)
+
+static inline void __init
+xfs_check_limits(void)
+{
+	/* make sure timestamp limits are correct */
+	XFS_CHECK_VALUE(XFS_INO_TIME_MIN,			-2147483648LL);
+	XFS_CHECK_VALUE(XFS_INO_TIME_MAX,			2147483647LL);
+}
+
 static inline void __init
 xfs_check_ondisk_structs(void)
 {
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c7ffcb57b586..375f05a47ba4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1484,8 +1484,8 @@ xfs_fc_fill_super(
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_max_links = XFS_MAXLINK;
 	sb->s_time_gran = 1;
-	sb->s_time_min = S32_MIN;
-	sb->s_time_max = S32_MAX;
+	sb->s_time_min = XFS_INO_TIME_MIN;
+	sb->s_time_max = XFS_INO_TIME_MAX;
 	sb->s_iflags |= SB_I_CGROUPWB;
 
 	set_posix_acl_flag(sb);
@@ -2077,6 +2077,7 @@ init_xfs_fs(void)
 {
 	int			error;
 
+	xfs_check_limits();
 	xfs_check_ondisk_structs();
 
 	printk(KERN_INFO XFS_VERSION_STRING " with "


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 02/11] xfs: refactor quota expiration timer modification
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
  2020-08-17 22:56 ` [PATCH 01/11] xfs: explicitly define inode timestamp range Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18  6:48   ` Amir Goldstein
  2020-08-17 22:57 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
                   ` (9 subsequent siblings)
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Define explicit limits on the range of quota grace period expiration
timeouts and refactor the code that modifies the timeouts into helpers
that clamp the values appropriately.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h |   22 ++++++++++++++++++++++
 fs/xfs/xfs_dquot.c         |   13 ++++++++++++-
 fs/xfs/xfs_dquot.h         |    2 ++
 fs/xfs/xfs_ondisk.h        |    2 ++
 fs/xfs/xfs_qm_syscalls.c   |    9 +++++++--
 5 files changed, 45 insertions(+), 3 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index b1b8a5c05cea..ef36978239ac 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1197,6 +1197,28 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 
 #define XFS_DQTYPE_ANY		(XFS_DQTYPE_REC_MASK)
 
+/*
+ * XFS Quota Timers
+ * ================
+ *
+ * Quota grace period expiration timers are an unsigned 32-bit seconds counter;
+ * time zero is the Unix epoch, Jan  1 00:00:01 UTC 1970.  An expiration value
+ * of zero means that the quota limit has not been reached, and therefore no
+ * expiration has been set.
+ */
+
+/*
+ * Smallest possible quota expiration with traditional timestamps, which is
+ * Jan  1 00:00:01 UTC 1970.
+ */
+#define XFS_DQ_TIMEOUT_MIN	((int64_t)1)
+
+/*
+ * Largest possible quota expiration with traditional timestamps, which is
+ * Feb  7 06:28:15 UTC 2106.
+ */
+#define XFS_DQ_TIMEOUT_MAX	((int64_t)U32_MAX)
+
 /*
  * This is the main portion of the on-disk representation of quota information
  * for a user.  We pad this with some more expansion room to construct the on
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index bcd73b9c2994..2425b1c30d11 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -98,6 +98,16 @@ xfs_qm_adjust_dqlimits(
 		xfs_dquot_set_prealloc_limits(dq);
 }
 
+/* Set the expiration time of a quota's grace period. */
+void
+xfs_dquot_set_timeout(
+	time64_t		*timer,
+	time64_t		value)
+{
+	*timer = clamp_t(time64_t, value, XFS_DQ_TIMEOUT_MIN,
+					  XFS_DQ_TIMEOUT_MAX);
+}
+
 /*
  * Determine if this quota counter is over either limit and set the quota
  * timers as appropriate.
@@ -112,7 +122,8 @@ xfs_qm_adjust_res_timer(
 	if ((res->softlimit && res->count > res->softlimit) ||
 	    (res->hardlimit && res->count > res->hardlimit)) {
 		if (res->timer == 0)
-			res->timer = ktime_get_real_seconds() + qlim->time;
+			xfs_dquot_set_timeout(&res->timer,
+					ktime_get_real_seconds() + qlim->time);
 	} else {
 		if (res->timer == 0)
 			res->warnings = 0;
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 282a65da93c7..11bd0ee9b0fa 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -237,4 +237,6 @@ typedef int (*xfs_qm_dqiterate_fn)(struct xfs_dquot *dq,
 int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type,
 		xfs_qm_dqiterate_fn iter_fn, void *priv);
 
+void xfs_dquot_set_timeout(time64_t *timer, time64_t limit);
+
 #endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 48a64fa49f91..38ccffcf3336 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -25,6 +25,8 @@ xfs_check_limits(void)
 	/* make sure timestamp limits are correct */
 	XFS_CHECK_VALUE(XFS_INO_TIME_MIN,			-2147483648LL);
 	XFS_CHECK_VALUE(XFS_INO_TIME_MAX,			2147483647LL);
+	XFS_CHECK_VALUE(XFS_DQ_TIMEOUT_MIN,			1LL);
+	XFS_CHECK_VALUE(XFS_DQ_TIMEOUT_MAX,			4294967295LL);
 }
 
 static inline void __init
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 1c542b4a5220..b16d533a6feb 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -483,9 +483,14 @@ xfs_setqlim_timer(
 	struct xfs_quota_limits	*qlim,
 	s64			timer)
 {
-	res->timer = timer;
-	if (qlim)
+	if (qlim) {
+		/* Set the length of the default grace period. */
+		res->timer = timer;
 		qlim->time = timer;
+	} else {
+		/* Set the grace period expiration on a quota. */
+		xfs_dquot_set_timeout(&res->timer, timer);
+	}
 }
 
 /*


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
  2020-08-17 22:56 ` [PATCH 01/11] xfs: explicitly define inode timestamp range Darrick J. Wong
  2020-08-17 22:57 ` [PATCH 02/11] xfs: refactor quota expiration timer modification Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18 10:46   ` Amir Goldstein
  2020-08-17 22:57 ` [PATCH 04/11] xfs: remove xfs_timestamp_t Darrick J. Wong
                   ` (8 subsequent siblings)
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Refactor the code that sets the default quota grace period into a helper
function so that we can override the ondisk behavior later.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h |   13 +++++++++++++
 fs/xfs/xfs_dquot.c         |    9 +++++++++
 fs/xfs/xfs_dquot.h         |    1 +
 fs/xfs/xfs_ondisk.h        |    2 ++
 fs/xfs/xfs_qm_syscalls.c   |    4 ++--
 5 files changed, 27 insertions(+), 2 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index ef36978239ac..e9e6248b35be 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1205,6 +1205,11 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  * time zero is the Unix epoch, Jan  1 00:00:01 UTC 1970.  An expiration value
  * of zero means that the quota limit has not been reached, and therefore no
  * expiration has been set.
+ *
+ * The grace period for each quota type is stored in the root dquot (id = 0)
+ * and is applied to a non-root dquot when it exceeds the soft or hard limits.
+ * The length of quota grace periods are unsigned 32-bit quantities measured in
+ * units of seconds.  A value of zero means to use the default period.
  */
 
 /*
@@ -1219,6 +1224,14 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  */
 #define XFS_DQ_TIMEOUT_MAX	((int64_t)U32_MAX)
 
+/*
+ * Default quota grace periods, ranging from zero (use the compiled defaults)
+ * to ~136 years.  These are applied to a non-root dquot that has exceeded
+ * either limit.
+ */
+#define XFS_DQ_GRACE_MIN	((int64_t)0)
+#define XFS_DQ_GRACE_MAX	((int64_t)U32_MAX)
+
 /*
  * This is the main portion of the on-disk representation of quota information
  * for a user.  We pad this with some more expansion room to construct the on
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 2425b1c30d11..ed3fa6ada0d3 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -98,6 +98,15 @@ xfs_qm_adjust_dqlimits(
 		xfs_dquot_set_prealloc_limits(dq);
 }
 
+/* Set the length of the default grace period. */
+void
+xfs_dquot_set_grace_period(
+	time64_t		*timer,
+	time64_t		value)
+{
+	*timer = clamp_t(time64_t, value, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX);
+}
+
 /* Set the expiration time of a quota's grace period. */
 void
 xfs_dquot_set_timeout(
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 11bd0ee9b0fa..0ba4d91c3a11 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -237,6 +237,7 @@ typedef int (*xfs_qm_dqiterate_fn)(struct xfs_dquot *dq,
 int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type,
 		xfs_qm_dqiterate_fn iter_fn, void *priv);
 
+void xfs_dquot_set_grace_period(time64_t *timer, time64_t limit);
 void xfs_dquot_set_timeout(time64_t *timer, time64_t limit);
 
 #endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 38ccffcf3336..498e9063c605 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -27,6 +27,8 @@ xfs_check_limits(void)
 	XFS_CHECK_VALUE(XFS_INO_TIME_MAX,			2147483647LL);
 	XFS_CHECK_VALUE(XFS_DQ_TIMEOUT_MIN,			1LL);
 	XFS_CHECK_VALUE(XFS_DQ_TIMEOUT_MAX,			4294967295LL);
+	XFS_CHECK_VALUE(XFS_DQ_GRACE_MIN,			0LL);
+	XFS_CHECK_VALUE(XFS_DQ_GRACE_MAX,			4294967295LL);
 }
 
 static inline void __init
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index b16d533a6feb..95b0c25b9969 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -485,8 +485,8 @@ xfs_setqlim_timer(
 {
 	if (qlim) {
 		/* Set the length of the default grace period. */
-		res->timer = timer;
-		qlim->time = timer;
+		xfs_dquot_set_grace_period(&res->timer, timer);
+		qlim->time = res->timer;
 	} else {
 		/* Set the grace period expiration on a quota. */
 		xfs_dquot_set_timeout(&res->timer, timer);


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 04/11] xfs: remove xfs_timestamp_t
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
                   ` (2 preceding siblings ...)
  2020-08-17 22:57 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18 10:50   ` Amir Goldstein
  2020-08-17 22:57 ` [PATCH 05/11] xfs: move xfs_log_dinode_to_disk to the log code Darrick J. Wong
                   ` (7 subsequent siblings)
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Kill this old typedef.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h     |   12 ++++++------
 fs/xfs/libxfs/xfs_log_format.h |   12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index e9e6248b35be..1f3a2be6c396 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -856,10 +856,10 @@ struct xfs_agfl {
  * Inode timestamps consist of signed 32-bit counters for seconds and
  * nanoseconds; time zero is the Unix epoch, Jan  1 00:00:00 UTC 1970.
  */
-typedef struct xfs_timestamp {
+struct xfs_timestamp {
 	__be32		t_sec;		/* timestamp seconds */
 	__be32		t_nsec;		/* timestamp nanoseconds */
-} xfs_timestamp_t;
+};
 
 /*
  * Smallest possible timestamp with traditional timestamps, which is
@@ -904,9 +904,9 @@ typedef struct xfs_dinode {
 	__be16		di_projid_hi;	/* higher part owner's project id */
 	__u8		di_pad[6];	/* unused, zeroed space */
 	__be16		di_flushiter;	/* incremented on flush */
-	xfs_timestamp_t	di_atime;	/* time last accessed */
-	xfs_timestamp_t	di_mtime;	/* time last modified */
-	xfs_timestamp_t	di_ctime;	/* time created/inode modified */
+	struct xfs_timestamp di_atime;	/* time last accessed */
+	struct xfs_timestamp di_mtime;	/* time last modified */
+	struct xfs_timestamp di_ctime;	/* time created/inode modified */
 	__be64		di_size;	/* number of bytes in file */
 	__be64		di_nblocks;	/* # of direct & btree blocks used */
 	__be32		di_extsize;	/* basic/minimum extent size for file */
@@ -931,7 +931,7 @@ typedef struct xfs_dinode {
 	__u8		di_pad2[12];	/* more padding for future expansion */
 
 	/* fields only written to during inode creation */
-	xfs_timestamp_t	di_crtime;	/* time created */
+	struct xfs_timestamp di_crtime;	/* time created */
 	__be64		di_ino;		/* inode number */
 	uuid_t		di_uuid;	/* UUID of the filesystem */
 
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index e3400c9c71cd..f2fac9bea66d 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -368,10 +368,10 @@ static inline int xfs_ilog_fdata(int w)
  * directly mirrors the xfs_dinode structure as it must contain all the same
  * information.
  */
-typedef struct xfs_ictimestamp {
+struct xfs_ictimestamp {
 	int32_t		t_sec;		/* timestamp seconds */
 	int32_t		t_nsec;		/* timestamp nanoseconds */
-} xfs_ictimestamp_t;
+};
 
 /*
  * Define the format of the inode core that is logged. This structure must be
@@ -390,9 +390,9 @@ struct xfs_log_dinode {
 	uint16_t	di_projid_hi;	/* higher part of owner's project id */
 	uint8_t		di_pad[6];	/* unused, zeroed space */
 	uint16_t	di_flushiter;	/* incremented on flush */
-	xfs_ictimestamp_t di_atime;	/* time last accessed */
-	xfs_ictimestamp_t di_mtime;	/* time last modified */
-	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
+	struct xfs_ictimestamp di_atime;	/* time last accessed */
+	struct xfs_ictimestamp di_mtime;	/* time last modified */
+	struct xfs_ictimestamp di_ctime;	/* time created/inode modified */
 	xfs_fsize_t	di_size;	/* number of bytes in file */
 	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
 	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
@@ -417,7 +417,7 @@ struct xfs_log_dinode {
 	uint8_t		di_pad2[12];	/* more padding for future expansion */
 
 	/* fields only written to during inode creation */
-	xfs_ictimestamp_t di_crtime;	/* time created */
+	struct xfs_ictimestamp di_crtime;	/* time created */
 	xfs_ino_t	di_ino;		/* inode number */
 	uuid_t		di_uuid;	/* UUID of the filesystem */
 


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 05/11] xfs: move xfs_log_dinode_to_disk to the log code
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
                   ` (3 preceding siblings ...)
  2020-08-17 22:57 ` [PATCH 04/11] xfs: remove xfs_timestamp_t Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18 10:51   ` Amir Goldstein
  2020-08-17 22:57 ` [PATCH 06/11] xfs: refactor inode timestamp coding Darrick J. Wong
                   ` (6 subsequent siblings)
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Move this function to xfs_inode_item.c to match the encoding function
that's already there.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_inode_buf.c |   52 -----------------------------------------
 fs/xfs/libxfs/xfs_inode_buf.h |    2 --
 fs/xfs/xfs_inode_item.c       |   52 +++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_inode_item.h       |    3 ++
 4 files changed, 55 insertions(+), 54 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 8d5dd08eab75..fa83591ca89b 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -310,58 +310,6 @@ xfs_inode_to_disk(
 	}
 }
 
-void
-xfs_log_dinode_to_disk(
-	struct xfs_log_dinode	*from,
-	struct xfs_dinode	*to)
-{
-	to->di_magic = cpu_to_be16(from->di_magic);
-	to->di_mode = cpu_to_be16(from->di_mode);
-	to->di_version = from->di_version;
-	to->di_format = from->di_format;
-	to->di_onlink = 0;
-	to->di_uid = cpu_to_be32(from->di_uid);
-	to->di_gid = cpu_to_be32(from->di_gid);
-	to->di_nlink = cpu_to_be32(from->di_nlink);
-	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
-	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
-	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-
-	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
-	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
-	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
-	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
-	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
-	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
-
-	to->di_size = cpu_to_be64(from->di_size);
-	to->di_nblocks = cpu_to_be64(from->di_nblocks);
-	to->di_extsize = cpu_to_be32(from->di_extsize);
-	to->di_nextents = cpu_to_be32(from->di_nextents);
-	to->di_anextents = cpu_to_be16(from->di_anextents);
-	to->di_forkoff = from->di_forkoff;
-	to->di_aformat = from->di_aformat;
-	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
-	to->di_dmstate = cpu_to_be16(from->di_dmstate);
-	to->di_flags = cpu_to_be16(from->di_flags);
-	to->di_gen = cpu_to_be32(from->di_gen);
-
-	if (from->di_version == 3) {
-		to->di_changecount = cpu_to_be64(from->di_changecount);
-		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
-		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
-		to->di_flags2 = cpu_to_be64(from->di_flags2);
-		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
-		to->di_ino = cpu_to_be64(from->di_ino);
-		to->di_lsn = cpu_to_be64(from->di_lsn);
-		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
-		uuid_copy(&to->di_uuid, &from->di_uuid);
-		to->di_flushiter = 0;
-	} else {
-		to->di_flushiter = cpu_to_be16(from->di_flushiter);
-	}
-}
-
 static xfs_failaddr_t
 xfs_dinode_verify_fork(
 	struct xfs_dinode	*dip,
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 6b08b9d060c2..89f7bea8efd6 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -49,8 +49,6 @@ void	xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
 void	xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to,
 			  xfs_lsn_t lsn);
 int	xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
-void	xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
-			       struct xfs_dinode *to);
 
 xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
 			   struct xfs_dinode *dip);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6c65938cee1c..d95a00376fad 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -295,6 +295,58 @@ xfs_inode_item_format_attr_fork(
 	}
 }
 
+void
+xfs_log_dinode_to_disk(
+	struct xfs_log_dinode	*from,
+	struct xfs_dinode	*to)
+{
+	to->di_magic = cpu_to_be16(from->di_magic);
+	to->di_mode = cpu_to_be16(from->di_mode);
+	to->di_version = from->di_version;
+	to->di_format = from->di_format;
+	to->di_onlink = 0;
+	to->di_uid = cpu_to_be32(from->di_uid);
+	to->di_gid = cpu_to_be32(from->di_gid);
+	to->di_nlink = cpu_to_be32(from->di_nlink);
+	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
+	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
+	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
+
+	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
+	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
+	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
+	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
+	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
+	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
+
+	to->di_size = cpu_to_be64(from->di_size);
+	to->di_nblocks = cpu_to_be64(from->di_nblocks);
+	to->di_extsize = cpu_to_be32(from->di_extsize);
+	to->di_nextents = cpu_to_be32(from->di_nextents);
+	to->di_anextents = cpu_to_be16(from->di_anextents);
+	to->di_forkoff = from->di_forkoff;
+	to->di_aformat = from->di_aformat;
+	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
+	to->di_dmstate = cpu_to_be16(from->di_dmstate);
+	to->di_flags = cpu_to_be16(from->di_flags);
+	to->di_gen = cpu_to_be32(from->di_gen);
+
+	if (from->di_version == 3) {
+		to->di_changecount = cpu_to_be64(from->di_changecount);
+		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
+		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
+		to->di_flags2 = cpu_to_be64(from->di_flags2);
+		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
+		to->di_ino = cpu_to_be64(from->di_ino);
+		to->di_lsn = cpu_to_be64(from->di_lsn);
+		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+		uuid_copy(&to->di_uuid, &from->di_uuid);
+		to->di_flushiter = 0;
+	} else {
+		to->di_flushiter = cpu_to_be16(from->di_flushiter);
+	}
+}
+
 static void
 xfs_inode_to_log_dinode(
 	struct xfs_inode	*ip,
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 048b5e7dee90..dc924a1c94bc 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -50,4 +50,7 @@ extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
 
 extern struct kmem_zone	*xfs_ili_zone;
 
+void	xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
+			       struct xfs_dinode *to);
+
 #endif	/* __XFS_INODE_ITEM_H__ */


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 06/11] xfs: refactor inode timestamp coding
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
                   ` (4 preceding siblings ...)
  2020-08-17 22:57 ` [PATCH 05/11] xfs: move xfs_log_dinode_to_disk to the log code Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18 11:20   ` Amir Goldstein
  2020-08-17 22:57 ` [PATCH 07/11] xfs: convert struct xfs_timestamp to union Darrick J. Wong
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Refactor inode timestamp encoding and decoding into helper functions so
that we can add extra behaviors in subsequent patches.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_inode_buf.c |   42 +++++++++++++++++++++++++----------------
 fs/xfs/libxfs/xfs_inode_buf.h |    5 +++++
 fs/xfs/scrub/inode.c          |   25 +++++++++++++++++-------
 fs/xfs/xfs_inode_item.c       |   42 +++++++++++++++++++++++++----------------
 4 files changed, 74 insertions(+), 40 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index fa83591ca89b..4930eabed6d8 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -157,6 +157,15 @@ xfs_imap_to_bp(
 	return 0;
 }
 
+void
+xfs_inode_from_disk_timestamp(
+	struct timespec64		*tv,
+	const struct xfs_timestamp	*ts)
+{
+	tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
+	tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
+}
+
 int
 xfs_inode_from_disk(
 	struct xfs_inode	*ip,
@@ -211,12 +220,9 @@ xfs_inode_from_disk(
 	 * a time before epoch is converted to a time long after epoch
 	 * on 64 bit systems.
 	 */
-	inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
-	inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
-	inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
-	inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
-	inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
-	inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
+	xfs_inode_from_disk_timestamp(&inode->i_atime, &from->di_atime);
+	xfs_inode_from_disk_timestamp(&inode->i_mtime, &from->di_mtime);
+	xfs_inode_from_disk_timestamp(&inode->i_ctime, &from->di_ctime);
 
 	to->di_size = be64_to_cpu(from->di_size);
 	to->di_nblocks = be64_to_cpu(from->di_nblocks);
@@ -229,8 +235,7 @@ xfs_inode_from_disk(
 	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
 		inode_set_iversion_queried(inode,
 					   be64_to_cpu(from->di_changecount));
-		to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec);
-		to->di_crtime.tv_nsec = be32_to_cpu(from->di_crtime.t_nsec);
+		xfs_inode_from_disk_timestamp(&to->di_crtime, &from->di_crtime);
 		to->di_flags2 = be64_to_cpu(from->di_flags2);
 		to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
 	}
@@ -252,6 +257,15 @@ xfs_inode_from_disk(
 	return error;
 }
 
+void
+xfs_inode_to_disk_timestamp(
+	struct xfs_timestamp		*ts,
+	const struct timespec64		*tv)
+{
+	ts->t_sec = cpu_to_be32(tv->tv_sec);
+	ts->t_nsec = cpu_to_be32(tv->tv_nsec);
+}
+
 void
 xfs_inode_to_disk(
 	struct xfs_inode	*ip,
@@ -271,12 +285,9 @@ xfs_inode_to_disk(
 	to->di_projid_hi = cpu_to_be16(from->di_projid >> 16);
 
 	memset(to->di_pad, 0, sizeof(to->di_pad));
-	to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
-	to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
-	to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
-	to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
-	to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
-	to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+	xfs_inode_to_disk_timestamp(&to->di_atime, &inode->i_atime);
+	xfs_inode_to_disk_timestamp(&to->di_mtime, &inode->i_mtime);
+	xfs_inode_to_disk_timestamp(&to->di_ctime, &inode->i_ctime);
 	to->di_nlink = cpu_to_be32(inode->i_nlink);
 	to->di_gen = cpu_to_be32(inode->i_generation);
 	to->di_mode = cpu_to_be16(inode->i_mode);
@@ -295,8 +306,7 @@ xfs_inode_to_disk(
 	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
 		to->di_version = 3;
 		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
-		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.tv_sec);
-		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec);
+		xfs_inode_to_disk_timestamp(&to->di_crtime, &from->di_crtime);
 		to->di_flags2 = cpu_to_be64(from->di_flags2);
 		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 		to->di_ino = cpu_to_be64(ip->i_ino);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 89f7bea8efd6..9c63f3f932d7 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -58,4 +58,9 @@ xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
 		uint32_t cowextsize, uint16_t mode, uint16_t flags,
 		uint64_t flags2);
 
+void xfs_inode_from_disk_timestamp(struct timespec64 *tv,
+		const struct xfs_timestamp *ts);
+void xfs_inode_to_disk_timestamp(struct xfs_timestamp *ts,
+		const struct timespec64 *tv);
+
 #endif	/* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 6d483ab29e63..ccb5c217c0ee 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -195,6 +195,19 @@ xchk_inode_flags2(
 	xchk_ino_set_corrupt(sc, ino);
 }
 
+static inline void
+xchk_dinode_nsec(
+	struct xfs_scrub		*sc,
+	xfs_ino_t			ino,
+	const struct xfs_timestamp	*ts)
+{
+	struct timespec64		tv;
+
+	xfs_inode_from_disk_timestamp(&tv, ts);
+	if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC)
+		xchk_ino_set_corrupt(sc, ino);
+}
+
 /* Scrub all the ondisk inode fields. */
 STATIC void
 xchk_dinode(
@@ -293,12 +306,9 @@ xchk_dinode(
 	}
 
 	/* di_[amc]time.nsec */
-	if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
-		xchk_ino_set_corrupt(sc, ino);
-	if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
-		xchk_ino_set_corrupt(sc, ino);
-	if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
-		xchk_ino_set_corrupt(sc, ino);
+	xchk_dinode_nsec(sc, ino, &dip->di_atime);
+	xchk_dinode_nsec(sc, ino, &dip->di_mtime);
+	xchk_dinode_nsec(sc, ino, &dip->di_ctime);
 
 	/*
 	 * di_size.  xfs_dinode_verify checks for things that screw up
@@ -403,8 +413,7 @@ xchk_dinode(
 	}
 
 	if (dip->di_version >= 3) {
-		if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
-			xchk_ino_set_corrupt(sc, ino);
+		xchk_dinode_nsec(sc, ino, &dip->di_crtime);
 		xchk_inode_flags2(sc, dip, ino, mode, flags, flags2);
 		xchk_inode_cowextsize(sc, dip, ino, mode, flags,
 				flags2);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d95a00376fad..c2f9a0adeed2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -295,6 +295,15 @@ xfs_inode_item_format_attr_fork(
 	}
 }
 
+static inline void
+xfs_from_log_timestamp(
+	struct xfs_timestamp		*ts,
+	const struct xfs_ictimestamp	*its)
+{
+	ts->t_sec = cpu_to_be32(its->t_sec);
+	ts->t_nsec = cpu_to_be32(its->t_nsec);
+}
+
 void
 xfs_log_dinode_to_disk(
 	struct xfs_log_dinode	*from,
@@ -312,12 +321,9 @@ xfs_log_dinode_to_disk(
 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
 
-	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
-	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
-	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
-	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
-	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
-	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
+	xfs_from_log_timestamp(&to->di_atime, &from->di_atime);
+	xfs_from_log_timestamp(&to->di_mtime, &from->di_mtime);
+	xfs_from_log_timestamp(&to->di_ctime, &from->di_ctime);
 
 	to->di_size = cpu_to_be64(from->di_size);
 	to->di_nblocks = cpu_to_be64(from->di_nblocks);
@@ -333,8 +339,7 @@ xfs_log_dinode_to_disk(
 
 	if (from->di_version == 3) {
 		to->di_changecount = cpu_to_be64(from->di_changecount);
-		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
-		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
+		xfs_from_log_timestamp(&to->di_crtime, &from->di_crtime);
 		to->di_flags2 = cpu_to_be64(from->di_flags2);
 		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 		to->di_ino = cpu_to_be64(from->di_ino);
@@ -347,6 +352,15 @@ xfs_log_dinode_to_disk(
 	}
 }
 
+static inline void
+xfs_to_log_timestamp(
+	struct xfs_ictimestamp		*its,
+	const struct timespec64		*ts)
+{
+	its->t_sec = ts->tv_sec;
+	its->t_nsec = ts->tv_nsec;
+}
+
 static void
 xfs_inode_to_log_dinode(
 	struct xfs_inode	*ip,
@@ -365,12 +379,9 @@ xfs_inode_to_log_dinode(
 
 	memset(to->di_pad, 0, sizeof(to->di_pad));
 	memset(to->di_pad3, 0, sizeof(to->di_pad3));
-	to->di_atime.t_sec = inode->i_atime.tv_sec;
-	to->di_atime.t_nsec = inode->i_atime.tv_nsec;
-	to->di_mtime.t_sec = inode->i_mtime.tv_sec;
-	to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
-	to->di_ctime.t_sec = inode->i_ctime.tv_sec;
-	to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
+	xfs_to_log_timestamp(&to->di_atime, &inode->i_atime);
+	xfs_to_log_timestamp(&to->di_mtime, &inode->i_mtime);
+	xfs_to_log_timestamp(&to->di_ctime, &inode->i_ctime);
 	to->di_nlink = inode->i_nlink;
 	to->di_gen = inode->i_generation;
 	to->di_mode = inode->i_mode;
@@ -392,8 +403,7 @@ xfs_inode_to_log_dinode(
 	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
 		to->di_version = 3;
 		to->di_changecount = inode_peek_iversion(inode);
-		to->di_crtime.t_sec = from->di_crtime.tv_sec;
-		to->di_crtime.t_nsec = from->di_crtime.tv_nsec;
+		xfs_to_log_timestamp(&to->di_crtime, &from->di_crtime);
 		to->di_flags2 = from->di_flags2;
 		to->di_cowextsize = from->di_cowextsize;
 		to->di_ino = ip->i_ino;


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 07/11] xfs: convert struct xfs_timestamp to union
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
                   ` (5 preceding siblings ...)
  2020-08-17 22:57 ` [PATCH 06/11] xfs: refactor inode timestamp coding Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18 11:24   ` Amir Goldstein
  2020-08-17 22:57 ` [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem Darrick J. Wong
                   ` (4 subsequent siblings)
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Convert the xfs_timestamp struct to a union so that we can overload it
in the next patch.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h     |   16 +++++++++-------
 fs/xfs/libxfs/xfs_inode_buf.c  |    4 ++--
 fs/xfs/libxfs/xfs_inode_buf.h  |    4 ++--
 fs/xfs/libxfs/xfs_log_format.h |   16 +++++++++-------
 fs/xfs/scrub/inode.c           |    2 +-
 fs/xfs/xfs_inode_item.c        |    6 +++---
 fs/xfs/xfs_ondisk.h            |    4 ++--
 7 files changed, 28 insertions(+), 24 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 1f3a2be6c396..772113db41aa 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -856,9 +856,11 @@ struct xfs_agfl {
  * Inode timestamps consist of signed 32-bit counters for seconds and
  * nanoseconds; time zero is the Unix epoch, Jan  1 00:00:00 UTC 1970.
  */
-struct xfs_timestamp {
-	__be32		t_sec;		/* timestamp seconds */
-	__be32		t_nsec;		/* timestamp nanoseconds */
+union xfs_timestamp {
+	struct {
+		__be32		t_sec;		/* timestamp seconds */
+		__be32		t_nsec;		/* timestamp nanoseconds */
+	};
 };
 
 /*
@@ -904,9 +906,9 @@ typedef struct xfs_dinode {
 	__be16		di_projid_hi;	/* higher part owner's project id */
 	__u8		di_pad[6];	/* unused, zeroed space */
 	__be16		di_flushiter;	/* incremented on flush */
-	struct xfs_timestamp di_atime;	/* time last accessed */
-	struct xfs_timestamp di_mtime;	/* time last modified */
-	struct xfs_timestamp di_ctime;	/* time created/inode modified */
+	union xfs_timestamp di_atime;	/* time last accessed */
+	union xfs_timestamp di_mtime;	/* time last modified */
+	union xfs_timestamp di_ctime;	/* time created/inode modified */
 	__be64		di_size;	/* number of bytes in file */
 	__be64		di_nblocks;	/* # of direct & btree blocks used */
 	__be32		di_extsize;	/* basic/minimum extent size for file */
@@ -931,7 +933,7 @@ typedef struct xfs_dinode {
 	__u8		di_pad2[12];	/* more padding for future expansion */
 
 	/* fields only written to during inode creation */
-	struct xfs_timestamp di_crtime;	/* time created */
+	union xfs_timestamp di_crtime;	/* time created */
 	__be64		di_ino;		/* inode number */
 	uuid_t		di_uuid;	/* UUID of the filesystem */
 
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 4930eabed6d8..cc1316a5fe0c 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -160,7 +160,7 @@ xfs_imap_to_bp(
 void
 xfs_inode_from_disk_timestamp(
 	struct timespec64		*tv,
-	const struct xfs_timestamp	*ts)
+	const union xfs_timestamp	*ts)
 {
 	tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
 	tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
@@ -259,7 +259,7 @@ xfs_inode_from_disk(
 
 void
 xfs_inode_to_disk_timestamp(
-	struct xfs_timestamp		*ts,
+	union xfs_timestamp		*ts,
 	const struct timespec64		*tv)
 {
 	ts->t_sec = cpu_to_be32(tv->tv_sec);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 9c63f3f932d7..f6160033fcbd 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -59,8 +59,8 @@ xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
 		uint64_t flags2);
 
 void xfs_inode_from_disk_timestamp(struct timespec64 *tv,
-		const struct xfs_timestamp *ts);
-void xfs_inode_to_disk_timestamp(struct xfs_timestamp *ts,
+		const union xfs_timestamp *ts);
+void xfs_inode_to_disk_timestamp(union xfs_timestamp *ts,
 		const struct timespec64 *tv);
 
 #endif	/* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index f2fac9bea66d..17c83d29998c 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -368,9 +368,11 @@ static inline int xfs_ilog_fdata(int w)
  * directly mirrors the xfs_dinode structure as it must contain all the same
  * information.
  */
-struct xfs_ictimestamp {
-	int32_t		t_sec;		/* timestamp seconds */
-	int32_t		t_nsec;		/* timestamp nanoseconds */
+union xfs_ictimestamp {
+	struct {
+		int32_t		t_sec;		/* timestamp seconds */
+		int32_t		t_nsec;		/* timestamp nanoseconds */
+	};
 };
 
 /*
@@ -390,9 +392,9 @@ struct xfs_log_dinode {
 	uint16_t	di_projid_hi;	/* higher part of owner's project id */
 	uint8_t		di_pad[6];	/* unused, zeroed space */
 	uint16_t	di_flushiter;	/* incremented on flush */
-	struct xfs_ictimestamp di_atime;	/* time last accessed */
-	struct xfs_ictimestamp di_mtime;	/* time last modified */
-	struct xfs_ictimestamp di_ctime;	/* time created/inode modified */
+	union xfs_ictimestamp di_atime;	/* time last accessed */
+	union xfs_ictimestamp di_mtime;	/* time last modified */
+	union xfs_ictimestamp di_ctime;	/* time created/inode modified */
 	xfs_fsize_t	di_size;	/* number of bytes in file */
 	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
 	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
@@ -417,7 +419,7 @@ struct xfs_log_dinode {
 	uint8_t		di_pad2[12];	/* more padding for future expansion */
 
 	/* fields only written to during inode creation */
-	struct xfs_ictimestamp di_crtime;	/* time created */
+	union xfs_ictimestamp di_crtime;	/* time created */
 	xfs_ino_t	di_ino;		/* inode number */
 	uuid_t		di_uuid;	/* UUID of the filesystem */
 
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index ccb5c217c0ee..9f036053fdb7 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -199,7 +199,7 @@ static inline void
 xchk_dinode_nsec(
 	struct xfs_scrub		*sc,
 	xfs_ino_t			ino,
-	const struct xfs_timestamp	*ts)
+	const union xfs_timestamp	*ts)
 {
 	struct timespec64		tv;
 
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c2f9a0adeed2..64cde59ed51a 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -297,8 +297,8 @@ xfs_inode_item_format_attr_fork(
 
 static inline void
 xfs_from_log_timestamp(
-	struct xfs_timestamp		*ts,
-	const struct xfs_ictimestamp	*its)
+	union xfs_timestamp		*ts,
+	const union xfs_ictimestamp	*its)
 {
 	ts->t_sec = cpu_to_be32(its->t_sec);
 	ts->t_nsec = cpu_to_be32(its->t_nsec);
@@ -354,7 +354,7 @@ xfs_log_dinode_to_disk(
 
 static inline void
 xfs_to_log_timestamp(
-	struct xfs_ictimestamp		*its,
+	union xfs_ictimestamp		*its,
 	const struct timespec64		*ts)
 {
 	its->t_sec = ts->tv_sec;
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 498e9063c605..7158a8de719f 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -57,7 +57,7 @@ xfs_check_ondisk_structs(void)
 	XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec,		12);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key,		20);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec,		24);
-	XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp,		8);
+	XFS_CHECK_STRUCT_SIZE(union xfs_timestamp,		8);
 	XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t,			8);
 	XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t,			4);
 	XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t,			8);
@@ -137,7 +137,7 @@ xfs_check_ondisk_structs(void)
 	XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64,		16);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode,		176);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log,		28);
-	XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp,		8);
+	XFS_CHECK_STRUCT_SIZE(union xfs_ictimestamp,		8);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32,	52);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format,	56);
 	XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat,	20);


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
                   ` (6 preceding siblings ...)
  2020-08-17 22:57 ` [PATCH 07/11] xfs: convert struct xfs_timestamp to union Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18 12:00   ` Amir Goldstein
  2020-08-18 23:35   ` Dave Chinner
  2020-08-17 22:57 ` [PATCH 09/11] xfs: refactor quota timestamp coding Darrick J. Wong
                   ` (3 subsequent siblings)
  11 siblings, 2 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
time epoch).  This enables us to handle dates up to 2486, which solves
the y2038 problem.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h     |   38 +++++++++++++++++++++++++++-
 fs/xfs/libxfs/xfs_fs.h         |    1 +
 fs/xfs/libxfs/xfs_inode_buf.c  |   54 ++++++++++++++++++++++++++++++++++------
 fs/xfs/libxfs/xfs_inode_buf.h  |    8 +++---
 fs/xfs/libxfs/xfs_log_format.h |    3 ++
 fs/xfs/libxfs/xfs_sb.c         |    2 +
 fs/xfs/scrub/inode.c           |   16 ++++++++----
 fs/xfs/xfs_inode.c             |   11 ++++++++
 fs/xfs/xfs_inode_item.c        |   35 ++++++++++++++++++++------
 fs/xfs/xfs_ioctl.c             |    3 +-
 fs/xfs/xfs_ondisk.h            |    3 ++
 fs/xfs/xfs_super.c             |   13 ++++++++--
 12 files changed, 158 insertions(+), 29 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 772113db41aa..57343973e5e5 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -467,6 +467,7 @@ xfs_sb_has_ro_compat_feature(
 #define XFS_SB_FEAT_INCOMPAT_FTYPE	(1 << 0)	/* filetype in dirent */
 #define XFS_SB_FEAT_INCOMPAT_SPINODES	(1 << 1)	/* sparse inode chunks */
 #define XFS_SB_FEAT_INCOMPAT_META_UUID	(1 << 2)	/* metadata UUID */
+#define XFS_SB_FEAT_INCOMPAT_BIGTIME	(1 << 3)	/* large timestamps */
 #define XFS_SB_FEAT_INCOMPAT_ALL \
 		(XFS_SB_FEAT_INCOMPAT_FTYPE|	\
 		 XFS_SB_FEAT_INCOMPAT_SPINODES|	\
@@ -565,6 +566,12 @@ static inline bool xfs_sb_version_hasreflink(struct xfs_sb *sbp)
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
 }
 
+static inline bool xfs_sb_version_hasbigtime(struct xfs_sb *sbp)
+{
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+		(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME);
+}
+
 /*
  * Inode btree block counter.  We record the number of inobt and finobt blocks
  * in the AGI header so that we can skip the finobt walk at mount time when
@@ -855,12 +862,18 @@ struct xfs_agfl {
  *
  * Inode timestamps consist of signed 32-bit counters for seconds and
  * nanoseconds; time zero is the Unix epoch, Jan  1 00:00:00 UTC 1970.
+ *
+ * When bigtime is enabled, timestamps become an unsigned 64-bit nanoseconds
+ * counter.  Time zero is the start of the classic timestamp range.
  */
 union xfs_timestamp {
 	struct {
 		__be32		t_sec;		/* timestamp seconds */
 		__be32		t_nsec;		/* timestamp nanoseconds */
 	};
+
+	/* Nanoseconds since the bigtime epoch. */
+	__be64			t_bigtime;
 };
 
 /*
@@ -875,6 +888,25 @@ union xfs_timestamp {
  */
 #define XFS_INO_TIME_MAX	((int64_t)S32_MAX)
 
+/*
+ * Number of seconds between the start of the bigtime timestamp range and the
+ * start of the Unix epoch.
+ */
+#define XFS_INO_BIGTIME_EPOCH	(-XFS_INO_TIME_MIN)
+
+/*
+ * Smallest possible timestamp with big timestamps, which is
+ * Dec 13 20:45:52 UTC 1901.
+ */
+#define XFS_INO_BIGTIME_MIN	(XFS_INO_TIME_MIN)
+
+/*
+ * Largest possible timestamp with big timestamps, which is
+ * Jul  2 20:20:25 UTC 2486.
+ */
+#define XFS_INO_BIGTIME_MAX	((int64_t)((-1ULL / NSEC_PER_SEC) - \
+					   XFS_INO_BIGTIME_EPOCH))
+
 /*
  * On-disk inode structure.
  *
@@ -1100,12 +1132,16 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 #define XFS_DIFLAG2_DAX_BIT	0	/* use DAX for this inode */
 #define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
 #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
+#define XFS_DIFLAG2_BIGTIME_BIT	3	/* big timestamps */
+
 #define XFS_DIFLAG2_DAX		(1 << XFS_DIFLAG2_DAX_BIT)
 #define XFS_DIFLAG2_REFLINK     (1 << XFS_DIFLAG2_REFLINK_BIT)
 #define XFS_DIFLAG2_COWEXTSIZE  (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
+#define XFS_DIFLAG2_BIGTIME	(1 << XFS_DIFLAG2_BIGTIME_BIT)
 
 #define XFS_DIFLAG2_ANY \
-	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)
+	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
+	 XFS_DIFLAG2_BIGTIME)
 
 /*
  * Inode number format:
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 84bcffa87753..2a2e3cfd94f0 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -249,6 +249,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_SPINODES	(1 << 18) /* sparse inode chunks   */
 #define XFS_FSOP_GEOM_FLAGS_RMAPBT	(1 << 19) /* reverse mapping btree */
 #define XFS_FSOP_GEOM_FLAGS_REFLINK	(1 << 20) /* files can share blocks */
+#define XFS_FSOP_GEOM_FLAGS_BIGTIME	(1 << 21) /* 64-bit nsec timestamps */
 
 /*
  * Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index cc1316a5fe0c..c59ddb56bb90 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -157,11 +157,25 @@ xfs_imap_to_bp(
 	return 0;
 }
 
+/* Convert an ondisk timestamp into the 64-bit safe incore format. */
 void
 xfs_inode_from_disk_timestamp(
+	struct xfs_dinode		*dip,
 	struct timespec64		*tv,
 	const union xfs_timestamp	*ts)
 {
+	if (dip->di_version >= 3 &&
+	    (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME))) {
+		uint64_t		t = be64_to_cpu(ts->t_bigtime);
+		uint64_t		s;
+		uint32_t		n;
+
+		s = div_u64_rem(t, NSEC_PER_SEC, &n);
+		tv->tv_sec = s - XFS_INO_BIGTIME_EPOCH;
+		tv->tv_nsec = n;
+		return;
+	}
+
 	tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
 	tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
 }
@@ -220,9 +234,9 @@ xfs_inode_from_disk(
 	 * a time before epoch is converted to a time long after epoch
 	 * on 64 bit systems.
 	 */
-	xfs_inode_from_disk_timestamp(&inode->i_atime, &from->di_atime);
-	xfs_inode_from_disk_timestamp(&inode->i_mtime, &from->di_mtime);
-	xfs_inode_from_disk_timestamp(&inode->i_ctime, &from->di_ctime);
+	xfs_inode_from_disk_timestamp(from, &inode->i_atime, &from->di_atime);
+	xfs_inode_from_disk_timestamp(from, &inode->i_mtime, &from->di_mtime);
+	xfs_inode_from_disk_timestamp(from, &inode->i_ctime, &from->di_ctime);
 
 	to->di_size = be64_to_cpu(from->di_size);
 	to->di_nblocks = be64_to_cpu(from->di_nblocks);
@@ -235,9 +249,17 @@ xfs_inode_from_disk(
 	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
 		inode_set_iversion_queried(inode,
 					   be64_to_cpu(from->di_changecount));
-		xfs_inode_from_disk_timestamp(&to->di_crtime, &from->di_crtime);
+		xfs_inode_from_disk_timestamp(from, &to->di_crtime,
+				&from->di_crtime);
 		to->di_flags2 = be64_to_cpu(from->di_flags2);
 		to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
+		/*
+		 * Set the bigtime flag incore so that we automatically convert
+		 * this inode's ondisk timestamps to bigtime format the next
+		 * time we write the inode core to disk.
+		 */
+		if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
+			to->di_flags2 |= XFS_DIFLAG2_BIGTIME;
 	}
 
 	error = xfs_iformat_data_fork(ip, from);
@@ -259,9 +281,19 @@ xfs_inode_from_disk(
 
 void
 xfs_inode_to_disk_timestamp(
+	struct xfs_icdinode		*from,
 	union xfs_timestamp		*ts,
 	const struct timespec64		*tv)
 {
+	if (from->di_flags2 & XFS_DIFLAG2_BIGTIME) {
+		uint64_t		t;
+
+		t = (uint64_t)(tv->tv_sec + XFS_INO_BIGTIME_EPOCH);
+		t *= NSEC_PER_SEC;
+		ts->t_bigtime = cpu_to_be64(t + tv->tv_nsec);
+		return;
+	}
+
 	ts->t_sec = cpu_to_be32(tv->tv_sec);
 	ts->t_nsec = cpu_to_be32(tv->tv_nsec);
 }
@@ -285,9 +317,9 @@ xfs_inode_to_disk(
 	to->di_projid_hi = cpu_to_be16(from->di_projid >> 16);
 
 	memset(to->di_pad, 0, sizeof(to->di_pad));
-	xfs_inode_to_disk_timestamp(&to->di_atime, &inode->i_atime);
-	xfs_inode_to_disk_timestamp(&to->di_mtime, &inode->i_mtime);
-	xfs_inode_to_disk_timestamp(&to->di_ctime, &inode->i_ctime);
+	xfs_inode_to_disk_timestamp(from, &to->di_atime, &inode->i_atime);
+	xfs_inode_to_disk_timestamp(from, &to->di_mtime, &inode->i_mtime);
+	xfs_inode_to_disk_timestamp(from, &to->di_ctime, &inode->i_ctime);
 	to->di_nlink = cpu_to_be32(inode->i_nlink);
 	to->di_gen = cpu_to_be32(inode->i_generation);
 	to->di_mode = cpu_to_be16(inode->i_mode);
@@ -306,7 +338,8 @@ xfs_inode_to_disk(
 	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
 		to->di_version = 3;
 		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
-		xfs_inode_to_disk_timestamp(&to->di_crtime, &from->di_crtime);
+		xfs_inode_to_disk_timestamp(from, &to->di_crtime,
+				&from->di_crtime);
 		to->di_flags2 = cpu_to_be64(from->di_flags2);
 		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 		to->di_ino = cpu_to_be64(ip->i_ino);
@@ -526,6 +559,11 @@ xfs_dinode_verify(
 	if (fa)
 		return fa;
 
+	/* bigtime iflag can only happen on bigtime filesystems */
+	if ((flags2 & (XFS_DIFLAG2_BIGTIME)) &&
+	    !xfs_sb_version_hasbigtime(&mp->m_sb))
+		return __this_address;
+
 	return NULL;
 }
 
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index f6160033fcbd..3b25e43eafa1 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -58,9 +58,9 @@ xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
 		uint32_t cowextsize, uint16_t mode, uint16_t flags,
 		uint64_t flags2);
 
-void xfs_inode_from_disk_timestamp(struct timespec64 *tv,
-		const union xfs_timestamp *ts);
-void xfs_inode_to_disk_timestamp(union xfs_timestamp *ts,
-		const struct timespec64 *tv);
+void xfs_inode_from_disk_timestamp(struct xfs_dinode *dip,
+		struct timespec64 *tv, const union xfs_timestamp *ts);
+void xfs_inode_to_disk_timestamp(struct xfs_icdinode *from,
+		union xfs_timestamp *ts, const struct timespec64 *tv);
 
 #endif	/* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 17c83d29998c..569721f7f9e5 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -373,6 +373,9 @@ union xfs_ictimestamp {
 		int32_t		t_sec;		/* timestamp seconds */
 		int32_t		t_nsec;		/* timestamp nanoseconds */
 	};
+
+	/* Nanoseconds since the bigtime epoch. */
+	uint64_t		t_bigtime;
 };
 
 /*
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index ae9aaf1f34bf..d5d60cd1c2ea 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -1166,6 +1166,8 @@ xfs_fs_geometry(
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT;
 	if (xfs_sb_version_hasreflink(sbp))
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK;
+	if (xfs_sb_version_hasbigtime(sbp))
+		geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME;
 	if (xfs_sb_version_hassector(sbp))
 		geo->logsectsize = sbp->sb_logsectsize;
 	else
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 9f036053fdb7..b354825f4e51 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -190,6 +190,11 @@ xchk_inode_flags2(
 	if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
 		goto bad;
 
+	/* the incore bigtime iflag always follows the feature flag */
+	if (!!xfs_sb_version_hasbigtime(&mp->m_sb) ^
+	    !!(flags2 & XFS_DIFLAG2_BIGTIME))
+		goto bad;
+
 	return;
 bad:
 	xchk_ino_set_corrupt(sc, ino);
@@ -199,11 +204,12 @@ static inline void
 xchk_dinode_nsec(
 	struct xfs_scrub		*sc,
 	xfs_ino_t			ino,
+	struct xfs_dinode		*dip,
 	const union xfs_timestamp	*ts)
 {
 	struct timespec64		tv;
 
-	xfs_inode_from_disk_timestamp(&tv, ts);
+	xfs_inode_from_disk_timestamp(dip, &tv, ts);
 	if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC)
 		xchk_ino_set_corrupt(sc, ino);
 }
@@ -306,9 +312,9 @@ xchk_dinode(
 	}
 
 	/* di_[amc]time.nsec */
-	xchk_dinode_nsec(sc, ino, &dip->di_atime);
-	xchk_dinode_nsec(sc, ino, &dip->di_mtime);
-	xchk_dinode_nsec(sc, ino, &dip->di_ctime);
+	xchk_dinode_nsec(sc, ino, dip, &dip->di_atime);
+	xchk_dinode_nsec(sc, ino, dip, &dip->di_mtime);
+	xchk_dinode_nsec(sc, ino, dip, &dip->di_ctime);
 
 	/*
 	 * di_size.  xfs_dinode_verify checks for things that screw up
@@ -413,7 +419,7 @@ xchk_dinode(
 	}
 
 	if (dip->di_version >= 3) {
-		xchk_dinode_nsec(sc, ino, &dip->di_crtime);
+		xchk_dinode_nsec(sc, ino, dip, &dip->di_crtime);
 		xchk_inode_flags2(sc, dip, ino, mode, flags, flags2);
 		xchk_inode_cowextsize(sc, dip, ino, mode, flags,
 				flags2);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c06129cffba9..bcd1b4bef123 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -841,6 +841,8 @@ xfs_ialloc(
 	if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
 		inode_set_iversion(inode, 1);
 		ip->i_d.di_flags2 = 0;
+		if (xfs_sb_version_hasbigtime(&mp->m_sb))
+			ip->i_d.di_flags2 |= XFS_DIFLAG2_BIGTIME;
 		ip->i_d.di_cowextsize = 0;
 		ip->i_d.di_crtime = tv;
 	}
@@ -2718,6 +2720,8 @@ xfs_ifree(
 	VFS_I(ip)->i_mode = 0;		/* mark incore inode as free */
 	ip->i_d.di_flags = 0;
 	ip->i_d.di_flags2 = 0;
+	if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
+		ip->i_d.di_flags2 |= XFS_DIFLAG2_BIGTIME;
 	ip->i_d.di_dmevmask = 0;
 	ip->i_d.di_forkoff = 0;		/* mark the attr fork not in use */
 	ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
@@ -3503,6 +3507,13 @@ xfs_iflush(
 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
 		goto flush_out;
 	}
+	if (xfs_sb_version_hasbigtime(&mp->m_sb) &&
+	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_BIGTIME)) {
+		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+			"%s: bad inode %Lu, bigtime unset, ptr "PTR_FMT,
+			__func__, ip->i_ino, ip);
+		goto flush_out;
+	}
 
 	/*
 	 * Inode item log recovery for v2 inodes are dependent on the
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 64cde59ed51a..08f061829429 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -297,9 +297,16 @@ xfs_inode_item_format_attr_fork(
 
 static inline void
 xfs_from_log_timestamp(
+	struct xfs_log_dinode		*from,
 	union xfs_timestamp		*ts,
 	const union xfs_ictimestamp	*its)
 {
+	if (from->di_version >= 3 &&
+	    (from->di_flags2 & XFS_DIFLAG2_BIGTIME)) {
+		ts->t_bigtime = cpu_to_be64(its->t_bigtime);
+		return;
+	}
+
 	ts->t_sec = cpu_to_be32(its->t_sec);
 	ts->t_nsec = cpu_to_be32(its->t_nsec);
 }
@@ -321,9 +328,9 @@ xfs_log_dinode_to_disk(
 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
 
-	xfs_from_log_timestamp(&to->di_atime, &from->di_atime);
-	xfs_from_log_timestamp(&to->di_mtime, &from->di_mtime);
-	xfs_from_log_timestamp(&to->di_ctime, &from->di_ctime);
+	xfs_from_log_timestamp(from, &to->di_atime, &from->di_atime);
+	xfs_from_log_timestamp(from, &to->di_mtime, &from->di_mtime);
+	xfs_from_log_timestamp(from, &to->di_ctime, &from->di_ctime);
 
 	to->di_size = cpu_to_be64(from->di_size);
 	to->di_nblocks = cpu_to_be64(from->di_nblocks);
@@ -339,7 +346,7 @@ xfs_log_dinode_to_disk(
 
 	if (from->di_version == 3) {
 		to->di_changecount = cpu_to_be64(from->di_changecount);
-		xfs_from_log_timestamp(&to->di_crtime, &from->di_crtime);
+		xfs_from_log_timestamp(from, &to->di_crtime, &from->di_crtime);
 		to->di_flags2 = cpu_to_be64(from->di_flags2);
 		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 		to->di_ino = cpu_to_be64(from->di_ino);
@@ -354,9 +361,19 @@ xfs_log_dinode_to_disk(
 
 static inline void
 xfs_to_log_timestamp(
+	struct xfs_icdinode		*from,
 	union xfs_ictimestamp		*its,
 	const struct timespec64		*ts)
 {
+	if (from->di_flags2 & XFS_DIFLAG2_BIGTIME) {
+		uint64_t		t;
+
+		t = (uint64_t)(ts->tv_sec + XFS_INO_BIGTIME_EPOCH);
+		t *= NSEC_PER_SEC;
+		its->t_bigtime = t + ts->tv_nsec;
+		return;
+	}
+
 	its->t_sec = ts->tv_sec;
 	its->t_nsec = ts->tv_nsec;
 }
@@ -379,9 +396,9 @@ xfs_inode_to_log_dinode(
 
 	memset(to->di_pad, 0, sizeof(to->di_pad));
 	memset(to->di_pad3, 0, sizeof(to->di_pad3));
-	xfs_to_log_timestamp(&to->di_atime, &inode->i_atime);
-	xfs_to_log_timestamp(&to->di_mtime, &inode->i_mtime);
-	xfs_to_log_timestamp(&to->di_ctime, &inode->i_ctime);
+	xfs_to_log_timestamp(from, &to->di_atime, &inode->i_atime);
+	xfs_to_log_timestamp(from, &to->di_mtime, &inode->i_mtime);
+	xfs_to_log_timestamp(from, &to->di_ctime, &inode->i_ctime);
 	to->di_nlink = inode->i_nlink;
 	to->di_gen = inode->i_generation;
 	to->di_mode = inode->i_mode;
@@ -403,7 +420,7 @@ xfs_inode_to_log_dinode(
 	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
 		to->di_version = 3;
 		to->di_changecount = inode_peek_iversion(inode);
-		xfs_to_log_timestamp(&to->di_crtime, &from->di_crtime);
+		xfs_to_log_timestamp(from, &to->di_crtime, &from->di_crtime);
 		to->di_flags2 = from->di_flags2;
 		to->di_cowextsize = from->di_cowextsize;
 		to->di_ino = ip->i_ino;
@@ -411,6 +428,8 @@ xfs_inode_to_log_dinode(
 		memset(to->di_pad2, 0, sizeof(to->di_pad2));
 		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
 		to->di_flushiter = 0;
+		ASSERT((from->di_flags2 & XFS_DIFLAG2_BIGTIME) ||
+		       !xfs_sb_version_hasbigtime(&ip->i_mount->m_sb));
 	} else {
 		to->di_version = 2;
 		to->di_flushiter = from->di_flushiter;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6f22a66777cd..13396c3665d1 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1190,7 +1190,8 @@ xfs_flags2diflags2(
 	unsigned int		xflags)
 {
 	uint64_t		di_flags2 =
-		(ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
+		(ip->i_d.di_flags2 & (XFS_DIFLAG2_REFLINK |
+				      XFS_DIFLAG2_BIGTIME));
 
 	if (xflags & FS_XFLAG_DAX)
 		di_flags2 |= XFS_DIFLAG2_DAX;
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 7158a8de719f..80129b2dc392 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -25,6 +25,9 @@ xfs_check_limits(void)
 	/* make sure timestamp limits are correct */
 	XFS_CHECK_VALUE(XFS_INO_TIME_MIN,			-2147483648LL);
 	XFS_CHECK_VALUE(XFS_INO_TIME_MAX,			2147483647LL);
+	XFS_CHECK_VALUE(XFS_INO_BIGTIME_EPOCH,			2147483648LL);
+	XFS_CHECK_VALUE(XFS_INO_BIGTIME_MIN,			-2147483648LL);
+	XFS_CHECK_VALUE(XFS_INO_BIGTIME_MAX,			16299260425LL);
 	XFS_CHECK_VALUE(XFS_DQ_TIMEOUT_MIN,			1LL);
 	XFS_CHECK_VALUE(XFS_DQ_TIMEOUT_MAX,			4294967295LL);
 	XFS_CHECK_VALUE(XFS_DQ_GRACE_MIN,			0LL);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 375f05a47ba4..b19ae052f7a3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1484,8 +1484,13 @@ xfs_fc_fill_super(
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_max_links = XFS_MAXLINK;
 	sb->s_time_gran = 1;
-	sb->s_time_min = XFS_INO_TIME_MIN;
-	sb->s_time_max = XFS_INO_TIME_MAX;
+	if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
+		sb->s_time_min = XFS_INO_BIGTIME_MIN;
+		sb->s_time_max = XFS_INO_BIGTIME_MAX;
+	} else {
+		sb->s_time_min = XFS_INO_TIME_MIN;
+		sb->s_time_max = XFS_INO_TIME_MAX;
+	}
 	sb->s_iflags |= SB_I_CGROUPWB;
 
 	set_posix_acl_flag(sb);
@@ -1494,6 +1499,10 @@ xfs_fc_fill_super(
 	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
 		sb->s_flags |= SB_I_VERSION;
 
+	if (xfs_sb_version_hasbigtime(&mp->m_sb))
+		xfs_warn(mp,
+ "EXPERIMENTAL big timestamp feature in use. Use at your own risk!");
+
 	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
 		bool rtdev_is_dax = false, datadev_is_dax;
 


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 09/11] xfs: refactor quota timestamp coding
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
                   ` (7 preceding siblings ...)
  2020-08-17 22:57 ` [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18 12:25   ` Amir Goldstein
  2020-08-17 22:57 ` [PATCH 10/11] xfs: enable bigtime for quota timers Darrick J. Wong
                   ` (2 subsequent siblings)
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Refactor quota timestamp encoding and decoding into helper functions so
that we can add extra behavior in the next patch.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_dquot_buf.c  |   20 ++++++++++++++++++++
 fs/xfs/libxfs/xfs_quota_defs.h |    6 ++++++
 fs/xfs/xfs_dquot.c             |   12 ++++++------
 3 files changed, 32 insertions(+), 6 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 5a2db00b9d5f..7f5291022b11 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -288,3 +288,23 @@ const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
 	.verify_read = xfs_dquot_buf_readahead_verify,
 	.verify_write = xfs_dquot_buf_write_verify,
 };
+
+/* Convert an on-disk timer value into an incore timer value. */
+void
+xfs_dquot_from_disk_timestamp(
+	struct xfs_disk_dquot	*ddq,
+	time64_t		*timer,
+	__be32			dtimer)
+{
+	*timer = be32_to_cpu(dtimer);
+}
+
+/* Convert an incore timer value into an on-disk timer value. */
+void
+xfs_dquot_to_disk_timestamp(
+	struct xfs_dquot	*dqp,
+	__be32			*dtimer,
+	time64_t		timer)
+{
+	*dtimer = cpu_to_be32(timer);
+}
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 076bdc7037ee..b524059faab5 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -143,4 +143,10 @@ extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
 extern void xfs_dqblk_repair(struct xfs_mount *mp, struct xfs_dqblk *dqb,
 		xfs_dqid_t id, xfs_dqtype_t type);
 
+struct xfs_dquot;
+void xfs_dquot_from_disk_timestamp(struct xfs_disk_dquot *ddq,
+		time64_t *timer, __be32 dtimer);
+void xfs_dquot_to_disk_timestamp(struct xfs_dquot *ddq,
+		__be32 *dtimer, time64_t timer);
+
 #endif	/* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index ed3fa6ada0d3..08d497d413b9 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -534,9 +534,9 @@ xfs_dquot_from_disk(
 	dqp->q_ino.warnings = be16_to_cpu(ddqp->d_iwarns);
 	dqp->q_rtb.warnings = be16_to_cpu(ddqp->d_rtbwarns);
 
-	dqp->q_blk.timer = be32_to_cpu(ddqp->d_btimer);
-	dqp->q_ino.timer = be32_to_cpu(ddqp->d_itimer);
-	dqp->q_rtb.timer = be32_to_cpu(ddqp->d_rtbtimer);
+	xfs_dquot_from_disk_timestamp(ddqp, &dqp->q_blk.timer, ddqp->d_btimer);
+	xfs_dquot_from_disk_timestamp(ddqp, &dqp->q_ino.timer, ddqp->d_itimer);
+	xfs_dquot_from_disk_timestamp(ddqp, &dqp->q_rtb.timer, ddqp->d_rtbtimer);
 
 	/*
 	 * Reservation counters are defined as reservation plus current usage
@@ -579,9 +579,9 @@ xfs_dquot_to_disk(
 	ddqp->d_iwarns = cpu_to_be16(dqp->q_ino.warnings);
 	ddqp->d_rtbwarns = cpu_to_be16(dqp->q_rtb.warnings);
 
-	ddqp->d_btimer = cpu_to_be32(dqp->q_blk.timer);
-	ddqp->d_itimer = cpu_to_be32(dqp->q_ino.timer);
-	ddqp->d_rtbtimer = cpu_to_be32(dqp->q_rtb.timer);
+	xfs_dquot_to_disk_timestamp(dqp, &ddqp->d_btimer, dqp->q_blk.timer);
+	xfs_dquot_to_disk_timestamp(dqp, &ddqp->d_itimer, dqp->q_ino.timer);
+	xfs_dquot_to_disk_timestamp(dqp, &ddqp->d_rtbtimer, dqp->q_rtb.timer);
 }
 
 /* Allocate and initialize the dquot buffer for this in-core dquot. */


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 10/11] xfs: enable bigtime for quota timers
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
                   ` (8 preceding siblings ...)
  2020-08-17 22:57 ` [PATCH 09/11] xfs: refactor quota timestamp coding Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18 13:58   ` Amir Goldstein
  2020-08-17 22:57 ` [PATCH 11/11] xfs: enable big timestamps Darrick J. Wong
  2020-08-18 23:01 ` [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Dave Chinner
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Enable the bigtime feature for quota timers.  We decrease the accuracy
of the timers to ~4s in exchange for being able to set timers up to the
bigtime maximum.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_dquot_buf.c  |   40 ++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_format.h     |   26 +++++++++++++++++++++++++-
 fs/xfs/libxfs/xfs_quota_defs.h |    3 ++-
 fs/xfs/scrub/quota.c           |    8 ++++++++
 fs/xfs/xfs_dquot.c             |   34 ++++++++++++++++++++++++++++++----
 fs/xfs/xfs_dquot.h             |    3 ++-
 fs/xfs/xfs_ondisk.h            |    7 +++++++
 fs/xfs/xfs_qm.c                |    2 ++
 fs/xfs/xfs_qm_syscalls.c       |    9 +++++----
 9 files changed, 121 insertions(+), 11 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 7f5291022b11..39afefd52275 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -69,6 +69,13 @@ xfs_dquot_verify(
 	    ddq_type != XFS_DQTYPE_GROUP)
 		return __this_address;
 
+	if ((ddq->d_type & XFS_DQTYPE_BIGTIME) &&
+	    !xfs_sb_version_hasbigtime(&mp->m_sb))
+		return __this_address;
+
+	if ((ddq->d_type & XFS_DQTYPE_BIGTIME) && !ddq->d_id)
+		return __this_address;
+
 	if (id != -1 && id != be32_to_cpu(ddq->d_id))
 		return __this_address;
 
@@ -296,6 +303,20 @@ xfs_dquot_from_disk_timestamp(
 	time64_t		*timer,
 	__be32			dtimer)
 {
+	/* Zero always means zero, regardless of encoding. */
+	if (!dtimer) {
+		*timer = 0;
+		return;
+	}
+
+	if (ddq->d_type & XFS_DQTYPE_BIGTIME) {
+		uint64_t	t;
+
+		t = be32_to_cpu(dtimer);
+		*timer = t << XFS_DQ_BIGTIME_SHIFT;
+		return;
+	}
+
 	*timer = be32_to_cpu(dtimer);
 }
 
@@ -306,5 +327,24 @@ xfs_dquot_to_disk_timestamp(
 	__be32			*dtimer,
 	time64_t		timer)
 {
+	/* Zero always means zero, regardless of encoding. */
+	if (!timer) {
+		*dtimer = cpu_to_be32(0);
+		return;
+	}
+
+	if (dqp->q_type & XFS_DQTYPE_BIGTIME) {
+		uint64_t	t = timer;
+
+		/*
+		 * Round the end of the grace period up to the nearest bigtime
+		 * interval that we support, to give users the most time to fix
+		 * the problems.
+		 */
+		t = roundup_64(t, 1U << XFS_DQ_BIGTIME_SHIFT);
+		*dtimer = cpu_to_be32(t >> XFS_DQ_BIGTIME_SHIFT);
+		return;
+	}
+
 	*dtimer = cpu_to_be32(timer);
 }
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 57343973e5e5..1214fa58f45a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1227,13 +1227,15 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 #define XFS_DQTYPE_USER		0x01		/* user dquot record */
 #define XFS_DQTYPE_PROJ		0x02		/* project dquot record */
 #define XFS_DQTYPE_GROUP	0x04		/* group dquot record */
+#define XFS_DQTYPE_BIGTIME	0x08		/* large expiry timestamps */
 
 /* bitmask to determine if this is a user/group/project dquot */
 #define XFS_DQTYPE_REC_MASK	(XFS_DQTYPE_USER | \
 				 XFS_DQTYPE_PROJ | \
 				 XFS_DQTYPE_GROUP)
 
-#define XFS_DQTYPE_ANY		(XFS_DQTYPE_REC_MASK)
+#define XFS_DQTYPE_ANY		(XFS_DQTYPE_REC_MASK | \
+				 XFS_DQTYPE_BIGTIME)
 
 /*
  * XFS Quota Timers
@@ -1270,6 +1272,28 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 #define XFS_DQ_GRACE_MIN	((int64_t)0)
 #define XFS_DQ_GRACE_MAX	((int64_t)U32_MAX)
 
+/*
+ * When bigtime is enabled, we trade a few bits of precision to expand the
+ * expiration timeout range to match that of big inode timestamps.  The grace
+ * periods stored in dquot 0 are not shifted, since they record an interval,
+ * not a timestamp.
+ */
+#define XFS_DQ_BIGTIME_SHIFT		(2)
+
+/*
+ * Smallest possible quota expiration with big timestamps, which is
+ * Jan  1 00:00:01 UTC 1970.
+ */
+#define XFS_DQ_BIGTIMEOUT_MIN		(XFS_DQ_TIMEOUT_MIN)
+
+/*
+ * Largest supported quota expiration with traditional timestamps, which is
+ * the largest bigtime inode timestamp, or Jul  2 20:20:25 UTC 2486.  The field
+ * is large enough that it's possible to fit expirations up to 2514, but we
+ * want to keep the maximum timestamp in sync.
+ */
+#define XFS_DQ_BIGTIMEOUT_MAX		(XFS_INO_BIGTIME_MAX)
+
 /*
  * This is the main portion of the on-disk representation of quota information
  * for a user.  We pad this with some more expansion room to construct the on
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index b524059faab5..b9a47eae684b 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -23,7 +23,8 @@ typedef uint8_t		xfs_dqtype_t;
 #define XFS_DQTYPE_STRINGS \
 	{ XFS_DQTYPE_USER,	"USER" }, \
 	{ XFS_DQTYPE_PROJ,	"PROJ" }, \
-	{ XFS_DQTYPE_GROUP,	"GROUP" }
+	{ XFS_DQTYPE_GROUP,	"GROUP" }, \
+	{ XFS_DQTYPE_BIGTIME,	"BIGTIME" }
 
 /*
  * flags for q_flags field in the dquot.
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index e34ca20ae8e4..34b51989d8bc 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -97,6 +97,14 @@ xchk_quota_item(
 
 	sqi->last_id = dq->q_id;
 
+	/*
+	 * If the bigtime feature is enabled, all non-root incore dquots should
+	 * have the bigtime dquot flag set.
+	 */
+	if (xfs_sb_version_hasbigtime(&mp->m_sb) && dq->q_id &&
+	    !(dq->q_type & XFS_DQTYPE_BIGTIME))
+		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
 	/*
 	 * Warn if the hard limits are larger than the fs.
 	 * Administrators can do this, though in production this seems
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 08d497d413b9..8b3ee4baab48 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -110,9 +110,15 @@ xfs_dquot_set_grace_period(
 /* Set the expiration time of a quota's grace period. */
 void
 xfs_dquot_set_timeout(
+	struct xfs_mount	*mp,
 	time64_t		*timer,
 	time64_t		value)
 {
+	if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
+		*timer = clamp_t(time64_t, value, XFS_DQ_BIGTIMEOUT_MIN,
+						  XFS_DQ_BIGTIMEOUT_MAX);
+		return;
+	}
 	*timer = clamp_t(time64_t, value, XFS_DQ_TIMEOUT_MIN,
 					  XFS_DQ_TIMEOUT_MAX);
 }
@@ -123,6 +129,7 @@ xfs_dquot_set_timeout(
  */
 static inline void
 xfs_qm_adjust_res_timer(
+	struct xfs_dquot	*dqp,
 	struct xfs_dquot_res	*res,
 	struct xfs_quota_limits	*qlim)
 {
@@ -131,7 +138,7 @@ xfs_qm_adjust_res_timer(
 	if ((res->softlimit && res->count > res->softlimit) ||
 	    (res->hardlimit && res->count > res->hardlimit)) {
 		if (res->timer == 0)
-			xfs_dquot_set_timeout(&res->timer,
+			xfs_dquot_set_timeout(dqp->q_mount, &res->timer,
 					ktime_get_real_seconds() + qlim->time);
 	} else {
 		if (res->timer == 0)
@@ -165,9 +172,9 @@ xfs_qm_adjust_dqtimers(
 	ASSERT(dq->q_id);
 	defq = xfs_get_defquota(qi, xfs_dquot_type(dq));
 
-	xfs_qm_adjust_res_timer(&dq->q_blk, &defq->blk);
-	xfs_qm_adjust_res_timer(&dq->q_ino, &defq->ino);
-	xfs_qm_adjust_res_timer(&dq->q_rtb, &defq->rtb);
+	xfs_qm_adjust_res_timer(dq, &dq->q_blk, &defq->blk);
+	xfs_qm_adjust_res_timer(dq, &dq->q_ino, &defq->ino);
+	xfs_qm_adjust_res_timer(dq, &dq->q_rtb, &defq->rtb);
 }
 
 /*
@@ -221,6 +228,8 @@ xfs_qm_init_dquot_blk(
 		d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
 		d->dd_diskdq.d_id = cpu_to_be32(curid);
 		d->dd_diskdq.d_type = type;
+		if (curid > 0 && xfs_sb_version_hasbigtime(&mp->m_sb))
+			d->dd_diskdq.d_type |= XFS_DQTYPE_BIGTIME;
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
 			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
 			xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
@@ -538,6 +547,14 @@ xfs_dquot_from_disk(
 	xfs_dquot_from_disk_timestamp(ddqp, &dqp->q_ino.timer, ddqp->d_itimer);
 	xfs_dquot_from_disk_timestamp(ddqp, &dqp->q_rtb.timer, ddqp->d_rtbtimer);
 
+	/*
+	 * Set the bigtime flag incore so that we automatically convert this
+	 * dquot's ondisk timestamps to bigtime format the next time we write
+	 * the dquot to disk.
+	 */
+	if (xfs_sb_version_hasbigtime(&dqp->q_mount->m_sb) && dqp->q_id != 0)
+		dqp->q_type |= XFS_DQTYPE_BIGTIME;
+
 	/*
 	 * Reservation counters are defined as reservation plus current usage
 	 * to avoid having to add every time.
@@ -1165,6 +1182,15 @@ xfs_qm_dqflush_check(
 	    !dqp->q_rtb.timer)
 		return __this_address;
 
+	/*
+	 * Except for the root dquot (whose timer values are the default grace
+	 * period expirations) we should never write non-bigtime quota timers
+	 * to a bigtime fs.
+	 */
+	if (dqp->q_id != 0 && xfs_sb_version_hasbigtime(&dqp->q_mount->m_sb) &&
+	    !(dqp->q_type & XFS_DQTYPE_BIGTIME))
+		return __this_address;
+
 	return NULL;
 }
 
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 0ba4d91c3a11..74ca87e02a14 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -238,6 +238,7 @@ int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type,
 		xfs_qm_dqiterate_fn iter_fn, void *priv);
 
 void xfs_dquot_set_grace_period(time64_t *timer, time64_t limit);
-void xfs_dquot_set_timeout(time64_t *timer, time64_t limit);
+void xfs_dquot_set_timeout(struct xfs_mount *mp, time64_t *timer,
+		time64_t limit);
 
 #endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 80129b2dc392..734a0fe7dd73 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -32,6 +32,13 @@ xfs_check_limits(void)
 	XFS_CHECK_VALUE(XFS_DQ_TIMEOUT_MAX,			4294967295LL);
 	XFS_CHECK_VALUE(XFS_DQ_GRACE_MIN,			0LL);
 	XFS_CHECK_VALUE(XFS_DQ_GRACE_MAX,			4294967295LL);
+	XFS_CHECK_VALUE(XFS_DQ_BIGTIMEOUT_MIN,			1LL);
+	XFS_CHECK_VALUE(XFS_DQ_BIGTIMEOUT_MAX,			16299260425LL);
+
+	BUILD_BUG_ON_MSG((XFS_DQ_TIMEOUT_MAX << XFS_DQ_BIGTIME_SHIFT) <
+			 XFS_DQ_BIGTIMEOUT_MAX,
+			 "XFS: quota timeout field is not large enough to fit "
+			 "XFS_DQ_BIGTIMEOUT_MAX");
 }
 
 static inline void __init
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index be67570badf8..a5136e40e118 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -879,6 +879,8 @@ xfs_qm_reset_dqcounts(
 			ddq->d_bwarns = 0;
 			ddq->d_iwarns = 0;
 			ddq->d_rtbwarns = 0;
+			if (xfs_sb_version_hasbigtime(&mp->m_sb))
+				ddq->d_type |= XFS_DQTYPE_BIGTIME;
 		}
 
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 95b0c25b9969..4d9c245d65c2 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -479,6 +479,7 @@ xfs_setqlim_warns(
 
 static inline void
 xfs_setqlim_timer(
+	struct xfs_dquot	*dqp,
 	struct xfs_dquot_res	*res,
 	struct xfs_quota_limits	*qlim,
 	s64			timer)
@@ -489,7 +490,7 @@ xfs_setqlim_timer(
 		qlim->time = res->timer;
 	} else {
 		/* Set the grace period expiration on a quota. */
-		xfs_dquot_set_timeout(&res->timer, timer);
+		xfs_dquot_set_timeout(dqp->q_mount, &res->timer, timer);
 	}
 }
 
@@ -579,7 +580,7 @@ xfs_qm_scall_setqlim(
 	if (newlim->d_fieldmask & QC_SPC_WARNS)
 		xfs_setqlim_warns(res, qlim, newlim->d_spc_warns);
 	if (newlim->d_fieldmask & QC_SPC_TIMER)
-		xfs_setqlim_timer(res, qlim, newlim->d_spc_timer);
+		xfs_setqlim_timer(dqp, res, qlim, newlim->d_spc_timer);
 
 	/* Blocks on the realtime device. */
 	hard = (newlim->d_fieldmask & QC_RT_SPC_HARD) ?
@@ -595,7 +596,7 @@ xfs_qm_scall_setqlim(
 	if (newlim->d_fieldmask & QC_RT_SPC_WARNS)
 		xfs_setqlim_warns(res, qlim, newlim->d_rt_spc_warns);
 	if (newlim->d_fieldmask & QC_RT_SPC_TIMER)
-		xfs_setqlim_timer(res, qlim, newlim->d_rt_spc_timer);
+		xfs_setqlim_timer(dqp, res, qlim, newlim->d_rt_spc_timer);
 
 	/* Inodes */
 	hard = (newlim->d_fieldmask & QC_INO_HARD) ?
@@ -611,7 +612,7 @@ xfs_qm_scall_setqlim(
 	if (newlim->d_fieldmask & QC_INO_WARNS)
 		xfs_setqlim_warns(res, qlim, newlim->d_ino_warns);
 	if (newlim->d_fieldmask & QC_INO_TIMER)
-		xfs_setqlim_timer(res, qlim, newlim->d_ino_timer);
+		xfs_setqlim_timer(dqp, res, qlim, newlim->d_ino_timer);
 
 	if (id != 0) {
 		/*


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 11/11] xfs: enable big timestamps
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
                   ` (9 preceding siblings ...)
  2020-08-17 22:57 ` [PATCH 10/11] xfs: enable bigtime for quota timers Darrick J. Wong
@ 2020-08-17 22:57 ` Darrick J. Wong
  2020-08-18 14:04   ` Amir Goldstein
  2020-08-18 23:01 ` [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Dave Chinner
  11 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-17 22:57 UTC (permalink / raw)
  To: darrick.wong; +Cc: linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Enable the big timestamp feature.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 1214fa58f45a..7ef14f5fef80 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -471,7 +471,8 @@ xfs_sb_has_ro_compat_feature(
 #define XFS_SB_FEAT_INCOMPAT_ALL \
 		(XFS_SB_FEAT_INCOMPAT_FTYPE|	\
 		 XFS_SB_FEAT_INCOMPAT_SPINODES|	\
-		 XFS_SB_FEAT_INCOMPAT_META_UUID)
+		 XFS_SB_FEAT_INCOMPAT_META_UUID| \
+		 XFS_SB_FEAT_INCOMPAT_BIGTIME)
 
 #define XFS_SB_FEAT_INCOMPAT_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_ALL
 static inline bool


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH 01/11] xfs: explicitly define inode timestamp range
  2020-08-17 22:56 ` [PATCH 01/11] xfs: explicitly define inode timestamp range Darrick J. Wong
@ 2020-08-18  6:25   ` Amir Goldstein
  0 siblings, 0 replies; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18  6:25 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:56 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Formally define the inode timestamp ranges that existing filesystems
> support, and switch the vfs timetamp ranges to use it.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 02/11] xfs: refactor quota expiration timer modification
  2020-08-17 22:57 ` [PATCH 02/11] xfs: refactor quota expiration timer modification Darrick J. Wong
@ 2020-08-18  6:48   ` Amir Goldstein
  2020-08-18 15:40     ` Darrick J. Wong
  0 siblings, 1 reply; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18  6:48 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:59 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Define explicit limits on the range of quota grace period expiration
> timeouts and refactor the code that modifies the timeouts into helpers
> that clamp the values appropriately.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

Question below...

...

> diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
> index 1c542b4a5220..b16d533a6feb 100644
> --- a/fs/xfs/xfs_qm_syscalls.c
> +++ b/fs/xfs/xfs_qm_syscalls.c
> @@ -483,9 +483,14 @@ xfs_setqlim_timer(
>         struct xfs_quota_limits *qlim,
>         s64                     timer)
>  {
> -       res->timer = timer;
> -       if (qlim)
> +       if (qlim) {
> +               /* Set the length of the default grace period. */
> +               res->timer = timer;
>                 qlim->time = timer;
> +       } else {
> +               /* Set the grace period expiration on a quota. */
> +               xfs_dquot_set_timeout(&res->timer, timer);
> +       }
>  }

I understand why you did this. This is mid series craft, but it looks very odd
to your average reviewer.

Maybe leave a TODO comment above res->timer = timer which will be
removed later in the series?

Not critical.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-08-17 22:57 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
@ 2020-08-18 10:46   ` Amir Goldstein
  0 siblings, 0 replies; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 10:46 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:59 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Refactor the code that sets the default quota grace period into a helper
> function so that we can override the ondisk behavior later.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 04/11] xfs: remove xfs_timestamp_t
  2020-08-17 22:57 ` [PATCH 04/11] xfs: remove xfs_timestamp_t Darrick J. Wong
@ 2020-08-18 10:50   ` Amir Goldstein
  0 siblings, 0 replies; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 10:50 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Kill this old typedef.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Fine.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 05/11] xfs: move xfs_log_dinode_to_disk to the log code
  2020-08-17 22:57 ` [PATCH 05/11] xfs: move xfs_log_dinode_to_disk to the log code Darrick J. Wong
@ 2020-08-18 10:51   ` Amir Goldstein
  0 siblings, 0 replies; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 10:51 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Move this function to xfs_inode_item.c to match the encoding function
> that's already there.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Ok.
Reviewed-by: Amir Goldstein <amir73il@gmail.com>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 06/11] xfs: refactor inode timestamp coding
  2020-08-17 22:57 ` [PATCH 06/11] xfs: refactor inode timestamp coding Darrick J. Wong
@ 2020-08-18 11:20   ` Amir Goldstein
  2020-08-18 15:42     ` Darrick J. Wong
  0 siblings, 1 reply; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 11:20 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Refactor inode timestamp encoding and decoding into helper functions so
> that we can add extra behaviors in subsequent patches.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

With nit below...

...

> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> index d95a00376fad..c2f9a0adeed2 100644
> --- a/fs/xfs/xfs_inode_item.c
> +++ b/fs/xfs/xfs_inode_item.c
> @@ -295,6 +295,15 @@ xfs_inode_item_format_attr_fork(
>         }
>  }
>
> +static inline void
> +xfs_from_log_timestamp(
> +       struct xfs_timestamp            *ts,
> +       const struct xfs_ictimestamp    *its)
> +{
> +       ts->t_sec = cpu_to_be32(its->t_sec);
> +       ts->t_nsec = cpu_to_be32(its->t_nsec);
> +}
> +
>  void
>  xfs_log_dinode_to_disk(

Following convention of xfs_inode_{to,from}_disk_timestamp()
I think it would be less confusing to name these helpers
xfs_log_to_disk_timestamp()

and...

>
> +static inline void
> +xfs_to_log_timestamp(
> +       struct xfs_ictimestamp          *its,
> +       const struct timespec64         *ts)
> +{
> +       its->t_sec = ts->tv_sec;
> +       its->t_nsec = ts->tv_nsec;
> +}
> +
>  static void
>  xfs_inode_to_log_dinode(

xfs_inode_to_log_timestamp()

Because xfs_{to,from}_log_timestamp() may sound like a matching pair,
to your average code reviewer, but they are not.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 07/11] xfs: convert struct xfs_timestamp to union
  2020-08-17 22:57 ` [PATCH 07/11] xfs: convert struct xfs_timestamp to union Darrick J. Wong
@ 2020-08-18 11:24   ` Amir Goldstein
  0 siblings, 0 replies; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 11:24 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Convert the xfs_timestamp struct to a union so that we can overload it
> in the next patch.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

I guess we could have kept xfs_timestamp_t and friend a bit longer to
avoid this churn.
Oh well. I guess the agenda to remove those typedefs is strong.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-17 22:57 ` [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem Darrick J. Wong
@ 2020-08-18 12:00   ` Amir Goldstein
  2020-08-18 12:53     ` Amir Goldstein
  2020-08-18 15:44     ` Darrick J. Wong
  2020-08-18 23:35   ` Dave Chinner
  1 sibling, 2 replies; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 12:00 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> time epoch).  This enables us to handle dates up to 2486, which solves
> the y2038 problem.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
...

> diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
> index 9f036053fdb7..b354825f4e51 100644
> --- a/fs/xfs/scrub/inode.c
> +++ b/fs/xfs/scrub/inode.c
> @@ -190,6 +190,11 @@ xchk_inode_flags2(
>         if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
>                 goto bad;
>
> +       /* the incore bigtime iflag always follows the feature flag */
> +       if (!!xfs_sb_version_hasbigtime(&mp->m_sb) ^
> +           !!(flags2 & XFS_DIFLAG2_BIGTIME))
> +               goto bad;
> +

Seems like flags2 are not the incore iflags and that the dinode iflags
can very well
have no bigtime on fs with bigtime support:

xchk_dinode(...
...
                flags2 = be64_to_cpu(dip->di_flags2);

What am I missing?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 09/11] xfs: refactor quota timestamp coding
  2020-08-17 22:57 ` [PATCH 09/11] xfs: refactor quota timestamp coding Darrick J. Wong
@ 2020-08-18 12:25   ` Amir Goldstein
  0 siblings, 0 replies; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 12:25 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Refactor quota timestamp encoding and decoding into helper functions so
> that we can add extra behavior in the next patch.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-18 12:00   ` Amir Goldstein
@ 2020-08-18 12:53     ` Amir Goldstein
  2020-08-18 15:53       ` Darrick J. Wong
  2020-08-18 15:44     ` Darrick J. Wong
  1 sibling, 1 reply; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 12:53 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 3:00 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> >
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> >
> > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > time epoch).  This enables us to handle dates up to 2486, which solves
> > the y2038 problem.
> >
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> ...
>
> > diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
> > index 9f036053fdb7..b354825f4e51 100644
> > --- a/fs/xfs/scrub/inode.c
> > +++ b/fs/xfs/scrub/inode.c
> > @@ -190,6 +190,11 @@ xchk_inode_flags2(
> >         if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
> >                 goto bad;
> >
> > +       /* the incore bigtime iflag always follows the feature flag */
> > +       if (!!xfs_sb_version_hasbigtime(&mp->m_sb) ^
> > +           !!(flags2 & XFS_DIFLAG2_BIGTIME))
> > +               goto bad;
> > +
>
> Seems like flags2 are not the incore iflags and that the dinode iflags
> can very well
> have no bigtime on fs with bigtime support:
>
> xchk_dinode(...
> ...
>                 flags2 = be64_to_cpu(dip->di_flags2);
>
> What am I missing?
>

Another question. Do we need to worry about xfs_reinit_inode()?
It seems not because incore inode should already have the correct bigtime
flag. Right? But by same logic, xfs_ifree() should already have the correct
bigtime flag as well, so we don't need to set the flag, maybe assert the flag?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 10/11] xfs: enable bigtime for quota timers
  2020-08-17 22:57 ` [PATCH 10/11] xfs: enable bigtime for quota timers Darrick J. Wong
@ 2020-08-18 13:58   ` Amir Goldstein
  2020-08-18 15:59     ` Darrick J. Wong
  0 siblings, 1 reply; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 13:58 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Enable the bigtime feature for quota timers.  We decrease the accuracy
> of the timers to ~4s in exchange for being able to set timers up to the
> bigtime maximum.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

Minor suggestion below...


> @@ -306,5 +327,24 @@ xfs_dquot_to_disk_timestamp(
>         __be32                  *dtimer,
>         time64_t                timer)
>  {
> +       /* Zero always means zero, regardless of encoding. */
> +       if (!timer) {
> +               *dtimer = cpu_to_be32(0);
> +               return;
> +       }
> +
> +       if (dqp->q_type & XFS_DQTYPE_BIGTIME) {
> +               uint64_t        t = timer;
> +
> +               /*
> +                * Round the end of the grace period up to the nearest bigtime
> +                * interval that we support, to give users the most time to fix
> +                * the problems.
> +                */
> +               t = roundup_64(t, 1U << XFS_DQ_BIGTIME_SHIFT);
> +               *dtimer = cpu_to_be32(t >> XFS_DQ_BIGTIME_SHIFT);
> +               return;
> +       }
> +
>         *dtimer = cpu_to_be32(timer);
>  }

This suggestion has to do with elegance which is subjective...

/*
 * When bigtime is enabled, we trade a few bits of precision to expand the
 * expiration timeout range to match that of big inode timestamps.  The grace
 * periods stored in dquot 0 are not shifted, since they record an interval,
 * not a timestamp.
 */
#define XFS_DQ_BIGTIME_SHIFT   (2)
#define XFS_DQ_BIGTIME_SLACK ((1U << XFS_DQ_BIGTIME_SHIFT)-1)

               /*
                * Round the end of the grace period up to the nearest bigtime
                * interval that we support, to give users the most time to fix
                * the problems.
                */
               uint64_t        t = timer + XFS_DQ_BIGTIME_SLACK;
               *dtimer = cpu_to_be32(t >> XFS_DQ_BIGTIME_SHIFT);

Take it or leave it.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 11/11] xfs: enable big timestamps
  2020-08-17 22:57 ` [PATCH 11/11] xfs: enable big timestamps Darrick J. Wong
@ 2020-08-18 14:04   ` Amir Goldstein
  0 siblings, 0 replies; 51+ messages in thread
From: Amir Goldstein @ 2020-08-18 14:04 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 1:58 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> From: Darrick J. Wong <darrick.wong@oracle.com>
>
> Enable the big timestamp feature.
>
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

No complaints here :)

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 02/11] xfs: refactor quota expiration timer modification
  2020-08-18  6:48   ` Amir Goldstein
@ 2020-08-18 15:40     ` Darrick J. Wong
  0 siblings, 0 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-18 15:40 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 09:48:14AM +0300, Amir Goldstein wrote:
> On Tue, Aug 18, 2020 at 1:59 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> >
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> >
> > Define explicit limits on the range of quota grace period expiration
> > timeouts and refactor the code that modifies the timeouts into helpers
> > that clamp the values appropriately.
> >
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> 
> Question below...
> 
> ...
> 
> > diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
> > index 1c542b4a5220..b16d533a6feb 100644
> > --- a/fs/xfs/xfs_qm_syscalls.c
> > +++ b/fs/xfs/xfs_qm_syscalls.c
> > @@ -483,9 +483,14 @@ xfs_setqlim_timer(
> >         struct xfs_quota_limits *qlim,
> >         s64                     timer)
> >  {
> > -       res->timer = timer;
> > -       if (qlim)
> > +       if (qlim) {
> > +               /* Set the length of the default grace period. */
> > +               res->timer = timer;
> >                 qlim->time = timer;
> > +       } else {
> > +               /* Set the grace period expiration on a quota. */
> > +               xfs_dquot_set_timeout(&res->timer, timer);
> > +       }
> >  }
> 
> I understand why you did this. This is mid series craft, but it looks very odd
> to your average reviewer.
> 
> Maybe leave a TODO comment above res->timer = timer which will be
> removed later in the series?
> 
> Not critical.

<shrug> The grace period case gets changed in the very next patch, but I
suppose it wouldn't hurt to mention that in the commit log.

--D

> 
> Thanks,
> Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 06/11] xfs: refactor inode timestamp coding
  2020-08-18 11:20   ` Amir Goldstein
@ 2020-08-18 15:42     ` Darrick J. Wong
  0 siblings, 0 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-18 15:42 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 02:20:22PM +0300, Amir Goldstein wrote:
> On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> >
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> >
> > Refactor inode timestamp encoding and decoding into helper functions so
> > that we can add extra behaviors in subsequent patches.
> >
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> 
> With nit below...
> 
> ...
> 
> > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> > index d95a00376fad..c2f9a0adeed2 100644
> > --- a/fs/xfs/xfs_inode_item.c
> > +++ b/fs/xfs/xfs_inode_item.c
> > @@ -295,6 +295,15 @@ xfs_inode_item_format_attr_fork(
> >         }
> >  }
> >
> > +static inline void
> > +xfs_from_log_timestamp(
> > +       struct xfs_timestamp            *ts,
> > +       const struct xfs_ictimestamp    *its)
> > +{
> > +       ts->t_sec = cpu_to_be32(its->t_sec);
> > +       ts->t_nsec = cpu_to_be32(its->t_nsec);
> > +}
> > +
> >  void
> >  xfs_log_dinode_to_disk(
> 
> Following convention of xfs_inode_{to,from}_disk_timestamp()
> I think it would be less confusing to name these helpers
> xfs_log_to_disk_timestamp()
> 
> and...
> 
> >
> > +static inline void
> > +xfs_to_log_timestamp(
> > +       struct xfs_ictimestamp          *its,
> > +       const struct timespec64         *ts)
> > +{
> > +       its->t_sec = ts->tv_sec;
> > +       its->t_nsec = ts->tv_nsec;
> > +}
> > +
> >  static void
> >  xfs_inode_to_log_dinode(
> 
> xfs_inode_to_log_timestamp()
> 
> Because xfs_{to,from}_log_timestamp() may sound like a matching pair,
> to your average code reviewer, but they are not.

Ok, will do.

--D

> Thanks,
> Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-18 12:00   ` Amir Goldstein
  2020-08-18 12:53     ` Amir Goldstein
@ 2020-08-18 15:44     ` Darrick J. Wong
  1 sibling, 0 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-18 15:44 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 03:00:49PM +0300, Amir Goldstein wrote:
> On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> >
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> >
> > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > time epoch).  This enables us to handle dates up to 2486, which solves
> > the y2038 problem.
> >
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> ...
> 
> > diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
> > index 9f036053fdb7..b354825f4e51 100644
> > --- a/fs/xfs/scrub/inode.c
> > +++ b/fs/xfs/scrub/inode.c
> > @@ -190,6 +190,11 @@ xchk_inode_flags2(
> >         if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
> >                 goto bad;
> >
> > +       /* the incore bigtime iflag always follows the feature flag */
> > +       if (!!xfs_sb_version_hasbigtime(&mp->m_sb) ^
> > +           !!(flags2 & XFS_DIFLAG2_BIGTIME))
> > +               goto bad;
> > +
> 
> Seems like flags2 are not the incore iflags and that the dinode iflags
> can very well
> have no bigtime on fs with bigtime support:
> 
> xchk_dinode(...
> ...
>                 flags2 = be64_to_cpu(dip->di_flags2);
> 
> What am I missing?

Nothing.  That chunk is just plain wrong and needs to reworked.  Repair
gets it right, the only failure case is if the inode flag is set but the
feature isn't.

I probably wrote this before I had the thought of letting people upgrade
existing filesystems.

Will fix, thanks for catching this.

--D

> 
> Thanks,
> Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-18 12:53     ` Amir Goldstein
@ 2020-08-18 15:53       ` Darrick J. Wong
  2020-08-18 20:52         ` Darrick J. Wong
  0 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-18 15:53 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 03:53:57PM +0300, Amir Goldstein wrote:
> On Tue, Aug 18, 2020 at 3:00 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> > >
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > >
> > > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > > time epoch).  This enables us to handle dates up to 2486, which solves
> > > the y2038 problem.
> > >
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > ...
> >
> > > diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
> > > index 9f036053fdb7..b354825f4e51 100644
> > > --- a/fs/xfs/scrub/inode.c
> > > +++ b/fs/xfs/scrub/inode.c
> > > @@ -190,6 +190,11 @@ xchk_inode_flags2(
> > >         if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
> > >                 goto bad;
> > >
> > > +       /* the incore bigtime iflag always follows the feature flag */
> > > +       if (!!xfs_sb_version_hasbigtime(&mp->m_sb) ^
> > > +           !!(flags2 & XFS_DIFLAG2_BIGTIME))
> > > +               goto bad;
> > > +
> >
> > Seems like flags2 are not the incore iflags and that the dinode iflags
> > can very well
> > have no bigtime on fs with bigtime support:
> >
> > xchk_dinode(...
> > ...
> >                 flags2 = be64_to_cpu(dip->di_flags2);
> >
> > What am I missing?
> >
> 
> Another question. Do we need to worry about xfs_reinit_inode()?
> It seems not because incore inode should already have the correct bigtime
> flag. Right?

I sure hope so.  Any inode being fed to xfs_reinit_inode was fully read
into memory, then we tore down the VFS inode, but before we tore down
the XFS inode, someone wanted it back, so all we have to do is reset
the VFS part of the incore state.

Therefore, we don't have to do anything about the XFS part of the incore
state. :)

> But by same logic, xfs_ifree() should already have the correct
> bigtime flag as well, so we don't need to set the flag, maybe assert the flag?

Right.  I think that piece can go since we set the incore flag
opportunistically now.

--D

> 
> Thanks,
> Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 10/11] xfs: enable bigtime for quota timers
  2020-08-18 13:58   ` Amir Goldstein
@ 2020-08-18 15:59     ` Darrick J. Wong
  0 siblings, 0 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-18 15:59 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 04:58:35PM +0300, Amir Goldstein wrote:
> On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> >
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> >
> > Enable the bigtime feature for quota timers.  We decrease the accuracy
> > of the timers to ~4s in exchange for being able to set timers up to the
> > bigtime maximum.
> >
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> 
> Minor suggestion below...
> 
> 
> > @@ -306,5 +327,24 @@ xfs_dquot_to_disk_timestamp(
> >         __be32                  *dtimer,
> >         time64_t                timer)
> >  {
> > +       /* Zero always means zero, regardless of encoding. */
> > +       if (!timer) {
> > +               *dtimer = cpu_to_be32(0);
> > +               return;
> > +       }
> > +
> > +       if (dqp->q_type & XFS_DQTYPE_BIGTIME) {
> > +               uint64_t        t = timer;
> > +
> > +               /*
> > +                * Round the end of the grace period up to the nearest bigtime
> > +                * interval that we support, to give users the most time to fix
> > +                * the problems.
> > +                */
> > +               t = roundup_64(t, 1U << XFS_DQ_BIGTIME_SHIFT);
> > +               *dtimer = cpu_to_be32(t >> XFS_DQ_BIGTIME_SHIFT);
> > +               return;
> > +       }
> > +
> >         *dtimer = cpu_to_be32(timer);
> >  }
> 
> This suggestion has to do with elegance which is subjective...
> 
> /*
>  * When bigtime is enabled, we trade a few bits of precision to expand the
>  * expiration timeout range to match that of big inode timestamps.  The grace
>  * periods stored in dquot 0 are not shifted, since they record an interval,
>  * not a timestamp.
>  */
> #define XFS_DQ_BIGTIME_SHIFT   (2)
> #define XFS_DQ_BIGTIME_SLACK ((1U << XFS_DQ_BIGTIME_SHIFT)-1)
> 
>                /*
>                 * Round the end of the grace period up to the nearest bigtime
>                 * interval that we support, to give users the most time to fix
>                 * the problems.
>                 */
>                uint64_t        t = timer + XFS_DQ_BIGTIME_SLACK;
>                *dtimer = cpu_to_be32(t >> XFS_DQ_BIGTIME_SHIFT);
> 
> Take it or leave it.

Hm.  Normally I don't really like open-coding a rounding operation, but
it does eliminate an integer division.

(I dunno about "SLACK", but I can't think of a better word for
imprecision...)

--D

> Thanks,
> Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-18 15:53       ` Darrick J. Wong
@ 2020-08-18 20:52         ` Darrick J. Wong
  0 siblings, 0 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-18 20:52 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: linux-xfs, Eric Sandeen

On Tue, Aug 18, 2020 at 08:53:45AM -0700, Darrick J. Wong wrote:
> On Tue, Aug 18, 2020 at 03:53:57PM +0300, Amir Goldstein wrote:
> > On Tue, Aug 18, 2020 at 3:00 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > On Tue, Aug 18, 2020 at 1:57 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> > > >
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > >
> > > > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > > > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > > > time epoch).  This enables us to handle dates up to 2486, which solves
> > > > the y2038 problem.
> > > >
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > ...
> > >
> > > > diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
> > > > index 9f036053fdb7..b354825f4e51 100644
> > > > --- a/fs/xfs/scrub/inode.c
> > > > +++ b/fs/xfs/scrub/inode.c
> > > > @@ -190,6 +190,11 @@ xchk_inode_flags2(
> > > >         if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
> > > >                 goto bad;
> > > >
> > > > +       /* the incore bigtime iflag always follows the feature flag */
> > > > +       if (!!xfs_sb_version_hasbigtime(&mp->m_sb) ^
> > > > +           !!(flags2 & XFS_DIFLAG2_BIGTIME))
> > > > +               goto bad;
> > > > +
> > >
> > > Seems like flags2 are not the incore iflags and that the dinode iflags
> > > can very well
> > > have no bigtime on fs with bigtime support:
> > >
> > > xchk_dinode(...
> > > ...
> > >                 flags2 = be64_to_cpu(dip->di_flags2);
> > >
> > > What am I missing?
> > >
> > 
> > Another question. Do we need to worry about xfs_reinit_inode()?
> > It seems not because incore inode should already have the correct bigtime
> > flag. Right?
> 
> I sure hope so.  Any inode being fed to xfs_reinit_inode was fully read
> into memory, then we tore down the VFS inode, but before we tore down
> the XFS inode, someone wanted it back, so all we have to do is reset
> the VFS part of the incore state.
> 
> Therefore, we don't have to do anything about the XFS part of the incore
> state. :)
> 
> > But by same logic, xfs_ifree() should already have the correct
> > bigtime flag as well, so we don't need to set the flag, maybe assert the flag?
> 
> Right.  I think that piece can go since we set the incore flag
> opportunistically now.

Bleh.  I missed the subtlety of:

	ip->i_d.di_flags2 = 0;
	if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
		ip->i_d.di_flags2 |= XFS_DIFLAG2_BIGTIME;

First we zero flags2 entirely, then we re-enable bigtime so that the
final ctime update is written in the correct format.  I guess that could
be simplified to:

	/*
	 * Preserve the bigtime flag so that ctime accurately stores the
	 * deletion time.
	 */
	ip->i_d.di_flags2 &= ~XFS_DIFLAG2_BIGTIME;

> --D
> 
> > 
> > Thanks,
> > Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v2 00/11] xfs: widen timestamps to deal with y2038
  2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
                   ` (10 preceding siblings ...)
  2020-08-17 22:57 ` [PATCH 11/11] xfs: enable big timestamps Darrick J. Wong
@ 2020-08-18 23:01 ` Dave Chinner
  2020-08-18 23:10   ` Darrick J. Wong
  11 siblings, 1 reply; 51+ messages in thread
From: Dave Chinner @ 2020-08-18 23:01 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, amir73il, sandeen

On Mon, Aug 17, 2020 at 03:56:48PM -0700, Darrick J. Wong wrote:
> Hi all,
> 
> This series performs some refactoring of our timestamp and inode
> encoding functions, then retrofits the timestamp union to handle
> timestamps as a 64-bit nanosecond counter.  Next, it adds bit shifting
> to the non-root dquot timer fields to boost their effective size to 34
> bits.  These two changes enable correct time handling on XFS through the
> year 2486.

A bit more detail would be nice :)

Like, the inode timestamp has a range of slightly greater than 2^34
because 10^9 < 2^30. i.e.

Inode timestamp range in days:

$ echo $(((2**62 / (1000*1000*1000) / 86400) * 2**2))
213500
$

While the quota timer range in days is:
$ echo $(((2**34 / 86400)))
198841
$

There's ~15,000 days difference in range here, which in years is
about 40 years. Hence the inodes have a timestamp range out to
~2485 from the 1901 epoch start, while quota timers have a range
out to only 2445 from the epoch start.

Some discussion of the different ranges, the problems it might cause
and why we don't have to worry about it would be appreciated :)

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v2 00/11] xfs: widen timestamps to deal with y2038
  2020-08-18 23:01 ` [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Dave Chinner
@ 2020-08-18 23:10   ` Darrick J. Wong
  2020-08-18 23:41     ` Dave Chinner
  0 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-18 23:10 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs, amir73il, sandeen

On Wed, Aug 19, 2020 at 09:01:21AM +1000, Dave Chinner wrote:
> On Mon, Aug 17, 2020 at 03:56:48PM -0700, Darrick J. Wong wrote:
> > Hi all,
> > 
> > This series performs some refactoring of our timestamp and inode
> > encoding functions, then retrofits the timestamp union to handle
> > timestamps as a 64-bit nanosecond counter.  Next, it adds bit shifting
> > to the non-root dquot timer fields to boost their effective size to 34
> > bits.  These two changes enable correct time handling on XFS through the
> > year 2486.
> 
> A bit more detail would be nice :)

Heh, ok.

> Like, the inode timestamp has a range of slightly greater than 2^34
> because 10^9 < 2^30. i.e.
> 
> Inode timestamp range in days:
> 
> $ echo $(((2**62 / (1000*1000*1000) / 86400) * 2**2))
> 213500
> $
> 
> While the quota timer range in days is:
> $ echo $(((2**34 / 86400)))
> 198841
> $
> 
> There's ~15,000 days difference in range here, which in years is
> about 40 years. Hence the inodes have a timestamp range out to
> ~2485 from the 1901 epoch start, while quota timers have a range
> out to only 2445 from the epoch start.

Quota timers have always treated the d_{b,i,rtb}timer value as an
unsigned 32-bit integer, which means that it has /never/ been possible
to set a timer expiration before 1/1/1970.  The quota timer range is
therefore 198,841 days *after* 1970, not after 1901.

Therefore, the quota timer range in days is:

$ echo $(( ((2**34) + (2**31)) / 86400) ))
223696

So, technically speaking, the quota timers could go beyond 2486, but the
current patchset clamps the quota counters to the same max as the
inodes.  I guess I just proved the need for more details upfront.

--D

> 
> Some discussion of the different ranges, the problems it might cause
> and why we don't have to worry about it would be appreciated :)
> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-17 22:57 ` [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem Darrick J. Wong
  2020-08-18 12:00   ` Amir Goldstein
@ 2020-08-18 23:35   ` Dave Chinner
  2020-08-19 21:43     ` Darrick J. Wong
  1 sibling, 1 reply; 51+ messages in thread
From: Dave Chinner @ 2020-08-18 23:35 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, amir73il, sandeen

On Mon, Aug 17, 2020 at 03:57:39PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> time epoch).  This enables us to handle dates up to 2486, which solves
> the y2038 problem.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
.....
> +/* Convert an ondisk timestamp into the 64-bit safe incore format. */
>  void
>  xfs_inode_from_disk_timestamp(
> +	struct xfs_dinode		*dip,
>  	struct timespec64		*tv,
>  	const union xfs_timestamp	*ts)
>  {
> +	if (dip->di_version >= 3 &&
> +	    (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME))) {
> +		uint64_t		t = be64_to_cpu(ts->t_bigtime);
> +		uint64_t		s;
> +		uint32_t		n;
> +
> +		s = div_u64_rem(t, NSEC_PER_SEC, &n);
> +		tv->tv_sec = s - XFS_INO_BIGTIME_EPOCH;
> +		tv->tv_nsec = n;
> +		return;
> +	}
> +
>  	tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
>  	tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
>  }

Can't say I'm sold on this union. It seems cleaner to me to just
make the timestamp an opaque 64 bit field on disk and convert it to
the in-memory representation directly in the to/from disk
operations. e.g.:

void
xfs_inode_from_disk_timestamp(
	struct xfs_dinode		*dip,
	struct timespec64		*tv,
	__be64				ts)
{

	uint64_t		t = be64_to_cpu(ts);
	uint64_t		s;
	uint32_t		n;

	if (xfs_dinode_is_bigtime(dip)) {
		s = div_u64_rem(t, NSEC_PER_SEC, &n) - XFS_INO_BIGTIME_EPOCH;
	} else {
		s = (int)(t >> 32);
		n = (int)(t & 0xffffffff);
	}
	tv->tv_sec = s;
	tv->tv_nsec = n;
}



> @@ -220,9 +234,9 @@ xfs_inode_from_disk(
>  	 * a time before epoch is converted to a time long after epoch
>  	 * on 64 bit systems.
>  	 */
> -	xfs_inode_from_disk_timestamp(&inode->i_atime, &from->di_atime);
> -	xfs_inode_from_disk_timestamp(&inode->i_mtime, &from->di_mtime);
> -	xfs_inode_from_disk_timestamp(&inode->i_ctime, &from->di_ctime);
> +	xfs_inode_from_disk_timestamp(from, &inode->i_atime, &from->di_atime);
> +	xfs_inode_from_disk_timestamp(from, &inode->i_mtime, &from->di_mtime);
> +	xfs_inode_from_disk_timestamp(from, &inode->i_ctime, &from->di_ctime);
>  
>  	to->di_size = be64_to_cpu(from->di_size);
>  	to->di_nblocks = be64_to_cpu(from->di_nblocks);
> @@ -235,9 +249,17 @@ xfs_inode_from_disk(
>  	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
>  		inode_set_iversion_queried(inode,
>  					   be64_to_cpu(from->di_changecount));
> -		xfs_inode_from_disk_timestamp(&to->di_crtime, &from->di_crtime);
> +		xfs_inode_from_disk_timestamp(from, &to->di_crtime,
> +				&from->di_crtime);
>  		to->di_flags2 = be64_to_cpu(from->di_flags2);
>  		to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
> +		/*
> +		 * Set the bigtime flag incore so that we automatically convert
> +		 * this inode's ondisk timestamps to bigtime format the next
> +		 * time we write the inode core to disk.
> +		 */
> +		if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
> +			to->di_flags2 |= XFS_DIFLAG2_BIGTIME;
>  	}

We do not want on-disk flags to be changed outside transactions like
this. Indeed, this has implications for O_DSYNC operation, in that
we do not trigger inode sync operations if the inode is only
timestamp dirty. If we've changed this flag, then the inode is more
than "timestamp dirty" and O_DSYNC will need to flush the entire
inode.... :/

IOWs, I think we should only change this flag in a timestamp
transaction where the timestamps are actually being logged and hence
we can set inode dirty state appropriately so that everything will
get logged, changed and written back correctly....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH v2 00/11] xfs: widen timestamps to deal with y2038
  2020-08-18 23:10   ` Darrick J. Wong
@ 2020-08-18 23:41     ` Dave Chinner
  0 siblings, 0 replies; 51+ messages in thread
From: Dave Chinner @ 2020-08-18 23:41 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, amir73il, sandeen

On Tue, Aug 18, 2020 at 04:10:33PM -0700, Darrick J. Wong wrote:
> On Wed, Aug 19, 2020 at 09:01:21AM +1000, Dave Chinner wrote:
> > On Mon, Aug 17, 2020 at 03:56:48PM -0700, Darrick J. Wong wrote:
> > > Hi all,
> > > 
> > > This series performs some refactoring of our timestamp and inode
> > > encoding functions, then retrofits the timestamp union to handle
> > > timestamps as a 64-bit nanosecond counter.  Next, it adds bit shifting
> > > to the non-root dquot timer fields to boost their effective size to 34
> > > bits.  These two changes enable correct time handling on XFS through the
> > > year 2486.
> > 
> > A bit more detail would be nice :)
> 
> Heh, ok.
> 
> > Like, the inode timestamp has a range of slightly greater than 2^34
> > because 10^9 < 2^30. i.e.
> > 
> > Inode timestamp range in days:
> > 
> > $ echo $(((2**62 / (1000*1000*1000) / 86400) * 2**2))
> > 213500
> > $
> > 
> > While the quota timer range in days is:
> > $ echo $(((2**34 / 86400)))
> > 198841
> > $
> > 
> > There's ~15,000 days difference in range here, which in years is
> > about 40 years. Hence the inodes have a timestamp range out to
> > ~2485 from the 1901 epoch start, while quota timers have a range
> > out to only 2445 from the epoch start.
> 
> Quota timers have always treated the d_{b,i,rtb}timer value as an
> unsigned 32-bit integer, which means that it has /never/ been possible
> to set a timer expiration before 1/1/1970.  The quota timer range is
> therefore 198,841 days *after* 1970, not after 1901.
> 
> Therefore, the quota timer range in days is:
> 
> $ echo $(( ((2**34) + (2**31)) / 86400) ))
> 223696
> 
> So, technically speaking, the quota timers could go beyond 2486, but the
> current patchset clamps the quota counters to the same max as the
> inodes.  I guess I just proved the need for more details upfront.

Yeah, little things like quota timers and inode timestamps having a
different epoch value are kinda important to understand. :)

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-18 23:35   ` Dave Chinner
@ 2020-08-19 21:43     ` Darrick J. Wong
  2020-08-19 23:58       ` Dave Chinner
  2020-08-20  0:01       ` Darrick J. Wong
  0 siblings, 2 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-19 21:43 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs, amir73il, sandeen

On Wed, Aug 19, 2020 at 09:35:35AM +1000, Dave Chinner wrote:
> On Mon, Aug 17, 2020 at 03:57:39PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > time epoch).  This enables us to handle dates up to 2486, which solves
> > the y2038 problem.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> .....
> > +/* Convert an ondisk timestamp into the 64-bit safe incore format. */
> >  void
> >  xfs_inode_from_disk_timestamp(
> > +	struct xfs_dinode		*dip,
> >  	struct timespec64		*tv,
> >  	const union xfs_timestamp	*ts)
> >  {
> > +	if (dip->di_version >= 3 &&
> > +	    (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME))) {
> > +		uint64_t		t = be64_to_cpu(ts->t_bigtime);
> > +		uint64_t		s;
> > +		uint32_t		n;
> > +
> > +		s = div_u64_rem(t, NSEC_PER_SEC, &n);
> > +		tv->tv_sec = s - XFS_INO_BIGTIME_EPOCH;
> > +		tv->tv_nsec = n;
> > +		return;
> > +	}
> > +
> >  	tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> >  	tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> >  }
> 
> Can't say I'm sold on this union. It seems cleaner to me to just
> make the timestamp an opaque 64 bit field on disk and convert it to
> the in-memory representation directly in the to/from disk
> operations. e.g.:
> 
> void
> xfs_inode_from_disk_timestamp(
> 	struct xfs_dinode		*dip,
> 	struct timespec64		*tv,
> 	__be64				ts)
> {
> 
> 	uint64_t		t = be64_to_cpu(ts);
> 	uint64_t		s;
> 	uint32_t		n;
> 
> 	if (xfs_dinode_is_bigtime(dip)) {
> 		s = div_u64_rem(t, NSEC_PER_SEC, &n) - XFS_INO_BIGTIME_EPOCH;
> 	} else {
> 		s = (int)(t >> 32);
> 		n = (int)(t & 0xffffffff);
> 	}
> 	tv->tv_sec = s;
> 	tv->tv_nsec = n;
> }

I don't like this open-coded union approach at all because now I have to
keep the t_sec and t_nsec bits separate in my head instead of letting
the C compiler take care of that detail.  The sample code above doesn't
handle that correctly either:

Start with an old kernel on a little endian system; each uppercase
letter represents a byte (A is the LSB of t_sec, D is the MSB of t_sec,
E is the LSB of t_nsec, and H is the MSB of t_nsec):

	sec  nsec (incore)
	ABCD EFGH

That gets written out as:

	sec  nsec (ondisk)
	DCBA HGFE

Now reboot with a new kernel that only knows 64bit timestamps on disk:

	64bit (ondisk)
	DCBAHGFE

Now it does the first be64_to_cpu conversion:
	64bit (incore)
	EFGHABCD

And then masks and shifts:
	sec  nsec (incore)
	EFGH ABCD

Oops, we just switched the values!

The correct approach (I think) is to perform the shifting and masking on
the raw __be64 value before converting them to incore format via
be32_to_cpu, but now I have to work out all four cases by hand instead
of letting the compiler do the legwork for me.  I don't remember if it's
correct to go around shifting and masking __be64 values.

I guess the good news is that at least we have generic/402 to catch
these kinds of persistence problems, but ugh.

Anyway, what are you afraid of?  The C compiler smoking crack and not
actually overlapping the two union elements?  We could control for
that...

> > @@ -220,9 +234,9 @@ xfs_inode_from_disk(
> >  	 * a time before epoch is converted to a time long after epoch
> >  	 * on 64 bit systems.
> >  	 */
> > -	xfs_inode_from_disk_timestamp(&inode->i_atime, &from->di_atime);
> > -	xfs_inode_from_disk_timestamp(&inode->i_mtime, &from->di_mtime);
> > -	xfs_inode_from_disk_timestamp(&inode->i_ctime, &from->di_ctime);
> > +	xfs_inode_from_disk_timestamp(from, &inode->i_atime, &from->di_atime);
> > +	xfs_inode_from_disk_timestamp(from, &inode->i_mtime, &from->di_mtime);
> > +	xfs_inode_from_disk_timestamp(from, &inode->i_ctime, &from->di_ctime);
> >  
> >  	to->di_size = be64_to_cpu(from->di_size);
> >  	to->di_nblocks = be64_to_cpu(from->di_nblocks);
> > @@ -235,9 +249,17 @@ xfs_inode_from_disk(
> >  	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
> >  		inode_set_iversion_queried(inode,
> >  					   be64_to_cpu(from->di_changecount));
> > -		xfs_inode_from_disk_timestamp(&to->di_crtime, &from->di_crtime);
> > +		xfs_inode_from_disk_timestamp(from, &to->di_crtime,
> > +				&from->di_crtime);
> >  		to->di_flags2 = be64_to_cpu(from->di_flags2);
> >  		to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
> > +		/*
> > +		 * Set the bigtime flag incore so that we automatically convert
> > +		 * this inode's ondisk timestamps to bigtime format the next
> > +		 * time we write the inode core to disk.
> > +		 */
> > +		if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
> > +			to->di_flags2 |= XFS_DIFLAG2_BIGTIME;
> >  	}
> 
> We do not want on-disk flags to be changed outside transactions like
> this. Indeed, this has implications for O_DSYNC operation, in that
> we do not trigger inode sync operations if the inode is only
> timestamp dirty. If we've changed this flag, then the inode is more
> than "timestamp dirty" and O_DSYNC will need to flush the entire
> inode.... :/

I forgot about XFS_ILOG_TIMESTAMP.

> IOWs, I think we should only change this flag in a timestamp
> transaction where the timestamps are actually being logged and hence
> we can set inode dirty state appropriately so that everything will
> get logged, changed and written back correctly....

Yeah, that's fair.  I'll change xfs_trans_log_inode to set the bigtime
flag if we're logging either the timestamps or the core.

--D

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-19 21:43     ` Darrick J. Wong
@ 2020-08-19 23:58       ` Dave Chinner
  2020-08-20  0:01       ` Darrick J. Wong
  1 sibling, 0 replies; 51+ messages in thread
From: Dave Chinner @ 2020-08-19 23:58 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs, amir73il, sandeen

On Wed, Aug 19, 2020 at 02:43:22PM -0700, Darrick J. Wong wrote:
> On Wed, Aug 19, 2020 at 09:35:35AM +1000, Dave Chinner wrote:
> > On Mon, Aug 17, 2020 at 03:57:39PM -0700, Darrick J. Wong wrote:
> The correct approach (I think) is to perform the shifting and masking on
> the raw __be64 value before converting them to incore format via
> be32_to_cpu, but now I have to work out all four cases by hand instead
> of letting the compiler do the legwork for me.  I don't remember if it's
> correct to go around shifting and masking __be64 values.
> 
> I guess the good news is that at least we have generic/402 to catch
> these kinds of persistence problems, but ugh.
> 
> Anyway, what are you afraid of?  The C compiler smoking crack and not
> actually overlapping the two union elements?  We could control for
> that...

No, I just didn't really like the way the code in the encode/decode
helpers turned out...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-19 21:43     ` Darrick J. Wong
  2020-08-19 23:58       ` Dave Chinner
@ 2020-08-20  0:01       ` Darrick J. Wong
  2020-08-20  4:42         ` griffin tucker
  2020-08-20  5:11         ` Amir Goldstein
  1 sibling, 2 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-20  0:01 UTC (permalink / raw)
  To: Dave Chinner; +Cc: linux-xfs, amir73il, sandeen

On Wed, Aug 19, 2020 at 02:43:22PM -0700, Darrick J. Wong wrote:
> On Wed, Aug 19, 2020 at 09:35:35AM +1000, Dave Chinner wrote:
> > On Mon, Aug 17, 2020 at 03:57:39PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > 
> > > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > > time epoch).  This enables us to handle dates up to 2486, which solves
> > > the y2038 problem.
> > > 
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > .....
> > > +/* Convert an ondisk timestamp into the 64-bit safe incore format. */
> > >  void
> > >  xfs_inode_from_disk_timestamp(
> > > +	struct xfs_dinode		*dip,
> > >  	struct timespec64		*tv,
> > >  	const union xfs_timestamp	*ts)
> > >  {
> > > +	if (dip->di_version >= 3 &&
> > > +	    (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME))) {
> > > +		uint64_t		t = be64_to_cpu(ts->t_bigtime);
> > > +		uint64_t		s;
> > > +		uint32_t		n;
> > > +
> > > +		s = div_u64_rem(t, NSEC_PER_SEC, &n);
> > > +		tv->tv_sec = s - XFS_INO_BIGTIME_EPOCH;
> > > +		tv->tv_nsec = n;
> > > +		return;
> > > +	}
> > > +
> > >  	tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> > >  	tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> > >  }
> > 
> > Can't say I'm sold on this union. It seems cleaner to me to just
> > make the timestamp an opaque 64 bit field on disk and convert it to
> > the in-memory representation directly in the to/from disk
> > operations. e.g.:
> > 
> > void
> > xfs_inode_from_disk_timestamp(
> > 	struct xfs_dinode		*dip,
> > 	struct timespec64		*tv,
> > 	__be64				ts)
> > {
> > 
> > 	uint64_t		t = be64_to_cpu(ts);
> > 	uint64_t		s;
> > 	uint32_t		n;
> > 
> > 	if (xfs_dinode_is_bigtime(dip)) {
> > 		s = div_u64_rem(t, NSEC_PER_SEC, &n) - XFS_INO_BIGTIME_EPOCH;
> > 	} else {
> > 		s = (int)(t >> 32);
> > 		n = (int)(t & 0xffffffff);
> > 	}
> > 	tv->tv_sec = s;
> > 	tv->tv_nsec = n;
> > }
> 
> I don't like this open-coded union approach at all because now I have to
> keep the t_sec and t_nsec bits separate in my head instead of letting
> the C compiler take care of that detail.  The sample code above doesn't
> handle that correctly either:
> 
> Start with an old kernel on a little endian system; each uppercase
> letter represents a byte (A is the LSB of t_sec, D is the MSB of t_sec,
> E is the LSB of t_nsec, and H is the MSB of t_nsec):
> 
> 	sec  nsec (incore)
> 	ABCD EFGH
> 
> That gets written out as:
> 
> 	sec  nsec (ondisk)
> 	DCBA HGFE
> 
> Now reboot with a new kernel that only knows 64bit timestamps on disk:
> 
> 	64bit (ondisk)
> 	DCBAHGFE
> 
> Now it does the first be64_to_cpu conversion:
> 	64bit (incore)
> 	EFGHABCD
> 
> And then masks and shifts:
> 	sec  nsec (incore)
> 	EFGH ABCD
> 
> Oops, we just switched the values!
> 
> The correct approach (I think) is to perform the shifting and masking on
> the raw __be64 value before converting them to incore format via
> be32_to_cpu, but now I have to work out all four cases by hand instead
> of letting the compiler do the legwork for me.  I don't remember if it's
> correct to go around shifting and masking __be64 values.
> 
> I guess the good news is that at least we have generic/402 to catch
> these kinds of persistence problems, but ugh.
> 
> Anyway, what are you afraid of?  The C compiler smoking crack and not
> actually overlapping the two union elements?  We could control for
> that...

(Following up on the mailing list with something I pasted into IRC)

Ok, so I temporarily patched up my dev tree with this approximation of
how that would work, properly done:

diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index c59ddb56bb90..7c71e4440402 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -176,8 +176,8 @@ xfs_inode_from_disk_timestamp(
 		return;
 	}
 
-	tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
-	tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
+	tv->tv_sec = (time64_t)be32_to_cpu((__be32)(ts->t_bigtime >> 32));
+	tv->tv_nsec = be32_to_cpu(ts->t_bigtime & 0xFFFFFFFFU);
 }
 
 int
@@ -294,8 +294,8 @@ xfs_inode_to_disk_timestamp(
 		return;
 	}
 
-	ts->t_sec = cpu_to_be32(tv->tv_sec);
-	ts->t_nsec = cpu_to_be32(tv->tv_nsec);
+	ts->t_bigtime = (__be64)cpu_to_be32(tv->tv_sec) << 32 |
+			cpu_to_be32(tv->tv_nsec);
 }
 
 void
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d44e8932979b..5d36d6dea326 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -308,8 +308,8 @@ xfs_log_dinode_to_disk_ts(
 		return;
 	}
 
-	ts->t_sec = cpu_to_be32(its->t_sec);
-	ts->t_nsec = cpu_to_be32(its->t_nsec);
+	ts->t_bigtime = (__be64)cpu_to_be32(its->t_sec) << 32 |
+			cpu_to_be32(its->t_nsec);
 }

And immediately got a ton of smatch warnings:

xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:179:32: warning: cast to restricted __be32
xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:179:32: warning: cast to restricted __be32
xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:179:32: warning: cast to restricted __be32
xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:179:32: warning: cast to restricted __be32
xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:179:32: warning: cast to restricted __be32
xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:179:32: warning: cast to restricted __be32
xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:180:23: warning: cast to restricted __be32
xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:180:23: warning: cast to restricted __be32
xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:180:23: warning: cast to restricted __be32
xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:180:23: warning: cast to restricted __be32
xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:180:23: warning: cast to restricted __be32
xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:180:23: warning: cast to restricted __be32
xfs_inode_buf.c:297:26: warning: cast to restricted __be64
xfs_inode_buf.c:297:26: warning: cast from restricted __be32
xfs_inode_buf.c:297:26: warning: restricted __be64 degrades to integer
xfs_inode_buf.c:298:25: warning: restricted __be32 degrades to integer
xfs_inode_buf.c:297:23: warning: incorrect type in assignment (different base types)
xfs_inode_buf.c:297:23:    expected restricted __be64 [usertype] t_bigtime
xfs_inode_buf.c:297:23:    got unsigned long long

(and even more in xfs_inode_item.c)

So... while we could get rid of the union and hand-decode the timestamp
from a __be64 on legacy filesystems, I see the static checker complaints
as a second piece of evidence that this would be unnecessarily risky.

--D

> > > @@ -220,9 +234,9 @@ xfs_inode_from_disk(
> > >  	 * a time before epoch is converted to a time long after epoch
> > >  	 * on 64 bit systems.
> > >  	 */
> > > -	xfs_inode_from_disk_timestamp(&inode->i_atime, &from->di_atime);
> > > -	xfs_inode_from_disk_timestamp(&inode->i_mtime, &from->di_mtime);
> > > -	xfs_inode_from_disk_timestamp(&inode->i_ctime, &from->di_ctime);
> > > +	xfs_inode_from_disk_timestamp(from, &inode->i_atime, &from->di_atime);
> > > +	xfs_inode_from_disk_timestamp(from, &inode->i_mtime, &from->di_mtime);
> > > +	xfs_inode_from_disk_timestamp(from, &inode->i_ctime, &from->di_ctime);
> > >  
> > >  	to->di_size = be64_to_cpu(from->di_size);
> > >  	to->di_nblocks = be64_to_cpu(from->di_nblocks);
> > > @@ -235,9 +249,17 @@ xfs_inode_from_disk(
> > >  	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
> > >  		inode_set_iversion_queried(inode,
> > >  					   be64_to_cpu(from->di_changecount));
> > > -		xfs_inode_from_disk_timestamp(&to->di_crtime, &from->di_crtime);
> > > +		xfs_inode_from_disk_timestamp(from, &to->di_crtime,
> > > +				&from->di_crtime);
> > >  		to->di_flags2 = be64_to_cpu(from->di_flags2);
> > >  		to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
> > > +		/*
> > > +		 * Set the bigtime flag incore so that we automatically convert
> > > +		 * this inode's ondisk timestamps to bigtime format the next
> > > +		 * time we write the inode core to disk.
> > > +		 */
> > > +		if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
> > > +			to->di_flags2 |= XFS_DIFLAG2_BIGTIME;
> > >  	}
> > 
> > We do not want on-disk flags to be changed outside transactions like
> > this. Indeed, this has implications for O_DSYNC operation, in that
> > we do not trigger inode sync operations if the inode is only
> > timestamp dirty. If we've changed this flag, then the inode is more
> > than "timestamp dirty" and O_DSYNC will need to flush the entire
> > inode.... :/
> 
> I forgot about XFS_ILOG_TIMESTAMP.
> 
> > IOWs, I think we should only change this flag in a timestamp
> > transaction where the timestamps are actually being logged and hence
> > we can set inode dirty state appropriately so that everything will
> > get logged, changed and written back correctly....
> 
> Yeah, that's fair.  I'll change xfs_trans_log_inode to set the bigtime
> flag if we're logging either the timestamps or the core.
> 
> --D
> 
> > Cheers,
> > 
> > Dave.
> > -- 
> > Dave Chinner
> > david@fromorbit.com

^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-20  0:01       ` Darrick J. Wong
@ 2020-08-20  4:42         ` griffin tucker
  2020-08-20 16:23           ` Darrick J. Wong
  2020-08-20  5:11         ` Amir Goldstein
  1 sibling, 1 reply; 51+ messages in thread
From: griffin tucker @ 2020-08-20  4:42 UTC (permalink / raw)
  To: linux-xfs

how difficult would it be to implement 128 bit timestamps? and how
much further in time beyond 2486 would this allow?

is the implementation of 128 bit timestamps just not feasible due to
64 bit alu width?

how long is it until hardware capability reaches beyond the 16exabyte limit?

On Thu, 20 Aug 2020 at 10:01, Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> On Wed, Aug 19, 2020 at 02:43:22PM -0700, Darrick J. Wong wrote:
> > On Wed, Aug 19, 2020 at 09:35:35AM +1000, Dave Chinner wrote:
> > > On Mon, Aug 17, 2020 at 03:57:39PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > >
> > > > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > > > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > > > time epoch).  This enables us to handle dates up to 2486, which solves
> > > > the y2038 problem.
> > > >
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > .....
> > > > +/* Convert an ondisk timestamp into the 64-bit safe incore format. */
> > > >  void
> > > >  xfs_inode_from_disk_timestamp(
> > > > + struct xfs_dinode               *dip,
> > > >   struct timespec64               *tv,
> > > >   const union xfs_timestamp       *ts)
> > > >  {
> > > > + if (dip->di_version >= 3 &&
> > > > +     (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME))) {
> > > > +         uint64_t                t = be64_to_cpu(ts->t_bigtime);
> > > > +         uint64_t                s;
> > > > +         uint32_t                n;
> > > > +
> > > > +         s = div_u64_rem(t, NSEC_PER_SEC, &n);
> > > > +         tv->tv_sec = s - XFS_INO_BIGTIME_EPOCH;
> > > > +         tv->tv_nsec = n;
> > > > +         return;
> > > > + }
> > > > +
> > > >   tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> > > >   tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> > > >  }
> > >
> > > Can't say I'm sold on this union. It seems cleaner to me to just
> > > make the timestamp an opaque 64 bit field on disk and convert it to
> > > the in-memory representation directly in the to/from disk
> > > operations. e.g.:
> > >
> > > void
> > > xfs_inode_from_disk_timestamp(
> > >     struct xfs_dinode               *dip,
> > >     struct timespec64               *tv,
> > >     __be64                          ts)
> > > {
> > >
> > >     uint64_t                t = be64_to_cpu(ts);
> > >     uint64_t                s;
> > >     uint32_t                n;
> > >
> > >     if (xfs_dinode_is_bigtime(dip)) {
> > >             s = div_u64_rem(t, NSEC_PER_SEC, &n) - XFS_INO_BIGTIME_EPOCH;
> > >     } else {
> > >             s = (int)(t >> 32);
> > >             n = (int)(t & 0xffffffff);
> > >     }
> > >     tv->tv_sec = s;
> > >     tv->tv_nsec = n;
> > > }
> >
> > I don't like this open-coded union approach at all because now I have to
> > keep the t_sec and t_nsec bits separate in my head instead of letting
> > the C compiler take care of that detail.  The sample code above doesn't
> > handle that correctly either:
> >
> > Start with an old kernel on a little endian system; each uppercase
> > letter represents a byte (A is the LSB of t_sec, D is the MSB of t_sec,
> > E is the LSB of t_nsec, and H is the MSB of t_nsec):
> >
> >       sec  nsec (incore)
> >       ABCD EFGH
> >
> > That gets written out as:
> >
> >       sec  nsec (ondisk)
> >       DCBA HGFE
> >
> > Now reboot with a new kernel that only knows 64bit timestamps on disk:
> >
> >       64bit (ondisk)
> >       DCBAHGFE
> >
> > Now it does the first be64_to_cpu conversion:
> >       64bit (incore)
> >       EFGHABCD
> >
> > And then masks and shifts:
> >       sec  nsec (incore)
> >       EFGH ABCD
> >
> > Oops, we just switched the values!
> >
> > The correct approach (I think) is to perform the shifting and masking on
> > the raw __be64 value before converting them to incore format via
> > be32_to_cpu, but now I have to work out all four cases by hand instead
> > of letting the compiler do the legwork for me.  I don't remember if it's
> > correct to go around shifting and masking __be64 values.
> >
> > I guess the good news is that at least we have generic/402 to catch
> > these kinds of persistence problems, but ugh.
> >
> > Anyway, what are you afraid of?  The C compiler smoking crack and not
> > actually overlapping the two union elements?  We could control for
> > that...
>
> (Following up on the mailing list with something I pasted into IRC)
>
> Ok, so I temporarily patched up my dev tree with this approximation of
> how that would work, properly done:
>
> diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
> index c59ddb56bb90..7c71e4440402 100644
> --- a/fs/xfs/libxfs/xfs_inode_buf.c
> +++ b/fs/xfs/libxfs/xfs_inode_buf.c
> @@ -176,8 +176,8 @@ xfs_inode_from_disk_timestamp(
>                 return;
>         }
>
> -       tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> -       tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> +       tv->tv_sec = (time64_t)be32_to_cpu((__be32)(ts->t_bigtime >> 32));
> +       tv->tv_nsec = be32_to_cpu(ts->t_bigtime & 0xFFFFFFFFU);
>  }
>
>  int
> @@ -294,8 +294,8 @@ xfs_inode_to_disk_timestamp(
>                 return;
>         }
>
> -       ts->t_sec = cpu_to_be32(tv->tv_sec);
> -       ts->t_nsec = cpu_to_be32(tv->tv_nsec);
> +       ts->t_bigtime = (__be64)cpu_to_be32(tv->tv_sec) << 32 |
> +                       cpu_to_be32(tv->tv_nsec);
>  }
>
>  void
> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> index d44e8932979b..5d36d6dea326 100644
> --- a/fs/xfs/xfs_inode_item.c
> +++ b/fs/xfs/xfs_inode_item.c
> @@ -308,8 +308,8 @@ xfs_log_dinode_to_disk_ts(
>                 return;
>         }
>
> -       ts->t_sec = cpu_to_be32(its->t_sec);
> -       ts->t_nsec = cpu_to_be32(its->t_nsec);
> +       ts->t_bigtime = (__be64)cpu_to_be32(its->t_sec) << 32 |
> +                       cpu_to_be32(its->t_nsec);
>  }
>
> And immediately got a ton of smatch warnings:
>
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:297:26: warning: cast to restricted __be64
> xfs_inode_buf.c:297:26: warning: cast from restricted __be32
> xfs_inode_buf.c:297:26: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:298:25: warning: restricted __be32 degrades to integer
> xfs_inode_buf.c:297:23: warning: incorrect type in assignment (different base types)
> xfs_inode_buf.c:297:23:    expected restricted __be64 [usertype] t_bigtime
> xfs_inode_buf.c:297:23:    got unsigned long long
>
> (and even more in xfs_inode_item.c)
>
> So... while we could get rid of the union and hand-decode the timestamp
> from a __be64 on legacy filesystems, I see the static checker complaints
> as a second piece of evidence that this would be unnecessarily risky.
>
> --D
>
> > > > @@ -220,9 +234,9 @@ xfs_inode_from_disk(
> > > >    * a time before epoch is converted to a time long after epoch
> > > >    * on 64 bit systems.
> > > >    */
> > > > - xfs_inode_from_disk_timestamp(&inode->i_atime, &from->di_atime);
> > > > - xfs_inode_from_disk_timestamp(&inode->i_mtime, &from->di_mtime);
> > > > - xfs_inode_from_disk_timestamp(&inode->i_ctime, &from->di_ctime);
> > > > + xfs_inode_from_disk_timestamp(from, &inode->i_atime, &from->di_atime);
> > > > + xfs_inode_from_disk_timestamp(from, &inode->i_mtime, &from->di_mtime);
> > > > + xfs_inode_from_disk_timestamp(from, &inode->i_ctime, &from->di_ctime);
> > > >
> > > >   to->di_size = be64_to_cpu(from->di_size);
> > > >   to->di_nblocks = be64_to_cpu(from->di_nblocks);
> > > > @@ -235,9 +249,17 @@ xfs_inode_from_disk(
> > > >   if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
> > > >           inode_set_iversion_queried(inode,
> > > >                                      be64_to_cpu(from->di_changecount));
> > > > -         xfs_inode_from_disk_timestamp(&to->di_crtime, &from->di_crtime);
> > > > +         xfs_inode_from_disk_timestamp(from, &to->di_crtime,
> > > > +                         &from->di_crtime);
> > > >           to->di_flags2 = be64_to_cpu(from->di_flags2);
> > > >           to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
> > > > +         /*
> > > > +          * Set the bigtime flag incore so that we automatically convert
> > > > +          * this inode's ondisk timestamps to bigtime format the next
> > > > +          * time we write the inode core to disk.
> > > > +          */
> > > > +         if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
> > > > +                 to->di_flags2 |= XFS_DIFLAG2_BIGTIME;
> > > >   }
> > >
> > > We do not want on-disk flags to be changed outside transactions like
> > > this. Indeed, this has implications for O_DSYNC operation, in that
> > > we do not trigger inode sync operations if the inode is only
> > > timestamp dirty. If we've changed this flag, then the inode is more
> > > than "timestamp dirty" and O_DSYNC will need to flush the entire
> > > inode.... :/
> >
> > I forgot about XFS_ILOG_TIMESTAMP.
> >
> > > IOWs, I think we should only change this flag in a timestamp
> > > transaction where the timestamps are actually being logged and hence
> > > we can set inode dirty state appropriately so that everything will
> > > get logged, changed and written back correctly....
> >
> > Yeah, that's fair.  I'll change xfs_trans_log_inode to set the bigtime
> > flag if we're logging either the timestamps or the core.
> >
> > --D
> >
> > > Cheers,
> > >
> > > Dave.
> > > --
> > > Dave Chinner
> > > david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-20  0:01       ` Darrick J. Wong
  2020-08-20  4:42         ` griffin tucker
@ 2020-08-20  5:11         ` Amir Goldstein
  2020-08-20 22:47           ` Dave Chinner
  1 sibling, 1 reply; 51+ messages in thread
From: Amir Goldstein @ 2020-08-20  5:11 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Dave Chinner, linux-xfs, Eric Sandeen

On Thu, Aug 20, 2020 at 3:03 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> On Wed, Aug 19, 2020 at 02:43:22PM -0700, Darrick J. Wong wrote:
> > On Wed, Aug 19, 2020 at 09:35:35AM +1000, Dave Chinner wrote:
> > > On Mon, Aug 17, 2020 at 03:57:39PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > >
> > > > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > > > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > > > time epoch).  This enables us to handle dates up to 2486, which solves
> > > > the y2038 problem.
> > > >
> > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > ---
> > > .....
> > > > +/* Convert an ondisk timestamp into the 64-bit safe incore format. */
> > > >  void
> > > >  xfs_inode_from_disk_timestamp(
> > > > + struct xfs_dinode               *dip,
> > > >   struct timespec64               *tv,
> > > >   const union xfs_timestamp       *ts)
> > > >  {
> > > > + if (dip->di_version >= 3 &&
> > > > +     (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME))) {
> > > > +         uint64_t                t = be64_to_cpu(ts->t_bigtime);
> > > > +         uint64_t                s;
> > > > +         uint32_t                n;
> > > > +
> > > > +         s = div_u64_rem(t, NSEC_PER_SEC, &n);
> > > > +         tv->tv_sec = s - XFS_INO_BIGTIME_EPOCH;
> > > > +         tv->tv_nsec = n;
> > > > +         return;
> > > > + }
> > > > +
> > > >   tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> > > >   tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> > > >  }
> > >
> > > Can't say I'm sold on this union. It seems cleaner to me to just
> > > make the timestamp an opaque 64 bit field on disk and convert it to
> > > the in-memory representation directly in the to/from disk
> > > operations. e.g.:
> > >
> > > void
> > > xfs_inode_from_disk_timestamp(
> > >     struct xfs_dinode               *dip,
> > >     struct timespec64               *tv,
> > >     __be64                          ts)
> > > {
> > >
> > >     uint64_t                t = be64_to_cpu(ts);
> > >     uint64_t                s;
> > >     uint32_t                n;
> > >
> > >     if (xfs_dinode_is_bigtime(dip)) {
> > >             s = div_u64_rem(t, NSEC_PER_SEC, &n) - XFS_INO_BIGTIME_EPOCH;
> > >     } else {
> > >             s = (int)(t >> 32);
> > >             n = (int)(t & 0xffffffff);
> > >     }
> > >     tv->tv_sec = s;
> > >     tv->tv_nsec = n;
> > > }
> >
> > I don't like this open-coded union approach at all because now I have to
> > keep the t_sec and t_nsec bits separate in my head instead of letting
> > the C compiler take care of that detail.  The sample code above doesn't
> > handle that correctly either:
> >
> > Start with an old kernel on a little endian system; each uppercase
> > letter represents a byte (A is the LSB of t_sec, D is the MSB of t_sec,
> > E is the LSB of t_nsec, and H is the MSB of t_nsec):
> >
> >       sec  nsec (incore)
> >       ABCD EFGH
> >
> > That gets written out as:
> >
> >       sec  nsec (ondisk)
> >       DCBA HGFE
> >
> > Now reboot with a new kernel that only knows 64bit timestamps on disk:
> >
> >       64bit (ondisk)
> >       DCBAHGFE
> >
> > Now it does the first be64_to_cpu conversion:
> >       64bit (incore)
> >       EFGHABCD
> >
> > And then masks and shifts:
> >       sec  nsec (incore)
> >       EFGH ABCD
> >
> > Oops, we just switched the values!
> >
> > The correct approach (I think) is to perform the shifting and masking on
> > the raw __be64 value before converting them to incore format via
> > be32_to_cpu, but now I have to work out all four cases by hand instead
> > of letting the compiler do the legwork for me.  I don't remember if it's
> > correct to go around shifting and masking __be64 values.
> >
> > I guess the good news is that at least we have generic/402 to catch
> > these kinds of persistence problems, but ugh.
> >
> > Anyway, what are you afraid of?  The C compiler smoking crack and not
> > actually overlapping the two union elements?  We could control for
> > that...
>
> (Following up on the mailing list with something I pasted into IRC)
>
> Ok, so I temporarily patched up my dev tree with this approximation of
> how that would work, properly done:
>
> diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
> index c59ddb56bb90..7c71e4440402 100644
> --- a/fs/xfs/libxfs/xfs_inode_buf.c
> +++ b/fs/xfs/libxfs/xfs_inode_buf.c
> @@ -176,8 +176,8 @@ xfs_inode_from_disk_timestamp(
>                 return;
>         }
>
> -       tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> -       tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> +       tv->tv_sec = (time64_t)be32_to_cpu((__be32)(ts->t_bigtime >> 32));
> +       tv->tv_nsec = be32_to_cpu(ts->t_bigtime & 0xFFFFFFFFU);
>  }
>
>  int
> @@ -294,8 +294,8 @@ xfs_inode_to_disk_timestamp(
>                 return;
>         }
>
> -       ts->t_sec = cpu_to_be32(tv->tv_sec);
> -       ts->t_nsec = cpu_to_be32(tv->tv_nsec);
> +       ts->t_bigtime = (__be64)cpu_to_be32(tv->tv_sec) << 32 |
> +                       cpu_to_be32(tv->tv_nsec);
>  }
>
>  void
> diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> index d44e8932979b..5d36d6dea326 100644
> --- a/fs/xfs/xfs_inode_item.c
> +++ b/fs/xfs/xfs_inode_item.c
> @@ -308,8 +308,8 @@ xfs_log_dinode_to_disk_ts(
>                 return;
>         }
>
> -       ts->t_sec = cpu_to_be32(its->t_sec);
> -       ts->t_nsec = cpu_to_be32(its->t_nsec);
> +       ts->t_bigtime = (__be64)cpu_to_be32(its->t_sec) << 32 |
> +                       cpu_to_be32(its->t_nsec);
>  }
>
> And immediately got a ton of smatch warnings:
>
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> xfs_inode_buf.c:297:26: warning: cast to restricted __be64
> xfs_inode_buf.c:297:26: warning: cast from restricted __be32
> xfs_inode_buf.c:297:26: warning: restricted __be64 degrades to integer
> xfs_inode_buf.c:298:25: warning: restricted __be32 degrades to integer
> xfs_inode_buf.c:297:23: warning: incorrect type in assignment (different base types)
> xfs_inode_buf.c:297:23:    expected restricted __be64 [usertype] t_bigtime
> xfs_inode_buf.c:297:23:    got unsigned long long
>
> (and even more in xfs_inode_item.c)
>
> So... while we could get rid of the union and hand-decode the timestamp
> from a __be64 on legacy filesystems, I see the static checker complaints
> as a second piece of evidence that this would be unnecessarily risky.
>

And unnecessarily make the code less readable and harder to review.
To what end? Dave writes:
"I just didn't really like the way the code in the encode/decode
helpers turned out..."

Cannot respond to that argument on a technical review.
I can only say that as a reviewer, the posted version was clear and easy
for me to verify and the posted alternative that turned out to have a bug,
I would never have never caught that bug in review and I would not have
felt confident about verifying the code in review either.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-20  4:42         ` griffin tucker
@ 2020-08-20 16:23           ` Darrick J. Wong
  2020-08-21  5:02             ` griffin tucker
  0 siblings, 1 reply; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-20 16:23 UTC (permalink / raw)
  To: griffin tucker; +Cc: linux-xfs

On Thu, Aug 20, 2020 at 02:42:29PM +1000, griffin tucker wrote:
> how difficult would it be to implement 128 bit timestamps?

Somewhat.  __uint128_t exists as a gcc extension, but you'd also have to
expand the ondisk inode structure definition to accomodate 64 more bits
of data per timestamp.  If you split the timestamp between a low u64 and
a high u64 you'd also need to recombine them carefully to avoid screwing
up sign extension like ext4 did when they adopted 34-bit timestamps.

> and how much further in time beyond 2486 would this allow?

I dunno.  2^66 seconds beyond December 1901, I suppose.

That's what, the year ... 2338218323135 or something?  Far beyond what
the maximum kernel supports (2^63-1 seconds).

> is the implementation of 128 bit timestamps just not feasible due to
> 64 bit alu width?

Just my opinion, but I think the difficulty here is the added complexity
to push XFS beyond the 25th century.  I don't plan to be around for
that, and prefer to let Captain Picard deal with that expansion. ;)

> how long is it until hardware capability reaches beyond the 16exabyte limit?

Still a few years, even if you staple together all the SMR HDDs in the
world.  You'd need, what, like, 400,000 20TB HDDs to reach 8EB?

--D

> On Thu, 20 Aug 2020 at 10:01, Darrick J. Wong <darrick.wong@oracle.com> wrote:
> >
> > On Wed, Aug 19, 2020 at 02:43:22PM -0700, Darrick J. Wong wrote:
> > > On Wed, Aug 19, 2020 at 09:35:35AM +1000, Dave Chinner wrote:
> > > > On Mon, Aug 17, 2020 at 03:57:39PM -0700, Darrick J. Wong wrote:
> > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > >
> > > > > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > > > > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > > > > time epoch).  This enables us to handle dates up to 2486, which solves
> > > > > the y2038 problem.
> > > > >
> > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > ---
> > > > .....
> > > > > +/* Convert an ondisk timestamp into the 64-bit safe incore format. */
> > > > >  void
> > > > >  xfs_inode_from_disk_timestamp(
> > > > > + struct xfs_dinode               *dip,
> > > > >   struct timespec64               *tv,
> > > > >   const union xfs_timestamp       *ts)
> > > > >  {
> > > > > + if (dip->di_version >= 3 &&
> > > > > +     (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME))) {
> > > > > +         uint64_t                t = be64_to_cpu(ts->t_bigtime);
> > > > > +         uint64_t                s;
> > > > > +         uint32_t                n;
> > > > > +
> > > > > +         s = div_u64_rem(t, NSEC_PER_SEC, &n);
> > > > > +         tv->tv_sec = s - XFS_INO_BIGTIME_EPOCH;
> > > > > +         tv->tv_nsec = n;
> > > > > +         return;
> > > > > + }
> > > > > +
> > > > >   tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> > > > >   tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> > > > >  }
> > > >
> > > > Can't say I'm sold on this union. It seems cleaner to me to just
> > > > make the timestamp an opaque 64 bit field on disk and convert it to
> > > > the in-memory representation directly in the to/from disk
> > > > operations. e.g.:
> > > >
> > > > void
> > > > xfs_inode_from_disk_timestamp(
> > > >     struct xfs_dinode               *dip,
> > > >     struct timespec64               *tv,
> > > >     __be64                          ts)
> > > > {
> > > >
> > > >     uint64_t                t = be64_to_cpu(ts);
> > > >     uint64_t                s;
> > > >     uint32_t                n;
> > > >
> > > >     if (xfs_dinode_is_bigtime(dip)) {
> > > >             s = div_u64_rem(t, NSEC_PER_SEC, &n) - XFS_INO_BIGTIME_EPOCH;
> > > >     } else {
> > > >             s = (int)(t >> 32);
> > > >             n = (int)(t & 0xffffffff);
> > > >     }
> > > >     tv->tv_sec = s;
> > > >     tv->tv_nsec = n;
> > > > }
> > >
> > > I don't like this open-coded union approach at all because now I have to
> > > keep the t_sec and t_nsec bits separate in my head instead of letting
> > > the C compiler take care of that detail.  The sample code above doesn't
> > > handle that correctly either:
> > >
> > > Start with an old kernel on a little endian system; each uppercase
> > > letter represents a byte (A is the LSB of t_sec, D is the MSB of t_sec,
> > > E is the LSB of t_nsec, and H is the MSB of t_nsec):
> > >
> > >       sec  nsec (incore)
> > >       ABCD EFGH
> > >
> > > That gets written out as:
> > >
> > >       sec  nsec (ondisk)
> > >       DCBA HGFE
> > >
> > > Now reboot with a new kernel that only knows 64bit timestamps on disk:
> > >
> > >       64bit (ondisk)
> > >       DCBAHGFE
> > >
> > > Now it does the first be64_to_cpu conversion:
> > >       64bit (incore)
> > >       EFGHABCD
> > >
> > > And then masks and shifts:
> > >       sec  nsec (incore)
> > >       EFGH ABCD
> > >
> > > Oops, we just switched the values!
> > >
> > > The correct approach (I think) is to perform the shifting and masking on
> > > the raw __be64 value before converting them to incore format via
> > > be32_to_cpu, but now I have to work out all four cases by hand instead
> > > of letting the compiler do the legwork for me.  I don't remember if it's
> > > correct to go around shifting and masking __be64 values.
> > >
> > > I guess the good news is that at least we have generic/402 to catch
> > > these kinds of persistence problems, but ugh.
> > >
> > > Anyway, what are you afraid of?  The C compiler smoking crack and not
> > > actually overlapping the two union elements?  We could control for
> > > that...
> >
> > (Following up on the mailing list with something I pasted into IRC)
> >
> > Ok, so I temporarily patched up my dev tree with this approximation of
> > how that would work, properly done:
> >
> > diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
> > index c59ddb56bb90..7c71e4440402 100644
> > --- a/fs/xfs/libxfs/xfs_inode_buf.c
> > +++ b/fs/xfs/libxfs/xfs_inode_buf.c
> > @@ -176,8 +176,8 @@ xfs_inode_from_disk_timestamp(
> >                 return;
> >         }
> >
> > -       tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> > -       tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> > +       tv->tv_sec = (time64_t)be32_to_cpu((__be32)(ts->t_bigtime >> 32));
> > +       tv->tv_nsec = be32_to_cpu(ts->t_bigtime & 0xFFFFFFFFU);
> >  }
> >
> >  int
> > @@ -294,8 +294,8 @@ xfs_inode_to_disk_timestamp(
> >                 return;
> >         }
> >
> > -       ts->t_sec = cpu_to_be32(tv->tv_sec);
> > -       ts->t_nsec = cpu_to_be32(tv->tv_nsec);
> > +       ts->t_bigtime = (__be64)cpu_to_be32(tv->tv_sec) << 32 |
> > +                       cpu_to_be32(tv->tv_nsec);
> >  }
> >
> >  void
> > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> > index d44e8932979b..5d36d6dea326 100644
> > --- a/fs/xfs/xfs_inode_item.c
> > +++ b/fs/xfs/xfs_inode_item.c
> > @@ -308,8 +308,8 @@ xfs_log_dinode_to_disk_ts(
> >                 return;
> >         }
> >
> > -       ts->t_sec = cpu_to_be32(its->t_sec);
> > -       ts->t_nsec = cpu_to_be32(its->t_nsec);
> > +       ts->t_bigtime = (__be64)cpu_to_be32(its->t_sec) << 32 |
> > +                       cpu_to_be32(its->t_nsec);
> >  }
> >
> > And immediately got a ton of smatch warnings:
> >
> > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > xfs_inode_buf.c:297:26: warning: cast to restricted __be64
> > xfs_inode_buf.c:297:26: warning: cast from restricted __be32
> > xfs_inode_buf.c:297:26: warning: restricted __be64 degrades to integer
> > xfs_inode_buf.c:298:25: warning: restricted __be32 degrades to integer
> > xfs_inode_buf.c:297:23: warning: incorrect type in assignment (different base types)
> > xfs_inode_buf.c:297:23:    expected restricted __be64 [usertype] t_bigtime
> > xfs_inode_buf.c:297:23:    got unsigned long long
> >
> > (and even more in xfs_inode_item.c)
> >
> > So... while we could get rid of the union and hand-decode the timestamp
> > from a __be64 on legacy filesystems, I see the static checker complaints
> > as a second piece of evidence that this would be unnecessarily risky.
> >
> > --D
> >
> > > > > @@ -220,9 +234,9 @@ xfs_inode_from_disk(
> > > > >    * a time before epoch is converted to a time long after epoch
> > > > >    * on 64 bit systems.
> > > > >    */
> > > > > - xfs_inode_from_disk_timestamp(&inode->i_atime, &from->di_atime);
> > > > > - xfs_inode_from_disk_timestamp(&inode->i_mtime, &from->di_mtime);
> > > > > - xfs_inode_from_disk_timestamp(&inode->i_ctime, &from->di_ctime);
> > > > > + xfs_inode_from_disk_timestamp(from, &inode->i_atime, &from->di_atime);
> > > > > + xfs_inode_from_disk_timestamp(from, &inode->i_mtime, &from->di_mtime);
> > > > > + xfs_inode_from_disk_timestamp(from, &inode->i_ctime, &from->di_ctime);
> > > > >
> > > > >   to->di_size = be64_to_cpu(from->di_size);
> > > > >   to->di_nblocks = be64_to_cpu(from->di_nblocks);
> > > > > @@ -235,9 +249,17 @@ xfs_inode_from_disk(
> > > > >   if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
> > > > >           inode_set_iversion_queried(inode,
> > > > >                                      be64_to_cpu(from->di_changecount));
> > > > > -         xfs_inode_from_disk_timestamp(&to->di_crtime, &from->di_crtime);
> > > > > +         xfs_inode_from_disk_timestamp(from, &to->di_crtime,
> > > > > +                         &from->di_crtime);
> > > > >           to->di_flags2 = be64_to_cpu(from->di_flags2);
> > > > >           to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
> > > > > +         /*
> > > > > +          * Set the bigtime flag incore so that we automatically convert
> > > > > +          * this inode's ondisk timestamps to bigtime format the next
> > > > > +          * time we write the inode core to disk.
> > > > > +          */
> > > > > +         if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
> > > > > +                 to->di_flags2 |= XFS_DIFLAG2_BIGTIME;
> > > > >   }
> > > >
> > > > We do not want on-disk flags to be changed outside transactions like
> > > > this. Indeed, this has implications for O_DSYNC operation, in that
> > > > we do not trigger inode sync operations if the inode is only
> > > > timestamp dirty. If we've changed this flag, then the inode is more
> > > > than "timestamp dirty" and O_DSYNC will need to flush the entire
> > > > inode.... :/
> > >
> > > I forgot about XFS_ILOG_TIMESTAMP.
> > >
> > > > IOWs, I think we should only change this flag in a timestamp
> > > > transaction where the timestamps are actually being logged and hence
> > > > we can set inode dirty state appropriately so that everything will
> > > > get logged, changed and written back correctly....
> > >
> > > Yeah, that's fair.  I'll change xfs_trans_log_inode to set the bigtime
> > > flag if we're logging either the timestamps or the core.
> > >
> > > --D
> > >
> > > > Cheers,
> > > >
> > > > Dave.
> > > > --
> > > > Dave Chinner
> > > > david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-20  5:11         ` Amir Goldstein
@ 2020-08-20 22:47           ` Dave Chinner
  0 siblings, 0 replies; 51+ messages in thread
From: Dave Chinner @ 2020-08-20 22:47 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Darrick J. Wong, linux-xfs, Eric Sandeen

On Thu, Aug 20, 2020 at 08:11:27AM +0300, Amir Goldstein wrote:
> On Thu, Aug 20, 2020 at 3:03 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> > On Wed, Aug 19, 2020 at 02:43:22PM -0700, Darrick J. Wong wrote:
> > So... while we could get rid of the union and hand-decode the timestamp
> > from a __be64 on legacy filesystems, I see the static checker complaints
> > as a second piece of evidence that this would be unnecessarily risky.
> >
> 
> And unnecessarily make the code less readable and harder to review.
> To what end? Dave writes:
> "I just didn't really like the way the code in the encode/decode
> helpers turned out..."
> 
> Cannot respond to that argument on a technical review.

Sure you can.

I gave a possible alternative implementation in my review, Darrick
pointed out it has problems and asked why I suggested a different
implementation. My answer was simply "I didn't really like the code,
maybe it could be done differently".

That's perfectly normal. If you -don't like the way the code is
written- then that should be a part of the review feedback. If
there's no other alternative to the way the code was presented, then
the reviewer is just going to have to live with it. That's perfectly
acceptible.

> I can only say that as a reviewer, the posted version was clear and easy
> for me to verify

Which is your personal opinion. Opinions differ and, again, there's
nothing wrong with that.

But you're stating that you don't want reviewers to express an
opinion on how the code looks because you "cannot respond to that on
a technical review". That's kind of naive - when was the last time
you asked someone to reformat the code because you found it was hard
to read?

We've always considered how the code looks as a key metric in
determining if the code will be maintainable. Why else would we care
about macro nesting, typedefs, keeping functions short and concise,
etc? That's all about how the code looks and how easy it is to read,
and hence we can infer the cost of maintainability of the code bein
proposed from that. That's all technical analysis of the code being
proposed, and it's all subjective.

See what I'm getting at here? Comments about the *look* of the code
is valid technical feedback, and we can argue *technically* at
length about whether the code is structured the best way it could be
done. And we often do this at length just because someone simply
"doesn't like the way the proposed code currently looks"...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-20 16:23           ` Darrick J. Wong
@ 2020-08-21  5:02             ` griffin tucker
  2020-08-21 15:31               ` Mike Fleetwood
  0 siblings, 1 reply; 51+ messages in thread
From: griffin tucker @ 2020-08-21  5:02 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs

thanks for taking the time to respond to my seemingly stupid questions.

what about interplanetary timestamps? and during transit to the
moon/planets when time slows?

On Fri, 21 Aug 2020 at 02:24, Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> On Thu, Aug 20, 2020 at 02:42:29PM +1000, griffin tucker wrote:
> > how difficult would it be to implement 128 bit timestamps?
>
> Somewhat.  __uint128_t exists as a gcc extension, but you'd also have to
> expand the ondisk inode structure definition to accomodate 64 more bits
> of data per timestamp.  If you split the timestamp between a low u64 and
> a high u64 you'd also need to recombine them carefully to avoid screwing
> up sign extension like ext4 did when they adopted 34-bit timestamps.
>
> > and how much further in time beyond 2486 would this allow?
>
> I dunno.  2^66 seconds beyond December 1901, I suppose.
>
> That's what, the year ... 2338218323135 or something?  Far beyond what
> the maximum kernel supports (2^63-1 seconds).
>
> > is the implementation of 128 bit timestamps just not feasible due to
> > 64 bit alu width?
>
> Just my opinion, but I think the difficulty here is the added complexity
> to push XFS beyond the 25th century.  I don't plan to be around for
> that, and prefer to let Captain Picard deal with that expansion. ;)
>
> > how long is it until hardware capability reaches beyond the 16exabyte limit?
>
> Still a few years, even if you staple together all the SMR HDDs in the
> world.  You'd need, what, like, 400,000 20TB HDDs to reach 8EB?
>
> --D
>
> > On Thu, 20 Aug 2020 at 10:01, Darrick J. Wong <darrick.wong@oracle.com> wrote:
> > >
> > > On Wed, Aug 19, 2020 at 02:43:22PM -0700, Darrick J. Wong wrote:
> > > > On Wed, Aug 19, 2020 at 09:35:35AM +1000, Dave Chinner wrote:
> > > > > On Mon, Aug 17, 2020 at 03:57:39PM -0700, Darrick J. Wong wrote:
> > > > > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > >
> > > > > > Redesign the ondisk timestamps to be a simple unsigned 64-bit counter of
> > > > > > nanoseconds since 14 Dec 1901 (i.e. the minimum time in the 32-bit unix
> > > > > > time epoch).  This enables us to handle dates up to 2486, which solves
> > > > > > the y2038 problem.
> > > > > >
> > > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > > > > ---
> > > > > .....
> > > > > > +/* Convert an ondisk timestamp into the 64-bit safe incore format. */
> > > > > >  void
> > > > > >  xfs_inode_from_disk_timestamp(
> > > > > > + struct xfs_dinode               *dip,
> > > > > >   struct timespec64               *tv,
> > > > > >   const union xfs_timestamp       *ts)
> > > > > >  {
> > > > > > + if (dip->di_version >= 3 &&
> > > > > > +     (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME))) {
> > > > > > +         uint64_t                t = be64_to_cpu(ts->t_bigtime);
> > > > > > +         uint64_t                s;
> > > > > > +         uint32_t                n;
> > > > > > +
> > > > > > +         s = div_u64_rem(t, NSEC_PER_SEC, &n);
> > > > > > +         tv->tv_sec = s - XFS_INO_BIGTIME_EPOCH;
> > > > > > +         tv->tv_nsec = n;
> > > > > > +         return;
> > > > > > + }
> > > > > > +
> > > > > >   tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> > > > > >   tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> > > > > >  }
> > > > >
> > > > > Can't say I'm sold on this union. It seems cleaner to me to just
> > > > > make the timestamp an opaque 64 bit field on disk and convert it to
> > > > > the in-memory representation directly in the to/from disk
> > > > > operations. e.g.:
> > > > >
> > > > > void
> > > > > xfs_inode_from_disk_timestamp(
> > > > >     struct xfs_dinode               *dip,
> > > > >     struct timespec64               *tv,
> > > > >     __be64                          ts)
> > > > > {
> > > > >
> > > > >     uint64_t                t = be64_to_cpu(ts);
> > > > >     uint64_t                s;
> > > > >     uint32_t                n;
> > > > >
> > > > >     if (xfs_dinode_is_bigtime(dip)) {
> > > > >             s = div_u64_rem(t, NSEC_PER_SEC, &n) - XFS_INO_BIGTIME_EPOCH;
> > > > >     } else {
> > > > >             s = (int)(t >> 32);
> > > > >             n = (int)(t & 0xffffffff);
> > > > >     }
> > > > >     tv->tv_sec = s;
> > > > >     tv->tv_nsec = n;
> > > > > }
> > > >
> > > > I don't like this open-coded union approach at all because now I have to
> > > > keep the t_sec and t_nsec bits separate in my head instead of letting
> > > > the C compiler take care of that detail.  The sample code above doesn't
> > > > handle that correctly either:
> > > >
> > > > Start with an old kernel on a little endian system; each uppercase
> > > > letter represents a byte (A is the LSB of t_sec, D is the MSB of t_sec,
> > > > E is the LSB of t_nsec, and H is the MSB of t_nsec):
> > > >
> > > >       sec  nsec (incore)
> > > >       ABCD EFGH
> > > >
> > > > That gets written out as:
> > > >
> > > >       sec  nsec (ondisk)
> > > >       DCBA HGFE
> > > >
> > > > Now reboot with a new kernel that only knows 64bit timestamps on disk:
> > > >
> > > >       64bit (ondisk)
> > > >       DCBAHGFE
> > > >
> > > > Now it does the first be64_to_cpu conversion:
> > > >       64bit (incore)
> > > >       EFGHABCD
> > > >
> > > > And then masks and shifts:
> > > >       sec  nsec (incore)
> > > >       EFGH ABCD
> > > >
> > > > Oops, we just switched the values!
> > > >
> > > > The correct approach (I think) is to perform the shifting and masking on
> > > > the raw __be64 value before converting them to incore format via
> > > > be32_to_cpu, but now I have to work out all four cases by hand instead
> > > > of letting the compiler do the legwork for me.  I don't remember if it's
> > > > correct to go around shifting and masking __be64 values.
> > > >
> > > > I guess the good news is that at least we have generic/402 to catch
> > > > these kinds of persistence problems, but ugh.
> > > >
> > > > Anyway, what are you afraid of?  The C compiler smoking crack and not
> > > > actually overlapping the two union elements?  We could control for
> > > > that...
> > >
> > > (Following up on the mailing list with something I pasted into IRC)
> > >
> > > Ok, so I temporarily patched up my dev tree with this approximation of
> > > how that would work, properly done:
> > >
> > > diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
> > > index c59ddb56bb90..7c71e4440402 100644
> > > --- a/fs/xfs/libxfs/xfs_inode_buf.c
> > > +++ b/fs/xfs/libxfs/xfs_inode_buf.c
> > > @@ -176,8 +176,8 @@ xfs_inode_from_disk_timestamp(
> > >                 return;
> > >         }
> > >
> > > -       tv->tv_sec = (int)be32_to_cpu(ts->t_sec);
> > > -       tv->tv_nsec = (int)be32_to_cpu(ts->t_nsec);
> > > +       tv->tv_sec = (time64_t)be32_to_cpu((__be32)(ts->t_bigtime >> 32));
> > > +       tv->tv_nsec = be32_to_cpu(ts->t_bigtime & 0xFFFFFFFFU);
> > >  }
> > >
> > >  int
> > > @@ -294,8 +294,8 @@ xfs_inode_to_disk_timestamp(
> > >                 return;
> > >         }
> > >
> > > -       ts->t_sec = cpu_to_be32(tv->tv_sec);
> > > -       ts->t_nsec = cpu_to_be32(tv->tv_nsec);
> > > +       ts->t_bigtime = (__be64)cpu_to_be32(tv->tv_sec) << 32 |
> > > +                       cpu_to_be32(tv->tv_nsec);
> > >  }
> > >
> > >  void
> > > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
> > > index d44e8932979b..5d36d6dea326 100644
> > > --- a/fs/xfs/xfs_inode_item.c
> > > +++ b/fs/xfs/xfs_inode_item.c
> > > @@ -308,8 +308,8 @@ xfs_log_dinode_to_disk_ts(
> > >                 return;
> > >         }
> > >
> > > -       ts->t_sec = cpu_to_be32(its->t_sec);
> > > -       ts->t_nsec = cpu_to_be32(its->t_nsec);
> > > +       ts->t_bigtime = (__be64)cpu_to_be32(its->t_sec) << 32 |
> > > +                       cpu_to_be32(its->t_nsec);
> > >  }
> > >
> > > And immediately got a ton of smatch warnings:
> > >
> > > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > > xfs_inode_buf.c:179:32: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:179:32: warning: cast to restricted __be32
> > > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > > xfs_inode_buf.c:180:23: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:180:23: warning: cast to restricted __be32
> > > xfs_inode_buf.c:297:26: warning: cast to restricted __be64
> > > xfs_inode_buf.c:297:26: warning: cast from restricted __be32
> > > xfs_inode_buf.c:297:26: warning: restricted __be64 degrades to integer
> > > xfs_inode_buf.c:298:25: warning: restricted __be32 degrades to integer
> > > xfs_inode_buf.c:297:23: warning: incorrect type in assignment (different base types)
> > > xfs_inode_buf.c:297:23:    expected restricted __be64 [usertype] t_bigtime
> > > xfs_inode_buf.c:297:23:    got unsigned long long
> > >
> > > (and even more in xfs_inode_item.c)
> > >
> > > So... while we could get rid of the union and hand-decode the timestamp
> > > from a __be64 on legacy filesystems, I see the static checker complaints
> > > as a second piece of evidence that this would be unnecessarily risky.
> > >
> > > --D
> > >
> > > > > > @@ -220,9 +234,9 @@ xfs_inode_from_disk(
> > > > > >    * a time before epoch is converted to a time long after epoch
> > > > > >    * on 64 bit systems.
> > > > > >    */
> > > > > > - xfs_inode_from_disk_timestamp(&inode->i_atime, &from->di_atime);
> > > > > > - xfs_inode_from_disk_timestamp(&inode->i_mtime, &from->di_mtime);
> > > > > > - xfs_inode_from_disk_timestamp(&inode->i_ctime, &from->di_ctime);
> > > > > > + xfs_inode_from_disk_timestamp(from, &inode->i_atime, &from->di_atime);
> > > > > > + xfs_inode_from_disk_timestamp(from, &inode->i_mtime, &from->di_mtime);
> > > > > > + xfs_inode_from_disk_timestamp(from, &inode->i_ctime, &from->di_ctime);
> > > > > >
> > > > > >   to->di_size = be64_to_cpu(from->di_size);
> > > > > >   to->di_nblocks = be64_to_cpu(from->di_nblocks);
> > > > > > @@ -235,9 +249,17 @@ xfs_inode_from_disk(
> > > > > >   if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
> > > > > >           inode_set_iversion_queried(inode,
> > > > > >                                      be64_to_cpu(from->di_changecount));
> > > > > > -         xfs_inode_from_disk_timestamp(&to->di_crtime, &from->di_crtime);
> > > > > > +         xfs_inode_from_disk_timestamp(from, &to->di_crtime,
> > > > > > +                         &from->di_crtime);
> > > > > >           to->di_flags2 = be64_to_cpu(from->di_flags2);
> > > > > >           to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
> > > > > > +         /*
> > > > > > +          * Set the bigtime flag incore so that we automatically convert
> > > > > > +          * this inode's ondisk timestamps to bigtime format the next
> > > > > > +          * time we write the inode core to disk.
> > > > > > +          */
> > > > > > +         if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
> > > > > > +                 to->di_flags2 |= XFS_DIFLAG2_BIGTIME;
> > > > > >   }
> > > > >
> > > > > We do not want on-disk flags to be changed outside transactions like
> > > > > this. Indeed, this has implications for O_DSYNC operation, in that
> > > > > we do not trigger inode sync operations if the inode is only
> > > > > timestamp dirty. If we've changed this flag, then the inode is more
> > > > > than "timestamp dirty" and O_DSYNC will need to flush the entire
> > > > > inode.... :/
> > > >
> > > > I forgot about XFS_ILOG_TIMESTAMP.
> > > >
> > > > > IOWs, I think we should only change this flag in a timestamp
> > > > > transaction where the timestamps are actually being logged and hence
> > > > > we can set inode dirty state appropriately so that everything will
> > > > > get logged, changed and written back correctly....
> > > >
> > > > Yeah, that's fair.  I'll change xfs_trans_log_inode to set the bigtime
> > > > flag if we're logging either the timestamps or the core.
> > > >
> > > > --D
> > > >
> > > > > Cheers,
> > > > >
> > > > > Dave.
> > > > > --
> > > > > Dave Chinner
> > > > > david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem
  2020-08-21  5:02             ` griffin tucker
@ 2020-08-21 15:31               ` Mike Fleetwood
  0 siblings, 0 replies; 51+ messages in thread
From: Mike Fleetwood @ 2020-08-21 15:31 UTC (permalink / raw)
  To: griffin tucker; +Cc: Darrick J. Wong, linux-xfs

On Fri, 21 Aug 2020 at 06:02, griffin tucker
<xfssxsltislti2490@griffintucker.id.au> wrote:
>
> thanks for taking the time to respond to my seemingly stupid questions.
>
> what about interplanetary timestamps? and during transit to the
> moon/planets when time slows?


A little bit of searching provides the answers to your questions, which
are heading off topic.

1. SR and GR time dilation effects are very small [1].
   (~0.000027 seconds per day on the ISS [2][3]).
2. It's probably UTC everywhere in the solar system by definition.
   (Fixed offset for longitudinal distribution on the surface of a
   spinning planet is a separate issue [4].  It's UTC on the ISS [5] and
   apparently NASA probes too [6]).
3. Unix time (time_t) works in UTC but with leap seconds ignored [7], so
   is not appropriate for anything important in space.
   (Imagine if every time a leap second was added GPS was off by most of
   the distance to the moon)!

[1] https://en.wikipedia.org/wiki/Time_dilation#Combined_effect_of_velocity_and_gravitational_time_dilation
[2] https://www.businessinsider.com/do-astronauts-age-slower-than-people-on-earth-2015-8?r=US&IR=T
[3] https://spaceflight.nasa.gov/station/crew/exp7/luletters/lu_letter13.html
[4] https://en.wikipedia.org/wiki/Time_zone
[5] https://en.wikipedia.org/wiki/International_Space_Station#Crew_activities
[6] https://solarsystem.nasa.gov/basics/chapter2-3/
[7] https://en.wikipedia.org/wiki/Unix_time

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-09-02  2:56 [PATCH v6 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
@ 2020-09-02  2:56 ` Darrick J. Wong
  0 siblings, 0 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-09-02  2:56 UTC (permalink / raw)
  To: darrick.wong, david, hch
  Cc: Amir Goldstein, Christoph Hellwig, Allison Collins, linux-xfs,
	amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Refactor the code that sets the default quota grace period into a helper
function so that we can override the ondisk behavior later.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Allison Collins <allison.henderson@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h |   13 +++++++++++++
 fs/xfs/xfs_dquot.c         |    8 ++++++++
 fs/xfs/xfs_dquot.h         |    1 +
 fs/xfs/xfs_qm_syscalls.c   |    4 ++--
 4 files changed, 24 insertions(+), 2 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index cb316053d3db..4b68a473b090 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1209,6 +1209,11 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  * been reached, and therefore no expiration has been set.  Therefore, the
  * ondisk min and max defined here can be used directly to constrain the incore
  * quota expiration timestamps on a Unix system.
+ *
+ * The grace period for each quota type is stored in the root dquot (id = 0)
+ * and is applied to a non-root dquot when it exceeds the soft or hard limits.
+ * The length of quota grace periods are unsigned 32-bit quantities measured in
+ * units of seconds.  A value of zero means to use the default period.
  */
 
 /*
@@ -1223,6 +1228,14 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  */
 #define XFS_DQ_LEGACY_EXPIRY_MAX	((int64_t)U32_MAX)
 
+/*
+ * Default quota grace periods, ranging from zero (use the compiled defaults)
+ * to ~136 years.  These are applied to a non-root dquot that has exceeded
+ * either limit.
+ */
+#define XFS_DQ_GRACE_MIN		((int64_t)0)
+#define XFS_DQ_GRACE_MAX		((int64_t)U32_MAX)
+
 /*
  * This is the main portion of the on-disk representation of quota information
  * for a user.  We pad this with some more expansion room to construct the on
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index f34841f98d44..e63a933413a3 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -110,6 +110,14 @@ xfs_dquot_set_timeout(
 					  qi->qi_expiry_max);
 }
 
+/* Set the length of the default grace period. */
+time64_t
+xfs_dquot_set_grace_period(
+	time64_t		grace)
+{
+	return clamp_t(time64_t, grace, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX);
+}
+
 /*
  * Determine if this quota counter is over either limit and set the quota
  * timers as appropriate.
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 0e449101c861..f642884a6834 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -238,5 +238,6 @@ int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type,
 		xfs_qm_dqiterate_fn iter_fn, void *priv);
 
 time64_t xfs_dquot_set_timeout(struct xfs_mount *mp, time64_t timeout);
+time64_t xfs_dquot_set_grace_period(time64_t grace);
 
 #endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 750f775ae915..ca1b57d291dc 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -486,8 +486,8 @@ xfs_setqlim_timer(
 {
 	if (qlim) {
 		/* Set the length of the default grace period. */
-		res->timer = timer;
-		qlim->time = timer;
+		res->timer = xfs_dquot_set_grace_period(timer);
+		qlim->time = res->timer;
 	} else {
 		/* Set the grace period expiration on a quota. */
 		res->timer = xfs_dquot_set_timeout(mp, timer);


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-08-31  6:06 [PATCH v5 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
@ 2020-08-31  6:07 ` Darrick J. Wong
  0 siblings, 0 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-31  6:07 UTC (permalink / raw)
  To: darrick.wong, david, hch
  Cc: Amir Goldstein, Christoph Hellwig, Allison Collins, linux-xfs,
	amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Refactor the code that sets the default quota grace period into a helper
function so that we can override the ondisk behavior later.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Allison Collins <allison.henderson@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h |   13 +++++++++++++
 fs/xfs/xfs_dquot.c         |    8 ++++++++
 fs/xfs/xfs_dquot.h         |    1 +
 fs/xfs/xfs_qm_syscalls.c   |    4 ++--
 4 files changed, 24 insertions(+), 2 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index cb316053d3db..4b68a473b090 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1209,6 +1209,11 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  * been reached, and therefore no expiration has been set.  Therefore, the
  * ondisk min and max defined here can be used directly to constrain the incore
  * quota expiration timestamps on a Unix system.
+ *
+ * The grace period for each quota type is stored in the root dquot (id = 0)
+ * and is applied to a non-root dquot when it exceeds the soft or hard limits.
+ * The length of quota grace periods are unsigned 32-bit quantities measured in
+ * units of seconds.  A value of zero means to use the default period.
  */
 
 /*
@@ -1223,6 +1228,14 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  */
 #define XFS_DQ_LEGACY_EXPIRY_MAX	((int64_t)U32_MAX)
 
+/*
+ * Default quota grace periods, ranging from zero (use the compiled defaults)
+ * to ~136 years.  These are applied to a non-root dquot that has exceeded
+ * either limit.
+ */
+#define XFS_DQ_GRACE_MIN		((int64_t)0)
+#define XFS_DQ_GRACE_MAX		((int64_t)U32_MAX)
+
 /*
  * This is the main portion of the on-disk representation of quota information
  * for a user.  We pad this with some more expansion room to construct the on
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index f34841f98d44..e63a933413a3 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -110,6 +110,14 @@ xfs_dquot_set_timeout(
 					  qi->qi_expiry_max);
 }
 
+/* Set the length of the default grace period. */
+time64_t
+xfs_dquot_set_grace_period(
+	time64_t		grace)
+{
+	return clamp_t(time64_t, grace, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX);
+}
+
 /*
  * Determine if this quota counter is over either limit and set the quota
  * timers as appropriate.
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 0e449101c861..f642884a6834 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -238,5 +238,6 @@ int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type,
 		xfs_qm_dqiterate_fn iter_fn, void *priv);
 
 time64_t xfs_dquot_set_timeout(struct xfs_mount *mp, time64_t timeout);
+time64_t xfs_dquot_set_grace_period(time64_t grace);
 
 #endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 750f775ae915..ca1b57d291dc 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -486,8 +486,8 @@ xfs_setqlim_timer(
 {
 	if (qlim) {
 		/* Set the length of the default grace period. */
-		res->timer = timer;
-		qlim->time = timer;
+		res->timer = xfs_dquot_set_grace_period(timer);
+		qlim->time = res->timer;
 	} else {
 		/* Set the grace period expiration on a quota. */
 		res->timer = xfs_dquot_set_timeout(mp, timer);


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-08-26 22:05 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
  2020-08-27  6:44   ` Christoph Hellwig
@ 2020-08-28  4:08   ` Allison Collins
  1 sibling, 0 replies; 51+ messages in thread
From: Allison Collins @ 2020-08-28  4:08 UTC (permalink / raw)
  To: Darrick J. Wong, david, hch; +Cc: Amir Goldstein, linux-xfs, sandeen



On 8/26/20 3:05 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Refactor the code that sets the default quota grace period into a helper
> function so that we can override the ondisk behavior later.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Looks ok
Reviewed-by: Allison Collins <allison.henderson@oracle.com>

> ---
>   fs/xfs/libxfs/xfs_format.h |   13 +++++++++++++
>   fs/xfs/xfs_dquot.c         |    8 ++++++++
>   fs/xfs/xfs_dquot.h         |    1 +
>   fs/xfs/xfs_qm_syscalls.c   |    4 ++--
>   4 files changed, 24 insertions(+), 2 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index cb316053d3db..4b68a473b090 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -1209,6 +1209,11 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
>    * been reached, and therefore no expiration has been set.  Therefore, the
>    * ondisk min and max defined here can be used directly to constrain the incore
>    * quota expiration timestamps on a Unix system.
> + *
> + * The grace period for each quota type is stored in the root dquot (id = 0)
> + * and is applied to a non-root dquot when it exceeds the soft or hard limits.
> + * The length of quota grace periods are unsigned 32-bit quantities measured in
> + * units of seconds.  A value of zero means to use the default period.
>    */
>   
>   /*
> @@ -1223,6 +1228,14 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
>    */
>   #define XFS_DQ_LEGACY_EXPIRY_MAX	((int64_t)U32_MAX)
>   
> +/*
> + * Default quota grace periods, ranging from zero (use the compiled defaults)
> + * to ~136 years.  These are applied to a non-root dquot that has exceeded
> + * either limit.
> + */
> +#define XFS_DQ_GRACE_MIN		((int64_t)0)
> +#define XFS_DQ_GRACE_MAX		((int64_t)U32_MAX)
> +
>   /*
>    * This is the main portion of the on-disk representation of quota information
>    * for a user.  We pad this with some more expansion room to construct the on
> diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
> index f34841f98d44..e63a933413a3 100644
> --- a/fs/xfs/xfs_dquot.c
> +++ b/fs/xfs/xfs_dquot.c
> @@ -110,6 +110,14 @@ xfs_dquot_set_timeout(
>   					  qi->qi_expiry_max);
>   }
>   
> +/* Set the length of the default grace period. */
> +time64_t
> +xfs_dquot_set_grace_period(
> +	time64_t		grace)
> +{
> +	return clamp_t(time64_t, grace, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX);
> +}
> +
>   /*
>    * Determine if this quota counter is over either limit and set the quota
>    * timers as appropriate.
> diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
> index 0e449101c861..f642884a6834 100644
> --- a/fs/xfs/xfs_dquot.h
> +++ b/fs/xfs/xfs_dquot.h
> @@ -238,5 +238,6 @@ int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type,
>   		xfs_qm_dqiterate_fn iter_fn, void *priv);
>   
>   time64_t xfs_dquot_set_timeout(struct xfs_mount *mp, time64_t timeout);
> +time64_t xfs_dquot_set_grace_period(time64_t grace);
>   
>   #endif /* __XFS_DQUOT_H__ */
> diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
> index 750f775ae915..ca1b57d291dc 100644
> --- a/fs/xfs/xfs_qm_syscalls.c
> +++ b/fs/xfs/xfs_qm_syscalls.c
> @@ -486,8 +486,8 @@ xfs_setqlim_timer(
>   {
>   	if (qlim) {
>   		/* Set the length of the default grace period. */
> -		res->timer = timer;
> -		qlim->time = timer;
> +		res->timer = xfs_dquot_set_grace_period(timer);
> +		qlim->time = res->timer;
>   	} else {
>   		/* Set the grace period expiration on a quota. */
>   		res->timer = xfs_dquot_set_timeout(mp, timer);
> 

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-08-26 22:05 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
@ 2020-08-27  6:44   ` Christoph Hellwig
  2020-08-28  4:08   ` Allison Collins
  1 sibling, 0 replies; 51+ messages in thread
From: Christoph Hellwig @ 2020-08-27  6:44 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: david, hch, Amir Goldstein, linux-xfs, sandeen

On Wed, Aug 26, 2020 at 03:05:17PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Refactor the code that sets the default quota grace period into a helper
> function so that we can override the ondisk behavior later.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-08-26 22:04 [PATCH v4 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
@ 2020-08-26 22:05 ` Darrick J. Wong
  2020-08-27  6:44   ` Christoph Hellwig
  2020-08-28  4:08   ` Allison Collins
  0 siblings, 2 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-26 22:05 UTC (permalink / raw)
  To: darrick.wong, david, hch; +Cc: Amir Goldstein, linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Refactor the code that sets the default quota grace period into a helper
function so that we can override the ondisk behavior later.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/xfs/libxfs/xfs_format.h |   13 +++++++++++++
 fs/xfs/xfs_dquot.c         |    8 ++++++++
 fs/xfs/xfs_dquot.h         |    1 +
 fs/xfs/xfs_qm_syscalls.c   |    4 ++--
 4 files changed, 24 insertions(+), 2 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index cb316053d3db..4b68a473b090 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1209,6 +1209,11 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  * been reached, and therefore no expiration has been set.  Therefore, the
  * ondisk min and max defined here can be used directly to constrain the incore
  * quota expiration timestamps on a Unix system.
+ *
+ * The grace period for each quota type is stored in the root dquot (id = 0)
+ * and is applied to a non-root dquot when it exceeds the soft or hard limits.
+ * The length of quota grace periods are unsigned 32-bit quantities measured in
+ * units of seconds.  A value of zero means to use the default period.
  */
 
 /*
@@ -1223,6 +1228,14 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  */
 #define XFS_DQ_LEGACY_EXPIRY_MAX	((int64_t)U32_MAX)
 
+/*
+ * Default quota grace periods, ranging from zero (use the compiled defaults)
+ * to ~136 years.  These are applied to a non-root dquot that has exceeded
+ * either limit.
+ */
+#define XFS_DQ_GRACE_MIN		((int64_t)0)
+#define XFS_DQ_GRACE_MAX		((int64_t)U32_MAX)
+
 /*
  * This is the main portion of the on-disk representation of quota information
  * for a user.  We pad this with some more expansion room to construct the on
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index f34841f98d44..e63a933413a3 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -110,6 +110,14 @@ xfs_dquot_set_timeout(
 					  qi->qi_expiry_max);
 }
 
+/* Set the length of the default grace period. */
+time64_t
+xfs_dquot_set_grace_period(
+	time64_t		grace)
+{
+	return clamp_t(time64_t, grace, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX);
+}
+
 /*
  * Determine if this quota counter is over either limit and set the quota
  * timers as appropriate.
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 0e449101c861..f642884a6834 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -238,5 +238,6 @@ int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type,
 		xfs_qm_dqiterate_fn iter_fn, void *priv);
 
 time64_t xfs_dquot_set_timeout(struct xfs_mount *mp, time64_t timeout);
+time64_t xfs_dquot_set_grace_period(time64_t grace);
 
 #endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 750f775ae915..ca1b57d291dc 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -486,8 +486,8 @@ xfs_setqlim_timer(
 {
 	if (qlim) {
 		/* Set the length of the default grace period. */
-		res->timer = timer;
-		qlim->time = timer;
+		res->timer = xfs_dquot_set_grace_period(timer);
+		qlim->time = res->timer;
 	} else {
 		/* Set the grace period expiration on a quota. */
 		res->timer = xfs_dquot_set_timeout(mp, timer);


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-08-21  2:11 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
  2020-08-22  7:15   ` Christoph Hellwig
@ 2020-08-24  0:01   ` Dave Chinner
  1 sibling, 0 replies; 51+ messages in thread
From: Dave Chinner @ 2020-08-24  0:01 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Amir Goldstein, linux-xfs, sandeen

On Thu, Aug 20, 2020 at 07:11:47PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Refactor the code that sets the default quota grace period into a helper
> function so that we can override the ondisk behavior later.

Same comments as last patch.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-08-21  2:11 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
@ 2020-08-22  7:15   ` Christoph Hellwig
  2020-08-24  0:01   ` Dave Chinner
  1 sibling, 0 replies; 51+ messages in thread
From: Christoph Hellwig @ 2020-08-22  7:15 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Amir Goldstein, linux-xfs, sandeen

On Thu, Aug 20, 2020 at 07:11:47PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Refactor the code that sets the default quota grace period into a helper
> function so that we can override the ondisk behavior later.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/xfs/libxfs/xfs_format.h |   13 +++++++++++++
>  fs/xfs/xfs_dquot.c         |    9 +++++++++
>  fs/xfs/xfs_dquot.h         |    1 +
>  fs/xfs/xfs_ondisk.h        |    2 ++
>  fs/xfs/xfs_qm_syscalls.c   |    4 ++--
>  5 files changed, 27 insertions(+), 2 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index ef36978239ac..e9e6248b35be 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -1205,6 +1205,11 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
>   * time zero is the Unix epoch, Jan  1 00:00:01 UTC 1970.  An expiration value
>   * of zero means that the quota limit has not been reached, and therefore no
>   * expiration has been set.
> + *
> + * The grace period for each quota type is stored in the root dquot (id = 0)
> + * and is applied to a non-root dquot when it exceeds the soft or hard limits.
> + * The length of quota grace periods are unsigned 32-bit quantities measured in
> + * units of seconds.  A value of zero means to use the default period.
>   */
>  
>  /*
> @@ -1219,6 +1224,14 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
>   */
>  #define XFS_DQ_TIMEOUT_MAX	((int64_t)U32_MAX)
>  
> +/*
> + * Default quota grace periods, ranging from zero (use the compiled defaults)
> + * to ~136 years.  These are applied to a non-root dquot that has exceeded
> + * either limit.
> + */
> +#define XFS_DQ_GRACE_MIN	((int64_t)0)
> +#define XFS_DQ_GRACE_MAX	((int64_t)U32_MAX)
> +
>  /*
>   * This is the main portion of the on-disk representation of quota information
>   * for a user.  We pad this with some more expansion room to construct the on
> diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
> index 2425b1c30d11..ed3fa6ada0d3 100644
> --- a/fs/xfs/xfs_dquot.c
> +++ b/fs/xfs/xfs_dquot.c
> @@ -98,6 +98,15 @@ xfs_qm_adjust_dqlimits(
>  		xfs_dquot_set_prealloc_limits(dq);
>  }
>  
> +/* Set the length of the default grace period. */
> +void
> +xfs_dquot_set_grace_period(
> +	time64_t		*timer,
> +	time64_t		value)
> +{
> +	*timer = clamp_t(time64_t, value, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX);
> +}

Why not return the value?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH 03/11] xfs: refactor default quota grace period setting code
  2020-08-21  2:11 [PATCH v3 " Darrick J. Wong
@ 2020-08-21  2:11 ` Darrick J. Wong
  2020-08-22  7:15   ` Christoph Hellwig
  2020-08-24  0:01   ` Dave Chinner
  0 siblings, 2 replies; 51+ messages in thread
From: Darrick J. Wong @ 2020-08-21  2:11 UTC (permalink / raw)
  To: darrick.wong; +Cc: Amir Goldstein, linux-xfs, amir73il, sandeen

From: Darrick J. Wong <darrick.wong@oracle.com>

Refactor the code that sets the default quota grace period into a helper
function so that we can override the ondisk behavior later.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/xfs/libxfs/xfs_format.h |   13 +++++++++++++
 fs/xfs/xfs_dquot.c         |    9 +++++++++
 fs/xfs/xfs_dquot.h         |    1 +
 fs/xfs/xfs_ondisk.h        |    2 ++
 fs/xfs/xfs_qm_syscalls.c   |    4 ++--
 5 files changed, 27 insertions(+), 2 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index ef36978239ac..e9e6248b35be 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1205,6 +1205,11 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  * time zero is the Unix epoch, Jan  1 00:00:01 UTC 1970.  An expiration value
  * of zero means that the quota limit has not been reached, and therefore no
  * expiration has been set.
+ *
+ * The grace period for each quota type is stored in the root dquot (id = 0)
+ * and is applied to a non-root dquot when it exceeds the soft or hard limits.
+ * The length of quota grace periods are unsigned 32-bit quantities measured in
+ * units of seconds.  A value of zero means to use the default period.
  */
 
 /*
@@ -1219,6 +1224,14 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  */
 #define XFS_DQ_TIMEOUT_MAX	((int64_t)U32_MAX)
 
+/*
+ * Default quota grace periods, ranging from zero (use the compiled defaults)
+ * to ~136 years.  These are applied to a non-root dquot that has exceeded
+ * either limit.
+ */
+#define XFS_DQ_GRACE_MIN	((int64_t)0)
+#define XFS_DQ_GRACE_MAX	((int64_t)U32_MAX)
+
 /*
  * This is the main portion of the on-disk representation of quota information
  * for a user.  We pad this with some more expansion room to construct the on
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 2425b1c30d11..ed3fa6ada0d3 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -98,6 +98,15 @@ xfs_qm_adjust_dqlimits(
 		xfs_dquot_set_prealloc_limits(dq);
 }
 
+/* Set the length of the default grace period. */
+void
+xfs_dquot_set_grace_period(
+	time64_t		*timer,
+	time64_t		value)
+{
+	*timer = clamp_t(time64_t, value, XFS_DQ_GRACE_MIN, XFS_DQ_GRACE_MAX);
+}
+
 /* Set the expiration time of a quota's grace period. */
 void
 xfs_dquot_set_timeout(
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 11bd0ee9b0fa..0ba4d91c3a11 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -237,6 +237,7 @@ typedef int (*xfs_qm_dqiterate_fn)(struct xfs_dquot *dq,
 int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type,
 		xfs_qm_dqiterate_fn iter_fn, void *priv);
 
+void xfs_dquot_set_grace_period(time64_t *timer, time64_t limit);
 void xfs_dquot_set_timeout(time64_t *timer, time64_t limit);
 
 #endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 38ccffcf3336..498e9063c605 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -27,6 +27,8 @@ xfs_check_limits(void)
 	XFS_CHECK_VALUE(XFS_INO_TIME_MAX,			2147483647LL);
 	XFS_CHECK_VALUE(XFS_DQ_TIMEOUT_MIN,			1LL);
 	XFS_CHECK_VALUE(XFS_DQ_TIMEOUT_MAX,			4294967295LL);
+	XFS_CHECK_VALUE(XFS_DQ_GRACE_MIN,			0LL);
+	XFS_CHECK_VALUE(XFS_DQ_GRACE_MAX,			4294967295LL);
 }
 
 static inline void __init
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index b16d533a6feb..95b0c25b9969 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -485,8 +485,8 @@ xfs_setqlim_timer(
 {
 	if (qlim) {
 		/* Set the length of the default grace period. */
-		res->timer = timer;
-		qlim->time = timer;
+		xfs_dquot_set_grace_period(&res->timer, timer);
+		qlim->time = res->timer;
 	} else {
 		/* Set the grace period expiration on a quota. */
 		xfs_dquot_set_timeout(&res->timer, timer);


^ permalink raw reply related	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2020-09-02  3:01 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-17 22:56 [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
2020-08-17 22:56 ` [PATCH 01/11] xfs: explicitly define inode timestamp range Darrick J. Wong
2020-08-18  6:25   ` Amir Goldstein
2020-08-17 22:57 ` [PATCH 02/11] xfs: refactor quota expiration timer modification Darrick J. Wong
2020-08-18  6:48   ` Amir Goldstein
2020-08-18 15:40     ` Darrick J. Wong
2020-08-17 22:57 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
2020-08-18 10:46   ` Amir Goldstein
2020-08-17 22:57 ` [PATCH 04/11] xfs: remove xfs_timestamp_t Darrick J. Wong
2020-08-18 10:50   ` Amir Goldstein
2020-08-17 22:57 ` [PATCH 05/11] xfs: move xfs_log_dinode_to_disk to the log code Darrick J. Wong
2020-08-18 10:51   ` Amir Goldstein
2020-08-17 22:57 ` [PATCH 06/11] xfs: refactor inode timestamp coding Darrick J. Wong
2020-08-18 11:20   ` Amir Goldstein
2020-08-18 15:42     ` Darrick J. Wong
2020-08-17 22:57 ` [PATCH 07/11] xfs: convert struct xfs_timestamp to union Darrick J. Wong
2020-08-18 11:24   ` Amir Goldstein
2020-08-17 22:57 ` [PATCH 08/11] xfs: widen ondisk timestamps to deal with y2038 problem Darrick J. Wong
2020-08-18 12:00   ` Amir Goldstein
2020-08-18 12:53     ` Amir Goldstein
2020-08-18 15:53       ` Darrick J. Wong
2020-08-18 20:52         ` Darrick J. Wong
2020-08-18 15:44     ` Darrick J. Wong
2020-08-18 23:35   ` Dave Chinner
2020-08-19 21:43     ` Darrick J. Wong
2020-08-19 23:58       ` Dave Chinner
2020-08-20  0:01       ` Darrick J. Wong
2020-08-20  4:42         ` griffin tucker
2020-08-20 16:23           ` Darrick J. Wong
2020-08-21  5:02             ` griffin tucker
2020-08-21 15:31               ` Mike Fleetwood
2020-08-20  5:11         ` Amir Goldstein
2020-08-20 22:47           ` Dave Chinner
2020-08-17 22:57 ` [PATCH 09/11] xfs: refactor quota timestamp coding Darrick J. Wong
2020-08-18 12:25   ` Amir Goldstein
2020-08-17 22:57 ` [PATCH 10/11] xfs: enable bigtime for quota timers Darrick J. Wong
2020-08-18 13:58   ` Amir Goldstein
2020-08-18 15:59     ` Darrick J. Wong
2020-08-17 22:57 ` [PATCH 11/11] xfs: enable big timestamps Darrick J. Wong
2020-08-18 14:04   ` Amir Goldstein
2020-08-18 23:01 ` [PATCH v2 00/11] xfs: widen timestamps to deal with y2038 Dave Chinner
2020-08-18 23:10   ` Darrick J. Wong
2020-08-18 23:41     ` Dave Chinner
2020-08-21  2:11 [PATCH v3 " Darrick J. Wong
2020-08-21  2:11 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
2020-08-22  7:15   ` Christoph Hellwig
2020-08-24  0:01   ` Dave Chinner
2020-08-26 22:04 [PATCH v4 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
2020-08-26 22:05 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
2020-08-27  6:44   ` Christoph Hellwig
2020-08-28  4:08   ` Allison Collins
2020-08-31  6:06 [PATCH v5 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
2020-08-31  6:07 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong
2020-09-02  2:56 [PATCH v6 00/11] xfs: widen timestamps to deal with y2038 Darrick J. Wong
2020-09-02  2:56 ` [PATCH 03/11] xfs: refactor default quota grace period setting code Darrick J. Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).