On Nov 20, 2015, at 7:54 AM, David Howells wrote: > > The handling of extended timestamps in Ext4 is broken as can be seen in the > output of the test program attached below: > > time extra bad decode good decode bad encode good encode > ======== ===== ================= ================= =========== =========== > ffffffff 0 > ffffffffffffffff ffffffffffffffff > *ffffffff 3 ffffffff 0 > 80000000 0 > ffffffff80000000 ffffffff80000000 > *80000000 3 80000000 0 > 00000000 0 > 0 0 > 00000000 0 00000000 0 > 7fffffff 0 > 7fffffff 7fffffff > 7fffffff 0 7fffffff 0 > 80000000 1 > *ffffffff80000000 80000000 > *80000000 0 80000000 1 > ffffffff 1 > *ffffffffffffffff ffffffff > *ffffffff 0 ffffffff 1 > 00000000 1 > 100000000 100000000 > 00000000 1 00000000 1 > 7fffffff 1 > 17fffffff 17fffffff > 7fffffff 1 7fffffff 1 > 80000000 2 > *ffffffff80000000 180000000 > *80000000 1 80000000 2 > ffffffff 2 > *ffffffffffffffff 1ffffffff > *ffffffff 1 ffffffff 2 > 00000000 2 > 200000000 200000000 > 00000000 2 00000000 2 > 7fffffff 2 > 27fffffff 27fffffff > 7fffffff 2 7fffffff 2 > 80000000 3 > *ffffffff80000000 280000000 > *80000000 2 80000000 3 > ffffffff 3 > *ffffffffffffffff 2ffffffff > *ffffffff 2 ffffffff 3 > 00000000 3 > 300000000 300000000 > 00000000 3 00000000 3 > 7fffffff 3 > 37fffffff 37fffffff > 7fffffff 3 7fffffff 3 > > The values marked with asterisks are wrong. > > The problem is that with a 64-bit time, in ext4_decode_extra_time() the > epoch value is just OR'd with the sign-extended time - which, if negative, > has all of the upper 32 bits set anyway. We need to add the epoch instead > of OR'ing it. In ext4_encode_extra_time(), the reverse operation needs to > take place as the 32-bit part of the number of seconds needs to be > subtracted from the 64-bit value before the epoch is shifted down. > > Since the epoch is presumably unsigned, this has the slightly strange > effect of, for epochs > 0, putting the 0x80000000-0xffffffff range before > the 0x00000000-0x7fffffff range. > > This affects all kernels from v2.6.23-rc1 onwards. > > The test program: > > #include > > #define EXT4_FITS_IN_INODE(x, y, z) 1 > #define EXT4_EPOCH_BITS 2 > #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) > #define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) > > #define le32_to_cpu(x) (x) > #define cpu_to_le32(x) (x) > typedef unsigned int __le32; > typedef unsigned int u32; > typedef signed int s32; > typedef unsigned long long __u64; > typedef signed long long s64; > > struct timespec { > long long tv_sec; /* seconds */ > long tv_nsec; /* nanoseconds */ > }; > > struct ext4_inode_info { > struct timespec i_crtime; > }; > > struct ext4_inode { > __le32 i_crtime; /* File Creation time */ > __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ > }; > > /* Incorrect implementation */ > static inline void ext4_decode_extra_time_bad(struct timespec *time, __le32 extra) > { > if (sizeof(time->tv_sec) > 4) > time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) > << 32; > time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; > } > > static inline __le32 ext4_encode_extra_time_bad(struct timespec *time) > { > return cpu_to_le32((sizeof(time->tv_sec) > 4 ? > (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | > ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); > } > > /* Fixed implementation */ > static inline void ext4_decode_extra_time_good(struct timespec *time, __le32 _extra) > { > u32 extra = le32_to_cpu(_extra); > u32 epoch = extra & EXT4_EPOCH_MASK; > > time->tv_sec = (s32)time->tv_sec + ((s64)epoch << 32); > time->tv_nsec = (extra & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; > } > > static inline __le32 ext4_encode_extra_time_good(struct timespec *time) > { > u32 extra; > s64 epoch = time->tv_sec - (s32)time->tv_sec; > > extra = (epoch >> 32) & EXT4_EPOCH_MASK; > extra |= (time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK; > return cpu_to_le32(extra); > } > > #define EXT4_INODE_GET_XTIME_BAD(xtime, inode, raw_inode) \ > do { \ > (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ > if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ > ext4_decode_extra_time_bad(&(inode)->xtime, \ > raw_inode->xtime ## _extra); \ > else \ > (inode)->xtime.tv_nsec = 0; \ > } while (0) > > #define EXT4_INODE_SET_XTIME_BAD(xtime, inode, raw_inode) \ > do { \ > (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ > if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ > (raw_inode)->xtime ## _extra = \ > ext4_encode_extra_time_bad(&(inode)->xtime); \ > } while (0) > > #define EXT4_INODE_GET_XTIME_GOOD(xtime, inode, raw_inode) \ > do { \ > (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ > if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ > ext4_decode_extra_time_good(&(inode)->xtime, \ > raw_inode->xtime ## _extra); \ > else \ > (inode)->xtime.tv_nsec = 0; \ > } while (0) > > #define EXT4_INODE_SET_XTIME_GOOD(xtime, inode, raw_inode) \ > do { \ > (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ > if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ > (raw_inode)->xtime ## _extra = \ > ext4_encode_extra_time_good(&(inode)->xtime); \ > } while (0) > > static const struct test { > unsigned crtime; > unsigned extra; > long long sec; > int nsec; > } tests[] = { > // crtime extra tv_sec tv_nsec > 0xffffffff, 0x00000000, 0xffffffffffffffff, 0, > 0x80000000, 0x00000000, 0xffffffff80000000, 0, > 0x00000000, 0x00000000, 0x0000000000000000, 0, > 0x7fffffff, 0x00000000, 0x000000007fffffff, 0, > 0x80000000, 0x00000001, 0x0000000080000000, 0, > 0xffffffff, 0x00000001, 0x00000000ffffffff, 0, > 0x00000000, 0x00000001, 0x0000000100000000, 0, > 0x7fffffff, 0x00000001, 0x000000017fffffff, 0, > 0x80000000, 0x00000002, 0x0000000180000000, 0, > 0xffffffff, 0x00000002, 0x00000001ffffffff, 0, > 0x00000000, 0x00000002, 0x0000000200000000, 0, > 0x7fffffff, 0x00000002, 0x000000027fffffff, 0, > 0x80000000, 0x00000003, 0x0000000280000000, 0, > 0xffffffff, 0x00000003, 0x00000002ffffffff, 0, > 0x00000000, 0x00000003, 0x0000000300000000, 0, > 0x7fffffff, 0x00000003, 0x000000037fffffff, 0, > }; > > int main() > { > struct ext4_inode_info ii_bad, ii_good; > struct ext4_inode raw, *praw = &raw; > struct ext4_inode raw_bad, *praw_bad = &raw_bad; > struct ext4_inode raw_good, *praw_good = &raw_good; > const struct test *t; > int i, ret = 0; > > printf("time extra bad decode good decode bad encode good encode\n"); > printf("======== ===== ================= ================= =========== ===========\n"); > for (i = 0; i < sizeof(tests) / sizeof(t[0]); i++) { > t = &tests[i]; > raw.i_crtime = t->crtime; > raw.i_crtime_extra = t->extra; > printf("%08x %5d > ", t->crtime, t->extra); > > EXT4_INODE_GET_XTIME_BAD(i_crtime, &ii_bad, praw); > EXT4_INODE_GET_XTIME_GOOD(i_crtime, &ii_good, praw); > if (ii_bad.i_crtime.tv_sec != t->sec || > ii_bad.i_crtime.tv_nsec != t->nsec) > printf("*"); > else > printf(" "); > printf("%16llx", ii_bad.i_crtime.tv_sec); > printf(" "); > if (ii_good.i_crtime.tv_sec != t->sec || > ii_good.i_crtime.tv_nsec != t->nsec) { > printf("*"); > ret = 1; > } else { > printf(" "); > } > printf("%16llx", ii_good.i_crtime.tv_sec); > > EXT4_INODE_SET_XTIME_BAD(i_crtime, &ii_good, praw_bad); > EXT4_INODE_SET_XTIME_GOOD(i_crtime, &ii_good, praw_good); > > printf(" > "); > if (raw_bad.i_crtime != raw.i_crtime || > raw_bad.i_crtime_extra != raw.i_crtime_extra) > printf("*"); > else > printf(" "); > printf("%08llx %d", raw_bad.i_crtime, raw_bad.i_crtime_extra); > printf(" "); > > if (raw_good.i_crtime != raw.i_crtime || > raw_good.i_crtime_extra != raw.i_crtime_extra) { > printf("*"); > ret = 1; > } else { > printf(" "); > } > printf("%08llx %d", raw_good.i_crtime, raw_good.i_crtime_extra); > printf("\n"); > } > > return ret; > } > > Signed-off-by: David Howells > --- > > fs/ext4/ext4.h | 22 +++++++++++++--------- > 1 file changed, 13 insertions(+), 9 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index fd1f28be5296..31efcd78bf51 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -723,19 +723,23 @@ struct move_extent { > <= (EXT4_GOOD_OLD_INODE_SIZE + \ > (einode)->i_extra_isize)) \ > > -static inline __le32 ext4_encode_extra_time(struct timespec *time) > +static inline void ext4_decode_extra_time(struct timespec *time, __le32 _extra) > { > - return cpu_to_le32((sizeof(time->tv_sec) > 4 ? > - (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | > - ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); > + u32 extra = le32_to_cpu(_extra); > + u32 epoch = extra & EXT4_EPOCH_MASK; > + > + time->tv_sec = (s32)time->tv_sec + ((s64)epoch << 32); Minor nit - two spaces before "<<". > + time->tv_nsec = (extra & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; > } > > -static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) > +static inline __le32 ext4_encode_extra_time(struct timespec *time) > { > - if (sizeof(time->tv_sec) > 4) > - time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) > - << 32; > - time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; > + u32 extra; > + s64 epoch = time->tv_sec - (s32)time->tv_sec; > + > + extra = (epoch >> 32) & EXT4_EPOCH_MASK; > + extra |= (time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK; > + return cpu_to_le32(extra); > } David, thanks for moving this patch forward. It would have been easier to review if the order of encode and decode functions had not been reversed... :-) It would be good to get a comment block in the code describing the encoding: /* * We need is an encoding that preserves the times for extra epoch "00": * * extra add to 32-bit * epoch tv_sec to get * bits decoded 64-bit tv_sec 64-bit value valid time range * 0 -0x80000000..-0x00000001 0x000000000 1901-12-13..1969-12-31 * 0 0x000000000..0x07fffffff 0x000000000 1970-01-01..2038-01-19 * 1 0x080000000..0x0ffffffff 0x100000000 2038-01-19..2106-02-07 * 1 0x100000000..0x17fffffff 0x100000000 2106-02-07..2174-02-25 * 2 0x180000000..0x1ffffffff 0x200000000 2174-02-25..2242-03-16 * 2 0x200000000..0x27fffffff 0x200000000 2242-03-16..2310-04-04 * 3 0x280000000..0x2ffffffff 0x300000000 2310-04-04..2378-04-22 * 3 0x300000000..0x37fffffff 0x300000000 2378-04-22..2446-05-10 * * Note that previous versions of the kernel on 64-bit systems would * incorrectly use extra epoch bits 1,1 for dates between 1901 and 1970. * e2fsck will correct this, assuming that it is run on the affected * filesystem before 2242. */ The only other question is whether you compile tested this on a 32-bit system? IIRC, the "sizeof(time->tv_sec)" check was to avoid compiler warnings due to assigning values too large for the data type, and (to a lesser extent) avoiding overhead on those systems. If there is no 32-bit compile warning then I'm fine with this as-is, since systems with 32-bit tv_sec are going to be broken at that point in any case. Cheers, Andreas