From: "Đoàn Trần Công Danh" <congdanhqx@gmail.com>
To: git@vger.kernel.org, Phil Hord <phil.hord@gmail.com>,
plavarre@purestorage.com, Junio C Hamano <gitster@pobox.com>
Cc: "Đoàn Trần Công Danh" <congdanhqx@gmail.com>
Subject: [PATCH] date.c: limit less precision ISO-8601 with its marker
Date: Mon, 9 Jan 2023 19:29:15 +0700 [thread overview]
Message-ID: <20230109122915.30973-1-congdanhqx@gmail.com> (raw)
In-Reply-To: <Y7v6jThT9GQ8Oav8@danh.dev>
The newly added heuristic to parse less precision ISO-8601 conflicts
with other heuristics to parse datetime-strings. E.g.:
Thu, 7 Apr 2005 15:14:13 -0700
Let's limit the new heuristic to only datetime strings with a 'T'
followed immediately by some digits, and if we fail to parse the
upcoming string, roll back the change.
Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
Here is a better thought-out change, which tries to minimize the impact of
the new heuristics.
While I think it's a fixup, it still needs explanation, so I think I may
reword it as a full patch instead.
Range-diff:
1: 4036e5a944 ! 1: b703425a57 fixup! date.c: allow ISO 8601 reduced precision times
@@ Metadata
Author: Đoàn Trần Công Danh <congdanhqx@gmail.com>
## Commit message ##
- fixup! date.c: allow ISO 8601 reduced precision times
+ date.c: limit less precision ISO-8601 with its marker
+
+ The newly added heuristic to parse less precision ISO-8601 conflicts
+ with other heuristics to parse datetime-strings. E.g.:
+
+ Thu, 7 Apr 2005 15:14:13 -0700
+
+ Let's limit the new heuristic to only datetime string with a 'T'
+ followed immediately by some digits, and if we failed to parse the
+ upcoming string, rollback the change.
Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
@@ date.c: static int match_alpha(const char *date, struct tm *tm, int *offset)
}
+ /* ISO-8601 allows yyyymmDD'T'HHMMSS, with less precision */
-+ if (*date == 'T' && isdigit(date[1])) {
-+ tm->tm_hour = tm->tm_min = tm->tm_sec = 0;
-+ return strlen("T");
++ if (*date == 'T' && isdigit(date[1]) && tm->tm_hour == -1) {
++ tm->tm_min = tm->tm_sec = 0;
++ return 1;
+ }
+
/* BAD CRAP */
@@ date.c: static inline int nodate(struct tm *tm)
- * We just do a binary 'and' to see if the sign bit
- * is set in all the values.
+ * Have we seen an ISO-8601-alike date, i.e. 20220101T0,
-+ * In those special case, those fields have been set to 0
++ * In which, hour is still unset,
++ * and minutes and second has been set to 0.
*/
-static inline int notime(struct tm *tm)
+static inline int maybeiso8601(struct tm *tm)
@@ date.c: static inline int nodate(struct tm *tm)
- return (tm->tm_hour &
- tm->tm_min &
- tm->tm_sec) < 0;
-+ return tm->tm_hour == 0 &&
++ return tm->tm_hour == -1 &&
+ tm->tm_min == 0 &&
+ tm->tm_sec == 0;
}
/*
@@ date.c: static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
- /* 4 digits, compact style of ISO-8601's time: HHMM */
- /* 2 digits, compact style of ISO-8601's time: HH */
- if (n == 8 || n == 6 ||
+
+ /* 8 digits, compact style of ISO-8601's date: YYYYmmDD */
+ /* 6 digits, compact style of ISO-8601's time: HHMMSS */
+- /* 4 digits, compact style of ISO-8601's time: HHMM */
+- /* 2 digits, compact style of ISO-8601's time: HH */
+- if (n == 8 || n == 6 ||
- (!nodate(tm) && notime(tm) &&
-+ (!nodate(tm) && maybeiso8601(tm) &&
- (n == 4 || n == 2))) {
+- (n == 4 || n == 2))) {
++ if (n == 8 || n == 6) {
unsigned int num1 = num / 10000;
unsigned int num2 = (num % 10000) / 100;
+ unsigned int num3 = num % 100;
+@@ date.c: static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
+ else if (n == 6 && set_time(num1, num2, num3, tm) == 0 &&
+ *end == '.' && isdigit(end[1]))
+ strtoul(end + 1, &end, 10);
+- else if (n == 4)
+- set_time(num2, num3, 0, tm);
+- else if (n == 2)
+- set_time(num3, 0, 0, tm);
+ return end - date;
+ }
+
++ /* reduced precision of ISO-8601's time: HHMM or HH */
++ if (maybeiso8601(tm)) {
++ unsigned int num1 = num;
++ unsigned int num2 = 0;
++ if (n == 4) {
++ num1 = num / 100;
++ num2 = num % 100;
++ }
++ if ((n == 4 || n == 2) && !nodate(tm) &&
++ set_time(num1, num2, 0, tm) == 0)
++ return n;
++ /*
++ * We thought this is an ISO-8601 time string,
++ * we set minutes and seconds to 0,
++ * turn out it isn't, rollback the change.
++ */
++ tm->tm_min = tm->tm_sec = -1;
++ }
++
+ /* Four-digit year or a timezone? */
+ if (n == 4) {
+ if (num <= 1400 && *offset == -1) {
## t/t0006-date.sh ##
@@ t/t0006-date.sh: check_parse '20080214T20:30' '2008-02-14 20:30:00 +0000'
date.c | 49 +++++++++++++++++++++++++++++++++----------------
t/t0006-date.sh | 3 ++-
2 files changed, 35 insertions(+), 17 deletions(-)
diff --git a/date.c b/date.c
index b011b9d6b3..6f45eeb356 100644
--- a/date.c
+++ b/date.c
@@ -493,6 +493,12 @@ static int match_alpha(const char *date, struct tm *tm, int *offset)
return 2;
}
+ /* ISO-8601 allows yyyymmDD'T'HHMMSS, with less precision */
+ if (*date == 'T' && isdigit(date[1]) && tm->tm_hour == -1) {
+ tm->tm_min = tm->tm_sec = 0;
+ return 1;
+ }
+
/* BAD CRAP */
return skip_alpha(date);
}
@@ -639,15 +645,15 @@ static inline int nodate(struct tm *tm)
}
/*
- * Have we filled in any part of the time yet?
- * We just do a binary 'and' to see if the sign bit
- * is set in all the values.
+ * Have we seen an ISO-8601-alike date, i.e. 20220101T0,
+ * In which, hour is still unset,
+ * and minutes and second has been set to 0.
*/
-static inline int notime(struct tm *tm)
+static inline int maybeiso8601(struct tm *tm)
{
- return (tm->tm_hour &
- tm->tm_min &
- tm->tm_sec) < 0;
+ return tm->tm_hour == -1 &&
+ tm->tm_min == 0 &&
+ tm->tm_sec == 0;
}
/*
@@ -701,11 +707,7 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
/* 8 digits, compact style of ISO-8601's date: YYYYmmDD */
/* 6 digits, compact style of ISO-8601's time: HHMMSS */
- /* 4 digits, compact style of ISO-8601's time: HHMM */
- /* 2 digits, compact style of ISO-8601's time: HH */
- if (n == 8 || n == 6 ||
- (!nodate(tm) && notime(tm) &&
- (n == 4 || n == 2))) {
+ if (n == 8 || n == 6) {
unsigned int num1 = num / 10000;
unsigned int num2 = (num % 10000) / 100;
unsigned int num3 = num % 100;
@@ -714,13 +716,28 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
else if (n == 6 && set_time(num1, num2, num3, tm) == 0 &&
*end == '.' && isdigit(end[1]))
strtoul(end + 1, &end, 10);
- else if (n == 4)
- set_time(num2, num3, 0, tm);
- else if (n == 2)
- set_time(num3, 0, 0, tm);
return end - date;
}
+ /* reduced precision of ISO-8601's time: HHMM or HH */
+ if (maybeiso8601(tm)) {
+ unsigned int num1 = num;
+ unsigned int num2 = 0;
+ if (n == 4) {
+ num1 = num / 100;
+ num2 = num % 100;
+ }
+ if ((n == 4 || n == 2) && !nodate(tm) &&
+ set_time(num1, num2, 0, tm) == 0)
+ return n;
+ /*
+ * We thought this is an ISO-8601 time string,
+ * we set minutes and seconds to 0,
+ * turn out it isn't, rollback the change.
+ */
+ tm->tm_min = tm->tm_sec = -1;
+ }
+
/* Four-digit year or a timezone? */
if (n == 4) {
if (num <= 1400 && *offset == -1) {
diff --git a/t/t0006-date.sh b/t/t0006-date.sh
index 16fb0bf4bd..130207fc04 100755
--- a/t/t0006-date.sh
+++ b/t/t0006-date.sh
@@ -93,7 +93,8 @@ check_parse '20080214T20:30' '2008-02-14 20:30:00 +0000'
check_parse '20080214T20' '2008-02-14 20:00:00 +0000'
check_parse '20080214T203045' '2008-02-14 20:30:45 +0000'
check_parse '20080214T2030' '2008-02-14 20:30:00 +0000'
-check_parse '20080214T20' '2008-02-14 20:00:00 +0000'
+check_parse '20080214T000000.20' '2008-02-14 00:00:00 +0000'
+check_parse '20080214T00:00:00.20' '2008-02-14 00:00:00 +0000'
check_parse '20080214T203045-04:00' '2008-02-14 20:30:45 -0400'
check_parse '20080214T203045 -04:00' '2008-02-14 20:30:45 -0400'
check_parse '20080214T203045.019-04:00' '2008-02-14 20:30:45 -0400'
--
2.39.0.287.g690a66fa66
next prev parent reply other threads:[~2023-01-09 12:30 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-16 3:36 [PATCH] date.c: allow ISO 8601 reduced precision times Phil Hord
2022-12-16 4:23 ` Junio C Hamano
2022-12-16 18:38 ` Phil Hord
2023-01-09 6:41 ` Phil Hord
2023-01-09 8:48 ` Junio C Hamano
2023-01-09 9:16 ` Junio C Hamano
2023-01-09 18:30 ` Phil Hord
2023-01-09 11:29 ` [PATCH] fixup! " Đoàn Trần Công Danh
2023-01-09 12:29 ` Đoàn Trần Công Danh [this message]
2023-01-09 18:57 ` [PATCH] date.c: limit less precision ISO-8601 with its marker Phil Hord
2023-01-11 0:10 ` [PATCH v2] date.c: allow ISO 8601 reduced precision times Đoàn Trần Công Danh
2023-01-13 19:50 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230109122915.30973-1-congdanhqx@gmail.com \
--to=congdanhqx@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=phil.hord@gmail.com \
--cc=plavarre@purestorage.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).