git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Đoàn Trần Công Danh" <congdanhqx@gmail.com>
To: git@vger.kernel.org, Phil Hord <phil.hord@gmail.com>,
	plavarre@purestorage.com, Junio C Hamano <gitster@pobox.com>
Cc: "Đoàn Trần Công Danh" <congdanhqx@gmail.com>
Subject: [PATCH] date.c: limit less precision ISO-8601 with its marker
Date: Mon,  9 Jan 2023 19:29:15 +0700	[thread overview]
Message-ID: <20230109122915.30973-1-congdanhqx@gmail.com> (raw)
In-Reply-To: <Y7v6jThT9GQ8Oav8@danh.dev>

The newly added heuristic to parse less precision ISO-8601 conflicts
with other heuristics to parse datetime-strings. E.g.:

	Thu, 7 Apr 2005 15:14:13 -0700

Let's limit the new heuristic to only datetime string with a 'T'
followed immediately by some digits, and if we failed to parse the
upcoming string, rollback the change.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---

Here is a better thought out change, which tried to minimize the impact of
new heuristics.

While I think it's a fixup, but I still needs explaination, I think I may
reword it's as a full patch instead.
Range-diff:
1:  4036e5a944 ! 1:  b703425a57 fixup! date.c: allow ISO 8601 reduced precision times
    @@ Metadata
     Author: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
      ## Commit message ##
    -    fixup! date.c: allow ISO 8601 reduced precision times
    +    date.c: limit less precision ISO-8601 with its marker
    +
    +    The newly added heuristic to parse less precision ISO-8601 conflicts
    +    with other heuristics to parse datetime-strings. E.g.:
    +
    +            Thu, 7 Apr 2005 15:14:13 -0700
    +
    +    Let's limit the new heuristic to only datetime string with a 'T'
    +    followed immediately by some digits, and if we failed to parse the
    +    upcoming string, rollback the change.
     
         Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
     
    @@ date.c: static int match_alpha(const char *date, struct tm *tm, int *offset)
      	}
      
     +	/* ISO-8601 allows yyyymmDD'T'HHMMSS, with less precision */
    -+	if (*date == 'T' && isdigit(date[1])) {
    -+		tm->tm_hour = tm->tm_min = tm->tm_sec = 0;
    -+		return strlen("T");
    ++	if (*date == 'T' && isdigit(date[1]) && tm->tm_hour == -1) {
    ++		tm->tm_min = tm->tm_sec = 0;
    ++		return 1;
     +	}
     +
      	/* BAD CRAP */
    @@ date.c: static inline int nodate(struct tm *tm)
     - * We just do a binary 'and' to see if the sign bit
     - * is set in all the values.
     + * Have we seen an ISO-8601-alike date, i.e. 20220101T0,
    -+ * In those special case, those fields have been set to 0
    ++ * In which, hour is still unset,
    ++ * and minutes and second has been set to 0.
       */
     -static inline int notime(struct tm *tm)
     +static inline int maybeiso8601(struct tm *tm)
    @@ date.c: static inline int nodate(struct tm *tm)
     -	return (tm->tm_hour &
     -		tm->tm_min &
     -		tm->tm_sec) < 0;
    -+	return tm->tm_hour == 0 &&
    ++	return tm->tm_hour == -1 &&
     +		tm->tm_min == 0 &&
     +		tm->tm_sec == 0;
      }
      
      /*
     @@ date.c: static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
    - 	/* 4 digits, compact style of ISO-8601's time: HHMM */
    - 	/* 2 digits, compact style of ISO-8601's time: HH */
    - 	if (n == 8 || n == 6 ||
    + 
    + 	/* 8 digits, compact style of ISO-8601's date: YYYYmmDD */
    + 	/* 6 digits, compact style of ISO-8601's time: HHMMSS */
    +-	/* 4 digits, compact style of ISO-8601's time: HHMM */
    +-	/* 2 digits, compact style of ISO-8601's time: HH */
    +-	if (n == 8 || n == 6 ||
     -		(!nodate(tm) && notime(tm) &&
    -+		(!nodate(tm) && maybeiso8601(tm) &&
    - 		(n == 4 || n == 2))) {
    +-		(n == 4 || n == 2))) {
    ++	if (n == 8 || n == 6) {
      		unsigned int num1 = num / 10000;
      		unsigned int num2 = (num % 10000) / 100;
    + 		unsigned int num3 = num % 100;
    +@@ date.c: static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
    + 		else if (n == 6 && set_time(num1, num2, num3, tm) == 0 &&
    + 			 *end == '.' && isdigit(end[1]))
    + 			strtoul(end + 1, &end, 10);
    +-		else if (n == 4)
    +-			set_time(num2, num3, 0, tm);
    +-		else if (n == 2)
    +-			set_time(num3, 0, 0, tm);
    + 		return end - date;
    + 	}
    + 
    ++	/* reduced precision of ISO-8601's time: HHMM or HH */
    ++	if (maybeiso8601(tm)) {
    ++		unsigned int num1 = num;
    ++		unsigned int num2 = 0;
    ++		if (n == 4) {
    ++			num1 = num / 100;
    ++			num2 = num % 100;
    ++		}
    ++		if ((n == 4 || n == 2) && !nodate(tm) &&
    ++		    set_time(num1, num2, 0, tm) == 0)
    ++			return n;
    ++		/*
    ++		 * We thought this is an ISO-8601 time string,
    ++		 * we set minutes and seconds to 0,
    ++		 * turn out it isn't, rollback the change.
    ++		 */
    ++		tm->tm_min = tm->tm_sec = -1;
    ++	}
    ++
    + 	/* Four-digit year or a timezone? */
    + 	if (n == 4) {
    + 		if (num <= 1400 && *offset == -1) {
     
      ## t/t0006-date.sh ##
     @@ t/t0006-date.sh: check_parse '20080214T20:30' '2008-02-14 20:30:00 +0000'

 date.c          | 49 +++++++++++++++++++++++++++++++++----------------
 t/t0006-date.sh |  3 ++-
 2 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/date.c b/date.c
index b011b9d6b3..6f45eeb356 100644
--- a/date.c
+++ b/date.c
@@ -493,6 +493,12 @@ static int match_alpha(const char *date, struct tm *tm, int *offset)
 		return 2;
 	}
 
+	/* ISO-8601 allows yyyymmDD'T'HHMMSS, with less precision */
+	if (*date == 'T' && isdigit(date[1]) && tm->tm_hour == -1) {
+		tm->tm_min = tm->tm_sec = 0;
+		return 1;
+	}
+
 	/* BAD CRAP */
 	return skip_alpha(date);
 }
@@ -639,15 +645,15 @@ static inline int nodate(struct tm *tm)
 }
 
 /*
- * Have we filled in any part of the time yet?
- * We just do a binary 'and' to see if the sign bit
- * is set in all the values.
+ * Have we seen an ISO-8601-alike date, i.e. 20220101T0,
+ * In which, hour is still unset,
+ * and minutes and second has been set to 0.
  */
-static inline int notime(struct tm *tm)
+static inline int maybeiso8601(struct tm *tm)
 {
-	return (tm->tm_hour &
-		tm->tm_min &
-		tm->tm_sec) < 0;
+	return tm->tm_hour == -1 &&
+		tm->tm_min == 0 &&
+		tm->tm_sec == 0;
 }
 
 /*
@@ -701,11 +707,7 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
 
 	/* 8 digits, compact style of ISO-8601's date: YYYYmmDD */
 	/* 6 digits, compact style of ISO-8601's time: HHMMSS */
-	/* 4 digits, compact style of ISO-8601's time: HHMM */
-	/* 2 digits, compact style of ISO-8601's time: HH */
-	if (n == 8 || n == 6 ||
-		(!nodate(tm) && notime(tm) &&
-		(n == 4 || n == 2))) {
+	if (n == 8 || n == 6) {
 		unsigned int num1 = num / 10000;
 		unsigned int num2 = (num % 10000) / 100;
 		unsigned int num3 = num % 100;
@@ -714,13 +716,28 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
 		else if (n == 6 && set_time(num1, num2, num3, tm) == 0 &&
 			 *end == '.' && isdigit(end[1]))
 			strtoul(end + 1, &end, 10);
-		else if (n == 4)
-			set_time(num2, num3, 0, tm);
-		else if (n == 2)
-			set_time(num3, 0, 0, tm);
 		return end - date;
 	}
 
+	/* reduced precision of ISO-8601's time: HHMM or HH */
+	if (maybeiso8601(tm)) {
+		unsigned int num1 = num;
+		unsigned int num2 = 0;
+		if (n == 4) {
+			num1 = num / 100;
+			num2 = num % 100;
+		}
+		if ((n == 4 || n == 2) && !nodate(tm) &&
+		    set_time(num1, num2, 0, tm) == 0)
+			return n;
+		/*
+		 * We thought this is an ISO-8601 time string,
+		 * we set minutes and seconds to 0,
+		 * turn out it isn't, rollback the change.
+		 */
+		tm->tm_min = tm->tm_sec = -1;
+	}
+
 	/* Four-digit year or a timezone? */
 	if (n == 4) {
 		if (num <= 1400 && *offset == -1) {
diff --git a/t/t0006-date.sh b/t/t0006-date.sh
index 16fb0bf4bd..130207fc04 100755
--- a/t/t0006-date.sh
+++ b/t/t0006-date.sh
@@ -93,7 +93,8 @@ check_parse '20080214T20:30' '2008-02-14 20:30:00 +0000'
 check_parse '20080214T20' '2008-02-14 20:00:00 +0000'
 check_parse '20080214T203045' '2008-02-14 20:30:45 +0000'
 check_parse '20080214T2030' '2008-02-14 20:30:00 +0000'
-check_parse '20080214T20' '2008-02-14 20:00:00 +0000'
+check_parse '20080214T000000.20' '2008-02-14 00:00:00 +0000'
+check_parse '20080214T00:00:00.20' '2008-02-14 00:00:00 +0000'
 check_parse '20080214T203045-04:00' '2008-02-14 20:30:45 -0400'
 check_parse '20080214T203045 -04:00' '2008-02-14 20:30:45 -0400'
 check_parse '20080214T203045.019-04:00' '2008-02-14 20:30:45 -0400'
-- 
2.39.0.287.g690a66fa66


  reply	other threads:[~2023-01-09 12:30 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-16  3:36 [PATCH] date.c: allow ISO 8601 reduced precision times Phil Hord
2022-12-16  4:23 ` Junio C Hamano
2022-12-16 18:38   ` Phil Hord
2023-01-09  6:41     ` Phil Hord
2023-01-09  8:48       ` Junio C Hamano
2023-01-09  9:16         ` Junio C Hamano
2023-01-09 18:30           ` Phil Hord
2023-01-09 11:29         ` [PATCH] fixup! " Đoàn Trần Công Danh
2023-01-09 12:29           ` Đoàn Trần Công Danh [this message]
2023-01-09 18:57             ` [PATCH] date.c: limit less precision ISO-8601 with its marker Phil Hord
2023-01-11  0:10 ` [PATCH v2] date.c: allow ISO 8601 reduced precision times Đoàn Trần Công Danh
2023-01-13 19:50   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230109122915.30973-1-congdanhqx@gmail.com \
    --to=congdanhqx@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=phil.hord@gmail.com \
    --cc=plavarre@purestorage.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).