All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH TAKE 2] Avoid a useless prefix lookup in strbuf_expand()
@ 2008-02-09 14:40 Marco Costalba
  2008-02-10  2:36 ` Junio C Hamano
  2008-02-10  2:52 ` Johannes Schindelin
  0 siblings, 2 replies; 6+ messages in thread
From: Marco Costalba @ 2008-02-09 14:40 UTC (permalink / raw)
  To: gitster; +Cc: git, Marco Costalba

Currently the --prett=format prefix is looked up in a
tight loop in strbuf_expand(), if prefix is found is then
used as argument for format_commit_item() that does another
search by a switch statement to select the proper operation.

Because the switch statement is already able to discard
unknown matches we don't need the prefix lookup before
to call format_commit_item()

This patch removes an useless loop in a very fast path,
used by, as example, by 'git log' with --pretty=format option

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
---

New patch with Junio suggestions included.

Apply above 'next' branch


 pretty.c |  167 ++++++++++++++++++++++++++-----------------------------------
 strbuf.c |   19 +++----
 strbuf.h |    4 +-
 3 files changed, 81 insertions(+), 109 deletions(-)

diff --git a/pretty.c b/pretty.c
index b987ff2..aed32f2 100644
--- a/pretty.c
+++ b/pretty.c
@@ -282,59 +282,57 @@ static char *logmsg_reencode(const struct commit *commit,
 	return out;
 }
 
-static void format_person_part(struct strbuf *sb, char part,
+static size_t format_person_part(struct strbuf *sb, char part,
                                const char *msg, int len)
 {
+	const int placeholder_len = 2; /* currently all placeholders have same length */
 	int start, end, tz = 0;
-	unsigned long date;
+	unsigned long date = 0;
 	char *ep;
 
-	/* parse name */
+	/* advance 'end' to point to email start delimiter */
 	for (end = 0; end < len && msg[end] != '<'; end++)
 		; /* do nothing */
-	/*
-	 * If it does not even have a '<' and '>', that is
-	 * quite a bogus commit author and we discard it;
-	 * this is in line with add_user_info() that is used
-	 * in the normal codepath.  When end points at the '<'
-	 * that we found, it should have matching '>' later,
-	 * which means start (beginning of email address) must
-	 * be strictly below len.
+
+	/* When end points at the '<' that we found, it should have
+	 * matching '>' later, which means 'end' must be strictly
+	 * below len - 1.
 	 */
-	start = end + 1;
-	if (start >= len - 1)
-		return;
-	while (end > 0 && isspace(msg[end - 1]))
-		end--;
+	if (end >= len - 2)
+		goto skip;
+
 	if (part == 'n') {	/* name */
+		while (end > 0 && isspace(msg[end - 1]))
+			end--;
 		strbuf_add(sb, msg, end);
-		return;
+		return placeholder_len;
 	}
+	start = ++end; /* save email start position */
 
-	/* parse email */
-	for (end = start; end < len && msg[end] != '>'; end++)
+	/* advance 'end' to point to email end delimiter */
+	for ( ; end < len && msg[end] != '>'; end++)
 		; /* do nothing */
 
 	if (end >= len)
-		return;
+		goto skip;
 
 	if (part == 'e') {	/* email */
 		strbuf_add(sb, msg + start, end - start);
-		return;
+		return placeholder_len;
 	}
 
-	/* parse date */
+	/* advance 'start' to point to date start delimiter */
 	for (start = end + 1; start < len && isspace(msg[start]); start++)
 		; /* do nothing */
 	if (start >= len)
-		return;
+		goto skip;
 	date = strtoul(msg + start, &ep, 10);
 	if (msg + start == ep)
-		return;
+		goto skip;
 
 	if (part == 't') {	/* date, UNIX timestamp */
 		strbuf_add(sb, msg + start, ep - (msg + start));
-		return;
+		return placeholder_len;
 	}
 
 	/* parse tz */
@@ -349,17 +347,27 @@ static void format_person_part(struct strbuf *sb, char part,
 	switch (part) {
 	case 'd':	/* date */
 		strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
-		return;
+		return placeholder_len;
 	case 'D':	/* date, RFC2822 style */
 		strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
-		return;
+		return placeholder_len;
 	case 'r':	/* date, relative */
 		strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
-		return;
+		return placeholder_len;
 	case 'i':	/* date, ISO 8601 */
 		strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
-		return;
+		return placeholder_len;
 	}
+
+skip:
+	/* bougus commit, 'sb' cannot be updated, but
+	 * still we need to compute a valid return value.
+	 */
+	if (   part == 'n' || part == 'e' || part == 't' || part == 'd'
+	    || part == 'D' || part == 'r' || part == 'i')
+		return placeholder_len;
+
+	return 0; /* unknown placeholder */
 }
 
 struct chunk {
@@ -440,7 +448,7 @@ static void parse_commit_header(struct format_commit_context *context)
 	context->commit_header_parsed = 1;
 }
 
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
                                void *context)
 {
 	struct format_commit_context *c = context;
@@ -451,23 +459,23 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 	/* these are independent of the commit */
 	switch (placeholder[0]) {
 	case 'C':
-		switch (placeholder[3]) {
-		case 'd':	/* red */
+		if (!prefixcmp(placeholder + 1, "red")) {
 			strbuf_addstr(sb, "\033[31m");
-			return;
-		case 'e':	/* green */
+			return 4;
+		} else if (!prefixcmp(placeholder + 1, "green")) {
 			strbuf_addstr(sb, "\033[32m");
-			return;
-		case 'u':	/* blue */
+			return 6;
+		} else if (!prefixcmp(placeholder + 1, "blue")) {
 			strbuf_addstr(sb, "\033[34m");
-			return;
-		case 's':	/* reset color */
+			return 5;
+		} else if (!prefixcmp(placeholder + 1, "reset")) {
 			strbuf_addstr(sb, "\033[m");
-			return;
-		}
+			return 6;
+		} else
+			return 0;
 	case 'n':		/* newline */
 		strbuf_addch(sb, '\n');
-		return;
+		return 1;
 	}
 
 	/* these depend on the commit */
@@ -477,34 +485,34 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 	switch (placeholder[0]) {
 	case 'H':		/* commit hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
-		return;
+		return 1;
 	case 'h':		/* abbreviated commit hash */
 		if (add_again(sb, &c->abbrev_commit_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
-		return;
+		return 1;
 	case 'T':		/* tree hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
-		return;
+		return 1;
 	case 't':		/* abbreviated tree hash */
 		if (add_again(sb, &c->abbrev_tree_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
-		return;
+		return 1;
 	case 'P':		/* parent hashes */
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
 			strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
 		}
-		return;
+		return 1;
 	case 'p':		/* abbreviated parent hashes */
 		if (add_again(sb, &c->abbrev_parent_hashes))
-			return;
+			return 1;
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
@@ -513,14 +521,14 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		}
 		c->abbrev_parent_hashes.len = sb->len -
 		                              c->abbrev_parent_hashes.off;
-		return;
+		return 1;
 	case 'm':		/* left/right/bottom */
 		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
 		                 ? '-'
 		                 : (commit->object.flags & SYMMETRIC_LEFT)
 		                 ? '<'
 		                 : '>');
-		return;
+		return 1;
 	}
 
 	/* For the rest we have to parse the commit header. */
@@ -528,66 +536,33 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		parse_commit_header(c);
 
 	switch (placeholder[0]) {
-	case 's':
+	case 's':	/* subject */
 		strbuf_add(sb, msg + c->subject.off, c->subject.len);
-		return;
-	case 'a':
-		format_person_part(sb, placeholder[1],
+		return 1;
+	case 'a':	/* author ... */
+		return format_person_part(sb, placeholder[1],
 		                   msg + c->author.off, c->author.len);
-		return;
-	case 'c':
-		format_person_part(sb, placeholder[1],
+	case 'c':	/* committer ... */
+		return format_person_part(sb, placeholder[1],
 		                   msg + c->committer.off, c->committer.len);
-		return;
-	case 'e':
+	case 'e':	/* encoding */
 		strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
-		return;
-	case 'b':
+		return 1;
+	case 'b':	/* body */
 		strbuf_addstr(sb, msg + c->body_off);
-		return;
+		return 1;
 	}
+	return 0;	/* unknown placeholder */
 }
 
 void format_commit_message(const struct commit *commit,
                            const void *format, struct strbuf *sb)
 {
-	const char *placeholders[] = {
-		"H",		/* commit hash */
-		"h",		/* abbreviated commit hash */
-		"T",		/* tree hash */
-		"t",		/* abbreviated tree hash */
-		"P",		/* parent hashes */
-		"p",		/* abbreviated parent hashes */
-		"an",		/* author name */
-		"ae",		/* author email */
-		"ad",		/* author date */
-		"aD",		/* author date, RFC2822 style */
-		"ar",		/* author date, relative */
-		"at",		/* author date, UNIX timestamp */
-		"ai",		/* author date, ISO 8601 */
-		"cn",		/* committer name */
-		"ce",		/* committer email */
-		"cd",		/* committer date */
-		"cD",		/* committer date, RFC2822 style */
-		"cr",		/* committer date, relative */
-		"ct",		/* committer date, UNIX timestamp */
-		"ci",		/* committer date, ISO 8601 */
-		"e",		/* encoding */
-		"s",		/* subject */
-		"b",		/* body */
-		"Cred",		/* red */
-		"Cgreen",	/* green */
-		"Cblue",	/* blue */
-		"Creset",	/* reset color */
-		"n",		/* newline */
-		"m",		/* left/right/bottom */
-		NULL
-	};
 	struct format_commit_context context;
 
 	memset(&context, 0, sizeof(context));
 	context.commit = commit;
-	strbuf_expand(sb, format, placeholders, format_commit_item, &context);
+	strbuf_expand(sb, format, format_commit_item, &context);
 }
 
 static void pp_header(enum cmit_fmt fmt,
diff --git a/strbuf.c b/strbuf.c
index 5efcfc8..32ab8e5 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -146,11 +146,12 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
 	strbuf_setlen(sb, sb->len + len);
 }
 
-void strbuf_expand(struct strbuf *sb, const char *format,
-                   const char **placeholders, expand_fn_t fn, void *context)
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+                   void *context)
 {
 	for (;;) {
-		const char *percent, **p;
+		const char *percent;
+		size_t consumed;
 
 		percent = strchrnul(format, '%');
 		strbuf_add(sb, format, percent - format);
@@ -158,14 +159,10 @@ void strbuf_expand(struct strbuf *sb, const char *format,
 			break;
 		format = percent + 1;
 
-		for (p = placeholders; *p; p++) {
-			if (!prefixcmp(format, *p))
-				break;
-		}
-		if (*p) {
-			fn(sb, *p, context);
-			format += strlen(*p);
-		} else
+		consumed = fn(sb, format, context);
+		if (consumed)
+			format += consumed;
+		else
 			strbuf_addch(sb, '%');
 	}
 }
diff --git a/strbuf.h b/strbuf.h
index 36d61db..faec229 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -103,8 +103,8 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
 }
 extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
 
-typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
-extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context);
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
 
 __attribute__((format(printf,2,3)))
 extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
-- 
1.5.4.1220.g1a838

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH TAKE 2] Avoid a useless prefix lookup in strbuf_expand()
  2008-02-09 14:40 [PATCH TAKE 2] Avoid a useless prefix lookup in strbuf_expand() Marco Costalba
@ 2008-02-10  2:36 ` Junio C Hamano
  2008-02-10  6:59   ` Marco Costalba
  2008-02-10  2:52 ` Johannes Schindelin
  1 sibling, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2008-02-10  2:36 UTC (permalink / raw)
  To: Marco Costalba; +Cc: git

Marco Costalba <mcostalba@gmail.com> writes:

> Currently the --prett=format prefix is looked up in a

s/prett/&y/;

> New patch with Junio suggestions included.

Hmph, this is a blast from the past.

I do recall pointing out that a rather common "format:%an <%ae>"
ends up parsing the same line twice, and mentioned we may want
to memoise the first call's result in the format_commit_context
structure, but what else did I suggest???

Anyway, I'll take a look later.  Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH TAKE 2] Avoid a useless prefix lookup in strbuf_expand()
  2008-02-09 14:40 [PATCH TAKE 2] Avoid a useless prefix lookup in strbuf_expand() Marco Costalba
  2008-02-10  2:36 ` Junio C Hamano
@ 2008-02-10  2:52 ` Johannes Schindelin
  2008-02-10  7:00   ` Marco Costalba
  1 sibling, 1 reply; 6+ messages in thread
From: Johannes Schindelin @ 2008-02-10  2:52 UTC (permalink / raw)
  To: Marco Costalba; +Cc: gitster, git

Hi,

On Sat, 9 Feb 2008, Marco Costalba wrote:

> +	/* bougus commit, 'sb' cannot be updated, but

s/bougus/bogus/

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH TAKE 2] Avoid a useless prefix lookup in strbuf_expand()
  2008-02-10  2:36 ` Junio C Hamano
@ 2008-02-10  6:59   ` Marco Costalba
  2008-02-10 10:52     ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: Marco Costalba @ 2008-02-10  6:59 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Feb 10, 2008 3:36 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Marco Costalba <mcostalba@gmail.com> writes:
>
> > Currently the --prett=format prefix is looked up in a
>
> s/prett/&y/;
>
> > New patch with Junio suggestions included.
>
> Hmph, this is a blast from the past.
>
> I do recall pointing out that a rather common "format:%an <%ae>"
> ends up parsing the same line twice, and mentioned we may want
> to memoise the first call's result in the format_commit_context
> structure, but what else did I suggest???
>

Please read thread: "[PATCH RESEND] Avoid a useless prefix lookup in
strbuf_expand()"

you will find your suggestions and following answers.

> Anyway, I'll take a look later.  Thanks.
>

Thanks
Marco

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH TAKE 2] Avoid a useless prefix lookup in strbuf_expand()
  2008-02-10  2:52 ` Johannes Schindelin
@ 2008-02-10  7:00   ` Marco Costalba
  0 siblings, 0 replies; 6+ messages in thread
From: Marco Costalba @ 2008-02-10  7:00 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: gitster, git

On Feb 10, 2008 3:52 AM, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> Hi,
>
> On Sat, 9 Feb 2008, Marco Costalba wrote:
>
> > +     /* bougus commit, 'sb' cannot be updated, but
>
> s/bougus/bogus/
>

Yes :-) you are right.

Thanks

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH TAKE 2] Avoid a useless prefix lookup in strbuf_expand()
  2008-02-10  6:59   ` Marco Costalba
@ 2008-02-10 10:52     ` Junio C Hamano
  0 siblings, 0 replies; 6+ messages in thread
From: Junio C Hamano @ 2008-02-10 10:52 UTC (permalink / raw)
  To: Marco Costalba; +Cc: git

"Marco Costalba" <mcostalba@gmail.com> writes:

> On Feb 10, 2008 3:36 AM, Junio C Hamano <gitster@pobox.com> wrote:
> ...
>> I do recall pointing out that a rather common "format:%an <%ae>"
>> ends up parsing the same line twice, and mentioned we may want
>> to memoise the first call's result in the format_commit_context
>> structure, but what else did I suggest???
>>
>
> Please read thread: "[PATCH RESEND] Avoid a useless prefix lookup in
> strbuf_expand()"
>
> you will find your suggestions and following answers.

Yeah, I mentioned the comment needing to be adjusted (which you
did in this round), asked a minor question about the code (you
answered in the thread), besides pointing out that a rather
common "format:%an <%ae>" being inefficient (which you brushed
aside, claiming that is a special case).

I've touched-up a few typoes and style glitches and will park
this on 'pu' for now.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2008-02-10 10:53 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-02-09 14:40 [PATCH TAKE 2] Avoid a useless prefix lookup in strbuf_expand() Marco Costalba
2008-02-10  2:36 ` Junio C Hamano
2008-02-10  6:59   ` Marco Costalba
2008-02-10 10:52     ` Junio C Hamano
2008-02-10  2:52 ` Johannes Schindelin
2008-02-10  7:00   ` Marco Costalba

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.