* [PATCH 1/4] utf8.c: remove print_wrapped_text()
2010-02-19 22:13 [PATCH 0/4] utf8.c: strbuf'ify strbuf_add_wrapped_text() René Scharfe
@ 2010-02-19 22:15 ` René Scharfe
2010-02-19 22:15 ` [PATCH 2/4] utf8.c: remove print_spaces() René Scharfe
` (3 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: René Scharfe @ 2010-02-19 22:15 UTC (permalink / raw)
To: Git Mailing List; +Cc: Johannes Schindelin
print_wrapped_text() is the only function that calls
strbuf_add_wrapped_text() without a strbuf, which then writes its
results to stdout. At its only call site, supply a strbuf, call
strbuf_add_wrapped_text() directly and remove the wrapper function.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
builtin-shortlog.c | 17 ++++++++++++++---
utf8.c | 5 -----
utf8.h | 1 -
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/builtin-shortlog.c b/builtin-shortlog.c
index b3b055f..ecd2d45 100644
--- a/builtin-shortlog.c
+++ b/builtin-shortlog.c
@@ -304,9 +304,19 @@ parse_done:
return 0;
}
+static void add_wrapped_shortlog_msg(struct strbuf *sb, const char *s,
+ const struct shortlog *log)
+{
+ int col = strbuf_add_wrapped_text(sb, s, log->in1, log->in2, log->wrap);
+ if (col != log->wrap)
+ strbuf_addch(sb, '\n');
+}
+
void shortlog_output(struct shortlog *log)
{
int i, j;
+ struct strbuf sb = STRBUF_INIT;
+
if (log->sort_by_number)
qsort(log->list.items, log->list.nr, sizeof(struct string_list_item),
compare_by_number);
@@ -321,9 +331,9 @@ void shortlog_output(struct shortlog *log)
const char *msg = onelines->items[j].string;
if (log->wrap_lines) {
- int col = print_wrapped_text(msg, log->in1, log->in2, log->wrap);
- if (col != log->wrap)
- putchar('\n');
+ strbuf_reset(&sb);
+ add_wrapped_shortlog_msg(&sb, msg, log);
+ fwrite(sb.buf, sb.len, 1, stdout);
}
else
printf(" %s\n", msg);
@@ -337,6 +347,7 @@ void shortlog_output(struct shortlog *log)
log->list.items[i].util = NULL;
}
+ strbuf_release(&sb);
log->list.strdup_strings = 1;
string_list_clear(&log->list, 1);
clear_mailmap(&log->mailmap);
diff --git a/utf8.c b/utf8.c
index ab326ac..831cad6 100644
--- a/utf8.c
+++ b/utf8.c
@@ -405,11 +405,6 @@ new_line:
}
}
-int print_wrapped_text(const char *text, int indent, int indent2, int width)
-{
- return strbuf_add_wrapped_text(NULL, text, indent, indent2, width);
-}
-
int is_encoding_utf8(const char *name)
{
if (!name)
diff --git a/utf8.h b/utf8.h
index c9738d8..ebc4d2f 100644
--- a/utf8.h
+++ b/utf8.h
@@ -8,7 +8,6 @@ int utf8_strwidth(const char *string);
int is_utf8(const char *text);
int is_encoding_utf8(const char *name);
-int print_wrapped_text(const char *text, int indent, int indent2, int len);
int strbuf_add_wrapped_text(struct strbuf *buf,
const char *text, int indent, int indent2, int width);
--
1.7.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/4] utf8.c: remove print_spaces()
2010-02-19 22:13 [PATCH 0/4] utf8.c: strbuf'ify strbuf_add_wrapped_text() René Scharfe
2010-02-19 22:15 ` [PATCH 1/4] utf8.c: remove print_wrapped_text() René Scharfe
@ 2010-02-19 22:15 ` René Scharfe
2010-02-19 22:16 ` [PATCH 3/4] utf8.c: remove strbuf_write() René Scharfe
` (2 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: René Scharfe @ 2010-02-19 22:15 UTC (permalink / raw)
To: Git Mailing List; +Cc: Johannes Schindelin
The previous patch made sure that strbuf_add_wrapped_text() (and thus
strbuf_add_indented_text(), too) always get a strbuf. Make use of
this fact by adding strbuf_addchars(), a small helper that adds a
char the specified number of times to a strbuf, and use it to replace
print_spaces().
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
utf8.c | 15 ++++++---------
1 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/utf8.c b/utf8.c
index 831cad6..5146d30 100644
--- a/utf8.c
+++ b/utf8.c
@@ -288,14 +288,11 @@ static inline void strbuf_write(struct strbuf *sb, const char *buf, int len)
fwrite(buf, len, 1, stdout);
}
-static void print_spaces(struct strbuf *buf, int count)
+static void strbuf_addchars(struct strbuf *sb, int c, size_t n)
{
- static const char s[] = " ";
- while (count >= sizeof(s)) {
- strbuf_write(buf, s, sizeof(s) - 1);
- count -= sizeof(s) - 1;
- }
- strbuf_write(buf, s, count);
+ strbuf_grow(sb, n);
+ memset(sb->buf + sb->len, c, n);
+ strbuf_setlen(sb, sb->len + n);
}
static void strbuf_add_indented_text(struct strbuf *buf, const char *text,
@@ -307,7 +304,7 @@ static void strbuf_add_indented_text(struct strbuf *buf, const char *text,
const char *eol = strchrnul(text, '\n');
if (*eol == '\n')
eol++;
- print_spaces(buf, indent);
+ strbuf_addchars(buf, ' ', indent);
strbuf_write(buf, text, eol - text);
text = eol;
indent = indent2;
@@ -366,7 +363,7 @@ int strbuf_add_wrapped_text(struct strbuf *buf,
if (space)
start = space;
else
- print_spaces(buf, indent);
+ strbuf_addchars(buf, ' ', indent);
strbuf_write(buf, start, text - start);
if (!c)
return w;
--
1.7.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/4] utf8.c: remove strbuf_write()
2010-02-19 22:13 [PATCH 0/4] utf8.c: strbuf'ify strbuf_add_wrapped_text() René Scharfe
2010-02-19 22:15 ` [PATCH 1/4] utf8.c: remove print_wrapped_text() René Scharfe
2010-02-19 22:15 ` [PATCH 2/4] utf8.c: remove print_spaces() René Scharfe
@ 2010-02-19 22:16 ` René Scharfe
2010-02-19 22:20 ` [PATCH 4/4] utf8.c: speculatively assume utf-8 in strbuf_add_wrapped_text() René Scharfe
2010-02-20 9:03 ` [PATCH 0/4] utf8.c: strbuf'ify strbuf_add_wrapped_text() Johannes Schindelin
4 siblings, 0 replies; 8+ messages in thread
From: René Scharfe @ 2010-02-19 22:16 UTC (permalink / raw)
To: Git Mailing List; +Cc: Johannes Schindelin
The patch before the previous one made sure that all callers of
strbuf_add_wrapped_text() supply a strbuf. Replace all calls of
strbuf_write() with regular strbuf functions and remove it.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
utf8.c | 18 +++++-------------
1 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/utf8.c b/utf8.c
index 5146d30..87437b0 100644
--- a/utf8.c
+++ b/utf8.c
@@ -280,14 +280,6 @@ int is_utf8(const char *text)
return 1;
}
-static inline void strbuf_write(struct strbuf *sb, const char *buf, int len)
-{
- if (sb)
- strbuf_insert(sb, sb->len, buf, len);
- else
- fwrite(buf, len, 1, stdout);
-}
-
static void strbuf_addchars(struct strbuf *sb, int c, size_t n)
{
strbuf_grow(sb, n);
@@ -305,7 +297,7 @@ static void strbuf_add_indented_text(struct strbuf *buf, const char *text,
if (*eol == '\n')
eol++;
strbuf_addchars(buf, ' ', indent);
- strbuf_write(buf, text, eol - text);
+ strbuf_add(buf, text, eol - text);
text = eol;
indent = indent2;
}
@@ -364,7 +356,7 @@ int strbuf_add_wrapped_text(struct strbuf *buf,
start = space;
else
strbuf_addchars(buf, ' ', indent);
- strbuf_write(buf, start, text - start);
+ strbuf_add(buf, start, text - start);
if (!c)
return w;
space = text;
@@ -373,20 +365,20 @@ int strbuf_add_wrapped_text(struct strbuf *buf,
else if (c == '\n') {
space++;
if (*space == '\n') {
- strbuf_write(buf, "\n", 1);
+ strbuf_addch(buf, '\n');
goto new_line;
}
else if (!isalnum(*space))
goto new_line;
else
- strbuf_write(buf, " ", 1);
+ strbuf_addch(buf, ' ');
}
w++;
text++;
}
else {
new_line:
- strbuf_write(buf, "\n", 1);
+ strbuf_addch(buf, '\n');
text = bol = space + isspace(*space);
space = NULL;
w = indent = indent2;
--
1.7.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/4] utf8.c: speculatively assume utf-8 in strbuf_add_wrapped_text()
2010-02-19 22:13 [PATCH 0/4] utf8.c: strbuf'ify strbuf_add_wrapped_text() René Scharfe
` (2 preceding siblings ...)
2010-02-19 22:16 ` [PATCH 3/4] utf8.c: remove strbuf_write() René Scharfe
@ 2010-02-19 22:20 ` René Scharfe
2010-02-20 9:14 ` Johannes Schindelin
2010-02-20 9:03 ` [PATCH 0/4] utf8.c: strbuf'ify strbuf_add_wrapped_text() Johannes Schindelin
4 siblings, 1 reply; 8+ messages in thread
From: René Scharfe @ 2010-02-19 22:20 UTC (permalink / raw)
To: Git Mailing List; +Cc: Johannes Schindelin
is_utf8() works by calling utf8_width() for each character at the
supplied location. In strbuf_add_wrapped_text(), we do that anyway
while wrapping the lines. So instead of checking the encoding
beforehand, optimistically assume that it's utf-8 and wrap along
until an invalid character is hit, and when that happens start over.
This pays off if the text consists only of valid utf-8 characters.
The following command was run against the Linux kernel repo with
git 1.7.0:
$ time git log --format='%b' v2.6.32 >/dev/null
real 0m2.679s
user 0m2.580s
sys 0m0.100s
$ time git log --format='%w(60,4,8)%b' >/dev/null
real 0m4.342s
user 0m4.230s
sys 0m0.110s
And with this patch series:
$ time git log --format='%w(60,4,8)%b' >/dev/null
real 0m3.741s
user 0m3.630s
sys 0m0.110s
So the cost of wrapping is reduced to 70% in this case.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
Missing: numbers for a non-utf-8 repo.
utf8.c | 23 +++++++++++++++++------
1 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/utf8.c b/utf8.c
index 87437b0..84cfc72 100644
--- a/utf8.c
+++ b/utf8.c
@@ -324,16 +324,21 @@ static size_t display_mode_esc_sequence_len(const char *s)
* consumed (and no extra indent is necessary for the first line).
*/
int strbuf_add_wrapped_text(struct strbuf *buf,
- const char *text, int indent, int indent2, int width)
+ const char *text, int indent1, int indent2, int width)
{
- int w = indent, assume_utf8 = is_utf8(text);
- const char *bol = text, *space = NULL;
+ int indent, w, assume_utf8 = 1;
+ const char *bol, *space, *start = text;
+ size_t orig_len = buf->len;
if (width <= 0) {
- strbuf_add_indented_text(buf, text, indent, indent2);
+ strbuf_add_indented_text(buf, text, indent1, indent2);
return 1;
}
+retry:
+ bol = text;
+ w = indent = indent1;
+ space = NULL;
if (indent < 0) {
w = -indent;
space = text;
@@ -385,9 +390,15 @@ new_line:
}
continue;
}
- if (assume_utf8)
+ if (assume_utf8) {
w += utf8_width(&text, NULL);
- else {
+ if (!text) {
+ assume_utf8 = 0;
+ text = start;
+ strbuf_setlen(buf, orig_len);
+ goto retry;
+ }
+ } else {
w++;
text++;
}
--
1.7.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 0/4] utf8.c: strbuf'ify strbuf_add_wrapped_text()
2010-02-19 22:13 [PATCH 0/4] utf8.c: strbuf'ify strbuf_add_wrapped_text() René Scharfe
` (3 preceding siblings ...)
2010-02-19 22:20 ` [PATCH 4/4] utf8.c: speculatively assume utf-8 in strbuf_add_wrapped_text() René Scharfe
@ 2010-02-20 9:03 ` Johannes Schindelin
4 siblings, 0 replies; 8+ messages in thread
From: Johannes Schindelin @ 2010-02-20 9:03 UTC (permalink / raw)
To: René Scharfe; +Cc: Git Mailing List
[-- Attachment #1: Type: TEXT/PLAIN, Size: 451 bytes --]
Hi,
On Fri, 19 Feb 2010, René Scharfe wrote:
> The strbuf parameter of strbuf_add_wrapped_text() in utf8.c is
> optional; if it's missing, results are directly written to stdout.
The reasoning for it is my private 'git-fmt' program, which can reformat
really large text files that I did not want to be read in fully,
converted, and written out fully.
But as I am the only user of this feature, it makes sense to simplify the
code.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 8+ messages in thread