All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] column: make defined separator to be non-greedy
@ 2012-09-26 20:45 Sami Kerola
  2012-09-26 21:01 ` Pádraig Brady
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Sami Kerola @ 2012-09-26 20:45 UTC (permalink / raw)
  To: util-linux; +Cc: kerolasa

This patch changes how consecutive delimiters are interpreted.
Earlier versions merged columns whose content was an empty string,
which led to the output shown below.

$ printf "a:b:c\n1::3\n" | column  -t -s ':'
a  b  c
1  3

The number 3 is in the wrong column. This patch fixes that and changes
the output in the following way.

$ printf "a:b:c\n1::3\n" | column  -t -s ':'
a  b  c
1     3

This patch does not alter the default case, i.e., consecutive white spaces
are still understood as a single field separator, and white space at the
beginning of a line is still ignored.

Signed-off-by: Sami Kerola <kerolasa@iki.fi>
---
 text-utils/column.c | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/text-utils/column.c b/text-utils/column.c
index 61668fd..7eea66a 100644
--- a/text-utils/column.c
+++ b/text-utils/column.c
@@ -75,7 +75,8 @@ static char *mtsafe_strtok(char *, const char *, char **);
 static int input(FILE *fp, int *maxlength, wchar_t ***list, int *entries);
 static void c_columnate(int maxlength, long termwidth, wchar_t **list, int entries);
 static void r_columnate(int maxlength, long termwidth, wchar_t **list, int entries);
-static void maketbl(wchar_t **list, int entries, wchar_t *separator);
+static wchar_t *local_wcstok(wchar_t *p, const wchar_t *separator, int greedy, wchar_t **wcstok_state);
+static void maketbl(wchar_t **list, int entries, wchar_t *separator, int greedy);
 static void print(wchar_t **list, int entries);
 
 typedef struct _tbl {
@@ -112,6 +113,7 @@ int main(int argc, char **argv)
 	unsigned int eval = 0;		/* exit value */
 	int maxlength = 0;		/* longest record */
 	wchar_t **list = NULL;		/* array of pointers to records */
+	int greedy = 1;
 
 	/* field separator for table option */
 	wchar_t default_separator[] = { '\t', ' ', 0 };
@@ -151,6 +153,7 @@ int main(int argc, char **argv)
 			break;
 		case 's':
 			separator = mbs_to_wcs(optarg);
+			greedy = 0;
 			break;
 		case 't':
 			tflag = 1;
@@ -183,7 +186,7 @@ int main(int argc, char **argv)
 		exit(eval);
 
 	if (tflag)
-		maketbl(list, entries, separator);
+		maketbl(list, entries, separator, greedy);
 	else if (maxlength >= termwidth)
 		print(list, entries);
 	else if (xflag)
@@ -270,7 +273,31 @@ static void print(wchar_t **list, int entries)
 	}
 }
 
-static void maketbl(wchar_t **list, int entries, wchar_t *separator)
+wchar_t *local_wcstok(wchar_t * p, const wchar_t * separator, int greedy,
+		      wchar_t ** wcstok_state)
+{
+	wchar_t *result;
+	if (greedy)
+		return wcstok(p, separator, wcstok_state);
+
+	if (p == NULL) {
+		if (*wcstok_state == NULL)
+			return NULL;
+		else
+			p = *wcstok_state;
+	}
+	result = p;
+	p = wcspbrk(result, separator);
+	if (p == NULL)
+		*wcstok_state = NULL;
+	else {
+		*p = '\0';
+		*wcstok_state = p + 1;
+	}
+	return result;
+}
+
+static void maketbl(wchar_t **list, int entries, wchar_t *separator, int greedy)
 {
 	TBL *t;
 	int cnt, i;
@@ -279,7 +306,7 @@ static void maketbl(wchar_t **list, int entries, wchar_t *separator)
 	ssize_t maxcols = DEFCOLS, coloff;
 	TBL *tbl;
 	wchar_t **cols;
-	wchar_t *wcstok_state;
+	wchar_t *wcstok_state = NULL;
 
 	t = tbl = xcalloc(entries, sizeof(TBL));
 	cols = xcalloc(maxcols, sizeof(wchar_t *));
@@ -288,7 +315,7 @@ static void maketbl(wchar_t **list, int entries, wchar_t *separator)
 	for (lp = list, cnt = 0; cnt < entries; ++cnt, ++lp, ++t) {
 		coloff = 0;
 		p = *lp;
-		while ((cols[coloff] = wcstok(p, separator, &wcstok_state)) != NULL) {
+		while ((cols[coloff] = local_wcstok(p, separator, greedy, &wcstok_state)) != NULL) {
 			if (++coloff == maxcols) {
 				maxcols += DEFCOLS;
 				cols = xrealloc(cols, maxcols * sizeof(wchar_t *));
-- 
1.7.12.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] column: make defined separator to be non-greedy
  2012-09-26 20:45 [PATCH] column: make defined separator to be non-greedy Sami Kerola
@ 2012-09-26 21:01 ` Pádraig Brady
  2012-09-29  9:21 ` [PATCH] docs: column.1 describe change of separator behavior in bugs section Sami Kerola
  2012-10-02  8:29 ` [PATCH] column: make defined separator to be non-greedy Karel Zak
  2 siblings, 0 replies; 7+ messages in thread
From: Pádraig Brady @ 2012-09-26 21:01 UTC (permalink / raw)
  To: Sami Kerola; +Cc: util-linux

On 09/26/2012 09:45 PM, Sami Kerola wrote:
> This patch changes interpretation of subsequent delimeter interpretation.
> Earlier version merged columns that had null string as content together,
> which lead to output as visualized below.
>
> $ printf "a:b:c\n1::3\n" | column  -t -s ':'
> a  b  c
> 1  3
>
> The number 3 has wrong column, which this patch takes care of, and alters
> the output following way.
>
> $ printf "a:b:c\n1::3\n" | column  -t -s ':'
> a  b  c
> 1     3
>
> This patch does not alter the default case, e.g., subsequent white spaces
> are understood as separator of the same field, and the beginning of line
> white spaces are being ignored together.
>
> Signed-off-by: Sami Kerola<kerolasa@iki.fi>

The new operation makes sense to me.
You can always get the previous operation
by filtering through tr -s ':' first.

cheers,
Pádraig.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] docs: column.1 describe change of separator behavior in bugs section
  2012-09-26 20:45 [PATCH] column: make defined separator to be non-greedy Sami Kerola
  2012-09-26 21:01 ` Pádraig Brady
@ 2012-09-29  9:21 ` Sami Kerola
  2012-09-29 10:23   ` Pádraig Brady
  2012-10-02  8:29 ` [PATCH] column: make defined separator to be non-greedy Karel Zak
  2 siblings, 1 reply; 7+ messages in thread
From: Sami Kerola @ 2012-09-29  9:21 UTC (permalink / raw)
  To: util-linux; +Cc: kerolasa

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 1126 bytes --]

Document in the manual page how to achieve the old behavior, just in case
someone relies on the previous buggy behavior of the command.

Reported-by: Pádraig Brady <P@draigbrady.com>
Signed-off-by: Sami Kerola <kerolasa@iki.fi>
---
 text-utils/column.1 | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/text-utils/column.1 b/text-utils/column.1
index 2050666..775c73b 100644
--- a/text-utils/column.1
+++ b/text-utils/column.1
@@ -65,6 +65,35 @@ the screen if no other information is available.
 .nf
 sed 's/#.*//' /etc/fstab | column -t
 .nf
+.SH BUGS
+The util-linux version 2.23 changed
+.B \-s
+option to be non-greedy.  The change made the following output
+.PP
+.EX
+$ printf "a:b:c\n1::3\n" | column  -t -s ':'
+a  b  c
+1  3
+.EE
+.PP
+to be
+.PP
+.EX
+$ printf "a:b:c\n1::3\n" | column  -t -s ':'
+a  b  c
+1     3
+.EE
+.PP
+If there is need to use greedy separator the old behavior can be achieved
+by filtering with
+.BR sed (1).
+.PP
+.EX
+$ printf "xzcatxzdog\\ndonkeyxzzxkong\\n" |
+> sed 's/^[xz]*//
+> s/[xz]\\+/x/g' |
+> column -t -s xz
+.EE
 .SH "SEE ALSO"
 .BR colrm (1),
 .BR ls (1),
-- 
1.7.12.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] docs: column.1 describe change of separator behavior in bugs section
  2012-09-29  9:21 ` [PATCH] docs: column.1 describe change of separator behavior in bugs section Sami Kerola
@ 2012-09-29 10:23   ` Pádraig Brady
  2012-09-29 12:49     ` Sami Kerola
  2012-10-02  8:45     ` Karel Zak
  0 siblings, 2 replies; 7+ messages in thread
From: Pádraig Brady @ 2012-09-29 10:23 UTC (permalink / raw)
  To: Sami Kerola; +Cc: util-linux

On 09/29/2012 10:21 AM, Sami Kerola wrote:
> Add to manual page how to achieve old behavior, just in case someone
> relies on buggy behavior of the command.
>
> Reported-by: Pádraig Brady<P@draigbrady.com>
> Signed-off-by: Sami Kerola<kerolasa@iki.fi>
> ---
>   text-utils/column.1 | 29 +++++++++++++++++++++++++++++
>   1 file changed, 29 insertions(+)
>
> diff --git a/text-utils/column.1 b/text-utils/column.1
> index 2050666..775c73b 100644
> --- a/text-utils/column.1
> +++ b/text-utils/column.1
> @@ -65,6 +65,35 @@ the screen if no other information is available.
>   .nf
>   sed 's/#.*//' /etc/fstab | column -t
>   .nf
> +.SH BUGS
> +The util-linux version 2.23 changed
> +.B \-s
> +option to be non-greedy.  The change made the following output
> +.PP
> +.EX
> +$ printf "a:b:c\n1::3\n" | column  -t -s ':'
> +a  b  c
> +1  3
> +.EE
> +.PP
> +to be
> +.PP
> +.EX
> +$ printf "a:b:c\n1::3\n" | column  -t -s ':'
> +a  b  c
> +1     3
> +.EE
> +.PP
> +If there is need to use greedy separator the old behavior can be achieved
> +by filtering with
> +.BR sed (1).
> +.PP
> +.EX
> +$ printf "xzcatxzdog\\ndonkeyxzzxkong\\n" |
> +>  sed 's/^[xz]*//
> +>  s/[xz]\\+/x/g' |
> +>  column -t -s xz
> +.EE

Too much info for a man page IMHO.

cheers,
Pádraig.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] docs: column.1 describe change of separator behavior in bugs section
  2012-09-29 10:23   ` Pádraig Brady
@ 2012-09-29 12:49     ` Sami Kerola
  2012-10-02  8:45     ` Karel Zak
  1 sibling, 0 replies; 7+ messages in thread
From: Sami Kerola @ 2012-09-29 12:49 UTC (permalink / raw)
  To: Pádraig Brady; +Cc: util-linux

On Sat, Sep 29, 2012 at 11:23 AM, Pádraig Brady <P@draigbrady.com> wrote:
> On 09/29/2012 10:21 AM, Sami Kerola wrote:
>> Add to manual page how to achieve old behavior, just in case someone
>> relies on buggy behavior of the command.
>
> Too much info for a man page IMHO.

Hi Pádraig,

You might be right. Karel, feel free to drop the patch if you want to
keep the man page short. If you choose to merge it, maybe adding a FIXME
comment 'remove column version < 2.23 compatibility note after
29.09.2017' is a good idea.

-- 
   Sami Kerola
   http://www.iki.fi/kerolasa/

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] column: make defined separator to be non-greedy
  2012-09-26 20:45 [PATCH] column: make defined separator to be non-greedy Sami Kerola
  2012-09-26 21:01 ` Pádraig Brady
  2012-09-29  9:21 ` [PATCH] docs: column.1 describe change of separator behavior in bugs section Sami Kerola
@ 2012-10-02  8:29 ` Karel Zak
  2 siblings, 0 replies; 7+ messages in thread
From: Karel Zak @ 2012-10-02  8:29 UTC (permalink / raw)
  To: Sami Kerola; +Cc: util-linux

On Wed, Sep 26, 2012 at 09:45:36PM +0100, Sami Kerola wrote:
>  text-utils/column.c | 37 ++++++++++++++++++++++++++++++++-----
>  1 file changed, 32 insertions(+), 5 deletions(-)

Applied, thanks.

-- 
 Karel Zak  <kzak@redhat.com>
 http://karelzak.blogspot.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] docs: column.1 describe change of separator behavior in bugs section
  2012-09-29 10:23   ` Pádraig Brady
  2012-09-29 12:49     ` Sami Kerola
@ 2012-10-02  8:45     ` Karel Zak
  1 sibling, 0 replies; 7+ messages in thread
From: Karel Zak @ 2012-10-02  8:45 UTC (permalink / raw)
  To: Pádraig Brady; +Cc: Sami Kerola, util-linux

On Sat, Sep 29, 2012 at 11:23:53AM +0100, Pádraig Brady wrote:
> On 09/29/2012 10:21 AM, Sami Kerola wrote:
> Too much info for a man page IMHO.

 Applied with some changes, thanks

-- 
 Karel Zak  <kzak@redhat.com>
 http://karelzak.blogspot.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-10-02  8:46 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-26 20:45 [PATCH] column: make defined separator to be non-greedy Sami Kerola
2012-09-26 21:01 ` Pádraig Brady
2012-09-29  9:21 ` [PATCH] docs: column.1 describe change of separator behavior in bugs section Sami Kerola
2012-09-29 10:23   ` Pádraig Brady
2012-09-29 12:49     ` Sami Kerola
2012-10-02  8:45     ` Karel Zak
2012-10-02  8:29 ` [PATCH] column: make defined separator to be non-greedy Karel Zak

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.