dash.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Herbert Xu <herbert@gondor.apana.org.au>
To: Harald van Dijk <harald@gigawatt.nl>
Cc: Stephane Chazelas <stephane.chazelas@gmail.com>,
	Gioele Barabucci <gioele@svario.it>,
	dash@vger.kernel.org
Subject: [PATCH v2] builtin: Fix handling of trailing IFS white spaces
Date: Mon, 6 Jun 2016 16:48:05 +0800	[thread overview]
Message-ID: <20160606084805.GA20946@gondor.apana.org.au> (raw)
In-Reply-To: <5661EEB7.8080908@gigawatt.nl>

On Fri, Dec 04, 2015 at 08:51:19PM +0100, Harald van Dijk wrote:
>
> Here it is. Attached is an updated patch that ignores the complete 
> terminator if only a single field remains, otherwise ignores only 
> trailing IFS whitespace.

There is a little bug in the patch if you happened to start with
an escaped IFS, otherwise I think it's good.

However, I think this loop is getting too deep so I've reworked
it to have just one loop over the string instead of three (two
nested loops).

Please test this.

Thanks!

---8<---
The read built-in does not handle trailing IFS white spaces in
the right way, when there are more fields than variables.  Part
of the problem is that this case is handled outside of ifsbreakup.

Harald van Dijk wrote a patch to fix this by moving the magic
into ifsbreakup itself.

This patch further reorganises the ifsbreakup loop by having only
one loop over the whole string.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/src/expand.c b/src/expand.c
index b2d710d..921eecf 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -50,6 +50,7 @@
 #include <glob.h>
 #endif
 #include <ctype.h>
+#include <stdbool.h>
 
 /*
  * Routines to expand arguments to commands.  We have to deal with
@@ -203,7 +204,7 @@ expandarg(union node *arg, struct arglist *arglist, int flag)
 	 * TODO - EXP_REDIR
 	 */
 	if (flag & EXP_FULL) {
-		ifsbreakup(p, &exparg);
+		ifsbreakup(p, -1, &exparg);
 		*exparg.lastp = NULL;
 		exparg.lastp = &exparg.list;
 		expandmeta(exparg.list, flag);
@@ -1016,15 +1017,18 @@ recordregion(int start, int end, int nulonly)
  * Break the argument string into pieces based upon IFS and add the
  * strings to the argument list.  The regions of the string to be
  * searched for IFS characters have been stored by recordregion.
+ * If maxargs is non-negative, at most maxargs arguments will be created, by
+ * joining together the last arguments.
  */
 void
-ifsbreakup(char *string, struct arglist *arglist)
+ifsbreakup(char *string, int maxargs, struct arglist *arglist)
 {
 	struct ifsregion *ifsp;
 	struct strlist *sp;
 	char *start;
 	char *p;
 	char *q;
+	char *r = NULL;
 	const char *ifs, *realifs;
 	int ifsspc;
 	int nulonly;
@@ -1042,56 +1046,94 @@ ifsbreakup(char *string, struct arglist *arglist)
 			ifs = nulonly ? nullstr : realifs;
 			ifsspc = 0;
 			while (p < string + ifsp->endoff) {
+				int c;
+				bool isifs;
+				bool isdefifs;
+
 				q = p;
-				if (*p == (char)CTLESC)
-					p++;
-				if (strchr(ifs, *p)) {
+				c = *p++;
+				if (c == (char)CTLESC)
+					c = *p++;
+
+				isifs = strchr(ifs, c);
+				isdefifs = false;
+				if (isifs)
+					isdefifs = strchr(defifs, c);
+
+				/* If only reading one more argument:
+				 * If we have exactly one field,
+				 * read that field without its terminator.
+				 * If we have more than one field,
+				 * read all fields including their terminators,
+				 * except for trailing IFS whitespace.
+				 *
+				 * This means that if we have only IFS
+				 * characters left, and at most one
+				 * of them is non-whitespace, we stop
+				 * reading here.
+				 * Otherwise, we read all the remaining
+				 * characters except for trailing
+				 * IFS whitespace.
+				 *
+				 * In any case, r indicates the start
+				 * of the characters to remove, or NULL
+				 * if no characters should be removed.
+				 */
+				if (!maxargs) {
+					if (isdefifs) {
+						if (!r)
+							r = q;
+						continue;
+					}
+
+					if (!(isifs && ifsspc))
+						r = NULL;
+
+					ifsspc = 0;
+					continue;
+				}
+
+				if (ifsspc) {
+					if (isdefifs)
+						continue;
+
+					if (!isifs) {
+						ifsspc = 0;
+						p = q;
+					}
+
+					start = p;
+				}
+
+				if (isifs) {
 					if (!nulonly)
-						ifsspc = (strchr(defifs, *p) != NULL);
+						ifsspc = isdefifs;
 					/* Ignore IFS whitespace at start */
 					if (q == start && ifsspc) {
-						p++;
 						start = p;
 						continue;
 					}
+					if (maxargs > 0 && !--maxargs) {
+						r = q;
+						continue;
+					}
 					*q = '\0';
 					sp = (struct strlist *)stalloc(sizeof *sp);
 					sp->text = start;
 					*arglist->lastp = sp;
 					arglist->lastp = &sp->next;
-					p++;
-					if (!nulonly) {
-						for (;;) {
-							if (p >= string + ifsp->endoff) {
-								break;
-							}
-							q = p;
-							if (*p == (char)CTLESC)
-								p++;
-							if (strchr(ifs, *p) == NULL ) {
-								p = q;
-								break;
-							} else if (strchr(defifs, *p) == NULL) {
-								if (ifsspc) {
-									p++;
-									ifsspc = 0;
-								} else {
-									p = q;
-									break;
-								}
-							} else
-								p++;
-						}
-					}
 					start = p;
 				} else
-					p++;
+					ifsspc = 0;
 			}
 		} while ((ifsp = ifsp->next) != NULL);
 		if (nulonly)
 			goto add;
 	}
 
+	if (r)
+		*r = '\0';
+
 	if (!*start)
 		return;
 
diff --git a/src/expand.h b/src/expand.h
index 6a90f67..26dc5b4 100644
--- a/src/expand.h
+++ b/src/expand.h
@@ -69,7 +69,7 @@ char *_rmescapes(char *, int);
 int casematch(union node *, char *);
 void recordregion(int, int, int);
 void removerecordregions(int); 
-void ifsbreakup(char *, struct arglist *);
+void ifsbreakup(char *, int, struct arglist *);
 void ifsfree(void);
 
 /* From arith.y */
diff --git a/src/miscbltin.c b/src/miscbltin.c
index b596fd2..39b9c47 100644
--- a/src/miscbltin.c
+++ b/src/miscbltin.c
@@ -67,28 +67,21 @@
  *  less fields than variables -> remaining variables unset.
  *
  *  @param line complete line of input
+ *  @param ac argument count
  *  @param ap argument (variable) list
  *  @param len length of line including trailing '\0'
  */
 static void
-readcmd_handle_line(char *s, char **ap)
+readcmd_handle_line(char *s, int ac, char **ap)
 {
 	struct arglist arglist;
 	struct strlist *sl;
-	char *backup;
-	char *line;
 
-	/* ifsbreakup will fiddle with stack region... */
-	line = stackblock();
 	s = grabstackstr(s);
 
-	/* need a copy, so that delimiters aren't lost
-	 * in case there are more fields than variables */
-	backup = sstrdup(line);
-
 	arglist.lastp = &arglist.list;
 	
-	ifsbreakup(s, &arglist);
+	ifsbreakup(s, ac, &arglist);
 	*arglist.lastp = NULL;
 	ifsfree();
 
@@ -104,21 +97,6 @@ readcmd_handle_line(char *s, char **ap)
 			return;
 		}
 
-		/* remaining fields present, but no variables left. */
-		if (!ap[1] && sl->next) {
-			size_t offset;
-			char *remainder;
-
-			/* FIXME little bit hacky, assuming that ifsbreakup 
-			 * will not modify the length of the string */
-			offset = sl->text - s;
-			remainder = backup + offset;
-			rmescapes(remainder);
-			setvar(*ap, remainder, 0);
-
-			return;
-		}
-		
 		/* set variable to field */
 		rmescapes(sl->text);
 		setvar(*ap, sl->text, 0);
@@ -211,7 +189,7 @@ start:
 out:
 	recordregion(startloc, p - (char *)stackblock(), 0);
 	STACKSTRNUL(p);
-	readcmd_handle_line(p + 1, ap);
+	readcmd_handle_line(p + 1, argc - (ap - argv), ap);
 	return status;
 }
 
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

  parent reply	other threads:[~2016-06-06  8:48 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-02 22:37 dash: read does not ignore trailing spaces Gioele Barabucci
2015-12-02 23:25 ` Jilles Tjoelker
2015-12-03 21:02 ` Harald van Dijk
2015-12-03 21:17   ` Stephane Chazelas
2015-12-03 21:43     ` Martijn Dekker
2015-12-03 23:04       ` Stephane Chazelas
2015-12-03 23:17         ` Stephane Chazelas
2015-12-04  0:00           ` Stephane Chazelas
2015-12-03 22:26     ` Harald van Dijk
2015-12-04 19:51       ` Harald van Dijk
2016-01-29 12:57         ` Martijn Dekker
2016-06-06  8:48         ` Herbert Xu [this message]
2016-06-06 20:43           ` [PATCH v2] builtin: Fix handling of trailing IFS white spaces Harald van Dijk
2016-06-07  9:25             ` [PATCH v3] " Herbert Xu
2016-06-12 10:35               ` Harald van Dijk
2016-06-12 11:06                 ` Herbert Xu
2016-06-12 11:12                   ` Harald van Dijk
2016-06-12 12:17                     ` [PATCH v4] " Herbert Xu
2016-06-19 22:01                       ` Harald van Dijk
2016-06-20  1:28                         ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160606084805.GA20946@gondor.apana.org.au \
    --to=herbert@gondor.apana.org.au \
    --cc=dash@vger.kernel.org \
    --cc=gioele@svario.it \
    --cc=harald@gigawatt.nl \
    --cc=stephane.chazelas@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).