dash.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Herbert Xu <herbert@gondor.apana.org.au>
To: DASH Mailing List <dash@vger.kernel.org>
Subject: [v3 PATCH 05/13] expand: Process multi-byte characters in expmeta
Date: Sun, 05 May 2024 17:14:35 +0800	[thread overview]
Message-ID: <3245a4aa0efcb3f503d6974ec63ba822a5d9e288.1714900377.git.herbert@gondor.apana.org.au> (raw)
In-Reply-To: <cover.1714900377.git.herbert@gondor.apana.org.au>

When glob(3) is not in use, make sure that expmeta processes
multi-byte characters correctly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 src/expand.c | 107 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 34 deletions(-)

diff --git a/src/expand.c b/src/expand.c
index 60a51b1..0e85025 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -84,6 +84,7 @@
 #define RMESCAPE_GLOB	0x2	/* Add backslashes for glob */
 #define RMESCAPE_GROW	0x8	/* Grow strings instead of stalloc */
 #define RMESCAPE_HEAP	0x10	/* Malloc strings instead of stalloc */
+#define RMESCAPE_EMETA	0x20	/* Remove backslashes too */
 
 /* Add CTLESC when necessary. */
 #define QUOTES_ESC	(EXP_FULL | EXP_CASE)
@@ -1347,15 +1348,13 @@ expandmeta(struct strlist *str)
 		savelastp = exparg.lastp;
 
 		INTOFF;
-		p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP);
+		p = str->text;
 		len = strlen(p);
 		expdir_max = len + PATH_MAX;
 		expdir = ckmalloc(expdir_max);
 
 		expmeta(p, len, 0);
 		ckfree(expdir);
-		if (p != str->text)
-			ckfree(p);
 		INTON;
 		if (exparg.lastp == savelastp) {
 			/*
@@ -1376,6 +1375,41 @@ nometa:
 	}
 }
 
+static void expmeta_rmescapes(char *enddir, char *name)
+{
+	preglob(strcpy(enddir, name), RMESCAPE_EMETA);
+}
+
+static unsigned mbcharlen(char *p)
+{
+	int esc = 0;
+
+	if (*++p == (char)CTLESC)
+		esc++;
+
+	return esc + 3 + (unsigned char)p[esc];
+}
+
+static int skipesc(char *p)
+{
+	int esc = 0;
+
+	if (p[esc] == (char)CTLMBCHAR)
+		return esc + mbcharlen(p);
+
+	if (*p == (char)CTLESC)
+		esc++;
+
+	if (p[esc] == '\\' && p[esc + 1]) {
+		esc++;
+		if (p[esc] == (char)CTLMBCHAR)
+			return esc + mbcharlen(p + esc);
+		if (p[esc] == (char)CTLESC)
+			esc++;
+	}
+
+	return esc;
+}
 
 /*
  * Do metacharacter (i.e. *, ?, [...]) expansion.
@@ -1385,17 +1419,18 @@ STATIC void
 expmeta(char *name, unsigned name_len, unsigned expdir_len)
 {
 	char *enddir = expdir + expdir_len;
-	char *p;
+	struct stat64 statb;
+	struct dirent64 *dp;
 	const char *cp;
-	char *start;
 	char *endname;
 	int metaflag;
-	struct stat64 statb;
-	DIR *dirp;
-	struct dirent64 *dp;
-	int atend;
 	int matchdot;
+	char *start;
+	DIR *dirp;
+	char *pat;
+	char *p;
 	int esc;
+	int c;
 
 	metaflag = 0;
 	start = name;
@@ -1407,8 +1442,7 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
 			if (*q == '!')
 				q++;
 			for (;;) {
-				if (*q == '\\')
-					q++;
+				q += skipesc(q);
 				if (*q == '/' || *q == '\0')
 					break;
 				if (*++q == ']') {
@@ -1417,8 +1451,8 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
 				}
 			}
 		} else {
-			if (*p == '\\' && p[1])
-				esc++;
+			esc = skipesc(p);
+
 			if (p[esc] == '/') {
 				if (metaflag)
 					break;
@@ -1429,24 +1463,18 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
 	if (metaflag == 0) {	/* we've reached the end of the file name */
 		if (!expdir_len)
 			return;
-		p = name;
-		do {
-			if (*p == '\\' && p[1])
-				p++;
-			*enddir++ = *p;
-		} while (*p++);
+		expmeta_rmescapes(enddir, name);
 		if (lstat64(expdir, &statb) >= 0)
 			addfname(expdir);
 		return;
 	}
 	endname = p;
 	if (name < start) {
-		p = name;
-		do {
-			if (*p == '\\' && p[1])
-				p++;
-			*enddir++ = *p++;
-		} while (p < start);
+		c = *start;
+		*start = 0;
+		expmeta_rmescapes(enddir, name);
+		*start = c;
+		enddir += strlen(enddir);
 	}
 	*enddir = 0;
 	cp = expdir;
@@ -1455,25 +1483,26 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
 		cp = ".";
 	if ((dirp = opendir(cp)) == NULL)
 		return;
-	if (*endname == 0) {
-		atend = 1;
-	} else {
-		atend = 0;
+	c = *endname;
+	if (c) {
 		*endname = '\0';
 		endname += esc + 1;
 	}
 	name_len -= endname - name;
 	matchdot = 0;
 	p = start;
+	if (*p == (char)CTLESC)
+		p++;
 	if (*p == '\\')
 		p++;
 	if (*p == '.')
 		matchdot++;
+	pat = preglob(start, RMESCAPE_ALLOC | RMESCAPE_HEAP);
 	while (! int_pending() && (dp = readdir64(dirp)) != NULL) {
 		if (dp->d_name[0] == '.' && ! matchdot)
 			continue;
-		if (pmatch(start, dp->d_name)) {
-			if (atend) {
+		if (pmatch(pat, dp->d_name)) {
+			if (!c) {
 				scopy(dp->d_name, enddir);
 				addfname(expdir);
 			} else {
@@ -1496,9 +1525,11 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
 			}
 		}
 	}
+	if (pat != start)
+		ckfree(pat);
 	closedir(dirp);
-	if (! atend)
-		endname[-esc - 1] = esc ? '\\' : '/';
+	if (c)
+		endname[-esc - 1] = c;
 }
 #endif	/* HAVE_GLOB */
 
@@ -1743,6 +1774,7 @@ _rmescapes(char *str, int flag)
 	int notescaped;
 	int globbing;
 	int inquotes;
+	int expmeta;
 
 	p = strpbrk(str, cqchars);
 	if (!p) {
@@ -1751,6 +1783,7 @@ _rmescapes(char *str, int flag)
 	q = p;
 	r = str;
 	globbing = flag & RMESCAPE_GLOB;
+	expmeta = (flag & RMESCAPE_EMETA) ? RMESCAPE_GLOB : 0;
 
 	if (flag & RMESCAPE_ALLOC) {
 		size_t len = p - str;
@@ -1790,6 +1823,10 @@ _rmescapes(char *str, int flag)
 		if (*p == '\\') {
 			/* naked back slash */
 			notescaped ^= globbing;
+			if (expmeta & ~notescaped) {
+				p++;
+				continue;
+			}
 			goto copy;
 		}
 		if (FNMATCH_IS_ENABLED && *p == '^')
@@ -1797,7 +1834,9 @@ _rmescapes(char *str, int flag)
 		if (*p == (char)CTLESC) {
 			p++;
 add_escape:
-			if (notescaped)
+			if (expmeta)
+				;
+			else if (notescaped)
 				*q++ = '\\';
 			else if (inquotes) {
 				*q++ = '\\';
-- 
2.39.2


  parent reply	other threads:[~2024-05-05  9:14 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-05  9:14 [v3 PATCH 00/13] Add multi-byte support Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 01/13] shell: Call setlocale Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 02/13] shell: Use strcoll instead of strcmp where applicable Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 03/13] expand: Count multi-byte characters for VSLENGTH Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 04/13] expand: Process multi-byte characters in subevalvar Herbert Xu
2024-05-05  9:14 ` Herbert Xu [this message]
2024-05-05  9:14 ` [v3 PATCH 06/13] expand: Support multi-byte characters during field splitting Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 07/13] input: Allow MB_LEN_MAX calls to pungetc Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 08/13] input: Add pgetc_eoa Herbert Xu
2024-05-05  9:14 ` [v3 PATCH 09/13] parser: Add support for multi-byte characters Herbert Xu
2024-05-05  9:15 ` [v3 PATCH 10/13] input: Always push in setinputfile Herbert Xu
2024-05-05  9:15 ` [v3 PATCH 11/13] memalloc: Use void * instead of pointer Herbert Xu
2024-05-05  9:15 ` [v3 PATCH 12/13] builtin: Use pgetc in read(1) Herbert Xu
2024-05-05  9:15 ` [v3 PATCH 13/13] builtin: Process multi-byte characters " Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3245a4aa0efcb3f503d6974ec63ba822a5d9e288.1714900377.git.herbert@gondor.apana.org.au \
    --to=herbert@gondor.apana.org.au \
    --cc=dash@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).