From: Herbert Xu <herbert@gondor.apana.org.au>
To: DASH Mailing List <dash@vger.kernel.org>
Subject: [v3 PATCH 05/13] expand: Process multi-byte characters in expmeta
Date: Sun, 05 May 2024 17:14:35 +0800
Message-ID: <3245a4aa0efcb3f503d6974ec63ba822a5d9e288.1714900377.git.herbert@gondor.apana.org.au>
In-Reply-To: <cover.1714900377.git.herbert@gondor.apana.org.au>
When glob(3) is not in use, make sure that expmeta processes
multi-byte characters correctly.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
src/expand.c | 107 +++++++++++++++++++++++++++++++++++----------------
1 file changed, 73 insertions(+), 34 deletions(-)
diff --git a/src/expand.c b/src/expand.c
index 60a51b1..0e85025 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -84,6 +84,7 @@
#define RMESCAPE_GLOB 0x2 /* Add backslashes for glob */
#define RMESCAPE_GROW 0x8 /* Grow strings instead of stalloc */
#define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */
+#define RMESCAPE_EMETA 0x20 /* Remove backslashes too */
/* Add CTLESC when necessary. */
#define QUOTES_ESC (EXP_FULL | EXP_CASE)
@@ -1347,15 +1348,13 @@ expandmeta(struct strlist *str)
savelastp = exparg.lastp;
INTOFF;
- p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP);
+ p = str->text;
len = strlen(p);
expdir_max = len + PATH_MAX;
expdir = ckmalloc(expdir_max);
expmeta(p, len, 0);
ckfree(expdir);
- if (p != str->text)
- ckfree(p);
INTON;
if (exparg.lastp == savelastp) {
/*
@@ -1376,6 +1375,41 @@ nometa:
}
}
+static void expmeta_rmescapes(char *enddir, char *name)
+{
+ preglob(strcpy(enddir, name), RMESCAPE_EMETA);
+}
+
+static unsigned mbcharlen(char *p)
+{
+ int esc = 0;
+
+ if (*++p == (char)CTLESC)
+ esc++;
+
+ return esc + 3 + (unsigned char)p[esc];
+}
+
+static int skipesc(char *p)
+{
+ int esc = 0;
+
+ if (p[esc] == (char)CTLMBCHAR)
+ return esc + mbcharlen(p);
+
+ if (*p == (char)CTLESC)
+ esc++;
+
+ if (p[esc] == '\\' && p[esc + 1]) {
+ esc++;
+ if (p[esc] == (char)CTLMBCHAR)
+ return esc + mbcharlen(p + esc);
+ if (p[esc] == (char)CTLESC)
+ esc++;
+ }
+
+ return esc;
+}
/*
* Do metacharacter (i.e. *, ?, [...]) expansion.
@@ -1385,17 +1419,18 @@ STATIC void
expmeta(char *name, unsigned name_len, unsigned expdir_len)
{
char *enddir = expdir + expdir_len;
- char *p;
+ struct stat64 statb;
+ struct dirent64 *dp;
const char *cp;
- char *start;
char *endname;
int metaflag;
- struct stat64 statb;
- DIR *dirp;
- struct dirent64 *dp;
- int atend;
int matchdot;
+ char *start;
+ DIR *dirp;
+ char *pat;
+ char *p;
int esc;
+ int c;
metaflag = 0;
start = name;
@@ -1407,8 +1442,7 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
if (*q == '!')
q++;
for (;;) {
- if (*q == '\\')
- q++;
+ q += skipesc(q);
if (*q == '/' || *q == '\0')
break;
if (*++q == ']') {
@@ -1417,8 +1451,8 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
}
}
} else {
- if (*p == '\\' && p[1])
- esc++;
+ esc = skipesc(p);
+
if (p[esc] == '/') {
if (metaflag)
break;
@@ -1429,24 +1463,18 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
if (metaflag == 0) { /* we've reached the end of the file name */
if (!expdir_len)
return;
- p = name;
- do {
- if (*p == '\\' && p[1])
- p++;
- *enddir++ = *p;
- } while (*p++);
+ expmeta_rmescapes(enddir, name);
if (lstat64(expdir, &statb) >= 0)
addfname(expdir);
return;
}
endname = p;
if (name < start) {
- p = name;
- do {
- if (*p == '\\' && p[1])
- p++;
- *enddir++ = *p++;
- } while (p < start);
+ c = *start;
+ *start = 0;
+ expmeta_rmescapes(enddir, name);
+ *start = c;
+ enddir += strlen(enddir);
}
*enddir = 0;
cp = expdir;
@@ -1455,25 +1483,26 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
cp = ".";
if ((dirp = opendir(cp)) == NULL)
return;
- if (*endname == 0) {
- atend = 1;
- } else {
- atend = 0;
+ c = *endname;
+ if (c) {
*endname = '\0';
endname += esc + 1;
}
name_len -= endname - name;
matchdot = 0;
p = start;
+ if (*p == (char)CTLESC)
+ p++;
if (*p == '\\')
p++;
if (*p == '.')
matchdot++;
+ pat = preglob(start, RMESCAPE_ALLOC | RMESCAPE_HEAP);
while (! int_pending() && (dp = readdir64(dirp)) != NULL) {
if (dp->d_name[0] == '.' && ! matchdot)
continue;
- if (pmatch(start, dp->d_name)) {
- if (atend) {
+ if (pmatch(pat, dp->d_name)) {
+ if (!c) {
scopy(dp->d_name, enddir);
addfname(expdir);
} else {
@@ -1496,9 +1525,11 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len)
}
}
}
+ if (pat != start)
+ ckfree(pat);
closedir(dirp);
- if (! atend)
- endname[-esc - 1] = esc ? '\\' : '/';
+ if (c)
+ endname[-esc - 1] = c;
}
#endif /* HAVE_GLOB */
@@ -1743,6 +1774,7 @@ _rmescapes(char *str, int flag)
int notescaped;
int globbing;
int inquotes;
+ int expmeta;
p = strpbrk(str, cqchars);
if (!p) {
@@ -1751,6 +1783,7 @@ _rmescapes(char *str, int flag)
q = p;
r = str;
globbing = flag & RMESCAPE_GLOB;
+ expmeta = (flag & RMESCAPE_EMETA) ? RMESCAPE_GLOB : 0;
if (flag & RMESCAPE_ALLOC) {
size_t len = p - str;
@@ -1790,6 +1823,10 @@ _rmescapes(char *str, int flag)
if (*p == '\\') {
/* naked back slash */
notescaped ^= globbing;
+ if (expmeta & ~notescaped) {
+ p++;
+ continue;
+ }
goto copy;
}
if (FNMATCH_IS_ENABLED && *p == '^')
@@ -1797,7 +1834,9 @@ _rmescapes(char *str, int flag)
if (*p == (char)CTLESC) {
p++;
add_escape:
- if (notescaped)
+ if (expmeta)
+ ;
+ else if (notescaped)
*q++ = '\\';
else if (inquotes) {
*q++ = '\\';
--
2.39.2
next prev parent reply other threads:[~2024-05-05 9:14 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-05 9:14 [v3 PATCH 00/13] Add multi-byte support Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 01/13] shell: Call setlocale Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 02/13] shell: Use strcoll instead of strcmp where applicable Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 03/13] expand: Count multi-byte characters for VSLENGTH Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 04/13] expand: Process multi-byte characters in subevalvar Herbert Xu
2024-05-05 9:14 ` Herbert Xu [this message]
2024-05-05 9:14 ` [v3 PATCH 06/13] expand: Support multi-byte characters during field splitting Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 07/13] input: Allow MB_LEN_MAX calls to pungetc Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 08/13] input: Add pgetc_eoa Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 09/13] parser: Add support for multi-byte characters Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 10/13] input: Always push in setinputfile Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 11/13] memalloc: Use void * instead of pointer Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 12/13] builtin: Use pgetc in read(1) Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 13/13] builtin: Process multi-byte characters " Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3245a4aa0efcb3f503d6974ec63ba822a5d9e288.1714900377.git.herbert@gondor.apana.org.au \
--to=herbert@gondor.apana.org.au \
--cc=dash@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).