From: Herbert Xu <herbert@gondor.apana.org.au>
To: DASH Mailing List <dash@vger.kernel.org>
Subject: [v3 PATCH 07/13] input: Allow MB_LEN_MAX calls to pungetc
Date: Sun, 05 May 2024 17:14:40 +0800 [thread overview]
Message-ID: <809389416f3b39d55ff77a7f20d09f936b07d1f8.1714900377.git.herbert@gondor.apana.org.au> (raw)
In-Reply-To: <cover.1714900377.git.herbert@gondor.apana.org.au>
In order to parse multi-byte characters which may be up to MB_LEN_MAX
bytes long, allow enough calls to pungetc to undo a single multi-byte
character.
Also add a function pungetn to do multiple pungetc calls in a row.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
src/input.c | 58 ++++++++++++++++++++++++++++++++++-------------------
src/input.h | 11 +++++-----
2 files changed, 42 insertions(+), 27 deletions(-)
diff --git a/src/input.c b/src/input.c
index 1c598b2..e17e067 100644
--- a/src/input.c
+++ b/src/input.c
@@ -56,7 +56,7 @@
#include "main.h"
#include "myhistedit.h"
-#define IBUFSIZ (BUFSIZ + 1)
+#define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1)
MKINIT struct parsefile basepf; /* top level input file */
@@ -83,13 +83,16 @@ INIT {
}
RESET {
+ int c;
+
/* clear input buffer */
popallfiles();
- basepf.unget = 0;
- while (basepf.lastc[0] != '\n' &&
- basepf.lastc[0] != PEOF &&
- !int_pending())
- pgetc();
+
+ c = PEOF;
+ if (basepf.nextc - basebuf > basepf.unget)
+ c = basepf.nextc[-basepf.unget - 1];
+ while (c != '\n' && c != PEOF && !int_pending())
+ c = pgetc();
}
FORKRESET {
@@ -131,17 +134,20 @@ static int __pgetc(void)
{
int c;
- if (parsefile->unget)
- return parsefile->lastc[--parsefile->unget];
+ if (parsefile->unget) {
+ long unget = -(long)(unsigned)parsefile->unget--;
+
+ if (parsefile->nleft < 0)
+ return preadbuffer();
+
+ return parsefile->nextc[unget];
+ }
if (--parsefile->nleft >= 0)
c = (signed char)*parsefile->nextc++;
else
c = preadbuffer();
- parsefile->lastc[1] = parsefile->lastc[0];
- parsefile->lastc[0] = c;
-
return c;
}
@@ -176,9 +182,16 @@ static int stdin_clear_nonblock(void)
static int
preadfd(void)
{
+ char *buf = parsefile->buf;
+ int unget;
int nr;
- char *buf = parsefile->buf;
- parsefile->nextc = buf;
+
+ unget = parsefile->nextc - buf;
+ if (unget > PUNGETC_MAX)
+ unget = PUNGETC_MAX;
+
+ memmove(buf, parsefile->nextc - unget, unget);
+ parsefile->nextc = buf += unget;
retry:
#ifndef SMALL
@@ -196,8 +209,8 @@ retry:
nr = 0;
else {
nr = el_len;
- if (nr > IBUFSIZ - 1)
- nr = IBUFSIZ - 1;
+ if (nr > BUFSIZ)
+ nr = BUFSIZ;
memcpy(buf, rl_cp, nr);
if (nr != el_len) {
el_len -= nr;
@@ -209,9 +222,9 @@ retry:
} else
#endif
if (parsefile->fd)
- nr = read(parsefile->fd, buf, IBUFSIZ - 1);
+ nr = read(parsefile->fd, buf, BUFSIZ);
else {
- unsigned len = IBUFSIZ - 1;
+ unsigned len = BUFSIZ;
nr = 0;
@@ -348,6 +361,11 @@ done:
return (signed char)*parsefile->nextc++;
}
+void pungetn(int n)
+{
+ parsefile->unget += n;
+}
+
/*
* Undo a call to pgetc. Up to PUNGETC_MAX characters may be pushed back.
* PEOF may be pushed back.
@@ -356,7 +374,7 @@ done:
void
pungetc(void)
{
- parsefile->unget++;
+ pungetn(1);
}
/*
@@ -383,7 +401,6 @@ pushstring(char *s, void *ap)
sp->prevnleft = parsefile->nleft;
sp->unget = parsefile->unget;
sp->spfree = parsefile->spfree;
- memcpy(sp->lastc, parsefile->lastc, sizeof(sp->lastc));
sp->ap = (struct alias *)ap;
if (ap) {
((struct alias *)ap)->flag |= ALIASINUSE;
@@ -413,7 +430,6 @@ static void popstring(void)
parsefile->nextc = sp->prevstring;
parsefile->nleft = sp->prevnleft;
parsefile->unget = sp->unget;
- memcpy(parsefile->lastc, sp->lastc, sizeof(sp->lastc));
/*dprintf("*** calling popstring: restoring to '%s'\n", parsenextc);*/
parsefile->strpush = sp->prev;
parsefile->spfree = sp;
@@ -457,7 +473,7 @@ setinputfd(int fd, int push)
}
parsefile->fd = fd;
if (parsefile->buf == NULL)
- parsefile->buf = ckmalloc(IBUFSIZ);
+ parsefile->nextc = parsefile->buf = ckmalloc(IBUFSIZ);
input_set_lleft(parsefile, parsefile->nleft = 0);
plinno = 1;
}
diff --git a/src/input.h b/src/input.h
index 1ff5773..5b4a045 100644
--- a/src/input.h
+++ b/src/input.h
@@ -34,12 +34,16 @@
* @(#)input.h 8.2 (Berkeley) 5/4/95
*/
+#include <limits.h>
+
#ifdef SMALL
#define IS_DEFINED_SMALL 1
#else
#define IS_DEFINED_SMALL 0
#endif
+#define PUNGETC_MAX (MB_LEN_MAX > 16 ? MB_LEN_MAX : 16)
+
/* PEOF (the end of file marker) is defined in syntax.h */
enum {
@@ -59,9 +63,6 @@ struct strpush {
/* Delay freeing so we can stop nested aliases. */
struct strpush *spfree;
- /* Remember last two characters for pungetc. */
- int lastc[2];
-
/* Number of outstanding calls to pungetc. */
int unget;
};
@@ -87,9 +88,6 @@ struct parsefile {
/* Delay freeing so we can stop nested aliases. */
struct strpush *spfree;
- /* Remember last two characters for pungetc. */
- int lastc[2];
-
/* Number of outstanding calls to pungetc. */
int unget;
};
@@ -106,6 +104,7 @@ extern struct parsefile *parsefile;
int pgetc(void);
int pgetc2(void);
void pungetc(void);
+void pungetn(int);
void pushstring(char *, void *);
int setinputfile(const char *, int);
void setinputstring(char *);
--
2.39.2
next prev parent reply other threads:[~2024-05-05 9:14 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-05 9:14 [v3 PATCH 00/13] Add multi-byte support Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 01/13] shell: Call setlocale Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 02/13] shell: Use strcoll instead of strcmp where applicable Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 03/13] expand: Count multi-byte characters for VSLENGTH Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 04/13] expand: Process multi-byte characters in subevalvar Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 05/13] expand: Process multi-byte characters in expmeta Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 06/13] expand: Support multi-byte characters during field splitting Herbert Xu
2024-05-05 9:14 ` Herbert Xu [this message]
2024-05-05 9:14 ` [v3 PATCH 08/13] input: Add pgetc_eoa Herbert Xu
2024-05-05 9:14 ` [v3 PATCH 09/13] parser: Add support for multi-byte characters Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 10/13] input: Always push in setinputfile Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 11/13] memalloc: Use void * instead of pointer Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 12/13] builtin: Use pgetc in read(1) Herbert Xu
2024-05-05 9:15 ` [v3 PATCH 13/13] builtin: Process multi-byte characters " Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=809389416f3b39d55ff77a7f20d09f936b07d1f8.1714900377.git.herbert@gondor.apana.org.au \
--to=herbert@gondor.apana.org.au \
--cc=dash@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).