All of lore.kernel.org
 help / color / mirror / Atom feed
From: Herbert Xu <herbert@gondor.apana.org.au>
To: DASH Mailing List <dash@vger.kernel.org>
Subject: [v2 PATCH 7/8] input: Allow MB_LEN_MAX calls to pungetc
Date: Sun, 28 Apr 2024 11:57:11 +0800	[thread overview]
Message-ID: <9a1c18b16b066510266ed9f14ec954840221e7c1.1714276539.git.herbert@gondor.apana.org.au> (raw)
In-Reply-To: <cover.1714276539.git.herbert@gondor.apana.org.au>

In order to parse multi-byte characters which may be up to MB_LEN_MAX
bytes long, allow enough calls to pungetc to undo a single multi-byte
character.

Also add a function pungetn to do multiple pungetc calls in a row.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 src/input.c | 58 ++++++++++++++++++++++++++++++++++-------------------
 src/input.h | 11 +++++-----
 2 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/src/input.c b/src/input.c
index fb9858f..c7805ad 100644
--- a/src/input.c
+++ b/src/input.c
@@ -56,7 +56,7 @@
 #include "main.h"
 #include "myhistedit.h"
 
-#define IBUFSIZ (BUFSIZ + 1)
+#define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1)
 
 
 MKINIT struct parsefile basepf;	/* top level input file */
@@ -83,13 +83,16 @@ INIT {
 }
 
 RESET {
+	int c;
+
 	/* clear input buffer */
 	popallfiles();
-	basepf.unget = 0;
-	while (basepf.lastc[0] != '\n' &&
-	       basepf.lastc[0] != PEOF &&
-	       !int_pending())
-		pgetc();
+
+	c = PEOF;
+	if (basepf.nextc - basebuf > basepf.unget)
+		c = basepf.nextc[-basepf.unget];
+	while (c != '\n' && c != PEOF && !int_pending())
+		c = pgetc();
 }
 
 FORKRESET {
@@ -131,17 +134,20 @@ static int __pgetc(void)
 {
 	int c;
 
-	if (parsefile->unget)
-		return parsefile->lastc[--parsefile->unget];
+	if (parsefile->unget) {
+		long unget = -(long)(unsigned)parsefile->unget--;
+
+		if (parsefile->nleft < 0)
+			return preadbuffer();
+
+		return parsefile->nextc[unget];
+	}
 
 	if (--parsefile->nleft >= 0)
 		c = (signed char)*parsefile->nextc++;
 	else
 		c = preadbuffer();
 
-	parsefile->lastc[1] = parsefile->lastc[0];
-	parsefile->lastc[0] = c;
-
 	return c;
 }
 
@@ -176,9 +182,16 @@ static int stdin_clear_nonblock(void)
 static int
 preadfd(void)
 {
+	char *buf = parsefile->buf;
+	int unget;
 	int nr;
-	char *buf =  parsefile->buf;
-	parsefile->nextc = buf;
+
+	unget = parsefile->nextc - buf;
+	if (unget > PUNGETC_MAX)
+		unget = PUNGETC_MAX;
+
+	memmove(buf, parsefile->nextc - unget, unget);
+	parsefile->nextc = buf += unget;
 
 retry:
 #ifndef SMALL
@@ -196,8 +209,8 @@ retry:
 			nr = 0;
 		else {
 			nr = el_len;
-			if (nr > IBUFSIZ - 1)
-				nr = IBUFSIZ - 1;
+			if (nr > BUFSIZ)
+				nr = BUFSIZ;
 			memcpy(buf, rl_cp, nr);
 			if (nr != el_len) {
 				el_len -= nr;
@@ -209,9 +222,9 @@ retry:
 	} else
 #endif
 	if (parsefile->fd)
-		nr = read(parsefile->fd, buf, IBUFSIZ - 1);
+		nr = read(parsefile->fd, buf, BUFSIZ);
 	else {
-		unsigned len = IBUFSIZ - 1;
+		unsigned len = BUFSIZ;
 
 		nr = 0;
 
@@ -348,6 +361,11 @@ done:
 	return (signed char)*parsefile->nextc++;
 }
 
+void pungetn(int n)
+{
+	parsefile->unget += n;
+}
+
 /*
  * Undo a call to pgetc.  Only two characters may be pushed back.
  * PEOF may be pushed back.
@@ -356,7 +374,7 @@ done:
 void
 pungetc(void)
 {
-	parsefile->unget++;
+	pungetn(1);
 }
 
 /*
@@ -383,7 +401,6 @@ pushstring(char *s, void *ap)
 	sp->prevnleft = parsefile->nleft;
 	sp->unget = parsefile->unget;
 	sp->spfree = parsefile->spfree;
-	memcpy(sp->lastc, parsefile->lastc, sizeof(sp->lastc));
 	sp->ap = (struct alias *)ap;
 	if (ap) {
 		((struct alias *)ap)->flag |= ALIASINUSE;
@@ -413,7 +430,6 @@ static void popstring(void)
 	parsefile->nextc = sp->prevstring;
 	parsefile->nleft = sp->prevnleft;
 	parsefile->unget = sp->unget;
-	memcpy(parsefile->lastc, sp->lastc, sizeof(sp->lastc));
 /*dprintf("*** calling popstring: restoring to '%s'\n", parsenextc);*/
 	parsefile->strpush = sp->prev;
 	parsefile->spfree = sp;
@@ -457,7 +473,7 @@ setinputfd(int fd, int push)
 	}
 	parsefile->fd = fd;
 	if (parsefile->buf == NULL)
-		parsefile->buf = ckmalloc(IBUFSIZ);
+		parsefile->nextc = parsefile->buf = ckmalloc(IBUFSIZ);
 	input_set_lleft(parsefile, parsefile->nleft = 0);
 	plinno = 1;
 }
diff --git a/src/input.h b/src/input.h
index 1ff5773..5b4a045 100644
--- a/src/input.h
+++ b/src/input.h
@@ -34,12 +34,16 @@
  *	@(#)input.h	8.2 (Berkeley) 5/4/95
  */
 
+#include <limits.h>
+
 #ifdef SMALL
 #define IS_DEFINED_SMALL 1
 #else
 #define IS_DEFINED_SMALL 0
 #endif
 
+#define PUNGETC_MAX (MB_LEN_MAX > 16 ? MB_LEN_MAX : 16)
+
 /* PEOF (the end of file marker) is defined in syntax.h */
 
 enum {
@@ -59,9 +63,6 @@ struct strpush {
 	/* Delay freeing so we can stop nested aliases. */
 	struct strpush *spfree;
 
-	/* Remember last two characters for pungetc. */
-	int lastc[2];
-
 	/* Number of outstanding calls to pungetc. */
 	int unget;
 };
@@ -87,9 +88,6 @@ struct parsefile {
 	/* Delay freeing so we can stop nested aliases. */
 	struct strpush *spfree;
 
-	/* Remember last two characters for pungetc. */
-	int lastc[2];
-
 	/* Number of outstanding calls to pungetc. */
 	int unget;
 };
@@ -106,6 +104,7 @@ extern struct parsefile *parsefile;
 int pgetc(void);
 int pgetc2(void);
 void pungetc(void);
+void pungetn(int);
 void pushstring(char *, void *);
 int setinputfile(const char *, int);
 void setinputstring(char *);
-- 
2.39.2


  parent reply	other threads:[~2024-04-28  3:56 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-28  3:56 [v2 PATCH 0/8] Add multi-byte support Herbert Xu
2024-04-28  3:56 ` [v2 PATCH 1/8] shell: Call setlocale Herbert Xu
2024-04-28  3:57 ` [v2 PATCH 2/8] shell: Use strcoll instead of strcmp where applicable Herbert Xu
2024-04-28  3:57 ` [v2 PATCH 3/8] expand: Count multi-byte characters for VSLENGTH Herbert Xu
2024-04-28  3:57 ` [v2 PATCH 4/8] expand: Process multi-byte characters in subevalvar Herbert Xu
2024-04-28  3:57 ` [v2 PATCH 5/8] expand: Process multi-byte characters in expmeta Herbert Xu
2024-04-28  3:57 ` [v2 PATCH 6/8] expand: Support multi-byte characters during field splitting Herbert Xu
2024-04-28  3:57 ` Herbert Xu [this message]
2024-04-28  3:57 ` [v2 PATCH 8/8] parser: Add support for multi-byte characters Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9a1c18b16b066510266ed9f14ec954840221e7c1.1714276539.git.herbert@gondor.apana.org.au \
    --to=herbert@gondor.apana.org.au \
    --cc=dash@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.