From 0432ec33408ac124b620c44416c9c58f0c10b63b Mon Sep 17 00:00:00 2001 From: Kerin Millar Date: Fri, 23 Aug 2024 04:14:36 +0100 Subject: [PATCH] Backport fix for issue with read delimiter in invalid multibyte char This addresses a regression introduced by 5.0. Consider the following test case. for i in {194..245}; do printf -v o %o "$i"; printf "\\$o\\n"; done | while read -r; do declare -p REPLY; done BEFORE declare -- REPLY=$'\302\n\303\n\304\n\305\n\306\n\307\n\310\n\311\n\312\ n\313\n\314\n\315\n\316\n\317\n\320\n\321\n\322\n\323\n\324\n\325\n\326\ n\327\n\330\n\331\n\332\n\333\n\334\n\335\n\336\n\337\n\340\n\341\n\342\ n\343\n\344\n\345\n\346\n\347\n\350\n\351\n\352\n\353\n\354\n\355\n\356\ n\357\n\360\n\361\n\362\n\363\n\364\n\365' AFTER declare -- REPLY=$'\302' declare -- REPLY=$'\303' declare -- REPLY=$'\304' declare -- REPLY=$'\305' declare -- REPLY=$'\306' declare -- REPLY=$'\307' declare -- REPLY=$'\310' declare -- REPLY=$'\311' declare -- REPLY=$'\312' declare -- REPLY=$'\313' declare -- REPLY=$'\314' declare -- REPLY=$'\315' declare -- REPLY=$'\316' declare -- REPLY=$'\317' declare -- REPLY=$'\320' declare -- REPLY=$'\321' declare -- REPLY=$'\322' declare -- REPLY=$'\323' declare -- REPLY=$'\324' declare -- REPLY=$'\325' declare -- REPLY=$'\326' declare -- REPLY=$'\327' declare -- REPLY=$'\330' declare -- REPLY=$'\331' declare -- REPLY=$'\332' declare -- REPLY=$'\333' declare -- REPLY=$'\334' declare -- REPLY=$'\335' declare -- REPLY=$'\336' declare -- REPLY=$'\337' declare -- REPLY=$'\340' declare -- REPLY=$'\341' declare -- REPLY=$'\342' declare -- REPLY=$'\343' declare -- REPLY=$'\344' declare -- REPLY=$'\345' declare -- REPLY=$'\346' declare -- REPLY=$'\347' declare -- REPLY=$'\350' declare -- REPLY=$'\351' declare -- REPLY=$'\352' declare -- REPLY=$'\353' declare -- REPLY=$'\354' declare -- REPLY=$'\355' declare -- REPLY=$'\356' declare -- REPLY=$'\357' declare -- REPLY=$'\360' declare -- REPLY=$'\361' declare -- REPLY=$'\362' declare -- 
REPLY=$'\363' declare -- REPLY=$'\364' declare -- REPLY=$'\365' Signed-off-by: Kerin Millar --- builtins/read.def | 25 ++++++++++++---- externs.h | 1 + lib/sh/zread.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 6 deletions(-) diff --git builtins/read.def builtins/read.def index ddd91d32..53b4bd81 100644 --- builtins/read.def +++ builtins/read.def @@ -130,7 +130,7 @@ static void set_readline_timeout PARAMS((sh_timer *t, time_t, long)); #endif static SHELL_VAR *bind_read_variable PARAMS((char *, char *, int)); #if defined (HANDLE_MULTIBYTE) -static int read_mbchar PARAMS((int, char *, int, int, int)); +static int read_mbchar PARAMS((int, char *, int, int, int, int)); #endif static void ttyrestore PARAMS((struct ttsave *)); @@ -806,7 +806,7 @@ add_char: else # endif if (locale_utf8locale == 0 || ((c & 0x80) != 0)) - i += read_mbchar (fd, input_string, i, c, unbuffered_read); + i += read_mbchar (fd, input_string, i, c, delim, unbuffered_read); } #endif @@ -1064,10 +1064,10 @@ bind_read_variable (name, value, flags) #if defined (HANDLE_MULTIBYTE) static int -read_mbchar (fd, string, ind, ch, unbuffered) +read_mbchar (fd, string, ind, ch, delim, unbuffered) int fd; char *string; - int ind, ch, unbuffered; + int ind, ch, delim, unbuffered; { char mbchar[MB_LEN_MAX + 1]; int i, n, r; @@ -1101,8 +1101,21 @@ read_mbchar (fd, string, ind, ch, unbuffered) mbchar[i++] = c; continue; } - else if (ret == (size_t)-1 || ret == (size_t)0 || ret > (size_t)0) - break; + else if (ret == (size_t)-1) + { + /* If we read a delimiter character that makes this an invalid + multibyte character, we can't just add it to the input string + and treat it as a byte. We need to push it back so a subsequent + zread will pick it up. 
*/ + if (c == delim) + { + zungetc (c); + mbchar[--i] = '\0'; /* unget the delimiter */ + } + break; /* invalid multibyte character */ + } + else if (ret == (size_t)0 || ret > (size_t)0) + break; /* valid multibyte character */ } mbchar_return: diff --git externs.h externs.h index 931dba9c..1b70a13b 100644 --- externs.h +++ externs.h @@ -536,6 +536,7 @@ extern ssize_t zreadintr PARAMS((int, char *, size_t)); extern ssize_t zreadc PARAMS((int, char *)); extern ssize_t zreadcintr PARAMS((int, char *)); extern ssize_t zreadn PARAMS((int, char *, size_t)); +extern int zungetc PARAMS((int)); extern void zreset PARAMS((void)); extern void zsyncfd PARAMS((int)); diff --git lib/sh/zread.c lib/sh/zread.c index dafb7f60..7cfbb288 100644 --- lib/sh/zread.c +++ lib/sh/zread.c @@ -41,6 +41,10 @@ extern int errno; # define ZBUFSIZ 4096 #endif +#ifndef EOF +# define EOF -1 +#endif + extern int executing_builtin; extern void check_signals_and_traps (void); @@ -48,6 +52,11 @@ extern void check_signals (void); extern int signal_is_trapped (int); extern int read_builtin_timeout (int); +int zungetc (int); + +/* Provide one character of pushback whether we are using read or zread. */ +static int zpushedchar = -1; + /* Read LEN bytes from FD into BUF. Retry the read on EINTR. Any other error causes the loop to break. */ ssize_t @@ -59,6 +68,15 @@ zread (fd, buf, len) ssize_t r; check_signals (); /* check for signals before a blocking read */ + + /* If we pushed a char back, return it immediately */ + if (zpushedchar != -1) + { + *buf = (unsigned char)zpushedchar; + zpushedchar = -1; + return 1; + } + /* should generalize into a mechanism where different parts of the shell can `register' timeouts and have them checked here. 
*/ while (((r = read_builtin_timeout (fd)) < 0 || (r = read (fd, buf, len)) < 0) && @@ -95,6 +113,14 @@ zreadretry (fd, buf, len) ssize_t r; int nintr; + /* If we pushed a char back, return it immediately */ + if (zpushedchar != -1) + { + *buf = (unsigned char)zpushedchar; + zpushedchar = -1; + return 1; + } + for (nintr = 0; ; ) { r = read (fd, buf, len); @@ -118,6 +144,15 @@ zreadintr (fd, buf, len) size_t len; { check_signals (); + + /* If we pushed a char back, return it immediately */ + if (zpushedchar != -1) + { + *buf = (unsigned char)zpushedchar; + zpushedchar = -1; + return 1; + } + return (read (fd, buf, len)); } @@ -135,6 +170,14 @@ zreadc (fd, cp) { ssize_t nr; + /* If we pushed a char back, return it immediately */ + if (zpushedchar != -1 && cp) + { + *cp = (unsigned char)zpushedchar; + zpushedchar = -1; + return 1; + } + if (lind == lused || lused == 0) { nr = zread (fd, lbuf, sizeof (lbuf)); @@ -160,6 +203,14 @@ zreadcintr (fd, cp) { ssize_t nr; + /* If we pushed a char back, return it immediately */ + if (zpushedchar != -1 && cp) + { + *cp = (unsigned char)zpushedchar; + zpushedchar = -1; + return 1; + } + if (lind == lused || lused == 0) { nr = zreadintr (fd, lbuf, sizeof (lbuf)); @@ -186,6 +237,13 @@ zreadn (fd, cp, len) { ssize_t nr; + if (zpushedchar != -1 && cp) + { + *cp = zpushedchar; + zpushedchar = -1; + return 1; + } + if (lind == lused || lused == 0) { if (len > sizeof (lbuf)) @@ -204,6 +262,22 @@ zreadn (fd, cp, len) return 1; } +int +zungetc (c) + int c; +{ + if (zpushedchar == -1) + { + zpushedchar = c; + return c; + } + + if (c == EOF || lind == 0) + return (EOF); + lbuf[--lind] = c; /* XXX */ + return c; +} + void zreset () { -- 2.45.2