diff --git a/NEWS b/NEWS index ba2d4c6c3261..ad2ff1bbbe9c 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,13 @@ This documents significant changes in the 1.0 branch of ksh 93u+m. For full details, see the git log at: https://github.com/ksh93/ksh/tree/1.0 Uppercase BUG_* IDs are shell bug IDs as used by the Modernish shell library. +2023-04-05: + +- Fixed a spurious syntax error in compound assignments upon encountering a + pair of repeated opening parentheses '(('. This bug behaved differently + depending on whether the compound assignment was outside or within a command + substitution of the form $(...) or ${ ...; }. Both cases are now fixed. + 2023-04-03: - Fixed multiple crashing bugs in discipline functions invoked from non-forked diff --git a/src/cmd/ksh93/include/shlex.h b/src/cmd/ksh93/include/shlex.h index 67f586e38660..9e767c4b38ed 100644 --- a/src/cmd/ksh93/include/shlex.h +++ b/src/cmd/ksh93/include/shlex.h @@ -45,7 +45,9 @@ struct _shlex_pvt_lexdata_ { char nocopy; char paren; - char dolparen; + char dolparen; /* set during the comsub() lexical analysis hack */ + unsigned short dolparen_eqparen; /* flags up =( ... 
) within a comsub */ + char dolparen_arithexp; /* set while comsub() is lexing an arithmetic expansion */ char nest; char docword; char nested_tilde; diff --git a/src/cmd/ksh93/include/version.h b/src/cmd/ksh93/include/version.h index 049be5c05255..add8f72ae334 100644 --- a/src/cmd/ksh93/include/version.h +++ b/src/cmd/ksh93/include/version.h @@ -18,7 +18,7 @@ #define SH_RELEASE_FORK "93u+m" /* only change if you develop a new ksh93 fork */ #define SH_RELEASE_SVER "1.0.5-beta" /* semantic version number: https://semver.org */ -#define SH_RELEASE_DATE "2023-04-03" /* must be in this format for $((.sh.version)) */ +#define SH_RELEASE_DATE "2023-04-05" /* must be in this format for $((.sh.version)) */ #define SH_RELEASE_CPYR "(c) 2020-2023 Contributors to ksh " SH_RELEASE_FORK /* Scripts sometimes field-split ${.sh.version}, so don't change amount of whitespace. */ diff --git a/src/cmd/ksh93/sh/lex.c b/src/cmd/ksh93/sh/lex.c index 361132a71663..80ec304120f9 100644 --- a/src/cmd/ksh93/sh/lex.c +++ b/src/cmd/ksh93/sh/lex.c @@ -476,20 +476,29 @@ int sh_lex(Lex_t* lp) { if(n==c) { - if(c=='<') - lp->lexd.docword=1; - else if(n==LPAREN) + if(c==LPAREN) { - if(lp->lex.intest) - return c; - /* '((' arithmetic command */ + /* Avoid misdetecting EXPRSYM in [[ ... ]] or compound assignments */ + if(lp->lex.intest || lp->comp_assign) + return lp->token=c; + /* The comsub() reading hack avoids the parser, so comp_assign is never + * set; try to detect compound assignments with this workaround instead */ + if(lp->lexd.dolparen && !lp->lexd.dolparen_arithexp + && (fcpeek(-2)=='=' || lp->lexd.dolparen_eqparen)) + return lp->token=c; + /* OK, maybe this is EXPRSYM (arith '((', possibly following '$'). + * But this cannot be concluded until a final '))' is detected. + * Use a recursive lexer invocation for that. 
*/ lp->lexd.nest=1; lp->lastline = sh.inlineno; lp->lexd.lex_state = ST_NESTED; fcseek(1); return sh_lex(lp); } - c |= SYMREP; + c |= SYMREP; + /* Here document redirection operator '<<' */ + if(c==IODOCSYM) + lp->lexd.docword = 1; } else if(c=='(' || c==')') return lp->token=c; @@ -667,7 +676,7 @@ int sh_lex(Lex_t* lp) if(mode==ST_BEGIN) { do_reg: - /* skip new-line joining if not called from comsub() */ + /* skip new-line joining if called from comsub() */ if(c=='\\' && fcpeek(0)=='\n' && !lp->lexd.dolparen) { sh.inlineno++; @@ -1509,19 +1518,43 @@ int sh_lex(Lex_t* lp) /* * read to end of command substitution - * of the form $(...) + * of the form $(...) or ${ ...;} + * or arithmetic expansion $((...)) + * + * Ugly hack alert: At parse time, command substitutions and arithmetic expansions are read + * without parsing, using lexical analysis only. This is only to determine their length, so + * that their literal source text can be stored in the parse tree. They are then actually + * parsed at runtime (!) each time they are executed (!) via comsubst() in macro.c. + * + * This approach is okay for arithmetic expansions, but for command substitutions it is an + * unreliable hack. The lexer does not have real shell grammar knowledge; that's what the + * parser is for. However, a clean separation between lexical analysis and parsing is not + * possible, because the design of the shell language is fundamentally messy. So we need the + * parser to set some flags in the lexer at the appropriate times to avoid spurious + * syntax errors (these are the non-private Lex_t struct members). But the parser obviously + * cannot do this if we're not using it. + * + * The comsub() hack below, along with all the dolparen checks in the lexer, tries to work + * around this fundamental problem as best we can to make it work in all but corner cases. 
+ * It sets the lexd.dolparen, lexd.dolparen_eqparen and lexd.dolparen_arithexp flags for the + * rest of the lexer code to execute lots of workarounds. + * + * TODO: to achieve correctness, actually parse command substitutions at parse time. */ static int comsub(Lex_t *lp, int endtok) { - int n,c,count=1; + int n,c; + unsigned short count=1; int line=sh.inlineno; struct ionod *inheredoc = lp->heredoc; + char save_arithexp = lp->lexd.dolparen_arithexp; char *first,*cp=fcseek(0),word[5]; int off, messages=0, assignok=lp->assignok, csub; struct _shlex_pvt_lexstate_ save = lp->lex; csub = lp->comsub; sh_lexopen(lp,1); lp->lexd.dolparen++; + lp->lexd.dolparen_arithexp = endtok==LPAREN && fcpeek(1)==LPAREN; /* $(( */ lp->lex.incase=0; pushlevel(lp,0,0); lp->comsub = (endtok==LBRACE); @@ -1601,10 +1634,16 @@ static int comsub(Lex_t *lp, int endtok) break; case IPROCSYM: case OPROCSYM: case LPAREN: + /* lexd.dolparen_eqparen flags up "=(": we presume it's a compound assignment. + * This is a workaround for <https://github.com/ksh93/ksh/issues/269>. */ + if(!lp->lexd.dolparen_eqparen && fcpeek(-2)=='=') + lp->lexd.dolparen_eqparen = count; if(endtok==LPAREN && !lp->lex.incase) count++; break; case RPAREN: + if(lp->lexd.dolparen_eqparen >= count) + lp->lexd.dolparen_eqparen = 0; if(lp->lex.incase) lp->lex.incase=0; else if(endtok==LPAREN && --count<=0) @@ -1646,6 +1685,7 @@ static int comsub(Lex_t *lp, int endtok) lp->comsub = csub; lp->lastline = line; lp->lexd.dolparen--; + lp->lexd.dolparen_arithexp = save_arithexp; lp->lex = save; lp->assignok = (endchar(lp)==RBRACT?assignok:0); if(lp->heredoc && !inheredoc) diff --git a/src/cmd/ksh93/sh/macro.c b/src/cmd/ksh93/sh/macro.c index c0591bc6c017..a7ad32a0874f 100644 --- a/src/cmd/ksh93/sh/macro.c +++ b/src/cmd/ksh93/sh/macro.c @@ -2110,6 +2110,7 @@ static int varsub(Mac_t *mp) /* * This routine handles command substitution + * and arithmetic expansion. * <type> is 0 for older `...` version * 1 for $(...) 
or 2 for ${ subshare; } */ diff --git a/src/cmd/ksh93/tests/basic.sh b/src/cmd/ksh93/tests/basic.sh index 0146e63487e5..ac39abe3987b 100755 --- a/src/cmd/ksh93/tests/basic.sh +++ b/src/cmd/ksh93/tests/basic.sh @@ -990,5 +990,34 @@ do || err_exit "last command in script exec-optimized in spite of $sig trap ($pid1 == $pid2)" done +# ====== +# Nested compound assignment misparsed in $(...) or ${ ...; } command substitution +# https://github.com/ksh93/ksh/issues/269 +# TODO: a few tests below crash when actually executed; test lexing only by using noexec. https://github.com/ksh93/ksh/issues/621 +for testcode in \ + ': $( typeset -a arr=((a b c) 1) )' \ + ': ${ typeset -a arr=((a b c) 1); }' \ + ': $( typeset -a arr=( ( ((a b c)1))) )' \ + ': ${ typeset -a arr=( ( ((a b c)1))); }' \ + ': $(( 1 << 2 ))' \ + ': $(: $(( 1 << 2 )) )' \ + ': $( (( 1 << 2 )) )' \ + ': $( : $( (( 1 << 2 )) ) )' \ + ': $( (( $( (( 1 << 2 )); echo 1 ) << 2 )) )' \ + ': $( typeset -a arr=((a $(( 1 << 2 )) c) 1) )' \ + 'typeset -Ca arr=((a=ah b=beh c=si))' \ + ': $( typeset -Ca arr=((a=ah b=beh c=si)) )' \ + 'r=${ typeset -Ca arr=((a=ah b=beh c=si)); }' \ + 'set --noexec; : $( typeset -a arr=((a $(( $( typeset -a barr=((a $(( 1 << 2 )) c) 1); echo 1 ) << $( typeset -a bazz=((a $(( 1 << 2 )) c) 1); echo 2 ) )) c) 1) )' \ + 'set --noexec; r=$(typeset -C arr=( (a=ah b=beh c=si) 1 (e f g)));' +do + # fork comsub with 'ulimit' on old ksh to avoid a fixed lexer bug crashing the entire test script + got=$(let ".sh.version >= 20211209" || ulimit -c 0 + eval "set +x; $testcode" 2>&1) \ + || err_exit "comsub/arithexp lexing test $(printf %q "$testcode"):" \ + "got status $? and $(printf %q "$got")" +done +unset testcode + # ====== exit $((Errors<125?Errors:125))