1 |
/* $OpenBSD: lex.c,v 1.49 2013/12/17 16:37:06 deraadt Exp $ */ |
2 |
|
3 |
/*- |
4 |
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, |
5 |
* 2011, 2012, 2013, 2014 |
6 |
* Thorsten Glaser <tg@mirbsd.org> |
7 |
* |
8 |
* Provided that these terms and disclaimer and all copyright notices |
9 |
* are retained or reproduced in an accompanying document, permission |
10 |
* is granted to deal in this work without restriction, including un- |
11 |
* limited rights to use, publicly perform, distribute, sell, modify, |
12 |
* merge, give away, or sublicence. |
13 |
* |
14 |
* This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to |
15 |
* the utmost extent permitted by applicable law, neither express nor |
16 |
* implied; without malicious intent or gross negligence. In no event |
17 |
* may a licensor, author or contributor be held liable for indirect, |
18 |
* direct, other damage, loss, or other issues arising in any way out |
19 |
* of dealing in the work, even if advised of the possibility of such |
20 |
* damage or existence of a defect, except proven that it results out |
21 |
* of said person's immediate fault when using the work as intended. |
22 |
*/ |
23 |
|
24 |
#include "sh.h" |
25 |
|
26 |
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.193 2014/06/29 11:28:28 tg Exp $"); |
27 |
|
28 |
/*
 * states while lexing word
 *
 * These values are what PUSH_STATE() stores into Lex_state.type and
 * what the "state" local of yylex() is switched on.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing <<,<<-,<<< delimiter */
#define SHEREDQUOTE	11	/* parsing " in <<,<<-,<<< delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
#define SINVALID	255	/* invalid state; marks slot 0 of a state block */
48 |
|
49 |
/*
 * One level of input retracing: while an entry is linked into the
 * retrace_info list, o_getsc_r() appends every character it returns
 * to the entry's buffer.
 */
struct sretrace_info {
	/* next (older) entry on the retrace_info list */
	struct sretrace_info *next;
	/* buffer collecting the characters read */
	XString xs;
	/* current write position inside xs */
	char *xp;
};
54 |
|
55 |
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The union members are alternatives: which one is meaningful depends
 * on the slot (slot 0 of a block uses base) or on the state type.
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		struct lex_state *base;
		/* marks start of state output in output string */
		int start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state (one of the S* constants) */
	uint8_t type;
} Lex_state;
/* shorthands for the union members */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
85 |
|
86 |
/*
 * Bounds of the block of Lex_state slots currently in use; yylex()
 * initialises base to its local array and end to base + STATE_BSIZE.
 */
typedef struct {
	/* first slot of the current block */
	Lex_state *base;
	/* one past the last slot of the current block */
	Lex_state *end;
} State_info;
90 |
|
91 |
/* forward declarations of the file-local helpers */
static void readhere(struct ioword *);
static void ungetsc(int);
static void ungetsc_i(int);
static int getsc_uu(void);
static void getsc_line(Source *);
static int getsc_bn(void);
static int s_get(void);
static void s_put(int);
static char *get_brace_var(XString *, char *);
static bool arraysub(char **);
static void gethere(bool);
static Lex_state *push_state_i(State_info *, Lex_state *);
static Lex_state *pop_state_i(State_info *, Lex_state *);

/* when non-zero, o_getsc() always takes the slow getsc_bn() path */
static int backslash_skip;
/*
 * nesting counter raised by yylex() while reading comments and
 * single-quoted strings and by readhere() for unevaluated here
 * documents; NOTE(review): presumably consulted by getsc_bn() to
 * leave backslash-newline alone -- confirm against its definition
 */
static int ignore_backslash_newline;
107 |
|
108 |
/*
 * optimised getsc_bn(): take the next byte straight from the source
 * buffer unless it is NUL or a backslash, or backslash_skip is set;
 * in those cases defer to getsc_bn()
 */
#define o_getsc()						\
	((*source->str == '\0' || *source->str == '\\' ||	\
	    backslash_skip) ? getsc_bn() : *source->str++)
/*
 * optimised getsc_uu(): take the next byte straight from the source
 * buffer unless it is NUL, in which case defer to getsc_uu()
 */
#define o_getsc_u()						\
	((*source->str == '\0') ? getsc_uu() : *source->str++)
113 |
|
114 |
/*
 * retrace helper: used as a complete function body; evaluates carg
 * once, appends the result to the buffer of every entry on the
 * retrace_info list and returns it
 */
#define o_getsc_r(carg)	{					\
	struct sretrace_info *rp;				\
	int cev = (carg);					\
								\
	for (rp = retrace_info; rp; rp = rp->next) {		\
		Xcheck(rp->xs, rp->xp);				\
		*rp->xp++ = cev;				\
	}							\
								\
	return (cev);						\
}
127 |
|
128 |
/*
 * getsc(): read the next input character via o_getsc() and record it
 * for all active retraces via o_getsc_r().
 */
#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
/* small build: one real function containing both macro expansions */
static int getsc(void);

static int
getsc(void)
{
	o_getsc_r(o_getsc());
}
#else
/*
 * otherwise o_getsc() is expanded at every call site and only the
 * retrace bookkeeping is a function
 */
static int getsc_r(int);

static int
getsc_r(int c)
{
	o_getsc_r(c);
}

#define getsc()		getsc_r(o_getsc())
#endif
147 |
|
148 |
/* number of Lex_state slots per state block */
#define STATE_BSIZE	8

/*
 * Enter state s: advance statep (asking push_state_i() for a new
 * position when the end of the current block is reached), record s in
 * the new slot and in the "state" local. Uses the statep, state_info
 * and state variables of the enclosing function.
 */
#define PUSH_STATE(s)	do {					\
	++statep;						\
	if (statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	statep->type = (s);					\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * Leave the current state: step statep back (asking pop_state_i() for
 * the position when slot 0 of the current block is reached) and reload
 * "state" from the slot that is now on top.
 */
#define POP_STATE()	do {					\
	--statep;						\
	if (statep == state_info.base)				\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)
161 |
|
162 |
/*
 * Enter state s and start retracing: remember the current output
 * position in the new state's ls_start and link a fresh, empty
 * sretrace_info in front of the retrace_info list. Uses the ws and wp
 * variables of the enclosing function in addition to those needed by
 * PUSH_STATE().
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * Stop the innermost retrace and leave the current state: rewind wp
 * to the position saved by PUSH_SRETRACE(), NUL-terminate the retraced
 * text and hand it to the caller in sp, then unlink and free the list
 * node (dp is only used as a scratch pointer for that). The text in sp
 * is not freed here; the callers in yylex() free it or pass it on.
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
182 |
|
183 |
/** |
184 |
* Lexical analyser |
185 |
* |
186 |
* tokens are not regular expressions, they are LL(1). |
187 |
* for example, "${var:-${PWD}}", and "$(size $(whence ksh))". |
188 |
* hence the state stack. Note "$(...)" are now parsed recursively. |
189 |
*/ |
190 |
|
191 |
int |
192 |
yylex(int cf) |
193 |
{ |
194 |
Lex_state states[STATE_BSIZE], *statep, *s2, *base; |
195 |
State_info state_info; |
196 |
int c, c2, state; |
197 |
size_t cz; |
198 |
XString ws; /* expandable output word */ |
199 |
char *wp; /* output word pointer */ |
200 |
char *sp, *dp; |
201 |
|
202 |
Again: |
203 |
states[0].type = SINVALID; |
204 |
states[0].ls_base = NULL; |
205 |
statep = &states[1]; |
206 |
state_info.base = states; |
207 |
state_info.end = &state_info.base[STATE_BSIZE]; |
208 |
|
209 |
Xinit(ws, wp, 64, ATEMP); |
210 |
|
211 |
backslash_skip = 0; |
212 |
ignore_backslash_newline = 0; |
213 |
|
214 |
if (cf & ONEWORD) |
215 |
state = SWORD; |
216 |
else if (cf & LETEXPR) { |
217 |
/* enclose arguments in (double) quotes */ |
218 |
*wp++ = OQUOTE; |
219 |
state = SLETPAREN; |
220 |
statep->nparen = 0; |
221 |
} else { |
222 |
/* normal lexing */ |
223 |
state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; |
224 |
while ((c = getsc()) == ' ' || c == '\t') |
225 |
; |
226 |
if (c == '#') { |
227 |
ignore_backslash_newline++; |
228 |
while ((c = getsc()) != '\0' && c != '\n') |
229 |
; |
230 |
ignore_backslash_newline--; |
231 |
} |
232 |
ungetsc(c); |
233 |
} |
234 |
if (source->flags & SF_ALIAS) { |
235 |
/* trailing ' ' in alias definition */ |
236 |
source->flags &= ~SF_ALIAS; |
237 |
cf |= ALIAS; |
238 |
} |
239 |
|
240 |
/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */ |
241 |
statep->type = state; |
242 |
|
243 |
/* check for here string */ |
244 |
if (state == SHEREDELIM) { |
245 |
c = getsc(); |
246 |
if (c == '<') { |
247 |
state = SHEREDELIM; |
248 |
while ((c = getsc()) == ' ' || c == '\t') |
249 |
; |
250 |
ungetsc(c); |
251 |
c = '<'; |
252 |
goto accept_nonword; |
253 |
} |
254 |
ungetsc(c); |
255 |
} |
256 |
|
257 |
/* collect non-special or quoted characters to form word */ |
258 |
while (!((c = getsc()) == 0 || |
259 |
((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) { |
260 |
if (state == SBASE && |
261 |
subshell_nesting_type == /*{*/ '}' && |
262 |
c == /*{*/ '}') |
263 |
/* possibly end ${ :;} */ |
264 |
break; |
265 |
accept_nonword: |
266 |
Xcheck(ws, wp); |
267 |
switch (state) { |
268 |
case SADELIM: |
269 |
if (c == '(') |
270 |
statep->nparen++; |
271 |
else if (c == ')') |
272 |
statep->nparen--; |
273 |
else if (statep->nparen == 0 && (c == /*{*/ '}' || |
274 |
c == (int)statep->ls_adelim.delimiter)) { |
275 |
*wp++ = ADELIM; |
276 |
*wp++ = c; |
277 |
if (c == /*{*/ '}' || --statep->ls_adelim.num == 0) |
278 |
POP_STATE(); |
279 |
if (c == /*{*/ '}') |
280 |
POP_STATE(); |
281 |
break; |
282 |
} |
283 |
/* FALLTHROUGH */ |
284 |
case SBASE: |
285 |
if (c == '[' && (cf & (VARASN|ARRAYVAR))) { |
286 |
/* temporary */ |
287 |
*wp = EOS; |
288 |
if (is_wdvarname(Xstring(ws, wp), false)) { |
289 |
char *p, *tmp; |
290 |
|
291 |
if (arraysub(&tmp)) { |
292 |
*wp++ = CHAR; |
293 |
*wp++ = c; |
294 |
for (p = tmp; *p; ) { |
295 |
Xcheck(ws, wp); |
296 |
*wp++ = CHAR; |
297 |
*wp++ = *p++; |
298 |
} |
299 |
afree(tmp, ATEMP); |
300 |
break; |
301 |
} else { |
302 |
Source *s; |
303 |
|
304 |
s = pushs(SREREAD, |
305 |
source->areap); |
306 |
s->start = s->str = |
307 |
s->u.freeme = tmp; |
308 |
s->next = source; |
309 |
source = s; |
310 |
} |
311 |
} |
312 |
*wp++ = CHAR; |
313 |
*wp++ = c; |
314 |
break; |
315 |
} |
316 |
/* FALLTHROUGH */ |
317 |
Sbase1: /* includes *(...|...) pattern (*+?@!) */ |
318 |
if (c == '*' || c == '@' || c == '+' || c == '?' || |
319 |
c == '!') { |
320 |
c2 = getsc(); |
321 |
if (c2 == '(' /*)*/ ) { |
322 |
*wp++ = OPAT; |
323 |
*wp++ = c; |
324 |
PUSH_STATE(SPATTERN); |
325 |
break; |
326 |
} |
327 |
ungetsc(c2); |
328 |
} |
329 |
/* FALLTHROUGH */ |
330 |
Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ |
331 |
switch (c) { |
332 |
case '\\': |
333 |
getsc_qchar: |
334 |
if ((c = getsc())) { |
335 |
/* trailing \ is lost */ |
336 |
*wp++ = QCHAR; |
337 |
*wp++ = c; |
338 |
} |
339 |
break; |
340 |
case '\'': |
341 |
open_ssquote_unless_heredoc: |
342 |
if ((cf & HEREDOC)) |
343 |
goto store_char; |
344 |
*wp++ = OQUOTE; |
345 |
ignore_backslash_newline++; |
346 |
PUSH_STATE(SSQUOTE); |
347 |
break; |
348 |
case '"': |
349 |
open_sdquote: |
350 |
*wp++ = OQUOTE; |
351 |
PUSH_STATE(SDQUOTE); |
352 |
break; |
353 |
case '$': |
354 |
/* |
355 |
* processing of dollar sign belongs into |
356 |
* Subst, except for those which can open |
357 |
* a string: $'…' and $"…" |
358 |
*/ |
359 |
subst_dollar_ex: |
360 |
c = getsc(); |
361 |
switch (c) { |
362 |
case '"': |
363 |
goto open_sdquote; |
364 |
case '\'': |
365 |
goto open_sequote; |
366 |
default: |
367 |
goto SubstS; |
368 |
} |
369 |
default: |
370 |
goto Subst; |
371 |
} |
372 |
break; |
373 |
|
374 |
Subst: |
375 |
switch (c) { |
376 |
case '\\': |
377 |
c = getsc(); |
378 |
switch (c) { |
379 |
case '"': |
380 |
if ((cf & HEREDOC)) |
381 |
goto heredocquote; |
382 |
/* FALLTHROUGH */ |
383 |
case '\\': |
384 |
case '$': case '`': |
385 |
store_qchar: |
386 |
*wp++ = QCHAR; |
387 |
*wp++ = c; |
388 |
break; |
389 |
default: |
390 |
heredocquote: |
391 |
Xcheck(ws, wp); |
392 |
if (c) { |
393 |
/* trailing \ is lost */ |
394 |
*wp++ = CHAR; |
395 |
*wp++ = '\\'; |
396 |
*wp++ = CHAR; |
397 |
*wp++ = c; |
398 |
} |
399 |
break; |
400 |
} |
401 |
break; |
402 |
case '$': |
403 |
c = getsc(); |
404 |
SubstS: |
405 |
if (c == '(') /*)*/ { |
406 |
c = getsc(); |
407 |
if (c == '(') /*)*/ { |
408 |
*wp++ = EXPRSUB; |
409 |
PUSH_SRETRACE(SASPAREN); |
410 |
statep->nparen = 2; |
411 |
*retrace_info->xp++ = '('; |
412 |
} else { |
413 |
ungetsc(c); |
414 |
subst_command: |
415 |
c = COMSUB; |
416 |
subst_command2: |
417 |
sp = yyrecursive(c); |
418 |
cz = strlen(sp) + 1; |
419 |
XcheckN(ws, wp, cz); |
420 |
*wp++ = c; |
421 |
memcpy(wp, sp, cz); |
422 |
wp += cz; |
423 |
} |
424 |
} else if (c == '{') /*}*/ { |
425 |
if ((c = getsc()) == '|') { |
426 |
/* |
427 |
* non-subenvironment |
428 |
* value substitution |
429 |
*/ |
430 |
c = VALSUB; |
431 |
goto subst_command2; |
432 |
} else if (ctype(c, C_IFSWS)) { |
433 |
/* |
434 |
* non-subenvironment |
435 |
* "command" substitution |
436 |
*/ |
437 |
c = FUNSUB; |
438 |
goto subst_command2; |
439 |
} |
440 |
ungetsc(c); |
441 |
*wp++ = OSUBST; |
442 |
*wp++ = '{'; /*}*/ |
443 |
wp = get_brace_var(&ws, wp); |
444 |
c = getsc(); |
445 |
/* allow :# and :% (ksh88 compat) */ |
446 |
if (c == ':') { |
447 |
*wp++ = CHAR; |
448 |
*wp++ = c; |
449 |
c = getsc(); |
450 |
if (c == ':') { |
451 |
*wp++ = CHAR; |
452 |
*wp++ = '0'; |
453 |
*wp++ = ADELIM; |
454 |
*wp++ = ':'; |
455 |
PUSH_STATE(SBRACE); |
456 |
PUSH_STATE(SADELIM); |
457 |
statep->ls_adelim.delimiter = ':'; |
458 |
statep->ls_adelim.num = 1; |
459 |
statep->nparen = 0; |
460 |
break; |
461 |
} else if (ksh_isdigit(c) || |
462 |
c == '('/*)*/ || c == ' ' || |
463 |
/*XXX what else? */ |
464 |
c == '$') { |
465 |
/* substring subst. */ |
466 |
if (c != ' ') { |
467 |
*wp++ = CHAR; |
468 |
*wp++ = ' '; |
469 |
} |
470 |
ungetsc(c); |
471 |
PUSH_STATE(SBRACE); |
472 |
PUSH_STATE(SADELIM); |
473 |
statep->ls_adelim.delimiter = ':'; |
474 |
statep->ls_adelim.num = 2; |
475 |
statep->nparen = 0; |
476 |
break; |
477 |
} |
478 |
} else if (c == '/') { |
479 |
*wp++ = CHAR; |
480 |
*wp++ = c; |
481 |
if ((c = getsc()) == '/') { |
482 |
*wp++ = ADELIM; |
483 |
*wp++ = c; |
484 |
} else |
485 |
ungetsc(c); |
486 |
PUSH_STATE(SBRACE); |
487 |
PUSH_STATE(SADELIM); |
488 |
statep->ls_adelim.delimiter = '/'; |
489 |
statep->ls_adelim.num = 1; |
490 |
statep->nparen = 0; |
491 |
break; |
492 |
} |
493 |
/* |
494 |
* If this is a trim operation, |
495 |
* treat (,|,) specially in STBRACE. |
496 |
*/ |
497 |
if (ctype(c, C_SUBOP2)) { |
498 |
ungetsc(c); |
499 |
if (Flag(FSH)) |
500 |
PUSH_STATE(STBRACEBOURNE); |
501 |
else |
502 |
PUSH_STATE(STBRACEKORN); |
503 |
} else { |
504 |
ungetsc(c); |
505 |
if (state == SDQUOTE || |
506 |
state == SQBRACE) |
507 |
PUSH_STATE(SQBRACE); |
508 |
else |
509 |
PUSH_STATE(SBRACE); |
510 |
} |
511 |
} else if (ksh_isalphx(c)) { |
512 |
*wp++ = OSUBST; |
513 |
*wp++ = 'X'; |
514 |
do { |
515 |
Xcheck(ws, wp); |
516 |
*wp++ = c; |
517 |
c = getsc(); |
518 |
} while (ksh_isalnux(c)); |
519 |
*wp++ = '\0'; |
520 |
*wp++ = CSUBST; |
521 |
*wp++ = 'X'; |
522 |
ungetsc(c); |
523 |
} else if (ctype(c, C_VAR1 | C_DIGIT)) { |
524 |
Xcheck(ws, wp); |
525 |
*wp++ = OSUBST; |
526 |
*wp++ = 'X'; |
527 |
*wp++ = c; |
528 |
*wp++ = '\0'; |
529 |
*wp++ = CSUBST; |
530 |
*wp++ = 'X'; |
531 |
} else { |
532 |
*wp++ = CHAR; |
533 |
*wp++ = '$'; |
534 |
ungetsc(c); |
535 |
} |
536 |
break; |
537 |
case '`': |
538 |
subst_gravis: |
539 |
PUSH_STATE(SBQUOTE); |
540 |
*wp++ = COMSUB; |
541 |
/* |
542 |
* Need to know if we are inside double quotes |
543 |
* since sh/AT&T-ksh translate the \" to " in |
544 |
* "`...\"...`". |
545 |
* This is not done in POSIX mode (section |
546 |
* 3.2.3, Double Quotes: "The backquote shall |
547 |
* retain its special meaning introducing the |
548 |
* other form of command substitution (see |
549 |
* 3.6.3). The portion of the quoted string |
550 |
* from the initial backquote and the |
551 |
* characters up to the next backquote that |
552 |
* is not preceded by a backslash (having |
553 |
* escape characters removed) defines that |
554 |
* command whose output replaces `...` when |
555 |
* the word is expanded." |
556 |
* Section 3.6.3, Command Substitution: |
557 |
* "Within the backquoted style of command |
558 |
* substitution, backslash shall retain its |
559 |
* literal meaning, except when followed by |
560 |
* $ ` \."). |
561 |
*/ |
562 |
statep->ls_bool = false; |
563 |
s2 = statep; |
564 |
base = state_info.base; |
565 |
while (/* CONSTCOND */ 1) { |
566 |
for (; s2 != base; s2--) { |
567 |
if (s2->type == SDQUOTE) { |
568 |
statep->ls_bool = true; |
569 |
break; |
570 |
} |
571 |
} |
572 |
if (s2 != base) |
573 |
break; |
574 |
if (!(s2 = s2->ls_base)) |
575 |
break; |
576 |
base = s2-- - STATE_BSIZE; |
577 |
} |
578 |
break; |
579 |
case QCHAR: |
580 |
if (cf & LQCHAR) { |
581 |
*wp++ = QCHAR; |
582 |
*wp++ = getsc(); |
583 |
break; |
584 |
} |
585 |
/* FALLTHROUGH */ |
586 |
default: |
587 |
store_char: |
588 |
*wp++ = CHAR; |
589 |
*wp++ = c; |
590 |
} |
591 |
break; |
592 |
|
593 |
case SEQUOTE: |
594 |
if (c == '\'') { |
595 |
POP_STATE(); |
596 |
*wp++ = CQUOTE; |
597 |
ignore_backslash_newline--; |
598 |
} else if (c == '\\') { |
599 |
if ((c2 = unbksl(true, s_get, s_put)) == -1) |
600 |
c2 = s_get(); |
601 |
if (c2 == 0) |
602 |
statep->ls_bool = true; |
603 |
if (!statep->ls_bool) { |
604 |
char ts[4]; |
605 |
|
606 |
if ((unsigned int)c2 < 0x100) { |
607 |
*wp++ = QCHAR; |
608 |
*wp++ = c2; |
609 |
} else { |
610 |
cz = utf_wctomb(ts, c2 - 0x100); |
611 |
ts[cz] = 0; |
612 |
for (cz = 0; ts[cz]; ++cz) { |
613 |
*wp++ = QCHAR; |
614 |
*wp++ = ts[cz]; |
615 |
} |
616 |
} |
617 |
} |
618 |
} else if (!statep->ls_bool) { |
619 |
*wp++ = QCHAR; |
620 |
*wp++ = c; |
621 |
} |
622 |
break; |
623 |
|
624 |
case SSQUOTE: |
625 |
if (c == '\'') { |
626 |
POP_STATE(); |
627 |
if ((cf & HEREDOC) || state == SQBRACE) |
628 |
goto store_char; |
629 |
*wp++ = CQUOTE; |
630 |
ignore_backslash_newline--; |
631 |
} else { |
632 |
*wp++ = QCHAR; |
633 |
*wp++ = c; |
634 |
} |
635 |
break; |
636 |
|
637 |
case SDQUOTE: |
638 |
if (c == '"') { |
639 |
POP_STATE(); |
640 |
*wp++ = CQUOTE; |
641 |
} else |
642 |
goto Subst; |
643 |
break; |
644 |
|
645 |
/* $(( ... )) */ |
646 |
case SASPAREN: |
647 |
if (c == '(') |
648 |
statep->nparen++; |
649 |
else if (c == ')') { |
650 |
statep->nparen--; |
651 |
if (statep->nparen == 1) { |
652 |
/* end of EXPRSUB */ |
653 |
POP_SRETRACE(); |
654 |
|
655 |
if ((c2 = getsc()) == /*(*/ ')') { |
656 |
cz = strlen(sp) - 2; |
657 |
XcheckN(ws, wp, cz); |
658 |
memcpy(wp, sp + 1, cz); |
659 |
wp += cz; |
660 |
afree(sp, ATEMP); |
661 |
*wp++ = '\0'; |
662 |
break; |
663 |
} else { |
664 |
Source *s; |
665 |
|
666 |
ungetsc(c2); |
667 |
/* |
668 |
* mismatched parenthesis - |
669 |
* assume we were really |
670 |
* parsing a $(...) expression |
671 |
*/ |
672 |
--wp; |
673 |
s = pushs(SREREAD, |
674 |
source->areap); |
675 |
s->start = s->str = |
676 |
s->u.freeme = sp; |
677 |
s->next = source; |
678 |
source = s; |
679 |
goto subst_command; |
680 |
} |
681 |
} |
682 |
} |
683 |
/* reuse existing state machine */ |
684 |
goto Sbase2; |
685 |
|
686 |
case SQBRACE: |
687 |
if (c == '\\') { |
688 |
/* |
689 |
* perform POSIX "quote removal" if the back- |
690 |
* slash is "special", i.e. same cases as the |
691 |
* {case '\\':} in Subst: plus closing brace; |
692 |
* in mksh code "quote removal" on '\c' means |
693 |
* write QCHAR+c, otherwise CHAR+\+CHAR+c are |
694 |
* emitted (in heredocquote:) |
695 |
*/ |
696 |
if ((c = getsc()) == '"' || c == '\\' || |
697 |
c == '$' || c == '`' || c == /*{*/'}') |
698 |
goto store_qchar; |
699 |
goto heredocquote; |
700 |
} |
701 |
goto common_SQBRACE; |
702 |
|
703 |
case SBRACE: |
704 |
if (c == '\'') |
705 |
goto open_ssquote_unless_heredoc; |
706 |
else if (c == '\\') |
707 |
goto getsc_qchar; |
708 |
common_SQBRACE: |
709 |
if (c == '"') |
710 |
goto open_sdquote; |
711 |
else if (c == '$') |
712 |
goto subst_dollar_ex; |
713 |
else if (c == '`') |
714 |
goto subst_gravis; |
715 |
else if (c != /*{*/ '}') |
716 |
goto store_char; |
717 |
POP_STATE(); |
718 |
*wp++ = CSUBST; |
719 |
*wp++ = /*{*/ '}'; |
720 |
break; |
721 |
|
722 |
/* Same as SBASE, except (,|,) treated specially */ |
723 |
case STBRACEKORN: |
724 |
if (c == '|') |
725 |
*wp++ = SPAT; |
726 |
else if (c == '(') { |
727 |
*wp++ = OPAT; |
728 |
/* simile for @ */ |
729 |
*wp++ = ' '; |
730 |
PUSH_STATE(SPATTERN); |
731 |
} else /* FALLTHROUGH */ |
732 |
case STBRACEBOURNE: |
733 |
if (c == /*{*/ '}') { |
734 |
POP_STATE(); |
735 |
*wp++ = CSUBST; |
736 |
*wp++ = /*{*/ '}'; |
737 |
} else |
738 |
goto Sbase1; |
739 |
break; |
740 |
|
741 |
case SBQUOTE: |
742 |
if (c == '`') { |
743 |
*wp++ = 0; |
744 |
POP_STATE(); |
745 |
} else if (c == '\\') { |
746 |
switch (c = getsc()) { |
747 |
case 0: |
748 |
/* trailing \ is lost */ |
749 |
break; |
750 |
case '\\': |
751 |
case '$': case '`': |
752 |
*wp++ = c; |
753 |
break; |
754 |
case '"': |
755 |
if (statep->ls_bool) { |
756 |
*wp++ = c; |
757 |
break; |
758 |
} |
759 |
/* FALLTHROUGH */ |
760 |
default: |
761 |
*wp++ = '\\'; |
762 |
*wp++ = c; |
763 |
break; |
764 |
} |
765 |
} else |
766 |
*wp++ = c; |
767 |
break; |
768 |
|
769 |
/* ONEWORD */ |
770 |
case SWORD: |
771 |
goto Subst; |
772 |
|
773 |
/* LETEXPR: (( ... )) */ |
774 |
case SLETPAREN: |
775 |
if (c == /*(*/ ')') { |
776 |
if (statep->nparen > 0) |
777 |
--statep->nparen; |
778 |
else if ((c2 = getsc()) == /*(*/ ')') { |
779 |
c = 0; |
780 |
*wp++ = CQUOTE; |
781 |
goto Done; |
782 |
} else { |
783 |
Source *s; |
784 |
|
785 |
ungetsc(c2); |
786 |
/* |
787 |
* mismatched parenthesis - |
788 |
* assume we were really |
789 |
* parsing a (...) expression |
790 |
*/ |
791 |
*wp = EOS; |
792 |
sp = Xstring(ws, wp); |
793 |
dp = wdstrip(sp, WDS_KEEPQ); |
794 |
s = pushs(SREREAD, source->areap); |
795 |
s->start = s->str = s->u.freeme = dp; |
796 |
s->next = source; |
797 |
source = s; |
798 |
return ('('/*)*/); |
799 |
} |
800 |
} else if (c == '(') |
801 |
/* |
802 |
* parentheses inside quotes and |
803 |
* backslashes are lost, but AT&T ksh |
804 |
* doesn't count them either |
805 |
*/ |
806 |
++statep->nparen; |
807 |
goto Sbase2; |
808 |
|
809 |
/* <<, <<-, <<< delimiter */ |
810 |
case SHEREDELIM: |
811 |
/* |
812 |
* here delimiters need a special case since |
813 |
* $ and `...` are not to be treated specially |
814 |
*/ |
815 |
switch (c) { |
816 |
case '\\': |
817 |
if ((c = getsc())) { |
818 |
/* trailing \ is lost */ |
819 |
*wp++ = QCHAR; |
820 |
*wp++ = c; |
821 |
} |
822 |
break; |
823 |
case '\'': |
824 |
goto open_ssquote_unless_heredoc; |
825 |
case '$': |
826 |
if ((c2 = getsc()) == '\'') { |
827 |
open_sequote: |
828 |
*wp++ = OQUOTE; |
829 |
ignore_backslash_newline++; |
830 |
PUSH_STATE(SEQUOTE); |
831 |
statep->ls_bool = false; |
832 |
break; |
833 |
} else if (c2 == '"') { |
834 |
/* FALLTHROUGH */ |
835 |
case '"': |
836 |
PUSH_SRETRACE(SHEREDQUOTE); |
837 |
break; |
838 |
} |
839 |
ungetsc(c2); |
840 |
/* FALLTHROUGH */ |
841 |
default: |
842 |
*wp++ = CHAR; |
843 |
*wp++ = c; |
844 |
} |
845 |
break; |
846 |
|
847 |
/* " in <<, <<-, <<< delimiter */ |
848 |
case SHEREDQUOTE: |
849 |
if (c != '"') |
850 |
goto Subst; |
851 |
POP_SRETRACE(); |
852 |
dp = strnul(sp) - 1; |
853 |
/* remove the trailing double quote */ |
854 |
*dp = '\0'; |
855 |
/* store the quoted string */ |
856 |
*wp++ = OQUOTE; |
857 |
XcheckN(ws, wp, (dp - sp)); |
858 |
dp = sp; |
859 |
while ((c = *dp++)) { |
860 |
if (c == '\\') { |
861 |
switch ((c = *dp++)) { |
862 |
case '\\': |
863 |
case '"': |
864 |
case '$': |
865 |
case '`': |
866 |
break; |
867 |
default: |
868 |
*wp++ = CHAR; |
869 |
*wp++ = '\\'; |
870 |
break; |
871 |
} |
872 |
} |
873 |
*wp++ = CHAR; |
874 |
*wp++ = c; |
875 |
} |
876 |
afree(sp, ATEMP); |
877 |
*wp++ = CQUOTE; |
878 |
state = statep->type = SHEREDELIM; |
879 |
break; |
880 |
|
881 |
/* in *(...|...) pattern (*+?@!) */ |
882 |
case SPATTERN: |
883 |
if (c == /*(*/ ')') { |
884 |
*wp++ = CPAT; |
885 |
POP_STATE(); |
886 |
} else if (c == '|') { |
887 |
*wp++ = SPAT; |
888 |
} else if (c == '(') { |
889 |
*wp++ = OPAT; |
890 |
/* simile for @ */ |
891 |
*wp++ = ' '; |
892 |
PUSH_STATE(SPATTERN); |
893 |
} else |
894 |
goto Sbase1; |
895 |
break; |
896 |
} |
897 |
} |
898 |
Done: |
899 |
Xcheck(ws, wp); |
900 |
if (statep != &states[1]) |
901 |
/* XXX figure out what is missing */ |
902 |
yyerror("no closing quote\n"); |
903 |
|
904 |
/* This done to avoid tests for SHEREDELIM wherever SBASE tested */ |
905 |
if (state == SHEREDELIM) |
906 |
state = SBASE; |
907 |
|
908 |
dp = Xstring(ws, wp); |
909 |
if (state == SBASE && ( |
910 |
#ifndef MKSH_LEGACY_MODE |
911 |
(c == '&' && !Flag(FSH) && !Flag(FPOSIX)) || |
912 |
#endif |
913 |
c == '<' || c == '>')) { |
914 |
struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); |
915 |
|
916 |
if (Xlength(ws, wp) == 0) |
917 |
iop->unit = c == '<' ? 0 : 1; |
918 |
else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { |
919 |
if (dp[c2] != CHAR) |
920 |
goto no_iop; |
921 |
if (!ksh_isdigit(dp[c2 + 1])) |
922 |
goto no_iop; |
923 |
iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0'; |
924 |
} |
925 |
|
926 |
if (iop->unit >= FDBASE) |
927 |
goto no_iop; |
928 |
|
929 |
if (c == '&') { |
930 |
if ((c2 = getsc()) != '>') { |
931 |
ungetsc(c2); |
932 |
goto no_iop; |
933 |
} |
934 |
c = c2; |
935 |
iop->flag = IOBASH; |
936 |
} else |
937 |
iop->flag = 0; |
938 |
|
939 |
c2 = getsc(); |
940 |
/* <<, >>, <> are ok, >< is not */ |
941 |
if (c == c2 || (c == '<' && c2 == '>')) { |
942 |
iop->flag |= c == c2 ? |
943 |
(c == '>' ? IOCAT : IOHERE) : IORDWR; |
944 |
if (iop->flag == IOHERE) { |
945 |
if ((c2 = getsc()) == '-') { |
946 |
iop->flag |= IOSKIP; |
947 |
c2 = getsc(); |
948 |
} else if (c2 == '<') |
949 |
iop->flag |= IOHERESTR; |
950 |
ungetsc(c2); |
951 |
if (c2 == '\n') |
952 |
iop->flag |= IONDELIM; |
953 |
} |
954 |
} else if (c2 == '&') |
955 |
iop->flag |= IODUP | (c == '<' ? IORDUP : 0); |
956 |
else { |
957 |
iop->flag |= c == '>' ? IOWRITE : IOREAD; |
958 |
if (c == '>' && c2 == '|') |
959 |
iop->flag |= IOCLOB; |
960 |
else |
961 |
ungetsc(c2); |
962 |
} |
963 |
|
964 |
iop->name = NULL; |
965 |
iop->delim = NULL; |
966 |
iop->heredoc = NULL; |
967 |
/* free word */ |
968 |
Xfree(ws, wp); |
969 |
yylval.iop = iop; |
970 |
return (REDIR); |
971 |
no_iop: |
972 |
afree(iop, ATEMP); |
973 |
} |
974 |
|
975 |
if (wp == dp && state == SBASE) { |
976 |
/* free word */ |
977 |
Xfree(ws, wp); |
978 |
/* no word, process LEX1 character */ |
979 |
if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { |
980 |
if ((c2 = getsc()) == c) |
981 |
c = (c == ';') ? BREAK : |
982 |
(c == '|') ? LOGOR : |
983 |
(c == '&') ? LOGAND : |
984 |
/* c == '(' ) */ MDPAREN; |
985 |
else if (c == '|' && c2 == '&') |
986 |
c = COPROC; |
987 |
else if (c == ';' && c2 == '|') |
988 |
c = BRKEV; |
989 |
else if (c == ';' && c2 == '&') |
990 |
c = BRKFT; |
991 |
else |
992 |
ungetsc(c2); |
993 |
#ifndef MKSH_SMALL |
994 |
if (c == BREAK) { |
995 |
if ((c2 = getsc()) == '&') |
996 |
c = BRKEV; |
997 |
else |
998 |
ungetsc(c2); |
999 |
} |
1000 |
#endif |
1001 |
} else if (c == '\n') { |
1002 |
gethere(false); |
1003 |
if (cf & CONTIN) |
1004 |
goto Again; |
1005 |
} else if (c == '\0') |
1006 |
/* need here strings at EOF */ |
1007 |
gethere(true); |
1008 |
return (c); |
1009 |
} |
1010 |
|
1011 |
/* terminate word */ |
1012 |
*wp++ = EOS; |
1013 |
yylval.cp = Xclose(ws, wp); |
1014 |
if (state == SWORD || state == SLETPAREN |
1015 |
/* XXX ONEWORD? */) |
1016 |
return (LWORD); |
1017 |
|
1018 |
/* unget terminator */ |
1019 |
ungetsc(c); |
1020 |
|
1021 |
/* |
1022 |
* note: the alias-vs-function code below depends on several |
1023 |
* interna: starting from here, source->str is not modified; |
1024 |
* the way getsc() and ungetsc() operate; etc. |
1025 |
*/ |
1026 |
|
1027 |
/* copy word to unprefixed string ident */ |
1028 |
sp = yylval.cp; |
1029 |
dp = ident; |
1030 |
if ((cf & HEREDELIM) && (sp[1] == '<')) |
1031 |
while ((dp - ident) < IDENT) { |
1032 |
if ((c = *sp++) == CHAR) |
1033 |
*dp++ = *sp++; |
1034 |
else if ((c != OQUOTE) && (c != CQUOTE)) |
1035 |
break; |
1036 |
} |
1037 |
else |
1038 |
while ((dp - ident) < IDENT && (c = *sp++) == CHAR) |
1039 |
*dp++ = *sp++; |
1040 |
/* Make sure the ident array stays '\0' padded */ |
1041 |
memset(dp, 0, (ident + IDENT) - dp + 1); |
1042 |
if (c != EOS) |
1043 |
/* word is not unquoted */ |
1044 |
*ident = '\0'; |
1045 |
|
1046 |
if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) { |
1047 |
struct tbl *p; |
1048 |
uint32_t h = hash(ident); |
1049 |
|
1050 |
if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && |
1051 |
(!(cf & ESACONLY) || p->val.i == ESAC || |
1052 |
p->val.i == /*{*/ '}')) { |
1053 |
afree(yylval.cp, ATEMP); |
1054 |
return (p->val.i); |
1055 |
} |
1056 |
if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && |
1057 |
(p->flag & ISSET)) { |
1058 |
/* |
1059 |
* this still points to the same character as the |
1060 |
* ungetsc'd terminator from above |
1061 |
*/ |
1062 |
const char *cp = source->str; |
1063 |
|
1064 |
/* prefer POSIX but not Korn functions over aliases */ |
1065 |
while (*cp == ' ' || *cp == '\t') |
1066 |
/* |
1067 |
* this is like getsc() without skipping |
1068 |
* over Source boundaries (including not |
1069 |
* parsing ungetsc'd characters that got |
1070 |
* pushed into an SREREAD) which is what |
1071 |
* we want here anyway: find out whether |
1072 |
* the alias name is followed by a POSIX |
1073 |
* function definition (only the opening |
1074 |
* parenthesis is checked though) |
1075 |
*/ |
1076 |
++cp; |
1077 |
/* prefer functions over aliases */ |
1078 |
if (cp[0] != '(' || cp[1] != ')') { |
1079 |
Source *s = source; |
1080 |
|
1081 |
while (s && (s->flags & SF_HASALIAS)) |
1082 |
if (s->u.tblp == p) |
1083 |
return (LWORD); |
1084 |
else |
1085 |
s = s->next; |
1086 |
/* push alias expansion */ |
1087 |
s = pushs(SALIAS, source->areap); |
1088 |
s->start = s->str = p->val.s; |
1089 |
s->u.tblp = p; |
1090 |
s->flags |= SF_HASALIAS; |
1091 |
s->next = source; |
1092 |
if (source->type == SEOF) { |
1093 |
/* prevent infinite recursion at EOS */ |
1094 |
source->u.tblp = p; |
1095 |
source->flags |= SF_HASALIAS; |
1096 |
} |
1097 |
source = s; |
1098 |
afree(yylval.cp, ATEMP); |
1099 |
goto Again; |
1100 |
} |
1101 |
} |
1102 |
} |
1103 |
|
1104 |
return (LWORD); |
1105 |
} |
1106 |
|
1107 |
static void |
1108 |
gethere(bool iseof) |
1109 |
{ |
1110 |
struct ioword **p; |
1111 |
|
1112 |
for (p = heres; p < herep; p++) |
1113 |
if (iseof && !((*p)->flag & IOHERESTR)) |
1114 |
/* only here strings at EOF */ |
1115 |
return; |
1116 |
else |
1117 |
readhere(*p); |
1118 |
herep = heres; |
1119 |
} |
1120 |
|
1121 |
/* |
1122 |
* read "<<word" text into temp file |
1123 |
*/ |
1124 |
|
1125 |
static void |
1126 |
readhere(struct ioword *iop) |
1127 |
{ |
1128 |
int c; |
1129 |
const char *eof, *eofp; |
1130 |
XString xs; |
1131 |
char *xp; |
1132 |
int xpos; |
1133 |
|
1134 |
if (iop->flag & IOHERESTR) { |
1135 |
/* process the here string */ |
1136 |
iop->heredoc = xp = evalstr(iop->delim, DOBLANK); |
1137 |
xpos = strlen(xp) - 1; |
1138 |
memmove(xp, xp + 1, xpos); |
1139 |
xp[xpos] = '\n'; |
1140 |
return; |
1141 |
} |
1142 |
|
1143 |
eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0); |
1144 |
|
1145 |
if (!(iop->flag & IOEVAL)) |
1146 |
ignore_backslash_newline++; |
1147 |
|
1148 |
Xinit(xs, xp, 256, ATEMP); |
1149 |
|
1150 |
heredoc_read_line: |
1151 |
/* beginning of line */ |
1152 |
eofp = eof; |
1153 |
xpos = Xsavepos(xs, xp); |
1154 |
if (iop->flag & IOSKIP) { |
1155 |
/* skip over leading tabs */ |
1156 |
while ((c = getsc()) == '\t') |
1157 |
/* nothing */; |
1158 |
goto heredoc_parse_char; |
1159 |
} |
1160 |
heredoc_read_char: |
1161 |
c = getsc(); |
1162 |
heredoc_parse_char: |
1163 |
/* compare with here document marker */ |
1164 |
if (!*eofp) { |
1165 |
/* end of here document marker, what to do? */ |
1166 |
switch (c) { |
1167 |
case /*(*/ ')': |
1168 |
if (!subshell_nesting_type) |
1169 |
/*- |
1170 |
* not allowed outside $(...) or (...) |
1171 |
* => mismatch |
1172 |
*/ |
1173 |
break; |
1174 |
/* allow $(...) or (...) to close here */ |
1175 |
ungetsc(/*(*/ ')'); |
1176 |
/* FALLTHROUGH */ |
1177 |
case 0: |
1178 |
/* |
1179 |
* Allow EOF here to commands without trailing |
1180 |
* newlines (mksh -c '...') will work as well. |
1181 |
*/ |
1182 |
case '\n': |
1183 |
/* Newline terminates here document marker */ |
1184 |
goto heredoc_found_terminator; |
1185 |
} |
1186 |
} else if (c == *eofp++) |
1187 |
/* store; then read and compare next character */ |
1188 |
goto heredoc_store_and_loop; |
1189 |
/* nope, mismatch; read until end of line */ |
1190 |
while (c != '\n') { |
1191 |
if (!c) |
1192 |
/* oops, reached EOF */ |
1193 |
yyerror("%s '%s' unclosed\n", "here document", eof); |
1194 |
/* store character */ |
1195 |
Xcheck(xs, xp); |
1196 |
Xput(xs, xp, c); |
1197 |
/* read next character */ |
1198 |
c = getsc(); |
1199 |
} |
1200 |
/* we read a newline as last character */ |
1201 |
heredoc_store_and_loop: |
1202 |
/* store character */ |
1203 |
Xcheck(xs, xp); |
1204 |
Xput(xs, xp, c); |
1205 |
if (c == '\n') |
1206 |
goto heredoc_read_line; |
1207 |
goto heredoc_read_char; |
1208 |
|
1209 |
heredoc_found_terminator: |
1210 |
/* jump back to saved beginning of line */ |
1211 |
xp = Xrestpos(xs, xp, xpos); |
1212 |
/* terminate, close and store */ |
1213 |
Xput(xs, xp, '\0'); |
1214 |
iop->heredoc = Xclose(xs, xp); |
1215 |
|
1216 |
if (!(iop->flag & IOEVAL)) |
1217 |
ignore_backslash_newline--; |
1218 |
} |
1219 |
|
1220 |
void |
1221 |
yyerror(const char *fmt, ...) |
1222 |
{ |
1223 |
va_list va; |
1224 |
|
1225 |
/* pop aliases and re-reads */ |
1226 |
while (source->type == SALIAS || source->type == SREREAD) |
1227 |
source = source->next; |
1228 |
/* zap pending input */ |
1229 |
source->str = null; |
1230 |
|
1231 |
error_prefix(true); |
1232 |
va_start(va, fmt); |
1233 |
shf_vfprintf(shl_out, fmt, va); |
1234 |
va_end(va); |
1235 |
errorfz(); |
1236 |
} |
1237 |
|
1238 |
/* |
1239 |
* input for yylex with alias expansion |
1240 |
*/ |
1241 |
|
1242 |
Source * |
1243 |
pushs(int type, Area *areap) |
1244 |
{ |
1245 |
Source *s; |
1246 |
|
1247 |
s = alloc(sizeof(Source), areap); |
1248 |
memset(s, 0, sizeof(Source)); |
1249 |
s->type = type; |
1250 |
s->str = null; |
1251 |
s->areap = areap; |
1252 |
if (type == SFILE || type == SSTDIN) |
1253 |
XinitN(s->xs, 256, s->areap); |
1254 |
return (s); |
1255 |
} |
1256 |
|
1257 |
static int |
1258 |
getsc_uu(void) |
1259 |
{ |
1260 |
Source *s = source; |
1261 |
int c; |
1262 |
|
1263 |
while ((c = *s->str++) == 0) { |
1264 |
/* return 0 for EOF by default */ |
1265 |
s->str = NULL; |
1266 |
switch (s->type) { |
1267 |
case SEOF: |
1268 |
s->str = null; |
1269 |
return (0); |
1270 |
|
1271 |
case SSTDIN: |
1272 |
case SFILE: |
1273 |
getsc_line(s); |
1274 |
break; |
1275 |
|
1276 |
case SWSTR: |
1277 |
break; |
1278 |
|
1279 |
case SSTRING: |
1280 |
case SSTRINGCMDLINE: |
1281 |
break; |
1282 |
|
1283 |
case SWORDS: |
1284 |
s->start = s->str = *s->u.strv++; |
1285 |
s->type = SWORDSEP; |
1286 |
break; |
1287 |
|
1288 |
case SWORDSEP: |
1289 |
if (*s->u.strv == NULL) { |
1290 |
s->start = s->str = "\n"; |
1291 |
s->type = SEOF; |
1292 |
} else { |
1293 |
s->start = s->str = " "; |
1294 |
s->type = SWORDS; |
1295 |
} |
1296 |
break; |
1297 |
|
1298 |
case SALIAS: |
1299 |
if (s->flags & SF_ALIASEND) { |
1300 |
/* pass on an unused SF_ALIAS flag */ |
1301 |
source = s->next; |
1302 |
source->flags |= s->flags & SF_ALIAS; |
1303 |
s = source; |
1304 |
} else if (*s->u.tblp->val.s && |
1305 |
(c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) { |
1306 |
/* pop source stack */ |
1307 |
source = s = s->next; |
1308 |
/* |
1309 |
* Note that this alias ended with a |
1310 |
* space, enabling alias expansion on |
1311 |
* the following word. |
1312 |
*/ |
1313 |
s->flags |= SF_ALIAS; |
1314 |
} else { |
1315 |
/* |
1316 |
* At this point, we need to keep the current |
1317 |
* alias in the source list so recursive |
1318 |
* aliases can be detected and we also need to |
1319 |
* return the next character. Do this by |
1320 |
* temporarily popping the alias to get the |
1321 |
* next character and then put it back in the |
1322 |
* source list with the SF_ALIASEND flag set. |
1323 |
*/ |
1324 |
/* pop source stack */ |
1325 |
source = s->next; |
1326 |
source->flags |= s->flags & SF_ALIAS; |
1327 |
c = getsc_uu(); |
1328 |
if (c) { |
1329 |
s->flags |= SF_ALIASEND; |
1330 |
s->ugbuf[0] = c; s->ugbuf[1] = '\0'; |
1331 |
s->start = s->str = s->ugbuf; |
1332 |
s->next = source; |
1333 |
source = s; |
1334 |
} else { |
1335 |
s = source; |
1336 |
/* avoid reading EOF twice */ |
1337 |
s->str = NULL; |
1338 |
break; |
1339 |
} |
1340 |
} |
1341 |
continue; |
1342 |
|
1343 |
case SREREAD: |
1344 |
if (s->start != s->ugbuf) |
1345 |
/* yuck */ |
1346 |
afree(s->u.freeme, ATEMP); |
1347 |
source = s = s->next; |
1348 |
continue; |
1349 |
} |
1350 |
if (s->str == NULL) { |
1351 |
s->type = SEOF; |
1352 |
s->start = s->str = null; |
1353 |
return ('\0'); |
1354 |
} |
1355 |
if (s->flags & SF_ECHO) { |
1356 |
shf_puts(s->str, shl_out); |
1357 |
shf_flush(shl_out); |
1358 |
} |
1359 |
} |
1360 |
return (c); |
1361 |
} |
1362 |
|
1363 |
static void |
1364 |
getsc_line(Source *s) |
1365 |
{ |
1366 |
char *xp = Xstring(s->xs, xp), *cp; |
1367 |
bool interactive = Flag(FTALKING) && s->type == SSTDIN; |
1368 |
bool have_tty = tobool(interactive && (s->flags & SF_TTY)); |
1369 |
|
1370 |
/* Done here to ensure nothing odd happens when a timeout occurs */ |
1371 |
XcheckN(s->xs, xp, LINE); |
1372 |
*xp = '\0'; |
1373 |
s->start = s->str = xp; |
1374 |
|
1375 |
if (have_tty && ksh_tmout) { |
1376 |
ksh_tmout_state = TMOUT_READING; |
1377 |
alarm(ksh_tmout); |
1378 |
} |
1379 |
if (interactive) |
1380 |
change_winsz(); |
1381 |
#ifndef MKSH_NO_CMDLINE_EDITING |
1382 |
if (have_tty && ( |
1383 |
#if !MKSH_S_NOVI |
1384 |
Flag(FVI) || |
1385 |
#endif |
1386 |
Flag(FEMACS) || Flag(FGMACS))) { |
1387 |
int nread; |
1388 |
|
1389 |
nread = x_read(xp); |
1390 |
if (nread < 0) |
1391 |
/* read error */ |
1392 |
nread = 0; |
1393 |
xp[nread] = '\0'; |
1394 |
xp += nread; |
1395 |
} else |
1396 |
#endif |
1397 |
{ |
1398 |
if (interactive) |
1399 |
pprompt(prompt, 0); |
1400 |
else |
1401 |
s->line++; |
1402 |
|
1403 |
while (/* CONSTCOND */ 1) { |
1404 |
char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); |
1405 |
|
1406 |
if (!p && shf_error(s->u.shf) && |
1407 |
shf_errno(s->u.shf) == EINTR) { |
1408 |
shf_clearerr(s->u.shf); |
1409 |
if (trap) |
1410 |
runtraps(0); |
1411 |
continue; |
1412 |
} |
1413 |
if (!p || (xp = p, xp[-1] == '\n')) |
1414 |
break; |
1415 |
/* double buffer size */ |
1416 |
/* move past NUL so doubling works... */ |
1417 |
xp++; |
1418 |
XcheckN(s->xs, xp, Xlength(s->xs, xp)); |
1419 |
/* ...and move back again */ |
1420 |
xp--; |
1421 |
} |
1422 |
/* |
1423 |
* flush any unwanted input so other programs/builtins |
1424 |
* can read it. Not very optimal, but less error prone |
1425 |
* than flushing else where, dealing with redirections, |
1426 |
* etc. |
1427 |
* TODO: reduce size of shf buffer (~128?) if SSTDIN |
1428 |
*/ |
1429 |
if (s->type == SSTDIN) |
1430 |
shf_flush(s->u.shf); |
1431 |
} |
1432 |
/* |
1433 |
* XXX: temporary kludge to restore source after a |
1434 |
* trap may have been executed. |
1435 |
*/ |
1436 |
source = s; |
1437 |
if (have_tty && ksh_tmout) { |
1438 |
ksh_tmout_state = TMOUT_EXECUTING; |
1439 |
alarm(0); |
1440 |
} |
1441 |
cp = Xstring(s->xs, xp); |
1442 |
rndpush(cp); |
1443 |
s->start = s->str = cp; |
1444 |
strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); |
1445 |
/* Note: if input is all nulls, this is not eof */ |
1446 |
if (Xlength(s->xs, xp) == 0) { |
1447 |
/* EOF */ |
1448 |
if (s->type == SFILE) |
1449 |
shf_fdclose(s->u.shf); |
1450 |
s->str = NULL; |
1451 |
} else if (interactive && *s->str && |
1452 |
(cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) { |
1453 |
histsave(&s->line, s->str, true, true); |
1454 |
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY |
1455 |
} else if (interactive && cur_prompt == PS1) { |
1456 |
cp = Xstring(s->xs, xp); |
1457 |
while (*cp && ctype(*cp, C_IFSWS)) |
1458 |
++cp; |
1459 |
if (!*cp) |
1460 |
histsync(); |
1461 |
#endif |
1462 |
} |
1463 |
if (interactive) |
1464 |
set_prompt(PS2, NULL); |
1465 |
} |
1466 |
|
1467 |
void |
1468 |
set_prompt(int to, Source *s) |
1469 |
{ |
1470 |
cur_prompt = to; |
1471 |
|
1472 |
switch (to) { |
1473 |
/* command */ |
1474 |
case PS1: |
1475 |
/* |
1476 |
* Substitute ! and !! here, before substitutions are done |
1477 |
* so ! in expanded variables are not expanded. |
1478 |
* NOTE: this is not what AT&T ksh does (it does it after |
1479 |
* substitutions, POSIX doesn't say which is to be done. |
1480 |
*/ |
1481 |
{ |
1482 |
struct shf *shf; |
1483 |
char * volatile ps1; |
1484 |
Area *saved_atemp; |
1485 |
|
1486 |
ps1 = str_val(global("PS1")); |
1487 |
shf = shf_sopen(NULL, strlen(ps1) * 2, |
1488 |
SHF_WR | SHF_DYNAMIC, NULL); |
1489 |
while (*ps1) |
1490 |
if (*ps1 != '!' || *++ps1 == '!') |
1491 |
shf_putchar(*ps1++, shf); |
1492 |
else |
1493 |
shf_fprintf(shf, "%d", |
1494 |
s ? s->line + 1 : 0); |
1495 |
ps1 = shf_sclose(shf); |
1496 |
saved_atemp = ATEMP; |
1497 |
newenv(E_ERRH); |
1498 |
if (kshsetjmp(e->jbuf)) { |
1499 |
prompt = safe_prompt; |
1500 |
/* |
1501 |
* Don't print an error - assume it has already |
1502 |
* been printed. Reason is we may have forked |
1503 |
* to run a command and the child may be |
1504 |
* unwinding its stack through this code as it |
1505 |
* exits. |
1506 |
*/ |
1507 |
} else { |
1508 |
char *cp = substitute(ps1, 0); |
1509 |
strdupx(prompt, cp, saved_atemp); |
1510 |
} |
1511 |
quitenv(NULL); |
1512 |
} |
1513 |
break; |
1514 |
/* command continuation */ |
1515 |
case PS2: |
1516 |
prompt = str_val(global("PS2")); |
1517 |
break; |
1518 |
} |
1519 |
} |
1520 |
|
1521 |
int |
1522 |
pprompt(const char *cp, int ntruncate) |
1523 |
{ |
1524 |
char delimiter = 0; |
1525 |
bool doprint = (ntruncate != -1); |
1526 |
bool indelimit = false; |
1527 |
int columns = 0, lines = 0; |
1528 |
|
1529 |
/* |
1530 |
* Undocumented AT&T ksh feature: |
1531 |
* If the second char in the prompt string is \r then the first |
1532 |
* char is taken to be a non-printing delimiter and any chars |
1533 |
* between two instances of the delimiter are not considered to |
1534 |
* be part of the prompt length |
1535 |
*/ |
1536 |
if (*cp && cp[1] == '\r') { |
1537 |
delimiter = *cp; |
1538 |
cp += 2; |
1539 |
} |
1540 |
for (; *cp; cp++) { |
1541 |
if (indelimit && *cp != delimiter) |
1542 |
; |
1543 |
else if (*cp == '\n' || *cp == '\r') { |
1544 |
lines += columns / x_cols + ((*cp == '\n') ? 1 : 0); |
1545 |
columns = 0; |
1546 |
} else if (*cp == '\t') { |
1547 |
columns = (columns | 7) + 1; |
1548 |
} else if (*cp == '\b') { |
1549 |
if (columns > 0) |
1550 |
columns--; |
1551 |
} else if (*cp == delimiter) |
1552 |
indelimit = !indelimit; |
1553 |
else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { |
1554 |
const char *cp2; |
1555 |
columns += utf_widthadj(cp, &cp2); |
1556 |
if (doprint && (indelimit || |
1557 |
(ntruncate < (x_cols * lines + columns)))) |
1558 |
shf_write(cp, cp2 - cp, shl_out); |
1559 |
cp = cp2 - /* loop increment */ 1; |
1560 |
continue; |
1561 |
} else |
1562 |
columns++; |
1563 |
if (doprint && (*cp != delimiter) && |
1564 |
(indelimit || (ntruncate < (x_cols * lines + columns)))) |
1565 |
shf_putc(*cp, shl_out); |
1566 |
} |
1567 |
if (doprint) |
1568 |
shf_flush(shl_out); |
1569 |
return (x_cols * lines + columns); |
1570 |
} |
1571 |
|
1572 |
/* |
1573 |
* Read the variable part of a ${...} expression (i.e. up to but not |
1574 |
* including the :[-+?=#%] or close-brace). |
1575 |
*/ |
1576 |
static char * |
1577 |
get_brace_var(XString *wsp, char *wp) |
1578 |
{ |
1579 |
char c; |
1580 |
enum parse_state { |
1581 |
PS_INITIAL, PS_SAW_HASH, PS_IDENT, |
1582 |
PS_NUMBER, PS_VAR1 |
1583 |
} state = PS_INITIAL; |
1584 |
|
1585 |
while (/* CONSTCOND */ 1) { |
1586 |
c = getsc(); |
1587 |
/* State machine to figure out where the variable part ends. */ |
1588 |
switch (state) { |
1589 |
case PS_INITIAL: |
1590 |
if (c == '#' || c == '!' || c == '%') { |
1591 |
state = PS_SAW_HASH; |
1592 |
break; |
1593 |
} |
1594 |
/* FALLTHROUGH */ |
1595 |
case PS_SAW_HASH: |
1596 |
if (ksh_isalphx(c)) |
1597 |
state = PS_IDENT; |
1598 |
else if (ksh_isdigit(c)) |
1599 |
state = PS_NUMBER; |
1600 |
else if (c == '#') { |
1601 |
if (state == PS_SAW_HASH) { |
1602 |
char c2; |
1603 |
|
1604 |
c2 = getsc(); |
1605 |
ungetsc(c2); |
1606 |
if (c2 != /*{*/ '}') { |
1607 |
ungetsc(c); |
1608 |
goto out; |
1609 |
} |
1610 |
} |
1611 |
state = PS_VAR1; |
1612 |
} else if (ctype(c, C_VAR1)) |
1613 |
state = PS_VAR1; |
1614 |
else |
1615 |
goto out; |
1616 |
break; |
1617 |
case PS_IDENT: |
1618 |
if (!ksh_isalnux(c)) { |
1619 |
if (c == '[') { |
1620 |
char *tmp, *p; |
1621 |
|
1622 |
if (!arraysub(&tmp)) |
1623 |
yyerror("missing ]\n"); |
1624 |
*wp++ = c; |
1625 |
for (p = tmp; *p; ) { |
1626 |
Xcheck(*wsp, wp); |
1627 |
*wp++ = *p++; |
1628 |
} |
1629 |
afree(tmp, ATEMP); |
1630 |
/* the ] */ |
1631 |
c = getsc(); |
1632 |
} |
1633 |
goto out; |
1634 |
} |
1635 |
break; |
1636 |
case PS_NUMBER: |
1637 |
if (!ksh_isdigit(c)) |
1638 |
goto out; |
1639 |
break; |
1640 |
case PS_VAR1: |
1641 |
goto out; |
1642 |
} |
1643 |
Xcheck(*wsp, wp); |
1644 |
*wp++ = c; |
1645 |
} |
1646 |
out: |
1647 |
/* end of variable part */ |
1648 |
*wp++ = '\0'; |
1649 |
ungetsc(c); |
1650 |
return (wp); |
1651 |
} |
1652 |
|
1653 |
/* |
1654 |
* Save an array subscript - returns true if matching bracket found, false |
1655 |
* if eof or newline was found. |
1656 |
* (Returned string double null terminated) |
1657 |
*/ |
1658 |
static bool |
1659 |
arraysub(char **strp) |
1660 |
{ |
1661 |
XString ws; |
1662 |
char *wp, c; |
1663 |
/* we are just past the initial [ */ |
1664 |
unsigned int depth = 1; |
1665 |
|
1666 |
Xinit(ws, wp, 32, ATEMP); |
1667 |
|
1668 |
do { |
1669 |
c = getsc(); |
1670 |
Xcheck(ws, wp); |
1671 |
*wp++ = c; |
1672 |
if (c == '[') |
1673 |
depth++; |
1674 |
else if (c == ']') |
1675 |
depth--; |
1676 |
} while (depth > 0 && c && c != '\n'); |
1677 |
|
1678 |
*wp++ = '\0'; |
1679 |
*strp = Xclose(ws, wp); |
1680 |
|
1681 |
return (tobool(depth == 0)); |
1682 |
} |
1683 |
|
1684 |
/* Unget a char: handles case when we are already at the start of the buffer */ |
1685 |
static void |
1686 |
ungetsc(int c) |
1687 |
{ |
1688 |
struct sretrace_info *rp = retrace_info; |
1689 |
|
1690 |
if (backslash_skip) |
1691 |
backslash_skip--; |
1692 |
/* Don't unget EOF... */ |
1693 |
if (source->str == null && c == '\0') |
1694 |
return; |
1695 |
while (rp) { |
1696 |
if (Xlength(rp->xs, rp->xp)) |
1697 |
rp->xp--; |
1698 |
rp = rp->next; |
1699 |
} |
1700 |
ungetsc_i(c); |
1701 |
} |
1702 |
static void |
1703 |
ungetsc_i(int c) |
1704 |
{ |
1705 |
if (source->str > source->start) |
1706 |
source->str--; |
1707 |
else { |
1708 |
Source *s; |
1709 |
|
1710 |
s = pushs(SREREAD, source->areap); |
1711 |
s->ugbuf[0] = c; s->ugbuf[1] = '\0'; |
1712 |
s->start = s->str = s->ugbuf; |
1713 |
s->next = source; |
1714 |
source = s; |
1715 |
} |
1716 |
} |
1717 |
|
1718 |
|
1719 |
/* Called to get a char that isn't a \newline sequence. */ |
1720 |
static int |
1721 |
getsc_bn(void) |
1722 |
{ |
1723 |
int c, c2; |
1724 |
|
1725 |
if (ignore_backslash_newline) |
1726 |
return (o_getsc_u()); |
1727 |
|
1728 |
if (backslash_skip == 1) { |
1729 |
backslash_skip = 2; |
1730 |
return (o_getsc_u()); |
1731 |
} |
1732 |
|
1733 |
backslash_skip = 0; |
1734 |
|
1735 |
while (/* CONSTCOND */ 1) { |
1736 |
c = o_getsc_u(); |
1737 |
if (c == '\\') { |
1738 |
if ((c2 = o_getsc_u()) == '\n') |
1739 |
/* ignore the \newline; get the next char... */ |
1740 |
continue; |
1741 |
ungetsc_i(c2); |
1742 |
backslash_skip = 1; |
1743 |
} |
1744 |
return (c); |
1745 |
} |
1746 |
} |
1747 |
|
1748 |
void |
1749 |
yyskiputf8bom(void) |
1750 |
{ |
1751 |
int c; |
1752 |
|
1753 |
if ((unsigned char)(c = o_getsc_u()) != 0xEF) { |
1754 |
ungetsc_i(c); |
1755 |
return; |
1756 |
} |
1757 |
if ((unsigned char)(c = o_getsc_u()) != 0xBB) { |
1758 |
ungetsc_i(c); |
1759 |
ungetsc_i(0xEF); |
1760 |
return; |
1761 |
} |
1762 |
if ((unsigned char)(c = o_getsc_u()) != 0xBF) { |
1763 |
ungetsc_i(c); |
1764 |
ungetsc_i(0xBB); |
1765 |
ungetsc_i(0xEF); |
1766 |
return; |
1767 |
} |
1768 |
UTFMODE |= 8; |
1769 |
} |
1770 |
|
1771 |
static Lex_state * |
1772 |
push_state_i(State_info *si, Lex_state *old_end) |
1773 |
{ |
1774 |
Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP); |
1775 |
|
1776 |
news[0].ls_base = old_end; |
1777 |
si->base = &news[0]; |
1778 |
si->end = &news[STATE_BSIZE]; |
1779 |
return (&news[1]); |
1780 |
} |
1781 |
|
1782 |
static Lex_state * |
1783 |
pop_state_i(State_info *si, Lex_state *old_end) |
1784 |
{ |
1785 |
Lex_state *old_base = si->base; |
1786 |
|
1787 |
si->base = old_end->ls_base - STATE_BSIZE; |
1788 |
si->end = old_end->ls_base; |
1789 |
|
1790 |
afree(old_base, ATEMP); |
1791 |
|
1792 |
return (si->base + STATE_BSIZE - 1); |
1793 |
} |
1794 |
|
1795 |
/* Function wrapper around getsc(): returns the next input character. */
static int
s_get(void)
{
	int ch = getsc();

	return (ch);
}
1800 |
|
1801 |
/* Function wrapper around ungetsc(): pushes c back onto the input. */
static void
s_put(int c)
{
	ungetsc(c);
}