ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/src/trunk/contrib/one-true-awk/b.c
Revision: 11168
Committed: Wed Jun 27 22:46:21 2018 UTC (5 years, 10 months ago) by laffer1
Content type: text/plain
File size: 23251 byte(s)
Log Message:
fix some pointer stuff

File Contents

# Content
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 /* lasciate ogne speranza, voi ch'intrate. */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: stable/10/contrib/one-true-awk/b.c 315582 2017-03-19 20:04:57Z pfg $");
29
30 #define DEBUG
31
32 #include <ctype.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include "awk.h"
37 #include "ytab.h"
38
39 #define HAT (NCHARS+2) /* matches ^ in regular expr */
40 /* NCHARS is 2**n */
41 #define MAXLIN 22
42
43 #define type(v) (v)->nobj /* badly overloaded here */
44 #define info(v) (v)->ntype /* badly overloaded here */
45 #define left(v) (v)->narg[0]
46 #define right(v) (v)->narg[1]
47 #define parent(v) (v)->nnext
48
49 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
50 #define ELEAF case EMPTYRE: /* empty string in regexp */
51 #define UNARY case STAR: case PLUS: case QUEST:
52
53 /* encoding in tree Nodes:
54 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
55 left is index, right contains value or pointer to value
56 unary (STAR, PLUS, QUEST): left is child, right is null
57 binary (CAT, OR): left and right are children
58 parent contains pointer to parent
59 */
60
61
62 int *setvec;
63 int *tmpset;
64 int maxsetvec = 0;
65
66 int rtok; /* next token in current re */
67 int rlxval;
68 static uschar *rlxstr;
69 static uschar *prestr; /* current position in current re */
70 static uschar *lastre; /* origin of last re */
71
72 static int setcnt;
73 static int poscnt;
74
75 char *patbeg;
76 int patlen;
77
78 #define NFA 20 /* cache this many dynamic fa's */
79 fa *fatab[NFA];
80 int nfatab = 0; /* entries in fatab */
81
82 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
83 {
84 int i, use, nuse;
85 fa *pfa;
86 static int now = 1;
87
88 if (setvec == 0) { /* first time through any RE */
89 maxsetvec = MAXLIN;
90 setvec = (int *) malloc(maxsetvec * sizeof(int));
91 tmpset = (int *) malloc(maxsetvec * sizeof(int));
92 if (setvec == 0 || tmpset == 0)
93 overflo("out of space initializing makedfa");
94 }
95
96 if (compile_time) /* a constant for sure */
97 return mkdfa(s, anchor);
98 for (i = 0; i < nfatab; i++) /* is it there already? */
99 if (fatab[i]->anchor == anchor
100 && strcmp((const char *) fatab[i]->restr, s) == 0) {
101 fatab[i]->use = now++;
102 return fatab[i];
103 }
104 pfa = mkdfa(s, anchor);
105 if (nfatab < NFA) { /* room for another */
106 fatab[nfatab] = pfa;
107 fatab[nfatab]->use = now++;
108 nfatab++;
109 return pfa;
110 }
111 use = fatab[0]->use; /* replace least-recently used */
112 nuse = 0;
113 for (i = 1; i < nfatab; i++)
114 if (fatab[i]->use < use) {
115 use = fatab[i]->use;
116 nuse = i;
117 }
118 freefa(fatab[nuse]);
119 fatab[nuse] = pfa;
120 pfa->use = now++;
121 return pfa;
122 }
123
124 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
125 /* anchor = 1 for anchored matches, else 0 */
126 {
127 Node *p, *p1;
128 fa *f;
129
130 p = reparse(s);
131 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
132 /* put ALL STAR in front of reg. exp. */
133 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
134 /* put FINAL after reg. exp. */
135
136 poscnt = 0;
137 penter(p1); /* enter parent pointers and leaf indices */
138 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
139 overflo("out of space for fa");
140 f->accept = poscnt-1; /* penter has computed number of positions in re */
141 cfoll(f, p1); /* set up follow sets */
142 freetr(p1);
143 if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
144 overflo("out of space in makedfa");
145 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
146 overflo("out of space in makedfa");
147 *f->posns[1] = 0;
148 f->initstat = makeinit(f, anchor);
149 f->anchor = anchor;
150 f->restr = (uschar *) tostring(s);
151 return f;
152 }
153
154 int makeinit(fa *f, int anchor)
155 {
156 int i, k;
157
158 f->curstat = 2;
159 f->out[2] = 0;
160 f->reset = 0;
161 k = *(f->re[0].lfollow);
162 xfree(f->posns[2]);
163 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
164 overflo("out of space in makeinit");
165 for (i=0; i <= k; i++) {
166 (f->posns[2])[i] = (f->re[0].lfollow)[i];
167 }
168 if ((f->posns[2])[1] == f->accept)
169 f->out[2] = 1;
170 for (i=0; i < NCHARS; i++)
171 f->gototab[2][i] = 0;
172 f->curstat = cgoto(f, 2, HAT);
173 if (anchor) {
174 *f->posns[2] = k-1; /* leave out position 0 */
175 for (i=0; i < k; i++) {
176 (f->posns[0])[i] = (f->posns[2])[i];
177 }
178
179 f->out[0] = f->out[2];
180 if (f->curstat != 2)
181 --(*f->posns[f->curstat]);
182 }
183 return f->curstat;
184 }
185
186 void penter(Node *p) /* set up parent pointers and leaf indices */
187 {
188 switch (type(p)) {
189 ELEAF
190 LEAF
191 info(p) = poscnt;
192 poscnt++;
193 break;
194 UNARY
195 penter(left(p));
196 parent(left(p)) = p;
197 break;
198 case CAT:
199 case OR:
200 penter(left(p));
201 penter(right(p));
202 parent(left(p)) = p;
203 parent(right(p)) = p;
204 break;
205 default: /* can't happen */
206 FATAL("can't happen: unknown type %d in penter", type(p));
207 break;
208 }
209 }
210
211 void freetr(Node *p) /* free parse tree */
212 {
213 switch (type(p)) {
214 ELEAF
215 LEAF
216 xfree(p);
217 break;
218 UNARY
219 freetr(left(p));
220 xfree(p);
221 break;
222 case CAT:
223 case OR:
224 freetr(left(p));
225 freetr(right(p));
226 xfree(p);
227 break;
228 default: /* can't happen */
229 FATAL("can't happen: unknown type %d in freetr", type(p));
230 break;
231 }
232 }
233
234 /* in the parsing of regular expressions, metacharacters like . have */
235 /* to be seen literally; \056 is not a metacharacter. */
236
237 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
238 { /* only pick up one 8-bit byte (2 chars) */
239 uschar *p;
240 int n = 0;
241 int i;
242
243 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
244 if (isdigit(*p))
245 n = 16 * n + *p - '0';
246 else if (*p >= 'a' && *p <= 'f')
247 n = 16 * n + *p - 'a' + 10;
248 else if (*p >= 'A' && *p <= 'F')
249 n = 16 * n + *p - 'A' + 10;
250 }
251 *pp = (uschar *) p;
252 return n;
253 }
254
255 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
256
257 int quoted(uschar **pp) /* pick up next thing after a \\ */
258 /* and increment *pp */
259 {
260 uschar *p = *pp;
261 int c;
262
263 if ((c = *p++) == 't')
264 c = '\t';
265 else if (c == 'n')
266 c = '\n';
267 else if (c == 'f')
268 c = '\f';
269 else if (c == 'r')
270 c = '\r';
271 else if (c == 'b')
272 c = '\b';
273 else if (c == '\\')
274 c = '\\';
275 else if (c == 'x') { /* hexadecimal goo follows */
276 c = hexstr(&p); /* this adds a null if number is invalid */
277 } else if (isoctdigit(c)) { /* \d \dd \ddd */
278 int n = c - '0';
279 if (isoctdigit(*p)) {
280 n = 8 * n + *p++ - '0';
281 if (isoctdigit(*p))
282 n = 8 * n + *p++ - '0';
283 }
284 c = n;
285 } /* else */
286 /* c = c; */
287 *pp = p;
288 return c;
289 }
290
291 static int collate_range_cmp(int a, int b)
292 {
293 static char s[2][2];
294
295 if ((uschar)a == (uschar)b)
296 return 0;
297 s[0][0] = a;
298 s[1][0] = b;
299 return (strcoll(s[0], s[1]));
300 }
301
302 char *cclenter(const char *argp) /* add a character class */
303 {
304 int i, c, c2;
305 int j;
306 uschar *p = (uschar *) argp;
307 uschar *op, *bp;
308 static uschar *buf = 0;
309 static int bufsz = 100;
310
311 op = p;
312 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
313 FATAL("out of space for character class [%.10s...] 1", p);
314 bp = buf;
315 for (i = 0; (c = *p++) != 0; ) {
316 if (c == '\\') {
317 c = quoted(&p);
318 } else if (c == '-' && i > 0 && bp[-1] != 0) {
319 if (*p != 0) {
320 c = bp[-1];
321 c2 = *p++;
322 if (c2 == '\\')
323 c2 = quoted(&p);
324 if (collate_range_cmp(c, c2) > 0) {
325 bp--;
326 i--;
327 continue;
328 }
329 for (j = 0; j < NCHARS; j++) {
330 if ((collate_range_cmp(c, j) > 0) ||
331 collate_range_cmp(j, c2) > 0)
332 continue;
333 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
334 FATAL("out of space for character class [%.10s...] 2", p);
335 *bp++ = j;
336 i++;
337 }
338 continue;
339 }
340 }
341 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
342 FATAL("out of space for character class [%.10s...] 3", p);
343 *bp++ = c;
344 i++;
345 }
346 *bp = 0;
347 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
348 xfree(op);
349 return (char *) tostring((char *) buf);
350 }
351
352 void overflo(const char *s)
353 {
354 FATAL("regular expression too big: %.30s...", s);
355 }
356
357 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
358 {
359 int i;
360 int *p;
361
362 switch (type(v)) {
363 ELEAF
364 LEAF
365 f->re[info(v)].ltype = type(v);
366 f->re[info(v)].lval.np = right(v);
367 while (f->accept >= maxsetvec) { /* guessing here! */
368 maxsetvec *= 4;
369 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
370 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
371 if (setvec == 0 || tmpset == 0)
372 overflo("out of space in cfoll()");
373 }
374 for (i = 0; i <= f->accept; i++)
375 setvec[i] = 0;
376 setcnt = 0;
377 follow(v); /* computes setvec and setcnt */
378 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
379 overflo("out of space building follow set");
380 f->re[info(v)].lfollow = p;
381 *p = setcnt;
382 for (i = f->accept; i >= 0; i--)
383 if (setvec[i] == 1)
384 *++p = i;
385 break;
386 UNARY
387 cfoll(f,left(v));
388 break;
389 case CAT:
390 case OR:
391 cfoll(f,left(v));
392 cfoll(f,right(v));
393 break;
394 default: /* can't happen */
395 FATAL("can't happen: unknown type %d in cfoll", type(v));
396 }
397 }
398
399 int first(Node *p) /* collects initially active leaves of p into setvec */
400 /* returns 0 if p matches empty string */
401 {
402 int b, lp;
403
404 switch (type(p)) {
405 ELEAF
406 LEAF
407 lp = info(p); /* look for high-water mark of subscripts */
408 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
409 maxsetvec *= 4;
410 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
411 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
412 if (setvec == 0 || tmpset == 0)
413 overflo("out of space in first()");
414 }
415 if (type(p) == EMPTYRE) {
416 setvec[lp] = 0;
417 return(0);
418 }
419 if (setvec[lp] != 1) {
420 setvec[lp] = 1;
421 setcnt++;
422 }
423 if (type(p) == CCL && (*(char *) right(p)) == '\0')
424 return(0); /* empty CCL */
425 else return(1);
426 case PLUS:
427 if (first(left(p)) == 0) return(0);
428 return(1);
429 case STAR:
430 case QUEST:
431 first(left(p));
432 return(0);
433 case CAT:
434 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
435 return(1);
436 case OR:
437 b = first(right(p));
438 if (first(left(p)) == 0 || b == 0) return(0);
439 return(1);
440 }
441 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
442 return(-1);
443 }
444
445 void follow(Node *v) /* collects leaves that can follow v into setvec */
446 {
447 Node *p;
448
449 if (type(v) == FINAL)
450 return;
451 p = parent(v);
452 switch (type(p)) {
453 case STAR:
454 case PLUS:
455 first(v);
456 follow(p);
457 return;
458
459 case OR:
460 case QUEST:
461 follow(p);
462 return;
463
464 case CAT:
465 if (v == left(p)) { /* v is left child of p */
466 if (first(right(p)) == 0) {
467 follow(p);
468 return;
469 }
470 } else /* v is right child */
471 follow(p);
472 return;
473 }
474 }
475
476 int member(int c, const char *sarg) /* is c in s? */
477 {
478 uschar *s = (uschar *) sarg;
479
480 while (*s)
481 if (c == *s++)
482 return(1);
483 return(0);
484 }
485
486 int match(fa *f, const char *p0) /* shortest match ? */
487 {
488 int s, ns;
489 uschar *p = (uschar *) p0;
490
491 s = f->reset ? makeinit(f,0) : f->initstat;
492 if (f->out[s])
493 return(1);
494 do {
495 /* assert(*p < NCHARS); */
496 if ((ns = f->gototab[s][*p]) != 0)
497 s = ns;
498 else
499 s = cgoto(f, s, *p);
500 if (f->out[s])
501 return(1);
502 } while (*p++ != 0);
503 return(0);
504 }
505
506 int pmatch(fa *f, const char *p0) /* longest match, for sub */
507 {
508 int s, ns;
509 uschar *p = (uschar *) p0;
510 uschar *q;
511 int i, k;
512
513 /* s = f->reset ? makeinit(f,1) : f->initstat; */
514 if (f->reset) {
515 f->initstat = s = makeinit(f,1);
516 } else {
517 s = f->initstat;
518 }
519 patbeg = (char *) p;
520 patlen = -1;
521 do {
522 q = p;
523 do {
524 if (f->out[s]) /* final state */
525 patlen = q-p;
526 /* assert(*q < NCHARS); */
527 if ((ns = f->gototab[s][*q]) != 0)
528 s = ns;
529 else
530 s = cgoto(f, s, *q);
531 if (s == 1) { /* no transition */
532 if (patlen >= 0) {
533 patbeg = (char *) p;
534 return(1);
535 }
536 else
537 goto nextin; /* no match */
538 }
539 } while (*q++ != 0);
540 if (f->out[s])
541 patlen = q-p-1; /* don't count $ */
542 if (patlen >= 0) {
543 patbeg = (char *) p;
544 return(1);
545 }
546 nextin:
547 s = 2;
548 if (f->reset) {
549 for (i = 2; i <= f->curstat; i++)
550 xfree(f->posns[i]);
551 k = *f->posns[0];
552 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
553 overflo("out of space in pmatch");
554 for (i = 0; i <= k; i++)
555 (f->posns[2])[i] = (f->posns[0])[i];
556 f->initstat = f->curstat = 2;
557 f->out[2] = f->out[0];
558 for (i = 0; i < NCHARS; i++)
559 f->gototab[2][i] = 0;
560 }
561 } while (*p++ != 0);
562 return (0);
563 }
564
565 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
566 {
567 int s, ns;
568 uschar *p = (uschar *) p0;
569 uschar *q;
570 int i, k;
571
572 /* s = f->reset ? makeinit(f,1) : f->initstat; */
573 if (f->reset) {
574 f->initstat = s = makeinit(f,1);
575 } else {
576 s = f->initstat;
577 }
578 patlen = -1;
579 while (*p) {
580 q = p;
581 do {
582 if (f->out[s]) /* final state */
583 patlen = q-p;
584 /* assert(*q < NCHARS); */
585 if ((ns = f->gototab[s][*q]) != 0)
586 s = ns;
587 else
588 s = cgoto(f, s, *q);
589 if (s == 1) { /* no transition */
590 if (patlen > 0) {
591 patbeg = (char *) p;
592 return(1);
593 } else
594 goto nnextin; /* no nonempty match */
595 }
596 } while (*q++ != 0);
597 if (f->out[s])
598 patlen = q-p-1; /* don't count $ */
599 if (patlen > 0 ) {
600 patbeg = (char *) p;
601 return(1);
602 }
603 nnextin:
604 s = 2;
605 if (f->reset) {
606 for (i = 2; i <= f->curstat; i++)
607 xfree(f->posns[i]);
608 k = *f->posns[0];
609 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
610 overflo("out of state space");
611 for (i = 0; i <= k; i++)
612 (f->posns[2])[i] = (f->posns[0])[i];
613 f->initstat = f->curstat = 2;
614 f->out[2] = f->out[0];
615 for (i = 0; i < NCHARS; i++)
616 f->gototab[2][i] = 0;
617 }
618 p++;
619 }
620 return (0);
621 }
622
623 Node *reparse(const char *p) /* parses regular expression pointed to by p */
624 { /* uses relex() to scan regular expression */
625 Node *np;
626
627 dprintf( ("reparse <%s>\n", p) );
628 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
629 rtok = relex();
630 /* GNU compatibility: an empty regexp matches anything */
631 if (rtok == '\0') {
632 /* FATAL("empty regular expression"); previous */
633 return(op2(EMPTYRE, NIL, NIL));
634 }
635 np = regexp();
636 if (rtok != '\0')
637 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
638 return(np);
639 }
640
641 Node *regexp(void) /* top-level parse of reg expr */
642 {
643 return (alt(concat(primary())));
644 }
645
646 Node *primary(void)
647 {
648 Node *np;
649
650 switch (rtok) {
651 case CHAR:
652 np = op2(CHAR, NIL, itonp(rlxval));
653 rtok = relex();
654 return (unary(np));
655 case ALL:
656 rtok = relex();
657 return (unary(op2(ALL, NIL, NIL)));
658 case EMPTYRE:
659 rtok = relex();
660 return (unary(op2(ALL, NIL, NIL)));
661 case DOT:
662 rtok = relex();
663 return (unary(op2(DOT, NIL, NIL)));
664 case CCL:
665 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
666 rtok = relex();
667 return (unary(np));
668 case NCCL:
669 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
670 rtok = relex();
671 return (unary(np));
672 case '^':
673 rtok = relex();
674 return (unary(op2(CHAR, NIL, itonp(HAT))));
675 case '$':
676 rtok = relex();
677 return (unary(op2(CHAR, NIL, NIL)));
678 case '(':
679 rtok = relex();
680 if (rtok == ')') { /* special pleading for () */
681 rtok = relex();
682 return unary(op2(CCL, NIL, (Node *) tostring("")));
683 }
684 np = regexp();
685 if (rtok == ')') {
686 rtok = relex();
687 return (unary(np));
688 }
689 else
690 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
691 default:
692 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
693 }
694 return 0; /*NOTREACHED*/
695 }
696
697 Node *concat(Node *np)
698 {
699 switch (rtok) {
700 case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
701 return (concat(op2(CAT, np, primary())));
702 }
703 return (np);
704 }
705
706 Node *alt(Node *np)
707 {
708 if (rtok == OR) {
709 rtok = relex();
710 return (alt(op2(OR, np, concat(primary()))));
711 }
712 return (np);
713 }
714
715 Node *unary(Node *np)
716 {
717 switch (rtok) {
718 case STAR:
719 rtok = relex();
720 return (unary(op2(STAR, np, NIL)));
721 case PLUS:
722 rtok = relex();
723 return (unary(op2(PLUS, np, NIL)));
724 case QUEST:
725 rtok = relex();
726 return (unary(op2(QUEST, np, NIL)));
727 default:
728 return (np);
729 }
730 }
731
732 /*
733 * Character class definitions conformant to the POSIX locale as
734 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
735 * and operating character sets are both ASCII (ISO646) or supersets
736 * thereof.
737 *
738 * Note that to avoid overflowing the temporary buffer used in
739 * relex(), the expanded character class (prior to range expansion)
740 * must be less than twice the size of their full name.
741 */
742
743 /* Because isblank doesn't show up in any of the header files on any
744 * system i use, it's defined here. if some other locale has a richer
745 * definition of "blank", define HAS_ISBLANK and provide your own
746 * version.
747 * the parentheses here are an attempt to find a path through the maze
748 * of macro definition and/or function and/or version provided. thanks
749 * to nelson beebe for the suggestion; let's see if it works everywhere.
750 */
751
752 /* #define HAS_ISBLANK */
753 #ifndef HAS_ISBLANK
754
755 int (xisblank)(int c)
756 {
757 return c==' ' || c=='\t';
758 }
759
760 #endif
761
762 struct charclass {
763 const char *cc_name;
764 int cc_namelen;
765 int (*cc_func)(int);
766 } charclasses[] = {
767 { "alnum", 5, isalnum },
768 { "alpha", 5, isalpha },
769 #ifndef HAS_ISBLANK
770 { "blank", 5, isspace }, /* was isblank */
771 #else
772 { "blank", 5, isblank },
773 #endif
774 { "cntrl", 5, iscntrl },
775 { "digit", 5, isdigit },
776 { "graph", 5, isgraph },
777 { "lower", 5, islower },
778 { "print", 5, isprint },
779 { "punct", 5, ispunct },
780 { "space", 5, isspace },
781 { "upper", 5, isupper },
782 { "xdigit", 6, isxdigit },
783 { NULL, 0, NULL },
784 };
785
786
787 int relex(void) /* lexical analyzer for reparse */
788 {
789 int c, n;
790 int cflag;
791 static uschar *buf = 0;
792 static int bufsz = 100;
793 uschar *bp;
794 struct charclass *cc;
795 int i;
796
797 switch (c = *prestr++) {
798 case '|': return OR;
799 case '*': return STAR;
800 case '+': return PLUS;
801 case '?': return QUEST;
802 case '.': return DOT;
803 case '\0': prestr--; return '\0';
804 case '^':
805 case '$':
806 case '(':
807 case ')':
808 return c;
809 case '\\':
810 rlxval = quoted(&prestr);
811 return CHAR;
812 default:
813 rlxval = c;
814 return CHAR;
815 case '[':
816 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
817 FATAL("out of space in reg expr %.10s..", lastre);
818 bp = buf;
819 if (*prestr == '^') {
820 cflag = 1;
821 prestr++;
822 }
823 else
824 cflag = 0;
825 n = 2 * strlen((const char *) prestr)+1;
826 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
827 FATAL("out of space for reg expr %.10s...", lastre);
828 for (; ; ) {
829 if ((c = *prestr++) == '\\') {
830 *bp++ = '\\';
831 if ((c = *prestr++) == '\0')
832 FATAL("nonterminated character class %.20s...", lastre);
833 *bp++ = c;
834 /* } else if (c == '\n') { */
835 /* FATAL("newline in character class %.20s...", lastre); */
836 } else if (c == '[' && *prestr == ':') {
837 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
838 for (cc = charclasses; cc->cc_name; cc++)
839 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
840 break;
841 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
842 prestr[2 + cc->cc_namelen] == ']') {
843 prestr += cc->cc_namelen + 3;
844 for (i = 1; i < NCHARS; i++) {
845 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
846 FATAL("out of space for reg expr %.10s...", lastre);
847 if (cc->cc_func(i)) {
848 *bp++ = i;
849 n++;
850 }
851 }
852 } else
853 *bp++ = c;
854 } else if (c == '\0') {
855 FATAL("nonterminated character class %.20s", lastre);
856 } else if (bp == buf) { /* 1st char is special */
857 *bp++ = c;
858 } else if (c == ']') {
859 *bp++ = 0;
860 rlxstr = (uschar *) tostring((char *) buf);
861 if (cflag == 0)
862 return CCL;
863 else
864 return NCCL;
865 } else
866 *bp++ = c;
867 }
868 }
869 }
870
871 int cgoto(fa *f, int s, int c)
872 {
873 int i, j, k;
874 int *p, *q;
875
876 assert(c == HAT || c < NCHARS);
877 while (f->accept >= maxsetvec) { /* guessing here! */
878 maxsetvec *= 4;
879 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
880 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
881 if (setvec == 0 || tmpset == 0)
882 overflo("out of space in cgoto()");
883 }
884 for (i = 0; i <= f->accept; i++)
885 setvec[i] = 0;
886 setcnt = 0;
887 /* compute positions of gototab[s,c] into setvec */
888 p = f->posns[s];
889 for (i = 1; i <= *p; i++) {
890 if ((k = f->re[p[i]].ltype) != FINAL) {
891 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
892 || (k == DOT && c != 0 && c != HAT)
893 || (k == ALL && c != 0)
894 || (k == EMPTYRE && c != 0)
895 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
896 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
897 q = f->re[p[i]].lfollow;
898 for (j = 1; j <= *q; j++) {
899 if (q[j] >= maxsetvec) {
900 maxsetvec *= 4;
901 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
902 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
903 if (setvec == 0 || tmpset == 0)
904 overflo("cgoto overflow");
905 }
906 if (setvec[q[j]] == 0) {
907 setcnt++;
908 setvec[q[j]] = 1;
909 }
910 }
911 }
912 }
913 }
914 /* determine if setvec is a previous state */
915 tmpset[0] = setcnt;
916 j = 1;
917 for (i = f->accept; i >= 0; i--)
918 if (setvec[i]) {
919 tmpset[j++] = i;
920 }
921 /* tmpset == previous state? */
922 for (i = 1; i <= f->curstat; i++) {
923 p = f->posns[i];
924 if ((k = tmpset[0]) != p[0])
925 goto different;
926 for (j = 1; j <= k; j++)
927 if (tmpset[j] != p[j])
928 goto different;
929 /* setvec is state i */
930 f->gototab[s][c] = i;
931 return i;
932 different:;
933 }
934
935 /* add tmpset to current set of states */
936 if (f->curstat >= NSTATES-1) {
937 f->curstat = 2;
938 f->reset = 1;
939 for (i = 2; i < NSTATES; i++)
940 xfree(f->posns[i]);
941 } else
942 ++(f->curstat);
943 for (i = 0; i < NCHARS; i++)
944 f->gototab[f->curstat][i] = 0;
945 xfree(f->posns[f->curstat]);
946 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
947 overflo("out of space in cgoto");
948
949 f->posns[f->curstat] = p;
950 f->gototab[s][c] = f->curstat;
951 for (i = 0; i <= setcnt; i++)
952 p[i] = tmpset[i];
953 if (setvec[f->accept])
954 f->out[f->curstat] = 1;
955 else
956 f->out[f->curstat] = 0;
957 return f->curstat;
958 }
959
960
961 void freefa(fa *f) /* free a finite automaton */
962 {
963 int i;
964
965 if (f == NULL)
966 return;
967 for (i = 0; i <= f->curstat; i++)
968 xfree(f->posns[i]);
969 for (i = 0; i <= f->accept; i++) {
970 xfree(f->re[i].lfollow);
971 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
972 xfree((f->re[i].lval.np));
973 }
974 xfree(f->restr);
975 xfree(f);
976 }

Properties

Name Value
svn:keywords MidnightBSD=%H