ViewVC Help
View File | Revision Log | Show Annotations | Download File | View Changeset | Root Listing
root/src/vendor/flex/2.5.39/nfa.c
Revision: 7076
Committed: Thu Jul 9 12:38:31 2015 UTC (8 years, 9 months ago) by laffer1
Content type: text/plain
File size: 18252 byte(s)
Log Message:
flex 2.5.39

File Contents

# Content
1 /* nfa - NFA construction routines */
2
3 /* Copyright (c) 1990 The Regents of the University of California. */
4 /* All rights reserved. */
5
6 /* This code is derived from software contributed to Berkeley by */
7 /* Vern Paxson. */
8
9 /* The United States Government has rights in this work pursuant */
10 /* to contract no. DE-AC03-76SF00098 between the United States */
11 /* Department of Energy and the University of California. */
12
13 /* This file is part of flex. */
14
15 /* Redistribution and use in source and binary forms, with or without */
16 /* modification, are permitted provided that the following conditions */
17 /* are met: */
18
19 /* 1. Redistributions of source code must retain the above copyright */
20 /* notice, this list of conditions and the following disclaimer. */
21 /* 2. Redistributions in binary form must reproduce the above copyright */
22 /* notice, this list of conditions and the following disclaimer in the */
23 /* documentation and/or other materials provided with the distribution. */
24
25 /* Neither the name of the University nor the names of its contributors */
26 /* may be used to endorse or promote products derived from this software */
27 /* without specific prior written permission. */
28
29 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32 /* PURPOSE. */
33
34 #include "flexdef.h"
35
36
37 /* declare functions that have forward references */
38
39 int dupmachine PROTO ((int));
40 void mkxtion PROTO ((int, int));
41
42
43 /* add_accept - add an accepting state to a machine
44 *
45 * accepting_number becomes mach's accepting number.
46 */
47
48 void add_accept (mach, accepting_number)
49 int mach, accepting_number;
50 {
51 /* Hang the accepting number off an epsilon state. if it is associated
52 * with a state that has a non-epsilon out-transition, then the state
53 * will accept BEFORE it makes that transition, i.e., one character
54 * too soon.
55 */
56
57 if (transchar[finalst[mach]] == SYM_EPSILON)
58 accptnum[finalst[mach]] = accepting_number;
59
60 else {
61 int astate = mkstate (SYM_EPSILON);
62
63 accptnum[astate] = accepting_number;
64 (void) link_machines (mach, astate);
65 }
66 }
67
68
69 /* copysingl - make a given number of copies of a singleton machine
70 *
71 * synopsis
72 *
73 * newsng = copysingl( singl, num );
74 *
75 * newsng - a new singleton composed of num copies of singl
76 * singl - a singleton machine
77 * num - the number of copies of singl to be present in newsng
78 */
79
80 int copysingl (singl, num)
81 int singl, num;
82 {
83 int copy, i;
84
85 copy = mkstate (SYM_EPSILON);
86
87 for (i = 1; i <= num; ++i)
88 copy = link_machines (copy, dupmachine (singl));
89
90 return copy;
91 }
92
93
94 /* dumpnfa - debugging routine to write out an nfa */
95
96 void dumpnfa (state1)
97 int state1;
98
99 {
100 int sym, tsp1, tsp2, anum, ns;
101
102 fprintf (stderr,
103 _
104 ("\n\n********** beginning dump of nfa with start state %d\n"),
105 state1);
106
107 /* We probably should loop starting at firstst[state1] and going to
108 * lastst[state1], but they're not maintained properly when we "or"
109 * all of the rules together. So we use our knowledge that the machine
110 * starts at state 1 and ends at lastnfa.
111 */
112
113 /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
114 for (ns = 1; ns <= lastnfa; ++ns) {
115 fprintf (stderr, _("state # %4d\t"), ns);
116
117 sym = transchar[ns];
118 tsp1 = trans1[ns];
119 tsp2 = trans2[ns];
120 anum = accptnum[ns];
121
122 fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2);
123
124 if (anum != NIL)
125 fprintf (stderr, " [%d]", anum);
126
127 fprintf (stderr, "\n");
128 }
129
130 fprintf (stderr, _("********** end of dump\n"));
131 }
132
133
134 /* dupmachine - make a duplicate of a given machine
135 *
136 * synopsis
137 *
138 * copy = dupmachine( mach );
139 *
140 * copy - holds duplicate of mach
141 * mach - machine to be duplicated
142 *
143 * note that the copy of mach is NOT an exact duplicate; rather, all the
144 * transition states values are adjusted so that the copy is self-contained,
145 * as the original should have been.
146 *
147 * also note that the original MUST be contiguous, with its low and high
148 * states accessible by the arrays firstst and lastst
149 */
150
151 int dupmachine (mach)
152 int mach;
153 {
154 int i, init, state_offset;
155 int state = 0;
156 int last = lastst[mach];
157
158 for (i = firstst[mach]; i <= last; ++i) {
159 state = mkstate (transchar[i]);
160
161 if (trans1[i] != NO_TRANSITION) {
162 mkxtion (finalst[state], trans1[i] + state - i);
163
164 if (transchar[i] == SYM_EPSILON &&
165 trans2[i] != NO_TRANSITION)
166 mkxtion (finalst[state],
167 trans2[i] + state - i);
168 }
169
170 accptnum[state] = accptnum[i];
171 }
172
173 if (state == 0)
174 flexfatal (_("empty machine in dupmachine()"));
175
176 state_offset = state - i + 1;
177
178 init = mach + state_offset;
179 firstst[init] = firstst[mach] + state_offset;
180 finalst[init] = finalst[mach] + state_offset;
181 lastst[init] = lastst[mach] + state_offset;
182
183 return init;
184 }
185
186
187 /* finish_rule - finish up the processing for a rule
188 *
189 * An accepting number is added to the given machine. If variable_trail_rule
190 * is true then the rule has trailing context and both the head and trail
191 * are variable size. Otherwise if headcnt or trailcnt is non-zero then
192 * the machine recognizes a pattern with trailing context and headcnt is
193 * the number of characters in the matched part of the pattern, or zero
194 * if the matched part has variable length. trailcnt is the number of
195 * trailing context characters in the pattern, or zero if the trailing
196 * context has variable length.
197 */
198
199 void finish_rule (mach, variable_trail_rule, headcnt, trailcnt,
200 pcont_act)
201 int mach, variable_trail_rule, headcnt, trailcnt, pcont_act;
202 {
203 char action_text[MAXLINE];
204
205 add_accept (mach, num_rules);
206
207 /* We did this in new_rule(), but it often gets the wrong
208 * number because we do it before we start parsing the current rule.
209 */
210 rule_linenum[num_rules] = linenum;
211
212 /* If this is a continued action, then the line-number has already
213 * been updated, giving us the wrong number.
214 */
215 if (continued_action)
216 --rule_linenum[num_rules];
217
218
219 /* If the previous rule was continued action, then we inherit the
220 * previous newline flag, possibly overriding the current one.
221 */
222 if (pcont_act && rule_has_nl[num_rules - 1])
223 rule_has_nl[num_rules] = true;
224
225 snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules);
226 add_action (action_text);
227 if (rule_has_nl[num_rules]) {
228 snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n",
229 num_rules);
230 add_action (action_text);
231 }
232
233
234 if (variable_trail_rule) {
235 rule_type[num_rules] = RULE_VARIABLE;
236
237 if (performance_report > 0)
238 fprintf (stderr,
239 _
240 ("Variable trailing context rule at line %d\n"),
241 rule_linenum[num_rules]);
242
243 variable_trailing_context_rules = true;
244 }
245
246 else {
247 rule_type[num_rules] = RULE_NORMAL;
248
249 if (headcnt > 0 || trailcnt > 0) {
250 /* Do trailing context magic to not match the trailing
251 * characters.
252 */
253 char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
254 char *scanner_bp = "yy_bp";
255
256 add_action
257 ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");
258
259 if (headcnt > 0) {
260 if (rule_has_nl[num_rules]) {
261 snprintf (action_text, sizeof(action_text),
262 "YY_LINENO_REWIND_TO(%s + %d);\n", scanner_bp, headcnt);
263 add_action (action_text);
264 }
265 snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n",
266 scanner_cp, scanner_bp, headcnt);
267 add_action (action_text);
268 }
269
270 else {
271 if (rule_has_nl[num_rules]) {
272 snprintf (action_text, sizeof(action_text),
273 "YY_LINENO_REWIND_TO(yy_cp - %d);\n", trailcnt);
274 add_action (action_text);
275 }
276
277 snprintf (action_text, sizeof(action_text), "%s -= %d;\n",
278 scanner_cp, trailcnt);
279 add_action (action_text);
280 }
281
282 add_action
283 ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
284 }
285 }
286
287 /* Okay, in the action code at this point yytext and yyleng have
288 * their proper final values for this rule, so here's the point
289 * to do any user action. But don't do it for continued actions,
290 * as that'll result in multiple YY_RULE_SETUP's.
291 */
292 if (!continued_action)
293 add_action ("YY_RULE_SETUP\n");
294
295 line_directive_out ((FILE *) 0, 1);
296 }
297
298
299 /* link_machines - connect two machines together
300 *
301 * synopsis
302 *
303 * new = link_machines( first, last );
304 *
305 * new - a machine constructed by connecting first to last
306 * first - the machine whose successor is to be last
307 * last - the machine whose predecessor is to be first
308 *
309 * note: this routine concatenates the machine first with the machine
310 * last to produce a machine new which will pattern-match first first
311 * and then last, and will fail if either of the sub-patterns fails.
312 * FIRST is set to new by the operation. last is unmolested.
313 */
314
315 int link_machines (first, last)
316 int first, last;
317 {
318 if (first == NIL)
319 return last;
320
321 else if (last == NIL)
322 return first;
323
324 else {
325 mkxtion (finalst[first], last);
326 finalst[first] = finalst[last];
327 lastst[first] = MAX (lastst[first], lastst[last]);
328 firstst[first] = MIN (firstst[first], firstst[last]);
329
330 return first;
331 }
332 }
333
334
335 /* mark_beginning_as_normal - mark each "beginning" state in a machine
336 * as being a "normal" (i.e., not trailing context-
337 * associated) states
338 *
339 * The "beginning" states are the epsilon closure of the first state
340 */
341
342 void mark_beginning_as_normal (mach)
343 register int mach;
344 {
345 switch (state_type[mach]) {
346 case STATE_NORMAL:
347 /* Oh, we've already visited here. */
348 return;
349
350 case STATE_TRAILING_CONTEXT:
351 state_type[mach] = STATE_NORMAL;
352
353 if (transchar[mach] == SYM_EPSILON) {
354 if (trans1[mach] != NO_TRANSITION)
355 mark_beginning_as_normal (trans1[mach]);
356
357 if (trans2[mach] != NO_TRANSITION)
358 mark_beginning_as_normal (trans2[mach]);
359 }
360 break;
361
362 default:
363 flexerror (_
364 ("bad state type in mark_beginning_as_normal()"));
365 break;
366 }
367 }
368
369
370 /* mkbranch - make a machine that branches to two machines
371 *
372 * synopsis
373 *
374 * branch = mkbranch( first, second );
375 *
376 * branch - a machine which matches either first's pattern or second's
377 * first, second - machines whose patterns are to be or'ed (the | operator)
378 *
379 * Note that first and second are NEITHER destroyed by the operation. Also,
380 * the resulting machine CANNOT be used with any other "mk" operation except
381 * more mkbranch's. Compare with mkor()
382 */
383
384 int mkbranch (first, second)
385 int first, second;
386 {
387 int eps;
388
389 if (first == NO_TRANSITION)
390 return second;
391
392 else if (second == NO_TRANSITION)
393 return first;
394
395 eps = mkstate (SYM_EPSILON);
396
397 mkxtion (eps, first);
398 mkxtion (eps, second);
399
400 return eps;
401 }
402
403
404 /* mkclos - convert a machine into a closure
405 *
406 * synopsis
407 * new = mkclos( state );
408 *
409 * new - a new state which matches the closure of "state"
410 */
411
412 int mkclos (state)
413 int state;
414 {
415 return mkopt (mkposcl (state));
416 }
417
418
419 /* mkopt - make a machine optional
420 *
421 * synopsis
422 *
423 * new = mkopt( mach );
424 *
425 * new - a machine which optionally matches whatever mach matched
426 * mach - the machine to make optional
427 *
428 * notes:
429 * 1. mach must be the last machine created
430 * 2. mach is destroyed by the call
431 */
432
433 int mkopt (mach)
434 int mach;
435 {
436 int eps;
437
438 if (!SUPER_FREE_EPSILON (finalst[mach])) {
439 eps = mkstate (SYM_EPSILON);
440 mach = link_machines (mach, eps);
441 }
442
443 /* Can't skimp on the following if FREE_EPSILON(mach) is true because
444 * some state interior to "mach" might point back to the beginning
445 * for a closure.
446 */
447 eps = mkstate (SYM_EPSILON);
448 mach = link_machines (eps, mach);
449
450 mkxtion (mach, finalst[mach]);
451
452 return mach;
453 }
454
455
456 /* mkor - make a machine that matches either one of two machines
457 *
458 * synopsis
459 *
460 * new = mkor( first, second );
461 *
462 * new - a machine which matches either first's pattern or second's
463 * first, second - machines whose patterns are to be or'ed (the | operator)
464 *
465 * note that first and second are both destroyed by the operation
466 * the code is rather convoluted because an attempt is made to minimize
467 * the number of epsilon states needed
468 */
469
470 int mkor (first, second)
471 int first, second;
472 {
473 int eps, orend;
474
475 if (first == NIL)
476 return second;
477
478 else if (second == NIL)
479 return first;
480
481 else {
482 /* See comment in mkopt() about why we can't use the first
483 * state of "first" or "second" if they satisfy "FREE_EPSILON".
484 */
485 eps = mkstate (SYM_EPSILON);
486
487 first = link_machines (eps, first);
488
489 mkxtion (first, second);
490
491 if (SUPER_FREE_EPSILON (finalst[first]) &&
492 accptnum[finalst[first]] == NIL) {
493 orend = finalst[first];
494 mkxtion (finalst[second], orend);
495 }
496
497 else if (SUPER_FREE_EPSILON (finalst[second]) &&
498 accptnum[finalst[second]] == NIL) {
499 orend = finalst[second];
500 mkxtion (finalst[first], orend);
501 }
502
503 else {
504 eps = mkstate (SYM_EPSILON);
505
506 first = link_machines (first, eps);
507 orend = finalst[first];
508
509 mkxtion (finalst[second], orend);
510 }
511 }
512
513 finalst[first] = orend;
514 return first;
515 }
516
517
518 /* mkposcl - convert a machine into a positive closure
519 *
520 * synopsis
521 * new = mkposcl( state );
522 *
523 * new - a machine matching the positive closure of "state"
524 */
525
526 int mkposcl (state)
527 int state;
528 {
529 int eps;
530
531 if (SUPER_FREE_EPSILON (finalst[state])) {
532 mkxtion (finalst[state], state);
533 return state;
534 }
535
536 else {
537 eps = mkstate (SYM_EPSILON);
538 mkxtion (eps, state);
539 return link_machines (state, eps);
540 }
541 }
542
543
544 /* mkrep - make a replicated machine
545 *
546 * synopsis
547 * new = mkrep( mach, lb, ub );
548 *
549 * new - a machine that matches whatever "mach" matched from "lb"
550 * number of times to "ub" number of times
551 *
552 * note
553 * if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
554 */
555
556 int mkrep (mach, lb, ub)
557 int mach, lb, ub;
558 {
559 int base_mach, tail, copy, i;
560
561 base_mach = copysingl (mach, lb - 1);
562
563 if (ub == INFINITE_REPEAT) {
564 copy = dupmachine (mach);
565 mach = link_machines (mach,
566 link_machines (base_mach,
567 mkclos (copy)));
568 }
569
570 else {
571 tail = mkstate (SYM_EPSILON);
572
573 for (i = lb; i < ub; ++i) {
574 copy = dupmachine (mach);
575 tail = mkopt (link_machines (copy, tail));
576 }
577
578 mach =
579 link_machines (mach,
580 link_machines (base_mach, tail));
581 }
582
583 return mach;
584 }
585
586
587 /* mkstate - create a state with a transition on a given symbol
588 *
589 * synopsis
590 *
591 * state = mkstate( sym );
592 *
593 * state - a new state matching sym
594 * sym - the symbol the new state is to have an out-transition on
595 *
596 * note that this routine makes new states in ascending order through the
597 * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
598 * relies on machines being made in ascending order and that they are
599 * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
600 * that it admittedly is)
601 */
602
603 int mkstate (sym)
604 int sym;
605 {
606 if (++lastnfa >= current_mns) {
607 if ((current_mns += MNS_INCREMENT) >= maximum_mns)
608 lerrif (_
609 ("input rules are too complicated (>= %d NFA states)"),
610 current_mns);
611
612 ++num_reallocs;
613
614 firstst = reallocate_integer_array (firstst, current_mns);
615 lastst = reallocate_integer_array (lastst, current_mns);
616 finalst = reallocate_integer_array (finalst, current_mns);
617 transchar =
618 reallocate_integer_array (transchar, current_mns);
619 trans1 = reallocate_integer_array (trans1, current_mns);
620 trans2 = reallocate_integer_array (trans2, current_mns);
621 accptnum =
622 reallocate_integer_array (accptnum, current_mns);
623 assoc_rule =
624 reallocate_integer_array (assoc_rule, current_mns);
625 state_type =
626 reallocate_integer_array (state_type, current_mns);
627 }
628
629 firstst[lastnfa] = lastnfa;
630 finalst[lastnfa] = lastnfa;
631 lastst[lastnfa] = lastnfa;
632 transchar[lastnfa] = sym;
633 trans1[lastnfa] = NO_TRANSITION;
634 trans2[lastnfa] = NO_TRANSITION;
635 accptnum[lastnfa] = NIL;
636 assoc_rule[lastnfa] = num_rules;
637 state_type[lastnfa] = current_state_type;
638
639 /* Fix up equivalence classes base on this transition. Note that any
640 * character which has its own transition gets its own equivalence
641 * class. Thus only characters which are only in character classes
642 * have a chance at being in the same equivalence class. E.g. "a|b"
643 * puts 'a' and 'b' into two different equivalence classes. "[ab]"
644 * puts them in the same equivalence class (barring other differences
645 * elsewhere in the input).
646 */
647
648 if (sym < 0) {
649 /* We don't have to update the equivalence classes since
650 * that was already done when the ccl was created for the
651 * first time.
652 */
653 }
654
655 else if (sym == SYM_EPSILON)
656 ++numeps;
657
658 else {
659 check_char (sym);
660
661 if (useecs)
662 /* Map NUL's to csize. */
663 mkechar (sym ? sym : csize, nextecm, ecgroup);
664 }
665
666 return lastnfa;
667 }
668
669
670 /* mkxtion - make a transition from one state to another
671 *
672 * synopsis
673 *
674 * mkxtion( statefrom, stateto );
675 *
676 * statefrom - the state from which the transition is to be made
677 * stateto - the state to which the transition is to be made
678 */
679
680 void mkxtion (statefrom, stateto)
681 int statefrom, stateto;
682 {
683 if (trans1[statefrom] == NO_TRANSITION)
684 trans1[statefrom] = stateto;
685
686 else if ((transchar[statefrom] != SYM_EPSILON) ||
687 (trans2[statefrom] != NO_TRANSITION))
688 flexfatal (_("found too many transitions in mkxtion()"));
689
690 else { /* second out-transition for an epsilon state */
691 ++eps2;
692 trans2[statefrom] = stateto;
693 }
694 }
695
696 /* new_rule - initialize for a new rule */
697
698 void new_rule ()
699 {
700 if (++num_rules >= current_max_rules) {
701 ++num_reallocs;
702 current_max_rules += MAX_RULES_INCREMENT;
703 rule_type = reallocate_integer_array (rule_type,
704 current_max_rules);
705 rule_linenum = reallocate_integer_array (rule_linenum,
706 current_max_rules);
707 rule_useful = reallocate_integer_array (rule_useful,
708 current_max_rules);
709 rule_has_nl = reallocate_bool_array (rule_has_nl,
710 current_max_rules);
711 }
712
713 if (num_rules > MAX_RULE)
714 lerrif (_("too many rules (> %d)!"), MAX_RULE);
715
716 rule_linenum[num_rules] = linenum;
717 rule_useful[num_rules] = false;
718 rule_has_nl[num_rules] = false;
719 }