flex/2.5.39/nfa.c

/* nfa - NFA construction routines */

/*  Copyright (c) 1990 The Regents of the University of California. */
/*  All rights reserved. */

/*  This code is derived from software contributed to Berkeley by */
/*  Vern Paxson. */

/*  The United States Government has rights in this work pursuant */
/*  to contract no. DE-AC03-76SF00098 between the United States */
/*  Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */

#include "flexdef.h"


/* declare functions that have forward references */

int dupmachine PROTO ((int));
void mkxtion PROTO ((int, int));


/* add_accept - add an accepting state to a machine
 *
 * accepting_number becomes mach's accepting number.
 */

void    add_accept (mach, accepting_number)
     int     mach, accepting_number;
{
        /* Hang the accepting number off an epsilon state.  if it is associated
         * with a state that has a non-epsilon out-transition, then the state
         * will accept BEFORE it makes that transition, i.e., one character
         * too soon.
         */

        if (transchar[finalst[mach]] == SYM_EPSILON)
                accptnum[finalst[mach]] = accepting_number;

        else {
                int     astate = mkstate (SYM_EPSILON);

                accptnum[astate] = accepting_number;
                (void) link_machines (mach, astate);
        }
}


/* copysingl - make a given number of copies of a singleton machine
 *
 * synopsis
 *
 *   newsng = copysingl( singl, num );
 *
 *     newsng - a new singleton composed of num copies of singl
 *     singl  - a singleton machine
 *     num    - the number of copies of singl to be present in newsng
 */

int     copysingl (singl, num)
     int     singl, num;
{
        int     copy, i;

        copy = mkstate (SYM_EPSILON);

        for (i = 1; i <= num; ++i)
                copy = link_machines (copy, dupmachine (singl));

        return copy;
}


/* dumpnfa - debugging routine to write out an nfa */

void    dumpnfa (state1)
     int     state1;

{
        int     sym, tsp1, tsp2, anum, ns;

        fprintf (stderr,
                 _
                 ("\n\n********** beginning dump of nfa with start state %d\n"),
                 state1);

        /* We probably should loop starting at firstst[state1] and going to
         * lastst[state1], but they're not maintained properly when we "or"
         * all of the rules together.  So we use our knowledge that the machine
         * starts at state 1 and ends at lastnfa.
         */

        /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
        for (ns = 1; ns <= lastnfa; ++ns) {
                fprintf (stderr, _("state # %4d\t"), ns);

                sym = transchar[ns];
                tsp1 = trans1[ns];
                tsp2 = trans2[ns];
                anum = accptnum[ns];

                fprintf (stderr, "%3d:  %4d, %4d", sym, tsp1, tsp2);

                if (anum != NIL)
                        fprintf (stderr, "  [%d]", anum);

                fprintf (stderr, "\n");
        }

        fprintf (stderr, _("********** end of dump\n"));
}


/* dupmachine - make a duplicate of a given machine
 *
 * synopsis
 *
 *   copy = dupmachine( mach );
 *
 *     copy - holds duplicate of mach
 *     mach - machine to be duplicated
 *
 * note that the copy of mach is NOT an exact duplicate; rather, all the
 * transition states values are adjusted so that the copy is self-contained,
 * as the original should have been.
 *
 * also note that the original MUST be contiguous, with its low and high
 * states accessible by the arrays firstst and lastst
 */

int     dupmachine (mach)
     int     mach;
{
        int     i, init, state_offset;
        int     state = 0;
        int     last = lastst[mach];

        for (i = firstst[mach]; i <= last; ++i) {
                state = mkstate (transchar[i]);

                if (trans1[i] != NO_TRANSITION) {
                        mkxtion (finalst[state], trans1[i] + state - i);

                        if (transchar[i] == SYM_EPSILON &&
                            trans2[i] != NO_TRANSITION)
                                        mkxtion (finalst[state],
                                                 trans2[i] + state - i);
                }

                accptnum[state] = accptnum[i];
        }

        if (state == 0)
                flexfatal (_("empty machine in dupmachine()"));

        state_offset = state - i + 1;

        init = mach + state_offset;
        firstst[init] = firstst[mach] + state_offset;
        finalst[init] = finalst[mach] + state_offset;
        lastst[init] = lastst[mach] + state_offset;

        return init;
}


/* finish_rule - finish up the processing for a rule
 *
 * An accepting number is added to the given machine.  If variable_trail_rule
 * is true then the rule has trailing context and both the head and trail
 * are variable size.  Otherwise if headcnt or trailcnt is non-zero then
 * the machine recognizes a pattern with trailing context and headcnt is
 * the number of characters in the matched part of the pattern, or zero
 * if the matched part has variable length.  trailcnt is the number of
 * trailing context characters in the pattern, or zero if the trailing
 * context has variable length.
 */

void    finish_rule (mach, variable_trail_rule, headcnt, trailcnt,
                     pcont_act)
     int     mach, variable_trail_rule, headcnt, trailcnt, pcont_act;
{
        char    action_text[MAXLINE];

        add_accept (mach, num_rules);

        /* We did this in new_rule(), but it often gets the wrong
         * number because we do it before we start parsing the current rule.
         */
        rule_linenum[num_rules] = linenum;

        /* If this is a continued action, then the line-number has already
         * been updated, giving us the wrong number.
         */
        if (continued_action)
                --rule_linenum[num_rules];


        /* If the previous rule was continued action, then we inherit the
         * previous newline flag, possibly overriding the current one.
         */
        if (pcont_act && rule_has_nl[num_rules - 1])
                rule_has_nl[num_rules] = true;

        snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules);
        add_action (action_text);
        if (rule_has_nl[num_rules]) {
                snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n",
                         num_rules);
                add_action (action_text);
        }


        if (variable_trail_rule) {
                rule_type[num_rules] = RULE_VARIABLE;

                if (performance_report > 0)
                        fprintf (stderr,
                                 _
                                 ("Variable trailing context rule at line %d\n"),
                                 rule_linenum[num_rules]);

                variable_trailing_context_rules = true;
        }

        else {
                rule_type[num_rules] = RULE_NORMAL;

                if (headcnt > 0 || trailcnt > 0) {
                        /* Do trailing context magic to not match the trailing
                         * characters.
                         */
                        char   *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
                        char   *scanner_bp = "yy_bp";

                        add_action
                                ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");

                        if (headcnt > 0) {
                                if (rule_has_nl[num_rules]) {
                                        snprintf (action_text, sizeof(action_text),
                                                "YY_LINENO_REWIND_TO(%s + %d);\n", scanner_bp, headcnt);
                                        add_action (action_text);
                                }
                                snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n",
                                         scanner_cp, scanner_bp, headcnt);
                                add_action (action_text);
                        }

                        else {
                                if (rule_has_nl[num_rules]) {
                                        snprintf (action_text, sizeof(action_text),
                                                 "YY_LINENO_REWIND_TO(yy_cp - %d);\n", trailcnt);
                                        add_action (action_text);
                                }

                                snprintf (action_text, sizeof(action_text), "%s -= %d;\n",
                                         scanner_cp, trailcnt);
                                add_action (action_text);
                        }

                        add_action
                                ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
                }
        }

        /* Okay, in the action code at this point yytext and yyleng have
         * their proper final values for this rule, so here's the point
         * to do any user action.  But don't do it for continued actions,
         * as that'll result in multiple YY_RULE_SETUP's.
         */
        if (!continued_action)
                add_action ("YY_RULE_SETUP\n");

        line_directive_out ((FILE *) 0, 1);
}


/* link_machines - connect two machines together
 *
 * synopsis
 *
 *   new = link_machines( first, last );
 *
 *     new    - a machine constructed by connecting first to last
 *     first  - the machine whose successor is to be last
 *     last   - the machine whose predecessor is to be first
 *
 * note: this routine concatenates the machine first with the machine
 *  last to produce a machine new which will pattern-match first first
 *  and then last, and will fail if either of the sub-patterns fails.
 *  FIRST is set to new by the operation.  last is unmolested.
 */

int     link_machines (first, last)
     int     first, last;
{
        if (first == NIL)
                return last;

        else if (last == NIL)
                return first;

        else {
                mkxtion (finalst[first], last);
                finalst[first] = finalst[last];
                lastst[first] = MAX (lastst[first], lastst[last]);
                firstst[first] = MIN (firstst[first], firstst[last]);

                return first;
        }
}


/* mark_beginning_as_normal - mark each "beginning" state in a machine
 *                            as being a "normal" (i.e., not trailing context-
 *                            associated) states
 *
 * The "beginning" states are the epsilon closure of the first state
 */

void    mark_beginning_as_normal (mach)
     register int mach;
{
        switch (state_type[mach]) {
        case STATE_NORMAL:
                /* Oh, we've already visited here. */
                return;

        case STATE_TRAILING_CONTEXT:
                state_type[mach] = STATE_NORMAL;

                if (transchar[mach] == SYM_EPSILON) {
                        if (trans1[mach] != NO_TRANSITION)
                                mark_beginning_as_normal (trans1[mach]);

                        if (trans2[mach] != NO_TRANSITION)
                                mark_beginning_as_normal (trans2[mach]);
                }
                break;

        default:
                flexerror (_
                           ("bad state type in mark_beginning_as_normal()"));
                break;
        }
}


/* mkbranch - make a machine that branches to two machines
 *
 * synopsis
 *
 *   branch = mkbranch( first, second );
 *
 *     branch - a machine which matches either first's pattern or second's
 *     first, second - machines whose patterns are to be or'ed (the | operator)
 *
 * Note that first and second are NEITHER destroyed by the operation.  Also,
 * the resulting machine CANNOT be used with any other "mk" operation except
 * more mkbranch's.  Compare with mkor()
 */

int     mkbranch (first, second)
     int     first, second;
{
        int     eps;

        if (first == NO_TRANSITION)
                return second;

        else if (second == NO_TRANSITION)
                return first;

        eps = mkstate (SYM_EPSILON);

        mkxtion (eps, first);
        mkxtion (eps, second);

        return eps;
}


/* mkclos - convert a machine into a closure
 *
 * synopsis
 *   new = mkclos( state );
 *
 * new - a new state which matches the closure of "state"
 */

int     mkclos (state)
     int     state;
{
        return mkopt (mkposcl (state));
}


/* mkopt - make a machine optional
 *
 * synopsis
 *
 *   new = mkopt( mach );
 *
 *     new  - a machine which optionally matches whatever mach matched
 *     mach - the machine to make optional
 *
 * notes:
 *     1. mach must be the last machine created
 *     2. mach is destroyed by the call
 */

int     mkopt (mach)
     int     mach;
{
        int     eps;

        if (!SUPER_FREE_EPSILON (finalst[mach])) {
                eps = mkstate (SYM_EPSILON);
                mach = link_machines (mach, eps);
        }

        /* Can't skimp on the following if FREE_EPSILON(mach) is true because
         * some state interior to "mach" might point back to the beginning
         * for a closure.
         */
        eps = mkstate (SYM_EPSILON);
        mach = link_machines (eps, mach);

        mkxtion (mach, finalst[mach]);

        return mach;
}


/* mkor - make a machine that matches either one of two machines
 *
 * synopsis
 *
 *   new = mkor( first, second );
 *
 *     new - a machine which matches either first's pattern or second's
 *     first, second - machines whose patterns are to be or'ed (the | operator)
 *
 * note that first and second are both destroyed by the operation
 * the code is rather convoluted because an attempt is made to minimize
 * the number of epsilon states needed
 */

int     mkor (first, second)
     int     first, second;
{
        int     eps, orend;

        if (first == NIL)
                return second;

        else if (second == NIL)
                return first;

        else {
                /* See comment in mkopt() about why we can't use the first
                 * state of "first" or "second" if they satisfy "FREE_EPSILON".
                 */
                eps = mkstate (SYM_EPSILON);

                first = link_machines (eps, first);

                mkxtion (first, second);

                if (SUPER_FREE_EPSILON (finalst[first]) &&
                    accptnum[finalst[first]] == NIL) {
                        orend = finalst[first];
                        mkxtion (finalst[second], orend);
                }

                else if (SUPER_FREE_EPSILON (finalst[second]) &&
                         accptnum[finalst[second]] == NIL) {
                        orend = finalst[second];
                        mkxtion (finalst[first], orend);
                }

                else {
                        eps = mkstate (SYM_EPSILON);

                        first = link_machines (first, eps);
                        orend = finalst[first];

                        mkxtion (finalst[second], orend);
                }
        }

        finalst[first] = orend;
        return first;
}


/* mkposcl - convert a machine into a positive closure
 *
 * synopsis
 *   new = mkposcl( state );
 *
 *    new - a machine matching the positive closure of "state"
 */

int     mkposcl (state)
     int     state;
{
        int     eps;

        if (SUPER_FREE_EPSILON (finalst[state])) {
                mkxtion (finalst[state], state);
                return state;
        }

        else {
                eps = mkstate (SYM_EPSILON);
                mkxtion (eps, state);
                return link_machines (state, eps);
        }
}


/* mkrep - make a replicated machine
 *
 * synopsis
 *   new = mkrep( mach, lb, ub );
 *
 *    new - a machine that matches whatever "mach" matched from "lb"
 *          number of times to "ub" number of times
 *
 * note
 *   if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
 */

int     mkrep (mach, lb, ub)
     int     mach, lb, ub;
{
        int     base_mach, tail, copy, i;

        base_mach = copysingl (mach, lb - 1);

        if (ub == INFINITE_REPEAT) {
                copy = dupmachine (mach);
                mach = link_machines (mach,
                                      link_machines (base_mach,
                                                     mkclos (copy)));
        }

        else {
                tail = mkstate (SYM_EPSILON);

                for (i = lb; i < ub; ++i) {
                        copy = dupmachine (mach);
                        tail = mkopt (link_machines (copy, tail));
                }

                mach =
                        link_machines (mach,
                                       link_machines (base_mach, tail));
        }

        return mach;
}


/* mkstate - create a state with a transition on a given symbol
 *
 * synopsis
 *
 *   state = mkstate( sym );
 *
 *     state - a new state matching sym
 *     sym   - the symbol the new state is to have an out-transition on
 *
 * note that this routine makes new states in ascending order through the
 * state array (and increments LASTNFA accordingly).  The routine DUPMACHINE
 * relies on machines being made in ascending order and that they are
 * CONTIGUOUS.  Change it and you will have to rewrite DUPMACHINE (kludge
 * that it admittedly is)
 */

int     mkstate (sym)
     int     sym;
{
        if (++lastnfa >= current_mns) {
                if ((current_mns += MNS_INCREMENT) >= maximum_mns)
                        lerrif (_
                                ("input rules are too complicated (>= %d NFA states)"),
current_mns);

                ++num_reallocs;

                firstst = reallocate_integer_array (firstst, current_mns);
                lastst = reallocate_integer_array (lastst, current_mns);
                finalst = reallocate_integer_array (finalst, current_mns);
                transchar =
                        reallocate_integer_array (transchar, current_mns);
                trans1 = reallocate_integer_array (trans1, current_mns);
                trans2 = reallocate_integer_array (trans2, current_mns);
                accptnum =
                        reallocate_integer_array (accptnum, current_mns);
                assoc_rule =
                        reallocate_integer_array (assoc_rule, current_mns);
                state_type =
                        reallocate_integer_array (state_type, current_mns);
        }

        firstst[lastnfa] = lastnfa;
        finalst[lastnfa] = lastnfa;
        lastst[lastnfa] = lastnfa;
        transchar[lastnfa] = sym;
        trans1[lastnfa] = NO_TRANSITION;
        trans2[lastnfa] = NO_TRANSITION;
        accptnum[lastnfa] = NIL;
        assoc_rule[lastnfa] = num_rules;
        state_type[lastnfa] = current_state_type;

        /* Fix up equivalence classes base on this transition.  Note that any
         * character which has its own transition gets its own equivalence
         * class.  Thus only characters which are only in character classes
         * have a chance at being in the same equivalence class.  E.g. "a|b"
         * puts 'a' and 'b' into two different equivalence classes.  "[ab]"
         * puts them in the same equivalence class (barring other differences
         * elsewhere in the input).
         */

        if (sym < 0) {
                /* We don't have to update the equivalence classes since
                 * that was already done when the ccl was created for the
                 * first time.
                 */
        }

        else if (sym == SYM_EPSILON)
                ++numeps;

        else {
                check_char (sym);

                if (useecs)
                        /* Map NUL's to csize. */
                        mkechar (sym ? sym : csize, nextecm, ecgroup);
        }

        return lastnfa;
}


/* mkxtion - make a transition from one state to another
 *
 * synopsis
 *
 *   mkxtion( statefrom, stateto );
 *
 *     statefrom - the state from which the transition is to be made
 *     stateto   - the state to which the transition is to be made
 */

void    mkxtion (statefrom, stateto)
     int     statefrom, stateto;
{
        if (trans1[statefrom] == NO_TRANSITION)
                trans1[statefrom] = stateto;

        else if ((transchar[statefrom] != SYM_EPSILON) ||
                 (trans2[statefrom] != NO_TRANSITION))
                flexfatal (_("found too many transitions in mkxtion()"));

        else {                  /* second out-transition for an epsilon state */
                ++eps2;
                trans2[statefrom] = stateto;
        }
}

/* new_rule - initialize for a new rule */

void    new_rule ()
{
        if (++num_rules >= current_max_rules) {
                ++num_reallocs;
                current_max_rules += MAX_RULES_INCREMENT;
                rule_type = reallocate_integer_array (rule_type,
                                                      current_max_rules);
                rule_linenum = reallocate_integer_array (rule_linenum,
                                                         current_max_rules);
                rule_useful = reallocate_integer_array (rule_useful,
                                                        current_max_rules);
                rule_has_nl = reallocate_bool_array (rule_has_nl,
                                                     current_max_rules);
        }

        if (num_rules > MAX_RULE)
                lerrif (_("too many rules (> %d)!"), MAX_RULE);

        rule_linenum[num_rules] = linenum;
        rule_useful[num_rules] = false;
        rule_has_nl[num_rules] = false;
}
Revision:	7076
Committed:	Thu Jul 9 12:38:31 2015 UTC (8 years, 9 months ago) by laffer1
Content type:	text/plain
File size:	18252 byte(s)
Log Message:	flex 2.5.39
#	Content
1	/* nfa - NFA construction routines */
2
3	/* Copyright (c) 1990 The Regents of the University of California. */
4	/* All rights reserved. */
5
6	/* This code is derived from software contributed to Berkeley by */
7	/* Vern Paxson. */
8
9	/* The United States Government has rights in this work pursuant */
10	/* to contract no. DE-AC03-76SF00098 between the United States */
11	/* Department of Energy and the University of California. */
12
13	/* This file is part of flex. */
14
15	/* Redistribution and use in source and binary forms, with or without */
16	/* modification, are permitted provided that the following conditions */
17	/* are met: */
18
19	/* 1. Redistributions of source code must retain the above copyright */
20	/* notice, this list of conditions and the following disclaimer. */
21	/* 2. Redistributions in binary form must reproduce the above copyright */
22	/* notice, this list of conditions and the following disclaimer in the */
23	/* documentation and/or other materials provided with the distribution. */
24
25	/* Neither the name of the University nor the names of its contributors */
26	/* may be used to endorse or promote products derived from this software */
27	/* without specific prior written permission. */
28
29	/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30	/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31	/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32	/* PURPOSE. */
33
34	#include "flexdef.h"
35
36
37	/* declare functions that have forward references */
38
39	int dupmachine PROTO ((int));
40	void mkxtion PROTO ((int, int));
41
42
43	/* add_accept - add an accepting state to a machine
44	*
45	* accepting_number becomes mach's accepting number.
46	*/
47
48	void add_accept (mach, accepting_number)
49	int mach, accepting_number;
50	{
51	/* Hang the accepting number off an epsilon state. if it is associated
52	* with a state that has a non-epsilon out-transition, then the state
53	* will accept BEFORE it makes that transition, i.e., one character
54	* too soon.
55	*/
56
57	if (transchar[finalst[mach]] == SYM_EPSILON)
58	accptnum[finalst[mach]] = accepting_number;
59
60	else {
61	int astate = mkstate (SYM_EPSILON);
62
63	accptnum[astate] = accepting_number;
64	(void) link_machines (mach, astate);
65	}
66	}
67
68
69	/* copysingl - make a given number of copies of a singleton machine
70	*
71	* synopsis
72	*
73	* newsng = copysingl( singl, num );
74	*
75	* newsng - a new singleton composed of num copies of singl
76	* singl - a singleton machine
77	* num - the number of copies of singl to be present in newsng
78	*/
79
80	int copysingl (singl, num)
81	int singl, num;
82	{
83	int copy, i;
84
85	copy = mkstate (SYM_EPSILON);
86
87	for (i = 1; i <= num; ++i)
88	copy = link_machines (copy, dupmachine (singl));
89
90	return copy;
91	}
92
93
94	/* dumpnfa - debugging routine to write out an nfa */
95
96	void dumpnfa (state1)
97	int state1;
98
99	{
100	int sym, tsp1, tsp2, anum, ns;
101
102	fprintf (stderr,
103	_
104	("\n\n********** beginning dump of nfa with start state %d\n"),
105	state1);
106
107	/* We probably should loop starting at firstst[state1] and going to
108	* lastst[state1], but they're not maintained properly when we "or"
109	* all of the rules together. So we use our knowledge that the machine
110	* starts at state 1 and ends at lastnfa.
111	*/
112
113	/* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
114	for (ns = 1; ns <= lastnfa; ++ns) {
115	fprintf (stderr, _("state # %4d\t"), ns);
116
117	sym = transchar[ns];
118	tsp1 = trans1[ns];
119	tsp2 = trans2[ns];
120	anum = accptnum[ns];
121
122	fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2);
123
124	if (anum != NIL)
125	fprintf (stderr, " [%d]", anum);
126
127	fprintf (stderr, "\n");
128	}
129
130	fprintf (stderr, _("********** end of dump\n"));
131	}
132
133
134	/* dupmachine - make a duplicate of a given machine
135	*
136	* synopsis
137	*
138	* copy = dupmachine( mach );
139	*
140	* copy - holds duplicate of mach
141	* mach - machine to be duplicated
142	*
143	* note that the copy of mach is NOT an exact duplicate; rather, all the
144	* transition states values are adjusted so that the copy is self-contained,
145	* as the original should have been.
146	*
147	* also note that the original MUST be contiguous, with its low and high
148	* states accessible by the arrays firstst and lastst
149	*/
150
151	int dupmachine (mach)
152	int mach;
153	{
154	int i, init, state_offset;
155	int state = 0;
156	int last = lastst[mach];
157
158	for (i = firstst[mach]; i <= last; ++i) {
159	state = mkstate (transchar[i]);
160
161	if (trans1[i] != NO_TRANSITION) {
162	mkxtion (finalst[state], trans1[i] + state - i);
163
164	if (transchar[i] == SYM_EPSILON &&
165	trans2[i] != NO_TRANSITION)
166	mkxtion (finalst[state],
167	trans2[i] + state - i);
168	}
169
170	accptnum[state] = accptnum[i];
171	}
172
173	if (state == 0)
174	flexfatal (_("empty machine in dupmachine()"));
175
176	state_offset = state - i + 1;
177
178	init = mach + state_offset;
179	firstst[init] = firstst[mach] + state_offset;
180	finalst[init] = finalst[mach] + state_offset;
181	lastst[init] = lastst[mach] + state_offset;
182
183	return init;
184	}
185
186
187	/* finish_rule - finish up the processing for a rule
188	*
189	* An accepting number is added to the given machine. If variable_trail_rule
190	* is true then the rule has trailing context and both the head and trail
191	* are variable size. Otherwise if headcnt or trailcnt is non-zero then
192	* the machine recognizes a pattern with trailing context and headcnt is
193	* the number of characters in the matched part of the pattern, or zero
194	* if the matched part has variable length. trailcnt is the number of
195	* trailing context characters in the pattern, or zero if the trailing
196	* context has variable length.
197	*/
198
199	void finish_rule (mach, variable_trail_rule, headcnt, trailcnt,
200	pcont_act)
201	int mach, variable_trail_rule, headcnt, trailcnt, pcont_act;
202	{
203	char action_text[MAXLINE];
204
205	add_accept (mach, num_rules);
206
207	/* We did this in new_rule(), but it often gets the wrong
208	* number because we do it before we start parsing the current rule.
209	*/
210	rule_linenum[num_rules] = linenum;
211
212	/* If this is a continued action, then the line-number has already
213	* been updated, giving us the wrong number.
214	*/
215	if (continued_action)
216	--rule_linenum[num_rules];
217
218
219	/* If the previous rule was continued action, then we inherit the
220	* previous newline flag, possibly overriding the current one.
221	*/
222	if (pcont_act && rule_has_nl[num_rules - 1])
223	rule_has_nl[num_rules] = true;
224
225	snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules);
226	add_action (action_text);
227	if (rule_has_nl[num_rules]) {
228	snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n",
229	num_rules);
230	add_action (action_text);
231	}
232
233
234	if (variable_trail_rule) {
235	rule_type[num_rules] = RULE_VARIABLE;
236
237	if (performance_report > 0)
238	fprintf (stderr,
239	_
240	("Variable trailing context rule at line %d\n"),
241	rule_linenum[num_rules]);
242
243	variable_trailing_context_rules = true;
244	}
245
246	else {
247	rule_type[num_rules] = RULE_NORMAL;
248
249	if (headcnt > 0 \|\| trailcnt > 0) {
250	/* Do trailing context magic to not match the trailing
251	* characters.
252	*/
253	char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
254	char *scanner_bp = "yy_bp";
255
256	add_action
257	("yy_cp = YY_G(yy_hold_char); / undo effects of setting up yytext */\n");
258
259	if (headcnt > 0) {
260	if (rule_has_nl[num_rules]) {
261	snprintf (action_text, sizeof(action_text),
262	"YY_LINENO_REWIND_TO(%s + %d);\n", scanner_bp, headcnt);
263	add_action (action_text);
264	}
265	snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n",
266	scanner_cp, scanner_bp, headcnt);
267	add_action (action_text);
268	}
269
270	else {
271	if (rule_has_nl[num_rules]) {
272	snprintf (action_text, sizeof(action_text),
273	"YY_LINENO_REWIND_TO(yy_cp - %d);\n", trailcnt);
274	add_action (action_text);
275	}
276
277	snprintf (action_text, sizeof(action_text), "%s -= %d;\n",
278	scanner_cp, trailcnt);
279	add_action (action_text);
280	}
281
282	add_action
283	("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
284	}
285	}
286
287	/* Okay, in the action code at this point yytext and yyleng have
288	* their proper final values for this rule, so here's the point
289	* to do any user action. But don't do it for continued actions,
290	* as that'll result in multiple YY_RULE_SETUP's.
291	*/
292	if (!continued_action)
293	add_action ("YY_RULE_SETUP\n");
294
295	line_directive_out ((FILE *) 0, 1);
296	}
297
298
299	/* link_machines - connect two machines together
300	*
301	* synopsis
302	*
303	* new = link_machines( first, last );
304	*
305	* new - a machine constructed by connecting first to last
306	* first - the machine whose successor is to be last
307	* last - the machine whose predecessor is to be first
308	*
309	* note: this routine concatenates the machine first with the machine
310	* last to produce a machine new which will pattern-match first first
311	* and then last, and will fail if either of the sub-patterns fails.
312	* FIRST is set to new by the operation. last is unmolested.
313	*/
314
315	int link_machines (first, last)
316	int first, last;
317	{
318	if (first == NIL)
319	return last;
320
321	else if (last == NIL)
322	return first;
323
324	else {
325	mkxtion (finalst[first], last);
326	finalst[first] = finalst[last];
327	lastst[first] = MAX (lastst[first], lastst[last]);
328	firstst[first] = MIN (firstst[first], firstst[last]);
329
330	return first;
331	}
332	}
333
334
335	/* mark_beginning_as_normal - mark each "beginning" state in a machine
336	* as being a "normal" (i.e., not trailing context-
337	* associated) states
338	*
339	* The "beginning" states are the epsilon closure of the first state
340	*/
341
342	void mark_beginning_as_normal (mach)
343	register int mach;
344	{
345	switch (state_type[mach]) {
346	case STATE_NORMAL:
347	/* Oh, we've already visited here. */
348	return;
349
350	case STATE_TRAILING_CONTEXT:
351	state_type[mach] = STATE_NORMAL;
352
353	if (transchar[mach] == SYM_EPSILON) {
354	if (trans1[mach] != NO_TRANSITION)
355	mark_beginning_as_normal (trans1[mach]);
356
357	if (trans2[mach] != NO_TRANSITION)
358	mark_beginning_as_normal (trans2[mach]);
359	}
360	break;
361
362	default:
363	flexerror (_
364	("bad state type in mark_beginning_as_normal()"));
365	break;
366	}
367	}
368
369
370	/* mkbranch - make a machine that branches to two machines
371	*
372	* synopsis
373	*
374	* branch = mkbranch( first, second );
375	*
376	* branch - a machine which matches either first's pattern or second's
377	* first, second - machines whose patterns are to be or'ed (the \| operator)
378	*
379	* Note that first and second are NEITHER destroyed by the operation. Also,
380	* the resulting machine CANNOT be used with any other "mk" operation except
381	* more mkbranch's. Compare with mkor()
382	*/
383
384	int mkbranch (first, second)
385	int first, second;
386	{
387	int eps;
388
389	if (first == NO_TRANSITION)
390	return second;
391
392	else if (second == NO_TRANSITION)
393	return first;
394
395	eps = mkstate (SYM_EPSILON);
396
397	mkxtion (eps, first);
398	mkxtion (eps, second);
399
400	return eps;
401	}
402
403
404	/* mkclos - convert a machine into a closure
405	*
406	* synopsis
407	* new = mkclos( state );
408	*
409	* new - a new state which matches the closure of "state"
410	*/
411
412	int mkclos (state)
413	int state;
414	{
415	return mkopt (mkposcl (state));
416	}
417
418
419	/* mkopt - make a machine optional
420	*
421	* synopsis
422	*
423	* new = mkopt( mach );
424	*
425	* new - a machine which optionally matches whatever mach matched
426	* mach - the machine to make optional
427	*
428	* notes:
429	* 1. mach must be the last machine created
430	* 2. mach is destroyed by the call
431	*/
432
433	int mkopt (mach)
434	int mach;
435	{
436	int eps;
437
438	if (!SUPER_FREE_EPSILON (finalst[mach])) {
439	eps = mkstate (SYM_EPSILON);
440	mach = link_machines (mach, eps);
441	}
442
443	/* Can't skimp on the following if FREE_EPSILON(mach) is true because
444	* some state interior to "mach" might point back to the beginning
445	* for a closure.
446	*/
447	eps = mkstate (SYM_EPSILON);
448	mach = link_machines (eps, mach);
449
450	mkxtion (mach, finalst[mach]);
451
452	return mach;
453	}
454
455
456	/* mkor - make a machine that matches either one of two machines
457	*
458	* synopsis
459	*
460	* new = mkor( first, second );
461	*
462	* new - a machine which matches either first's pattern or second's
463	* first, second - machines whose patterns are to be or'ed (the \| operator)
464	*
465	* note that first and second are both destroyed by the operation
466	* the code is rather convoluted because an attempt is made to minimize
467	* the number of epsilon states needed
468	*/
469
470	int mkor (first, second)
471	int first, second;
472	{
473	int eps, orend;
474
475	if (first == NIL)
476	return second;
477
478	else if (second == NIL)
479	return first;
480
481	else {
482	/* See comment in mkopt() about why we can't use the first
483	* state of "first" or "second" if they satisfy "FREE_EPSILON".
484	*/
485	eps = mkstate (SYM_EPSILON);
486
487	first = link_machines (eps, first);
488
489	mkxtion (first, second);
490
491	if (SUPER_FREE_EPSILON (finalst[first]) &&
492	accptnum[finalst[first]] == NIL) {
493	orend = finalst[first];
494	mkxtion (finalst[second], orend);
495	}
496
497	else if (SUPER_FREE_EPSILON (finalst[second]) &&
498	accptnum[finalst[second]] == NIL) {
499	orend = finalst[second];
500	mkxtion (finalst[first], orend);
501	}
502
503	else {
504	eps = mkstate (SYM_EPSILON);
505
506	first = link_machines (first, eps);
507	orend = finalst[first];
508
509	mkxtion (finalst[second], orend);
510	}
511	}
512
513	finalst[first] = orend;
514	return first;
515	}
516
517
518	/* mkposcl - convert a machine into a positive closure
519	*
520	* synopsis
521	* new = mkposcl( state );
522	*
523	* new - a machine matching the positive closure of "state"
524	*/
525
526	int mkposcl (state)
527	int state;
528	{
529	int eps;
530
531	if (SUPER_FREE_EPSILON (finalst[state])) {
532	mkxtion (finalst[state], state);
533	return state;
534	}
535
536	else {
537	eps = mkstate (SYM_EPSILON);
538	mkxtion (eps, state);
539	return link_machines (state, eps);
540	}
541	}
542
543
544	/* mkrep - make a replicated machine
545	*
546	* synopsis
547	* new = mkrep( mach, lb, ub );
548	*
549	* new - a machine that matches whatever "mach" matched from "lb"
550	* number of times to "ub" number of times
551	*
552	* note
553	* if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
554	*/
555
556	int mkrep (mach, lb, ub)
557	int mach, lb, ub;
558	{
559	int base_mach, tail, copy, i;
560
561	base_mach = copysingl (mach, lb - 1);
562
563	if (ub == INFINITE_REPEAT) {
564	copy = dupmachine (mach);
565	mach = link_machines (mach,
566	link_machines (base_mach,
567	mkclos (copy)));
568	}
569
570	else {
571	tail = mkstate (SYM_EPSILON);
572
573	for (i = lb; i < ub; ++i) {
574	copy = dupmachine (mach);
575	tail = mkopt (link_machines (copy, tail));
576	}
577
578	mach =
579	link_machines (mach,
580	link_machines (base_mach, tail));
581	}
582
583	return mach;
584	}
585
586
587	/* mkstate - create a state with a transition on a given symbol
588	*
589	* synopsis
590	*
591	* state = mkstate( sym );
592	*
593	* state - a new state matching sym
594	* sym - the symbol the new state is to have an out-transition on
595	*
596	* note that this routine makes new states in ascending order through the
597	* state array (and increments LASTNFA accordingly). The routine DUPMACHINE
598	* relies on machines being made in ascending order and that they are
599	* CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
600	* that it admittedly is)
601	*/
602
603	int mkstate (sym)
604	int sym;
605	{
606	if (++lastnfa >= current_mns) {
607	if ((current_mns += MNS_INCREMENT) >= maximum_mns)
608	lerrif (_
609	("input rules are too complicated (>= %d NFA states)"),
610	current_mns);
611
612	++num_reallocs;
613
614	firstst = reallocate_integer_array (firstst, current_mns);
615	lastst = reallocate_integer_array (lastst, current_mns);
616	finalst = reallocate_integer_array (finalst, current_mns);
617	transchar =
618	reallocate_integer_array (transchar, current_mns);
619	trans1 = reallocate_integer_array (trans1, current_mns);
620	trans2 = reallocate_integer_array (trans2, current_mns);
621	accptnum =
622	reallocate_integer_array (accptnum, current_mns);
623	assoc_rule =
624	reallocate_integer_array (assoc_rule, current_mns);
625	state_type =
626	reallocate_integer_array (state_type, current_mns);
627	}
628
629	firstst[lastnfa] = lastnfa;
630	finalst[lastnfa] = lastnfa;
631	lastst[lastnfa] = lastnfa;
632	transchar[lastnfa] = sym;
633	trans1[lastnfa] = NO_TRANSITION;
634	trans2[lastnfa] = NO_TRANSITION;
635	accptnum[lastnfa] = NIL;
636	assoc_rule[lastnfa] = num_rules;
637	state_type[lastnfa] = current_state_type;
638
639	/* Fix up equivalence classes base on this transition. Note that any
640	* character which has its own transition gets its own equivalence
641	* class. Thus only characters which are only in character classes
642	* have a chance at being in the same equivalence class. E.g. "a\|b"
643	* puts 'a' and 'b' into two different equivalence classes. "[ab]"
644	* puts them in the same equivalence class (barring other differences
645	* elsewhere in the input).
646	*/
647
648	if (sym < 0) {
649	/* We don't have to update the equivalence classes since
650	* that was already done when the ccl was created for the
651	* first time.
652	*/
653	}
654
655	else if (sym == SYM_EPSILON)
656	++numeps;
657
658	else {
659	check_char (sym);
660
661	if (useecs)
662	/* Map NUL's to csize. */
663	mkechar (sym ? sym : csize, nextecm, ecgroup);
664	}
665
666	return lastnfa;
667	}
668
669
670	/* mkxtion - make a transition from one state to another
671	*
672	* synopsis
673	*
674	* mkxtion( statefrom, stateto );
675	*
676	* statefrom - the state from which the transition is to be made
677	* stateto - the state to which the transition is to be made
678	*/
679
680	void mkxtion (statefrom, stateto)
681	int statefrom, stateto;
682	{
683	if (trans1[statefrom] == NO_TRANSITION)
684	trans1[statefrom] = stateto;
685
686	else if ((transchar[statefrom] != SYM_EPSILON) \|\|
687	(trans2[statefrom] != NO_TRANSITION))
688	flexfatal (_("found too many transitions in mkxtion()"));
689
690	else { /* second out-transition for an epsilon state */
691	++eps2;
692	trans2[statefrom] = stateto;
693	}
694	}
695
696	/* new_rule - initialize for a new rule */
697
698	void new_rule ()
699	{
700	if (++num_rules >= current_max_rules) {
701	++num_reallocs;
702	current_max_rules += MAX_RULES_INCREMENT;
703	rule_type = reallocate_integer_array (rule_type,
704	current_max_rules);
705	rule_linenum = reallocate_integer_array (rule_linenum,
706	current_max_rules);
707	rule_useful = reallocate_integer_array (rule_useful,
708	current_max_rules);
709	rule_has_nl = reallocate_bool_array (rule_has_nl,
710	current_max_rules);
711	}
712
713	if (num_rules > MAX_RULE)
714	lerrif (_("too many rules (> %d)!"), MAX_RULE);
715
716	rule_linenum[num_rules] = linenum;
717	rule_useful[num_rules] = false;
718	rule_has_nl[num_rules] = false;
719	}