1 /*
2   tre-match-approx.c - TRE approximate regex matching engine
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif /* HAVE_CONFIG_H */
11 
12 #include "tre-alloca.h"
13 
14 #define __USE_STRING_INLINES
15 #undef __NO_INLINE__
16 
17 #include <assert.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <limits.h>
21 #ifdef HAVE_WCHAR_H
22 #include <wchar.h>
23 #endif /* HAVE_WCHAR_H */
24 #ifdef HAVE_WCTYPE_H
25 #include <wctype.h>
26 #endif /* HAVE_WCTYPE_H */
27 #ifndef TRE_WCHAR
28 #include <ctype.h>
29 #endif /* !TRE_WCHAR */
30 #ifdef HAVE_MALLOC_H
31 #include <malloc.h>
32 #endif /* HAVE_MALLOC_H */
33 #include <stdint.h>
34 
35 #include "tre-internal.h"
36 #include "tre-match-utils.h"
37 #include "tre.h"
38 #include "xmalloc.h"
39 
40 #define TRE_M_COST  0
41 #define TRE_M_NUM_INS         1
42 #define TRE_M_NUM_DEL         2
43 #define TRE_M_NUM_SUBST 3
44 #define TRE_M_NUM_ERR         4
45 #define TRE_M_LAST  5
46 
47 #define TRE_M_MAX_DEPTH 3
48 
49 typedef struct {
50   /* State in the TNFA transition table. */
51   tre_tnfa_transition_t *state;
52   /* Position in input string. */
53   int pos;
54   /* Tag values. */
55   int *tags;
56   /* Matching parameters. */
57   regaparams_t params;
58   /* Nesting depth of parameters.  This is used as an index in
59      the `costs' array. */
60   int depth;
61   /* Costs and counter values for different parameter nesting depths. */
62   int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
63 } tre_tnfa_approx_reach_t;
64 
65 
66 #ifdef TRE_DEBUG
67 /* Prints the `reach' array in a readable fashion with DPRINT. */
68 static void
tre_print_reach(const tre_tnfa_t * tnfa,tre_tnfa_approx_reach_t * reach,int pos,size_t num_tags)69 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
70                     int pos, size_t num_tags)
71 {
72   int id;
73 
74   /* Print each state on one line. */
75   DPRINT(("  reach:\n"));
76   for (id = 0; id < tnfa->num_states; id++)
77     {
78       int i, j;
79       if (reach[id].pos < pos)
80           continue;  /* Not reached. */
81       DPRINT(("      %03d, costs ", id));
82       for (i = 0; i <= reach[id].depth; i++)
83           {
84             DPRINT(("["));
85             for (j = 0; j < TRE_M_LAST; j++)
86               {
87                 DPRINT(("%2d", reach[id].costs[i][j]));
88                 if (j + 1 < TRE_M_LAST)
89                     DPRINT((","));
90               }
91             DPRINT(("]"));
92             if (i + 1 <= reach[id].depth)
93               DPRINT((", "));
94           }
95       DPRINT(("\n   tags "));
96       for (i = 0; i < num_tags; i++)
97           {
98             DPRINT(("%02d", reach[id].tags[i]));
99             if (i + 1 < num_tags)
100               DPRINT((","));
101           }
102       DPRINT(("\n"));
103     }
104   DPRINT(("\n"));
105 }
106 #endif /* TRE_DEBUG */
107 
108 
109 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
110    array.  If `pa' specifies default values, they are taken from
111    `default_params'. */
112 inline static void
tre_set_params(tre_tnfa_approx_reach_t * reach,int * pa,regaparams_t default_params)113 tre_set_params(tre_tnfa_approx_reach_t *reach,
114                  int *pa, regaparams_t default_params)
115 {
116   int value;
117 
118   /* If depth is increased reset costs and counters to zero for the
119      new levels. */
120   value = pa[TRE_PARAM_DEPTH];
121   assert(value <= TRE_M_MAX_DEPTH);
122   if (value > reach->depth)
123     {
124       int i, j;
125       for (i = reach->depth + 1; i <= value; i++)
126           for (j = 0; j < TRE_M_LAST; j++)
127             reach->costs[i][j] = 0;
128     }
129   reach->depth = value;
130 
131   /* Set insert cost. */
132   value = pa[TRE_PARAM_COST_INS];
133   if (value == TRE_PARAM_DEFAULT)
134     reach->params.cost_ins = default_params.cost_ins;
135   else if (value != TRE_PARAM_UNSET)
136     reach->params.cost_ins = value;
137 
138   /* Set delete cost. */
139   value = pa[TRE_PARAM_COST_DEL];
140   if (value == TRE_PARAM_DEFAULT)
141     reach->params.cost_del = default_params.cost_del;
142   else if (value != TRE_PARAM_UNSET)
143     reach->params.cost_del = value;
144 
145   /* Set substitute cost. */
146   value = pa[TRE_PARAM_COST_SUBST];
147   if (value == TRE_PARAM_DEFAULT)
148     reach->params.cost_subst = default_params.cost_subst;
149   else
150     reach->params.cost_subst = value;
151 
152   /* Set maximum cost. */
153   value = pa[TRE_PARAM_COST_MAX];
154   if (value == TRE_PARAM_DEFAULT)
155     reach->params.max_cost = default_params.max_cost;
156   else if (value != TRE_PARAM_UNSET)
157     reach->params.max_cost = value;
158 
159   /* Set maximum inserts. */
160   value = pa[TRE_PARAM_MAX_INS];
161   if (value == TRE_PARAM_DEFAULT)
162     reach->params.max_ins = default_params.max_ins;
163   else if (value != TRE_PARAM_UNSET)
164     reach->params.max_ins = value;
165 
166   /* Set maximum deletes. */
167   value = pa[TRE_PARAM_MAX_DEL];
168   if (value == TRE_PARAM_DEFAULT)
169     reach->params.max_del = default_params.max_del;
170   else if (value != TRE_PARAM_UNSET)
171     reach->params.max_del = value;
172 
173   /* Set maximum substitutes. */
174   value = pa[TRE_PARAM_MAX_SUBST];
175   if (value == TRE_PARAM_DEFAULT)
176     reach->params.max_subst = default_params.max_subst;
177   else if (value != TRE_PARAM_UNSET)
178     reach->params.max_subst = value;
179 
180   /* Set maximum number of errors. */
181   value = pa[TRE_PARAM_MAX_ERR];
182   if (value == TRE_PARAM_DEFAULT)
183     reach->params.max_err = default_params.max_err;
184   else if (value != TRE_PARAM_UNSET)
185     reach->params.max_err = value;
186 }
187 
188 reg_errcode_t
tre_tnfa_run_approx(const tre_tnfa_t * tnfa,const void * string,int len,tre_str_type_t type,int * match_tags,regamatch_t * match,regaparams_t default_params,int eflags,int * match_end_ofs)189 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
190                         tre_str_type_t type, int *match_tags,
191                         regamatch_t *match, regaparams_t default_params,
192                         int eflags, int *match_end_ofs)
193 {
194   /* State variables required by GET_NEXT_WCHAR. */
195   tre_char_t prev_c = 0, next_c = 0;
196   const char *str_byte = string;
197   int pos = -1;
198   unsigned int pos_add_next = 1;
199 #ifdef TRE_WCHAR
200   const wchar_t *str_wide = string;
201 #ifdef TRE_MBSTATE
202   mbstate_t mbstate;
203 #endif /* !TRE_WCHAR */
204 #endif /* TRE_WCHAR */
205   int reg_notbol = eflags & REG_NOTBOL;
206   int reg_noteol = eflags & REG_NOTEOL;
207   int reg_newline = tnfa->cflags & REG_NEWLINE;
208   size_t str_user_end = 0;
209 
210   int prev_pos;
211 
212   /* Number of tags. */
213   size_t num_tags;
214   /* The reach tables. */
215   tre_tnfa_approx_reach_t *reach, *reach_next;
216   /* Tag array for temporary use. */
217   int *tmp_tags;
218 
219   /* End offset of best match so far, or -1 if no match found yet. */
220   int match_eo = -1;
221   /* Costs of the match. */
222   int match_costs[TRE_M_LAST];
223 
224   /* Space for temporary data required for matching. */
225   unsigned char *buf;
226 
227   size_t i, id;
228 
229   reg_errcode_t ret;
230 
231   if (!match_tags)
232     num_tags = 0;
233   else
234     num_tags = tnfa->num_tags;
235 
236 #ifdef TRE_MBSTATE
237   memset(&mbstate, '\0', sizeof(mbstate));
238 #endif /* TRE_MBSTATE */
239 
240   DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
241             "match_tags %p\n",
242             type, len, eflags,
243             match_tags));
244   DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
245             default_params.max_cost,
246             default_params.cost_ins,
247             default_params.cost_del,
248             default_params.cost_subst));
249 
250   /* Allocate memory for temporary data required for matching.        This needs to
251      be done for every matching operation to be thread safe.  This allocates
252      everything in a single large block from the stack frame using alloca()
253      or with malloc() if alloca is unavailable. */
254   {
255     unsigned char *buf_cursor;
256 
257     /* Ensure that tag_bytes*num_states cannot overflow, and that it don't
258      * contribute more than 1/8 of SIZE_MAX to total_bytes. */
259     if (num_tags > SIZE_MAX/(8 * sizeof(*tmp_tags) * tnfa->num_states))
260       return REG_ESPACE;
261 
262     /* Likewise check reach_bytes. */
263     if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_next)))
264       return REG_ESPACE;
265 
266     /* Space needed for one array of tags. */
267     size_t tag_bytes = sizeof(*tmp_tags) * num_tags;
268     /* Space needed for one reach table. */
269     size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states;
270     /* Total space needed. */
271     size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
272     /* Add some extra to make sure we can align the pointers.  The multiplier
273        used here must be equal to the number of ALIGN calls below. */
274     total_bytes += (sizeof(long) - 1) * 3;
275 
276     /* Allocate the memory. */
277 #ifdef TRE_USE_ALLOCA
278     buf = alloca(total_bytes);
279 #else /* !TRE_USE_ALLOCA */
280     buf = xmalloc((unsigned)total_bytes);
281 #endif /* !TRE_USE_ALLOCA */
282     if (!buf)
283       return REG_ESPACE;
284     memset(buf, 0, (size_t)total_bytes);
285 
286     /* Allocate `tmp_tags' from `buf'. */
287     tmp_tags = (void *)buf;
288     buf_cursor = buf + tag_bytes;
289     buf_cursor += ALIGN(buf_cursor, long);
290 
291     /* Allocate `reach' from `buf'. */
292     reach = (void *)buf_cursor;
293     buf_cursor += reach_bytes;
294     buf_cursor += ALIGN(buf_cursor, long);
295 
296     /* Allocate `reach_next' from `buf'. */
297     reach_next = (void *)buf_cursor;
298     buf_cursor += reach_bytes;
299     buf_cursor += ALIGN(buf_cursor, long);
300 
301     /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
302     for (i = 0; i < tnfa->num_states; i++)
303       {
304           reach[i].tags = (void *)buf_cursor;
305           buf_cursor += tag_bytes;
306           reach_next[i].tags = (void *)buf_cursor;
307           buf_cursor += tag_bytes;
308       }
309     assert(buf_cursor <= buf + total_bytes);
310   }
311 
312   for (i = 0; i < TRE_M_LAST; i++)
313     match_costs[i] = INT_MAX;
314 
315   /* Mark the reach arrays empty. */
316   for (i = 0; i < tnfa->num_states; i++)
317     reach[i].pos = reach_next[i].pos = -2;
318 
319   prev_pos = pos;
320   GET_NEXT_WCHAR();
321   pos = 0;
322 
323   while (/*CONSTCOND*/(void)1,1)
324     {
325       DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
326 
327       /* Add initial states to `reach_next' if an exact match has not yet
328            been found. */
329       if (match_costs[TRE_M_COST] > 0)
330           {
331             tre_tnfa_transition_t *trans;
332             DPRINT(("  init"));
333             for (trans = tnfa->initial; trans->state; trans++)
334               {
335                 int stateid = trans->state_id;
336 
337                 /* If this state is not currently in `reach_next', add it
338                      there. */
339                 if (reach_next[stateid].pos < pos)
340                     {
341                       if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
342                         {
343                           /* Assertions failed, don't add this state. */
344                           DPRINT((" !%d (assert)", stateid));
345                           continue;
346                         }
347                       DPRINT((" %d", stateid));
348                       reach_next[stateid].state = trans->state;
349                       reach_next[stateid].pos = pos;
350 
351                       /* Compute tag values after this transition. */
352                       for (i = 0; i < num_tags; i++)
353                         reach_next[stateid].tags[i] = -1;
354 
355                       if (trans->tags)
356                         for (i = 0; trans->tags[i] >= 0; i++)
357                           if ((size_t)trans->tags[i] < num_tags)
358                               reach_next[stateid].tags[trans->tags[i]] = pos;
359 
360                       /* Set the parameters, depth, and costs. */
361                       reach_next[stateid].params = default_params;
362                       reach_next[stateid].depth = 0;
363                       for (i = 0; i < TRE_M_LAST; i++)
364                         reach_next[stateid].costs[0][i] = 0;
365                       if (trans->params)
366                         tre_set_params(&reach_next[stateid], trans->params,
367                                            default_params);
368 
369                       /* If this is the final state, mark the exact match. */
370                       if (trans->state == tnfa->final)
371                         {
372                           match_eo = pos;
373                           for (i = 0; i < num_tags; i++)
374                               match_tags[i] = reach_next[stateid].tags[i];
375                           for (i = 0; i < TRE_M_LAST; i++)
376                               match_costs[i] = 0;
377                         }
378                     }
379               }
380               DPRINT(("\n"));
381           }
382 
383 
384       /* Handle inserts.  This is done by pretending there's an epsilon
385            transition from each state in `reach' back to the same state.
386            We don't need to worry about the final state here; this will never
387            give a better match than what we already have. */
388       for (id = 0; id < tnfa->num_states; id++)
389           {
390             int depth;
391             int cost, cost0;
392 
393             if (reach[id].pos != prev_pos)
394               {
395                 DPRINT(("      insert: %d not reached\n", id));
396                 continue;      /* Not reached. */
397               }
398 
399             depth = reach[id].depth;
400 
401             /* Compute and check cost at current depth. */
402             cost = reach[id].costs[depth][TRE_M_COST];
403             if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
404               cost += reach[id].params.cost_ins;
405             if (cost > reach[id].params.max_cost)
406               continue;  /* Cost too large. */
407 
408             /* Check number of inserts at current depth. */
409             if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
410                 > reach[id].params.max_ins)
411               continue;  /* Too many inserts. */
412 
413             /* Check total number of errors at current depth. */
414             if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
415                 > reach[id].params.max_err)
416               continue;  /* Too many errors. */
417 
418             /* Compute overall cost. */
419             cost0 = cost;
420             if (depth > 0)
421               {
422                 cost0 = reach[id].costs[0][TRE_M_COST];
423                 if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
424                     cost0 += reach[id].params.cost_ins;
425                 else
426                     cost0 += default_params.cost_ins;
427               }
428 
429             DPRINT(("  insert: from %d to %d, cost %d: ", id, id,
430                       reach[id].costs[depth][TRE_M_COST]));
431             if (reach_next[id].pos == pos
432                 && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
433               {
434                 DPRINT(("lose\n"));
435                 continue;
436               }
437             DPRINT(("win\n"));
438 
439             /* Copy state, position, tags, parameters, and depth. */
440             reach_next[id].state = reach[id].state;
441             reach_next[id].pos = pos;
442             for (i = 0; i < num_tags; i++)
443               reach_next[id].tags[i] = reach[id].tags[i];
444             reach_next[id].params = reach[id].params;
445             reach_next[id].depth = reach[id].depth;
446 
447             /* Set the costs after this transition. */
448             memcpy(reach_next[id].costs, reach[id].costs,
449                      sizeof(reach_next[id].costs[0][0])
450                      * TRE_M_LAST * (depth + 1));
451             reach_next[id].costs[depth][TRE_M_COST] = cost;
452             reach_next[id].costs[depth][TRE_M_NUM_INS]++;
453             reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
454             if (depth > 0)
455               {
456                 reach_next[id].costs[0][TRE_M_COST] = cost0;
457                 reach_next[id].costs[0][TRE_M_NUM_INS]++;
458                 reach_next[id].costs[0][TRE_M_NUM_ERR]++;
459               }
460 
461           }
462 
463 
464       /* Handle deletes.  This is done by traversing through the whole TNFA
465            pretending that all transitions are epsilon transitions, until
466            no more states can be reached with better costs. */
467       {
468           /* XXX - dynamic ringbuffer size */
469           tre_tnfa_approx_reach_t *ringbuffer[512];
470           tre_tnfa_approx_reach_t **deque_start, **deque_end;
471 
472           deque_start = deque_end = ringbuffer;
473 
474           /* Add all states in `reach_next' to the deque. */
475           for (id = 0; id < tnfa->num_states; id++)
476             {
477               if (reach_next[id].pos != pos)
478                 continue;
479               *deque_end = &reach_next[id];
480               deque_end++;
481               assert(deque_end != deque_start);
482             }
483 
484           /* Repeat until the deque is empty. */
485           while (deque_end != deque_start)
486             {
487               tre_tnfa_approx_reach_t *reach_p;
488               int depth;
489               int cost, cost0;
490               tre_tnfa_transition_t *trans;
491 
492               /* Pop the first item off the deque. */
493               reach_p = *deque_start;
494               id = reach_p - reach_next;
495               depth = reach_p->depth;
496 
497               /* Compute cost at current depth. */
498               cost = reach_p->costs[depth][TRE_M_COST];
499               if (reach_p->params.cost_del != TRE_PARAM_UNSET)
500                 cost += reach_p->params.cost_del;
501 
502               /* Check cost, number of deletes, and total number of errors
503                  at current depth. */
504               if (cost > reach_p->params.max_cost
505                     || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
506                         > reach_p->params.max_del)
507                     || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
508                         > reach_p->params.max_err))
509                 {
510                     /* Too many errors or cost too large. */
511                     DPRINT(("  delete: from %03d: cost too large\n", id));
512                     deque_start++;
513                     if (deque_start >= (ringbuffer + 512))
514                       deque_start = ringbuffer;
515                     continue;
516                 }
517 
518               /* Compute overall cost. */
519               cost0 = cost;
520               if (depth > 0)
521                 {
522                     cost0 = reach_p->costs[0][TRE_M_COST];
523                     if (reach_p->params.cost_del != TRE_PARAM_UNSET)
524                       cost0 += reach_p->params.cost_del;
525                     else
526                       cost0 += default_params.cost_del;
527                 }
528 
529               for (trans = reach_p->state; trans->state; trans++)
530                 {
531                     int dest_id = trans->state_id;
532                     DPRINT(("  delete: from %03d to %03d, cost %d (%d): ",
533                               id, dest_id, cost0, reach_p->params.max_cost));
534 
535                     if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
536                       {
537                         DPRINT(("assertion failed\n"));
538                         continue;
539                       }
540 
541                     /* Compute tag values after this transition. */
542                     for (i = 0; i < num_tags; i++)
543                       tmp_tags[i] = reach_p->tags[i];
544                     if (trans->tags)
545                       for (i = 0; trans->tags[i] >= 0; i++)
546                         if ((size_t)trans->tags[i] < num_tags)
547                           tmp_tags[trans->tags[i]] = pos;
548 
549                     /* If another path has also reached this state, choose the one
550                        with the smallest cost or best tags if costs are equal. */
551                     if (reach_next[dest_id].pos == pos
552                         && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
553                               || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
554                                   && (!match_tags
555                                         || !tre_tag_order(num_tags,
556                                                               tnfa->tag_directions,
557                                                               tmp_tags,
558                                                               reach_next[dest_id].tags)))))
559                       {
560                         DPRINT(("lose, cost0 %d, have %d\n",
561                                   cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
562                         continue;
563                       }
564                     DPRINT(("win\n"));
565 
566                     /* Set state, position, tags, parameters, depth, and costs. */
567                     reach_next[dest_id].state = trans->state;
568                     reach_next[dest_id].pos = pos;
569                     for (i = 0; i < num_tags; i++)
570                       reach_next[dest_id].tags[i] = tmp_tags[i];
571 
572                     reach_next[dest_id].params = reach_p->params;
573                     if (trans->params)
574                       tre_set_params(&reach_next[dest_id], trans->params,
575                                          default_params);
576 
577                     reach_next[dest_id].depth = reach_p->depth;
578                     memcpy(&reach_next[dest_id].costs,
579                            reach_p->costs,
580                            sizeof(reach_p->costs[0][0])
581                            * TRE_M_LAST * (depth + 1));
582                     reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
583                     reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
584                     reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
585                     if (depth > 0)
586                       {
587                         reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
588                         reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
589                         reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
590                       }
591 
592                     if (trans->state == tnfa->final
593                         && (match_eo < 0
594                               || match_costs[TRE_M_COST] > cost0
595                               || (match_costs[TRE_M_COST] == cost0
596                                   && (num_tags > 0
597                                         && tmp_tags[0] <= match_tags[0]))))
598                       {
599                         DPRINT(("        setting new match at %d, cost %d\n",
600                                   pos, cost0));
601                         match_eo = pos;
602                         memcpy(match_costs, reach_next[dest_id].costs[0],
603                                  sizeof(match_costs[0]) * TRE_M_LAST);
604                         for (i = 0; i < num_tags; i++)
605                           match_tags[i] = tmp_tags[i];
606                       }
607 
608                     /* Add to the end of the deque. */
609                     *deque_end = &reach_next[dest_id];
610                     deque_end++;
611                     if (deque_end >= (ringbuffer + 512))
612                       deque_end = ringbuffer;
613                     assert(deque_end != deque_start);
614                 }
615               deque_start++;
616               if (deque_start >= (ringbuffer + 512))
617                 deque_start = ringbuffer;
618             }
619 
620       }
621 
622 #ifdef TRE_DEBUG
623       tre_print_reach(tnfa, reach_next, pos, num_tags);
624 #endif /* TRE_DEBUG */
625 
626       /* Check for end of string. */
627       if (len < 0)
628           {
629             if (type == STR_USER)
630               {
631                 if (str_user_end)
632                     break;
633               }
634             else if (next_c == L'\0')
635               break;
636           }
637       else
638           {
639             if (pos >= len)
640               break;
641           }
642 
643       prev_pos = pos;
644       GET_NEXT_WCHAR();
645 
646       /* Swap `reach' and `reach_next'. */
647       {
648           tre_tnfa_approx_reach_t *tmp;
649           tmp = reach;
650           reach = reach_next;
651           reach_next = tmp;
652       }
653 
654       /* Handle exact matches and substitutions. */
655       for (id = 0; id < tnfa->num_states; id++)
656           {
657             tre_tnfa_transition_t *trans;
658 
659             if (reach[id].pos < prev_pos)
660               continue;  /* Not reached. */
661             for (trans = reach[id].state; trans->state; trans++)
662               {
663                 int dest_id;
664                 int depth;
665                 int cost, cost0, err;
666 
667                 if (trans->assertions
668                       && (CHECK_ASSERTIONS(trans->assertions)
669                           || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
670                     {
671                       DPRINT(("  exact,  from %d: assert failed\n", id));
672                       continue;
673                     }
674 
675                 depth = reach[id].depth;
676                 dest_id = trans->state_id;
677 
678                 cost = reach[id].costs[depth][TRE_M_COST];
679                 cost0 = reach[id].costs[0][TRE_M_COST];
680                 err = 0;
681 
682                 if (trans->code_min > (tre_cint_t)prev_c
683                       || trans->code_max < (tre_cint_t)prev_c)
684                     {
685                       /* Handle substitutes.  The required character was not in
686                          the string, so match it in place of whatever was supposed
687                          to be there and increase costs accordingly. */
688                       err = 1;
689 
690                       /* Compute and check cost at current depth. */
691                       cost = reach[id].costs[depth][TRE_M_COST];
692                       if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
693                         cost += reach[id].params.cost_subst;
694                       if (cost > reach[id].params.max_cost)
695                         continue; /* Cost too large. */
696 
697                       /* Check number of substitutes at current depth. */
698                       if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
699                           > reach[id].params.max_subst)
700                         continue; /* Too many substitutes. */
701 
702                       /* Check total number of errors at current depth. */
703                       if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
704                           > reach[id].params.max_err)
705                         continue; /* Too many errors. */
706 
707                       /* Compute overall cost. */
708                       cost0 = cost;
709                       if (depth > 0)
710                         {
711                           cost0 = reach[id].costs[0][TRE_M_COST];
712                           if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
713                               cost0 += reach[id].params.cost_subst;
714                           else
715                               cost0 += default_params.cost_subst;
716                         }
717                       DPRINT(("  subst,  from %03d to %03d, cost %d: ",
718                                 id, dest_id, cost0));
719                     }
720                 else
721                     DPRINT(("  exact,  from %03d to %03d, cost %d: ",
722                               id, dest_id, cost0));
723 
724                 /* Compute tag values after this transition. */
725                 for (i = 0; i < num_tags; i++)
726                     tmp_tags[i] = reach[id].tags[i];
727                 if (trans->tags)
728                     for (i = 0; trans->tags[i] >= 0; i++)
729                       if ((size_t)trans->tags[i] < num_tags)
730                         tmp_tags[trans->tags[i]] = pos;
731 
732                 /* If another path has also reached this state, choose the
733                      one with the smallest cost or best tags if costs are equal. */
734                 if (reach_next[dest_id].pos == pos
735                       && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
736                           || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
737                                 && !tre_tag_order(num_tags, tnfa->tag_directions,
738                                                       tmp_tags,
739                                                       reach_next[dest_id].tags))))
740                     {
741                       DPRINT(("lose\n"));
742                       continue;
743                     }
744                 DPRINT(("win %d %d\n",
745                           reach_next[dest_id].pos,
746                           reach_next[dest_id].costs[0][TRE_M_COST]));
747 
748                 /* Set state, position, tags, and depth. */
749                 reach_next[dest_id].state = trans->state;
750                 reach_next[dest_id].pos = pos;
751                 for (i = 0; i < num_tags; i++)
752                     reach_next[dest_id].tags[i] = tmp_tags[i];
753                 reach_next[dest_id].depth = reach[id].depth;
754 
755                 /* Set parameters. */
756                 reach_next[dest_id].params = reach[id].params;
757                 if (trans->params)
758                     tre_set_params(&reach_next[dest_id], trans->params,
759                                      default_params);
760 
761                 /* Set the costs after this transition. */
762                 memcpy(&reach_next[dest_id].costs,
763                          reach[id].costs,
764                          sizeof(reach[id].costs[0][0])
765                          * TRE_M_LAST * (depth + 1));
766                 reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
767                 reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
768                 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
769                 if (depth > 0)
770                     {
771                       reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
772                       reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
773                       reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
774                     }
775 
776                 if (trans->state == tnfa->final
777                       && (match_eo < 0
778                           || cost0 < match_costs[TRE_M_COST]
779                           || (cost0 == match_costs[TRE_M_COST]
780                                 && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
781                     {
782                       DPRINT(("    setting new match at %d, cost %d\n",
783                                 pos, cost0));
784                       match_eo = pos;
785                       for (i = 0; i < TRE_M_LAST; i++)
786                         match_costs[i] = reach_next[dest_id].costs[0][i];
787                       for (i = 0; i < num_tags; i++)
788                         match_tags[i] = tmp_tags[i];
789                     }
790               }
791           }
792     }
793 
794   DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
795             match_costs[TRE_M_COST]));
796 
797   match->cost = match_costs[TRE_M_COST];
798   match->num_ins = match_costs[TRE_M_NUM_INS];
799   match->num_del = match_costs[TRE_M_NUM_DEL];
800   match->num_subst = match_costs[TRE_M_NUM_SUBST];
801   *match_end_ofs = match_eo;
802 
803   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
804 error_exit:
805 #ifndef TRE_USE_ALLOCA
806   if (buf)
807     xfree(buf);
808 #endif /* !TRE_USE_ALLOCA */
809   return ret;
810 }
811