1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005
2           Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software Foundation,
16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17 #include <sys/cdefs.h>
18 __RCSID("$NetBSD: fnmatch_loop.c,v 1.2 2016/05/17 14:00:09 christos Exp $");
19 
20 
21 /* Match STRING against the file name pattern PATTERN, returning zero if
22    it matches, nonzero if not.  */
/* Forward declaration of the extended-pattern matcher.  In the code below
   it is called from the `?' and `*' cases when FNM_EXTMATCH is set and the
   next pattern character is `(': OPT receives the character that
   introduced the group and PATTERN points at the `('.  A result of -1
   makes the caller fall through to its ordinary wildcard handling; any
   other result is returned by the caller unchanged.  */
23 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
24                     const CHAR *string_end, bool no_leading_period, int flags)
25      internal_function;
/* Forward declaration of the helper used to step over a `(...)' group.
   The `*' case below treats a return value equal to its argument as
   "not an extended pattern" and any other value as the position from
   which to continue scanning the pattern.  */
26 static const CHAR *END (const CHAR *patternp) internal_function;
27 
28 static int
29 internal_function
FCT(const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)30 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
31      bool no_leading_period, int flags)
32 {
33   register const CHAR *p = pattern, *n = string;
34   register UCHAR c;
35 #ifdef _LIBC
36 # if WIDE_CHAR_VERSION
37   const char *collseq = (const char *)
38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
39 # else
40   const UCHAR *collseq = (const UCHAR *)
41     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
42 # endif
43 #endif
44 
45   while ((c = *p++) != L('\0'))
46     {
47       bool new_no_leading_period = false;
48       c = FOLD (c);
49 
50       switch (c)
51           {
52           case L('?'):
53             if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
54               {
55                 int res;
56 
57                 res = EXT (c, p, n, string_end, no_leading_period,
58                                flags);
59                 if (res != -1)
60                     return res;
61               }
62 
63             if (n == string_end)
64               return FNM_NOMATCH;
65             else if (*n == L('/') && (flags & FNM_FILE_NAME))
66               return FNM_NOMATCH;
67             else if (*n == L('.') && no_leading_period)
68               return FNM_NOMATCH;
69             break;
70 
71           case L('\\'):
72             if (!(flags & FNM_NOESCAPE))
73               {
74                 c = *p++;
75                 if (c == L('\0'))
76                     /* Trailing \ loses.  */
77                     return FNM_NOMATCH;
78                 c = FOLD (c);
79               }
80             if (n == string_end || FOLD ((UCHAR) *n) != c)
81               return FNM_NOMATCH;
82             break;
83 
84           case L('*'):
85             if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
86               {
87                 int res;
88 
89                 res = EXT (c, p, n, string_end, no_leading_period,
90                                flags);
91                 if (res != -1)
92                     return res;
93               }
94 
95             if (n != string_end && *n == L('.') && no_leading_period)
96               return FNM_NOMATCH;
97 
98             for (c = *p++; c == L('?') || c == L('*'); c = *p++)
99               {
100                 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
101                     {
102                       const CHAR *endp = END (p);
103                       if (endp != p)
104                         {
105                           /* This is a pattern.  Skip over it.  */
106                           p = endp;
107                           continue;
108                         }
109                     }
110 
111                 if (c == L('?'))
112                     {
113                       /* A ? needs to match one character.  */
114                       if (n == string_end)
115                         /* There isn't another character; no match.  */
116                         return FNM_NOMATCH;
117                       else if (*n == L('/')
118                                  && __builtin_expect (flags & FNM_FILE_NAME, 0))
119                         /* A slash does not match a wildcard under
120                            FNM_FILE_NAME.  */
121                         return FNM_NOMATCH;
122                       else
123                         /* One character of the string is consumed in matching
124                            this ? wildcard, so *??? won't match if there are
125                            less than three characters.  */
126                         ++n;
127                     }
128               }
129 
130             if (c == L('\0'))
131               /* The wildcard(s) is/are the last element of the pattern.
132                  If the name is a file name and contains another slash
133                  this means it cannot match, unless the FNM_LEADING_DIR
134                  flag is set.  */
135               {
136                 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
137 
138                 if (flags & FNM_FILE_NAME)
139                     {
140                       if (flags & FNM_LEADING_DIR)
141                         result = 0;
142                       else
143                         {
144                           if (MEMCHR (n, L('/'), string_end - n) == NULL)
145                               result = 0;
146                         }
147                     }
148 
149                 return result;
150               }
151             else
152               {
153                 const CHAR *endp;
154 
155                 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
156                                    string_end - n);
157                 if (endp == NULL)
158                     endp = string_end;
159 
160                 if (c == L('[')
161                       || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
162                           && (c == L('@') || c == L('+') || c == L('!'))
163                           && *p == L('(')))
164                     {
165                       int flags2 = ((flags & FNM_FILE_NAME)
166                                         ? flags : (flags & ~FNM_PERIOD));
167                       bool no_leading_period2 = no_leading_period;
168 
169                       for (--p; n < endp; ++n, no_leading_period2 = false)
170                         if (FCT (p, n, string_end, no_leading_period2, flags2)
171                               == 0)
172                           return 0;
173                     }
174                 else if (c == L('/') && (flags & FNM_FILE_NAME))
175                     {
176                       while (n < string_end && *n != L('/'))
177                         ++n;
178                       if (n < string_end && *n == L('/')
179                           && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
180                                 == 0))
181                         return 0;
182                     }
183                 else
184                     {
185                       int flags2 = ((flags & FNM_FILE_NAME)
186                                         ? flags : (flags & ~FNM_PERIOD));
187                       int no_leading_period2 = no_leading_period;
188 
189                       if (c == L('\\') && !(flags & FNM_NOESCAPE))
190                         c = *p;
191                       c = FOLD (c);
192                       for (--p; n < endp; ++n, no_leading_period2 = false)
193                         if (FOLD ((UCHAR) *n) == c
194                               && (FCT (p, n, string_end, no_leading_period2, flags2)
195                                   == 0))
196                           return 0;
197                     }
198               }
199 
200             /* If we come here no match is possible with the wildcard.  */
201             return FNM_NOMATCH;
202 
203           case L('['):
204             {
205               /* Nonzero if the sense of the character class is inverted.  */
206               register bool not;
207               CHAR cold;
208               UCHAR fn;
209 
210               if (posixly_correct == 0)
211                 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
212 
213               if (n == string_end)
214                 return FNM_NOMATCH;
215 
216               if (*n == L('.') && no_leading_period)
217                 return FNM_NOMATCH;
218 
219               if (*n == L('/') && (flags & FNM_FILE_NAME))
220                 /* `/' cannot be matched.  */
221                 return FNM_NOMATCH;
222 
223               not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
224               if (not)
225                 ++p;
226 
227               fn = FOLD ((UCHAR) *n);
228 
229               c = *p++;
230               for (;;)
231                 {
232                     if (!(flags & FNM_NOESCAPE) && c == L('\\'))
233                       {
234                         if (*p == L('\0'))
235                           return FNM_NOMATCH;
236                         c = FOLD ((UCHAR) *p);
237                         ++p;
238 
239                         if (c == fn)
240                           goto matched;
241                       }
242                     else if (c == L('[') && *p == L(':'))
243                       {
244                         /* Leave room for the null.  */
245                         CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
246                         size_t c1 = 0;
247 #if defined _LIBC || WIDE_CHAR_SUPPORT
248                         wctype_t wt;
249 #endif
250                         const CHAR *startp = p;
251 
252                         for (;;)
253                           {
254                               if (c1 == CHAR_CLASS_MAX_LENGTH)
255                                 /* The name is too long and therefore the pattern
256                                    is ill-formed.  */
257                                 return FNM_NOMATCH;
258 
259                               c = *++p;
260                               if (c == L(':') && p[1] == L(']'))
261                                 {
262                                   p += 2;
263                                   break;
264                                 }
265                               if (c < L('a') || c >= L('z'))
266                                 {
267                                   /* This cannot possibly be a character class name.
268                                      Match it as a normal range.  */
269                                   p = startp;
270                                   c = L('[');
271                                   goto normal_bracket;
272                                 }
273                               str[c1++] = c;
274                           }
275                         str[c1] = L('\0');
276 
277 #if defined _LIBC || WIDE_CHAR_SUPPORT
278                         wt = IS_CHAR_CLASS (str);
279                         if (wt == 0)
280                           /* Invalid character class name.  */
281                           return FNM_NOMATCH;
282 
283 # if defined _LIBC && ! WIDE_CHAR_VERSION
284                         /* The following code is glibc specific, but it
285                            does a good job of speeding things up there since
286                            we can avoid the btowc() call.  */
287                         if (_ISCTYPE ((UCHAR) *n, wt))
288                           goto matched;
289 # else
290                         if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291                           goto matched;
292 # endif
293 #else
294                         if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
295                               || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
296                               || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
297                               || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
298                               || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
299                               || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
300                               || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
301                               || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
302                               || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
303                               || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
304                               || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
305                               || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
306                           goto matched;
307 #endif
308                         c = *p++;
309                       }
310 #ifdef _LIBC
311                     else if (c == L('[') && *p == L('='))
312                       {
313                         UCHAR str[1];
314                         uint32_t nrules =
315                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
316                         const CHAR *startp = p;
317 
318                         c = *++p;
319                         if (c == L('\0'))
320                           {
321                               p = startp;
322                               c = L('[');
323                               goto normal_bracket;
324                           }
325                         str[0] = c;
326 
327                         c = *++p;
328                         if (c != L('=') || p[1] != L(']'))
329                           {
330                               p = startp;
331                               c = L('[');
332                               goto normal_bracket;
333                           }
334                         p += 2;
335 
336                         if (nrules == 0)
337                           {
338                               if ((UCHAR) *n == str[0])
339                                 goto matched;
340                           }
341                         else
342                           {
343                               const int32_t *table;
344 # if WIDE_CHAR_VERSION
345                               const int32_t *weights;
346                               const int32_t *extra;
347 # else
348                               const unsigned char *weights;
349                               const unsigned char *extra;
350 # endif
351                               const int32_t *indirect;
352                               int32_t idx;
353                               const UCHAR *cp = (const UCHAR *) str;
354 
355                               /* This #include defines a local function!  */
356 # if WIDE_CHAR_VERSION
357 #  include <locale/weightwc.h>
358 # else
359 #  include <locale/weight.h>
360 # endif
361 
362 # if WIDE_CHAR_VERSION
363                               table = (const int32_t *)
364                                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
365                               weights = (const int32_t *)
366                                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
367                               extra = (const int32_t *)
368                                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
369                               indirect = (const int32_t *)
370                                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
371 # else
372                               table = (const int32_t *)
373                                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
374                               weights = (const unsigned char *)
375                                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
376                               extra = (const unsigned char *)
377                                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
378                               indirect = (const int32_t *)
379                                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
380 # endif
381 
382                               idx = findidx (&cp);
383                               if (idx != 0)
384                                 {
385                                   /* We found a table entry.  Now see whether the
386                                      character we are currently at has the same
387                                      equivalence class value.  */
388                                   int len = weights[idx];
389                                   int32_t idx2;
390                                   const UCHAR *np = (const UCHAR *) n;
391 
392                                   idx2 = findidx (&np);
393                                   if (idx2 != 0 && len == weights[idx2])
394                                     {
395                                         int cnt = 0;
396 
397                                         while (cnt < len
398                                                && (weights[idx + 1 + cnt]
399                                                      == weights[idx2 + 1 + cnt]))
400                                           ++cnt;
401 
402                                         if (cnt == len)
403                                           goto matched;
404                                     }
405                                 }
406                           }
407 
408                         c = *p++;
409                       }
410 #endif
411                     else if (c == L('\0'))
412                       /* [ (unterminated) loses.  */
413                       return FNM_NOMATCH;
414                     else
415                       {
416                         bool is_range = false;
417 
418 #ifdef _LIBC
419                         bool is_seqval = false;
420 
421                         if (c == L('[') && *p == L('.'))
422                           {
423                               uint32_t nrules =
424                                 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
425                               const CHAR *startp = p;
426                               size_t c1 = 0;
427 
428                               while (1)
429                                 {
430                                   c = *++p;
431                                   if (c == L('.') && p[1] == L(']'))
432                                     {
433                                         p += 2;
434                                         break;
435                                     }
436                                   if (c == '\0')
437                                     return FNM_NOMATCH;
438                                   ++c1;
439                                 }
440 
441                               /* We have to handle the symbols differently in
442                                  ranges since then the collation sequence is
443                                  important.  */
444                               is_range = *p == L('-') && p[1] != L('\0');
445 
446                               if (nrules == 0)
447                                 {
448                                   /* There are no names defined in the collation
449                                      data.  Therefore we only accept the trivial
450                                      names consisting of the character itself.  */
451                                   if (c1 != 1)
452                                     return FNM_NOMATCH;
453 
454                                   if (!is_range && *n == startp[1])
455                                     goto matched;
456 
457                                   cold = startp[1];
458                                   c = *p++;
459                                 }
460                               else
461                                 {
462                                   int32_t table_size;
463                                   const int32_t *symb_table;
464 # ifdef WIDE_CHAR_VERSION
465                                   char str[c1];
466                                   size_t strcnt;
467 # else
468 #  define str (startp + 1)
469 # endif
470                                   const unsigned char *extra;
471                                   int32_t idx;
472                                   int32_t elem;
473                                   int32_t second;
474                                   int32_t hash;
475 
476 # ifdef WIDE_CHAR_VERSION
477                                   /* We have to convert the name to a single-byte
478                                      string.  This is possible since the names
479                                      consist of ASCII characters and the internal
480                                      representation is UCS4.  */
481                                   for (strcnt = 0; strcnt < c1; ++strcnt)
482                                     str[strcnt] = startp[1 + strcnt];
483 # endif
484 
485                                   table_size =
486                                     _NL_CURRENT_WORD (LC_COLLATE,
487                                                             _NL_COLLATE_SYMB_HASH_SIZEMB);
488                                   symb_table = (const int32_t *)
489                                     _NL_CURRENT (LC_COLLATE,
490                                                      _NL_COLLATE_SYMB_TABLEMB);
491                                   extra = (const unsigned char *)
492                                     _NL_CURRENT (LC_COLLATE,
493                                                      _NL_COLLATE_SYMB_EXTRAMB);
494 
495                                   /* Locate the character in the hashing table.  */
496                                   hash = elem_hash (str, c1);
497 
498                                   idx = 0;
499                                   elem = hash % table_size;
500                                   second = hash % (table_size - 2);
501                                   while (symb_table[2 * elem] != 0)
502                                     {
503                                         /* First compare the hashing value.  */
504                                         if (symb_table[2 * elem] == hash
505                                             && c1 == extra[symb_table[2 * elem + 1]]
506                                             && memcmp (str,
507                                                          &extra[symb_table[2 * elem + 1]
508                                                                  + 1], c1) == 0)
509                                           {
510                                             /* Yep, this is the entry.  */
511                                             idx = symb_table[2 * elem + 1];
512                                             idx += 1 + extra[idx];
513                                             break;
514                                           }
515 
516                                         /* Next entry.  */
517                                         elem += second;
518                                     }
519 
520                                   if (symb_table[2 * elem] != 0)
521                                     {
522                                         /* Compare the byte sequence but only if
523                                            this is not part of a range.  */
524 # ifdef WIDE_CHAR_VERSION
525                                         int32_t *wextra;
526 
527                                         idx += 1 + extra[idx];
528                                         /* Adjust for the alignment.  */
529                                         idx = (idx + 3) & ~3;
530 
531                                         wextra = (int32_t *) &extra[idx + 4];
532 # endif
533 
534                                         if (! is_range)
535                                           {
536 # ifdef WIDE_CHAR_VERSION
537                                             for (c1 = 0;
538                                                    (int32_t) c1 < wextra[idx];
539                                                    ++c1)
540                                               if (n[c1] != wextra[1 + c1])
541                                                   break;
542 
543                                             if ((int32_t) c1 == wextra[idx])
544                                               goto matched;
545 # else
546                                             for (c1 = 0; c1 < extra[idx]; ++c1)
547                                               if (n[c1] != extra[1 + c1])
548                                                   break;
549 
550                                             if (c1 == extra[idx])
551                                               goto matched;
552 # endif
553                                           }
554 
555                                         /* Get the collation sequence value.  */
556                                         is_seqval = true;
557 # ifdef WIDE_CHAR_VERSION
558                                         cold = wextra[1 + wextra[idx]];
559 # else
560                                         /* Adjust for the alignment.  */
561                                         idx += 1 + extra[idx];
562                                         idx = (idx + 3) & ~4;
563                                         cold = *((int32_t *) &extra[idx]);
564 # endif
565 
566                                         c = *p++;
567                                     }
568                                   else if (c1 == 1)
569                                     {
570                                         /* No valid character.  Match it as a
571                                            single byte.  */
572                                         if (!is_range && *n == str[0])
573                                           goto matched;
574 
575                                         cold = str[0];
576                                         c = *p++;
577                                     }
578                                   else
579                                     return FNM_NOMATCH;
580                                 }
581                           }
582                         else
583 # undef str
584 #endif
585                           {
586                               c = FOLD (c);
587                           normal_bracket:
588 
589                               /* We have to handle the symbols differently in
590                                  ranges since then the collation sequence is
591                                  important.  */
592                               is_range = (*p == L('-') && p[1] != L('\0')
593                                             && p[1] != L(']'));
594 
595                               if (!is_range && c == fn)
596                                 goto matched;
597 
598                               cold = c;
599                               c = *p++;
600                           }
601 
602                         if (c == L('-') && *p != L(']'))
603                           {
604 #if _LIBC
605                               /* We have to find the collation sequence
606                                  value for C.  Collation sequence is nothing
607                                  we can regularly access.  The sequence
608                                  value is defined by the order in which the
609                                  definitions of the collation values for the
610                                  various characters appear in the source
611                                  file.  A strange concept, nowhere
612                                  documented.  */
613                               uint32_t fcollseq;
614                               uint32_t lcollseq;
615                               UCHAR cend = *p++;
616 
617 # ifdef WIDE_CHAR_VERSION
618                               /* Search in the `names' array for the characters.  */
619                               fcollseq = __collseq_table_lookup (collseq, fn);
620                               if (fcollseq == ~((uint32_t) 0))
621                                 /* XXX We don't know anything about the character
622                                    we are supposed to match.  This means we are
623                                    failing.  */
624                                 goto range_not_matched;
625 
626                               if (is_seqval)
627                                 lcollseq = cold;
628                               else
629                                 lcollseq = __collseq_table_lookup (collseq, cold);
630 # else
631                               fcollseq = collseq[fn];
632                               lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
633 # endif
634 
635                               is_seqval = false;
636                               if (cend == L('[') && *p == L('.'))
637                                 {
638                                   uint32_t nrules =
639                                     _NL_CURRENT_WORD (LC_COLLATE,
640                                                             _NL_COLLATE_NRULES);
641                                   const CHAR *startp = p;
642                                   size_t c1 = 0;
643 
644                                   while (1)
645                                     {
646                                         c = *++p;
647                                         if (c == L('.') && p[1] == L(']'))
648                                           {
649                                             p += 2;
650                                             break;
651                                           }
652                                         if (c == '\0')
653                                           return FNM_NOMATCH;
654                                         ++c1;
655                                     }
656 
657                                   if (nrules == 0)
658                                     {
659                                         /* There are no names defined in the
660                                            collation data.  Therefore we only
661                                            accept the trivial names consisting
662                                            of the character itself.  */
663                                         if (c1 != 1)
664                                           return FNM_NOMATCH;
665 
666                                         cend = startp[1];
667                                     }
668                                   else
669                                     {
670                                         int32_t table_size;
671                                         const int32_t *symb_table;
672 # ifdef WIDE_CHAR_VERSION
673                                         char str[c1];
674                                         size_t strcnt;
675 # else
676 #  define str (startp + 1)
677 # endif
678                                         const unsigned char *extra;
679                                         int32_t idx;
680                                         int32_t elem;
681                                         int32_t second;
682                                         int32_t hash;
683 
684 # ifdef WIDE_CHAR_VERSION
685                                         /* We have to convert the name to a single-byte
686                                            string.  This is possible since the names
687                                            consist of ASCII characters and the internal
688                                            representation is UCS4.  */
689                                         for (strcnt = 0; strcnt < c1; ++strcnt)
690                                           str[strcnt] = startp[1 + strcnt];
691 # endif
692 
693                                         table_size =
694                                           _NL_CURRENT_WORD (LC_COLLATE,
695                                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
696                                         symb_table = (const int32_t *)
697                                           _NL_CURRENT (LC_COLLATE,
698                                                          _NL_COLLATE_SYMB_TABLEMB);
699                                         extra = (const unsigned char *)
700                                           _NL_CURRENT (LC_COLLATE,
701                                                          _NL_COLLATE_SYMB_EXTRAMB);
702 
703                                         /* Locate the character in the hashing
704                                    table.  */
705                                         hash = elem_hash (str, c1);
706 
707                                         idx = 0;
708                                         elem = hash % table_size;
709                                         second = hash % (table_size - 2);
710                                         while (symb_table[2 * elem] != 0)
711                                           {
712                                         /* First compare the hashing value.  */
713                                             if (symb_table[2 * elem] == hash
714                                                   && (c1
715                                                       == extra[symb_table[2 * elem + 1]])
716                                                   && memcmp (str,
717                                                                &extra[symb_table[2 * elem + 1]
718                                                                        + 1], c1) == 0)
719                                               {
720                                                   /* Yep, this is the entry.  */
721                                                   idx = symb_table[2 * elem + 1];
722                                                   idx += 1 + extra[idx];
723                                                   break;
724                                               }
725 
726                                             /* Next entry.  */
727                                             elem += second;
728                                           }
729 
730                                         if (symb_table[2 * elem] != 0)
731                                           {
732                                             /* Compare the byte sequence but only if
733                                                this is not part of a range.  */
734 # ifdef WIDE_CHAR_VERSION
735                                             int32_t *wextra;
736 
737                                             idx += 1 + extra[idx];
738                                             /* Adjust for the alignment.  */
739                                             idx = (idx + 3) & ~4;
740 
741                                             wextra = (int32_t *) &extra[idx + 4];
742 # endif
743                                             /* Get the collation sequence value.  */
744                                             is_seqval = true;
745 # ifdef WIDE_CHAR_VERSION
746                                             cend = wextra[1 + wextra[idx]];
747 # else
748                                             /* Adjust for the alignment.  */
749                                             idx += 1 + extra[idx];
750                                             idx = (idx + 3) & ~4;
751                                             cend = *((int32_t *) &extra[idx]);
752 # endif
753                                           }
754                                         else if (symb_table[2 * elem] != 0 && c1 == 1)
755                                           {
756                                             cend = str[0];
757                                             c = *p++;
758                                           }
759                                         else
760                                           return FNM_NOMATCH;
761                                     }
762 # undef str
763                                 }
764                               else
765                                 {
766                                   if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
767                                     cend = *p++;
768                                   if (cend == L('\0'))
769                                     return FNM_NOMATCH;
770                                   cend = FOLD (cend);
771                                 }
772 
773                               /* XXX It is not entirely clear to me how to handle
774                                  characters which are not mentioned in the
775                                  collation specification.  */
776                               if (
777 # ifdef WIDE_CHAR_VERSION
778                                   lcollseq == 0xffffffff ||
779 # endif
780                                   lcollseq <= fcollseq)
781                                 {
782                                   /* We have to look at the upper bound.  */
783                                   uint32_t hcollseq;
784 
785                                   if (is_seqval)
786                                     hcollseq = cend;
787                                   else
788                                     {
789 # ifdef WIDE_CHAR_VERSION
790                                         hcollseq =
791                                           __collseq_table_lookup (collseq, cend);
792                                         if (hcollseq == ~((uint32_t) 0))
793                                           {
794                                             /* Hum, no information about the upper
795                                                bound.  The matching succeeds if the
796                                                lower bound is matched exactly.  */
797                                             if (lcollseq != fcollseq)
798                                               goto range_not_matched;
799 
800                                             goto matched;
801                                           }
802 # else
803                                         hcollseq = collseq[cend];
804 # endif
805                                     }
806 
807                                   if (lcollseq <= hcollseq && fcollseq <= hcollseq)
808                                     goto matched;
809                                 }
810 # ifdef WIDE_CHAR_VERSION
811                           range_not_matched:
812 # endif
813 #else
814                               /* We use a boring value comparison of the character
815                                  values.  This is better than comparing using
816                                  `strcoll' since the latter would have surprising
817                                  and sometimes fatal consequences.  */
818                               UCHAR cend = *p++;
819 
820                               if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
821                                 cend = *p++;
822                               if (cend == L('\0'))
823                                 return FNM_NOMATCH;
824 
825                               /* It is a range.  */
826                               if (cold <= fn && fn <= cend)
827                                 goto matched;
828 #endif
829 
830                               c = *p++;
831                           }
832                       }
833 
834                     if (c == L(']'))
835                       break;
836                 }
837 
838               if (!not)
839                 return FNM_NOMATCH;
840               break;
841 
842             matched:
843               /* Skip the rest of the [...] that already matched.  */
844               do
845                 {
846                 ignore_next:
847                     c = *p++;
848 
849                     if (c == L('\0'))
850                       /* [... (unterminated) loses.  */
851                       return FNM_NOMATCH;
852 
853                     if (!(flags & FNM_NOESCAPE) && c == L('\\'))
854                       {
855                         if (*p == L('\0'))
856                           return FNM_NOMATCH;
857                         /* XXX 1003.2d11 is unclear if this is right.  */
858                         ++p;
859                       }
860                     else if (c == L('[') && *p == L(':'))
861                       {
862                         int c1 = 0;
863                         const CHAR *startp = p;
864 
865                         while (1)
866                           {
867                               c = *++p;
868                               if (++c1 == CHAR_CLASS_MAX_LENGTH)
869                                 return FNM_NOMATCH;
870 
871                               if (*p == L(':') && p[1] == L(']'))
872                                 break;
873 
874                               if (c < L('a') || c >= L('z'))
875                                 {
876                                   p = startp;
877                                   goto ignore_next;
878                                 }
879                           }
880                         p += 2;
881                         c = *p++;
882                       }
883                     else if (c == L('[') && *p == L('='))
884                       {
885                         c = *++p;
886                         if (c == L('\0'))
887                           return FNM_NOMATCH;
888                         c = *++p;
889                         if (c != L('=') || p[1] != L(']'))
890                           return FNM_NOMATCH;
891                         p += 2;
892                         c = *p++;
893                       }
894                     else if (c == L('[') && *p == L('.'))
895                       {
896                         ++p;
897                         while (1)
898                           {
899                               c = *++p;
900                               if (c == '\0')
901                                 return FNM_NOMATCH;
902 
903                               if (*p == L('.') && p[1] == L(']'))
904                                 break;
905                           }
906                         p += 2;
907                         c = *p++;
908                       }
909                 }
910               while (c != L(']'));
911               if (not)
912                 return FNM_NOMATCH;
913             }
914             break;
915 
916           case L('+'):
917           case L('@'):
918           case L('!'):
919             if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
920               {
921                 int res;
922 
923                 res = EXT (c, p, n, string_end, no_leading_period, flags);
924                 if (res != -1)
925                     return res;
926               }
927             goto normal_match;
928 
929           case L('/'):
930             if (NO_LEADING_PERIOD (flags))
931               {
932                 if (n == string_end || c != (UCHAR) *n)
933                     return FNM_NOMATCH;
934 
935                 new_no_leading_period = true;
936                 break;
937               }
938             /* FALLTHROUGH */
939           default:
940           normal_match:
941             if (n == string_end || c != FOLD ((UCHAR) *n))
942               return FNM_NOMATCH;
943           }
944 
945       no_leading_period = new_no_leading_period;
946       ++n;
947     }
948 
949   if (n == string_end)
950     return 0;
951 
952   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
953     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
954     return 0;
955 
956   return FNM_NOMATCH;
957 }
958 
959 
960 static const CHAR *
961 internal_function
END(const CHAR * pattern)962 END (const CHAR *pattern)
963 {
964   const CHAR *p = pattern;
965 
966   while (1)
967     if (*++p == L('\0'))
968       /* This is an invalid pattern.  */
969       return pattern;
970     else if (*p == L('['))
971       {
972           /* Handle brackets special.  */
973           if (posixly_correct == 0)
974             posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
975 
976           /* Skip the not sign.  We have to recognize it because of a possibly
977              following ']'.  */
978           if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
979             ++p;
980           /* A leading ']' is recognized as such.  */
981           if (*p == L(']'))
982             ++p;
983           /* Skip over all characters of the list.  */
984           while (*p != L(']'))
985             if (*p++ == L('\0'))
986               /* This is no valid pattern.  */
987               return pattern;
988       }
989     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
990                 || *p == L('!')) && p[1] == L('('))
991       p = END (p + 1);
992     else if (*p == L(')'))
993       break;
994 
995   return p + 1;
996 }
997 
998 
999 static int
1000 internal_function
EXT(INT opt,const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)1001 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1002      bool no_leading_period, int flags)
1003 {
1004   const CHAR *startp;
1005   size_t level;
1006   struct patternlist
1007   {
1008     struct patternlist *next;
1009     CHAR str[1];
1010   } *list = NULL;
1011   struct patternlist **lastp = &list;
1012   size_t pattern_len = STRLEN (pattern);
1013   const CHAR *p;
1014   const CHAR *rs;
1015   enum { ALLOCA_LIMIT = 8000 };
1016 
1017   /* Parse the pattern.  Store the individual parts in the list.  */
1018   level = 0;
1019   for (startp = p = pattern + 1; ; ++p)
1020     if (*p == L('\0'))
1021       /* This is an invalid pattern.  */
1022       return -1;
1023     else if (*p == L('['))
1024       {
1025           /* Handle brackets special.  */
1026           if (posixly_correct == 0)
1027             posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1028 
1029           /* Skip the not sign.  We have to recognize it because of a possibly
1030              following ']'.  */
1031           if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1032             ++p;
1033           /* A leading ']' is recognized as such.  */
1034           if (*p == L(']'))
1035             ++p;
1036           /* Skip over all characters of the list.  */
1037           while (*p != L(']'))
1038             if (*p++ == L('\0'))
1039               /* This is no valid pattern.  */
1040               return -1;
1041       }
1042     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1043                 || *p == L('!')) && p[1] == L('('))
1044       /* Remember the nesting level.  */
1045       ++level;
1046     else if (*p == L(')'))
1047       {
1048           if (level-- == 0)
1049             {
1050               /* This means we found the end of the pattern.  */
1051 #define NEW_PATTERN \
1052               struct patternlist *newp;                                               \
1053               size_t plen;                                                            \
1054               size_t plensize;                                                                  \
1055               size_t newpsize;                                                                  \
1056                                                                                                 \
1057               plen = (opt == L('?') || opt == L('@')                                  \
1058                         ? pattern_len                                                 \
1059                         : p - startp + 1);                                                      \
1060               plensize = plen * sizeof (CHAR);                                        \
1061               newpsize = offsetof (struct patternlist, str) + plensize;               \
1062               if ((size_t) -1 / sizeof (CHAR) < plen                                  \
1063                     || newpsize < offsetof (struct patternlist, str)        \
1064                     || ALLOCA_LIMIT <= newpsize)                                      \
1065                 return -1;                                                            \
1066               newp = (struct patternlist *) alloca (newpsize);                        \
1067               *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1068               newp->next = NULL;                                                                \
1069               *lastp = newp;                                                          \
1070               lastp = &newp->next
1071               NEW_PATTERN;
1072               break;
1073             }
1074       }
1075     else if (*p == L('|'))
1076       {
1077           if (level == 0)
1078             {
1079               NEW_PATTERN;
1080               startp = p + 1;
1081             }
1082       }
1083   assert (list != NULL);
1084   assert (p[-1] == L(')'));
1085 #undef NEW_PATTERN
1086 
1087   switch (opt)
1088     {
1089     case L('*'):
1090       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1091           return 0;
1092       /* FALLTHROUGH */
1093 
1094     case L('+'):
1095       do
1096           {
1097             for (rs = string; rs <= string_end; ++rs)
1098               /* First match the prefix with the current pattern with the
1099                  current pattern.  */
1100               if (FCT (list->str, string, rs, no_leading_period,
1101                          flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1102                     /* This was successful.  Now match the rest with the rest
1103                        of the pattern.  */
1104                     && (FCT (p, rs, string_end,
1105                                rs == string
1106                                ? no_leading_period
1107                                : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1108                                flags & FNM_FILE_NAME
1109                                ? flags : flags & ~FNM_PERIOD) == 0
1110                         /* This didn't work.  Try the whole pattern.  */
1111                         || (rs != string
1112                               && FCT (pattern - 1, rs, string_end,
1113                                         rs == string
1114                                         ? no_leading_period
1115                                         : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1116                                         flags & FNM_FILE_NAME
1117                                         ? flags : flags & ~FNM_PERIOD) == 0)))
1118                 /* It worked.  Signal success.  */
1119                 return 0;
1120           }
1121       while ((list = list->next) != NULL);
1122 
1123       /* None of the patterns lead to a match.  */
1124       return FNM_NOMATCH;
1125 
1126     case L('?'):
1127       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1128           return 0;
1129       /* FALLTHROUGH */
1130 
1131     case L('@'):
1132       do
1133           /* I cannot believe it but `strcat' is actually acceptable
1134              here.  Match the entire string with the prefix from the
1135              pattern list and the rest of the pattern following the
1136              pattern list.  */
1137           if (FCT (STRCAT (list->str, p), string, string_end,
1138                      no_leading_period,
1139                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1140             /* It worked.  Signal success.  */
1141             return 0;
1142       while ((list = list->next) != NULL);
1143 
1144       /* None of the patterns lead to a match.  */
1145       return FNM_NOMATCH;
1146 
1147     case L('!'):
1148       for (rs = string; rs <= string_end; ++rs)
1149           {
1150             struct patternlist *runp;
1151 
1152             for (runp = list; runp != NULL; runp = runp->next)
1153               if (FCT (runp->str, string, rs,  no_leading_period,
1154                          flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1155                 break;
1156 
1157             /* If none of the patterns matched see whether the rest does.  */
1158             if (runp == NULL
1159                 && (FCT (p, rs, string_end,
1160                            rs == string
1161                            ? no_leading_period
1162                            : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1163                            flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1164                       == 0))
1165               /* This is successful.  */
1166               return 0;
1167           }
1168 
1169       /* None of the patterns together with the rest of the pattern
1170            lead to a match.  */
1171       return FNM_NOMATCH;
1172 
1173     default:
1174       assert (! "Invalid extended matching operator");
1175       break;
1176     }
1177 
1178   return -1;
1179 }
1180 
1181 
/* Drop every macro that parameterizes the code above (character types,
   function names, string helpers and the literal prefix L).  Presumably
   this allows the including file to define them again and include this
   file once more, e.g. for a wide-character variant; the includer is
   not visible here, so confirm against it.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef L
#undef BTOWC
1196