xref: /dragonfly/lib/libc/stdio/vfscanf.c (revision 839f7d6207d1654b664a46f2905d559773779bc3)
1 /*-
2  * Copyright (c) 1990, 1993
3  *        The Regents of the University of California.  All rights reserved.
4  *
5  * Copyright (c) 2011 The FreeBSD Foundation
6  * All rights reserved.
7  * Portions of this software were developed by David Chisnall
8  * under sponsorship from the FreeBSD Foundation.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Chris Torek.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  * @(#)vfscanf.c    8.1 (Berkeley) 6/4/93
38  * $FreeBSD: head/lib/libc/stdio/vfscanf.c 249808 2013-04-23 13:33:13Z emaste $
39  */
40 
41 
42 #include "namespace.h"
43 #include <ctype.h>
44 #include <inttypes.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <stddef.h>
48 #include <stdarg.h>
49 #include <string.h>
50 #include <wchar.h>
51 #include <wctype.h>
52 #include "un-namespace.h"
53 
54 #include "collate.h"
55 #include "libc_private.h"
56 #include "local.h"
57 #include "xlocale_private.h"
58 
59 #ifndef NO_FLOATING_POINT
60 #include <locale.h>
61 #endif
62 
63 #define   BUF                 513       /* Maximum length of numeric string. */
64 
65 /*
66  * Flags used during conversion.
67  */
68 #define   LONG                0x01      /* l: long or double */
69 #define   LONGDBL             0x02      /* L: long double */
70 #define   SHORT               0x04      /* h: short */
71 #define   SUPPRESS  0x08      /* *: suppress assignment */
72 #define   POINTER             0x10      /* p: void * (as hex) */
73 #define   NOSKIP              0x20      /* [ or c: do not skip blanks */
74 #ifdef __DragonFly__ /* Non-standard extension: L and ll are equivalent */
75 #define   LONGLONG  LONGDBL   /* ll/L: long long, long double */
76                                         /* (+ deprecated q: quad) */
77 #else
78 #define   LONGLONG  0x400     /* ll: long long (+ deprecated q: quad) */
79 #endif
80 #define   INTMAXT             0x800     /* j: intmax_t */
81 #define   PTRDIFFT  0x1000    /* t: ptrdiff_t */
82 #define   SIZET               0x2000    /* z: size_t */
83 #define   SHORTSHORT          0x4000    /* hh: char */
84 #define   UNSIGNED  0x8000    /* %[oupxX] conversions */
85 
86 /*
87  * The following are used in integral conversions only:
88  * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN
89  */
90 #define   SIGNOK              0x40      /* +/- is (still) legal */
91 #define   NDIGITS             0x80      /* no digits detected */
92 #define   PFXOK               0x100     /* 0x prefix is (still) legal */
93 #define   NZDIGITS  0x200     /* no zero digits detected */
94 #define   HAVESIGN  0x10000   /* sign detected */
95 
96 /*
97  * Conversion types.
98  */
99 #define   CT_CHAR             0         /* %c conversion */
100 #define   CT_CCL              1         /* %[...] conversion */
101 #define   CT_STRING 2         /* %s conversion */
102 #define   CT_INT              3         /* %[dioupxX] conversion */
103 #define   CT_FLOAT  4         /* %[efgEFG] conversion */
104 
105 static const u_char *__sccl(char *, const u_char *);
106 #ifndef NO_FLOATING_POINT
107 static int parsefloat(FILE *, char *, char *, locale_t);
108 #endif
109 
110 /*
111  * Conversion functions are passed a pointer to this object instead of
112  * a real parameter to indicate that the assignment-suppression (*)
113  * flag was specified.  We could use a NULL pointer to indicate this,
114  * but that would mask bugs in applications that call scanf() with a
115  * NULL pointer.
116  */
117 static const int suppress;
118 #define   SUPPRESS_PTR        ((void *)&suppress)
119 
120 static const mbstate_t initial_mbs;
121 
122 /*
123  * The following conversion functions return the number of characters consumed,
124  * or -1 on input failure.  Character class conversion returns 0 on match
125  * failure.
126  */
127 
128 static __inline int
convert_char(FILE * fp,char * p,int width)129 convert_char(FILE *fp, char * p, int width)
130 {
131           int n;
132 
133           if (p == SUPPRESS_PTR) {
134                     size_t sum = 0;
135                     for (;;) {
136                               if ((n = fp->pub._r) < width) {
137                                         sum += n;
138                                         width -= n;
139                                         fp->pub._p += n;
140                                         if (__srefill(fp)) {
141                                                   if (sum == 0)
142                                                             return (-1);
143                                                   break;
144                                         }
145                               } else {
146                                         sum += width;
147                                         fp->pub._r -= width;
148                                         fp->pub._p += width;
149                                         break;
150                               }
151                     }
152                     return (sum);
153           } else {
154                     size_t r = __fread(p, 1, width, fp);
155 
156                     if (r == 0)
157                               return (-1);
158                     return (r);
159           }
160 }
161 
162 static __inline int
convert_wchar(FILE * fp,wchar_t * wcp,int width,locale_t locale)163 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
164 {
165           mbstate_t mbs;
166           int n, nread;
167           wint_t wi;
168 
169           mbs = initial_mbs;
170           n = 0;
171           while (width-- != 0 &&
172               (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
173                     if (wcp != SUPPRESS_PTR)
174                               *wcp++ = (wchar_t)wi;
175                     n += nread;
176           }
177           if (n == 0)
178                     return (-1);
179           return (n);
180 }
181 
182 static __inline int
convert_ccl(FILE * fp,char * p,int width,const char * ccltab)183 convert_ccl(FILE *fp, char * p, int width, const char *ccltab)
184 {
185           char *p0;
186           int n;
187 
188           if (p == SUPPRESS_PTR) {
189                     n = 0;
190                     while (ccltab[*fp->pub._p]) {
191                               n++, fp->pub._r--, fp->pub._p++;
192                               if (--width == 0)
193                                         break;
194                               if (fp->pub._r <= 0 && __srefill(fp)) {
195                                         if (n == 0)
196                                                   return (-1);
197                                         break;
198                               }
199                     }
200           } else {
201                     p0 = p;
202                     while (ccltab[*fp->pub._p]) {
203                               fp->pub._r--;
204                               *p++ = *fp->pub._p++;
205                               if (--width == 0)
206                                         break;
207                               if (fp->pub._r <= 0 && __srefill(fp)) {
208                                         if (p == p0)
209                                                   return (-1);
210                                         break;
211                               }
212                     }
213                     n = p - p0;
214                     if (n == 0)
215                               return (0);
216                     *p = 0;
217           }
218           return (n);
219 }
220 
221 static __inline int
convert_wccl(FILE * fp,wchar_t * wcp,int width,const char * ccltab,locale_t locale)222 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
223     locale_t locale)
224 {
225           mbstate_t mbs;
226           wint_t wi;
227           int n, nread;
228 
229           mbs = initial_mbs;
230           n = 0;
231           if (wcp == SUPPRESS_PTR) {
232                     while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
233                         width-- != 0 && ccltab[wctob(wi)])
234                               n += nread;
235                     if (wi != WEOF)
236                               __ungetwc(wi, fp, __get_locale());
237           } else {
238                     while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
239                         width-- != 0 && ccltab[wctob(wi)]) {
240                               *wcp++ = (wchar_t)wi;
241                               n += nread;
242                     }
243                     if (wi != WEOF)
244                               __ungetwc(wi, fp, __get_locale());
245                     if (n == 0)
246                               return (0);
247                     *wcp = 0;
248           }
249           return (n);
250 }
251 
252 static __inline int
convert_string(FILE * fp,char * p,int width)253 convert_string(FILE *fp, char * p, int width)
254 {
255           char *p0;
256           int n;
257 
258           if (p == SUPPRESS_PTR) {
259                     n = 0;
260                     while (!isspace(*fp->pub._p)) {
261                               n++, fp->pub._r--, fp->pub._p++;
262                               if (--width == 0)
263                                         break;
264                               if (fp->pub._r <= 0 && __srefill(fp))
265                                         break;
266                     }
267           } else {
268                     p0 = p;
269                     while (!isspace(*fp->pub._p)) {
270                               fp->pub._r--;
271                               *p++ = *fp->pub._p++;
272                               if (--width == 0)
273                                         break;
274                               if (fp->pub._r <= 0 && __srefill(fp))
275                                         break;
276                     }
277                     *p = 0;
278                     n = p - p0;
279           }
280           return (n);
281 }
282 
283 static __inline int
convert_wstring(FILE * fp,wchar_t * wcp,int width,locale_t locale)284 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
285 {
286           mbstate_t mbs;
287           wint_t wi;
288           int n, nread;
289 
290           mbs = initial_mbs;
291           n = 0;
292           if (wcp == SUPPRESS_PTR) {
293                     while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
294                         width-- != 0 && !iswspace(wi))
295                               n += nread;
296                     if (wi != WEOF)
297                               __ungetwc(wi, fp, __get_locale());
298           } else {
299                     while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
300                         width-- != 0 && !iswspace(wi)) {
301                               *wcp++ = (wchar_t)wi;
302                               n += nread;
303                     }
304                     if (wi != WEOF)
305                               __ungetwc(wi, fp, __get_locale());
306                     *wcp = '\0';
307           }
308           return (n);
309 }
310 
311 /*
312  * Read an integer, storing it in buf.  The only relevant bit in the
313  * flags argument is PFXOK.
314  *
315  * Return 0 on a match failure, and the number of characters read
316  * otherwise.
317  */
318 static __inline int
parseint(FILE * fp,char * __restrict buf,int width,int base,int flags)319 parseint(FILE *fp, char * __restrict buf, int width, int base, int flags)
320 {
321           /* `basefix' is used to avoid `if' tests */
322           static const short basefix[17] =
323                     { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
324           char *p;
325           int c;
326 
327           flags |= SIGNOK | NDIGITS | NZDIGITS;
328           for (p = buf; width; width--) {
329                     c = *fp->pub._p;
330                     /*
331                      * Switch on the character; `goto ok' if we accept it
332                      * as a part of number.
333                      */
334                     switch (c) {
335 
336                     /*
337                      * The digit 0 is always legal, but is special.  For
338                      * %i conversions, if no digits (zero or nonzero) have
339                      * been scanned (only signs), we will have base==0.
340                      * In that case, we should set it to 8 and enable 0x
341                      * prefixing.  Also, if we have not scanned zero
342                      * digits before this, do not turn off prefixing
343                      * (someone else will turn it off if we have scanned
344                      * any nonzero digits).
345                      */
346                     case '0':
347                               if (base == 0) {
348                                         base = 8;
349                                         flags |= PFXOK;
350                               }
351                               if (flags & NZDIGITS)
352                                         flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
353                               else
354                                         flags &= ~(SIGNOK|PFXOK|NDIGITS);
355                               goto ok;
356 
357                     /* 1 through 7 always legal */
358                     case '1': case '2': case '3':
359                     case '4': case '5': case '6': case '7':
360                               base = basefix[base];
361                               flags &= ~(SIGNOK | PFXOK | NDIGITS);
362                               goto ok;
363 
364                     /* digits 8 and 9 ok iff decimal or hex */
365                     case '8': case '9':
366                               base = basefix[base];
367                               if (base <= 8)
368                                         break;    /* not legal here */
369                               flags &= ~(SIGNOK | PFXOK | NDIGITS);
370                               goto ok;
371 
372                     /* letters ok iff hex */
373                     case 'A': case 'B': case 'C':
374                     case 'D': case 'E': case 'F':
375                     case 'a': case 'b': case 'c':
376                     case 'd': case 'e': case 'f':
377                               /* no need to fix base here */
378                               if (base <= 10)
379                                         break;    /* not legal here */
380                               flags &= ~(SIGNOK | PFXOK | NDIGITS);
381                               goto ok;
382 
383                     /* sign ok only as first character */
384                     case '+': case '-':
385                               if (flags & SIGNOK) {
386                                         flags &= ~SIGNOK;
387                                         flags |= HAVESIGN;
388                                         goto ok;
389                               }
390                               break;
391 
392                     /*
393                      * x ok iff flag still set & 2nd char (or 3rd char if
394                      * we have a sign).
395                      */
396                     case 'x': case 'X':
397                               if (flags & PFXOK && p ==
398                                   buf + 1 + !!(flags & HAVESIGN)) {
399                                         base = 16;          /* if %i */
400                                         flags &= ~PFXOK;
401                                         goto ok;
402                               }
403                               break;
404                     }
405 
406                     /*
407                      * If we got here, c is not a legal character for a
408                      * number.  Stop accumulating digits.
409                      */
410                     break;
411           ok:
412                     /*
413                      * c is legal: store it and look at the next.
414                      */
415                     *p++ = c;
416                     if (--fp->pub._r > 0)
417                               fp->pub._p++;
418                     else if (__srefill(fp))
419                               break;              /* EOF */
420           }
421           /*
422            * If we had only a sign, it is no good; push back the sign.
423            * If the number ends in `x', it was [sign] '0' 'x', so push
424            * back the x and treat it as [sign] '0'.
425            */
426           if (flags & NDIGITS) {
427                     if (p > buf)
428                               (void) __ungetc(*(u_char *)--p, fp);
429                     return (0);
430           }
431           c = ((u_char *)p)[-1];
432           if (c == 'x' || c == 'X') {
433                     --p;
434                     (void) __ungetc(c, fp);
435           }
436           return (p - buf);
437 }
438 
/*
 * __vfscanf - MT-safe entry point.
 *
 * Holds the stream lock (FLOCKFILE/FUNLOCKFILE) around a call to
 * __svfscanf(), passing the locale obtained from __get_locale(), and
 * returns whatever __svfscanf() returns.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int result;

	FLOCKFILE(fp);
	result = __svfscanf(fp, __get_locale(), fmt0, ap);
	FUNLOCKFILE(fp);

	return (result);
}
452 
453 __weak_reference(__vfscanf, vfscanf);
454 
455 int
vfscanf_l(FILE * fp,locale_t locale,char const * fmt0,va_list ap)456 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
457 {
458           int ret;
459           FIX_LOCALE(locale);
460 
461           FLOCKFILE(fp);
462           ret = __svfscanf(fp, locale, fmt0, ap);
463           FUNLOCKFILE(fp);
464           return (ret);
465 }
466 
467 /*
468  * __svfscanf - non-MT-safe version of __vfscanf
469  */
470 int
__svfscanf(FILE * fp,locale_t locale,const char * fmt0,va_list ap)471 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
472 {
473 #define   GETARG(type)        ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
474           const u_char *fmt = (const u_char *)fmt0;
475           int c;                        /* character from format, or conversion */
476           size_t width;                 /* field width, or 0 */
477           int flags;                    /* flags as defined above */
478           int nassigned;                /* number of fields assigned */
479           int nconversions;   /* number of conversions */
480           int nr;                       /* characters read by the current conversion */
481           int nread;                    /* number of characters consumed from fp */
482           int base;           /* base argument to conversion function */
483           char ccltab[256];   /* character class table for %[...] */
484           char buf[BUF];                /* buffer for numeric conversions */
485 
486           ORIENT(fp, -1);
487 
488           nassigned = 0;
489           nconversions = 0;
490           nread = 0;
491           for (;;) {
492                     c = *fmt++;
493                     if (c == 0)
494                               return (nassigned);
495                     if (isspace(c)) {
496                               while ((fp->pub._r > 0 || __srefill(fp) == 0) && isspace(*fp->pub._p))
497                                         nread++, fp->pub._r--, fp->pub._p++;
498                               continue;
499                     }
500                     if (c != '%')
501                               goto literal;
502                     width = 0;
503                     flags = 0;
504                     /*
505                      * switch on the format.  continue if done;
506                      * break once format type is derived.
507                      */
508 again:              c = *fmt++;
509                     switch (c) {
510                     case '%':
511 literal:
512                               if (fp->pub._r <= 0 && __srefill(fp))
513                                         goto input_failure;
514                               if (*fp->pub._p != c)
515                                         goto match_failure;
516                               fp->pub._r--, fp->pub._p++;
517                               nread++;
518                               continue;
519 
520                     case '*':
521                               flags |= SUPPRESS;
522                               goto again;
523                     case 'j':
524                               flags |= INTMAXT;
525                               goto again;
526                     case 'l':
527                               if (flags & LONG) {
528                                         flags &= ~LONG;
529                                         flags |= LONGLONG;
530                               } else
531                                         flags |= LONG;
532                               goto again;
533                     case 'q':
534                               flags |= LONGLONG;  /* not quite */
535                               goto again;
536                     case 't':
537                               flags |= PTRDIFFT;
538                               goto again;
539                     case 'z':
540                               flags |= SIZET;
541                               goto again;
542                     case 'L':
543                               flags |= LONGDBL;
544                               goto again;
545                     case 'h':
546                               if (flags & SHORT) {
547                                         flags &= ~SHORT;
548                                         flags |= SHORTSHORT;
549                               } else
550                                         flags |= SHORT;
551                               goto again;
552 
553                     case '0': case '1': case '2': case '3': case '4':
554                     case '5': case '6': case '7': case '8': case '9':
555                               width = width * 10 + c - '0';
556                               goto again;
557 
558                     /*
559                      * Conversions.
560                      */
561                     case 'd':
562                               c = CT_INT;
563                               base = 10;
564                               break;
565 
566                     case 'i':
567                               c = CT_INT;
568                               base = 0;
569                               break;
570 
571                     case 'o':
572                               c = CT_INT;
573                               flags |= UNSIGNED;
574                               base = 8;
575                               break;
576 
577                     case 'u':
578                               c = CT_INT;
579                               flags |= UNSIGNED;
580                               base = 10;
581                               break;
582 
583                     case 'X':
584                     case 'x':
585                               flags |= PFXOK;     /* enable 0x prefixing */
586                               c = CT_INT;
587                               flags |= UNSIGNED;
588                               base = 16;
589                               break;
590 
591 #ifndef NO_FLOATING_POINT
592                     case 'A': case 'E': case 'F': case 'G':
593                     case 'a': case 'e': case 'f': case 'g':
594                               c = CT_FLOAT;
595                               break;
596 #endif
597 
598                     case 'S':
599                               flags |= LONG;
600                               /* FALLTHROUGH */
601                     case 's':
602                               c = CT_STRING;
603                               break;
604 
605                     case '[':
606                               fmt = __sccl(ccltab, fmt);
607                               flags |= NOSKIP;
608                               c = CT_CCL;
609                               break;
610 
611                     case 'C':
612                               flags |= LONG;
613                               /* FALLTHROUGH */
614                     case 'c':
615                               flags |= NOSKIP;
616                               c = CT_CHAR;
617                               break;
618 
619                     case 'p': /* pointer format is like hex */
620                               flags |= POINTER | PFXOK;
621                               c = CT_INT;                   /* assumes sizeof(uintmax_t) */
622                               flags |= UNSIGNED;  /*      >= sizeof(uintptr_t) */
623                               base = 16;
624                               break;
625 
626                     case 'n':
627                               if (flags & SUPPRESS)         /* ??? */
628                                         continue;
629                               if (flags & SHORTSHORT)
630                                         *va_arg(ap, char *) = nread;
631                               else if (flags & SHORT)
632                                         *va_arg(ap, short *) = nread;
633                               else if (flags & LONG)
634                                         *va_arg(ap, long *) = nread;
635                               else if (flags & LONGLONG)
636                                         *va_arg(ap, long long *) = nread;
637                               else if (flags & INTMAXT)
638                                         *va_arg(ap, intmax_t *) = nread;
639                               else if (flags & SIZET)
640                                         *va_arg(ap, size_t *) = nread;
641                               else if (flags & PTRDIFFT)
642                                         *va_arg(ap, ptrdiff_t *) = nread;
643                               else
644                                         *va_arg(ap, int *) = nread;
645                               continue;
646 
647                     default:
648                               goto match_failure;
649 
650                     /*
651                      * Disgusting backwards compatibility hack.       XXX
652                      */
653                     case '\0':          /* compat */
654                               return (EOF);
655                     }
656 
657                     /*
658                      * We have a conversion that requires input.
659                      */
660                     if (fp->pub._r <= 0 && __srefill(fp))
661                               goto input_failure;
662 
663                     /*
664                      * Consume leading white space, except for formats
665                      * that suppress this.
666                      */
667                     if ((flags & NOSKIP) == 0) {
668                               while (isspace(*fp->pub._p)) {
669                                         nread++;
670                                         if (--fp->pub._r > 0)
671                                                   fp->pub._p++;
672                                         else if (__srefill(fp))
673                                                   goto input_failure;
674                               }
675                               /*
676                                * Note that there is at least one character in
677                                * the buffer, so conversions that do not set NOSKIP
678                                * can no longer result in an input failure.
679                                */
680                     }
681 
682                     /*
683                      * Do the conversion.
684                      */
685                     switch (c) {
686 
687                     case CT_CHAR:
688                               /* scan arbitrary characters (sets NOSKIP) */
689                               if (width == 0)
690                                         width = 1;
691                               if (flags & LONG) {
692                                         nr = convert_wchar(fp, GETARG(wchar_t *),
693                                             width, locale);
694                               } else {
695                                         nr = convert_char(fp, GETARG(char *), width);
696                               }
697                               if (nr < 0)
698                                         goto input_failure;
699                               break;
700 
701                     case CT_CCL:
702                               /* scan a (nonempty) character class (sets NOSKIP) */
703                               if (width == 0)
704                                         width = (size_t)~0; /* `infinity' */
705                               if (flags & LONG) {
706                                         nr = convert_wccl(fp, GETARG(wchar_t *), width,
707                                             ccltab, locale);
708                               } else {
709                                         nr = convert_ccl(fp, GETARG(char *), width,
710                                             ccltab);
711                               }
712                               if (nr <= 0) {
713                                         if (nr < 0)
714                                                   goto input_failure;
715                                         else /* nr == 0 */
716                                                   goto match_failure;
717                               }
718                               break;
719 
720                     case CT_STRING:
721                               /* like CCL, but zero-length string OK, & no NOSKIP */
722                               if (width == 0)
723                                         width = (size_t)~0;
724                               if (flags & LONG) {
725                                         nr = convert_wstring(fp, GETARG(wchar_t *),
726                                             width, locale);
727                               } else {
728                                         nr = convert_string(fp, GETARG(char *), width);
729                               }
730                               if (nr < 0)
731                                         goto input_failure;
732                               break;
733 
734                     case CT_INT:
735                               /* scan an integer as if by the conversion function */
736 #ifdef hardway
737                               if (width == 0 || width > sizeof(buf) - 1)
738                                         width = sizeof(buf) - 1;
739 #else
740                               /* size_t is unsigned, hence this optimisation */
741                               if (--width > sizeof(buf) - 2)
742                                         width = sizeof(buf) - 2;
743                               width++;
744 #endif
745                               nr = parseint(fp, buf, width, base, flags);
746                               if (nr == 0)
747                                         goto match_failure;
748                               if ((flags & SUPPRESS) == 0) {
749                                         uintmax_t res;
750 
751                                         buf[nr] = '\0';
752                                         if ((flags & UNSIGNED) == 0)
753                                             res = strtoimax_l(buf, NULL, base, locale);
754                                         else
755                                             res = strtoumax_l(buf, NULL, base, locale);
756                                         if (flags & POINTER)
757                                                   *va_arg(ap, void **) =
758                                                                       (void *)(uintptr_t)res;
759                                         else if (flags & SHORTSHORT)
760                                                   *va_arg(ap, char *) = res;
761                                         else if (flags & SHORT)
762                                                   *va_arg(ap, short *) = res;
763                                         else if (flags & LONG)
764                                                   *va_arg(ap, long *) = res;
765                                         else if (flags & LONGLONG)
766                                                   *va_arg(ap, long long *) = res;
767                                         else if (flags & INTMAXT)
768                                                   *va_arg(ap, intmax_t *) = res;
769                                         else if (flags & PTRDIFFT)
770                                                   *va_arg(ap, ptrdiff_t *) = res;
771                                         else if (flags & SIZET)
772                                                   *va_arg(ap, size_t *) = res;
773                                         else
774                                                   *va_arg(ap, int *) = res;
775                               }
776                               break;
777 
778 #ifndef NO_FLOATING_POINT
779                     case CT_FLOAT:
780                               /* scan a floating point number as if by strtod */
781                               if (width == 0 || width > sizeof(buf) - 1)
782                                         width = sizeof(buf) - 1;
783                               nr = parsefloat(fp, buf, buf + width, locale);
784                               if (nr == 0)
785                                         goto match_failure;
786                               if ((flags & SUPPRESS) == 0) {
787                                         if (flags & LONGDBL) {
788                                                   long double res = strtold_l(buf, NULL,
789                                                       locale);
790                                                   *va_arg(ap, long double *) = res;
791                                         } else if (flags & LONG) {
792                                                   double res = strtod_l(buf, NULL,
793                                                       locale);
794                                                   *va_arg(ap, double *) = res;
795                                         } else {
796                                                   float res = strtof_l(buf, NULL, locale);
797                                                   *va_arg(ap, float *) = res;
798                                         }
799                               }
800                               break;
801 #endif /* !NO_FLOATING_POINT */
802                     }
803                     if (!(flags & SUPPRESS))
804                               nassigned++;
805                     nread += nr;
806                     nconversions++;
807           }
808 input_failure:
809           return (nconversions != 0 ? nassigned : EOF);
810 match_failure:
811           return (nassigned);
812 }
813 
814 /*
815  * Fill in the given table from the scanset at the given format
816  * (just after `[').  Return a pointer to the character past the
817  * closing `]'.  The table has a 1 wherever characters should be
818  * considered part of the scanset.
819  */
820 static const u_char *
__sccl(char * tab,const u_char * fmt)821 __sccl(char *tab, const u_char *fmt)
822 {
823           int c, n, v, i;
824           struct xlocale_collate *table =
825                     (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
826 
827           /* first `clear' the whole table */
828           c = *fmt++;                   /* first char hat => negated scanset */
829           if (c == '^') {
830                     v = 1;              /* default => accept */
831                     c = *fmt++;         /* get new first char */
832           } else
833                     v = 0;              /* default => reject */
834 
835           /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
836           (void) memset(tab, v, 256);
837 
838           if (c == 0)
839                     return (fmt - 1);/* format ended before closing ] */
840 
841           /*
842            * Now set the entries corresponding to the actual scanset
843            * to the opposite of the above.
844            *
845            * The first character may be ']' (or '-') without being special;
846            * the last character may be '-'.
847            */
848           v = 1 - v;
849           for (;;) {
850                     tab[c] = v;                   /* take character c */
851 doswitch:
852                     n = *fmt++;                   /* and examine the next */
853                     switch (n) {
854 
855                     case 0:                       /* format ended too soon */
856                               return (fmt - 1);
857 
858                     case '-':
859                               /*
860                                * A scanset of the form
861                                *        [01+-]
862                                * is defined as `the digit 0, the digit 1,
863                                * the character +, the character -', but
864                                * the effect of a scanset such as
865                                *        [a-zA-Z0-9]
866                                * is implementation defined.  The V7 Unix
867                                * scanf treats `a-z' as `the letters a through
868                                * z', but treats `a-a' as `the letter a, the
869                                * character -, and the letter a'.
870                                *
871                                * For compatibility, the `-' is not considered
872                                * to define a range if the character following
873                                * it is either a close bracket (required by ANSI)
874                                * or is not numerically greater than the character
875                                * we just stored in the table (c).
876                                */
877                               n = *fmt;
878                               if (n == ']'
879                                   || (table->__collate_load_error ? n < c :
880                                         __collate_range_cmp(n, c) < 0
881                                      )
882                                  ) {
883                                         c = '-';
884                                         break;    /* resume the for(;;) */
885                               }
886                               fmt++;
887                               /* fill in the range */
888                               if (table->__collate_load_error) {
889                                         do {
890                                                   tab[++c] = v;
891                                         } while (c < n);
892                               } else {
893                                         for (i = 0; i < 256; i ++)
894                                                   if (__collate_range_cmp(c, i) <= 0 &&
895                                                       __collate_range_cmp(i, n) <= 0
896                                                      )
897                                                             tab[i] = v;
898                               }
899 #if 1     /* XXX another disgusting compatibility hack */
900                               c = n;
901                               /*
902                                * Alas, the V7 Unix scanf also treats formats
903                                * such as [a-c-e] as `the letters a through e'.
904                                * This too is permitted by the standard....
905                                */
906                               goto doswitch;
907 #else
908                               c = *fmt++;
909                               if (c == 0)
910                                         return (fmt - 1);
911                               if (c == ']')
912                                         return (fmt);
913 #endif
914                               break;
915 
916                     case ']':           /* end of scanset */
917                               return (fmt);
918 
919                     default:            /* just another character */
920                               c = n;
921                               break;
922                     }
923           }
924           /* NOTREACHED */
925 }
926 
927 #ifndef NO_FLOATING_POINT
928 static int
parsefloat(FILE * fp,char * buf,char * end,locale_t locale)929 parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
930 {
931           char *commit, *p;
932           int infnanpos = 0, decptpos = 0;
933           enum {
934                     S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
935                     S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
936           } state = S_START;
937           unsigned char c;
938           const char *decpt = localeconv_l(locale)->decimal_point;
939           _Bool gotmantdig = 0, ishex = 0;
940 
941           /*
942            * We set commit = p whenever the string we have read so far
943            * constitutes a valid representation of a floating point
944            * number by itself.  At some point, the parse will complete
945            * or fail, and we will ungetc() back to the last commit point.
946            * To ensure that the file offset gets updated properly, it is
947            * always necessary to read at least one character that doesn't
948            * match; thus, we can't short-circuit "infinity" or "nan(...)".
949            */
950           commit = buf - 1;
951           for (p = buf; p < end; ) {
952                     c = *fp->pub._p;
953 reswitch:
954                     switch (state) {
955                     case S_START:
956                               state = S_GOTSIGN;
957                               if (c == '-' || c == '+')
958                                         break;
959                               else
960                                         goto reswitch;
961                     case S_GOTSIGN:
962                               switch (c) {
963                               case '0':
964                                         state = S_MAYBEHEX;
965                                         commit = p;
966                                         break;
967                               case 'I':
968                               case 'i':
969                                         state = S_INF;
970                                         break;
971                               case 'N':
972                               case 'n':
973                                         state = S_NAN;
974                                         break;
975                               default:
976                                         state = S_DIGITS;
977                                         goto reswitch;
978                               }
979                               break;
980                     case S_INF:
981                               if (infnanpos > 6 ||
982                                   (c != "nfinity"[infnanpos] &&
983                                    c != "NFINITY"[infnanpos]))
984                                         goto parsedone;
985                               if (infnanpos == 1 || infnanpos == 6)
986                                         commit = p;         /* inf or infinity */
987                               infnanpos++;
988                               break;
989                     case S_NAN:
990                               switch (infnanpos) {
991                               case 0:
992                                         if (c != 'A' && c != 'a')
993                                                   goto parsedone;
994                                         break;
995                               case 1:
996                                         if (c != 'N' && c != 'n')
997                                                   goto parsedone;
998                                         else
999                                                   commit = p;
1000                                         break;
1001                               case 2:
1002                                         if (c != '(')
1003                                                   goto parsedone;
1004                                         break;
1005                               default:
1006                                         if (c == ')') {
1007                                                   commit = p;
1008                                                   state = S_DONE;
1009                                         } else if (!isalnum(c) && c != '_')
1010                                                   goto parsedone;
1011                                         break;
1012                               }
1013                               infnanpos++;
1014                               break;
1015                     case S_DONE:
1016                               goto parsedone;
1017                     case S_MAYBEHEX:
1018                               state = S_DIGITS;
1019                               if (c == 'X' || c == 'x') {
1020                                         ishex = 1;
1021                                         break;
1022                               } else {  /* we saw a '0', but no 'x' */
1023                                         gotmantdig = 1;
1024                                         goto reswitch;
1025                               }
1026                     case S_DIGITS:
1027                               if ((ishex && isxdigit(c)) || isdigit(c)) {
1028                                         gotmantdig = 1;
1029                                         commit = p;
1030                                         break;
1031                               } else {
1032                                         state = S_DECPT;
1033                                         goto reswitch;
1034                               }
1035                     case S_DECPT:
1036                               if (c == decpt[decptpos]) {
1037                                         if (decpt[++decptpos] == '\0') {
1038                                                   /* We read the complete decpt seq. */
1039                                                   state = S_FRAC;
1040                                                   if (gotmantdig)
1041                                                             commit = p;
1042                                         }
1043                                         break;
1044                               } else if (!decptpos) {
1045                                         /* We didn't read any decpt characters. */
1046                                         state = S_FRAC;
1047                                         goto reswitch;
1048                               } else {
1049                                         /*
1050                                          * We read part of a multibyte decimal point,
1051                                          * but the rest is invalid, so bail.
1052                                          */
1053                                         goto parsedone;
1054                               }
1055                     case S_FRAC:
1056                               if (((c == 'E' || c == 'e') && !ishex) ||
1057                                   ((c == 'P' || c == 'p') && ishex)) {
1058                                         if (!gotmantdig)
1059                                                   goto parsedone;
1060                                         else
1061                                                   state = S_EXP;
1062                               } else if ((ishex && isxdigit(c)) || isdigit(c)) {
1063                                         commit = p;
1064                                         gotmantdig = 1;
1065                               } else
1066                                         goto parsedone;
1067                               break;
1068                     case S_EXP:
1069                               state = S_EXPDIGITS;
1070                               if (c == '-' || c == '+')
1071                                         break;
1072                               else
1073                                         goto reswitch;
1074                     case S_EXPDIGITS:
1075                               if (isdigit(c))
1076                                         commit = p;
1077                               else
1078                                         goto parsedone;
1079                               break;
1080                     default:
1081                               abort();
1082                     }
1083                     *p++ = c;
1084                     if (--fp->pub._r > 0)
1085                               fp->pub._p++;
1086                     else if (__srefill(fp))
1087                               break;    /* EOF */
1088           }
1089 
1090 parsedone:
1091           while (commit < --p)
1092                     __ungetc(*(u_char *)p, fp);
1093           *++commit = '\0';
1094           return (commit - buf);
1095 }
1096 #endif
1097