/* Formatted output to strings.
   Copyright (C) 1999-2000, 2002-2003, 2006-2012 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, see <http://www.gnu.org/licenses/>.  */
16
/* This file can be parametrized with the following macros:
     CHAR_T             The element type of the format string.
     CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
                        in the format string are ASCII.
     DIRECTIVE          Structure denoting a format directive.
                        Depends on CHAR_T.
     DIRECTIVES         Structure denoting the set of format directives of a
                        format string.  Depends on CHAR_T.
     PRINTF_PARSE       Function that parses a format string.
                        Depends on CHAR_T.
     STATIC             Set to 'static' to declare the function static.
     ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.
   If PRINTF_PARSE is not defined, the 'char' based defaults given under
   "Default parameters" are used.  */
29
30#ifndef PRINTF_PARSE
31# include <config.h>
32#endif
33
34/* Specification.  */
35#ifndef PRINTF_PARSE
36# include "printf-parse.h"
37#endif
38
39/* Default parameters.  */
40#ifndef PRINTF_PARSE
41# define PRINTF_PARSE printf_parse
42# define CHAR_T char
43# define DIRECTIVE char_directive
44# define DIRECTIVES char_directives
45#endif
46
47/* Get size_t, NULL.  */
48#include <stddef.h>
49
50/* Get intmax_t.  */
51#if defined IN_LIBINTL || defined IN_LIBASPRINTF
52# if HAVE_STDINT_H_WITH_UINTMAX
53#  include <stdint.h>
54# endif
55# if HAVE_INTTYPES_H_WITH_UINTMAX
56#  include <inttypes.h>
57# endif
58#else
59# include <stdint.h>
60#endif
61
62/* malloc(), realloc(), free().  */
63#include <stdlib.h>
64
65/* memcpy().  */
66#include <string.h>
67
68/* errno.  */
69#include <errno.h>
70
71/* Checked size_t computations.  */
72#include "xsize.h"
73
74#if CHAR_T_ONLY_ASCII
75/* c_isascii().  */
76# include "c-ctype.h"
77#endif
78
79#ifdef STATIC
80STATIC
81#endif
82int
83PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
84{
85  const CHAR_T *cp = format;    /* pointer into format */
86  size_t arg_posn = 0;          /* number of regular arguments consumed */
87  size_t d_allocated;           /* allocated elements of d->dir */
88  size_t a_allocated;           /* allocated elements of a->arg */
89  size_t max_width_length = 0;
90  size_t max_precision_length = 0;
91
92  d->count = 0;
93  d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
94  d->dir = d->direct_alloc_dir;
95
96  a->count = 0;
97  a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
98  a->arg = a->direct_alloc_arg;
99
100#define REGISTER_ARG(_index_,_type_) \
101  {                                                                     \
102    size_t n = (_index_);                                               \
103    if (n >= a_allocated)                                               \
104      {                                                                 \
105        size_t memory_size;                                             \
106        argument *memory;                                               \
107                                                                        \
108        a_allocated = xtimes (a_allocated, 2);                          \
109        if (a_allocated <= n)                                           \
110          a_allocated = xsum (n, 1);                                    \
111        memory_size = xtimes (a_allocated, sizeof (argument));          \
112        if (size_overflow_p (memory_size))                              \
113          /* Overflow, would lead to out of memory.  */                 \
114          goto out_of_memory;                                           \
115        memory = (argument *) (a->arg != a->direct_alloc_arg            \
116                               ? realloc (a->arg, memory_size)          \
117                               : malloc (memory_size));                 \
118        if (memory == NULL)                                             \
119          /* Out of memory.  */                                         \
120          goto out_of_memory;                                           \
121        if (a->arg == a->direct_alloc_arg)                              \
122          memcpy (memory, a->arg, a->count * sizeof (argument));        \
123        a->arg = memory;                                                \
124      }                                                                 \
125    while (a->count <= n)                                               \
126      a->arg[a->count++].type = TYPE_NONE;                              \
127    if (a->arg[n].type == TYPE_NONE)                                    \
128      a->arg[n].type = (_type_);                                        \
129    else if (a->arg[n].type != (_type_))                                \
130      /* Ambiguous type for positional argument.  */                    \
131      goto error;                                                       \
132  }
133
134  while (*cp != '\0')
135    {
136      CHAR_T c = *cp++;
137      if (c == '%')
138        {
139          size_t arg_index = ARG_NONE;
140          DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
141
142          /* Initialize the next directive.  */
143          dp->dir_start = cp - 1;
144          dp->flags = 0;
145          dp->width_start = NULL;
146          dp->width_end = NULL;
147          dp->width_arg_index = ARG_NONE;
148          dp->precision_start = NULL;
149          dp->precision_end = NULL;
150          dp->precision_arg_index = ARG_NONE;
151          dp->arg_index = ARG_NONE;
152
153          /* Test for positional argument.  */
154          if (*cp >= '0' && *cp <= '9')
155            {
156              const CHAR_T *np;
157
158              for (np = cp; *np >= '0' && *np <= '9'; np++)
159                ;
160              if (*np == '$')
161                {
162                  size_t n = 0;
163
164                  for (np = cp; *np >= '0' && *np <= '9'; np++)
165                    n = xsum (xtimes (n, 10), *np - '0');
166                  if (n == 0)
167                    /* Positional argument 0.  */
168                    goto error;
169                  if (size_overflow_p (n))
170                    /* n too large, would lead to out of memory later.  */
171                    goto error;
172                  arg_index = n - 1;
173                  cp = np + 1;
174                }
175            }
176
177          /* Read the flags.  */
178          for (;;)
179            {
180              if (*cp == '\'')
181                {
182                  dp->flags |= FLAG_GROUP;
183                  cp++;
184                }
185              else if (*cp == '-')
186                {
187                  dp->flags |= FLAG_LEFT;
188                  cp++;
189                }
190              else if (*cp == '+')
191                {
192                  dp->flags |= FLAG_SHOWSIGN;
193                  cp++;
194                }
195              else if (*cp == ' ')
196                {
197                  dp->flags |= FLAG_SPACE;
198                  cp++;
199                }
200              else if (*cp == '#')
201                {
202                  dp->flags |= FLAG_ALT;
203                  cp++;
204                }
205              else if (*cp == '0')
206                {
207                  dp->flags |= FLAG_ZERO;
208                  cp++;
209                }
210#if __GLIBC__ >= 2 && !defined __UCLIBC__
211              else if (*cp == 'I')
212                {
213                  dp->flags |= FLAG_LOCALIZED;
214                  cp++;
215                }
216#endif
217              else
218                break;
219            }
220
221          /* Parse the field width.  */
222          if (*cp == '*')
223            {
224              dp->width_start = cp;
225              cp++;
226              dp->width_end = cp;
227              if (max_width_length < 1)
228                max_width_length = 1;
229
230              /* Test for positional argument.  */
231              if (*cp >= '0' && *cp <= '9')
232                {
233                  const CHAR_T *np;
234
235                  for (np = cp; *np >= '0' && *np <= '9'; np++)
236                    ;
237                  if (*np == '$')
238                    {
239                      size_t n = 0;
240
241                      for (np = cp; *np >= '0' && *np <= '9'; np++)
242                        n = xsum (xtimes (n, 10), *np - '0');
243                      if (n == 0)
244                        /* Positional argument 0.  */
245                        goto error;
246                      if (size_overflow_p (n))
247                        /* n too large, would lead to out of memory later.  */
248                        goto error;
249                      dp->width_arg_index = n - 1;
250                      cp = np + 1;
251                    }
252                }
253              if (dp->width_arg_index == ARG_NONE)
254                {
255                  dp->width_arg_index = arg_posn++;
256                  if (dp->width_arg_index == ARG_NONE)
257                    /* arg_posn wrapped around.  */
258                    goto error;
259                }
260              REGISTER_ARG (dp->width_arg_index, TYPE_INT);
261            }
262          else if (*cp >= '0' && *cp <= '9')
263            {
264              size_t width_length;
265
266              dp->width_start = cp;
267              for (; *cp >= '0' && *cp <= '9'; cp++)
268                ;
269              dp->width_end = cp;
270              width_length = dp->width_end - dp->width_start;
271              if (max_width_length < width_length)
272                max_width_length = width_length;
273            }
274
275          /* Parse the precision.  */
276          if (*cp == '.')
277            {
278              cp++;
279              if (*cp == '*')
280                {
281                  dp->precision_start = cp - 1;
282                  cp++;
283                  dp->precision_end = cp;
284                  if (max_precision_length < 2)
285                    max_precision_length = 2;
286
287                  /* Test for positional argument.  */
288                  if (*cp >= '0' && *cp <= '9')
289                    {
290                      const CHAR_T *np;
291
292                      for (np = cp; *np >= '0' && *np <= '9'; np++)
293                        ;
294                      if (*np == '$')
295                        {
296                          size_t n = 0;
297
298                          for (np = cp; *np >= '0' && *np <= '9'; np++)
299                            n = xsum (xtimes (n, 10), *np - '0');
300                          if (n == 0)
301                            /* Positional argument 0.  */
302                            goto error;
303                          if (size_overflow_p (n))
304                            /* n too large, would lead to out of memory
305                               later.  */
306                            goto error;
307                          dp->precision_arg_index = n - 1;
308                          cp = np + 1;
309                        }
310                    }
311                  if (dp->precision_arg_index == ARG_NONE)
312                    {
313                      dp->precision_arg_index = arg_posn++;
314                      if (dp->precision_arg_index == ARG_NONE)
315                        /* arg_posn wrapped around.  */
316                        goto error;
317                    }
318                  REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
319                }
320              else
321                {
322                  size_t precision_length;
323
324                  dp->precision_start = cp - 1;
325                  for (; *cp >= '0' && *cp <= '9'; cp++)
326                    ;
327                  dp->precision_end = cp;
328                  precision_length = dp->precision_end - dp->precision_start;
329                  if (max_precision_length < precision_length)
330                    max_precision_length = precision_length;
331                }
332            }
333
334          {
335            arg_type type;
336
337            /* Parse argument type/size specifiers.  */
338            {
339              int flags = 0;
340
341              for (;;)
342                {
343                  if (*cp == 'h')
344                    {
345                      flags |= (1 << (flags & 1));
346                      cp++;
347                    }
348                  else if (*cp == 'L')
349                    {
350                      flags |= 4;
351                      cp++;
352                    }
353                  else if (*cp == 'l')
354                    {
355                      flags += 8;
356                      cp++;
357                    }
358                  else if (*cp == 'j')
359                    {
360                      if (sizeof (intmax_t) > sizeof (long))
361                        {
362                          /* intmax_t = long long */
363                          flags += 16;
364                        }
365                      else if (sizeof (intmax_t) > sizeof (int))
366                        {
367                          /* intmax_t = long */
368                          flags += 8;
369                        }
370                      cp++;
371                    }
372                  else if (*cp == 'z' || *cp == 'Z')
373                    {
374                      /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
375                         because the warning facility in gcc-2.95.2 understands
376                         only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784).  */
377                      if (sizeof (size_t) > sizeof (long))
378                        {
379                          /* size_t = long long */
380                          flags += 16;
381                        }
382                      else if (sizeof (size_t) > sizeof (int))
383                        {
384                          /* size_t = long */
385                          flags += 8;
386                        }
387                      cp++;
388                    }
389                  else if (*cp == 't')
390                    {
391                      if (sizeof (ptrdiff_t) > sizeof (long))
392                        {
393                          /* ptrdiff_t = long long */
394                          flags += 16;
395                        }
396                      else if (sizeof (ptrdiff_t) > sizeof (int))
397                        {
398                          /* ptrdiff_t = long */
399                          flags += 8;
400                        }
401                      cp++;
402                    }
403#if defined __APPLE__ && defined __MACH__
404                  /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
405                     We cannot change it to "lld" because PRIdMAX must also
406                     be understood by the system's printf routines.  */
407                  else if (*cp == 'q')
408                    {
409                      if (64 / 8 > sizeof (long))
410                        {
411                          /* int64_t = long long */
412                          flags += 16;
413                        }
414                      else
415                        {
416                          /* int64_t = long */
417                          flags += 8;
418                        }
419                      cp++;
420                    }
421#endif
422#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
423                  /* On native Windows, PRIdMAX is defined as "I64d".
424                     We cannot change it to "lld" because PRIdMAX must also
425                     be understood by the system's printf routines.  */
426                  else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
427                    {
428                      if (64 / 8 > sizeof (long))
429                        {
430                          /* __int64 = long long */
431                          flags += 16;
432                        }
433                      else
434                        {
435                          /* __int64 = long */
436                          flags += 8;
437                        }
438                      cp += 3;
439                    }
440#endif
441                  else
442                    break;
443                }
444
445              /* Read the conversion character.  */
446              c = *cp++;
447              switch (c)
448                {
449                case 'd': case 'i':
450#if HAVE_LONG_LONG_INT
451                  /* If 'long long' exists and is larger than 'long':  */
452                  if (flags >= 16 || (flags & 4))
453                    type = TYPE_LONGLONGINT;
454                  else
455#endif
456                  /* If 'long long' exists and is the same as 'long', we parse
457                     "lld" into TYPE_LONGINT.  */
458                  if (flags >= 8)
459                    type = TYPE_LONGINT;
460                  else if (flags & 2)
461                    type = TYPE_SCHAR;
462                  else if (flags & 1)
463                    type = TYPE_SHORT;
464                  else
465                    type = TYPE_INT;
466                  break;
467                case 'o': case 'u': case 'x': case 'X':
468#if HAVE_LONG_LONG_INT
469                  /* If 'long long' exists and is larger than 'long':  */
470                  if (flags >= 16 || (flags & 4))
471                    type = TYPE_ULONGLONGINT;
472                  else
473#endif
474                  /* If 'unsigned long long' exists and is the same as
475                     'unsigned long', we parse "llu" into TYPE_ULONGINT.  */
476                  if (flags >= 8)
477                    type = TYPE_ULONGINT;
478                  else if (flags & 2)
479                    type = TYPE_UCHAR;
480                  else if (flags & 1)
481                    type = TYPE_USHORT;
482                  else
483                    type = TYPE_UINT;
484                  break;
485                case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
486                case 'a': case 'A':
487                  if (flags >= 16 || (flags & 4))
488                    type = TYPE_LONGDOUBLE;
489                  else
490                    type = TYPE_DOUBLE;
491                  break;
492                case 'c':
493                  if (flags >= 8)
494#if HAVE_WINT_T
495                    type = TYPE_WIDE_CHAR;
496#else
497                    goto error;
498#endif
499                  else
500                    type = TYPE_CHAR;
501                  break;
502#if HAVE_WINT_T
503                case 'C':
504                  type = TYPE_WIDE_CHAR;
505                  c = 'c';
506                  break;
507#endif
508                case 's':
509                  if (flags >= 8)
510#if HAVE_WCHAR_T
511                    type = TYPE_WIDE_STRING;
512#else
513                    goto error;
514#endif
515                  else
516                    type = TYPE_STRING;
517                  break;
518#if HAVE_WCHAR_T
519                case 'S':
520                  type = TYPE_WIDE_STRING;
521                  c = 's';
522                  break;
523#endif
524                case 'p':
525                  type = TYPE_POINTER;
526                  break;
527                case 'n':
528#if HAVE_LONG_LONG_INT
529                  /* If 'long long' exists and is larger than 'long':  */
530                  if (flags >= 16 || (flags & 4))
531                    type = TYPE_COUNT_LONGLONGINT_POINTER;
532                  else
533#endif
534                  /* If 'long long' exists and is the same as 'long', we parse
535                     "lln" into TYPE_COUNT_LONGINT_POINTER.  */
536                  if (flags >= 8)
537                    type = TYPE_COUNT_LONGINT_POINTER;
538                  else if (flags & 2)
539                    type = TYPE_COUNT_SCHAR_POINTER;
540                  else if (flags & 1)
541                    type = TYPE_COUNT_SHORT_POINTER;
542                  else
543                    type = TYPE_COUNT_INT_POINTER;
544                  break;
545#if ENABLE_UNISTDIO
546                /* The unistdio extensions.  */
547                case 'U':
548                  if (flags >= 16)
549                    type = TYPE_U32_STRING;
550                  else if (flags >= 8)
551                    type = TYPE_U16_STRING;
552                  else
553                    type = TYPE_U8_STRING;
554                  break;
555#endif
556                case '%':
557                  type = TYPE_NONE;
558                  break;
559                default:
560                  /* Unknown conversion character.  */
561                  goto error;
562                }
563            }
564
565            if (type != TYPE_NONE)
566              {
567                dp->arg_index = arg_index;
568                if (dp->arg_index == ARG_NONE)
569                  {
570                    dp->arg_index = arg_posn++;
571                    if (dp->arg_index == ARG_NONE)
572                      /* arg_posn wrapped around.  */
573                      goto error;
574                  }
575                REGISTER_ARG (dp->arg_index, type);
576              }
577            dp->conversion = c;
578            dp->dir_end = cp;
579          }
580
581          d->count++;
582          if (d->count >= d_allocated)
583            {
584              size_t memory_size;
585              DIRECTIVE *memory;
586
587              d_allocated = xtimes (d_allocated, 2);
588              memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
589              if (size_overflow_p (memory_size))
590                /* Overflow, would lead to out of memory.  */
591                goto out_of_memory;
592              memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
593                                      ? realloc (d->dir, memory_size)
594                                      : malloc (memory_size));
595              if (memory == NULL)
596                /* Out of memory.  */
597                goto out_of_memory;
598              if (d->dir == d->direct_alloc_dir)
599                memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
600              d->dir = memory;
601            }
602        }
603#if CHAR_T_ONLY_ASCII
604      else if (!c_isascii (c))
605        {
606          /* Non-ASCII character.  Not supported.  */
607          goto error;
608        }
609#endif
610    }
611  d->dir[d->count].dir_start = cp;
612
613  d->max_width_length = max_width_length;
614  d->max_precision_length = max_precision_length;
615  return 0;
616
617error:
618  if (a->arg != a->direct_alloc_arg)
619    free (a->arg);
620  if (d->dir != d->direct_alloc_dir)
621    free (d->dir);
622  errno = EINVAL;
623  return -1;
624
625out_of_memory:
626  if (a->arg != a->direct_alloc_arg)
627    free (a->arg);
628  if (d->dir != d->direct_alloc_dir)
629    free (d->dir);
630  errno = ENOMEM;
631  return -1;
632}
633
/* Clean up the helper macro defined inside PRINTF_PARSE and the
   parametrization macros, so that none of them leaks into code that follows
   (presumably so that this file can be included again with a different
   parametrization -- confirm against the including files).  */
#undef REGISTER_ARG
#undef PRINTF_PARSE
#undef DIRECTIVES
#undef DIRECTIVE
#undef CHAR_T_ONLY_ASCII
#undef CHAR_T
639