args.c revision dc6db1a81ab2642927a2c431b7ffab120eeb92ca
/* args.c - Command line argument parsing.
 *
 * Copyright 2006 Rob Landley <rob@landley.net>
 */
5
#include "toys.h"
7
// Design goals:
//   Don't use getopt() out of libc.
//   Don't permute original arguments (screwing up ps/top output).
//   Integrated --long options "(noshort)a(along)b(blong1)(blong2)"
12
/* This uses a getopt-like option string, but not getopt() itself. We call
 * it the get_opt string.
 *
 * Each option in the get_opt string corresponds to a bit position in
 * toys.optflags. The rightmost option is (1<<0), the next to last is (1<<1)
 * and so on. If the option isn't seen in argv[], its bit remains 0.
 *
 * Options which have an argument fill in the corresponding slot in the global
 * union "this" (see generated/globals.h), which it treats as an array of longs
 * (note that sizeof(long)==sizeof(pointer) is guaranteed by LP64).
 *
 * You don't have to free the option strings, which point into the environment
 * space. List objects should be freed by main() when command_main() returns.
 *
 * Example:
 *   Calling get_optflags() when toys.which->options="ab:c:d" and
 *   argv = ["command", "-b", "fruit", "-d", "walrus"] results in:
 *
 *     Changes to struct toys:
 *       toys.optflags = 5 (I.E. 0101 so -b = 4 | -d = 1)
 *       toys.optargs[0] = "walrus" (leftover argument)
 *       toys.optargs[1] = NULL (end of list)
 *       toys.optc = 1 (there was 1 leftover argument)
 *
 *     Changes to union this:
 *       this[0]=NULL (because -c didn't get an argument this time)
 *       this[1]="fruit" (argument to -b)
 */
41
// Enabling TOYBOX_DEBUG in .config adds syntax checks to option string parsing
// which aren't needed in the final code (your option string is hardwired and
// should be correct when you ship), but are useful for development.
45
// What you can put in a get_opt string:
//   Any otherwise unused character (all letters, unprefixed numbers) specifies
//   an option that sets a flag. The bit value is the same as the binary digit
//   if you string the option characters together in order.
//   So in "abcdefgh" a = 128, h = 1
//
//   Suffixes specify that this option takes an argument (stored in GLOBALS):
//       Note that pointer and long are always the same size, even on 64 bit.
//     : plus a string argument, keep most recent if more than one
//     * plus a string argument, appended to a list
//     # plus a signed long argument
//       <LOW     - die if less than LOW
//       >HIGH    - die if greater than HIGH
//       =DEFAULT - value if not specified
//     - plus a signed long argument defaulting to negative (say + for positive)
//     . plus a double precision floating point argument (with CFG_TOYBOX_FLOAT)
//       Chop this option out with USE_TOYBOX_FLOAT() in option string
//       Same <LOW>HIGH=DEFAULT as #
//     @ plus an occurrence counter (which is a long)
//     (longopt)
//     | this is required. If more than one marked, only one required.
//     ; long option's argument is optional (can only be supplied with --opt=)
//     ^ Stop parsing after encountering this argument
//    " " (space char) the "plus an argument" must be separate
//        I.E. "-j 3" not "-j3". So "kill -stop" != "kill -s top"
//
//   At the beginning of the get_opt string (before any options):
//     ^ stop at first nonoption argument
//     <0 die if less than # leftover arguments (default 0)
//     >9 die if > # leftover arguments (default MAX_INT)
//     ? Allow unknown arguments (pass them through to command).
//     & first argument has imaginary dash (ala tar/ps)
//       If given twice, all arguments have imaginary dash
//
//   At the end: [groups] of previously seen options
//     - Only one in group (switch off)    [-abc] means -ab=-b, -ba=-a, -abc=-c
//     | Synonyms (switch on all)          [|abc] means -ab=-abc, -c=-abc
//     ! More than one in group is error   [!abc] means -ab calls error_exit()
//     + First in group switches rest on   [+abc] means -a=-abc, -b=-b, -c=-c
//       primarily useful if you can switch things back off again.
//
87
// Notes from getopt man page
//   - and -- cannot be arguments.
//     -- force end of arguments
//     - is a synonym for stdin in file arguments
//   -abcd means -a -b -c -d (but if -b takes an argument, then it's -a -b cd)
93
94// Linked list of all known options (option string parsed into this).
95// Hangs off getoptflagstate, freed at end of option parsing.
96struct opts {
97  struct opts *next;
98  long *arg;         // Pointer into union "this" to store arguments at.
99  int c;             // Argument character to match
100  int flags;         // |=1, ^=2
101  unsigned dex[3];   // which bits to disable/enable/exclude in toys.optflags
102  char type;         // Type of arguments to store union "this"
103  union {
104    long l;
105    FLOAT f;
106  } val[3];          // low, high, default - range of allowed values
107};
108
// Linked list of long options. Hangs off getoptflagstate, freed at end of
// option parsing. Details about the flag to set and the global slot to fill
// out are stored in the related short option struct; if opt->c == -1 the
// long option is "bare" (has no corresponding short option).
struct longopts {
  struct longopts *next;
  struct opts *opt;  // Option entry this long name maps to
  char *str;         // Start of the name inside the option string (ends at ')')
  int len;           // Length of the name, str is not NUL terminated there
};
119
// State during argument parsing. parse_optflaglist() zeroes this and fills in
// the option lists and limits, get_optflags()/gotflag() update the rest while
// walking argv.
struct getoptflagstate
{
  int argc;                   // Index into toys.argv of the argument in progress
  int minargs, maxargs;       // Allowed range for number of leftover arguments
  int nodash;                 // Count of leading '&' in the option string
  char *arg;                  // Current position within the current argument
  struct opts *opts;          // Known options, rightmost option first
  struct longopts *longopts;  // Known --long names
  int noerror;                // Nonzero: unknown options aren't an error ('?')
  int nodash_now;             // Current argument is options without a dash
  int stopearly;              // >1 means treat everything else as non-option
  unsigned excludes;          // Accumulated dex[2] bits of options seen so far
  unsigned requires;          // Bits of options marked '|' (at least one needed)
};
130
131// Use getoptflagstate to parse parse one command line option from argv
132static int gotflag(struct getoptflagstate *gof, struct opts *opt)
133{
134  int type;
135
136  // Did we recognize this option?
137  if (!opt) {
138    if (gof->noerror) return 1;
139    error_exit("Unknown option %s", gof->arg);
140  }
141
142  // Set flags
143  if (toys.optflags & opt->dex[0]) {
144    struct opts *clr;
145    unsigned i = 1;
146
147    for (clr=gof->opts, i=1; clr; clr = clr->next, i<<=1)
148      if (clr->arg && (i & toys.optflags)) *clr->arg = 0;
149    toys.optflags &= ~opt->dex[0];
150  }
151  toys.optflags |= opt->dex[1];
152  gof->excludes |= opt->dex[2];
153  if (opt->flags&2) gof->stopearly=2;
154
155  if (toys.optflags & gof->excludes) {
156    struct opts *bad;
157    unsigned i = 1;
158
159    for (bad=gof->opts, i=1; ;bad = bad->next, i<<=1) {
160      if (opt == bad || !(i & toys.optflags)) continue;
161      if (toys.optflags & bad->dex[2]) break;
162    }
163    error_exit("No '%c' with '%c'", opt->c, bad->c);
164  }
165
166  // Does this option take an argument?
167  if (!gof->arg) {
168    if (opt->flags & 8) return 0;
169    gof->arg = "";
170  } else gof->arg++;
171  type = opt->type;
172
173  if (type == '@') ++*(opt->arg);
174  else if (type) {
175    char *arg = gof->arg;
176
177    // Handle "-xblah" and "-x blah", but also a third case: "abxc blah"
178    // to make "tar xCjfv blah1 blah2 thingy" work like
179    // "tar -x -C blah1 -j -f blah2 -v thingy"
180
181    if (gof->nodash_now || (!arg[0] && !(opt->flags & 8)))
182      arg = toys.argv[++gof->argc];
183    if (!arg) {
184      char *s = "Missing argument to ";
185      struct longopts *lo;
186
187      if (opt->c != -1) error_exit("%s-%c", s, opt->c);
188
189      for (lo = gof->longopts; lo->opt != opt; lo = lo->next);
190      error_exit("%s--%.*s", s, lo->len, lo->str);
191    }
192
193    if (type == ':') *(opt->arg) = (long)arg;
194    else if (type == '*') {
195      struct arg_list **list;
196
197      list = (struct arg_list **)opt->arg;
198      while (*list) list=&((*list)->next);
199      *list = xzalloc(sizeof(struct arg_list));
200      (*list)->arg = arg;
201    } else if (type == '#' || type == '-') {
202      long l = atolx(arg);
203      if (type == '-' && !ispunct(*arg)) l*=-1;
204      if (l < opt->val[0].l) error_exit("-%c < %ld", opt->c, opt->val[0].l);
205      if (l > opt->val[1].l) error_exit("-%c > %ld", opt->c, opt->val[1].l);
206
207      *(opt->arg) = l;
208    } else if (CFG_TOYBOX_FLOAT && type == '.') {
209      FLOAT *f = (FLOAT *)(opt->arg);
210
211      *f = strtod(arg, &arg);
212      if (opt->val[0].l != LONG_MIN && *f < opt->val[0].f)
213        error_exit("-%c < %lf", opt->c, (double)opt->val[0].f);
214      if (opt->val[1].l != LONG_MAX && *f > opt->val[1].f)
215        error_exit("-%c > %lf", opt->c, (double)opt->val[1].f);
216    }
217
218    if (!gof->nodash_now) gof->arg = "";
219  }
220
221  return 0;
222}
223
224// Parse this command's options string into struct getoptflagstate, which
225// includes a struct opts linked list in reverse order (I.E. right-to-left)
226void parse_optflaglist(struct getoptflagstate *gof)
227{
228  char *options = toys.which->options;
229  long *nextarg = (long *)&this;
230  struct opts *new = 0;
231  int idx;
232
233  // Parse option format string
234  memset(gof, 0, sizeof(struct getoptflagstate));
235  gof->maxargs = INT_MAX;
236  if (!options) return;
237
238  // Parse leading special behavior indicators
239  for (;;) {
240    if (*options == '^') gof->stopearly++;
241    else if (*options == '<') gof->minargs=*(++options)-'0';
242    else if (*options == '>') gof->maxargs=*(++options)-'0';
243    else if (*options == '?') gof->noerror++;
244    else if (*options == '&') gof->nodash++;
245    else break;
246    options++;
247  }
248
249  // Parse option string into a linked list of options with attributes.
250
251  if (!*options) gof->stopearly++;
252  while (*options) {
253    char *temp;
254
255    // Option groups come after all options are defined
256    if (*options == '[') break;
257
258    // Allocate a new list entry when necessary
259    if (!new) {
260      new = xzalloc(sizeof(struct opts));
261      new->next = gof->opts;
262      gof->opts = new;
263      new->val[0].l = LONG_MIN;
264      new->val[1].l = LONG_MAX;
265    }
266    // Each option must start with "(" or an option character.  (Bare
267    // longopts only come at the start of the string.)
268    if (*options == '(' && new->c != -1) {
269      char *end;
270      struct longopts *lo;
271
272      // Find the end of the longopt
273      for (end = ++options; *end && *end != ')'; end++);
274      if (CFG_TOYBOX_DEBUG && !*end) error_exit("(longopt) didn't end");
275
276      // init a new struct longopts
277      lo = xmalloc(sizeof(struct longopts));
278      lo->next = gof->longopts;
279      lo->opt = new;
280      lo->str = options;
281      lo->len = end-options;
282      gof->longopts = lo;
283      options = ++end;
284
285      // Mark this struct opt as used, even when no short opt.
286      if (!new->c) new->c = -1;
287
288      continue;
289
290    // If this is the start of a new option that wasn't a longopt,
291
292    } else if (strchr(":*#@.-", *options)) {
293      if (CFG_TOYBOX_DEBUG && new->type)
294        error_exit("multiple types %c:%c%c", new->c, new->type, *options);
295      new->type = *options;
296    } else if (-1 != (idx = stridx("|^ ;", *options))) new->flags |= 1<<idx;
297    // bounds checking
298    else if (-1 != (idx = stridx("<>=", *options))) {
299      if (new->type == '#') {
300        long l = strtol(++options, &temp, 10);
301        if (temp != options) new->val[idx].l = l;
302      } else if (CFG_TOYBOX_FLOAT && new->type == '.') {
303        FLOAT f = strtod(++options, &temp);
304        if (temp != options) new->val[idx].f = f;
305      } else if (CFG_TOYBOX_DEBUG) error_exit("<>= only after .#");
306      options = --temp;
307
308    // At this point, we've hit the end of the previous option.  The
309    // current character is the start of a new option.  If we've already
310    // assigned an option to this struct, loop to allocate a new one.
311    // (It'll get back here afterwards and fall through to next else.)
312    } else if (new->c) {
313      new = 0;
314      continue;
315
316    // Claim this option, loop to see what's after it.
317    } else new->c = *options;
318
319    options++;
320  }
321
322  // Initialize enable/disable/exclude masks and pointers to store arguments.
323  // (This goes right to left so we need the whole list before we can start.)
324  idx = 0;
325  for (new = gof->opts; new; new = new->next) {
326    unsigned u = 1<<idx++;
327
328    new->dex[1] = u;
329    if (new->flags & 1) gof->requires |= u;
330    if (new->type) {
331      new->arg = (void *)nextarg;
332      *(nextarg++) = new->val[2].l;
333    }
334  }
335
336  // Parse trailing group indicators
337  while (*options) {
338    unsigned bits = 0;
339
340    if (CFG_TOYBOX_DEBUG && *options != '[') error_exit("trailing %s", options);
341
342    idx = stridx("-|!+", *++options);
343    if (CFG_TOYBOX_DEBUG && idx == -1) error_exit("[ needs +-!");
344    if (CFG_TOYBOX_DEBUG && (*options == ']' || !options))
345      error_exit("empty []");
346
347    // Don't advance past ] but do process it once in loop.
348    while (*(options++) != ']') {
349      struct opts *opt, *opt2 = 0;
350      int i;
351
352      if (CFG_TOYBOX_DEBUG && !*options) error_exit("[ without ]");
353      // Find this option flag (in previously parsed struct opt)
354      for (i=0, opt = gof->opts; ; i++, opt = opt->next) {
355        if (*options == ']') {
356          if (!opt) break;
357          if (idx == 3) {
358            opt2->dex[1] |= bits;
359            break;
360          }
361          if (bits&(1<<i)) opt->dex[idx] |= bits&~(1<<i);
362        } else {
363          if (CFG_TOYBOX_DEBUG && !opt)
364            error_exit("[] unknown target %c", *options);
365          if (opt->c == *options) {
366            bits |= 1<<i;
367            if (!opt2) opt2=opt;
368            break;
369          }
370        }
371      }
372    }
373  }
374}
375
376// Fill out toys.optflags, toys.optargs, and this[] from toys.argv
377
378void get_optflags(void)
379{
380  struct getoptflagstate gof;
381  struct opts *catch;
382  long saveflags;
383  char *letters[]={"s",""};
384
385  // Option parsing is a two stage process: parse the option string into
386  // a struct opts list, then use that list to process argv[];
387
388  toys.exithelp++;
389  // Allocate memory for optargs
390  saveflags = 0;
391  while (toys.argv[saveflags++]);
392  toys.optargs = xzalloc(sizeof(char *)*saveflags);
393
394  parse_optflaglist(&gof);
395
396  // Iterate through command line arguments, skipping argv[0]
397  for (gof.argc=1; toys.argv[gof.argc]; gof.argc++) {
398    gof.arg = toys.argv[gof.argc];
399    catch = NULL;
400
401    // Parse this argument
402    if (gof.stopearly>1) goto notflag;
403
404    gof.nodash_now = 0;
405
406    // Various things with dashes
407    if (*gof.arg == '-') {
408
409      // Handle -
410      if (!gof.arg[1]) goto notflag;
411      gof.arg++;
412      if (*gof.arg=='-') {
413        struct longopts *lo;
414
415        gof.arg++;
416        // Handle --
417        if (!*gof.arg) {
418          gof.stopearly += 2;
419          continue;
420        }
421
422        // do we match a known --longopt?
423        for (lo = gof.longopts; lo; lo = lo->next) {
424          if (!strncmp(gof.arg, lo->str, lo->len)) {
425            if (!gof.arg[lo->len]) gof.arg = 0;
426            else if (gof.arg[lo->len] == '=' && lo->opt->type)
427              gof.arg += lo->len;
428            else continue;
429            // It's a match.
430            catch = lo->opt;
431            break;
432          }
433        }
434
435        // Should we handle this --longopt as a non-option argument?
436        if (!lo && gof.noerror) {
437          gof.arg -= 2;
438          goto notflag;
439        }
440
441        // Long option parsed, handle option.
442        gotflag(&gof, catch);
443        continue;
444      }
445
446    // Handle things that don't start with a dash.
447    } else {
448      if (gof.nodash && (gof.nodash>1 || gof.argc == 1)) gof.nodash_now = 1;
449      else goto notflag;
450    }
451
452    // At this point, we have the args part of -args.  Loop through
453    // each entry (could be -abc meaning -a -b -c)
454    saveflags = toys.optflags;
455    while (*gof.arg) {
456
457      // Identify next option char.
458      for (catch = gof.opts; catch; catch = catch->next)
459        if (*gof.arg == catch->c)
460          if (!((catch->flags&4) && gof.arg[1])) break;
461
462      // Handle option char (advancing past what was used)
463      if (gotflag(&gof, catch) ) {
464        toys.optflags = saveflags;
465        gof.arg = toys.argv[gof.argc];
466        goto notflag;
467      }
468    }
469    continue;
470
471    // Not a flag, save value in toys.optargs[]
472notflag:
473    if (gof.stopearly) gof.stopearly++;
474    toys.optargs[toys.optc++] = toys.argv[gof.argc];
475  }
476
477  // Sanity check
478  if (toys.optc<gof.minargs)
479    error_exit("Need%s %d argument%s", letters[!!(gof.minargs-1)],
480      gof.minargs, letters[!(gof.minargs-1)]);
481  if (toys.optc>gof.maxargs)
482    error_exit("Max %d argument%s", gof.maxargs, letters[!(gof.maxargs-1)]);
483  if (gof.requires && !(gof.requires & toys.optflags)) {
484    struct opts *req;
485    char needs[32], *s = needs;
486
487    for (req = gof.opts; req; req = req->next)
488      if (req->flags & 1) *(s++) = req->c;
489    *s = 0;
490
491    error_exit("Needs %s-%s", s[1] ? "one of " : "", needs);
492  }
493  toys.exithelp = 0;
494
495  if (CFG_TOYBOX_FREE) {
496    llist_traverse(gof.opts, free);
497    llist_traverse(gof.longopts, free);
498  }
499}
500