args.c revision de51192c854d20b19d25daedca9e7ecdc72e13f1
1/* args.c - Command line argument parsing.
2 *
3 * Copyright 2006 Rob Landley <rob@landley.net>
4 */
5
6#include "toys.h"
7
8// Design goals:
9//   Don't use getopt()
10//   Don't permute original arguments.
11//   handle --long gracefully "(noshort)a(along)b(blong1)(blong2)"
12//   After each argument:
13//       Note that pointer and long are always the same size, even on 64 bit.
14//     : plus a string argument, keep most recent if more than one
15//     * plus a string argument, appended to a list
16//     # plus a signed long argument
17//       <LOW     - die if less than LOW
18//       >HIGH    - die if greater than HIGH
19//       =DEFAULT - value if not specified
20//     - plus a signed long argument defaulting to negative
21//     . plus a double precision floating point argument (with CFG_TOYBOX_FLOAT)
22//       Chop this out with USE_TOYBOX_FLOAT() around option string
23//       Same <LOW>HIGH=DEFAULT as #
24//     @ plus an occurrence counter (which is a long)
25//     (longopt)
26//     | this is required.  If more than one marked, only one required. TODO
27//     ^ Stop parsing after encountering this argument
//    " " (space char) the "plus an argument" must be separate
//        I.E. "-j 3" not "-j3". So "kill -stop" != "kill -s top"
30//
31//   at the beginning:
32//     ^ stop at first nonoption argument
33//     <0 die if less than # leftover arguments (default 0)
34//     >9 die if > # leftover arguments (default MAX_INT)
35//     ? Allow unknown arguments (pass them through to command).
36//     & first argument has imaginary dash (ala tar/ps)
37//       If given twice, all arguments have imaginary dash
38//
39//   At the end: [groups] of previously seen options
40//     - Only one in group (switch off)    [-abc] means -ab=-b, -ba=-a, -abc=-c
41//     | Synonyms (switch on all)          [|abc] means -ab=-abc, -c=-abc
42//     ! More than one in group is error   [!abc] means -ab calls error_exit()
43//     + First in group switches rest on   [+abc] means -a=-abc, -b=-b, -c=-c
44//       primarily useful if you can switch things back off again.
45//
46
47// Notes from getopt man page
48//   - and -- cannot be arguments.
49//     -- force end of arguments
50//     - is a synonym for stdin in file arguments
51//   -abc means -a -b -c
52
53/* This uses a getopt-like option string, but not getopt() itself. We call
54 * it the get_opt string.
55 *
56 * Each option in the get_opt string corresponds to a bit position in the
57 * return value. The rightmost argument is (1<<0), the next to last is (1<<1)
58 * and so on. If the option isn't seen in argv[], its bit remains 0.
59 *
60 * Options which have an argument fill in the corresponding slot in the global
61 * union "this" (see generated/globals.h), which it treats as an array of longs
62 * (note that sizeof(long)==sizeof(pointer) is guaranteed by LP64).
63 *
64 * You don't have to free the option strings, which point into the environment
65 * space. List objects should be freed by main() when command_main() returns.
66 *
67 * Example:
68 *   Calling get_optflags() when toys.which->options="ab:c:d" and
69 *   argv = ["command", "-b", "fruit", "-d", "walrus"] results in:
70 *
71 *     Changes to struct toys:
72 *       toys.optflags = 5  (-b=4 | -d=1)
73 *       toys.optargs[0]="walrus" (leftover argument)
74 *       toys.optargs[1]=NULL (end of list)
75 *       toys.optc=1 (there was 1 leftover argument)
76 *
77 *     Changes to union this:
78 *       this[0]=NULL (because -c didn't get an argument this time)
79 *       this[1]="fruit" (argument to -b)
80 */
81
82// Linked list of all known options (option string parsed into this).
83struct opts {
84  struct opts *next;
85  long *arg;         // Pointer into union "this" to store arguments at.
86  int c;             // Argument character to match
87  int flags;         // |=1, ^=2
88  unsigned dex[3];   // which bits to disable/enable/exclude in toys.optflags
89  char type;         // Type of arguments to store union "this"
90  union {
91    long l;
92    FLOAT f;
93  } val[3];          // low, high, default - range of allowed values
94};
95
// One --longopt name, linked to the struct opts it is an alias for.
struct longopts {
  struct longopts *next; // Following entry in the list
  struct opts *opt;      // Option this long name selects
  char *str;             // Start of the name (points into the option string)
  int len;               // Length of the name; str is not NUL terminated here
};
102
103// State during argument parsing.
struct getoptflagstate {
  int argc;                  // Index into toys.argv of the argument in progress
  int minargs;               // Fewest leftover arguments allowed
  int maxargs;               // Most leftover arguments allowed
  int nodash;                // Count of leading & in the option string
  char *arg;                 // Current position within the argument
  struct opts *opts;         // Known options, rightmost first
  struct longopts *longopts; // Known --long names
  int noerror;               // Nonzero: unknown options are passed through
  int nodash_now;            // Nonzero: current argument has an imaginary dash
  int stopearly;             // Above 1: treat all remaining arguments as data
  unsigned excludes;         // Accumulated dex[2] bits of the options seen
};
113
// Use getoptflagstate to parse one command line option from argv
115static int gotflag(struct getoptflagstate *gof, struct opts *opt)
116{
117  int type;
118
119  // Did we recognize this option?
120  if (!opt) {
121    if (gof->noerror) return 1;
122    error_exit("Unknown option %s", gof->arg);
123  }
124
125  // Set flags
126  toys.optflags &= ~opt->dex[0];
127  toys.optflags |= opt->dex[1];
128  gof->excludes |= opt->dex[2];
129  if (opt->flags&2) gof->stopearly=2;
130
131  if (toys.optflags & gof->excludes) {
132    struct opts *bad;
133    unsigned i = 1;
134
135    for (bad=gof->opts; gof->excludes && i; bad = bad->next) i<<=1;
136    error_exit("No '%c' with '%c'", opt->c, bad->c);
137  }
138
139  // Does this option take an argument?
140  gof->arg++;
141  type = opt->type;
142  if (type) {
143    char *arg = gof->arg;
144
145    // Handle "-xblah" and "-x blah", but also a third case: "abxc blah"
146    // to make "tar xCjfv blah1 blah2 thingy" work like
147    // "tar -x -C blah1 -j -f blah2 -v thingy"
148
149    if (gof->nodash_now || !arg[0]) arg = toys.argv[++gof->argc];
150    // TODO: The following line doesn't display --longopt correctly
151    if (!arg) error_exit("Missing argument to -%c", opt->c);
152
153    if (type == ':') *(opt->arg) = (long)arg;
154    else if (type == '*') {
155      struct arg_list **list;
156
157      list = (struct arg_list **)opt->arg;
158      while (*list) list=&((*list)->next);
159      *list = xzalloc(sizeof(struct arg_list));
160      (*list)->arg = arg;
161    } else if (type == '#' || type == '-') {
162      long l = atolx(arg);
163      if (type == '-' && !ispunct(*arg)) l*=-1;
164      if (l < opt->val[0].l) error_exit("-%c < %ld", opt->c, opt->val[0].l);
165      if (l > opt->val[1].l) error_exit("-%c > %ld", opt->c, opt->val[1].l);
166
167      *(opt->arg) = l;
168    } else if (CFG_TOYBOX_FLOAT && type == '.') {
169      FLOAT *f = (FLOAT *)(opt->arg);
170
171      *f = strtod(arg, &arg);
172      if (opt->val[0].l != LONG_MIN && *f < opt->val[0].f)
173        error_exit("-%c < %lf", opt->c, (double)opt->val[0].f);
174      if (opt->val[1].l != LONG_MAX && *f > opt->val[1].f)
175        error_exit("-%c > %lf", opt->c, (double)opt->val[1].f);
176    } else if (type == '@') ++*(opt->arg);
177
178    if (!gof->nodash_now) gof->arg = "";
179  }
180
181  return 0;
182}
183
184// Parse this command's options string into struct getoptflagstate, which
185// includes a struct opts linked list in reverse order (I.E. right-to-left)
186void parse_optflaglist(struct getoptflagstate *gof)
187{
188  char *options = toys.which->options;
189  long *nextarg = (long *)&this;
190  struct opts *new = 0;
191  int idx;
192
193  // Parse option format string
194  memset(gof, 0, sizeof(struct getoptflagstate));
195  gof->maxargs = INT_MAX;
196  if (!options) return;
197
198  // Parse leading special behavior indicators
199  for (;;) {
200    if (*options == '^') gof->stopearly++;
201    else if (*options == '<') gof->minargs=*(++options)-'0';
202    else if (*options == '>') gof->maxargs=*(++options)-'0';
203    else if (*options == '?') gof->noerror++;
204    else if (*options == '&') gof->nodash++;
205    else break;
206    options++;
207  }
208
209  // Parse option string into a linked list of options with attributes.
210
211  if (!*options) gof->stopearly++;
212  while (*options) {
213    char *temp;
214
215    // Option groups come after all options are defined
216    if (*options == '[') break;
217
218    // Allocate a new list entry when necessary
219    if (!new) {
220      new = xzalloc(sizeof(struct opts));
221      new->next = gof->opts;
222      gof->opts = new;
223      new->val[0].l = LONG_MIN;
224      new->val[1].l = LONG_MAX;
225    }
226    // Each option must start with "(" or an option character.  (Bare
227    // longopts only come at the start of the string.)
228    if (*options == '(') {
229      char *end;
230      struct longopts *lo = xmalloc(sizeof(struct longopts));
231
232      // Find the end of the longopt
233      for (end = ++options; *end && *end != ')'; end++);
234      if (CFG_TOYBOX_DEBUG && !*end) error_exit("(longopt) didn't end");
235
236      // init a new struct longopts
237      lo->next = gof->longopts;
238      lo->opt = new;
239      lo->str = options;
240      lo->len = end-options;
241      gof->longopts = lo;
242      options = end;
243
244      // Mark this struct opt as used, even when no short opt.
245      if (!new->c) new->c = -1;
246
247    // If this is the start of a new option that wasn't a longopt,
248
249    } else if (strchr(":*#@.-", *options)) {
250      if (CFG_TOYBOX_DEBUG && new->type)
251        error_exit("multiple types %c:%c%c", new->c, new->type, *options);
252      new->type = *options;
253    } else if (-1 != (idx = stridx("|^ ", *options))) new->flags |= 1<<idx;
254    // bounds checking
255    else if (-1 != (idx = stridx("<>=", *options))) {
256      if (new->type == '#') {
257        long l = strtol(++options, &temp, 10);
258        if (temp != options) new->val[idx].l = l;
259      } else if (CFG_TOYBOX_FLOAT && new->type == '.') {
260        FLOAT f = strtod(++options, &temp);
261        if (temp != options) new->val[idx].f = f;
262      } else if (CFG_TOYBOX_DEBUG) error_exit("<>= only after .#");
263      options = --temp;
264    }
265
266    // At this point, we've hit the end of the previous option.  The
267    // current character is the start of a new option.  If we've already
268    // assigned an option to this struct, loop to allocate a new one.
269    // (It'll get back here afterwards and fall through to next else.)
270    else if (new->c) {
271      new = NULL;
272      continue;
273
274    // Claim this option, loop to see what's after it.
275    } else new->c = *options;
276
277    options++;
278  }
279
280  // Initialize enable/disable/exclude masks and pointers to store arguments.
281  // (This goes right to left so we need the whole list before we can start.)
282  idx = 0;
283  for (new = gof->opts; new; new = new->next) {
284    new->dex[1] = 1<<idx++;
285    if (new->type) {
286      new->arg = (void *)nextarg;
287      *(nextarg++) = new->val[2].l;
288    }
289  }
290
291  // Parse trailing group indicators
292  while (*options) {
293    unsigned bits = 0;
294
295    if (CFG_TOYBOX_DEBUG && *options) error_exit("trailing %s", options);
296
297    idx = stridx("-|!+", *++options);
298    if (CFG_TOYBOX_DEBUG && idx == -1) error_exit("[ needs +-!");
299
300    // Don't advance past ] but do process it once in loop.
301    while (*(options++) != ']') {
302      struct opts *opt, *opt2 = 0;
303      int i;
304
305      if (CFG_TOYBOX_DEBUG && !*options) error_exit("[ without ]");
306      // Find this option flag (in previously parsed struct opt)
307      for (i=0, opt = gof->opts; ; i++, opt = opt->next) {
308        if (*options == ']') {
309          if (!opt) break;
310          if (idx == 3) {
311            opt2->dex[1] |= bits;
312            break;
313          }
314          if (bits&(1<<i)) opt->dex[idx] |= bits&~(1<<i);
315        } else {
316          if (CFG_TOYBOX_DEBUG && !opt)
317            error_exit("[] unknown target %c", *options);
318          if (opt->c == *options) {
319            bits |= 1<<i;
320            if (!opt2) opt2=opt;
321            break;
322          }
323        }
324      }
325    }
326  }
327}
328
329// Fill out toys.optflags, toys.optargs, and this[] from toys.argv
330
331void get_optflags(void)
332{
333  struct getoptflagstate gof;
334  struct opts *catch;
335  long saveflags;
336  char *letters[]={"s",""};
337
338  // Option parsing is a two stage process: parse the option string into
339  // a struct opts list, then use that list to process argv[];
340
341  if (CFG_HELP) toys.exithelp++;
342  // Allocate memory for optargs
343  saveflags = 0;
344  while (toys.argv[saveflags++]);
345  toys.optargs = xzalloc(sizeof(char *)*saveflags);
346
347  parse_optflaglist(&gof);
348
349  // Iterate through command line arguments, skipping argv[0]
350  for (gof.argc=1; toys.argv[gof.argc]; gof.argc++) {
351    gof.arg = toys.argv[gof.argc];
352    catch = NULL;
353
354    // Parse this argument
355    if (gof.stopearly>1) goto notflag;
356
357    gof.nodash_now = 0;
358
359    // Various things with dashes
360    if (*gof.arg == '-') {
361
362      // Handle -
363      if (!gof.arg[1]) goto notflag;
364      gof.arg++;
365      if (*gof.arg=='-') {
366        struct longopts *lo;
367
368        gof.arg++;
369        // Handle --
370        if (!*gof.arg) {
371          gof.stopearly += 2;
372          goto notflag;
373        }
374        // Handle --longopt
375
376        for (lo = gof.longopts; lo; lo = lo->next) {
377          if (!strncmp(gof.arg, lo->str, lo->len)) {
378            if (gof.arg[lo->len]) {
379              if (gof.arg[lo->len]=='=' && lo->opt->type) gof.arg += lo->len;
380              else continue;
381            }
382            // It's a match.
383            gof.arg = "";
384            catch = lo->opt;
385            break;
386          }
387        }
388
389        // Should we handle this --longopt as a non-option argument?
390        if (!lo && gof.noerror) {
391          gof.arg-=2;
392          goto notflag;
393        }
394
395        // Long option parsed, handle option.
396        gotflag(&gof, catch);
397        continue;
398      }
399
400    // Handle things that don't start with a dash.
401    } else {
402      if (gof.nodash && (gof.nodash>1 || gof.argc == 1)) gof.nodash_now = 1;
403      else goto notflag;
404    }
405
406    // At this point, we have the args part of -args.  Loop through
407    // each entry (could be -abc meaning -a -b -c)
408    saveflags = toys.optflags;
409    while (*gof.arg) {
410
411      // Identify next option char.
412      for (catch = gof.opts; catch; catch = catch->next)
413        if (*gof.arg == catch->c)
414          if (!((catch->flags&4) && gof.arg[1])) break;
415
416      // Handle option char (advancing past what was used)
417      if (gotflag(&gof, catch) ) {
418        toys.optflags = saveflags;
419        gof.arg = toys.argv[gof.argc];
420        goto notflag;
421      }
422    }
423    continue;
424
425    // Not a flag, save value in toys.optargs[]
426notflag:
427    if (gof.stopearly) gof.stopearly++;
428    toys.optargs[toys.optc++] = toys.argv[gof.argc];
429  }
430
431  // Sanity check
432  if (toys.optc<gof.minargs)
433    error_exit("Need%s %d argument%s", letters[!!(gof.minargs-1)],
434      gof.minargs, letters[!(gof.minargs-1)]);
435  if (toys.optc>gof.maxargs)
436    error_exit("Max %d argument%s", gof.maxargs, letters[!(gof.maxargs-1)]);
437  if (CFG_HELP) toys.exithelp = 0;
438
439  if (CFG_TOYBOX_FREE) {
440    llist_traverse(gof.opts, free);
441    llist_traverse(gof.longopts, free);
442  }
443}
444