args.c revision 7aa651a6a4496d848f86de9b1e6b3a003256a01f
1/* args.c - Command line argument parsing.
2 *
3 * Copyright 2006 Rob Landley <rob@landley.net>
4 */
5
6#include "toys.h"
7
8// Design goals:
9//   Don't use getopt()
10//   Don't permute original arguments.
11//   handle --long gracefully "(noshort)a(along)b(blong1)(blong2)"
12//   After each argument:
13//       Note that pointer and long are always the same size, even on 64 bit.
14//     : plus a string argument, keep most recent if more than one
15//     * plus a string argument, appended to a list
16//     # plus a signed long argument
17//       <LOW     - die if less than LOW
18//       >HIGH    - die if greater than HIGH
19//       =DEFAULT - value if not specified
20//     - plus a signed long argument defaulting to negative
21//     . plus a double precision floating point argument (with CFG_TOYBOX_FLOAT)
22//       Chop this out with USE_TOYBOX_FLOAT() around option string
23//       Same <LOW>HIGH=DEFAULT as #
24//     @ plus an occurrence counter (which is a long)
25//     (longopt)
26//     | this is required.  If more than one marked, only one required. TODO
27//     ^ Stop parsing after encountering this argument
//    " " (space char) the "plus an argument" must be separate
//        I.E. "-j 3" not "-j3". So "kill -stop" != "kill -s top"
30//
31//     These modify other option letters (previously seen in string):
32//       +X enabling this enables X (switch on)
33//       ~X enabling this disables X (switch off)
34//       !X die with error if X already set (x!x die if x supplied twice)
35//       [yz] needs at least one of y or z. TODO
36//
37//   at the beginning:
38//     ^ stop at first nonoption argument
39//     <0 die if less than # leftover arguments (default 0)
40//     >9 die if > # leftover arguments (default MAX_INT)
41//     ? Allow unknown arguments (pass them through to command).
42//     & first argument has imaginary dash (ala tar/ps)
43//       If given twice, all arguments have imaginary dash
44
45// Notes from getopt man page
46//   - and -- cannot be arguments.
47//     -- force end of arguments
48//     - is a synonym for stdin in file arguments
49//   -abc means -a -b -c
50
51/* This uses a getopt-like option string, but not getopt() itself. We call
52 * it the get_opt string.
53 *
54 * Each option in the get_opt string corresponds to a bit position in the
55 * return value. The rightmost argument is (1<<0), the next to last is (1<<1)
56 * and so on. If the option isn't seen in argv[], its bit remains 0.
57 *
58 * Options which have an argument fill in the corresponding slot in the global
59 * union "this" (see generated/globals.h), which it treats as an array of longs
60 * (note that sizeof(long)==sizeof(pointer) is guaranteed by LP64).
61 *
62 * You don't have to free the option strings, which point into the environment
63 * space. List objects should be freed by main() when command_main() returns.
64 *
65 * Example:
66 *   Calling get_optflags() when toys.which->options="ab:c:d" and
67 *   argv = ["command", "-b", "fruit", "-d", "walrus"] results in:
68 *
69 *     Changes to struct toys:
70 *       toys.optflags = 5  (-b=4 | -d=1)
71 *       toys.optargs[0]="walrus" (leftover argument)
72 *       toys.optargs[1]=NULL (end of list)
73 *       toys.optc=1 (there was 1 leftover argument)
74 *
75 *     Changes to union this:
76 *       this[0]=NULL (because -c didn't get an argument this time)
77 *       this[1]="fruit" (argument to -b)
78 */
79
80// Linked list of all known options (get_opt string is parsed into this).
81struct opts {
82  struct opts *next;
83  long *arg;         // Pointer into union "this" to store arguments at.
84  uint32_t edx[3];   // Flag mask to enable/disable/exclude.
85  int c;             // Short argument character
86  int flags;         // |=1, ^=2
87  char type;         // Type of arguments to store
88  union {
89    long l;
90    FLOAT f;
91  } val[3];          // low, high, default - range of allowed values
92};
93
// One node per "(longopt)" name found in the get_opt string.
struct longopts {
  struct longopts *next; // Following node in the list
  struct opts *opt;      // Option this long name belongs to
  char *str;             // Start of the name (not NUL terminated, see len)
  int len;               // Number of characters in the name
};
100
// Everything the parser needs to remember while walking argv[].
struct getoptflagstate
{
  int argc;                  // Index into toys.argv[] of the current argument
  int minargs;               // Fewest leftover arguments allowed
  int maxargs;               // Most leftover arguments allowed
  int nodash;                // Count of leading '&' in the get_opt string
  char *arg;                 // Current position within the current argument
  struct opts *opts;         // Head of the list of known options
  struct opts *this;         // Option matched for the current position, or NULL
  struct longopts *longopts; // Head of the list of known long option names
  int noerror;               // Nonzero: unknown options are not an error
  int nodash_now;            // Nonzero: current argument has an implied dash
  int stopearly;             // Above 1: treat all remaining arguments as non-options
  uint32_t excludes;         // Exclude mask of the most recently handled option
};
111
112// Parse one command line option.
113static int gotflag(struct getoptflagstate *gof)
114{
115  int type;
116  struct opts *opt = gof->this;
117
118  // Did we recognize this option?
119  if (!opt) {
120    if (gof->noerror) return 1;
121    error_exit("Unknown option %s", gof->arg);
122  }
123
124  // Set flags
125  toys.optflags |= opt->edx[0];
126  toys.optflags &= ~opt->edx[1];
127  gof->excludes = opt->edx[2];
128  if (opt->flags&2) gof->stopearly=2;
129
130  // Does this option take an argument?
131  gof->arg++;
132  type = opt->type;
133  if (type) {
134    char *arg = gof->arg;
135
136    // Handle "-xblah" and "-x blah", but also a third case: "abxc blah"
137    // to make "tar xCjfv blah1 blah2 thingy" work like
138    // "tar -x -C blah1 -j -f blah2 -v thingy"
139
140    if (gof->nodash_now || !arg[0]) arg = toys.argv[++gof->argc];
141    // TODO: The following line doesn't display --longopt correctly
142    if (!arg) error_exit("Missing argument to -%c", opt->c);
143
144    if (type == ':') *(opt->arg) = (long)arg;
145    else if (type == '*') {
146      struct arg_list **list;
147
148      list = (struct arg_list **)opt->arg;
149      while (*list) list=&((*list)->next);
150      *list = xzalloc(sizeof(struct arg_list));
151      (*list)->arg = arg;
152    } else if (type == '#' || type == '-') {
153      long l = atolx(arg);
154      if (type == '-' && !ispunct(*arg)) l*=-1;
155      if (l < opt->val[0].l) error_exit("-%c < %ld", opt->c, opt->val[0].l);
156      if (l > opt->val[1].l) error_exit("-%c > %ld", opt->c, opt->val[1].l);
157
158      *(opt->arg) = l;
159    } else if (CFG_TOYBOX_FLOAT && type == '.') {
160      FLOAT *f = (FLOAT *)(opt->arg);
161
162      *f = strtod(arg, &arg);
163      if (opt->val[0].l != LONG_MIN && *f < opt->val[0].f)
164        error_exit("-%c < %lf", opt->c, (double)opt->val[0].f);
165      if (opt->val[1].l != LONG_MAX && *f > opt->val[1].f)
166        error_exit("-%c > %lf", opt->c, (double)opt->val[1].f);
167    } else if (type == '@') ++*(opt->arg);
168
169    if (!gof->nodash_now) gof->arg = "";
170  }
171
172  gof->this = NULL;
173  return 0;
174}
175
176// Fill out toys.optflags and toys.optargs.
177
178void parse_optflaglist(struct getoptflagstate *gof)
179{
180  char *options = toys.which->options;
181  long *nextarg = (long *)&this;
182  struct opts *new = 0;
183
184  // Parse option format string
185  memset(gof, 0, sizeof(struct getoptflagstate));
186  gof->maxargs = INT_MAX;
187  if (!options) return;
188
189  // Parse leading special behavior indicators
190  for (;;) {
191    if (*options == '^') gof->stopearly++;
192    else if (*options == '<') gof->minargs=*(++options)-'0';
193    else if (*options == '>') gof->maxargs=*(++options)-'0';
194    else if (*options == '?') gof->noerror++;
195    else if (*options == '&') gof->nodash++;
196    else break;
197    options++;
198  }
199
200  // Parse the rest of the option string into a linked list
201  // of options with attributes.
202
203  if (!*options) gof->stopearly++;
204  while (*options) {
205    char *temp;
206    int idx;
207
208    // Allocate a new list entry when necessary
209    if (!new) {
210      new = xzalloc(sizeof(struct opts));
211      new->next = gof->opts;
212      gof->opts = new;
213      new->val[0].l = LONG_MIN;
214      new->val[1].l = LONG_MAX;
215      ++*(new->edx);
216    }
217    // Each option must start with "(" or an option character.  (Bare
218    // longopts only come at the start of the string.)
219    if (*options == '(') {
220      char *end;
221      struct longopts *lo = xmalloc(sizeof(struct longopts));
222
223      // Find the end of the longopt
224      for (end = ++options; *end && *end != ')'; end++);
225      if (CFG_TOYBOX_DEBUG && !*end) error_exit("(longopt) didn't end");
226
227      // init a new struct longopts
228      lo->next = gof->longopts;
229      lo->opt = new;
230      lo->str = options;
231      lo->len = end-options;
232      gof->longopts = lo;
233      options = end;
234
235      // Mark this struct opt as used, even when no short opt.
236      if (!new->c) new->c = -1;
237
238    // If this is the start of a new option that wasn't a longopt,
239
240    } else if (strchr(":*#@.-", *options)) {
241      if (CFG_TOYBOX_DEBUG && new->type)
242        error_exit("multiple types %c:%c%c", new->c, new->type, *options);
243      new->type = *options;
244    } else if (-1 != (idx = stridx("+~!", *options))) {
245      struct opts *opt;
246      int i;
247
248      if (!*++options && CFG_TOYBOX_DEBUG) error_exit("+~! no target");
249      // Find this option flag (in previously parsed struct opt)
250      for (i=0, opt = new; ; opt = opt->next) {
251        if (CFG_TOYBOX_DEBUG && !opt) error_exit("+~! unknown target");
252        if (opt->c == *options) break;
253        i++;
254      }
255      new->edx[idx] |= 1<<i;
256    } else if (*options == '[') { // TODO
257    } else if (-1 != (idx = stridx("|^ ", *options))) new->flags |= 1<<idx;
258    // bounds checking
259    else if (-1 != (idx = stridx("<>=", *options))) {
260      if (new->type == '#') {
261        long l = strtol(++options, &temp, 10);
262        if (temp != options) new->val[idx].l = l;
263      } else if (CFG_TOYBOX_FLOAT && new->type == '.') {
264        FLOAT f = strtod(++options, &temp);
265        if (temp != options) new->val[idx].f = f;
266      } else if (CFG_TOYBOX_DEBUG) error_exit("<>= only after .#");
267      options = --temp;
268    }
269
270    // At this point, we've hit the end of the previous option.  The
271    // current character is the start of a new option.  If we've already
272    // assigned an option to this struct, loop to allocate a new one.
273    // (It'll get back here afterwards and fall through to next else.)
274    else if (new->c) {
275      new = NULL;
276      continue;
277
278    // Claim this option, loop to see what's after it.
279    } else new->c = *options;
280
281    options++;
282  }
283
284  // Initialize enable/disable/exclude masks and pointers to store arguments.
285  // (We have to calculate all this ahead of time because longopts jump into
286  // the middle of the list.  We have to do this after creating the list
287  // because we reverse direction: last entry created gets first global slot.)
288  int pos = 0;
289  for (new = gof->opts; new; new = new->next) {
290    int i;
291
292    for (i=0;i<3;i++) new->edx[i] <<= pos;
293    pos++;
294    if (new->type) {
295      new->arg = (void *)nextarg;
296      *(nextarg++) = new->val[2].l;
297    }
298  }
299}
300
301void get_optflags(void)
302{
303  struct getoptflagstate gof;
304  long saveflags;
305  char *letters[]={"s",""};
306
307  // Option parsing is a two stage process: parse the option string into
308  // a struct opts list, then use that list to process argv[];
309
310  if (CFG_HELP) toys.exithelp++;
311  // Allocate memory for optargs
312  saveflags = 0;
313  while (toys.argv[saveflags++]);
314  toys.optargs = xzalloc(sizeof(char *)*saveflags);
315
316  parse_optflaglist(&gof);
317
318  // Iterate through command line arguments, skipping argv[0]
319  for (gof.argc=1; toys.argv[gof.argc]; gof.argc++) {
320    gof.arg = toys.argv[gof.argc];
321    gof.this = NULL;
322
323    // Parse this argument
324    if (gof.stopearly>1) goto notflag;
325
326    gof.nodash_now = 0;
327
328    // Various things with dashes
329    if (*gof.arg == '-') {
330
331      // Handle -
332      if (!gof.arg[1]) goto notflag;
333      gof.arg++;
334      if (*gof.arg=='-') {
335        struct longopts *lo;
336
337        gof.arg++;
338        // Handle --
339        if (!*gof.arg) {
340          gof.stopearly += 2;
341          goto notflag;
342        }
343        // Handle --longopt
344
345        for (lo = gof.longopts; lo; lo = lo->next) {
346          if (!strncmp(gof.arg, lo->str, lo->len)) {
347            if (gof.arg[lo->len]) {
348              if (gof.arg[lo->len]=='=' && lo->opt->type) gof.arg += lo->len;
349              else continue;
350            }
351            // It's a match.
352            gof.arg = "";
353            gof.this = lo->opt;
354            break;
355          }
356        }
357
358        // Should we handle this --longopt as a non-option argument?
359        if (!lo && gof.noerror) {
360          gof.arg-=2;
361          goto notflag;
362        }
363
364        // Long option parsed, handle option.
365        gotflag(&gof);
366        continue;
367      }
368
369    // Handle things that don't start with a dash.
370    } else {
371      if (gof.nodash && (gof.nodash>1 || gof.argc == 1)) gof.nodash_now = 1;
372      else goto notflag;
373    }
374
375    // At this point, we have the args part of -args.  Loop through
376    // each entry (could be -abc meaning -a -b -c)
377    saveflags = toys.optflags;
378    while (*gof.arg) {
379
380      // Identify next option char.
381      for (gof.this = gof.opts; gof.this; gof.this = gof.this->next)
382        if (*gof.arg == gof.this->c)
383          if (!((gof.this->flags&4) && gof.arg[1])) break;
384
385      // Handle option char (advancing past what was used)
386      if (gotflag(&gof) ) {
387        toys.optflags = saveflags;
388        gof.arg = toys.argv[gof.argc];
389        goto notflag;
390      }
391    }
392    continue;
393
394    // Not a flag, save value in toys.optargs[]
395notflag:
396    if (gof.stopearly) gof.stopearly++;
397    toys.optargs[toys.optc++] = toys.argv[gof.argc];
398  }
399
400  // Sanity check
401  if (toys.optc<gof.minargs)
402    error_exit("Need%s %d argument%s", letters[!!(gof.minargs-1)],
403      gof.minargs, letters[!(gof.minargs-1)]);
404  if (toys.optc>gof.maxargs)
405    error_exit("Max %d argument%s", gof.maxargs, letters[!(gof.maxargs-1)]);
406  if (CFG_HELP) toys.exithelp = 0;
407
408  if (CFG_TOYBOX_FREE) {
409    llist_traverse(gof.opts, free);
410    llist_traverse(gof.longopts, free);
411  }
412}
413