find.c revision fd14a61e941828c580bd476bb51f371f3d1ddf09
1/* find.c - Search directories for matching files.
2 *
3 * Copyright 2014 Rob Landley <rob@landley.net>
4 *
 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/find.html
6 *
7 * Our "unspecified" behavior for no paths is to use "."
8 * Parentheses can only stack 4096 deep
9 * Not treating two {} as an error, but only using last
10
11USE_FIND(NEWTOY(find, "?^HL", TOYFLAG_USR|TOYFLAG_BIN))
12
13config FIND
14  bool "find"
15  default n
16  help
17    usage: find [-HL] [DIR...] [<options>]
18
19    Search directories for matching files.
20    Default: search "." match all -print all matches.
21
22    -H  Follow command line symlinks         -L  Follow all symlinks
23
24    Match filters:
25    -name  PATTERN filename with wildcards   -iname   case insensitive -name
26    -path  PATTERN path name with wildcards  -ipath   case insensitive -path
27    -user  UNAME   belongs to user           -nouser  belongs to unknown user
28    -group GROUP   belongs to group          -nogroup belongs to unknown group
    -perm  [-]MODE permissions (-=at least)  -prune   ignore contents of dir
30    -size  N[c]    512 byte blocks (c=bytes) -xdev    stay in this filesystem
31    -links N       hardlink count            -atime N accessed N days ago
    -ctime N       changed N days ago        -mtime N modified N days ago
33    -type [bcdflps] (block, char, dir, file, symlink, pipe, socket)
34
35    Numbers N may be prefixed by a - (less than) or + (greater than):
36
37    Combine matches with:
38    !, -a, -o, ( )    not, and, or, group expressions
39
40    Actions:
41    -print   Print match with newline  -print0    Print match with null
42    -exec    Run command with path     -execdir   Run command in file's dir
43    -ok      Ask before exec           -okdir     Ask before execdir
44
45    Commands substitute "{}" with matched file. End with ";" to run each file,
46    or "+" (next argument after "{}") to collect and run with multiple files.
47*/
48
49// find . ! \( -name blah -print \)
50// find . -o
51// find -type f
52
53// pending issues:
54// old false -a ! new false does not yield true.
55//
56// -user -group -newer evaluate once and save result (where?)
57// add -print if no action (-exec, -ok, -print)
58// find . -print -xdev (should xdev before print)
59// -exec {} + accepts any + after {}, not just immediately after. ";" optional
60
61#define FOR_find
62#include "toys.h"
63
64GLOBALS(
65  char **filter;
66  struct double_list *argdata;
67  int topdir, xdev, depth, envsize;
68  time_t now;
69)
70
// State for one -exec/-execdir/-ok/-okdir instance. This cannot live in TT
// because a command line may contain more than one of them.
struct exec_range {
  // List linkage. These must stay first: the structure is added to the
  // argdata list as if it were a list node.
  char *next;
  char *prev;

  int dir;        // nonzero for the -execdir/-okdir variants
  int plus;       // nonzero when the command ended with "{} +"
  int arglen;     // number of command words before the terminator
  int argsize;    // space taken by the command words (pointers + strings)
  int curly;      // index of "{}" among the command words
  int namecount;  // names collected and not yet run
  int namesize;   // space taken by the collected names
  char **argstart;            // command words on the original command line
  struct double_list *names;  // collected names
};
79
80// Perform pending -exec (if any)
81static int flush_exec(struct dirtree *new, struct exec_range *aa)
82{
83  struct double_list **dl;
84  char **newargs;
85  int rc;
86
87  if (!aa->namecount) return 0;
88
89  if (aa->dir && new->parent) dl = (void *)&new->parent->extra;
90  else dl = &aa->names;
91  dlist_terminate(dl);
92
93  // switch to directory for -execdir, or back to top if we have an -execdir
94  // _and_ a normal -exec, or are at top of tree in -execdir
95  if (aa->dir && new->parent) fchdir(new->parent->data);
96  else if (TT.topdir != -1) fchdir(TT.topdir);
97
98  // execdir: accumulated execs in this directory's children.
99  newargs = xmalloc(sizeof(char *)*(aa->arglen+aa->namecount+1));
100  if (!aa->curly) {
101    memcpy(newargs, aa->argstart+1, sizeof(char *)*aa->arglen);
102    newargs[aa->arglen] = 0;
103  } else {
104    struct double_list *dl2 = *dl;
105    int pos = aa->curly, rest = aa->arglen - aa->curly;
106
107    // Collate argument list
108    memcpy(newargs, aa->argstart+1, sizeof(char *)*pos);
109    for (dl2 = *dl; dl2; dl2 = dl2->next) newargs[pos++] = dl2->data;
110    rest = aa->arglen - aa->curly;
111    memcpy(newargs+pos, aa->argstart+aa->curly+1,
112      sizeof(char *)*(rest-1));
113    newargs[pos+rest] = 0;
114  }
115
116  rc = xpclose(xpopen(newargs, 0), 0);
117
118  llist_traverse(dl, llist_free_double);
119
120  return rc;
121}
122
// Compare val against the number in str, which may carry an explicit sign:
// "+N" asks for val greater than N, "-N" for val less than N, plain "N" for
// equality. When units is nonzero and str ends in a digit (no suffix), N is
// multiplied by units first.
static int compare_numsign(long val, long units, char *str)
{
  char *digits = str;
  int sign = 0;
  long want;

  if (*str == '+' || *str == '-') sign = *(digits++);
  else if (!isdigit(*str)) error_exit("%s not [+-]N", str);

  want = atolx(digits);
  if (units && isdigit(digits[strlen(digits)-1])) want *= units;

  switch (sign) {
    case '+': return val > want;
    case '-': return val < want;
    default: return val == want;
  }
}
138
// Output the path of a node followed by the terminator character c
// (newline for -print, NUL for -print0).
static void do_print(struct dirtree *new, char c)
{
  char *path = dirtree_path(new, 0);

  xprintf("%s%c", path, c);
  free(path);
}
146
// Placeholder for filters whose argument handling is not written yet
// (-user, -group, -newer): using one of them exits with an error.
void todo_store_argument(void)
{
  error_exit("NOP");
}
151
152char *strlower(char *s)
153{
154  char *new;
155
156  if (!CFG_TOYBOX_I18N) {
157    new = xstrdup(s);
158    for (; *s; s++) *(new++) = tolower(*s);
159  } else {
160    // I can't guarantee the string _won't_ expand during reencoding, so...?
161    new = xmalloc(strlen(s)*2+1);
162
163    while (*s) {
164      wchar_t c;
165      int len = mbrtowc(&c, s, MB_CUR_MAX, 0);
166
167      if (len < 1) *(new++) = *(s++);
168      else {
169        // squash title case too
170        c = towlower(c);
171
172        // if we had a valid utf8 sequence, convert it to lower case, and can't
173        // encode back to utf8, something is wrong with your libc. But just
174        // in case somebody finds an exploit...
175        len = wcrtomb(s, c, 0);
176        if (len < 1) error_exit("bad utf8 %x", c);
177        s += len;
178      }
179    }
180  }
181
182  return new;
183}
184
185// Call this with 0 for first pass argument parsing and syntax checking (which
186// populates argdata). Later commands traverse argdata (in order) when they
187// need "do once" results.
188static int do_find(struct dirtree *new)
189{
190  int pcount = 0, print = 0, not = 0, active = !!new, test = active, recurse;
191  struct double_list *argdata = TT.argdata;
192  char *s, **ss;
193
194  recurse = DIRTREE_COMEAGAIN|((toys.optflags&FLAG_L) ? DIRTREE_SYMFOLLOW : 0);
195
196  // skip . and .. below topdir, handle -xdev and -depth
197  if (new) {
198    if (new->parent) {
199      if (!dirtree_notdotdot(new)) return 0;
200      if (TT.xdev && new->st.st_dev != new->parent->st.st_dev) return 0;
201    }
202    if (S_ISDIR(new->st.st_mode)) {
203      if (!new->again) {
204        if (TT.depth) return recurse;
205      } else {
206        struct double_list *dl;
207
208        if (TT.topdir != -1)
209          for (dl = TT.argdata; dl; dl = dl->next)
210            if (dl->prev == (void *)1 || !new->parent)
211              toys.exitval |= flush_exec(new, (void *)dl);
212
213        return 0;
214      }
215    }
216  }
217
218  // pcount: parentheses stack depth (using toybuf bytes, 4096 max depth)
219  // test: result of most recent test
220  // active: if 0 don't perform tests
221  // not: a pending ! applies to this test (only set if performing tests)
222  // print: saw one of print/ok/exec, no need for default -print
223
224  if (TT.filter) for (ss = TT.filter; *ss; ss++) {
225    int check = active && test;
226
227    s = *ss;
228
229    // handle ! ( ) using toybuf as a stack
230    if (*s != '-') {
231      if (s[1]) goto error;
232
233      if (*s == '!') {
234        // Don't invert if we're not making a decision
235        if (check) not = !not;
236
237      // Save old "not" and "active" on toybuf stack.
238      // Deactivate this parenthetical if !test
239      // Note: test value should never change while !active
240      } else if (*s == '(') {
241        if (pcount == sizeof(toybuf)) goto error;
242        toybuf[pcount++] = not+(active<<1);
243        if (!check) active = 0;
244        not = 0;
245
246      // Pop status, apply deferred not to test
247      } else if (*s == ')') {
248        if (--pcount < 0) goto error;
249        // Pop active state, apply deferred not (which was only set if checking)
250        active = (toybuf[pcount]>>1)&1;
251        if (active && (toybuf[pcount]&1)) test = !test;
252        not = 0;
253      } else goto error;
254
255      continue;
256    } else s++;
257
258    if (!strcmp(s, "xdev")) TT.xdev = 1;
259    else if (!strcmp(s, "depth")) TT.depth = 1;
260    else if (!strcmp(s, "o") || !strcmp(s, "or")) {
261      if (not) goto error;
262      if (active) {
263        if (!test) test = 1;
264        else active = 0;     // decision has been made until next ")"
265      }
266
267    // Mostly ignore NOP argument
268    } else if (!strcmp(s, "a") || !strcmp(s, "and")) {
269      if (not) goto error;
270
271    } else if (!strcmp(s, "print") || !strcmp("print0", s)) {
272      print++;
273      if (check) do_print(new, s[5] ? 0 : '\n');
274
275    } else if (!strcmp(s, "nouser")) {
276      if (check) if (getpwuid(new->st.st_uid)) test = 0;
277    } else if (!strcmp(s, "nogroup")) {
278      if (check) if (getgrgid(new->st.st_gid)) test = 0;
279    } else if (!strcmp(s, "prune")) {
280      if (check && S_ISDIR(new->st.st_dev) && !TT.depth) recurse = 0;
281
282    // Remaining filters take an argument
283    } else {
284      if (!strcmp(s, "name") || !strcmp(s, "iname")
285        || !strcmp(s, "path") || !strcmp(s, "ipath"))
286      {
287        int i = (*s == 'i');
288        char *arg = ss[1], *path = 0, *name = new->name;
289
290        // Handle path expansion and case flattening
291        if (new && s[i] == 'p') name = path = dirtree_path(new, 0);
292        if (i) {
293          if (check || !new) {
294            name = strlower(new ? name : arg);
295            if (!new) {
296              dlist_add(&TT.argdata, name);
297              free(path);
298            } else arg = ((struct double_list *)llist_pop(&argdata))->data;
299          }
300        }
301
302        if (check) {
303          test = !fnmatch(arg, name, FNM_PATHNAME*(s[i] == 'p'));
304          free(path);
305          if (i) free(name);
306        }
307      } else if (!strcmp(s, "perm")) {
308        if (check) {
309          char *m = ss[1];
310          mode_t m1 = string_to_mode(m+(*m == '-'), 0),
311                 m2 = new->st.st_dev & 07777;
312
313          if (*m != '-') m2 &= m1;
314          test = m1 == m2;
315        }
316      } else if (!strcmp(s, "type")) {
317        if (check) {
318          char c = stridx("bcdlpfs", *ss[1]);
319          int types[] = {S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFIFO,
320                         S_IFREG, S_IFSOCK};
321
322          if ((new->st.st_mode & S_IFMT) != types[c]) test = 0;
323        }
324
325      } else if (!strcmp(s, "atime")) {
326        if (check)
327          test = compare_numsign(TT.now - new->st.st_atime, 86400, ss[1]);
328      } else if (!strcmp(s, "ctime")) {
329        if (check)
330          test = compare_numsign(TT.now - new->st.st_ctime, 86400, ss[1]);
331      } else if (!strcmp(s, "mtime")) {
332        if (check)
333          test = compare_numsign(TT.now - new->st.st_mtime, 86400, ss[1]);
334      } else if (!strcmp(s, "size")) {
335        if (check)
336          test = compare_numsign(new->st.st_size, 512, ss[1]);
337      } else if (!strcmp(s, "links")) {
338        if (check) test = compare_numsign(new->st.st_nlink, 0, ss[1]);
339      } else if (!strcmp(s, "user")) {
340        todo_store_argument();
341      } else if (!strcmp(s, "group")) {
342        todo_store_argument();
343      } else if (!strcmp(s, "newer")) {
344        todo_store_argument();
345      } else if (!strcmp(s, "exec") || !strcmp("ok", s)
346              || !strcmp(s, "execdir") || !strcmp(s, "okdir"))
347      {
348        struct exec_range *aa;
349
350        print++;
351
352        // Initial argument parsing pass
353        if (!new) {
354          // special case "-exec \;" to fall through to "needs 1 arg" error.
355          if (!strcmp(ss[1], ";")) {
356            int len;
357
358            dlist_add_nomalloc(&TT.argdata,(void *)(aa = xzalloc(sizeof(*aa))));
359            aa->argstart = ++ss;
360
361            // Record command line arguments to -exec
362            for (len = 0; ss[len]; len++) {
363              if (!strcmp(ss[len], ";")) break;
364              else if (!strcmp(ss[len], "{}")) {
365                aa->curly = len;
366                if (!strcmp(ss[len+1], "+")) {
367
368                  // Measure environment space
369                  if (!TT.envsize) {
370                    char **env;
371
372                    for (env = environ; *env; env++)
373                      TT.envsize += sizeof(char *) + strlen(*env) + 1;
374                    TT.envsize += sizeof(char *);
375                  }
376                  aa->plus++;
377                  len++;
378                  break;
379                }
380              } else aa->argsize += sizeof(char *) + strlen(ss[len]) + 1;
381            }
382            if (!ss[len]) error_exit("-exec without \\;");
383            ss += len-1;
384            aa->arglen = len;
385            aa->dir = !!strchr(s, 'd');
386            if (aa->dir && TT.topdir == -1) TT.topdir = xopen(".", 0);
387          }
388
389        // collect names and execute commands
390        } else {
391          if (check) {
392            char *name;
393            struct double_list **dl;
394
395            // Grab command line exec argument list
396            aa = (void *)llist_pop(&argdata);
397
398            // name is always a new malloc, so we can always free it.
399            name = aa->dir ? xstrdup(new->name) : dirtree_path(new, 0);
400
401            // Mark entry so COMEAGAIN can call flush_exec() in parent.
402            // This is never a valid pointer valud for prev to have otherwise
403            if (aa->dir) aa->prev = (void *)1;
404
405            if (*s == 'o') {
406              char *prompt = xmprintf("[%s] %s", ss[1], name);
407              if(!(test = yesno(prompt, 0))) goto cont;
408            }
409
410            // Add next name to list (global list without -dir, local with)
411            if (aa->dir && new->parent)
412              dl = (struct double_list **)&new->parent->extra;
413            else dl = &aa->names;
414
415            // Is this + mode?
416            if (aa->plus) {
417              int size = sizeof(char *)+strlen(name)+1;
418
419              // Linux caps environment space (env vars + args) at 32 4k pages.
420              // todo: is there a way to probe this instead of constant here?
421
422              if (TT.envsize+aa->argsize+aa->namesize+size >= 131072)
423                toys.exitval |= flush_exec(new, aa);
424              aa->namesize += size;
425            }
426            dlist_add(dl, name);
427            if (!aa->plus) test = flush_exec(new, aa);
428          }
429        }
430      } else goto error;
431
432      // This test can go at the end because we do a syntax checking
433      // pass first. Putting it here gets the error message (-unknown
434      // vs -known noarg) right.
435      if (!*++ss) error_exit("'%s' needs 1 arg", --s);
436    }
437cont:
438    // Apply pending "!" to result
439    if (active && not) test = !test;
440    not = 0;
441  }
442
443  if (new) {
444    // If there was no action, print
445    if (!print && test) do_print(new, '\n');
446  } else dlist_terminate(TT.argdata);
447
448  return recurse;
449
450error:
451  error_exit("bad arg '%s'", *ss);
452}
453
454void find_main(void)
455{
456  int i, len;
457  char **ss = toys.optargs;
458
459  TT.topdir = -1;
460
461  // Distinguish paths from filters
462  for (len = 0; toys.optargs[len]; len++)
463    if (strchr("-!(", *toys.optargs[len])) break;
464  TT.filter = toys.optargs+len;
465
466  // use "." if no paths
467  if (!*ss || **ss == '-') {
468    ss = (char *[]){"."};
469    len = 1;
470  }
471
472  // first pass argument parsing, verify args match up, handle "evaluate once"
473  TT.now = time(0);
474  do_find(0);
475
476  // Loop through paths
477  for (i = 0; i < len; i++) {
478    struct dirtree *new;
479
480    new = dirtree_add_node(0, ss[i], toys.optflags&(FLAG_H|FLAG_L));
481    if (new) dirtree_handle_callback(new, do_find);
482  }
483
484  if (CFG_TOYBOX_FREE) {
485    close(TT.topdir);
486    llist_traverse(TT.argdata, free);
487  }
488}
489