find.c revision a6336b942302b92f0b65ec35299e7667b9fcbe19
1/* find.c - Search directories for matching files.
2 *
3 * Copyright 2014 Rob Landley <rob@landley.net>
4 *
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/find.html
6 *
7 * Our "unspecified" behavior for no paths is to use "."
8 * Parentheses can only stack 4096 deep
9 * Not treating two {} as an error, but only using last
10
11USE_FIND(NEWTOY(find, "?^HL[-HL]", TOYFLAG_USR|TOYFLAG_BIN))
12
13config FIND
14  bool "find"
15  default y
16  help
17    usage: find [-HL] [DIR...] [<options>]
18
19    Search directories for matching files.
20    Default: search "." match all -print all matches.
21
22    -H  Follow command line symlinks         -L  Follow all symlinks
23
24    Match filters:
25    -name  PATTERN filename with wildcards   -iname      case insensitive -name
26    -path  PATTERN path name with wildcards  -ipath      case insensitive -path
27    -user  UNAME   belongs to user UNAME     -nouser     user not in /etc/passwd
28    -group GROUP   belongs to group GROUP    -nogroup    group not in /etc/group
29    -perm  [-]MODE permissions (-=at least)  -prune      ignore contents of dir
30    -size  N[c]    512 byte blocks (c=bytes) -xdev       stay in this filesystem
31    -links N       hardlink count            -atime N    accessed N days ago
32    -ctime N       changed N days ago        -mtime N    modified N days ago
33    -newer FILE    newer mtime than FILE     -mindepth # at least # dirs down
34    -depth         dir contents first        -maxdepth # at most # dirs down
35    -type [bcdflps] (block, char, dir, file, symlink, pipe, socket)
36
37    Numbers N may be prefixed by a - (less than) or + (greater than).
38
39    Combine matches with:
40    !, -a, -o, ( )    not, and, or, group expressions
41
42    Actions:
43    -print   Print match with newline  -print0    Print match with null
44    -exec    Run command with path     -execdir   Run command in file's dir
45    -ok      Ask before exec           -okdir     Ask before execdir
46
47    Commands substitute "{}" with matched file. End with ";" to run each file,
48    or "+" (next argument after "{}") to collect and run with multiple files.
49*/
50
51#define FOR_find
52#include "toys.h"
53
GLOBALS(
  // Start of the filter expression inside toys.optargs (set by find_main)
  char **filter;
  // Per-argument data built by the do_find(0) parsing pass and walked in
  // order on every later call (lowercased -iname/-ipath patterns, looked up
  // -user/-group/-newer values, -exec descriptions)
  struct double_list *argdata;
  // topdir: -1 until an -execdir/-okdir is parsed, then an fd for "."
  // xdev, depth: set when -xdev / -depth appear in the filter list
  // envsize: bytes taken by the environment, measured when "{} +" is parsed
  int topdir, xdev, depth, envsize;
  // time(0) sampled once by find_main, used by -atime/-ctime/-mtime
  time_t now;
)
60
// None of this can go in TT because you can have more than one -exec
// One instance is allocated per -exec/-execdir/-ok/-okdir and stored in
// TT.argdata, so the first two members must line up with the list linkage.
struct exec_range {
  // List linkage. do_find() overwrites prev with (void *)1 as a marker.
  char *next, *prev;

  // dir: command name contains 'd' (-execdir/-okdir)
  // plus: command was terminated by "{} +" rather than ";"
  // arglen: number of argv entries recorded for the command
  // argsize: bytes accounted for the command's fixed arguments
  // curly: index of "{}" within the command, or -1 if there was none
  // namecount: names collected since the last flush_exec()
  // namesize: bytes accounted for collected names ("+" mode only)
  int dir, plus, arglen, argsize, curly, namecount, namesize;
  // First word of the command (points into the find argument vector)
  char **argstart;
  // Collected names when they are not stored in the parent dirtree node
  struct double_list *names;
};
69
70// Perform pending -exec (if any)
71static int flush_exec(struct dirtree *new, struct exec_range *aa)
72{
73  struct double_list **dl;
74  char **newargs;
75  int rc;
76
77  if (!aa->namecount) return 0;
78
79  if (aa->dir && new->parent) dl = (void *)&new->parent->extra;
80  else dl = &aa->names;
81  dlist_terminate(*dl);
82
83  // switch to directory for -execdir, or back to top if we have an -execdir
84  // _and_ a normal -exec, or are at top of tree in -execdir
85  if (aa->dir && new->parent) fchdir(new->parent->data);
86  else if (TT.topdir != -1) fchdir(TT.topdir);
87
88  // execdir: accumulated execs in this directory's children.
89  newargs = xmalloc(sizeof(char *)*(aa->arglen+aa->namecount+1));
90  if (aa->curly < 0) {
91    memcpy(newargs, aa->argstart, sizeof(char *)*aa->arglen);
92    newargs[aa->arglen] = 0;
93  } else {
94    struct double_list *dl2 = *dl;
95    int pos = aa->curly, rest = aa->arglen - aa->curly;
96
97    // Collate argument list
98    memcpy(newargs, aa->argstart, sizeof(char *)*pos);
99    for (dl2 = *dl; dl2; dl2 = dl2->next) newargs[pos++] = dl2->data;
100    rest = aa->arglen - aa->curly - 1;
101    memcpy(newargs+pos, aa->argstart+aa->curly+1, sizeof(char *)*rest);
102    newargs[pos+rest] = 0;
103  }
104
105  rc = xrun(newargs);
106
107  llist_traverse(*dl, llist_free_double);
108  *dl = 0;
109  aa->namecount = 0;
110
111  return rc;
112}
113
// Compare val against the number in str, honoring an optional sign prefix.
//
// "+N" is true when val is greater than N, "-N" when val is less than N,
// and a bare "N" when val equals N. When units is nonzero and str ends in a
// digit, N is multiplied by units before the comparison. Anything that
// starts with neither a sign nor a digit is reported through error_exit().
static int compare_numsign(long val, long units, char *str)
{
  char prefix = 0;
  long target;

  switch (*str) {
    case '+':
    case '-':
      prefix = *str;
      str++;
      break;
    default:
      if (!isdigit(*str)) error_exit("%s not [+-]N", str);
      break;
  }

  target = atolx(str);
  if (units) {
    char last = str[strlen(str)-1];

    if (isdigit(last)) target *= units;
  }

  switch (prefix) {
    case '+': return val > target;
    case '-': return val < target;
    default: return val == target;
  }
}
129
// Write the path of node "new" followed by the terminator character c.
static void do_print(struct dirtree *new, char c)
{
  char *path;

  // dirtree_path() hands back a string this function then frees
  path = dirtree_path(new, 0);
  xprintf("%s%c", path, c);
  free(path);
}
137
138char *strlower(char *s)
139{
140  char *try, *new;
141
142  if (!CFG_TOYBOX_I18N) {
143    try = new = xstrdup(s);
144    for (; *s; s++) *(new++) = tolower(*s);
145  } else {
146    // I can't guarantee the string _won't_ expand during reencoding, so...?
147    try = new = xmalloc(strlen(s)*2+1);
148
149    while (*s) {
150      wchar_t c;
151      int len = mbrtowc(&c, s, MB_CUR_MAX, 0);
152
153      if (len < 1) *(new++) = *(s++);
154      else {
155        s += len;
156        // squash title case too
157        c = towlower(c);
158
159        // if we had a valid utf8 sequence, convert it to lower case, and can't
160        // encode back to utf8, something is wrong with your libc. But just
161        // in case somebody finds an exploit...
162        len = wcrtomb(new, c, 0);
163        if (len < 1) error_exit("bad utf8 %x", c);
164        new += len;
165      }
166    }
167    *new = 0;
168  }
169
170  return try;
171}
172
173// Call this with 0 for first pass argument parsing and syntax checking (which
174// populates argdata). Later commands traverse argdata (in order) when they
175// need "do once" results.
176static int do_find(struct dirtree *new)
177{
178  int pcount = 0, print = 0, not = 0, active = !!new, test = active, recurse;
179  struct double_list *argdata = TT.argdata;
180  char *s, **ss;
181
182  recurse = DIRTREE_COMEAGAIN|((toys.optflags&FLAG_L) ? DIRTREE_SYMFOLLOW : 0);
183
184  // skip . and .. below topdir, handle -xdev and -depth
185  if (new) {
186    if (new->parent) {
187      if (!dirtree_notdotdot(new)) return 0;
188      if (TT.xdev && new->st.st_dev != new->parent->st.st_dev) recurse = 0;
189    }
190    if (S_ISDIR(new->st.st_mode)) {
191      if (!new->again) {
192        struct dirtree *n;
193
194        if (TT.depth) return recurse;
195        for (n = new->parent; n; n = n->parent) {
196          if (n->st.st_ino==new->st.st_ino && n->st.st_dev==new->st.st_dev) {
197            error_msg("'%s': loop detected", s = dirtree_path(new, 0));
198            free(s);
199
200            return 0;
201          }
202        }
203      } else {
204        struct double_list *dl;
205
206        if (TT.topdir != -1)
207          for (dl = TT.argdata; dl; dl = dl->next)
208            if (dl->prev == (void *)1 || !new->parent)
209              toys.exitval |= flush_exec(new, (void *)dl);
210
211        return 0;
212      }
213    }
214  }
215
216  // pcount: parentheses stack depth (using toybuf bytes, 4096 max depth)
217  // test: result of most recent test
218  // active: if 0 don't perform tests
219  // not: a pending ! applies to this test (only set if performing tests)
220  // print: saw one of print/ok/exec, no need for default -print
221
222  if (TT.filter) for (ss = TT.filter; *ss; ss++) {
223    int check = active && test;
224
225    s = *ss;
226
227    // handle ! ( ) using toybuf as a stack
228    if (*s != '-') {
229      if (s[1]) goto error;
230
231      if (*s == '!') {
232        // Don't invert if we're not making a decision
233        if (check) not = !not;
234
235      // Save old "not" and "active" on toybuf stack.
236      // Deactivate this parenthetical if !test
237      // Note: test value should never change while !active
238      } else if (*s == '(') {
239        if (pcount == sizeof(toybuf)) goto error;
240        toybuf[pcount++] = not+(active<<1);
241        if (!check) active = 0;
242        not = 0;
243
244      // Pop status, apply deferred not to test
245      } else if (*s == ')') {
246        if (--pcount < 0) goto error;
247        // Pop active state, apply deferred not (which was only set if checking)
248        active = (toybuf[pcount]>>1)&1;
249        if (active && (toybuf[pcount]&1)) test = !test;
250        not = 0;
251      } else goto error;
252
253      continue;
254    } else s++;
255
256    if (!strcmp(s, "xdev")) TT.xdev = 1;
257    else if (!strcmp(s, "depth")) TT.depth = 1;
258    else if (!strcmp(s, "o") || !strcmp(s, "or")) {
259      if (not) goto error;
260      if (active) {
261        if (!test) test = 1;
262        else active = 0;     // decision has been made until next ")"
263      }
264    } else if (!strcmp(s, "not")) {
265      if (check) not = !not;
266      continue;
267    // Mostly ignore NOP argument
268    } else if (!strcmp(s, "a") || !strcmp(s, "and")) {
269      if (not) goto error;
270
271    } else if (!strcmp(s, "print") || !strcmp("print0", s)) {
272      print++;
273      if (check) do_print(new, s[5] ? 0 : '\n');
274
275    } else if (!strcmp(s, "nouser")) {
276      if (check) if (getpwuid(new->st.st_uid)) test = 0;
277    } else if (!strcmp(s, "nogroup")) {
278      if (check) if (getgrgid(new->st.st_gid)) test = 0;
279    } else if (!strcmp(s, "prune")) {
280      if (check && S_ISDIR(new->st.st_dev) && !TT.depth) recurse = 0;
281
282    // Remaining filters take an argument
283    } else {
284      if (!strcmp(s, "name") || !strcmp(s, "iname")
285        || !strcmp(s, "path") || !strcmp(s, "ipath"))
286      {
287        int i = (*s == 'i');
288        char *arg = ss[1], *path = 0, *name = new->name;
289
290        // Handle path expansion and case flattening
291        if (new && s[i] == 'p') name = path = dirtree_path(new, 0);
292        if (i) {
293          if (check || !new) {
294            name = strlower(new ? name : arg);
295            if (!new) {
296              dlist_add(&TT.argdata, name);
297              free(path);
298            } else arg = ((struct double_list *)llist_pop(&argdata))->data;
299          }
300        }
301
302        if (check) {
303          test = !fnmatch(arg, name, FNM_PATHNAME*(s[i] == 'p'));
304          free(path);
305          if (i) free(name);
306        }
307      } else if (!strcmp(s, "perm")) {
308        if (check) {
309          char *m = ss[1];
310          mode_t m1 = string_to_mode(m+(*m == '-'), 0),
311                 m2 = new->st.st_dev & 07777;
312
313          if (*m != '-') m2 &= m1;
314          test = m1 == m2;
315        }
316      } else if (!strcmp(s, "type")) {
317        if (check) {
318          int types[] = {S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFIFO,
319                         S_IFREG, S_IFSOCK}, i = stridx("bcdlpfs", *ss[1]);
320
321          if (i<0) error_exit("bad -type '%c'", *ss[1]);
322          if ((new->st.st_mode & S_IFMT) != types[i]) test = 0;
323        }
324
325      } else if (!strcmp(s, "atime")) {
326        if (check)
327          test = compare_numsign(TT.now - new->st.st_atime, 86400, ss[1]);
328      } else if (!strcmp(s, "ctime")) {
329        if (check)
330          test = compare_numsign(TT.now - new->st.st_ctime, 86400, ss[1]);
331      } else if (!strcmp(s, "mtime")) {
332        if (check)
333          test = compare_numsign(TT.now - new->st.st_mtime, 86400, ss[1]);
334      } else if (!strcmp(s, "size")) {
335        if (check)
336          test = compare_numsign(new->st.st_size, 512, ss[1]);
337      } else if (!strcmp(s, "links")) {
338        if (check) test = compare_numsign(new->st.st_nlink, 0, ss[1]);
339      } else if (!strcmp(s, "mindepth") || !strcmp(s, "maxdepth")) {
340        if (check) {
341          struct dirtree *dt = new;
342          int i = 0, d = atolx(ss[1]);
343
344          while ((dt = dt->parent)) i++;
345          if (s[1] == 'i') {
346            test = i >= d;
347            if (i == d && not) recurse = 0;
348          } else {
349            test = i <= d;
350            if (i == d && !not) recurse = 0;
351          }
352        }
353      } else if (!strcmp(s, "user") || !strcmp(s, "group")
354              || !strcmp(s, "newer"))
355      {
356        struct {
357          void *next, *prev;
358          union {
359            uid_t uid;
360            gid_t gid;
361            struct timespec tm;
362          } u;
363        } *udl;
364
365        if (!new && ss[1]) {
366          udl = xmalloc(sizeof(*udl));
367          dlist_add_nomalloc(&TT.argdata, (void *)udl);
368
369          if (*s == 'u') udl->u.uid = xgetpwnam(ss[1])->pw_uid;
370          else if (*s == 'g') udl->u.gid = xgetgrnam(ss[1])->gr_gid;
371          else {
372            struct stat st;
373
374            xstat(ss[1], &st);
375            udl->u.tm = st.st_mtim;
376          }
377        } else if (check) {
378          udl = (void *)llist_pop(&argdata);
379          if (*s == 'u') test = new->st.st_uid == udl->u.uid;
380          else if (*s == 'g') test = new->st.st_gid == udl->u.gid;
381          else {
382            test = new->st.st_mtim.tv_sec > udl->u.tm.tv_sec;
383            if (new->st.st_mtim.tv_sec == udl->u.tm.tv_sec)
384              test = new->st.st_mtim.tv_nsec > udl->u.tm.tv_nsec;
385          }
386        }
387      } else if (!strcmp(s, "exec") || !strcmp("ok", s)
388              || !strcmp(s, "execdir") || !strcmp(s, "okdir"))
389      {
390        struct exec_range *aa;
391
392        print++;
393
394        // Initial argument parsing pass
395        if (!new) {
396          int len;
397
398          // catch "-exec" with no args and "-exec \;"
399          if (!ss[1] || !strcmp(ss[1], ";")) error_exit("'%s' needs 1 arg", s);
400
401          dlist_add_nomalloc(&TT.argdata, (void *)(aa = xzalloc(sizeof(*aa))));
402          aa->argstart = ++ss;
403          aa->curly = -1;
404
405          // Record command line arguments to -exec
406          for (len = 0; ss[len]; len++) {
407            if (!strcmp(ss[len], ";")) break;
408            else if (!strcmp(ss[len], "{}")) {
409              aa->curly = len;
410              if (!strcmp(ss[len+1], "+")) {
411
412                // Measure environment space
413                if (!TT.envsize) {
414                  char **env;
415
416                  for (env = environ; *env; env++)
417                    TT.envsize += sizeof(char *) + strlen(*env) + 1;
418                  TT.envsize += sizeof(char *);
419                }
420                aa->plus++;
421                len++;
422                break;
423              }
424            } else aa->argsize += sizeof(char *) + strlen(ss[len]) + 1;
425          }
426          if (!ss[len]) error_exit("-exec without \\;");
427          ss += len;
428          aa->arglen = len;
429          aa->dir = !!strchr(s, 'd');
430          if (aa->dir && TT.topdir == -1) TT.topdir = xopen(".", 0);
431
432        // collect names and execute commands
433        } else if (check) {
434          char *name, *ss1 = ss[1];
435          struct double_list **ddl;
436
437          // Grab command line exec argument list
438          aa = (void *)llist_pop(&argdata);
439          ss += aa->arglen + 1;
440
441          // name is always a new malloc, so we can always free it.
442          name = aa->dir ? xstrdup(new->name) : dirtree_path(new, 0);
443
444          // Mark entry so COMEAGAIN can call flush_exec() in parent.
445          // This is never a valid pointer valud for prev to have otherwise
446          if (aa->dir) aa->prev = (void *)1;
447
448          if (*s == 'o') {
449            char *prompt = xmprintf("[%s] %s", ss1, name);
450            test = yesno(prompt, 0);
451            free(prompt);
452            if (!test) {
453              free(name);
454              goto cont;
455            }
456          }
457
458          // Add next name to list (global list without -dir, local with)
459          if (aa->dir && new->parent)
460            ddl = (struct double_list **)&new->parent->extra;
461          else ddl = &aa->names;
462
463          // Is this + mode?
464          if (aa->plus) {
465            int size = sizeof(char *)+strlen(name)+1;
466
467            // Linux caps environment space (env vars + args) at 32 4k pages.
468            // todo: is there a way to probe this instead of constant here?
469
470            if (TT.envsize+aa->argsize+aa->namesize+size >= 131072)
471              toys.exitval |= flush_exec(new, aa);
472            aa->namesize += size;
473          }
474          dlist_add(ddl, name);
475          aa->namecount++;
476          if (!aa->plus) test = flush_exec(new, aa);
477        }
478
479        // Argument consumed, skip the check.
480        goto cont;
481      } else goto error;
482
483      // This test can go at the end because we do a syntax checking
484      // pass first. Putting it here gets the error message (-unknown
485      // vs -known noarg) right.
486      if (!*++ss) error_exit("'%s' needs 1 arg", --s);
487    }
488cont:
489    // Apply pending "!" to result
490    if (active && not) test = !test;
491    not = 0;
492  }
493
494  if (new) {
495    // If there was no action, print
496    if (!print && test) do_print(new, '\n');
497  } else dlist_terminate(TT.argdata);
498
499  return recurse;
500
501error:
502  error_exit("bad arg '%s'", *ss);
503}
504
505void find_main(void)
506{
507  int i, len;
508  char **ss = toys.optargs;
509
510  TT.topdir = -1;
511
512  // Distinguish paths from filters
513  for (len = 0; toys.optargs[len]; len++)
514    if (strchr("-!(", *toys.optargs[len])) break;
515  TT.filter = toys.optargs+len;
516
517  // use "." if no paths
518  if (!len) {
519    ss = (char *[]){"."};
520    len = 1;
521  }
522
523  // first pass argument parsing, verify args match up, handle "evaluate once"
524  TT.now = time(0);
525  do_find(0);
526
527  // Loop through paths
528  for (i = 0; i < len; i++) {
529    struct dirtree *new;
530
531    new = dirtree_add_node(0, ss[i], toys.optflags&(FLAG_H|FLAG_L));
532    if (new) dirtree_handle_callback(new, do_find);
533  }
534
535  if (CFG_TOYBOX_FREE) {
536    close(TT.topdir);
537    llist_traverse(TT.argdata, free);
538  }
539}
540