1/* find.c - Search directories for matching files.
2 *
3 * Copyright 2014 Rob Landley <rob@landley.net>
4 *
5 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/find.c
6 *
7 * Our "unspecified" behavior for no paths is to use "."
8 * Parentheses can only stack 4096 deep
9 * Not treating two {} as an error, but only using last
10 *
11 * TODO: -empty (dirs too!)
12
13USE_FIND(NEWTOY(find, "?^HL[-HL]", TOYFLAG_USR|TOYFLAG_BIN))
14
15config FIND
16  bool "find"
17  default y
18  help
19    usage: find [-HL] [DIR...] [<options>]
20
21    Search directories for matching files.
22    Default: search "." match all -print all matches.
23
24    -H  Follow command line symlinks         -L  Follow all symlinks
25
26    Match filters:
27    -name  PATTERN  filename with wildcards   -iname      case insensitive -name
28    -path  PATTERN  path name with wildcards  -ipath      case insensitive -path
29    -user  UNAME    belongs to user UNAME     -nouser     user ID not known
30    -group GROUP    belongs to group GROUP    -nogroup    group ID not known
31    -perm  [-/]MODE permissions (-=min /=any) -prune      ignore contents of dir
32    -size  N[c]     512 byte blocks (c=bytes) -xdev       only this filesystem
33    -links N        hardlink count            -atime N    accessed N days ago
34    -ctime N        created N days ago        -mtime N    modified N days ago
35    -newer FILE     newer mtime than FILE     -mindepth # at least # dirs down
36    -depth          ignore contents of dir    -maxdepth # at most # dirs down
37    -inum  N        inode number N            -empty      empty files and dirs
38    -type [bcdflps] (block, char, dir, file, symlink, pipe, socket)
39
40    Numbers N may be prefixed by a - (less than) or + (greater than):
41
42    Combine matches with:
43    !, -a, -o, ( )    not, and, or, group expressions
44
45    Actions:
46    -print   Print match with newline  -print0    Print match with null
47    -exec    Run command with path     -execdir   Run command in file's dir
48    -ok      Ask before exec           -okdir     Ask before execdir
49    -delete  Remove matching file/dir
50
51    Commands substitute "{}" with matched file. End with ";" to run each file,
52    or "+" (next argument after "{}") to collect and run with multiple files.
53*/
54
55#define FOR_find
56#include "toys.h"
57
58GLOBALS(
59  char **filter;
60  struct double_list *argdata;
61  int topdir, xdev, depth;
62  time_t now;
63)
64
65struct execdir_data {
66  struct execdir_data *next;
67
68  int namecount;
69  struct double_list *names;
70};
71
72// None of this can go in TT because you can have more than one -exec
73struct exec_range {
74  char *next, *prev;  // layout compatible with struct double_list
75
76  int dir, plus, arglen, argsize, curly;
77  char **argstart;
78  struct execdir_data exec, *execdir;
79};
80
81// Perform pending -exec (if any)
82static int flush_exec(struct dirtree *new, struct exec_range *aa)
83{
84  struct execdir_data *bb = aa->execdir ? aa->execdir : &aa->exec;
85  char **newargs;
86  int rc, revert = 0;
87
88  if (!bb->namecount) return 0;
89
90  dlist_terminate(bb->names);
91
92  // switch to directory for -execdir, or back to top if we have an -execdir
93  // _and_ a normal -exec, or are at top of tree in -execdir
94  if (TT.topdir != -1) {
95    if (aa->dir && new && new->parent) {
96      revert++;
97      rc = fchdir(new->parent->dirfd);
98    } else rc = fchdir(TT.topdir);
99    if (rc) {
100      perror_msg_raw(revert ? new->name : ".");
101
102      return rc;
103    }
104  }
105
106  // execdir: accumulated execs in this directory's children.
107  newargs = xmalloc(sizeof(char *)*(aa->arglen+bb->namecount+1));
108  if (aa->curly < 0) {
109    memcpy(newargs, aa->argstart, sizeof(char *)*aa->arglen);
110    newargs[aa->arglen] = 0;
111  } else {
112    int pos = aa->curly, rest = aa->arglen - aa->curly;
113    struct double_list *dl;
114
115    // Collate argument list
116    memcpy(newargs, aa->argstart, sizeof(char *)*pos);
117    for (dl = bb->names; dl; dl = dl->next) newargs[pos++] = dl->data;
118    rest = aa->arglen - aa->curly - 1;
119    memcpy(newargs+pos, aa->argstart+aa->curly+1, sizeof(char *)*rest);
120    newargs[pos+rest] = 0;
121  }
122
123  rc = xrun(newargs);
124
125  llist_traverse(bb->names, llist_free_double);
126  bb->names = 0;
127  bb->namecount = 0;
128
129  if (revert) revert = fchdir(TT.topdir);
130
131  return rc;
132}
133
134// Return numeric value with explicit sign
135static int compare_numsign(long val, long units, char *str)
136{
137  char sign = 0;
138  long myval;
139
140  if (*str == '+' || *str == '-') sign = *(str++);
141  else if (!isdigit(*str)) error_exit("%s not [+-]N", str);
142  myval = atolx(str);
143  if (units && isdigit(str[strlen(str)-1])) myval *= units;
144
145  if (sign == '+') return val > myval;
146  if (sign == '-') return val < myval;
147  return val == myval;
148}
149
150static void do_print(struct dirtree *new, char c)
151{
152  char *s=dirtree_path(new, 0);
153
154  xprintf("%s%c", s, c);
155  free(s);
156}
157
158// Descend or ascend -execdir + directory level
159static void execdir(struct dirtree *new, int flush)
160{
161  struct double_list *dl;
162  struct exec_range *aa;
163  struct execdir_data *bb;
164
165  if (new && TT.topdir == -1) return;
166
167  for (dl = TT.argdata; dl; dl = dl->next) {
168    if (dl->prev != (void *)1) continue;
169    aa = (void *)dl;
170    if (!aa->plus || (new && !aa->dir)) continue;
171
172    if (flush) {
173
174      // Flush pending "-execdir +" instances for this dir
175      // or flush everything for -exec at top
176      toys.exitval |= flush_exec(new, aa);
177
178      // pop per-directory struct
179      if ((bb = aa->execdir)) {
180        aa->execdir = bb->next;
181        free(bb);
182      }
183    } else if (aa->dir) {
184
185      // Push new per-directory struct for -execdir/okdir + codepath. (Can't
186      // use new->extra because command line may have multiple -execdir)
187      bb = xzalloc(sizeof(struct execdir_data));
188      bb->next = aa->execdir;
189      aa->execdir = bb;
190    }
191  }
192}
193
194// Call this with 0 for first pass argument parsing and syntax checking (which
195// populates argdata). Later commands traverse argdata (in order) when they
196// need "do once" results.
197static int do_find(struct dirtree *new)
198{
199  int pcount = 0, print = 0, not = 0, active = !!new, test = active, recurse;
200  struct double_list *argdata = TT.argdata;
201  char *s, **ss;
202
203  recurse = DIRTREE_COMEAGAIN|(DIRTREE_SYMFOLLOW*!!(toys.optflags&FLAG_L));
204
205  // skip . and .. below topdir, handle -xdev and -depth
206  if (new) {
207    if (new->parent) {
208      if (!dirtree_notdotdot(new)) return 0;
209      if (TT.xdev && new->st.st_dev != new->parent->st.st_dev) recurse = 0;
210    }
211
212    if (S_ISDIR(new->st.st_mode)) {
213      // Descending into new directory
214      if (!new->again) {
215        struct dirtree *n;
216
217        for (n = new->parent; n; n = n->parent) {
218          if (n->st.st_ino==new->st.st_ino && n->st.st_dev==new->st.st_dev) {
219            error_msg("'%s': loop detected", s = dirtree_path(new, 0));
220            free(s);
221
222            return 0;
223          }
224        }
225
226        if (TT.depth) {
227          execdir(new, 0);
228
229          return recurse;
230        }
231      // Done with directory (COMEAGAIN call)
232      } else {
233        execdir(new, 1);
234        recurse = 0;
235        if (!TT.depth) return 0;
236      }
237    }
238  }
239
240  // pcount: parentheses stack depth (using toybuf bytes, 4096 max depth)
241  // test: result of most recent test
242  // active: if 0 don't perform tests
243  // not: a pending ! applies to this test (only set if performing tests)
244  // print: saw one of print/ok/exec, no need for default -print
245
246  if (TT.filter) for (ss = TT.filter; *ss; ss++) {
247    int check = active && test;
248
249    s = *ss;
250
251    // handle ! ( ) using toybuf as a stack
252    if (*s != '-') {
253      if (s[1]) goto error;
254
255      if (*s == '!') {
256        // Don't invert if we're not making a decision
257        if (check) not = !not;
258
259      // Save old "not" and "active" on toybuf stack.
260      // Deactivate this parenthetical if !test
261      // Note: test value should never change while !active
262      } else if (*s == '(') {
263        if (pcount == sizeof(toybuf)) goto error;
264        toybuf[pcount++] = not+(active<<1);
265        if (!check) active = 0;
266        not = 0;
267
268      // Pop status, apply deferred not to test
269      } else if (*s == ')') {
270        if (--pcount < 0) goto error;
271        // Pop active state, apply deferred not (which was only set if checking)
272        active = (toybuf[pcount]>>1)&1;
273        if (active && (toybuf[pcount]&1)) test = !test;
274        not = 0;
275      } else goto error;
276
277      continue;
278    } else s++;
279
280    if (!strcmp(s, "xdev")) TT.xdev = 1;
281    else if (!strcmp(s, "delete")) {
282      // Delete forces depth first
283      TT.depth = 1;
284      if (new && check)
285        test = !unlinkat(dirtree_parentfd(new), new->name,
286          S_ISDIR(new->st.st_mode) ? AT_REMOVEDIR : 0);
287    } else if (!strcmp(s, "depth")) TT.depth = 1;
288    else if (!strcmp(s, "o") || !strcmp(s, "or")) {
289      if (not) goto error;
290      if (active) {
291        if (!test) test = 1;
292        else active = 0;     // decision has been made until next ")"
293      }
294    } else if (!strcmp(s, "not")) {
295      if (check) not = !not;
296      continue;
297    // Mostly ignore NOP argument
298    } else if (!strcmp(s, "a") || !strcmp(s, "and")) {
299      if (not) goto error;
300
301    } else if (!strcmp(s, "print") || !strcmp("print0", s)) {
302      print++;
303      if (check) do_print(new, s[5] ? 0 : '\n');
304
305    } else if (!strcmp(s, "nouser")) {
306      if (check) if (getpwuid(new->st.st_uid)) test = 0;
307    } else if (!strcmp(s, "nogroup")) {
308      if (check) if (getgrgid(new->st.st_gid)) test = 0;
309    } else if (!strcmp(s, "prune")) {
310      if (check && S_ISDIR(new->st.st_mode) && !TT.depth) recurse = 0;
311
312    // Remaining filters take an argument
313    } else {
314      if (!strcmp(s, "name") || !strcmp(s, "iname")
315        || !strcmp(s, "path") || !strcmp(s, "ipath"))
316      {
317        int i = (*s == 'i');
318        char *arg = ss[1], *path = 0, *name = new ? new->name : arg;
319
320        // Handle path expansion and case flattening
321        if (new && s[i] == 'p') name = path = dirtree_path(new, 0);
322        if (i) {
323          if (check || !new) {
324            if (name) name = strlower(name);
325            if (!new) {
326              dlist_add(&TT.argdata, name);
327              free(path);
328            } else arg = ((struct double_list *)llist_pop(&argdata))->data;
329          }
330        }
331
332        if (check) {
333          test = !fnmatch(arg, name, FNM_PATHNAME*(s[i] == 'p'));
334          free(path);
335          if (i) free(name);
336        }
337      } else if (!strcmp(s, "perm")) {
338        if (check) {
339          char *m = ss[1];
340          int match_min = *m == '-',
341              match_any = *m == '/';
342          mode_t m1 = string_to_mode(m+(match_min || match_any), 0),
343                 m2 = new->st.st_mode & 07777;
344
345          if (match_min || match_any) m2 &= m1;
346          test = match_any ? !m1 || m2 : m1 == m2;
347        }
348      } else if (!strcmp(s, "type")) {
349        if (check) {
350          int types[] = {S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFIFO,
351                         S_IFREG, S_IFSOCK}, i = stridx("bcdlpfs", *ss[1]);
352
353          if (i<0) error_exit("bad -type '%c'", *ss[1]);
354          if ((new->st.st_mode & S_IFMT) != types[i]) test = 0;
355        }
356
357      } else if (!strcmp(s, "atime")) {
358        if (check)
359          test = compare_numsign(TT.now - new->st.st_atime, 86400, ss[1]);
360      } else if (!strcmp(s, "ctime")) {
361        if (check)
362          test = compare_numsign(TT.now - new->st.st_ctime, 86400, ss[1]);
363      } else if (!strcmp(s, "mtime")) {
364        if (check)
365          test = compare_numsign(TT.now - new->st.st_mtime, 86400, ss[1]);
366      } else if (!strcmp(s, "size")) {
367        if (check)
368          test = compare_numsign(new->st.st_size, 512, ss[1]);
369      } else if (!strcmp(s, "links")) {
370        if (check) test = compare_numsign(new->st.st_nlink, 0, ss[1]);
371      } else if (!strcmp(s, "inum")) {
372        if (check)
373          test = compare_numsign(new->st.st_ino, 0, ss[1]);
374      } else if (!strcmp(s, "mindepth") || !strcmp(s, "maxdepth")) {
375        if (check) {
376          struct dirtree *dt = new;
377          int i = 0, d = atolx(ss[1]);
378
379          while ((dt = dt->parent)) i++;
380          if (s[1] == 'i') {
381            test = i >= d;
382            if (i == d && not) recurse = 0;
383          } else {
384            test = i <= d;
385            if (i == d && !not) recurse = 0;
386          }
387        }
388      } else if (!strcmp(s, "user") || !strcmp(s, "group")
389              || !strcmp(s, "newer"))
390      {
391        struct {
392          void *next, *prev;
393          union {
394            uid_t uid;
395            gid_t gid;
396            struct timespec tm;
397          } u;
398        } *udl;
399
400        if (!new) {
401          if (ss[1]) {
402            udl = xmalloc(sizeof(*udl));
403            dlist_add_nomalloc(&TT.argdata, (void *)udl);
404
405            if (*s == 'u') udl->u.uid = xgetpwnamid(ss[1])->pw_uid;
406            else if (*s == 'g') udl->u.gid = xgetgrnamid(ss[1])->gr_gid;
407            else {
408              struct stat st;
409
410              xstat(ss[1], &st);
411              udl->u.tm = st.st_mtim;
412            }
413          }
414        } else {
415          udl = (void *)llist_pop(&argdata);
416          if (check) {
417            if (*s == 'u') test = new->st.st_uid == udl->u.uid;
418            else if (*s == 'g') test = new->st.st_gid == udl->u.gid;
419            else {
420              test = new->st.st_mtim.tv_sec > udl->u.tm.tv_sec;
421              if (new->st.st_mtim.tv_sec == udl->u.tm.tv_sec)
422                test = new->st.st_mtim.tv_nsec > udl->u.tm.tv_nsec;
423            }
424          }
425        }
426      } else if (!strcmp(s, "exec") || !strcmp("ok", s)
427              || !strcmp(s, "execdir") || !strcmp(s, "okdir"))
428      {
429        struct exec_range *aa;
430
431        print++;
432
433        // Initial argument parsing pass
434        if (!new) {
435          int len;
436
437          // catch "-exec" with no args and "-exec \;"
438          if (!ss[1] || !strcmp(ss[1], ";")) error_exit("'%s' needs 1 arg", s);
439
440          dlist_add_nomalloc(&TT.argdata, (void *)(aa = xzalloc(sizeof(*aa))));
441          aa->argstart = ++ss;
442          aa->curly = -1;
443
444          // Record command line arguments to -exec
445          for (len = 0; ss[len]; len++) {
446            if (!strcmp(ss[len], ";")) break;
447            else if (!strcmp(ss[len], "{}")) {
448              aa->curly = len;
449              if (ss[len+1] && !strcmp(ss[len+1], "+")) {
450                aa->plus++;
451                len++;
452                break;
453              }
454            } else aa->argsize += sizeof(char *) + strlen(ss[len]) + 1;
455          }
456          if (!ss[len]) error_exit("-exec without %s",
457            aa->curly!=-1 ? "\\;" : "{}");
458          ss += len;
459          aa->arglen = len;
460          aa->dir = !!strchr(s, 'd');
461          if (TT.topdir == -1) TT.topdir = xopen(".", 0);
462
463        // collect names and execute commands
464        } else {
465          char *name, *ss1 = ss[1];
466          struct execdir_data *bb;
467
468          // Grab command line exec argument list
469          aa = (void *)llist_pop(&argdata);
470          ss += aa->arglen + 1;
471
472          if (!check) goto cont;
473          // name is always a new malloc, so we can always free it.
474          name = aa->dir ? xstrdup(new->name) : dirtree_path(new, 0);
475
476          if (*s == 'o') {
477            fprintf(stderr, "[%s] %s", ss1, name);
478            if (!(test = yesno(0))) {
479              free(name);
480              goto cont;
481            }
482          }
483
484          // Add next name to list (global list without -dir, local with)
485          bb = aa->execdir ? aa->execdir : &aa->exec;
486          dlist_add(&bb->names, name);
487          bb->namecount++;
488
489          // -exec + collates and saves result in exitval
490          if (aa->plus) {
491            // Mark entry so COMEAGAIN can call flush_exec() in parent.
492            // This is never a valid pointer value for prev to have otherwise
493            // Done here vs argument parsing pass so it's after dlist_terminate
494            aa->prev = (void *)1;
495
496            // Flush if we pass 16 megs of environment space.
497            // An insanely long path (>2 gigs) could wrap the counter and
498            // defeat this test, which could potentially trigger OOM killer.
499            if ((aa->plus += sizeof(char *)+strlen(name)+1) > 1<<24) {
500              aa->plus = 1;
501              toys.exitval |= flush_exec(new, aa);
502            }
503          } else test = flush_exec(new, aa);
504        }
505
506        // Argument consumed, skip the check.
507        goto cont;
508      } else goto error;
509
510      // This test can go at the end because we do a syntax checking
511      // pass first. Putting it here gets the error message (-unknown
512      // vs -known noarg) right.
513      if (!*++ss) error_exit("'%s' needs 1 arg", --s);
514    }
515cont:
516    // Apply pending "!" to result
517    if (active && not) test = !test;
518    not = 0;
519  }
520
521  if (new) {
522    // If there was no action, print
523    if (!print && test) do_print(new, '\n');
524
525    if (S_ISDIR(new->st.st_mode)) execdir(new, 0);
526
527  } else dlist_terminate(TT.argdata);
528
529  return recurse;
530
531error:
532  error_exit("bad arg '%s'", *ss);
533}
534
535void find_main(void)
536{
537  int i, len;
538  char **ss = toys.optargs;
539
540  TT.topdir = -1;
541
542  // Distinguish paths from filters
543  for (len = 0; toys.optargs[len]; len++)
544    if (strchr("-!(", *toys.optargs[len])) break;
545  TT.filter = toys.optargs+len;
546
547  // use "." if no paths
548  if (!len) {
549    ss = (char *[]){"."};
550    len = 1;
551  }
552
553  // first pass argument parsing, verify args match up, handle "evaluate once"
554  TT.now = time(0);
555  do_find(0);
556
557  // Loop through paths
558  for (i = 0; i < len; i++)
559    dirtree_handle_callback(dirtree_start(ss[i], toys.optflags&(FLAG_H|FLAG_L)),
560      do_find);
561
562  execdir(0, 1);
563
564  if (CFG_TOYBOX_FREE) {
565    close(TT.topdir);
566    llist_traverse(TT.argdata, free);
567  }
568}
569