find.c revision 60c35c486a2ea1c6ea8920c599abf992b27542c5
1/* find.c - Search directories for matching files.
2 *
3 * Copyright 2014 Rob Landley <rob@landley.net>
4 *
 * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/find.html
6 *
7 * Our "unspecified" behavior for no paths is to use "."
8 * Parentheses can only stack 4096 deep
9 * Not treating two {} as an error, but only using last
10
11USE_FIND(NEWTOY(find, "?^HL", TOYFLAG_USR|TOYFLAG_BIN))
12
13config FIND
14  bool "find"
15  default y
16  help
17    usage: find [-HL] [DIR...] [<options>]
18
19    Search directories for matching files.
20    Default: search "." match all -print all matches.
21
22    -H  Follow command line symlinks         -L  Follow all symlinks
23
24    Match filters:
25    -name  PATTERN filename with wildcards   -iname   case insensitive -name
26    -path  PATTERN path name with wildcards  -ipath   case insensitive -path
27    -user  UNAME   belongs to user           -nouser  belongs to unknown user
28    -group GROUP   belongs to group          -nogroup belongs to unknown group
    -perm  [-]MODE permissions (-=at least)  -prune   ignore contents of dir
30    -size  N[c]    512 byte blocks (c=bytes) -xdev    stay in this filesystem
31    -links N       hardlink count            -atime N accessed N days ago
    -ctime N       changed N days ago        -mtime N modified N days ago
33    -type [bcdflps] (block, char, dir, file, symlink, pipe, socket)
34
35    Numbers N may be prefixed by a - (less than) or + (greater than):
36
37    Combine matches with:
38    !, -a, -o, ( )    not, and, or, group expressions
39
40    Actions:
41    -print   Print match with newline  -print0    Print match with null
42    -exec    Run command with path     -execdir   Run command in file's dir
43    -ok      Ask before exec           -okdir     Ask before execdir
44
45    Commands substitute "{}" with matched file. End with ";" to run each file,
46    or "+" (next argument after "{}") to collect and run with multiple files.
47*/
48
49#define FOR_find
50#include "toys.h"
51
52GLOBALS(
53  char **filter;
54  struct double_list *argdata;
55  int topdir, xdev, depth, envsize;
56  time_t now;
57)
58
// State for one -exec/-execdir/-ok/-okdir instance. This can't live in TT
// because a single command line may contain more than one -exec.
//
// Instances are linked into TT.argdata and walked through
// struct double_list pointers, so the two leading members presumably have to
// line up with that structure's next/prev links (its definition is not in
// this file).
struct exec_range {
  char *next;
  char *prev;

  int dir;        // nonzero for the "dir" variants (-execdir, -okdir)
  int plus;       // nonzero when terminated by "{} +" (batch mode)
  int arglen;     // number of command words starting at argstart
  int argsize;    // bytes needed for the command words other than "{}"
  int curly;      // index of "{}" within the command words, -1 if none
  int namecount;  // names collected and not yet passed to a command
  int namesize;   // bytes needed for the collected names
  char **argstart;            // first command word in the argument vector
  struct double_list *names;  // collected names when not kept per directory
};
67
68// Perform pending -exec (if any)
69static int flush_exec(struct dirtree *new, struct exec_range *aa)
70{
71  struct double_list **dl;
72  char **newargs;
73  int rc;
74
75  if (!aa->namecount) return 0;
76
77  if (aa->dir && new->parent) dl = (void *)&new->parent->extra;
78  else dl = &aa->names;
79  dlist_terminate(*dl);
80
81  // switch to directory for -execdir, or back to top if we have an -execdir
82  // _and_ a normal -exec, or are at top of tree in -execdir
83  if (aa->dir && new->parent) fchdir(new->parent->data);
84  else if (TT.topdir != -1) fchdir(TT.topdir);
85
86  // execdir: accumulated execs in this directory's children.
87  newargs = xmalloc(sizeof(char *)*(aa->arglen+aa->namecount+1));
88  if (aa->curly < 0) {
89    memcpy(newargs, aa->argstart, sizeof(char *)*aa->arglen);
90    newargs[aa->arglen] = 0;
91  } else {
92    struct double_list *dl2 = *dl;
93    int pos = aa->curly, rest = aa->arglen - aa->curly;
94
95    // Collate argument list
96    memcpy(newargs, aa->argstart, sizeof(char *)*pos);
97    for (dl2 = *dl; dl2; dl2 = dl2->next) newargs[pos++] = dl2->data;
98    rest = aa->arglen - aa->curly - 1;
99    memcpy(newargs+pos, aa->argstart+aa->curly+1, sizeof(char *)*rest);
100    newargs[pos+rest] = 0;
101  }
102
103  rc = xpclose(xpopen(newargs, 0), 0);
104
105  llist_traverse(*dl, llist_free_double);
106  *dl = 0;
107  aa->namecount = 0;
108
109  return rc;
110}
111
// Compare val against a "[+-]N" argument.
//
// val:   value measured from the file
// units: when nonzero and the argument ends in a digit (no suffix), N is
//        multiplied by this before comparing
// str:   the argument; a leading '+' means "greater than N", a leading '-'
//        means "less than N", no sign means "equal to N"
//
// Returns nonzero when the comparison holds. Exits with an error when the
// argument doesn't have a digit after the optional sign.
static int compare_numsign(long val, long units, char *str)
{
  char sign = 0, *arg = str;
  long myval;

  if (*str == '+' || *str == '-') sign = *(str++);

  // Require a digit here even after a sign: a bare "+" or "-" would leave an
  // empty string, and the suffix test below would then read str[-1].
  if (!isdigit((unsigned char)*str)) error_exit("%s not [+-]N", arg);

  myval = atolx(str);
  if (units && isdigit((unsigned char)str[strlen(str)-1])) myval *= units;

  if (sign == '+') return val > myval;
  if (sign == '-') return val < myval;
  return val == myval;
}
127
// Write the path dirtree_path() builds for this node, followed by the single
// character c (the callers in this file pass '\n' for -print and 0 for
// -print0).
static void do_print(struct dirtree *new, char c)
{
  char *path = dirtree_path(new, 0);

  xprintf("%s%c", path, c);
  free(path);
}
135
136char *strlower(char *s)
137{
138  char *try, *new;
139
140  if (!CFG_TOYBOX_I18N) {
141    try = new = xstrdup(s);
142    for (; *s; s++) *(new++) = tolower(*s);
143  } else {
144    // I can't guarantee the string _won't_ expand during reencoding, so...?
145    try = new = xmalloc(strlen(s)*2+1);
146
147    while (*s) {
148      wchar_t c;
149      int len = mbrtowc(&c, s, MB_CUR_MAX, 0);
150
151      if (len < 1) *(new++) = *(s++);
152      else {
153        s += len;
154        // squash title case too
155        c = towlower(c);
156
157        // if we had a valid utf8 sequence, convert it to lower case, and can't
158        // encode back to utf8, something is wrong with your libc. But just
159        // in case somebody finds an exploit...
160        len = wcrtomb(new, c, 0);
161        if (len < 1) error_exit("bad utf8 %x", c);
162        new += len;
163      }
164    }
165    *new = 0;
166  }
167
168  return try;
169}
170
171// Call this with 0 for first pass argument parsing and syntax checking (which
172// populates argdata). Later commands traverse argdata (in order) when they
173// need "do once" results.
174static int do_find(struct dirtree *new)
175{
176  int pcount = 0, print = 0, not = 0, active = !!new, test = active, recurse;
177  struct double_list *argdata = TT.argdata;
178  char *s, **ss;
179
180  recurse = DIRTREE_COMEAGAIN|((toys.optflags&FLAG_L) ? DIRTREE_SYMFOLLOW : 0);
181
182  // skip . and .. below topdir, handle -xdev and -depth
183  if (new) {
184    if (new->parent) {
185      if (!dirtree_notdotdot(new)) return 0;
186      if (TT.xdev && new->st.st_dev != new->parent->st.st_dev) return 0;
187    }
188    if (S_ISDIR(new->st.st_mode)) {
189      if (!new->again) {
190        if (TT.depth) return recurse;
191      } else {
192        struct double_list *dl;
193
194        if (TT.topdir != -1)
195          for (dl = TT.argdata; dl; dl = dl->next)
196            if (dl->prev == (void *)1 || !new->parent)
197              toys.exitval |= flush_exec(new, (void *)dl);
198
199        return 0;
200      }
201    }
202  }
203
204  // pcount: parentheses stack depth (using toybuf bytes, 4096 max depth)
205  // test: result of most recent test
206  // active: if 0 don't perform tests
207  // not: a pending ! applies to this test (only set if performing tests)
208  // print: saw one of print/ok/exec, no need for default -print
209
210  if (TT.filter) for (ss = TT.filter; *ss; ss++) {
211    int check = active && test;
212
213    s = *ss;
214
215    // handle ! ( ) using toybuf as a stack
216    if (*s != '-') {
217      if (s[1]) goto error;
218
219      if (*s == '!') {
220        // Don't invert if we're not making a decision
221        if (check) not = !not;
222
223      // Save old "not" and "active" on toybuf stack.
224      // Deactivate this parenthetical if !test
225      // Note: test value should never change while !active
226      } else if (*s == '(') {
227        if (pcount == sizeof(toybuf)) goto error;
228        toybuf[pcount++] = not+(active<<1);
229        if (!check) active = 0;
230        not = 0;
231
232      // Pop status, apply deferred not to test
233      } else if (*s == ')') {
234        if (--pcount < 0) goto error;
235        // Pop active state, apply deferred not (which was only set if checking)
236        active = (toybuf[pcount]>>1)&1;
237        if (active && (toybuf[pcount]&1)) test = !test;
238        not = 0;
239      } else goto error;
240
241      continue;
242    } else s++;
243
244    if (!strcmp(s, "xdev")) TT.xdev = 1;
245    else if (!strcmp(s, "depth")) TT.depth = 1;
246    else if (!strcmp(s, "o") || !strcmp(s, "or")) {
247      if (not) goto error;
248      if (active) {
249        if (!test) test = 1;
250        else active = 0;     // decision has been made until next ")"
251      }
252
253    // Mostly ignore NOP argument
254    } else if (!strcmp(s, "a") || !strcmp(s, "and")) {
255      if (not) goto error;
256
257    } else if (!strcmp(s, "print") || !strcmp("print0", s)) {
258      print++;
259      if (check) do_print(new, s[5] ? 0 : '\n');
260
261    } else if (!strcmp(s, "nouser")) {
262      if (check) if (getpwuid(new->st.st_uid)) test = 0;
263    } else if (!strcmp(s, "nogroup")) {
264      if (check) if (getgrgid(new->st.st_gid)) test = 0;
265    } else if (!strcmp(s, "prune")) {
266      if (check && S_ISDIR(new->st.st_dev) && !TT.depth) recurse = 0;
267
268    // Remaining filters take an argument
269    } else {
270      if (!strcmp(s, "name") || !strcmp(s, "iname")
271        || !strcmp(s, "path") || !strcmp(s, "ipath"))
272      {
273        int i = (*s == 'i');
274        char *arg = ss[1], *path = 0, *name = new->name;
275
276        // Handle path expansion and case flattening
277        if (new && s[i] == 'p') name = path = dirtree_path(new, 0);
278        if (i) {
279          if (check || !new) {
280            name = strlower(new ? name : arg);
281            if (!new) {
282              dlist_add(&TT.argdata, name);
283              free(path);
284            } else arg = ((struct double_list *)llist_pop(&argdata))->data;
285          }
286        }
287
288        if (check) {
289          test = !fnmatch(arg, name, FNM_PATHNAME*(s[i] == 'p'));
290          free(path);
291          if (i) free(name);
292        }
293      } else if (!strcmp(s, "perm")) {
294        if (check) {
295          char *m = ss[1];
296          mode_t m1 = string_to_mode(m+(*m == '-'), 0),
297                 m2 = new->st.st_dev & 07777;
298
299          if (*m != '-') m2 &= m1;
300          test = m1 == m2;
301        }
302      } else if (!strcmp(s, "type")) {
303        if (check) {
304          char c = stridx("bcdlpfs", *ss[1]);
305          int types[] = {S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFIFO,
306                         S_IFREG, S_IFSOCK};
307
308          if ((new->st.st_mode & S_IFMT) != types[c]) test = 0;
309        }
310
311      } else if (!strcmp(s, "atime")) {
312        if (check)
313          test = compare_numsign(TT.now - new->st.st_atime, 86400, ss[1]);
314      } else if (!strcmp(s, "ctime")) {
315        if (check)
316          test = compare_numsign(TT.now - new->st.st_ctime, 86400, ss[1]);
317      } else if (!strcmp(s, "mtime")) {
318        if (check)
319          test = compare_numsign(TT.now - new->st.st_mtime, 86400, ss[1]);
320      } else if (!strcmp(s, "size")) {
321        if (check)
322          test = compare_numsign(new->st.st_size, 512, ss[1]);
323      } else if (!strcmp(s, "links")) {
324        if (check) test = compare_numsign(new->st.st_nlink, 0, ss[1]);
325      } else if (!strcmp(s, "user") || !strcmp(s, "group")
326              || !strcmp(s, "newer"))
327      {
328        struct {
329          void *next, *prev;
330          union {
331            uid_t uid;
332            gid_t gid;
333            struct timespec tm;
334          } u;
335        } *udl;
336
337        if (!new && ss[1]) {
338          udl = xmalloc(sizeof(*udl));
339          dlist_add_nomalloc(&TT.argdata, (void *)udl);
340
341          if (*s == 'u') udl->u.uid = xgetpwnam(ss[1])->pw_uid;
342          else if (*s == 'g') udl->u.gid = xgetgrnam(ss[1])->gr_gid;
343          else {
344            struct stat st;
345
346            xstat(ss[1], &st);
347            udl->u.tm = st.st_mtim;
348          }
349        } else if (check) {
350          udl = (void *)llist_pop(&argdata);
351          if (*s == 'u') test = new->st.st_uid == udl->u.uid;
352          else if (*s == 'g') test = new->st.st_gid == udl->u.gid;
353          else {
354            test = new->st.st_mtim.tv_sec > udl->u.tm.tv_sec;
355            if (new->st.st_mtim.tv_sec == udl->u.tm.tv_sec)
356              test = new->st.st_mtim.tv_nsec > udl->u.tm.tv_nsec;
357          }
358        }
359      } else if (!strcmp(s, "exec") || !strcmp("ok", s)
360              || !strcmp(s, "execdir") || !strcmp(s, "okdir"))
361      {
362        struct exec_range *aa;
363
364        print++;
365
366        // Initial argument parsing pass
367        if (!new) {
368          int len;
369
370          // catch "-exec" with no args and "-exec \;"
371          if (!ss[1] || !strcmp(ss[1], ";")) error_exit("'%s' needs 1 arg", s);
372
373          dlist_add_nomalloc(&TT.argdata, (void *)(aa = xzalloc(sizeof(*aa))));
374          aa->argstart = ++ss;
375          aa->curly = -1;
376
377          // Record command line arguments to -exec
378          for (len = 0; ss[len]; len++) {
379            if (!strcmp(ss[len], ";")) break;
380            else if (!strcmp(ss[len], "{}")) {
381              aa->curly = len;
382              if (!strcmp(ss[len+1], "+")) {
383
384                // Measure environment space
385                if (!TT.envsize) {
386                  char **env;
387
388                  for (env = environ; *env; env++)
389                    TT.envsize += sizeof(char *) + strlen(*env) + 1;
390                  TT.envsize += sizeof(char *);
391                }
392                aa->plus++;
393                len++;
394                break;
395              }
396            } else aa->argsize += sizeof(char *) + strlen(ss[len]) + 1;
397          }
398          if (!ss[len]) error_exit("-exec without \\;");
399          ss += len;
400          aa->arglen = len;
401          aa->dir = !!strchr(s, 'd');
402          if (aa->dir && TT.topdir == -1) TT.topdir = xopen(".", 0);
403
404        // collect names and execute commands
405        } else if (check) {
406          char *name, *ss1 = ss[1];
407          struct double_list **ddl;
408
409          // Grab command line exec argument list
410          aa = (void *)llist_pop(&argdata);
411          ss += aa->arglen + 1;
412
413          // name is always a new malloc, so we can always free it.
414          name = aa->dir ? xstrdup(new->name) : dirtree_path(new, 0);
415
416          // Mark entry so COMEAGAIN can call flush_exec() in parent.
417          // This is never a valid pointer valud for prev to have otherwise
418          if (aa->dir) aa->prev = (void *)1;
419
420          if (*s == 'o') {
421            char *prompt = xmprintf("[%s] %s", ss1, name);
422            if(!(test = yesno(prompt, 0))) goto cont;
423          }
424
425          // Add next name to list (global list without -dir, local with)
426          if (aa->dir && new->parent)
427            ddl = (struct double_list **)&new->parent->extra;
428          else ddl = &aa->names;
429
430          // Is this + mode?
431          if (aa->plus) {
432            int size = sizeof(char *)+strlen(name)+1;
433
434            // Linux caps environment space (env vars + args) at 32 4k pages.
435            // todo: is there a way to probe this instead of constant here?
436
437            if (TT.envsize+aa->argsize+aa->namesize+size >= 131072)
438              toys.exitval |= flush_exec(new, aa);
439            aa->namesize += size;
440          }
441          dlist_add(ddl, name);
442          aa->namecount++;
443          if (!aa->plus) test = flush_exec(new, aa);
444        }
445
446        // Argument consumed, skip the check.
447        goto cont;
448      } else goto error;
449
450      // This test can go at the end because we do a syntax checking
451      // pass first. Putting it here gets the error message (-unknown
452      // vs -known noarg) right.
453      if (!*++ss) error_exit("'%s' needs 1 arg", --s);
454    }
455cont:
456    // Apply pending "!" to result
457    if (active && not) test = !test;
458    not = 0;
459  }
460
461  if (new) {
462    // If there was no action, print
463    if (!print && test) do_print(new, '\n');
464  } else dlist_terminate(TT.argdata);
465
466  return recurse;
467
468error:
469  error_exit("bad arg '%s'", *ss);
470}
471
472void find_main(void)
473{
474  int i, len;
475  char **ss = toys.optargs;
476
477  TT.topdir = -1;
478
479  // Distinguish paths from filters
480  for (len = 0; toys.optargs[len]; len++)
481    if (strchr("-!(", *toys.optargs[len])) break;
482  TT.filter = toys.optargs+len;
483
484  // use "." if no paths
485  if (!*ss || **ss == '-') {
486    ss = (char *[]){"."};
487    len = 1;
488  }
489
490  // first pass argument parsing, verify args match up, handle "evaluate once"
491  TT.now = time(0);
492  do_find(0);
493
494  // Loop through paths
495  for (i = 0; i < len; i++) {
496    struct dirtree *new;
497
498    new = dirtree_add_node(0, ss[i], toys.optflags&(FLAG_H|FLAG_L));
499    if (new) dirtree_handle_callback(new, do_find);
500  }
501
502  if (CFG_TOYBOX_FREE) {
503    close(TT.topdir);
504    llist_traverse(TT.argdata, free);
505  }
506}
507