/* wc.c - Word count
 *
 * Copyright 2011 Rob Landley <rob@landley.net>
 *
 * See http://opengroup.org/onlinepubs/9699919799/utilities/wc.html

USE_WC(NEWTOY(wc, "mcwl", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))

config WC
  bool "wc"
  default y
  help
    usage: wc -lwcm [FILE...]

    Count lines, words, and characters in input.

    -l	show lines
    -w	show words
    -c	show bytes
    -m	show characters

    By default outputs lines, words, bytes, and filename for each
    argument (or from stdin if none). Displays only either bytes
    or characters.
*/
26f8a333e3c002e05966fb3ec13029f528df7a892dRob Landley
#define FOR_wc
#include "toys.h"

GLOBALS(
  // Running sums over every file processed, indexed like the per-file
  // counters: [0] lines, [1] words, [2] bytes, [3] characters.
  // Accumulated in show_lengths() and printed as the "total" row.
  unsigned long totals[4];
)
337c8103e5d2e472a2f11f6d6c6660c2903167fb7bRob Landley
347c8103e5d2e472a2f11f6d6c6660c2903167fb7bRob Landleystatic void show_lengths(unsigned long *lengths, char *name)
357c8103e5d2e472a2f11f6d6c6660c2903167fb7bRob Landley{
3620f67f157c2284057328e6391d10e329b088f8d2Rob Landley  int i, space = 7, first = 1;
3720f67f157c2284057328e6391d10e329b088f8d2Rob Landley
3820f67f157c2284057328e6391d10e329b088f8d2Rob Landley  for (i = 0; i<4; i++) if (toys.optflags == (1<<i)) space = 0;
3920f67f157c2284057328e6391d10e329b088f8d2Rob Landley  for (i = 0; i<4; i++) {
4020f67f157c2284057328e6391d10e329b088f8d2Rob Landley    if (toys.optflags&(1<<i)) {
4120f67f157c2284057328e6391d10e329b088f8d2Rob Landley      printf(" %*ld"+first, space, lengths[i]);
4220f67f157c2284057328e6391d10e329b088f8d2Rob Landley      first = 0;
437aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley    }
447aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley    TT.totals[i] += lengths[i];
457aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  }
4620f67f157c2284057328e6391d10e329b088f8d2Rob Landley  if (*toys.optargs) printf(" %s", name);
477aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  xputc('\n');
487c8103e5d2e472a2f11f6d6c6660c2903167fb7bRob Landley}
497c8103e5d2e472a2f11f6d6c6660c2903167fb7bRob Landley
50f8a333e3c002e05966fb3ec13029f528df7a892dRob Landleystatic void do_wc(int fd, char *name)
51f8a333e3c002e05966fb3ec13029f528df7a892dRob Landley{
5220f67f157c2284057328e6391d10e329b088f8d2Rob Landley  int len = 0, clen = 1, space = 0;
5320f67f157c2284057328e6391d10e329b088f8d2Rob Landley  unsigned long word = 0, lengths[] = {0,0,0,0};
54f8a333e3c002e05966fb3ec13029f528df7a892dRob Landley
5520f67f157c2284057328e6391d10e329b088f8d2Rob Landley  // Speed up common case: wc -c normalfile is file length.
563684510034450f5f50d1ad9b5acca327a5c484ddizabera  if (toys.optflags == FLAG_c) {
573684510034450f5f50d1ad9b5acca327a5c484ddizabera    struct stat st;
583684510034450f5f50d1ad9b5acca327a5c484ddizabera
5909d95477765d3941aacb61c97f76ee94301b8faaElliott Hughes    // On Linux, files in /proc often report their size as 0.
6020f67f157c2284057328e6391d10e329b088f8d2Rob Landley    if (!fstat(fd, &st) && S_ISREG(st.st_mode) && st.st_size) {
613684510034450f5f50d1ad9b5acca327a5c484ddizabera      lengths[2] = st.st_size;
623684510034450f5f50d1ad9b5acca327a5c484ddizabera      goto show;
633684510034450f5f50d1ad9b5acca327a5c484ddizabera    }
643684510034450f5f50d1ad9b5acca327a5c484ddizabera  }
653684510034450f5f50d1ad9b5acca327a5c484ddizabera
667aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  for (;;) {
6720f67f157c2284057328e6391d10e329b088f8d2Rob Landley    int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len);
6820f67f157c2284057328e6391d10e329b088f8d2Rob Landley
6920f67f157c2284057328e6391d10e329b088f8d2Rob Landley    if (len2<0) perror_msg_raw(name);
7020f67f157c2284057328e6391d10e329b088f8d2Rob Landley    else len += len2;
7120f67f157c2284057328e6391d10e329b088f8d2Rob Landley    if (len2<1) done++;
7220f67f157c2284057328e6391d10e329b088f8d2Rob Landley
7320f67f157c2284057328e6391d10e329b088f8d2Rob Landley    for (pos = 0; pos<len; pos++) {
7420f67f157c2284057328e6391d10e329b088f8d2Rob Landley      if (toybuf[pos]=='\n') lengths[0]++;
7520f67f157c2284057328e6391d10e329b088f8d2Rob Landley      lengths[2]++;
7620f67f157c2284057328e6391d10e329b088f8d2Rob Landley      if (toys.optflags&FLAG_m) {
7720f67f157c2284057328e6391d10e329b088f8d2Rob Landley        // If we've consumed next wide char
7820f67f157c2284057328e6391d10e329b088f8d2Rob Landley        if (--clen<1) {
7920f67f157c2284057328e6391d10e329b088f8d2Rob Landley          wchar_t wchar;
8020f67f157c2284057328e6391d10e329b088f8d2Rob Landley
8120f67f157c2284057328e6391d10e329b088f8d2Rob Landley          // next wide size, don't count invalid, fetch more data if necessary
8267ddade3373d0fefeff25b48430e5f08c3a7711bRob Landley          clen = utf8towc(&wchar, toybuf+pos, len-pos);
8320f67f157c2284057328e6391d10e329b088f8d2Rob Landley          if (clen == -1) continue;
8420f67f157c2284057328e6391d10e329b088f8d2Rob Landley          if (clen == -2 && !done) break;
8520f67f157c2284057328e6391d10e329b088f8d2Rob Landley
8620f67f157c2284057328e6391d10e329b088f8d2Rob Landley          lengths[3]++;
8720f67f157c2284057328e6391d10e329b088f8d2Rob Landley          space = iswspace(wchar);
887aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley        }
8920f67f157c2284057328e6391d10e329b088f8d2Rob Landley      } else space = isspace(toybuf[pos]);
90abb8ca2455f3efd6f8f0eed78c54829bf0a9001eFelix Janda
917aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley      if (space) word=0;
927aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley      else {
937aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley        if (!word) lengths[1]++;
947aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley        word=1;
957aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley      }
967aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley    }
9720f67f157c2284057328e6391d10e329b088f8d2Rob Landley    if (done) break;
9820f67f157c2284057328e6391d10e329b088f8d2Rob Landley    if (pos != len) memmove(toybuf, toybuf+pos, len-pos);
9920f67f157c2284057328e6391d10e329b088f8d2Rob Landley    len -= pos;
1007aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  }
1017c8103e5d2e472a2f11f6d6c6660c2903167fb7bRob Landley
1023684510034450f5f50d1ad9b5acca327a5c484ddizaberashow:
1037aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  show_lengths(lengths, name);
104f8a333e3c002e05966fb3ec13029f528df7a892dRob Landley}
105f8a333e3c002e05966fb3ec13029f528df7a892dRob Landley
106f8a333e3c002e05966fb3ec13029f528df7a892dRob Landleyvoid wc_main(void)
107f8a333e3c002e05966fb3ec13029f528df7a892dRob Landley{
10820f67f157c2284057328e6391d10e329b088f8d2Rob Landley  if (!toys.optflags) toys.optflags = FLAG_l|FLAG_w|FLAG_c;
1097aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  loopfiles(toys.optargs, do_wc);
1107aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  if (toys.optc>1) show_lengths(TT.totals, "total");
111f8a333e3c002e05966fb3ec13029f528df7a892dRob Landley}
112