1/* Copyright (c) 2014, Google Inc.
2 *
3 * Permission to use, copy, modify, and/or distribute this software for any
4 * purpose with or without fee is hereby granted, provided that the above
5 * copyright notice and this permission notice appear in all copies.
6 *
7 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15#include <openssl/base.h>
16
17#include <memory>
18#include <string>
19#include <vector>
20
21#include <errno.h>
22#include <fcntl.h>
23#include <limits.h>
24#include <sys/stat.h>
25#include <sys/types.h>
26
27#if !defined(OPENSSL_WINDOWS)
28#include <string.h>
29#include <unistd.h>
30#if !defined(O_BINARY)
31#define O_BINARY 0
32#endif
33#else
34#pragma warning(push, 3)
35#include <windows.h>
36#pragma warning(pop)
37#include <io.h>
38#define PATH_MAX MAX_PATH
39typedef int ssize_t;
40#endif
41
42#include <openssl/digest.h>
43
44
// close_delete is a deleter for std::unique_ptr<int, close_delete>. The managed
// pointer refers to an int holding a file descriptor; "deleting" it closes that
// descriptor. The int itself is not freed.
struct close_delete {
  void operator()(int *fd) {
    const int descriptor = *fd;
    close(descriptor);
  }
};
50
// func_delete turns a plain cleanup function into a deleter type usable with
// std::unique_ptr. Invoking it on a T* calls |func| with that pointer; the
// value |func| returns (of type R) is discarded.
template <typename T, typename R, R (*func)(T *)>
struct func_delete {
  void operator()(T *ptr) {
    static_cast<void>(func(ptr));
  }
};
57
// Source says where input comes from. It is effectively a two-case union
// (standard input, or a file identified by name) spelled out as a C++ struct.
struct Source {
  // Type only exists to select the constructor that builds a stdin source.
  enum Type {
    STDIN,
  };

  // A default-constructed Source is a non-stdin source with an empty filename.
  Source() {}
  // Passing the |STDIN| tag produces a Source that refers to standard input.
  Source(Type) : is_stdin_(true) {}
  // Produces a Source that refers to the file called |name|.
  Source(const std::string &name) : filename_(name) {}

  const std::string &filename() const { return filename_; }
  bool is_stdin() const { return is_stdin_; }

 private:
  bool is_stdin_ = false;
  std::string filename_;
};
75
// kStdinName is the human-readable label that this file's stderr diagnostics
// use for standard input, where a filename would otherwise be printed.
static constexpr char kStdinName[] = "standard input";
77
78// OpenFile opens the regular file named |filename| and sets |*out_fd| to be a
79// file descriptor to it. Returns true on sucess or prints an error to stderr
80// and returns false on error.
81static bool OpenFile(int *out_fd, const std::string &filename) {
82  *out_fd = -1;
83
84  int fd = open(filename.c_str(), O_RDONLY | O_BINARY);
85  if (fd < 0) {
86    fprintf(stderr, "Failed to open input file '%s': %s\n", filename.c_str(),
87            strerror(errno));
88    return false;
89  }
90  std::unique_ptr<int, close_delete> scoped_fd(&fd);
91
92#if !defined(OPENSSL_WINDOWS)
93  struct stat st;
94  if (fstat(fd, &st)) {
95    fprintf(stderr, "Failed to stat input file '%s': %s\n", filename.c_str(),
96            strerror(errno));
97    return false;
98  }
99
100  if (!S_ISREG(st.st_mode)) {
101    fprintf(stderr, "%s: not a regular file\n", filename.c_str());
102    return false;
103  }
104#endif
105
106  *out_fd = fd;
107  scoped_fd.release();
108  return true;
109}
110
111// SumFile hashes the contents of |source| with |md| and sets |*out_hex| to the
112// hex-encoded result.
113//
114// It returns true on success or prints an error to stderr and returns false on
115// error.
116static bool SumFile(std::string *out_hex, const EVP_MD *md,
117                    const Source &source) {
118  std::unique_ptr<int, close_delete> scoped_fd;
119  int fd;
120
121  if (source.is_stdin()) {
122    fd = 0;
123  } else {
124    if (!OpenFile(&fd, source.filename())) {
125      return false;
126    }
127    scoped_fd.reset(&fd);
128  }
129
130  static const size_t kBufSize = 8192;
131  std::unique_ptr<uint8_t[]> buf(new uint8_t[kBufSize]);
132
133  EVP_MD_CTX ctx;
134  EVP_MD_CTX_init(&ctx);
135  std::unique_ptr<EVP_MD_CTX, func_delete<EVP_MD_CTX, int, EVP_MD_CTX_cleanup>>
136      scoped_ctx(&ctx);
137
138  if (!EVP_DigestInit_ex(&ctx, md, NULL)) {
139    fprintf(stderr, "Failed to initialize EVP_MD_CTX.\n");
140    return false;
141  }
142
143  for (;;) {
144    ssize_t n;
145
146    do {
147      n = read(fd, buf.get(), kBufSize);
148    } while (n == -1 && errno == EINTR);
149
150    if (n == 0) {
151      break;
152    } else if (n < 0) {
153      fprintf(stderr, "Failed to read from %s: %s\n",
154              source.is_stdin() ? kStdinName : source.filename().c_str(),
155              strerror(errno));
156      return false;
157    }
158
159    if (!EVP_DigestUpdate(&ctx, buf.get(), n)) {
160      fprintf(stderr, "Failed to update hash.\n");
161      return false;
162    }
163  }
164
165  uint8_t digest[EVP_MAX_MD_SIZE];
166  unsigned digest_len;
167  if (!EVP_DigestFinal_ex(&ctx, digest, &digest_len)) {
168    fprintf(stderr, "Failed to finish hash.\n");
169    return false;
170  }
171
172  char hex_digest[EVP_MAX_MD_SIZE * 2];
173  static const char kHextable[] = "0123456789abcdef";
174  for (unsigned i = 0; i < digest_len; i++) {
175    const uint8_t b = digest[i];
176    hex_digest[i * 2] = kHextable[b >> 4];
177    hex_digest[i * 2 + 1] = kHextable[b & 0xf];
178  }
179  *out_hex = std::string(hex_digest, digest_len * 2);
180
181  return true;
182}
183
184// PrintFileSum hashes |source| with |md| and prints a line to stdout in the
185// format of the coreutils *sum utilities. It returns true on success or prints
186// an error to stderr and returns false on error.
187static bool PrintFileSum(const EVP_MD *md, const Source &source) {
188  std::string hex_digest;
189  if (!SumFile(&hex_digest, md, source)) {
190    return false;
191  }
192
193  // TODO: When given "--binary" or "-b", we should print " *" instead of "  "
194  // between the digest and the filename.
195  //
196  // MSYS and Cygwin md5sum default to binary mode by default, whereas other
197  // platforms' tools default to text mode by default. We default to text mode
198  // by default and consider text mode equivalent to binary mode (i.e. we
199  // always use Unix semantics, even on Windows), which means that our default
200  // output will differ from the MSYS and Cygwin tools' default output.
201  printf("%s  %s\n", hex_digest.c_str(),
202         source.is_stdin() ? "-" : source.filename().c_str());
203  return true;
204}
205
// CheckModeArguments holds the flags that only apply when verifying checksums
// (check mode). Every flag is off by default. See the sha256sum(1) man page
// for the behaviour being imitated.
struct CheckModeArguments {
  // When set, |Check| does not print an "OK" line for files that verify.
  bool quiet{false};
  // When set, |Check| prints neither per-file "FAILED" lines nor the summary
  // warnings.
  bool status{false};
  // When set, |Check| reports each improperly formatted line on stderr.
  bool warn{false};
  // When set, an improperly formatted line causes |Check| to return false.
  bool strict{false};
};
214
215// Check reads lines from |source| where each line is in the format of the
216// coreutils *sum utilities. It attempts to verify each hash by reading the
217// file named in the line.
218//
219// It returns true if all files were verified and, if |args.strict|, no input
220// lines had formatting errors. Otherwise it prints errors to stderr and
221// returns false.
222static bool Check(const CheckModeArguments &args, const EVP_MD *md,
223                  const Source &source) {
224  std::unique_ptr<FILE, func_delete<FILE, int, fclose>> scoped_file;
225  FILE *file;
226
227  if (source.is_stdin()) {
228    file = stdin;
229  } else {
230    int fd;
231    if (!OpenFile(&fd, source.filename())) {
232      return false;
233    }
234
235    file = fdopen(fd, "rb");
236    if (!file) {
237      perror("fdopen");
238      close(fd);
239      return false;
240    }
241
242    scoped_file = std::unique_ptr<FILE, func_delete<FILE, int, fclose>>(file);
243  }
244
245  const size_t hex_size = EVP_MD_size(md) * 2;
246  char line[EVP_MAX_MD_SIZE * 2 + 2 /* spaces */ + PATH_MAX + 1 /* newline */ +
247            1 /* NUL */];
248  unsigned bad_lines = 0;
249  unsigned parsed_lines = 0;
250  unsigned error_lines = 0;
251  unsigned bad_hash_lines = 0;
252  unsigned line_no = 0;
253  bool ok = true;
254  bool draining_overlong_line = false;
255
256  for (;;) {
257    line_no++;
258
259    if (fgets(line, sizeof(line), file) == nullptr) {
260      if (feof(file)) {
261        break;
262      }
263      fprintf(stderr, "Error reading from input.\n");
264      return false;
265    }
266
267    size_t len = strlen(line);
268
269    if (draining_overlong_line) {
270      if (line[len - 1] == '\n') {
271        draining_overlong_line = false;
272      }
273      continue;
274    }
275
276    const bool overlong = line[len - 1] != '\n' && !feof(file);
277
278    if (len < hex_size + 2 /* spaces */ + 1 /* filename */ ||
279        line[hex_size] != ' ' ||
280        line[hex_size + 1] != ' ' ||
281        overlong) {
282      bad_lines++;
283      if (args.warn) {
284        fprintf(stderr, "%s: %u: improperly formatted line\n",
285                source.is_stdin() ? kStdinName : source.filename().c_str(), line_no);
286      }
287      if (args.strict) {
288        ok = false;
289      }
290      if (overlong) {
291        draining_overlong_line = true;
292      }
293      continue;
294    }
295
296    if (line[len - 1] == '\n') {
297      line[len - 1] = 0;
298      len--;
299    }
300
301    parsed_lines++;
302
303    // coreutils does not attempt to restrict relative or absolute paths in the
304    // input so nor does this code.
305    std::string calculated_hex_digest;
306    const std::string target_filename(&line[hex_size + 2]);
307    Source target_source;
308    if (target_filename == "-") {
309      // coreutils reads from stdin if the filename is "-".
310      target_source = Source(Source::STDIN);
311    } else {
312      target_source = Source(target_filename);
313    }
314
315    if (!SumFile(&calculated_hex_digest, md, target_source)) {
316      error_lines++;
317      ok = false;
318      continue;
319    }
320
321    if (calculated_hex_digest != std::string(line, hex_size)) {
322      bad_hash_lines++;
323      if (!args.status) {
324        printf("%s: FAILED\n", target_filename.c_str());
325      }
326      ok = false;
327      continue;
328    }
329
330    if (!args.quiet) {
331      printf("%s: OK\n", target_filename.c_str());
332    }
333  }
334
335  if (!args.status) {
336    if (bad_lines > 0 && parsed_lines > 0) {
337      fprintf(stderr, "WARNING: %u line%s improperly formatted\n", bad_lines,
338              bad_lines == 1 ? " is" : "s are");
339    }
340    if (error_lines > 0) {
341      fprintf(stderr, "WARNING: %u computed checksum(s) did NOT match\n",
342              error_lines);
343    }
344  }
345
346  if (parsed_lines == 0) {
347    fprintf(stderr, "%s: no properly formatted checksum lines found.\n",
348            source.is_stdin() ? kStdinName : source.filename().c_str());
349    ok = false;
350  }
351
352  return ok;
353}
354
355// DigestSum acts like the coreutils *sum utilites, with the given hash
356// function.
357static bool DigestSum(const EVP_MD *md,
358                      const std::vector<std::string> &args) {
359  bool check_mode = false;
360  CheckModeArguments check_args;
361  bool check_mode_args_given = false;
362  std::vector<Source> sources;
363
364  auto it = args.begin();
365  while (it != args.end()) {
366    const std::string &arg = *it;
367    if (!arg.empty() && arg[0] != '-') {
368      break;
369    }
370
371    it++;
372
373    if (arg == "--") {
374      break;
375    }
376
377    if (arg == "-") {
378      // "-" ends the argument list and indicates that stdin should be used.
379      sources.push_back(Source(Source::STDIN));
380      break;
381    }
382
383    if (arg.size() >= 2 && arg[0] == '-' && arg[1] != '-') {
384      for (size_t i = 1; i < arg.size(); i++) {
385        switch (arg[i]) {
386          case 'b':
387          case 't':
388            // Binary/text mode – irrelevent, even on Windows.
389            break;
390          case 'c':
391            check_mode = true;
392            break;
393          case 'w':
394            check_mode_args_given = true;
395            check_args.warn = true;
396            break;
397          default:
398            fprintf(stderr, "Unknown option '%c'.\n", arg[i]);
399            return false;
400        }
401      }
402    } else if (arg == "--binary" || arg == "--text") {
403      // Binary/text mode – irrelevent, even on Windows.
404    } else if (arg == "--check") {
405      check_mode = true;
406    } else if (arg == "--quiet") {
407      check_mode_args_given = true;
408      check_args.quiet = true;
409    } else if (arg == "--status") {
410      check_mode_args_given = true;
411      check_args.status = true;
412    } else if (arg == "--warn") {
413      check_mode_args_given = true;
414      check_args.warn = true;
415    } else if (arg == "--strict") {
416      check_mode_args_given = true;
417      check_args.strict = true;
418    } else {
419      fprintf(stderr, "Unknown option '%s'.\n", arg.c_str());
420      return false;
421    }
422  }
423
424  if (check_mode_args_given && !check_mode) {
425    fprintf(
426        stderr,
427        "Check mode arguments are only meaningful when verifying checksums.\n");
428    return false;
429  }
430
431  for (; it != args.end(); it++) {
432    sources.push_back(Source(*it));
433  }
434
435  if (sources.empty()) {
436    sources.push_back(Source(Source::STDIN));
437  }
438
439  bool ok = true;
440
441  if (check_mode) {
442    for (auto &source : sources) {
443      ok &= Check(check_args, md, source);
444    }
445  } else {
446    for (auto &source : sources) {
447      ok &= PrintFileSum(md, source);
448    }
449  }
450
451  return ok;
452}
453
454bool MD5Sum(const std::vector<std::string> &args) {
455  return DigestSum(EVP_md5(), args);
456}
457
458bool SHA1Sum(const std::vector<std::string> &args) {
459  return DigestSum(EVP_sha1(), args);
460}
461
462bool SHA224Sum(const std::vector<std::string> &args) {
463  return DigestSum(EVP_sha224(), args);
464}
465
466bool SHA256Sum(const std::vector<std::string> &args) {
467  return DigestSum(EVP_sha256(), args);
468}
469
470bool SHA384Sum(const std::vector<std::string> &args) {
471  return DigestSum(EVP_sha384(), args);
472}
473
474bool SHA512Sum(const std::vector<std::string> &args) {
475  return DigestSum(EVP_sha512(), args);
476}
477