1/*
2 * mksary.c for libdivsufsort
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27#if HAVE_CONFIG_H
28# include "config.h"
29#endif
30#include <stdio.h>
31#if HAVE_STRING_H
32# include <string.h>
33#endif
34#if HAVE_STDLIB_H
35# include <stdlib.h>
36#endif
37#if HAVE_MEMORY_H
38# include <memory.h>
39#endif
40#if HAVE_STDDEF_H
41# include <stddef.h>
42#endif
43#if HAVE_STRINGS_H
44# include <strings.h>
45#endif
46#if HAVE_SYS_TYPES_H
47# include <sys/types.h>
48#endif
49#if HAVE_IO_H && HAVE_FCNTL_H
50# include <io.h>
51# include <fcntl.h>
52#endif
53#include <time.h>
54#include <divsufsort.h>
55#include "lfs.h"
56
57
/*
 * Write the program banner (including the library version string) and the
 * usage line to stderr, then terminate the process with `status`.
 * This function does not return.
 */
static
void
print_help(const char *progname, int status) {
  /* Banner and usage are emitted with a single formatted write. */
  fprintf(stderr,
          "mksary, a simple suffix array builder, version %s.\n"
          "usage: %s INFILE OUTFILE\n\n",
          divsufsort_version(),
          progname);
  exit(status);
}
67
68int
69main(int argc, const char *argv[]) {
70  FILE *fp, *ofp;
71  const char *fname, *ofname;
72  sauchar_t *T;
73  saidx_t *SA;
74  LFS_OFF_T n;
75  clock_t start, finish;
76  saint_t needclose = 3;
77
78  /* Check arguments. */
79  if((argc == 1) ||
80     (strcmp(argv[1], "-h") == 0) ||
81     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
82  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
83
84  /* Open a file for reading. */
85  if(strcmp(argv[1], "-") != 0) {
86#if HAVE_FOPEN_S
87    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
88#else
89    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
90#endif
91      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
92      perror(NULL);
93      exit(EXIT_FAILURE);
94    }
95  } else {
96#if HAVE__SETMODE && HAVE__FILENO
97    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
98      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
99      perror(NULL);
100      exit(EXIT_FAILURE);
101    }
102#endif
103    fp = stdin;
104    fname = "stdin";
105    needclose ^= 1;
106  }
107
108  /* Open a file for writing. */
109  if(strcmp(argv[2], "-") != 0) {
110#if HAVE_FOPEN_S
111    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
112#else
113    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
114#endif
115      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
116      perror(NULL);
117      exit(EXIT_FAILURE);
118    }
119  } else {
120#if HAVE__SETMODE && HAVE__FILENO
121    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
122      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
123      perror(NULL);
124      exit(EXIT_FAILURE);
125    }
126#endif
127    ofp = stdout;
128    ofname = "stdout";
129    needclose ^= 2;
130  }
131
132  /* Get the file size. */
133  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
134    n = LFS_FTELL(fp);
135    rewind(fp);
136    if(n < 0) {
137      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
138      perror(NULL);
139      exit(EXIT_FAILURE);
140    }
141    if(0x7fffffff <= n) {
142      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
143      exit(EXIT_FAILURE);
144    }
145  } else {
146    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
147    perror(NULL);
148    exit(EXIT_FAILURE);
149  }
150
151  /* Allocate 5blocksize bytes of memory. */
152  T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t));
153  SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t));
154  if((T == NULL) || (SA == NULL)) {
155    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
156    exit(EXIT_FAILURE);
157  }
158
159  /* Read n bytes of data. */
160  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
161    fprintf(stderr, "%s: %s `%s': ",
162      argv[0],
163      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
164      fname);
165    perror(NULL);
166    exit(EXIT_FAILURE);
167  }
168  if(needclose & 1) { fclose(fp); }
169
170  /* Construct the suffix array. */
171  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
172  start = clock();
173  if(divsufsort(T, SA, (saidx_t)n) != 0) {
174    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
175    exit(EXIT_FAILURE);
176  }
177  finish = clock();
178  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
179
180  /* Write the suffix array. */
181  if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) {
182    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
183    perror(NULL);
184    exit(EXIT_FAILURE);
185  }
186  if(needclose & 2) { fclose(ofp); }
187
188  /* Deallocate memory. */
189  free(SA);
190  free(T);
191
192  return 0;
193}
194