1/* 2 * bwt.c for libdivsufsort 3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person 6 * obtaining a copy of this software and associated documentation 7 * files (the "Software"), to deal in the Software without 8 * restriction, including without limitation the rights to use, 9 * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following 12 * conditions: 13 * 14 * The above copyright notice and this permission notice shall be 15 * included in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 * OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27#if HAVE_CONFIG_H 28# include "config.h" 29#endif 30#include <stdio.h> 31#if HAVE_STRING_H 32# include <string.h> 33#endif 34#if HAVE_STDLIB_H 35# include <stdlib.h> 36#endif 37#if HAVE_MEMORY_H 38# include <memory.h> 39#endif 40#if HAVE_STDDEF_H 41# include <stddef.h> 42#endif 43#if HAVE_STRINGS_H 44# include <strings.h> 45#endif 46#if HAVE_SYS_TYPES_H 47# include <sys/types.h> 48#endif 49#if HAVE_IO_H && HAVE_FCNTL_H 50# include <io.h> 51# include <fcntl.h> 52#endif 53#include <time.h> 54#include <divsufsort.h> 55#include "lfs.h" 56 57 58static 59size_t 60write_int(FILE *fp, saidx_t n) { 61 unsigned char c[4]; 62 c[0] = (unsigned char)((n >> 0) & 0xff), c[1] = (unsigned char)((n >> 8) & 0xff), 63 c[2] = (unsigned char)((n >> 16) & 0xff), c[3] = (unsigned char)((n >> 24) & 0xff); 64 return fwrite(c, sizeof(unsigned char), 4, fp); 65} 66 67static 68void 69print_help(const char *progname, int status) { 70 fprintf(stderr, 71 "bwt, a burrows-wheeler transform program, version %s.\n", 72 divsufsort_version()); 73 fprintf(stderr, "usage: %s [-b num] INFILE OUTFILE\n", progname); 74 fprintf(stderr, " -b num set block size to num MiB [1..512] (default: 32)\n\n"); 75 exit(status); 76} 77 78int 79main(int argc, const char *argv[]) { 80 FILE *fp, *ofp; 81 const char *fname, *ofname; 82 sauchar_t *T; 83 saidx_t *SA; 84 LFS_OFF_T n; 85 size_t m; 86 saidx_t pidx; 87 clock_t start,finish; 88 saint_t i, blocksize = 32, needclose = 3; 89 90 /* Check arguments. */ 91 if((argc == 1) || 92 (strcmp(argv[1], "-h") == 0) || 93 (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } 94 if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); } 95 i = 1; 96 if(argc == 5) { 97 if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); } 98 blocksize = atoi(argv[i + 1]); 99 if(blocksize < 0) { blocksize = 1; } 100 else if(512 < blocksize) { blocksize = 512; } 101 i += 2; 102 } 103 blocksize <<= 20; 104 105 /* Open a file for reading. */ 106 if(strcmp(argv[i], "-") != 0) { 107#if HAVE_FOPEN_S 108 if(fopen_s(&fp, fname = argv[i], "rb") != 0) { 109#else 110 if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) { 111#endif 112 fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); 113 perror(NULL); 114 exit(EXIT_FAILURE); 115 } 116 } else { 117#if HAVE__SETMODE && HAVE__FILENO 118 if(_setmode(_fileno(stdin), _O_BINARY) == -1) { 119 fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 120 perror(NULL); 121 exit(EXIT_FAILURE); 122 } 123#endif 124 fp = stdin; 125 fname = "stdin"; 126 needclose ^= 1; 127 } 128 i += 1; 129 130 /* Open a file for writing. */ 131 if(strcmp(argv[i], "-") != 0) { 132#if HAVE_FOPEN_S 133 if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) { 134#else 135 if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) { 136#endif 137 fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); 138 perror(NULL); 139 exit(EXIT_FAILURE); 140 } 141 } else { 142#if HAVE__SETMODE && HAVE__FILENO 143 if(_setmode(_fileno(stdout), _O_BINARY) == -1) { 144 fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 145 perror(NULL); 146 exit(EXIT_FAILURE); 147 } 148#endif 149 ofp = stdout; 150 ofname = "stdout"; 151 needclose ^= 2; 152 } 153 154 /* Get the file size. */ 155 if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { 156 n = LFS_FTELL(fp); 157 rewind(fp); 158 if(n < 0) { 159 fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); 160 perror(NULL); 161 exit(EXIT_FAILURE); 162 } 163 if(0x20000000L < n) { n = 0x20000000L; } 164 if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; } 165 } else if(blocksize == 0) { blocksize = 32 << 20; } 166 167 /* Allocate 5blocksize bytes of memory. */ 168 T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t)); 169 SA = (saidx_t *)malloc(blocksize * sizeof(saidx_t)); 170 if((T == NULL) || (SA == NULL)) { 171 fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 172 exit(EXIT_FAILURE); 173 } 174 175 /* Write the blocksize. */ 176 if(write_int(ofp, blocksize) != 4) { 177 fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); 178 perror(NULL); 179 exit(EXIT_FAILURE); 180 } 181 182 fprintf(stderr, " BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize); 183 start = clock(); 184 for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) { 185 /* Burrows-Wheeler Transform. */ 186 pidx = divbwt(T, T, SA, m); 187 if(pidx < 0) { 188 fprintf(stderr, "%s (bw_transform): %s.\n", 189 argv[0], 190 (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory"); 191 exit(EXIT_FAILURE); 192 } 193 194 /* Write the bwted data. */ 195 if((write_int(ofp, pidx) != 4) || 196 (fwrite(T, sizeof(sauchar_t), m, ofp) != m)) { 197 fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); 198 perror(NULL); 199 exit(EXIT_FAILURE); 200 } 201 } 202 if(ferror(fp)) { 203 fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname); 204 perror(NULL); 205 exit(EXIT_FAILURE); 206 } 207 finish = clock(); 208 fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n", 209 n, (double)(finish - start) / (double)CLOCKS_PER_SEC); 210 211 /* Close files */ 212 if(needclose & 1) { fclose(fp); } 213 if(needclose & 2) { fclose(ofp); } 214 215 /* Deallocate memory. */ 216 free(SA); 217 free(T); 218 219 return 0; 220} 221