1#include <cstdlib> 2#include <iostream> 3#include <limits> 4#include <string> 5#include <vector> 6 7#include <marisa.h> 8 9#include "./cmdopt.h" 10 11namespace { 12 13enum FindMode { 14 FIND_ALL, 15 FIND_FIRST, 16 FIND_LAST 17}; 18 19std::size_t max_num_results = 10; 20FindMode find_mode = FIND_ALL; 21bool mmap_flag = true; 22 23void print_help(const char *cmd) { 24 std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n" 25 "Options:\n" 26 " -n, --max-num-results=[N] limits the number of results to N" 27 " (default: 10)\n" 28 " 0: no limit\n" 29 " -a, --find-all find all prefix keys (default)\n" 30 " -f, --find-first find a shortest prefix key\n" 31 " -l, --find-last find a longest prefix key\n" 32 " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary" 33 " (default)\n" 34 " -r, --read-dictionary read an entire dictionary into memory\n" 35 " -h, --help print this help\n" 36 << std::endl; 37} 38 39void find_all(const marisa::Trie &trie, const std::string &str) { 40 static std::vector<marisa::UInt32> key_ids; 41 static std::vector<std::size_t> lengths; 42 const std::size_t num_keys = trie.find(str, &key_ids, &lengths); 43 if (num_keys != 0) { 44 std::cout << num_keys << " found" << std::endl; 45 for (std::size_t i = 0; (i < num_keys) && (i < max_num_results); ++i) { 46 std::cout << key_ids[i] << '\t'; 47 std::cout.write(str.c_str(), lengths[i]) << '\t' << str << '\n'; 48 } 49 } else { 50 std::cout << "not found" << std::endl; 51 } 52 key_ids.clear(); 53 lengths.clear(); 54} 55 56void find_first(const marisa::Trie &trie, const std::string &str) { 57 std::size_t length = 0; 58 const marisa::UInt32 key_id = trie.find_first(str, &length); 59 if (key_id != trie.notfound()) { 60 std::cout << key_id << '\t'; 61 std::cout.write(str.c_str(), length) << '\t' << str << '\n'; 62 } else { 63 std::cout << "-1\t" << str << '\n'; 64 } 65} 66 67void find_last(const marisa::Trie &trie, const std::string &str) { 68 std::size_t length = 0; 69 const marisa::UInt32 key_id = trie.find_last(str, &length); 70 if (key_id != trie.notfound()) { 71 std::cout << key_id << '\t'; 72 std::cout.write(str.c_str(), length) << '\t' << str << '\n'; 73 } else { 74 std::cout << "-1\t" << str << '\n'; 75 } 76} 77 78int find(const char * const *args, std::size_t num_args) { 79 if (num_args == 0) { 80 std::cerr << "error: a dictionary is not specified" << std::endl; 81 return 10; 82 } else if (num_args > 1) { 83 std::cerr << "error: more than one dictionaries are specified" 84 << std::endl; 85 return 11; 86 } 87 88 marisa::Trie trie; 89 marisa::Mapper mapper; 90 if (mmap_flag) { 91 try { 92 trie.mmap(&mapper, args[0]); 93 } catch (const marisa::Exception &ex) { 94 std::cerr << ex.filename() << ':' << ex.line() << ": " << ex.what() 95 << ": failed to mmap a dictionary file: " << args[0] << std::endl; 96 return 20; 97 } 98 } else { 99 try { 100 trie.load(args[0]); 101 } catch (const marisa::Exception &ex) { 102 std::cerr << ex.filename() << ':' << ex.line() << ": " << ex.what() 103 << ": failed to load a dictionary file: " << args[0] << std::endl; 104 return 21; 105 } 106 } 107 108 std::string str; 109 while (std::getline(std::cin, str)) { 110 try { 111 switch (find_mode) { 112 case FIND_ALL: { 113 find_all(trie, str); 114 break; 115 } 116 case FIND_FIRST: { 117 find_first(trie, str); 118 break; 119 } 120 case FIND_LAST: { 121 find_last(trie, str); 122 break; 123 } 124 } 125 } catch (const marisa::Exception &ex) { 126 std::cerr << ex.filename() << ':' << ex.line() << ": " << ex.what() 127 << ": failed to find keys in: " << str << std::endl; 128 return 30; 129 } 130 if (!std::cout) { 131 std::cerr << "error: failed to write results to standard output" 132 << std::endl; 133 return 31; 134 } 135 } 136 137 return 0; 138} 139 140} // namespace 141 142int main(int argc, char *argv[]) { 143 std::ios::sync_with_stdio(false); 144 145 ::cmdopt_option long_options[] = { 146 { "max-num-results", 1, NULL, 'n' }, 147 { "find-all", 0, NULL, 'a' }, 148 { "find-first", 0, NULL, 'f' }, 149 { "find-last", 0, NULL, 'l' }, 150 { "mmap-dictionary", 0, NULL, 'm' }, 151 { "read-dictionary", 0, NULL, 'r' }, 152 { "help", 0, NULL, 'h' }, 153 { NULL, 0, NULL, 0 } 154 }; 155 ::cmdopt_t cmdopt; 156 ::cmdopt_init(&cmdopt, argc, argv, "n:aflmrh", long_options); 157 int label; 158 while ((label = ::cmdopt_get(&cmdopt)) != -1) { 159 switch (label) { 160 case 'n': { 161 char *end_of_value; 162 const long value = std::strtol(cmdopt.optarg, &end_of_value, 10); 163 if ((*end_of_value != '\0') || (value < 0)) { 164 std::cerr << "error: option `-n' with an invalid argument: " 165 << cmdopt.optarg << std::endl; 166 } 167 if ((value == 0) || ((unsigned long)value > MARISA_MAX_NUM_KEYS)) { 168 max_num_results = MARISA_MAX_NUM_KEYS; 169 } else { 170 max_num_results = (std::size_t)(value); 171 } 172 break; 173 } 174 case 'a': { 175 find_mode = FIND_ALL; 176 break; 177 } 178 case 'f': { 179 find_mode = FIND_FIRST; 180 break; 181 } 182 case 'l': { 183 find_mode = FIND_LAST; 184 break; 185 } 186 case 'm': { 187 mmap_flag = true; 188 break; 189 } 190 case 'r': { 191 mmap_flag = false; 192 break; 193 } 194 case 'h': { 195 print_help(argv[0]); 196 return 0; 197 } 198 default: { 199 return 1; 200 } 201 } 202 } 203 return find(cmdopt.argv + cmdopt.optind, cmdopt.argc - cmdopt.optind); 204} 205