1f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath#include <cstdlib> 2f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath#include <iostream> 3f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath#include <limits> 4f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath#include <string> 5f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath#include <vector> 6f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 7f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath#include <marisa.h> 8f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 9f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath#include "./cmdopt.h" 10f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 11f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathnamespace { 12f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 13f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathenum FindMode { 14f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath FIND_ALL, 15f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath FIND_FIRST, 16f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath FIND_LAST 17f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath}; 18f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 19f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathstd::size_t max_num_results = 10; 20f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan KamathFindMode find_mode = FIND_ALL; 21f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathbool mmap_flag = true; 22f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 23f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathvoid print_help(const char *cmd) { 24f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n" 25f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath "Options:\n" 26f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " -n, --max-num-results=[N] limits the number of results to N" 27f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " (default: 10)\n" 28f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " 0: no limit\n" 29f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " -a, --find-all find all prefix keys (default)\n" 30f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " -f, --find-first find a shortest prefix key\n" 31f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " -l, --find-last find a longest prefix key\n" 32f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary" 33f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " (default)\n" 34f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " -r, --read-dictionary read an entire dictionary into memory\n" 35f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath " -h, --help print this help\n" 36f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath << std::endl; 37f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath} 38f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 39f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathvoid find_all(const marisa::Trie &trie, const std::string &str) { 40f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath static std::vector<marisa::UInt32> key_ids; 41f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath static std::vector<std::size_t> lengths; 42f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath const std::size_t num_keys = trie.find(str, &key_ids, &lengths); 43f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath if (num_keys != 0) { 44f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout << num_keys << " found" << std::endl; 45f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath for (std::size_t i = 0; (i < num_keys) && (i < max_num_results); ++i) { 46f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout << key_ids[i] << '\t'; 47f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout.write(str.c_str(), lengths[i]) << '\t' << str << '\n'; 48f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 49f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } else { 50f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout << "not found" << std::endl; 51f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 52f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath key_ids.clear(); 53f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath lengths.clear(); 54f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath} 55f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 56f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathvoid find_first(const marisa::Trie &trie, const std::string &str) { 57f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::size_t length = 0; 58f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath const marisa::UInt32 key_id = trie.find_first(str, &length); 59f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath if (key_id != trie.notfound()) { 60f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout << key_id << '\t'; 61f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout.write(str.c_str(), length) << '\t' << str << '\n'; 62f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } else { 63f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout << "-1\t" << str << '\n'; 64f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 65f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath} 66f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 67f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathvoid find_last(const marisa::Trie &trie, const std::string &str) { 68f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::size_t length = 0; 69f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath const marisa::UInt32 key_id = trie.find_last(str, &length); 70f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath if (key_id != trie.notfound()) { 71f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout << key_id << '\t'; 72f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout.write(str.c_str(), length) << '\t' << str << '\n'; 73f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } else { 74f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cout << "-1\t" << str << '\n'; 75f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 76f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath} 77f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 78f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathint find(const char * const *args, std::size_t num_args) { 79f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath if (num_args == 0) { 80f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cerr << "error: a dictionary is not specified" << std::endl; 81f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return 10; 82f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } else if (num_args > 1) { 83f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cerr << "error: more than one dictionaries are specified" 84f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath << std::endl; 85f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return 11; 86f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 87f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 88f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath marisa::Trie trie; 89f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath marisa::Mapper mapper; 90f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath if (mmap_flag) { 91f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath try { 92f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath trie.mmap(&mapper, args[0]); 93f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } catch (const marisa::Exception &ex) { 94f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cerr << ex.filename() << ':' << ex.line() << ": " << ex.what() 95f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath << ": failed to mmap a dictionary file: " << args[0] << std::endl; 96f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return 20; 97f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 98f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } else { 99f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath try { 100f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath trie.load(args[0]); 101f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } catch (const marisa::Exception &ex) { 102f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cerr << ex.filename() << ':' << ex.line() << ": " << ex.what() 103f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath << ": failed to load a dictionary file: " << args[0] << std::endl; 104f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return 21; 105f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 106f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 107f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 108f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::string str; 109f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath while (std::getline(std::cin, str)) { 110f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath try { 111f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath switch (find_mode) { 112f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case FIND_ALL: { 113f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath find_all(trie, str); 114f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath break; 115f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 116f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case FIND_FIRST: { 117f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath find_first(trie, str); 118f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath break; 119f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 120f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case FIND_LAST: { 121f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath find_last(trie, str); 122f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath break; 123f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 124f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 125f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } catch (const marisa::Exception &ex) { 126f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cerr << ex.filename() << ':' << ex.line() << ": " << ex.what() 127f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath << ": failed to find keys in: " << str << std::endl; 128f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return 30; 129f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 130f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath if (!std::cout) { 131f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cerr << "error: failed to write results to standard output" 132f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath << std::endl; 133f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return 31; 134f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 135f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 136f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 137f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return 0; 138f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath} 139f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 140f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath} // namespace 141f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 142f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamathint main(int argc, char *argv[]) { 143f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::ios::sync_with_stdio(false); 144f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath 145f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath ::cmdopt_option long_options[] = { 146f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath { "max-num-results", 1, NULL, 'n' }, 147f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath { "find-all", 0, NULL, 'a' }, 148f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath { "find-first", 0, NULL, 'f' }, 149f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath { "find-last", 0, NULL, 'l' }, 150f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath { "mmap-dictionary", 0, NULL, 'm' }, 151f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath { "read-dictionary", 0, NULL, 'r' }, 152f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath { "help", 0, NULL, 'h' }, 153f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath { NULL, 0, NULL, 0 } 154f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath }; 155f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath ::cmdopt_t cmdopt; 156f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath ::cmdopt_init(&cmdopt, argc, argv, "n:aflmrh", long_options); 157f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath int label; 158f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath while ((label = ::cmdopt_get(&cmdopt)) != -1) { 159f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath switch (label) { 160f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case 'n': { 161f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath char *end_of_value; 162f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath const long value = std::strtol(cmdopt.optarg, &end_of_value, 10); 163f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath if ((*end_of_value != '\0') || (value < 0)) { 164f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath std::cerr << "error: option `-n' with an invalid argument: " 165f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath << cmdopt.optarg << std::endl; 166f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 167f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath if ((value == 0) || ((unsigned long)value > MARISA_MAX_NUM_KEYS)) { 168f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath max_num_results = MARISA_MAX_NUM_KEYS; 169f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } else { 170f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath max_num_results = (std::size_t)(value); 171f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 172f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath break; 173f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 174f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case 'a': { 175f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath find_mode = FIND_ALL; 176f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath break; 177f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 178f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case 'f': { 179f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath find_mode = FIND_FIRST; 180f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath break; 181f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 182f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case 'l': { 183f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath find_mode = FIND_LAST; 184f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath break; 185f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 186f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case 'm': { 187f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath mmap_flag = true; 188f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath break; 189f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 190f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case 'r': { 191f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath mmap_flag = false; 192f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath break; 193f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 194f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath case 'h': { 195f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath print_help(argv[0]); 196f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return 0; 197f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 198f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath default: { 199f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return 1; 200f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 201f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 202f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath } 203f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath return find(cmdopt.argv + cmdopt.optind, cmdopt.argc - cmdopt.optind); 204f163f6985a63328d07e3de249ad3daf4a0c67d8aNarayan Kamath} 205