#ifndef MARISA_BASE_H_
#define MARISA_BASE_H_

// Basic definitions shared by the C and C++ interfaces of libmarisa:
// fixed-width integer types and their limits, status codes, dictionary
// flags and, for C++ only, marisa::Exception, the MARISA_THROW* macros and
// marisa::Swap().

// Visual C++ does not provide stdint.h.
#ifndef _MSC_VER
#include <stdint.h>
#endif  // _MSC_VER

#ifdef __cplusplus
#include <cstddef>
#include <new>
#else  // __cplusplus
#include <stddef.h>
#endif  // __cplusplus

#if defined(__ANDROID__)
#include <android/log.h>
#include <stdio.h>
#endif  // __ANDROID__

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

#ifdef _MSC_VER
typedef unsigned __int8  marisa_uint8;
typedef unsigned __int16 marisa_uint16;
typedef unsigned __int32 marisa_uint32;
typedef unsigned __int64 marisa_uint64;
#else  // _MSC_VER
typedef uint8_t  marisa_uint8;
typedef uint16_t marisa_uint16;
typedef uint32_t marisa_uint32;
typedef uint64_t marisa_uint64;
#endif  // _MSC_VER

// Maximum values of the unsigned types above (-1 converted to an unsigned
// type yields its largest value).
#define MARISA_UINT8_MAX  ((marisa_uint8)-1)
#define MARISA_UINT16_MAX ((marisa_uint16)-1)
#define MARISA_UINT32_MAX ((marisa_uint32)-1)
#define MARISA_UINT64_MAX ((marisa_uint64)-1)
#define MARISA_SIZE_MAX   ((size_t)-1)

// Sentinel values. All of them share the value MARISA_UINT32_MAX.
#define MARISA_ZERO_TERMINATED MARISA_UINT32_MAX
#define MARISA_NOT_FOUND       MARISA_UINT32_MAX
#define MARISA_MISMATCH        MARISA_UINT32_MAX

// Upper limits. They are one less than MARISA_UINT32_MAX, so they never
// collide with the sentinel values above.
#define MARISA_MAX_LENGTH   (MARISA_UINT32_MAX - 1)
#define MARISA_MAX_NUM_KEYS (MARISA_UINT32_MAX - 1)

// marisa_status provides a list of error codes. Most functions in
// libmarisa throw or return an error code.
typedef enum marisa_status_ {
  // MARISA_OK means that a requested operation has succeeded.
  MARISA_OK               = 0,

  // MARISA_HANDLE_ERROR means that a given handle is invalid.
  MARISA_HANDLE_ERROR     = 1,

  // MARISA_STATE_ERROR means that an object is not ready for a requested
  // operation. For example, an operation to modify a fixed container throws
  // an exception with this error code.
  MARISA_STATE_ERROR      = 2,

  // MARISA_PARAM_ERROR means that a given argument is invalid. For example,
  // some functions throw an exception with this error code when an
  // out-of-range value or a NULL pointer is given.
  MARISA_PARAM_ERROR      = 3,

  // MARISA_SIZE_ERROR means that a size exceeds its limit. This error code
  // is used when a dictionary being built is too large or std::length_error
  // is caught.
  MARISA_SIZE_ERROR       = 4,

  // MARISA_MEMORY_ERROR means that a memory allocation has failed.
  MARISA_MEMORY_ERROR     = 5,

  // MARISA_IO_ERROR means that an I/O operation has failed.
  MARISA_IO_ERROR         = 6,

  // MARISA_UNEXPECTED_ERROR means that an unexpected error has occurred.
  MARISA_UNEXPECTED_ERROR = 7
} marisa_status;

// marisa_strerror() returns a name of an error code.
const char *marisa_strerror(marisa_status status);

// Flags and masks for dictionary settings are defined as follows. Please note
// that unspecified value/flags will be replaced with default value/flags.
typedef enum marisa_flags_ {
  // A dictionary consists of 3 tries by default. If you want to change the
  // number of tries, please give it with other flags.
  MARISA_MIN_NUM_TRIES     = 0x00001,
  MARISA_MAX_NUM_TRIES     = 0x000FF,
  MARISA_DEFAULT_NUM_TRIES = 0x00003,

  // MARISA_PATRICIA_TRIE is usually a better choice. MARISA_PREFIX_TRIE is
  // provided for comparing prefix/patricia tries.
  MARISA_PATRICIA_TRIE     = 0x00100,
  MARISA_PREFIX_TRIE       = 0x00200,
  MARISA_DEFAULT_TRIE      = MARISA_PATRICIA_TRIE,

  // There are 3 kinds of TAIL implementations.
  // - MARISA_WITHOUT_TAIL:
  //   builds a dictionary without a TAIL. Its last trie has only 1-byte
  //   labels.
  // - MARISA_BINARY_TAIL:
  //   builds a dictionary with a binary-mode TAIL. Its last labels are stored
  //   as binary data.
  // - MARISA_TEXT_TAIL:
  //   builds a dictionary with a text-mode TAIL if its last labels do not
  //   contain NULL characters. The last labels are stored as zero-terminated
  //   string. Otherwise, a dictionary is built with a binary-mode TAIL.
  MARISA_WITHOUT_TAIL      = 0x01000,
  MARISA_BINARY_TAIL       = 0x02000,
  MARISA_TEXT_TAIL         = 0x04000,
  MARISA_DEFAULT_TAIL      = MARISA_TEXT_TAIL,

  // libmarisa arranges nodes in ascending order of their labels
  // (MARISA_LABEL_ORDER) or in descending order of their weights
  // (MARISA_WEIGHT_ORDER). MARISA_WEIGHT_ORDER is generally a better choice
  // because it enables faster lookups, but MARISA_LABEL_ORDER is still useful
  // if an application needs to predict keys in label order.
  MARISA_LABEL_ORDER       = 0x10000,
  MARISA_WEIGHT_ORDER      = 0x20000,
  MARISA_DEFAULT_ORDER     = MARISA_WEIGHT_ORDER,

  // The default settings. 0 is equivalent to MARISA_DEFAULT_FLAGS.
  MARISA_DEFAULT_FLAGS     = MARISA_DEFAULT_NUM_TRIES
      | MARISA_DEFAULT_TRIE | MARISA_DEFAULT_TAIL | MARISA_DEFAULT_ORDER,

  // Each mask selects the bits used by one group of flags above.
  MARISA_NUM_TRIES_MASK    = 0x000FF,
  MARISA_TRIE_MASK         = 0x00F00,
  MARISA_TAIL_MASK         = 0x0F000,
  MARISA_ORDER_MASK        = 0xF0000,
  MARISA_FLAGS_MASK        = 0xFFFFF
} marisa_flags;

#ifdef __cplusplus
}  // extern "C"
#endif  // __cplusplus

//#include <cstddef>

#ifdef __cplusplus
namespace marisa {

typedef ::marisa_uint8  UInt8;
typedef ::marisa_uint16 UInt16;
typedef ::marisa_uint32 UInt32;
typedef ::marisa_uint64 UInt64;

typedef ::marisa_status Status;

// An exception object stores a filename, a line number and an error code.
// The filename is kept as a plain pointer and is not copied, so it must
// outlive the exception object (MARISA_THROW passes __FILE__, a string
// literal).
class Exception {
 public:
  Exception(const char *filename, int line, Status status)
      : filename_(filename), line_(line), status_(status) {}
  Exception(const Exception &ex)
      : filename_(ex.filename_), line_(ex.line_), status_(ex.status_) {}

  Exception &operator=(const Exception &rhs) {
    filename_ = rhs.filename_;
    line_ = rhs.line_;
    status_ = rhs.status_;
    return *this;
  }

  // Name of the source file given to the constructor.
  const char *filename() const {
    return filename_;
  }
  // Line number given to the constructor.
  int line() const {
    return line_;
  }
  // Error code given to the constructor.
  Status status() const {
    return status_;
  }

  // Same as std::exception, what() returns an error message. The message is
  // whatever marisa_strerror() returns for the stored error code.
  const char *what() const {
    return ::marisa_strerror(status_);
  }

 private:
  const char *filename_;
  int line_;
  Status status_;
};

// MARISA_THROW adds a filename and a line number to an exception.
// The names it expands to are fully qualified so that the macro can be used
// from any namespace, not only from inside namespace marisa.
#if !defined(__ANDROID__)
#define MARISA_THROW(status) \
  (throw ::marisa::Exception(__FILE__, __LINE__, status))
#else

// Android replacement for throwing: writes "marisa exception: <status>" to
// the Android log with ANDROID_LOG_ERROR priority and the tag "marisa-trie",
// then returns 0. It does not throw, so control returns to the caller.
inline int android_log_exception(int status) {
  char tmpbuf[100];
  snprintf(tmpbuf, sizeof(tmpbuf), "marisa exception: %d", status);
  __android_log_write(ANDROID_LOG_ERROR, "marisa-trie", tmpbuf);
  return 0;
}

// NOTE: on Android MARISA_THROW only logs; execution continues after it.
#define MARISA_THROW(status) \
  (::marisa::android_log_exception(status))

#endif  // __ANDROID__

// MARISA_THROW_IF throws an exception with `status' if `cond' is true.
// `status' is evaluated only when `cond' is true. On Android, where
// MARISA_THROW does not throw, the statement following MARISA_THROW_IF is
// still executed, so callers must not rely on it to stop control flow there.
#define MARISA_THROW_IF(cond, status) \
  (void)((!(cond)) || (MARISA_THROW(status), 0))

// MARISA_DEBUG_IF is used for debugging. For example, MARISA_DEBUG_IF is used
// to find out-of-range accesses in marisa::Vector, marisa::IntVector, etc.
// It expands to nothing unless _DEBUG is defined.
#ifdef _DEBUG
#define MARISA_DEBUG_IF(cond, status) \
  MARISA_THROW_IF(cond, status)
#else
#define MARISA_DEBUG_IF(cond, status)
#endif

// To not include <algorithm> only for std::swap().
// Swap() exchanges the values pointed to by `lhs' and `rhs' through a
// temporary copy. If either pointer is NULL, MARISA_PARAM_ERROR is reported
// via MARISA_THROW_IF and nothing is swapped.
template <typename T>
void Swap(T *lhs, T *rhs) {
  MARISA_THROW_IF((lhs == NULL) || (rhs == NULL), MARISA_PARAM_ERROR);
#if defined(__ANDROID__)
  // MARISA_THROW only logs on Android, so return explicitly instead of
  // dereferencing a NULL pointer below.
  if ((lhs == NULL) || (rhs == NULL)) {
    return;
  }
#endif  // __ANDROID__
  T temp = *lhs;
  *lhs = *rhs;
  *rhs = temp;
}

}  // namespace marisa
#endif  // __cplusplus

#endif  // MARISA_BASE_H_