1#include <algorithm> 2#include <functional> 3#include <utility> 4 5#include "tail.h" 6 7namespace marisa_alpha { 8 9Tail::Tail() : buf_() {} 10 11void Tail::build(const Vector<String> &keys, 12 Vector<UInt32> *offsets, int mode) { 13 switch (mode) { 14 case MARISA_ALPHA_BINARY_TAIL: { 15 build_binary_tail(keys, offsets); 16 return; 17 } 18 case MARISA_ALPHA_TEXT_TAIL: { 19 if (!build_text_tail(keys, offsets)) { 20 build_binary_tail(keys, offsets); 21 } 22 return; 23 } 24 default: { 25 MARISA_ALPHA_THROW(MARISA_ALPHA_PARAM_ERROR); 26 } 27 } 28} 29 30void Tail::mmap(Mapper *mapper, const char *filename, 31 long offset, int whence) { 32 if (mapper == NULL) { 33 MARISA_ALPHA_THROW(MARISA_ALPHA_PARAM_ERROR); 34 } 35 Mapper temp_mapper; 36 temp_mapper.open(filename, offset, whence); 37 map(temp_mapper); 38 temp_mapper.swap(mapper); 39} 40 41void Tail::map(const void *ptr, std::size_t size) { 42 Mapper mapper(ptr, size); 43 map(mapper); 44} 45 46void Tail::map(Mapper &mapper) { 47 Tail temp; 48 temp.buf_.map(mapper); 49 temp.swap(this); 50} 51 52void Tail::load(const char *filename, long offset, int whence) { 53 Reader reader; 54 reader.open(filename, offset, whence); 55 read(reader); 56} 57 58void Tail::fread(::FILE *file) { 59 Reader reader(file); 60 read(reader); 61} 62 63void Tail::read(int fd) { 64 Reader reader(fd); 65 read(reader); 66} 67 68void Tail::read(std::istream &stream) { 69 Reader reader(&stream); 70 read(reader); 71} 72 73void Tail::read(Reader &reader) { 74 Tail temp; 75 temp.buf_.read(reader); 76 temp.swap(this); 77} 78 79void Tail::save(const char *filename, bool trunc_flag, 80 long offset, int whence) const { 81 Writer writer; 82 writer.open(filename, trunc_flag, offset, whence); 83 write(writer); 84} 85 86void Tail::fwrite(::FILE *file) const { 87 Writer writer(file); 88 write(writer); 89} 90 91void Tail::write(int fd) const { 92 Writer writer(fd); 93 write(writer); 94} 95 96void Tail::write(std::ostream &stream) const { 97 Writer writer(&stream); 98 write(writer); 99} 100 101void Tail::write(Writer &writer) const { 102 buf_.write(writer); 103} 104 105void Tail::clear() { 106 Tail().swap(this); 107} 108 109void Tail::swap(Tail *rhs) { 110 buf_.swap(&rhs->buf_); 111} 112 113void Tail::build_binary_tail(const Vector<String> &keys, 114 Vector<UInt32> *offsets) { 115 if (keys.empty()) { 116 build_empty_tail(offsets); 117 return; 118 } 119 120 Vector<UInt8> buf; 121 buf.push_back('\0'); 122 123 Vector<UInt32> temp_offsets; 124 temp_offsets.resize(keys.size() + 1); 125 126 for (std::size_t i = 0; i < keys.size(); ++i) { 127 temp_offsets[i] = (UInt32)buf.size(); 128 for (std::size_t j = 0; j < keys[i].length(); ++j) { 129 buf.push_back(keys[i][j]); 130 } 131 } 132 temp_offsets.back() = (UInt32)buf.size(); 133 buf.shrink(); 134 135 if (offsets != NULL) { 136 temp_offsets.swap(offsets); 137 } 138 buf_.swap(&buf); 139} 140 141bool Tail::build_text_tail(const Vector<String> &keys, 142 Vector<UInt32> *offsets) { 143 if (keys.empty()) { 144 build_empty_tail(offsets); 145 return true; 146 } 147 148 typedef std::pair<RString, UInt32> KeyIdPair; 149 Vector<KeyIdPair> pairs; 150 pairs.resize(keys.size()); 151 for (std::size_t i = 0; i < keys.size(); ++i) { 152 for (std::size_t j = 0; j < keys[i].length(); ++j) { 153 if (keys[i][j] == '\0') { 154 return false; 155 } 156 } 157 pairs[i].first = RString(keys[i]); 158 pairs[i].second = (UInt32)i; 159 } 160 std::sort(pairs.begin(), pairs.end(), std::greater<KeyIdPair>()); 161 162 Vector<UInt8> buf; 163 buf.push_back('T'); 164 165 Vector<UInt32> temp_offsets; 166 temp_offsets.resize(pairs.size(), 1); 167 168 const KeyIdPair dummy_key; 169 const KeyIdPair *last = &dummy_key; 170 for (std::size_t i = 0; i < pairs.size(); ++i) { 171 const KeyIdPair &cur = pairs[i]; 172 std::size_t match = 0; 173 while ((match < cur.first.length()) && (match < last->first.length()) && 174 last->first[match] == cur.first[match]) { 175 ++match; 176 } 177 if ((match == cur.first.length()) && (last->first.length() != 0)) { 178 temp_offsets[cur.second] = (UInt32)(temp_offsets[last->second] 179 + (last->first.length() - match)); 180 } else { 181 temp_offsets[cur.second] = (UInt32)buf.size(); 182 for (std::size_t j = 1; j <= cur.first.length(); ++j) { 183 buf.push_back(cur.first[cur.first.length() - j]); 184 } 185 buf.push_back('\0'); 186 } 187 last = &cur; 188 } 189 buf.shrink(); 190 191 if (offsets != NULL) { 192 temp_offsets.swap(offsets); 193 } 194 buf_.swap(&buf); 195 return true; 196} 197 198void Tail::build_empty_tail(Vector<UInt32> *offsets) { 199 buf_.clear(); 200 if (offsets != NULL) { 201 offsets->clear(); 202 } 203} 204 205} // namespace marisa_alpha 206