1#include <algorithm>
2#include <functional>
3#include <utility>
4
5#include "tail.h"
6
7namespace marisa {
8
9Tail::Tail() : buf_() {}
10
11void Tail::build(const Vector<String> &keys,
12    Vector<UInt32> *offsets, int mode) {
13  switch (mode) {
14    case MARISA_BINARY_TAIL: {
15      build_binary_tail(keys, offsets);
16      return;
17    }
18    case MARISA_TEXT_TAIL: {
19      if (!build_text_tail(keys, offsets)) {
20        build_binary_tail(keys, offsets);
21      }
22      return;
23    }
24    default: {
25      MARISA_THROW(MARISA_PARAM_ERROR);
26    }
27  }
28}
29
30void Tail::mmap(Mapper *mapper, const char *filename,
31    long offset, int whence) {
32  if (mapper == NULL) {
33    MARISA_THROW(MARISA_PARAM_ERROR);
34  }
35  Mapper temp_mapper;
36  temp_mapper.open(filename, offset, whence);
37  map(temp_mapper);
38  temp_mapper.swap(mapper);
39}
40
41void Tail::map(const void *ptr, std::size_t size) {
42  Mapper mapper(ptr, size);
43  map(mapper);
44}
45
46void Tail::map(Mapper &mapper) {
47  Tail temp;
48  temp.buf_.map(mapper);
49  temp.swap(this);
50}
51
52void Tail::load(const char *filename, long offset, int whence) {
53  Reader reader;
54  reader.open(filename, offset, whence);
55  read(reader);
56}
57
58void Tail::fread(::FILE *file) {
59  Reader reader(file);
60  read(reader);
61}
62
63void Tail::read(int fd) {
64  Reader reader(fd);
65  read(reader);
66}
67
68void Tail::read(std::istream &stream) {
69  Reader reader(&stream);
70  read(reader);
71}
72
73void Tail::read(Reader &reader) {
74  Tail temp;
75  temp.buf_.read(reader);
76  temp.swap(this);
77}
78
79void Tail::save(const char *filename, bool trunc_flag,
80    long offset, int whence) const {
81  Writer writer;
82  writer.open(filename, trunc_flag, offset, whence);
83  write(writer);
84}
85
86void Tail::fwrite(::FILE *file) const {
87  Writer writer(file);
88  write(writer);
89}
90
91void Tail::write(int fd) const {
92  Writer writer(fd);
93  write(writer);
94}
95
96void Tail::write(std::ostream &stream) const {
97  Writer writer(&stream);
98  write(writer);
99}
100
101void Tail::write(Writer &writer) const {
102  buf_.write(writer);
103}
104
105void Tail::clear() {
106  Tail().swap(this);
107}
108
109void Tail::swap(Tail *rhs) {
110  buf_.swap(&rhs->buf_);
111}
112
113void Tail::build_binary_tail(const Vector<String> &keys,
114    Vector<UInt32> *offsets) {
115  if (keys.empty()) {
116    build_empty_tail(offsets);
117    return;
118  }
119
120  Vector<UInt8> buf;
121  buf.push_back('\0');
122
123  Vector<UInt32> temp_offsets;
124  temp_offsets.resize(keys.size() + 1);
125
126  for (std::size_t i = 0; i < keys.size(); ++i) {
127    temp_offsets[i] = (UInt32)buf.size();
128    for (std::size_t j = 0; j < keys[i].length(); ++j) {
129      buf.push_back(keys[i][j]);
130    }
131  }
132  temp_offsets.back() = (UInt32)buf.size();
133  buf.shrink();
134
135  if (offsets != NULL) {
136    temp_offsets.swap(offsets);
137  }
138  buf_.swap(&buf);
139}
140
141bool Tail::build_text_tail(const Vector<String> &keys,
142    Vector<UInt32> *offsets) {
143  if (keys.empty()) {
144    build_empty_tail(offsets);
145    return true;
146  }
147
148  typedef std::pair<RString, UInt32> KeyIdPair;
149  Vector<KeyIdPair> pairs;
150  pairs.resize(keys.size());
151  for (std::size_t i = 0; i < keys.size(); ++i) {
152    for (std::size_t j = 0; j < keys[i].length(); ++j) {
153      if (keys[i][j] == '\0') {
154        return false;
155      }
156    }
157    pairs[i].first = RString(keys[i]);
158    pairs[i].second = (UInt32)i;
159  }
160  std::sort(pairs.begin(), pairs.end(), std::greater<KeyIdPair>());
161
162  Vector<UInt8> buf;
163  buf.push_back('T');
164
165  Vector<UInt32> temp_offsets;
166  temp_offsets.resize(pairs.size(), 1);
167
168  const KeyIdPair dummy_key;
169  const KeyIdPair *last = &dummy_key;
170  for (std::size_t i = 0; i < pairs.size(); ++i) {
171    const KeyIdPair &cur = pairs[i];
172    std::size_t match = 0;
173    while ((match < cur.first.length()) && (match < last->first.length()) &&
174        last->first[match] == cur.first[match]) {
175      ++match;
176    }
177    if ((match == cur.first.length()) && (last->first.length() != 0)) {
178      temp_offsets[cur.second] = (UInt32)(temp_offsets[last->second]
179          + (last->first.length() - match));
180    } else {
181      temp_offsets[cur.second] = (UInt32)buf.size();
182      for (std::size_t j = 1; j <= cur.first.length(); ++j) {
183        buf.push_back(cur.first[cur.first.length() - j]);
184      }
185      buf.push_back('\0');
186    }
187    last = &cur;
188  }
189  buf.shrink();
190
191  if (offsets != NULL) {
192    temp_offsets.swap(offsets);
193  }
194  buf_.swap(&buf);
195  return true;
196}
197
198void Tail::build_empty_tail(Vector<UInt32> *offsets) {
199  buf_.clear();
200  if (offsets != NULL) {
201    offsets->clear();
202  }
203}
204
205}  // namespace marisa
206