1
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6//     http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13//
14// Copyright 2005-2010 Google, Inc.
15// Author: Jeffrey Soresnen (sorenj@google.com)
16
17#include <fst/extensions/ngram/bitmap-index.h>
18
19#include <algorithm>
20#include <iterator>
21
22#include <fst/extensions/ngram/nthbit.h>
23
24namespace fst {
25
26// These two internal classes implemented inverted views of the
27// primary and secondary indexes.  That is, they provide iterators
28// that have operator*'s that return the number zeros rather than
29// the number of ones.
30
31class primary_index_inverted : public vector<uint32>::const_iterator {
32 public:
33  primary_index_inverted() {}
34  primary_index_inverted(vector<uint32>::const_iterator loc,
35                         vector<uint32>::const_iterator begin) :
36    vector<uint32>::const_iterator(loc), begin_(begin) {}
37  uint32 operator*() {
38    return BitmapIndex::kStorageBitSize * BitmapIndex::kSecondaryBlockSize *
39           (1 + std::distance<vector<uint32>::const_iterator>(begin_, *this)) -
40           vector<uint32>::const_iterator::operator*();
41  }
42 private:
43  vector<uint32>::const_iterator begin_;
44};
45
46class secondary_index_inverted : public vector<uint16>::const_iterator {
47 public:
48  secondary_index_inverted() : vector<uint16>::const_iterator() {}
49  secondary_index_inverted(vector<uint16>::const_iterator loc,
50                           vector<uint16>::const_iterator block_begin) :
51    vector<uint16>::const_iterator(loc), block_begin_(block_begin) {}
52  uint16 operator*() {
53    return ((1 + std::distance<vector<uint16>::const_iterator>(
54        block_begin_, *this)) << BitmapIndex::kStorageLogBitSize) -
55        vector<uint16>::const_iterator::operator*();
56  }
57 private:
58  vector<uint16>::const_iterator block_begin_;
59};
60
61size_t BitmapIndex::Rank1(size_t end) const {
62  if (end == 0) return 0;
63  CHECK_LE(end, Bits());
64  const uint32 end_word = (end - 1) >> BitmapIndex::kStorageLogBitSize;
65  const uint32 sum = get_index_ones_count(end_word);
66  const uint64 zero = 0;
67  const uint64 ones = ~zero;
68  return sum + __builtin_popcountll(bits_[end_word] &
69      (ones >> (kStorageBitSize - (end & kStorageBlockMask))));
70}
71
72size_t BitmapIndex::Select1(size_t bit_index) const {
73  if (bit_index >= GetOnesCount()) return Bits();
74  // search primary index for the relevant block
75  uint32 rembits = bit_index + 1;
76  const uint32 block = find_primary_block(bit_index + 1);
77  uint32 offset = 0;
78  if (block > 0) {
79    rembits -= primary_index_[block - 1];
80    offset += block * kSecondaryBlockSize;
81  }
82  // search the secondary index
83  uint32 word = find_secondary_block(offset, rembits);
84  if (word > 0) {
85    rembits -= secondary_index_[offset + word - 1];
86    offset += word;
87  }
88  int nth = nth_bit(bits_[offset], rembits);
89  return (offset << BitmapIndex::kStorageLogBitSize) + nth;
90}
91
92size_t BitmapIndex::Select0(size_t bit_index) const {
93  if (bit_index >= Bits() - GetOnesCount()) return Bits();
94  // search inverted primary index for relevant block
95  uint32 remzeros = bit_index + 1;
96  uint32 offset = 0;
97  const uint32 block = find_inverted_primary_block(bit_index + 1);
98  if (block > 0) {
99    remzeros -= *primary_index_inverted(primary_index_.begin() + block - 1,
100                                        primary_index_.begin());
101    offset += block * kSecondaryBlockSize;
102  }
103  // search the inverted secondary index
104  uint32 word = find_inverted_secondary_block(offset, remzeros);
105  if (word > 0) {
106    vector<uint16>::const_iterator block_begin =
107        secondary_index_.begin() + offset;
108    remzeros -= *secondary_index_inverted(block_begin + word - 1, block_begin);
109    offset += word;
110  }
111  int nth = nth_bit(~bits_[offset], remzeros);
112  return (offset << BitmapIndex::kStorageLogBitSize) + nth;
113}
114
115size_t BitmapIndex::get_index_ones_count(size_t array_index) const {
116  uint32 sum = 0;
117  if (array_index > 0) {
118    sum += secondary_index_[array_index-1];
119    uint32 end_block = (array_index - 1) / kSecondaryBlockSize;
120    if (end_block > 0) sum += primary_index_[end_block-1];
121  }
122  return sum;
123}
124
125void BitmapIndex::BuildIndex(const uint64 *bits, size_t size) {
126  bits_ = bits;
127  size_ = size;
128  secondary_index_.clear();
129  secondary_index_.reserve(ArraySize());
130  primary_index_.clear();
131  primary_index_.reserve(primary_index_size());
132  const uint64 zero = 0;
133  const uint64 ones = ~zero;
134  uint32 popcount = 0;
135  for (uint32 block_begin = 0; block_begin < ArraySize();
136       block_begin += kSecondaryBlockSize) {
137    uint32 block_popcount = 0;
138    uint32 block_end = block_begin + kSecondaryBlockSize;
139    if (block_end > ArraySize()) block_end = ArraySize();
140    for (uint32 j = block_begin; j < block_end; ++j) {
141      uint64 mask = ones;
142      if (j == ArraySize() - 1) {
143        mask = ones >> (-size_ & BitmapIndex::kStorageBlockMask);
144      }
145      block_popcount += __builtin_popcountll(bits_[j] & mask);
146      secondary_index_.push_back(block_popcount);
147    }
148    popcount += block_popcount;
149    primary_index_.push_back(popcount);
150  }
151}
152
153size_t BitmapIndex::find_secondary_block(
154    size_t block_begin, size_t rem_bit_index) const {
155  size_t block_end = block_begin + kSecondaryBlockSize;
156  if (block_end > secondary_index_.size()) block_end = secondary_index_.size();
157  return std::distance(secondary_index_.begin() + block_begin,
158                       std::lower_bound(secondary_index_.begin() + block_begin,
159                                        secondary_index_.begin() + block_end,
160                                        rem_bit_index));
161}
162
163size_t BitmapIndex::find_inverted_secondary_block(
164    size_t block_begin, size_t rem_bit_index) const {
165  size_t block_end = block_begin + kSecondaryBlockSize;
166  if (block_end > secondary_index_.size()) block_end = secondary_index_.size();
167  secondary_index_inverted start(secondary_index_.begin() + block_begin,
168                                 secondary_index_.begin() + block_begin);
169  secondary_index_inverted end(secondary_index_.begin() + block_end,
170                               secondary_index_.begin() + block_begin);
171  return std::distance(start,
172                       std::lower_bound(start, end, rem_bit_index));
173}
174
175inline size_t BitmapIndex::find_primary_block(size_t bit_index) const {
176  return std::distance(primary_index_.begin(),
177                       std::lower_bound(primary_index_.begin(),
178                                        primary_index_.end(), bit_index));
179}
180
181size_t BitmapIndex::find_inverted_primary_block(size_t bit_index) const {
182  primary_index_inverted start(primary_index_.begin(), primary_index_.begin());
183  primary_index_inverted end(primary_index_.end(), primary_index_.begin());
184  return std::distance(start, std::lower_bound(start, end, bit_index));
185}
186
187}  // end namespace fst
188