1 2// Licensed under the Apache License, Version 2.0 (the "License"); 3// you may not use this file except in compliance with the License. 4// You may obtain a copy of the License at 5// 6// http://www.apache.org/licenses/LICENSE-2.0 7// 8// Unless required by applicable law or agreed to in writing, software 9// distributed under the License is distributed on an "AS IS" BASIS, 10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11// See the License for the specific language governing permissions and 12// limitations under the License. 13// 14// Copyright 2005-2010 Google, Inc. 15// Author: Jeffrey Soresnen (sorenj@google.com) 16 17#include <fst/extensions/ngram/bitmap-index.h> 18 19#include <algorithm> 20#include <iterator> 21 22#include <fst/extensions/ngram/nthbit.h> 23 24namespace fst { 25 26// These two internal classes implemented inverted views of the 27// primary and secondary indexes. That is, they provide iterators 28// that have operator*'s that return the number zeros rather than 29// the number of ones. 30 31class primary_index_inverted : public vector<uint32>::const_iterator { 32 public: 33 primary_index_inverted() {} 34 primary_index_inverted(vector<uint32>::const_iterator loc, 35 vector<uint32>::const_iterator begin) : 36 vector<uint32>::const_iterator(loc), begin_(begin) {} 37 uint32 operator*() { 38 return BitmapIndex::kStorageBitSize * BitmapIndex::kSecondaryBlockSize * 39 (1 + std::distance<vector<uint32>::const_iterator>(begin_, *this)) - 40 vector<uint32>::const_iterator::operator*(); 41 } 42 private: 43 vector<uint32>::const_iterator begin_; 44}; 45 46class secondary_index_inverted : public vector<uint16>::const_iterator { 47 public: 48 secondary_index_inverted() : vector<uint16>::const_iterator() {} 49 secondary_index_inverted(vector<uint16>::const_iterator loc, 50 vector<uint16>::const_iterator block_begin) : 51 vector<uint16>::const_iterator(loc), block_begin_(block_begin) {} 52 uint16 operator*() { 53 return ((1 + std::distance<vector<uint16>::const_iterator>( 54 block_begin_, *this)) << BitmapIndex::kStorageLogBitSize) - 55 vector<uint16>::const_iterator::operator*(); 56 } 57 private: 58 vector<uint16>::const_iterator block_begin_; 59}; 60 61size_t BitmapIndex::Rank1(size_t end) const { 62 if (end == 0) return 0; 63 CHECK_LE(end, Bits()); 64 const uint32 end_word = (end - 1) >> BitmapIndex::kStorageLogBitSize; 65 const uint32 sum = get_index_ones_count(end_word); 66 const uint64 zero = 0; 67 const uint64 ones = ~zero; 68 return sum + __builtin_popcountll(bits_[end_word] & 69 (ones >> (kStorageBitSize - (end & kStorageBlockMask)))); 70} 71 72size_t BitmapIndex::Select1(size_t bit_index) const { 73 if (bit_index >= GetOnesCount()) return Bits(); 74 // search primary index for the relevant block 75 uint32 rembits = bit_index + 1; 76 const uint32 block = find_primary_block(bit_index + 1); 77 uint32 offset = 0; 78 if (block > 0) { 79 rembits -= primary_index_[block - 1]; 80 offset += block * kSecondaryBlockSize; 81 } 82 // search the secondary index 83 uint32 word = find_secondary_block(offset, rembits); 84 if (word > 0) { 85 rembits -= secondary_index_[offset + word - 1]; 86 offset += word; 87 } 88 int nth = nth_bit(bits_[offset], rembits); 89 return (offset << BitmapIndex::kStorageLogBitSize) + nth; 90} 91 92size_t BitmapIndex::Select0(size_t bit_index) const { 93 if (bit_index >= Bits() - GetOnesCount()) return Bits(); 94 // search inverted primary index for relevant block 95 uint32 remzeros = bit_index + 1; 96 uint32 offset = 0; 97 const uint32 block = find_inverted_primary_block(bit_index + 1); 98 if (block > 0) { 99 remzeros -= *primary_index_inverted(primary_index_.begin() + block - 1, 100 primary_index_.begin()); 101 offset += block * kSecondaryBlockSize; 102 } 103 // search the inverted secondary index 104 uint32 word = find_inverted_secondary_block(offset, remzeros); 105 if (word > 0) { 106 vector<uint16>::const_iterator block_begin = 107 secondary_index_.begin() + offset; 108 remzeros -= *secondary_index_inverted(block_begin + word - 1, block_begin); 109 offset += word; 110 } 111 int nth = nth_bit(~bits_[offset], remzeros); 112 return (offset << BitmapIndex::kStorageLogBitSize) + nth; 113} 114 115size_t BitmapIndex::get_index_ones_count(size_t array_index) const { 116 uint32 sum = 0; 117 if (array_index > 0) { 118 sum += secondary_index_[array_index-1]; 119 uint32 end_block = (array_index - 1) / kSecondaryBlockSize; 120 if (end_block > 0) sum += primary_index_[end_block-1]; 121 } 122 return sum; 123} 124 125void BitmapIndex::BuildIndex(const uint64 *bits, size_t size) { 126 bits_ = bits; 127 size_ = size; 128 secondary_index_.clear(); 129 secondary_index_.reserve(ArraySize()); 130 primary_index_.clear(); 131 primary_index_.reserve(primary_index_size()); 132 const uint64 zero = 0; 133 const uint64 ones = ~zero; 134 uint32 popcount = 0; 135 for (uint32 block_begin = 0; block_begin < ArraySize(); 136 block_begin += kSecondaryBlockSize) { 137 uint32 block_popcount = 0; 138 uint32 block_end = block_begin + kSecondaryBlockSize; 139 if (block_end > ArraySize()) block_end = ArraySize(); 140 for (uint32 j = block_begin; j < block_end; ++j) { 141 uint64 mask = ones; 142 if (j == ArraySize() - 1) { 143 mask = ones >> (-size_ & BitmapIndex::kStorageBlockMask); 144 } 145 block_popcount += __builtin_popcountll(bits_[j] & mask); 146 secondary_index_.push_back(block_popcount); 147 } 148 popcount += block_popcount; 149 primary_index_.push_back(popcount); 150 } 151} 152 153size_t BitmapIndex::find_secondary_block( 154 size_t block_begin, size_t rem_bit_index) const { 155 size_t block_end = block_begin + kSecondaryBlockSize; 156 if (block_end > secondary_index_.size()) block_end = secondary_index_.size(); 157 return std::distance(secondary_index_.begin() + block_begin, 158 std::lower_bound(secondary_index_.begin() + block_begin, 159 secondary_index_.begin() + block_end, 160 rem_bit_index)); 161} 162 163size_t BitmapIndex::find_inverted_secondary_block( 164 size_t block_begin, size_t rem_bit_index) const { 165 size_t block_end = block_begin + kSecondaryBlockSize; 166 if (block_end > secondary_index_.size()) block_end = secondary_index_.size(); 167 secondary_index_inverted start(secondary_index_.begin() + block_begin, 168 secondary_index_.begin() + block_begin); 169 secondary_index_inverted end(secondary_index_.begin() + block_end, 170 secondary_index_.begin() + block_begin); 171 return std::distance(start, 172 std::lower_bound(start, end, rem_bit_index)); 173} 174 175inline size_t BitmapIndex::find_primary_block(size_t bit_index) const { 176 return std::distance(primary_index_.begin(), 177 std::lower_bound(primary_index_.begin(), 178 primary_index_.end(), bit_index)); 179} 180 181size_t BitmapIndex::find_inverted_primary_block(size_t bit_index) const { 182 primary_index_inverted start(primary_index_.begin(), primary_index_.begin()); 183 primary_index_inverted end(primary_index_.end(), primary_index_.begin()); 184 return std::distance(start, std::lower_bound(start, end, bit_index)); 185} 186 187} // end namespace fst 188