1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "hash_set.h"
18
19#include <map>
20#include <forward_list>
21#include <sstream>
22#include <string>
23#include <unordered_set>
24#include <vector>
25
26#include <gtest/gtest.h>
27#include "hash_map.h"
28
29namespace art {
30
31struct IsEmptyFnString {
32  void MakeEmpty(std::string& item) const {
33    item.clear();
34  }
35  bool IsEmpty(const std::string& item) const {
36    return item.empty();
37  }
38};
39
40class HashSetTest : public testing::Test {
41 public:
42  HashSetTest() : seed_(97421), unique_number_(0) {
43  }
44  std::string RandomString(size_t len) {
45    std::ostringstream oss;
46    for (size_t i = 0; i < len; ++i) {
47      oss << static_cast<char>('A' + PRand() % 64);
48    }
49    static_assert(' ' < 'A', "space must be less than a");
50    oss << " " << unique_number_++;  // Relies on ' ' < 'A'
51    return oss.str();
52  }
53  void SetSeed(size_t seed) {
54    seed_ = seed;
55  }
56  size_t PRand() {  // Pseudo random.
57    seed_ = seed_ * 1103515245 + 12345;
58    return seed_;
59  }
60
61 private:
62  size_t seed_;
63  size_t unique_number_;
64};
65
66TEST_F(HashSetTest, TestSmoke) {
67  HashSet<std::string, IsEmptyFnString> hash_set;
68  const std::string test_string = "hello world 1234";
69  ASSERT_TRUE(hash_set.Empty());
70  ASSERT_EQ(hash_set.Size(), 0U);
71  hash_set.Insert(test_string);
72  auto it = hash_set.Find(test_string);
73  ASSERT_EQ(*it, test_string);
74  auto after_it = hash_set.Erase(it);
75  ASSERT_TRUE(after_it == hash_set.end());
76  ASSERT_TRUE(hash_set.Empty());
77  ASSERT_EQ(hash_set.Size(), 0U);
78  it = hash_set.Find(test_string);
79  ASSERT_TRUE(it == hash_set.end());
80}
81
82TEST_F(HashSetTest, TestInsertAndErase) {
83  HashSet<std::string, IsEmptyFnString> hash_set;
84  static constexpr size_t count = 1000;
85  std::vector<std::string> strings;
86  for (size_t i = 0; i < count; ++i) {
87    // Insert a bunch of elements and make sure we can find them.
88    strings.push_back(RandomString(10));
89    hash_set.Insert(strings[i]);
90    auto it = hash_set.Find(strings[i]);
91    ASSERT_TRUE(it != hash_set.end());
92    ASSERT_EQ(*it, strings[i]);
93  }
94  ASSERT_EQ(strings.size(), hash_set.Size());
95  // Try to erase the odd strings.
96  for (size_t i = 1; i < count; i += 2) {
97    auto it = hash_set.Find(strings[i]);
98    ASSERT_TRUE(it != hash_set.end());
99    ASSERT_EQ(*it, strings[i]);
100    hash_set.Erase(it);
101  }
102  // Test removed.
103  for (size_t i = 1; i < count; i += 2) {
104    auto it = hash_set.Find(strings[i]);
105    ASSERT_TRUE(it == hash_set.end());
106  }
107  for (size_t i = 0; i < count; i += 2) {
108    auto it = hash_set.Find(strings[i]);
109    ASSERT_TRUE(it != hash_set.end());
110    ASSERT_EQ(*it, strings[i]);
111  }
112}
113
114TEST_F(HashSetTest, TestIterator) {
115  HashSet<std::string, IsEmptyFnString> hash_set;
116  ASSERT_TRUE(hash_set.begin() == hash_set.end());
117  static constexpr size_t count = 1000;
118  std::vector<std::string> strings;
119  for (size_t i = 0; i < count; ++i) {
120    // Insert a bunch of elements and make sure we can find them.
121    strings.push_back(RandomString(10));
122    hash_set.Insert(strings[i]);
123  }
124  // Make sure we visit each string exactly once.
125  std::map<std::string, size_t> found_count;
126  for (const std::string& s : hash_set) {
127    ++found_count[s];
128  }
129  for (size_t i = 0; i < count; ++i) {
130    ASSERT_EQ(found_count[strings[i]], 1U);
131  }
132  found_count.clear();
133  // Remove all the elements with iterator erase.
134  for (auto it = hash_set.begin(); it != hash_set.end();) {
135    ++found_count[*it];
136    it = hash_set.Erase(it);
137    ASSERT_EQ(hash_set.Verify(), 0U);
138  }
139  for (size_t i = 0; i < count; ++i) {
140    ASSERT_EQ(found_count[strings[i]], 1U);
141  }
142}
143
144TEST_F(HashSetTest, TestSwap) {
145  HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
146  std::vector<std::string> strings;
147  static constexpr size_t count = 1000;
148  for (size_t i = 0; i < count; ++i) {
149    strings.push_back(RandomString(10));
150    hash_seta.Insert(strings[i]);
151  }
152  std::swap(hash_seta, hash_setb);
153  hash_seta.Insert("TEST");
154  hash_setb.Insert("TEST2");
155  for (size_t i = 0; i < count; ++i) {
156    strings.push_back(RandomString(10));
157    hash_seta.Insert(strings[i]);
158  }
159}
160
161TEST_F(HashSetTest, TestShrink) {
162  HashSet<std::string, IsEmptyFnString> hash_set;
163  std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
164  for (size_t i = 0; i < strings.size(); ++i) {
165    // Insert some strings into the beginning of our hash set to establish an initial size
166    hash_set.Insert(strings[i]);
167  }
168
169  hash_set.ShrinkToMaximumLoad();
170  const double initial_load = hash_set.CalculateLoadFactor();
171
172  // Insert a bunch of random strings to guarantee that we grow the capacity.
173  std::vector<std::string> random_strings;
174  static constexpr size_t count = 1000;
175  for (size_t i = 0; i < count; ++i) {
176    random_strings.push_back(RandomString(10));
177    hash_set.Insert(random_strings[i]);
178  }
179
180  // Erase all the extra strings which guarantees that our load factor will be really bad.
181  for (size_t i = 0; i < count; ++i) {
182    hash_set.Erase(hash_set.Find(random_strings[i]));
183  }
184
185  const double bad_load = hash_set.CalculateLoadFactor();
186  EXPECT_GT(initial_load, bad_load);
187
188  // Shrink again, the load factor should be good again.
189  hash_set.ShrinkToMaximumLoad();
190  EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());
191
192  // Make sure all the initial elements we had are still there
193  for (const std::string& initial_string : strings) {
194    EXPECT_NE(hash_set.end(), hash_set.Find(initial_string))
195        << "expected to find " << initial_string;
196  }
197}
198
199TEST_F(HashSetTest, TestLoadFactor) {
200  HashSet<std::string, IsEmptyFnString> hash_set;
201  static constexpr size_t kStringCount = 1000;
202  static constexpr double kEpsilon = 0.01;
203  for (size_t i = 0; i < kStringCount; ++i) {
204    hash_set.Insert(RandomString(i % 10 + 1));
205  }
206  // Check that changing the load factor resizes the table to be within the target range.
207  EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor());
208  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
209  hash_set.SetLoadFactor(0.1, 0.3);
210  EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor());
211  EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor());
212  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
213  hash_set.SetLoadFactor(0.6, 0.8);
214  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
215}
216
217TEST_F(HashSetTest, TestStress) {
218  HashSet<std::string, IsEmptyFnString> hash_set;
219  std::unordered_multiset<std::string> std_set;
220  std::vector<std::string> strings;
221  static constexpr size_t string_count = 2000;
222  static constexpr size_t operations = 100000;
223  static constexpr size_t target_size = 5000;
224  for (size_t i = 0; i < string_count; ++i) {
225    strings.push_back(RandomString(i % 10 + 1));
226  }
227  const size_t seed = time(nullptr);
228  SetSeed(seed);
229  LOG(INFO) << "Starting stress test with seed " << seed;
230  for (size_t i = 0; i < operations; ++i) {
231    ASSERT_EQ(hash_set.Size(), std_set.size());
232    size_t delta = std::abs(static_cast<ssize_t>(target_size) -
233                            static_cast<ssize_t>(hash_set.Size()));
234    size_t n = PRand();
235    if (n % target_size == 0) {
236      hash_set.Clear();
237      std_set.clear();
238      ASSERT_TRUE(hash_set.Empty());
239      ASSERT_TRUE(std_set.empty());
240    } else  if (n % target_size < delta) {
241      // Skew towards adding elements until we are at the desired size.
242      const std::string& s = strings[PRand() % string_count];
243      hash_set.Insert(s);
244      std_set.insert(s);
245      ASSERT_EQ(*hash_set.Find(s), *std_set.find(s));
246    } else {
247      const std::string& s = strings[PRand() % string_count];
248      auto it1 = hash_set.Find(s);
249      auto it2 = std_set.find(s);
250      ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
251      if (it1 != hash_set.end()) {
252        ASSERT_EQ(*it1, *it2);
253        hash_set.Erase(it1);
254        std_set.erase(it2);
255      }
256    }
257  }
258}
259
260struct IsEmptyStringPair {
261  void MakeEmpty(std::pair<std::string, int>& pair) const {
262    pair.first.clear();
263  }
264  bool IsEmpty(const std::pair<std::string, int>& pair) const {
265    return pair.first.empty();
266  }
267};
268
269TEST_F(HashSetTest, TestHashMap) {
270  HashMap<std::string, int, IsEmptyStringPair> hash_map;
271  hash_map.Insert(std::make_pair(std::string("abcd"), 123));
272  hash_map.Insert(std::make_pair(std::string("abcd"), 124));
273  hash_map.Insert(std::make_pair(std::string("bags"), 444));
274  auto it = hash_map.Find(std::string("abcd"));
275  ASSERT_EQ(it->second, 123);
276  hash_map.Erase(it);
277  it = hash_map.Find(std::string("abcd"));
278  ASSERT_EQ(it->second, 124);
279}
280
281struct IsEmptyFnVectorInt {
282  void MakeEmpty(std::vector<int>& item) const {
283    item.clear();
284  }
285  bool IsEmpty(const std::vector<int>& item) const {
286    return item.empty();
287  }
288};
289
290template <typename T>
291size_t HashIntSequence(T begin, T end) {
292  size_t hash = 0;
293  for (auto iter = begin; iter != end; ++iter) {
294    hash = hash * 2 + *iter;
295  }
296  return hash;
297};
298
299struct VectorIntHashEquals {
300  std::size_t operator()(const std::vector<int>& item) const {
301    return HashIntSequence(item.begin(), item.end());
302  }
303
304  std::size_t operator()(const std::forward_list<int>& item) const {
305    return HashIntSequence(item.begin(), item.end());
306  }
307
308  bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
309    return a == b;
310  }
311
312  bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
313    auto aiter = a.begin();
314    auto biter = b.begin();
315    while (aiter != a.end() && biter != b.end()) {
316      if (*aiter != *biter) {
317        return false;
318      }
319      aiter++;
320      biter++;
321    }
322    return (aiter == a.end() && biter == b.end());
323  }
324};
325
326TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
327  HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set;
328  hash_set.Insert(std::vector<int>({1, 2, 3, 4}));
329  hash_set.Insert(std::vector<int>({4, 2}));
330  ASSERT_EQ(hash_set.end(), hash_set.Find(std::vector<int>({1, 1, 1, 1})));
331  ASSERT_NE(hash_set.end(), hash_set.Find(std::vector<int>({1, 2, 3, 4})));
332  ASSERT_EQ(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 1, 1, 1})));
333  ASSERT_NE(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 2, 3, 4})));
334}
335
336TEST_F(HashSetTest, TestReserve) {
337  HashSet<std::string, IsEmptyFnString> hash_set;
338  std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
339  for (size_t size : sizes) {
340    hash_set.Reserve(size);
341    const size_t buckets_before = hash_set.NumBuckets();
342    // Check that we expanded enough.
343    CHECK_GE(hash_set.ElementsUntilExpand(), size);
344    // Try inserting elements until we are at our reserve size and ensure the hash set did not
345    // expand.
346    while (hash_set.Size() < size) {
347      hash_set.Insert(std::to_string(hash_set.Size()));
348    }
349    CHECK_EQ(hash_set.NumBuckets(), buckets_before);
350  }
351  // Check the behaviour for shrinking, it does not necessarily resize down.
352  constexpr size_t size = 100;
353  hash_set.Reserve(size);
354  CHECK_GE(hash_set.ElementsUntilExpand(), size);
355}
356
357}  // namespace art
358