1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#if GOOGLE_CUDA
17
#include "tensorflow/core/common_runtime/gpu/pool_allocator.h"

#include <cstddef>
#include <cstdint>

#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/platform/test.h"
22
23namespace gpu = ::perftools::gputools;
24
25namespace tensorflow {
26namespace {
27
28TEST(PoolAllocatorTest, ZeroSizeBuffers) {
29  gpu::Platform* platform =
30      gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
31  PoolAllocator pool(
32      2 /*pool_size_limit*/, false /*auto_resize*/,
33      new CUDAHostAllocator(
34          platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0))
35              .ValueOrDie()),
36      new NoopRounder, "pool");
37
38  EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/));
39  pool.DeallocateRaw(nullptr);  // Should not crash.
40  EXPECT_EQ(0, pool.get_from_pool_count());
41  EXPECT_EQ(0, pool.put_count());
42  EXPECT_EQ(0, pool.allocated_count());
43  EXPECT_EQ(0, pool.evicted_count());
44}
45
46TEST(PoolAllocatorTest, ZeroSizePool) {
47  gpu::Platform* platform =
48      gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
49  PoolAllocator pool(
50      0 /*pool_size_limit*/, false /*auto_resize*/,
51      new CUDAHostAllocator(
52          platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0))
53              .ValueOrDie()),
54      new NoopRounder, "pool");
55
56  EXPECT_EQ(0, pool.get_from_pool_count());
57  EXPECT_EQ(0, pool.put_count());
58  EXPECT_EQ(0, pool.allocated_count());
59  EXPECT_EQ(0, pool.evicted_count());
60
61  // All allocations should bypass the pool and return valid pointers.
62  for (int i = 0; i < 3; ++i) {
63    void* p0 = pool.AllocateRaw(4, 0);
64    void* p4 = pool.AllocateRaw(4, 4);
65    void* p12 = pool.AllocateRaw(4, 12);
66    EXPECT_EQ(nullptr, p0);
67    EXPECT_NE(nullptr, p4);
68    EXPECT_NE(nullptr, p12);
69    pool.DeallocateRaw(p0);
70    pool.DeallocateRaw(p4);
71    pool.DeallocateRaw(p12);
72  }
73  EXPECT_EQ(0, pool.get_from_pool_count());
74  EXPECT_EQ(0, pool.put_count());
75  EXPECT_EQ(0, pool.allocated_count());
76  EXPECT_EQ(0, pool.evicted_count());
77}
78
79TEST(PoolAllocatorTest, Alignment) {
80  gpu::Platform* platform =
81      gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
82  PoolAllocator pool(
83      0 /*pool_size_limit*/, false /*auto_resize*/,
84      new CUDAHostAllocator(
85          platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0))
86              .ValueOrDie()),
87      new NoopRounder, "pool");
88  for (int i = 0; i < 16; ++i) {
89    size_t alignment = 1 << i;
90    void* p = pool.AllocateRaw(alignment, 111);
91    EXPECT_TRUE(p != nullptr);
92    EXPECT_EQ(0, reinterpret_cast<int64>(p) & (alignment - 1))
93        << "ptr: " << p << " alignment " << alignment;
94    // Intentionally don't deallocate, to test that destruction of
95    // the PoolAllocator frees all pending memory.
96  }
97}
98
99TEST(PoolAllocatorTest, AutoResize) {
100  PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/,
101                     new BasicCPUAllocator, new NoopRounder, "pool");
102
103  // Alloc/dealloc 10 sizes just a few times, confirming pool size
104  // stays at 2.
105  for (int i = 0; i < 10; ++i) {
106    void* p = pool.AllocateRaw(4, 64 << i);
107    pool.DeallocateRaw(p);
108  }
109  EXPECT_EQ(0, pool.get_from_pool_count());
110  EXPECT_EQ(10, pool.allocated_count());
111  EXPECT_EQ(10, pool.put_count());
112  EXPECT_EQ(8, pool.evicted_count());
113  EXPECT_EQ(2, pool.size_limit());
114
115  // Then repeat 1200 times.  Pool size limit should jump to 100.
116  for (int j = 0; j < 120; ++j) {
117    for (int i = 0; i < 10; ++i) {
118      void* p = pool.AllocateRaw(4, 64 << i);
119      pool.DeallocateRaw(p);
120    }
121  }
122  EXPECT_EQ(100, pool.size_limit());
123}
124
125TEST(PoolAllocatorTest, CudaHostAllocator) {
126  gpu::Platform* platform =
127      gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
128  PoolAllocator pool(
129      2 /*pool_size_limit*/, false /*auto_resize*/,
130      new CUDAHostAllocator(
131          platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0))
132              .ValueOrDie()),
133      new NoopRounder, "pool");
134
135  // Repeatedly Get a 16-byte value, confirming that there's only
136  // one real allocation.
137  void* p1_16 = pool.AllocateRaw(4, 16);
138  EXPECT_EQ(0, pool.get_from_pool_count());
139  EXPECT_EQ(1, pool.allocated_count());
140  EXPECT_NE(nullptr, p1_16);
141  pool.DeallocateRaw(p1_16);
142  // Pool contents {16}
143  EXPECT_EQ(1, pool.put_count());
144  void* p2_16 = pool.AllocateRaw(4, 16);  // Get it again.
145  EXPECT_EQ(1, pool.get_from_pool_count());
146  EXPECT_EQ(1, pool.allocated_count());
147  EXPECT_EQ(p1_16, p2_16);    // Same pointer value
148  pool.DeallocateRaw(p2_16);  // Put it back.
149  // Pool contents {16}
150  EXPECT_EQ(2, pool.put_count());
151
152  // Get two more values of different sizes.
153  void* p3_4 = pool.AllocateRaw(4, 4);
154  EXPECT_EQ(2, pool.allocated_count());
155  EXPECT_NE(p1_16, p3_4);  // Different pointer value
156  EXPECT_NE(nullptr, p3_4);
157  pool.DeallocateRaw(p3_4);  // Put it back. Pool is now full.
158  // Pool contents {4, 16}
159  EXPECT_EQ(3, pool.put_count());
160  void* p4_2 = pool.AllocateRaw(4, 2);  // Get a third size buffer.
161  EXPECT_NE(nullptr, p4_2);
162  EXPECT_EQ(0, pool.evicted_count());
163
164  // The pool is full: when we put back p4_2, the 16-byte buffer
165  // should be evicted since it was least recently inserted.
166  pool.DeallocateRaw(p4_2);
167  // Pool contents {2, 4}
168  EXPECT_EQ(4, pool.put_count());
169  EXPECT_EQ(1, pool.evicted_count());
170
171  // Re-getting and putting size 2 or 4 should not alter pool size or
172  // num-evicted.
173  void* p5_4 = pool.AllocateRaw(4, 4);
174  EXPECT_NE(nullptr, p5_4);
175  pool.DeallocateRaw(p5_4);
176  void* p6_2 = pool.AllocateRaw(4, 2);
177  EXPECT_NE(nullptr, p6_2);
178  pool.DeallocateRaw(p6_2);
179  EXPECT_EQ(3, pool.get_from_pool_count());
180  EXPECT_EQ(6, pool.put_count());
181  EXPECT_EQ(3, pool.allocated_count());
182  EXPECT_EQ(1, pool.evicted_count());
183
184  pool.Clear();
185  EXPECT_EQ(0, pool.get_from_pool_count());
186  EXPECT_EQ(0, pool.put_count());
187  EXPECT_EQ(0, pool.allocated_count());
188  EXPECT_EQ(0, pool.evicted_count());
189}
190
191TEST(PoolAllocatorTest, Pow2Rounder) {
192  Pow2Rounder rounder;
193  EXPECT_EQ(1, rounder.RoundUp(1));
194  EXPECT_EQ(2, rounder.RoundUp(2));
195  EXPECT_EQ(16, rounder.RoundUp(9));
196  EXPECT_EQ(16, rounder.RoundUp(16));
197  EXPECT_EQ(65536, rounder.RoundUp(41234));
198  EXPECT_EQ(65536, rounder.RoundUp(65535));
199  EXPECT_EQ(65536, rounder.RoundUp(65536));
200}
201
202TEST(PoolAllocatorTest, Name) {
203  gpu::Platform* platform =
204      gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
205  PoolAllocator pool(
206      2 /*pool_size_limit*/, false /*auto_resize*/,
207      new CUDAHostAllocator(
208          platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0))
209              .ValueOrDie()),
210      new NoopRounder, "pool");
211  EXPECT_EQ("pool", pool.Name());
212}
213
214}  // namespace
215}  // namespace tensorflow
216
217#endif  // GOOGLE_CUDA
218