/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA

#include "tensorflow/core/common_runtime/gpu/pool_allocator.h"

#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/platform/test.h"

namespace gpu = ::perftools::gputools;

namespace tensorflow {
namespace {

// Returns the StreamExecutor for CUDA device ordinal 0.
//
// Every CUDAHostAllocator-backed test below needs this same lookup; the
// helper removes five copies of the platform/executor boilerplate.
// ValueOrDie() is acceptable in a test: a missing CUDA platform should
// abort the test run loudly.
gpu::StreamExecutor* PlatformDeviceExecutor() {
  gpu::Platform* platform =
      gpu::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  return platform->GetExecutor(gpu::StreamExecutorConfig(/*ordinal=*/0))
      .ValueOrDie();
}

TEST(PoolAllocatorTest, ZeroSizeBuffers) {
  PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
                     new CUDAHostAllocator(PlatformDeviceExecutor()),
                     new NoopRounder, "pool");

  // A zero-byte request yields nullptr, and deallocating nullptr is a no-op.
  EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/));
  pool.DeallocateRaw(nullptr);  // Should not crash.
  // Neither call should have touched the pool or the underlying allocator.
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
}

TEST(PoolAllocatorTest, ZeroSizePool) {
  PoolAllocator pool(0 /*pool_size_limit*/, false /*auto_resize*/,
                     new CUDAHostAllocator(PlatformDeviceExecutor()),
                     new NoopRounder, "pool");

  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());

  // All allocations should bypass the pool and return valid pointers.
  for (int i = 0; i < 3; ++i) {
    void* p0 = pool.AllocateRaw(4, 0);
    void* p4 = pool.AllocateRaw(4, 4);
    void* p12 = pool.AllocateRaw(4, 12);
    EXPECT_EQ(nullptr, p0);
    EXPECT_NE(nullptr, p4);
    EXPECT_NE(nullptr, p12);
    pool.DeallocateRaw(p0);
    pool.DeallocateRaw(p4);
    pool.DeallocateRaw(p12);
  }
  // With a zero-size pool, no gets/puts/evictions are ever recorded.
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
}

TEST(PoolAllocatorTest, Alignment) {
  PoolAllocator pool(0 /*pool_size_limit*/, false /*auto_resize*/,
                     new CUDAHostAllocator(PlatformDeviceExecutor()),
                     new NoopRounder, "pool");
  // Request the same odd size at every power-of-two alignment up to 32KiB and
  // verify the returned pointer honors the alignment.
  for (int i = 0; i < 16; ++i) {
    size_t alignment = 1 << i;
    void* p = pool.AllocateRaw(alignment, 111);
    EXPECT_NE(nullptr, p);
    EXPECT_EQ(0, reinterpret_cast<int64>(p) & (alignment - 1))
        << "ptr: " << p << " alignment " << alignment;
    // Intentionally don't deallocate, to test that destruction of
    // the PoolAllocator frees all pending memory.
  }
}

TEST(PoolAllocatorTest, AutoResize) {
  PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/,
                     new BasicCPUAllocator, new NoopRounder, "pool");

  // Alloc/dealloc 10 sizes just a few times, confirming pool size
  // stays at 2.
  for (int i = 0; i < 10; ++i) {
    void* p = pool.AllocateRaw(4, 64 << i);
    pool.DeallocateRaw(p);
  }
  // 10 distinct sizes were put into a 2-slot pool, so 8 were evicted and
  // none could ever be served from the pool.
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(10, pool.allocated_count());
  EXPECT_EQ(10, pool.put_count());
  EXPECT_EQ(8, pool.evicted_count());
  EXPECT_EQ(2, pool.size_limit());

  // Then repeat 1200 times. Pool size limit should jump to 100.
  for (int j = 0; j < 120; ++j) {
    for (int i = 0; i < 10; ++i) {
      void* p = pool.AllocateRaw(4, 64 << i);
      pool.DeallocateRaw(p);
    }
  }
  EXPECT_EQ(100, pool.size_limit());
}

TEST(PoolAllocatorTest, CudaHostAllocator) {
  PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
                     new CUDAHostAllocator(PlatformDeviceExecutor()),
                     new NoopRounder, "pool");

  // Repeatedly Get a 16-byte value, confirming that there's only
  // one real allocation.
  void* p1_16 = pool.AllocateRaw(4, 16);
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(1, pool.allocated_count());
  EXPECT_NE(nullptr, p1_16);
  pool.DeallocateRaw(p1_16);
  // Pool contents {16}
  EXPECT_EQ(1, pool.put_count());
  void* p2_16 = pool.AllocateRaw(4, 16);  // Get it again.
  EXPECT_EQ(1, pool.get_from_pool_count());
  EXPECT_EQ(1, pool.allocated_count());
  EXPECT_EQ(p1_16, p2_16);  // Same pointer value
  pool.DeallocateRaw(p2_16);  // Put it back.
  // Pool contents {16}
  EXPECT_EQ(2, pool.put_count());

  // Get two more values of different sizes.
  void* p3_4 = pool.AllocateRaw(4, 4);
  EXPECT_EQ(2, pool.allocated_count());
  EXPECT_NE(p1_16, p3_4);  // Different pointer value
  EXPECT_NE(nullptr, p3_4);
  pool.DeallocateRaw(p3_4);  // Put it back. Pool is now full.
  // Pool contents {4, 16}
  EXPECT_EQ(3, pool.put_count());
  void* p4_2 = pool.AllocateRaw(4, 2);  // Get a third size buffer.
  EXPECT_NE(nullptr, p4_2);
  EXPECT_EQ(0, pool.evicted_count());

  // The pool is full: when we put back p4_2, the 16-byte buffer
  // should be evicted since it was least recently inserted.
  pool.DeallocateRaw(p4_2);
  // Pool contents {2, 4}
  EXPECT_EQ(4, pool.put_count());
  EXPECT_EQ(1, pool.evicted_count());

  // Re-getting and putting size 2 or 4 should not alter pool size or
  // num-evicted.
  void* p5_4 = pool.AllocateRaw(4, 4);
  EXPECT_NE(nullptr, p5_4);
  pool.DeallocateRaw(p5_4);
  void* p6_2 = pool.AllocateRaw(4, 2);
  EXPECT_NE(nullptr, p6_2);
  pool.DeallocateRaw(p6_2);
  EXPECT_EQ(3, pool.get_from_pool_count());
  EXPECT_EQ(6, pool.put_count());
  EXPECT_EQ(3, pool.allocated_count());
  EXPECT_EQ(1, pool.evicted_count());

  // Clear() empties the pool and resets every counter to zero.
  pool.Clear();
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
}

TEST(PoolAllocatorTest, Pow2Rounder) {
  // RoundUp should return the smallest power of two >= its argument.
  Pow2Rounder rounder;
  EXPECT_EQ(1, rounder.RoundUp(1));
  EXPECT_EQ(2, rounder.RoundUp(2));
  EXPECT_EQ(16, rounder.RoundUp(9));
  EXPECT_EQ(16, rounder.RoundUp(16));
  EXPECT_EQ(65536, rounder.RoundUp(41234));
  EXPECT_EQ(65536, rounder.RoundUp(65535));
  EXPECT_EQ(65536, rounder.RoundUp(65536));
}

TEST(PoolAllocatorTest, Name) {
  PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
                     new CUDAHostAllocator(PlatformDeviceExecutor()),
                     new NoopRounder, "pool");
  // The name passed at construction is reported verbatim.
  EXPECT_EQ("pool", pool.Name());
}

}  // namespace
}  // namespace tensorflow

#endif  // GOOGLE_CUDA