/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA

#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"

#include <atomic>

#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/protobuf/config.pb.h"

namespace gpu = ::perftools::gputools;

namespace tensorflow {

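// Helper that exposes EventMgr's internal state for testing.  It reaches
// into private members (mu_, used_events_, free_events_), so EventMgr
// presumably declares it a friend; see gpu_event_mgr.h.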
class TEST_EventMgrHelper {
 public:
  explicit TEST_EventMgrHelper(EventMgr* em) : em_(em) {
    // The polling loop can interfere with the measurements made here, and
    // it isn't needed since the member PollEvents() always clears the queue.
    // Note that this makes the tested behavior slightly different from what
    // may occur in ordinary execution.
    StopPollingLoop();
  }

  size_t queue_size() {
    mutex_lock l(em_->mu_);
    return em_->used_events_.size();
  }

  size_t free_size() {
    mutex_lock l(em_->mu_);
    return em_->free_events_.size();
  }

  void QueueTensors(gpu::Stream* stream, TensorReferenceVector* tensors) {
    mutex_lock l(em_->mu_);
    em_->QueueTensors(stream, tensors);
  }

  void PollEvents(bool is_dedicated_poller) {
    while (queue_size() > 0) {
      // For ordinary tensor frees, this function should synchronously
      // harvest all complete events and execute the corresponding memory
      // frees.
      EventMgr::ToFreeVector to_free;
      {
        mutex_lock l(em_->mu_);
        em_->PollEvents(is_dedicated_poller, &to_free);
      }
      em_->FreeMemory(to_free);
    }
  }

  void StopPollingLoop() { em_->StopPollingLoop(); }

  void StartPollingLoop() { em_->StartPollingLoop(); }

 private:
  EventMgr* em_;
};

static std::atomic_int_fast64_t live_tensor_bytes(0);

// A TensorBuffer that counts live memory usage, for testing.
class TestTensorBuffer : public TensorBuffer {
 public:
  explicit TestTensorBuffer(size_t bytes) : bytes_(bytes) {
    live_tensor_bytes += bytes_;
  }
  ~TestTensorBuffer() override { live_tensor_bytes -= bytes_; }

  size_t size() const override { return bytes_; }

  // Not used in this test.
  void* data() const override { return nullptr; }
  TensorBuffer* root_buffer() override { return nullptr; }
  void FillAllocationDescription(AllocationDescription* arg) const override {}

 private:
  size_t bytes_;
};
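
// For illustration, a minimal sketch of the accounting above (names as in
// this file): the buffer's lifetime, and hence live_tensor_bytes, is
// governed purely by its reference count.
//
//   TestTensorBuffer* buf = new TestTensorBuffer(1024);  // live_tensor_bytes += 1024
//   TensorReference ref(buf);  // ref takes its own reference
//   buf->Unref();              // drops the reference from `new`; buf still live
//   ref.Unref();               // last reference gone; live_tensor_bytes -= 1024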

namespace {

TEST(EventMgr, Empty) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, th.queue_size());
  EXPECT_EQ(0, th.free_size());
}

static void AddTensorReference(TensorReferenceVector* v, int64 size) {
  TestTensorBuffer* buf = new TestTensorBuffer(size);
  v->push_back(TensorReference(buf));
  buf->Unref();
}
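
// Note: the TensorReference constructor acquires its own reference to the
// buffer, so the Unref() above only drops the initial reference from `new`.
// The buffer stays live until the TensorReference itself is unreffed, which
// in these tests happens when EventMgr frees the queued tensors.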

// Delaying polling until after several enqueuings should grow the
// total number of allocated events.  Once we have enough events for
// the max simultaneously pending, we should not allocate any more.
TEST(EventMgr, DelayedPolling) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, th.queue_size());
  TensorReferenceVector* v = nullptr;
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    v = new TensorReferenceVector;
    AddTensorReference(v, 100 * 1048576);  // 100 MiB
    th.QueueTensors(stream.get(), v);
    EXPECT_EQ(i + 1, th.queue_size());
    EXPECT_EQ(0, th.free_size());
  }
  th.PollEvents(false);
  EXPECT_EQ(0, th.queue_size());
  EXPECT_EQ(5, th.free_size());
  // Subsequent rounds should draw events from the free pool rather than
  // allocating new ones: free_size shrinks as queue_size grows.
  for (int j = 0; j < 2; ++j) {
    for (int i = 0; i < 5; ++i) {
      v = new TensorReferenceVector;
      AddTensorReference(v, 100 * 1048576);  // 100 MiB
      th.QueueTensors(stream.get(), v);
      EXPECT_EQ(i + 1, th.queue_size());
      EXPECT_EQ(4 - i, th.free_size());
    }
    th.PollEvents(false);
    EXPECT_EQ(0, th.queue_size());
    EXPECT_EQ(5, th.free_size());
  }
}

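// The next three tests exercise EventMgr's buffering heuristics for
// ThenDeleteTensors.  Judging by the scenarios below, queued deletions
// appear to be flushed to the stream when a single large tensor arrives,
// when enough small tensors have accumulated, or when the stream changes;
// the tests only assert on the observable effect (live_tensor_bytes).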
TEST(EventMgr, FlushLargeTensorImmediately) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, live_tensor_bytes);
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    TensorReferenceVector v;
    AddTensorReference(&v, 100 * 1048576);  // 100 MiB
    em.ThenDeleteTensors(stream.get(), v);
    th.PollEvents(false);  // Ensure things get registered to be freed by Poll
    EXPECT_EQ(0, live_tensor_bytes);
  }
}

TEST(EventMgr, ManySmallTensorsFlushedImmediately) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, live_tensor_bytes);
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    TensorReferenceVector v;
    for (int j = 0; j < 1000; ++j) {
      AddTensorReference(&v, 100 * 1024);  // 100 KiB
    }
    em.ThenDeleteTensors(stream.get(), v);
    th.PollEvents(false);  // Harvest the tensors ready to be freed.
    EXPECT_EQ(0, live_tensor_bytes);
  }
}

TEST(EventMgr, StreamSwitchingFlushesImmediately) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, live_tensor_bytes);
  std::unique_ptr<gpu::Stream> stream1(new gpu::Stream(stream_exec));
  std::unique_ptr<gpu::Stream> stream2(new gpu::Stream(stream_exec));
  stream1->Init();
  stream2->Init();
  TensorReferenceVector v1;
  AddTensorReference(&v1, 1024);
  em.ThenDeleteTensors(stream1.get(), v1);

  TensorReferenceVector v2;
  AddTensorReference(&v2, 1024);
  int64 initial_live_bytes = live_tensor_bytes;
  em.ThenDeleteTensors(stream2.get(), v2);
  th.PollEvents(false);  // Ensure things get registered to be freed by Poll
  // Queueing on a different stream should have caused the first tensor to
  // be deleted.
  EXPECT_GT(initial_live_bytes, live_tensor_bytes);
}

TEST(EventMgr, ManySmallTensorsSeparateCallsFlushed) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, live_tensor_bytes);
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    for (int j = 0; j < 1000; ++j) {
      TensorReferenceVector v;
      AddTensorReference(&v, 100 * 1024);  // 100 KiB
      em.ThenDeleteTensors(stream.get(), v);
    }
    th.PollEvents(false);  // Ensure things get registered to be freed by Poll
    // At least some of the tensors should have been flushed by now.
    EXPECT_GT(1000 * 100 * 1024, live_tensor_bytes);
  }
}

// Deleting the EventMgr when events are still pending should shut
// down gracefully.
TEST(EventMgr, NonEmptyShutdown) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, th.queue_size());
  EXPECT_EQ(0, th.free_size());
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    TensorReferenceVector* v = new TensorReferenceVector;
    AddTensorReference(v, 100 * 1048576);  // 100 MiB
    th.QueueTensors(stream.get(), v);
    EXPECT_EQ(1 + i, th.queue_size());
    EXPECT_EQ(0, th.free_size());
  }
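  // `em` is destroyed here with five events still pending; its destructor
  // is expected to clean them up without crashing or leaking.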
}

}  // namespace
}  // namespace tensorflow

#endif  // GOOGLE_CUDA