/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <atomic>

#include "tensorflow/core/common_runtime/bfc_allocator.h"

#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/gtl/stl_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

// Constructs a best-fit-with-coalescing allocator that carves chunks out of
// regions obtained from `sub_allocator`, never exceeding `total_memory`
// bytes in total.  When `allow_growth` is true, regions are acquired lazily,
// starting small (1MiB) and doubling; otherwise one region of the full
// `total_memory` is requested up front by the first Extend().
BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
                           bool allow_growth, const string& name)
    : suballocator_(sub_allocator),
      name_(name),
      free_chunks_list_(kInvalidChunkHandle),
      next_allocation_id_(1) {
  if (allow_growth) {
    // 1MiB smallest initial allocation, unless total memory available
    // is less.
    curr_region_allocation_bytes_ =
        RoundedBytes(std::min(total_memory, size_t{1048576}));
  } else {
    curr_region_allocation_bytes_ = RoundedBytes(total_memory);
  }

  // Allocate the requested amount of memory.
  memory_limit_ = total_memory;
  stats_.bytes_limit = static_cast<int64>(total_memory);

  // Create a bunch of bins of various good sizes.

  // We create bins to fit all possible ranges that cover the
  // memory_limit_ starting from allocations up to 256 bytes to
  // allocations up to (and including) the memory limit.
  //
  // Bins live in preallocated storage inside this object, so they are
  // constructed here with placement new and destroyed explicitly in the
  // destructor (BinFromIndex(b)->~Bin()).
  for (BinNum b = 0; b < kNumBins; b++) {
    size_t bin_size = BinNumToSize(b);
    VLOG(1) << "Creating bin of max chunk size "
            << strings::HumanReadableNumBytes(bin_size);
    new (BinFromIndex(b)) Bin(this, bin_size);
    // Sanity-check that BinForSize maps the whole [bin_size, 2*bin_size)
    // range onto this bin and nothing beyond it.
    CHECK_EQ(BinForSize(bin_size), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size + 255), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size * 2 - 1), BinFromIndex(b));
    if (b + 1 < kNumBins) {
      CHECK_NE(BinForSize(bin_size * 2), BinFromIndex(b));
    }
  }
}

BFCAllocator::~BFCAllocator() {
  // Return memory back.
  VLOG(2) << "Number of regions allocated: "
          << region_manager_.regions().size();
  for (const auto& region : region_manager_.regions()) {
    suballocator_->Free(region.ptr(), region.memory_size());
  }

  // Bins were placement-new'd in the constructor, so destroy them manually.
  for (BinNum b = 0; b < kNumBins; b++) {
    BinFromIndex(b)->~Bin();
  }
}

// Translates a chunk handle into a pointer to its Chunk record.
// NOTE: the returned pointer is into chunks_ and is invalidated whenever
// chunks_ grows (see AllocateChunk); callers re-fetch after any operation
// that may allocate a chunk.
BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) {
  DCHECK_GE(h, 0);
  DCHECK_LT(h, static_cast<int>(chunks_.size()));
  return &(chunks_[h]);
}

// Tries to acquire one more region from the sub-allocator, large enough to
// hold at least `rounded_bytes`.  Returns false if the memory_limit_ budget
// is exhausted or the sub-allocator cannot supply the memory.  Called with
// lock_ held (see AllocateRawInternal).
bool BFCAllocator::Extend(size_t rounded_bytes) {
  size_t available_bytes = memory_limit_ - total_region_allocated_bytes_;
  // Rounds available_bytes down to the nearest multiple of kMinAllocationSize.
  available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize;

  // Do we have enough space to handle the client's request?
  // If not, fail immediately.
  if (rounded_bytes > available_bytes) {
    return false;
  }

  // If curr_region_allocation_bytes_ is not enough to satisfy the
  // allocation, keep multiplying by a power of two until that is
  // sufficient.
  bool increased_allocation = false;
  while (rounded_bytes > curr_region_allocation_bytes_) {
    curr_region_allocation_bytes_ *= 2;
    increased_allocation = true;
  }

  // Try allocating.
  size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
  void* mem_addr = suballocator_->Alloc(32, bytes);
  if (mem_addr == nullptr && !started_backpedal_) {
    // Only backpedal once: after the first failure, permanently switch to
    // asking for progressively smaller regions.
    started_backpedal_ = true;

    static constexpr float kBackpedalFactor = 0.9;

    // Try allocating less memory.
    while (mem_addr == nullptr) {
      bytes = RoundedBytes(bytes * kBackpedalFactor);
      // Give up once backpedaling would no longer satisfy this request.
      if (bytes < rounded_bytes) break;
      mem_addr = suballocator_->Alloc(32, bytes);
    }
  }

  if (mem_addr == nullptr) {
    return false;
  }

  if (!increased_allocation) {
    // Increase the region size of the next required allocation.
    curr_region_allocation_bytes_ *= 2;
  }

  VLOG(1) << "Extending allocation by " << strings::HumanReadableNumBytes(bytes)
          << " bytes.";

  total_region_allocated_bytes_ += bytes;
  VLOG(1) << "Total allocated bytes: "
          << strings::HumanReadableNumBytes(total_region_allocated_bytes_);

  VLOG(1) << "Allocated memory at " << mem_addr << " to "
          << static_cast<void*>(static_cast<char*>(mem_addr) + bytes);
  region_manager_.AddAllocationRegion(mem_addr, bytes);

  // Create one large chunk for the whole memory space that will
  // be chunked later.
  ChunkHandle h = AllocateChunk();
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  c->ptr = mem_addr;
  c->size = bytes;
  c->allocation_id = -1;  // -1 marks the chunk as free.
  c->prev = kInvalidChunkHandle;
  c->next = kInvalidChunkHandle;

  region_manager_.set_handle(c->ptr, h);

  // TODO(vrv): Try to merge this new region with an existing region,
  // if the address space is contiguous, to avoid fragmentation
  // across regions.

  // Insert the chunk into the right bin.
  InsertFreeChunkIntoBin(h);

  // Invoke visitors on newly allocated region.
  for (const auto& visitor : region_visitors_) {
    visitor(mem_addr, bytes);
  }
  return true;
}

// Returns a handle to a fresh Chunk record, recycling one from the free
// list when available.  The free list is threaded through Chunk::next.
// NOTE: growing chunks_ invalidates all outstanding Chunk* pointers.
BFCAllocator::ChunkHandle BFCAllocator::AllocateChunk() {
  if (free_chunks_list_ != kInvalidChunkHandle) {
    ChunkHandle h = free_chunks_list_;
    Chunk* c = ChunkFromHandle(h);
    free_chunks_list_ = c->next;
    return h;
  } else {
    ChunkHandle h = chunks_.size();
    chunks_.resize(h + 1);
    return h;
  }
}

// Returns a Chunk record to the free list for reuse by AllocateChunk.
void BFCAllocator::DeallocateChunk(ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  c->next = free_chunks_list_;
  free_chunks_list_ = h;
}

// Allocates num_bytes, retrying (with exponential backoff via retry_helper_,
// for up to 10 seconds) when the first attempt fails, on the theory that
// another thread may free memory in the meantime.
void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes) {
  // Fast path: Try once to allocate without getting the retry_helper_ involved
  void* r = AllocateRawInternal(unused_alignment, num_bytes, false);
  if (r != nullptr) {
    return r;
  } else {
    static const int64 kMaxMillisToWait = 10000;  // 10 seconds
    return retry_helper_.AllocateRaw(
        [this](size_t a, size_t nb, bool v) {
          return AllocateRawInternal(a, nb, v);
        },
        kMaxMillisToWait, unused_alignment, num_bytes);
  }
}

// Attribute-aware variant: when the caller declares the allocation optional
// (no_retry_on_failure), fail fast instead of retrying, and log at most a
// handful of warnings process-wide.
void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes,
                                const AllocationAttributes& allocation_attr) {
  if (allocation_attr.no_retry_on_failure) {
    // Return immediately upon the first failure if this is for allocating an
    // optional scratch space.
    bool dump_log_on_failure = VLOG_IS_ON(2);
    void* result =
        AllocateRawInternal(unused_alignment, num_bytes, dump_log_on_failure);
    if (result == nullptr) {
      // Cap the number of warnings at ~10.  The load/store pair is not a
      // single atomic increment, so concurrent failures may log slightly
      // more than 10 times; that is acceptable for a log-rate limiter.
      static std::atomic<int32> log_counter{0};
      int32 counter_value = log_counter.load(std::memory_order_relaxed);
      if (counter_value < 10) {
        log_counter.store(counter_value + 1, std::memory_order_relaxed);
        LOG(WARNING)
            << "Allocator (" << Name() << ") ran out of memory trying "
            << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
            << ". The caller indicates that this is not a failure, but"
            << " may mean that there could be performance gains if more"
            << " memory were available.";
      }
    }
    return result;
  } else {
    return AllocateRaw(unused_alignment, num_bytes);
  }
}

// static
// Rounds `bytes` up to the next multiple of kMinAllocationSize, so every
// chunk size (and therefore every returned address offset) stays aligned.
size_t BFCAllocator::RoundedBytes(size_t bytes) {
  size_t rounded_bytes =
      (kMinAllocationSize *
       ((bytes + kMinAllocationSize - 1) / kMinAllocationSize));
  DCHECK_EQ(size_t{0}, rounded_bytes % kMinAllocationSize);
  return rounded_bytes;
}

// Single allocation attempt: look for a free chunk, and if none fits, try
// to extend with a new region and look again.  Returns nullptr on failure,
// optionally dumping the allocator state to the log for analysis.
void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
                                        size_t num_bytes,
                                        bool dump_log_on_failure) {
  if (num_bytes == 0) {
    LOG(ERROR) << "tried to allocate 0 bytes";
    return nullptr;
  }
  // First, always allocate memory of at least kMinAllocationSize
  // bytes, and always allocate multiples of kMinAllocationSize bytes
  // so all memory addresses are nicely byte aligned.
  size_t rounded_bytes = RoundedBytes(num_bytes);

  // The BFC allocator tries to find the best fit first.
  BinNum bin_num = BinNumForSize(rounded_bytes);

  // All chunk/bin/region bookkeeping below happens under lock_.
  mutex_lock l(lock_);
  void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
  if (ptr != nullptr) {
    return ptr;
  }

  // Try to extend
  if (Extend(rounded_bytes)) {
    ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes);
    if (ptr != nullptr) {
      return ptr;
    }
  }

  // We searched all bins for an existing free chunk to use and
  // couldn't find one.  This means we must have run out of memory,
  // Dump the memory log for analysis.
  if (dump_log_on_failure) {
    LOG(WARNING) << "Allocator (" << Name() << ") ran out of memory trying "
                 << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
                 << ". Current allocation summary follows.";
    DumpMemoryLog(rounded_bytes);
    LOG(WARNING) << RenderOccupancy();
  }
  return nullptr;
}

// Scans bins from `bin_num` upward for the smallest free chunk of at least
// `rounded_bytes` (best fit: bins are size-ordered and each bin's free set
// is iterated smallest-first), marks it in use, and returns its pointer.
// Returns nullptr if no bin has a large-enough free chunk.  Called with
// lock_ held (see AllocateRawInternal).
void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes,
                                 size_t num_bytes) {
  // First identify the first bin that could satisfy rounded_bytes.
  for (; bin_num < kNumBins; bin_num++) {
    // Start searching from the first bin for the smallest chunk that fits
    // rounded_bytes.
    Bin* b = BinFromIndex(bin_num);
    for (auto citer = b->free_chunks.begin(); citer != b->free_chunks.end();
         ++citer) {
      const BFCAllocator::ChunkHandle h = (*citer);
      BFCAllocator::Chunk* chunk = ChunkFromHandle(h);
      DCHECK(!chunk->in_use());
      if (chunk->size >= rounded_bytes) {
        // We found an existing chunk that fits us that wasn't in use, so remove
        // it from the free bin structure prior to using.
        RemoveFreeChunkIterFromBin(&b->free_chunks, citer);

        // If we can break the size of the chunk into two reasonably large
        // pieces, do so.  In any case don't waste more than
        // kMaxInternalFragmentation bytes on padding this alloc.
        const int64 kMaxInternalFragmentation = 128 << 20;  // 128mb
        if (chunk->size >= rounded_bytes * 2 ||
            static_cast<int64>(chunk->size) - rounded_bytes >=
                kMaxInternalFragmentation) {
          SplitChunk(h, rounded_bytes);
          // SplitChunk may grow chunks_, invalidating `chunk`.
          chunk = ChunkFromHandle(h);  // Update chunk pointer in case it moved
        }

        // The requested size of the returned chunk is what the user
        // has allocated.
        chunk->requested_size = num_bytes;
        // Assign a unique id and increment the id counter, marking the
        // chunk as being in use.
        chunk->allocation_id = next_allocation_id_++;

        // Update stats.
        ++stats_.num_allocs;
        stats_.bytes_in_use += chunk->size;
        stats_.max_bytes_in_use =
            std::max(stats_.max_bytes_in_use, stats_.bytes_in_use);
        stats_.max_alloc_size =
            std::max<std::size_t>(stats_.max_alloc_size, chunk->size);

        VLOG(4) << "Returning: " << chunk->ptr;
        if (VLOG_IS_ON(4)) {
          LOG(INFO) << "A: " << RenderOccupancy();
        }
        return chunk->ptr;
      }
    }
  }

  return nullptr;
}

// Splits the free chunk at `h` into two: the first keeps `num_bytes`, and a
// new free chunk covering the remainder is linked in after it and re-binned.
// Called with lock_ held (reached only via FindChunkPtr).
void BFCAllocator::SplitChunk(BFCAllocator::ChunkHandle h, size_t num_bytes) {
  // Allocate the new chunk before we do any ChunkFromHandle
  // (AllocateChunk may grow chunks_ and invalidate Chunk* pointers).
  ChunkHandle h_new_chunk = AllocateChunk();

  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));

  // Create a new chunk starting num_bytes after c
  BFCAllocator::Chunk* new_chunk = ChunkFromHandle(h_new_chunk);
  new_chunk->ptr = static_cast<void*>(static_cast<char*>(c->ptr) + num_bytes);
  region_manager_.set_handle(new_chunk->ptr, h_new_chunk);

  // Set the new sizes of the chunks.
  new_chunk->size = c->size - num_bytes;
  c->size = num_bytes;

  // The new chunk is not in use.
  new_chunk->allocation_id = -1;

  // Maintain the pointers.
  // c <-> c_neighbor becomes
  // c <-> new_chunk <-> c_neighbor
  BFCAllocator::ChunkHandle h_neighbor = c->next;
  new_chunk->prev = h;
  new_chunk->next = h_neighbor;
  c->next = h_new_chunk;
  if (h_neighbor != kInvalidChunkHandle) {
    Chunk* c_neighbor = ChunkFromHandle(h_neighbor);
    c_neighbor->prev = h_new_chunk;
  }

  // Add the newly free chunk to the free bin.
  InsertFreeChunkIntoBin(h_new_chunk);
}

// Public deallocation entry point; wakes any allocations blocked in the
// retry helper since memory may now be available.
void BFCAllocator::DeallocateRaw(void* ptr) {
  DeallocateRawInternal(ptr);
  retry_helper_.NotifyDealloc();
}

// Looks up the chunk owning `ptr`, frees it, and coalesces with free
// neighbors under lock_.
void BFCAllocator::DeallocateRawInternal(void* ptr) {
  if (ptr == nullptr) {
    LOG(ERROR) << "tried to deallocate nullptr";
    return;
  }
  mutex_lock l(lock_);

  // Find the chunk from the ptr.
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle);

  // Consider coalescing it.
  FreeAndMaybeCoalesce(h);

  if (VLOG_IS_ON(4)) {
    LOG(INFO) << "F: " << RenderOccupancy();
  }
}

// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is c1.
// We merge Chunk(h2) into Chunk(h1).  Both chunks must be free and h2 must
// already have been removed from its bin; h2's record is recycled.
void BFCAllocator::Merge(BFCAllocator::ChunkHandle h1,
                         BFCAllocator::ChunkHandle h2) {
  Chunk* c1 = ChunkFromHandle(h1);
  Chunk* c2 = ChunkFromHandle(h2);
  // We can only merge chunks that are not in use.
  CHECK(!c1->in_use() && !c2->in_use());

  // c1's prev doesn't change, still points to the same ptr, and is
  // still not in use.

  // Fix up neighbor pointers
  //
  // c1 <-> c2 <-> c3 should become
  // c1 <-> c3

  BFCAllocator::ChunkHandle h3 = c2->next;
  c1->next = h3;
  CHECK(c2->prev == h1);
  if (h3 != kInvalidChunkHandle) {
    BFCAllocator::Chunk* c3 = ChunkFromHandle(h3);
    c3->prev = h1;
  }

  // Set the new size
  c1->size += c2->size;

  DeleteChunk(h2);
}

// Removes the chunk's ptr->handle mapping and recycles its record.
void BFCAllocator::DeleteChunk(ChunkHandle h) {
  // Delete h and cleanup all state
  Chunk* c = ChunkFromHandle(h);
  // VLOG(4) << "Removing: " << c->ptr;
  region_manager_.erase(c->ptr);
  DeallocateChunk(h);
}

// Files a free, un-binned chunk into the bin matching its size.
void BFCAllocator::InsertFreeChunkIntoBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
  BinNum bin_num = BinNumForSize(c->size);
  Bin* new_bin = BinFromIndex(bin_num);
  c->bin_num = bin_num;
  new_bin->free_chunks.insert(h);
}

// Removes a free chunk from its bin via an iterator already positioned at
// it (avoids a second lookup when the caller is iterating the bin).
void BFCAllocator::RemoveFreeChunkIterFromBin(
    BFCAllocator::Bin::FreeChunkSet* free_chunks,
    const BFCAllocator::Bin::FreeChunkSet::iterator& citer) {
  ChunkHandle h = *citer;
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  free_chunks->erase(citer);
  c->bin_num = kInvalidBinNum;
}

// Removes a free chunk from its bin by handle.
void BFCAllocator::RemoveFreeChunkFromBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  CHECK_GT(BinFromIndex(c->bin_num)->free_chunks.erase(h), 0)
      << "Could not find chunk in bin";
  c->bin_num = kInvalidBinNum;
}

// Marks the in-use chunk at `h` free, merges it with free neighbors on
// either side, and re-bins the resulting chunk.  Called with lock_ held
// (see DeallocateRawInternal).
void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(c->in_use() && (c->bin_num == kInvalidBinNum));

  // Mark the chunk as no longer in use
  c->allocation_id = -1;

  // Updates the stats.
  stats_.bytes_in_use -= c->size;

  // Handle of the chunk that survives coalescing and must be re-binned.
  ChunkHandle chunk_to_reassign = h;

  // If the next chunk is free, coalesce the two
  if (c->next != kInvalidChunkHandle) {
    Chunk* cnext = ChunkFromHandle(c->next);
    if (!cnext->in_use()) {
      // VLOG(8) << "Chunk at " << cnext->ptr << " merging with c " <<
      // c->ptr;

      // Merging next into h keeps h as the surviving chunk.
      chunk_to_reassign = h;

      // Deletes c->next
      RemoveFreeChunkFromBin(c->next);
      Merge(h, ChunkFromHandle(h)->next);
    }
  }

  // If the previous chunk is free, coalesce the two
  // (re-fetch c: the merge above modified chunk records).
  c = ChunkFromHandle(h);
  if (c->prev != kInvalidChunkHandle) {
    Chunk* cprev = ChunkFromHandle(c->prev);
    if (!cprev->in_use()) {
      // VLOG(8) << "Chunk at " << c->ptr << " merging into c->prev "
      // << cprev->ptr;

      // Merging h into prev makes prev the surviving chunk.
      chunk_to_reassign = c->prev;

      // Deletes c
      RemoveFreeChunkFromBin(c->prev);
      Merge(ChunkFromHandle(h)->prev, h);
      c = ChunkFromHandle(h);
    }
  }

  InsertFreeChunkIntoBin(chunk_to_reassign);
}

// Registers a visitor to be invoked on every allocation region; it is also
// invoked immediately on all regions that already exist.
void BFCAllocator::AddAllocVisitor(Visitor visitor) {
  VLOG(1) << "AddVisitor";
  mutex_lock l(lock_);
  region_visitors_.push_back(visitor);
  for (const auto& region : region_manager_.regions()) {
    visitor(region.ptr(), region.memory_size());
  }
}

bool BFCAllocator::TracksAllocationSizes() { return true; }

// Returns the size the client originally asked for (<= AllocatedSize).
size_t BFCAllocator::RequestedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for requested size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->requested_size;
}

// Returns the full (rounded/padded) size of the chunk backing `ptr`.
size_t BFCAllocator::AllocatedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocated size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->size;
}

// Returns the monotonically increasing id assigned when `ptr` was allocated.
int64 BFCAllocator::AllocationId(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocation id of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->allocation_id;
}

namespace {

// Paints character `c` into `rendered` over the cells that the byte range
// [ptr, ptr + size) occupies when the full `total_render_size` bytes are
// scaled down to `resolution` cells.  `offset` shifts this region's start
// within the concatenated multi-region address space.
void RenderRegion(char* rendered, const size_t resolution,
                  const size_t total_render_size, const size_t offset,
                  const void* base_ptr, const void* ptr, const size_t size,
                  const char c) {
  const char* base_ptr_c = static_cast<const char*>(base_ptr);
  const char* ptr_c = static_cast<const char*>(ptr);

  size_t start_location =
      ((ptr_c - base_ptr_c + offset) * resolution) / total_render_size;
  CHECK_GE(start_location, 0);
  CHECK_LT(start_location, resolution);
  size_t end_location =
      ((ptr_c + size - 1 - base_ptr_c + offset) * resolution) /
      total_render_size;
  CHECK_GE(end_location, 0);
  CHECK_LT(end_location, resolution);

  for (size_t i = start_location; i <= end_location; ++i) {
    rendered[i] = c;
  }
}

}  // namespace

// Renders all regions as a 100-character ASCII strip: '_' is free space,
// '*' is client-requested bytes in use, 'x' is padding wasted inside
// in-use chunks.
string BFCAllocator::RenderOccupancy() {
  // Make a buffer for the ASCII-art representation.
  const size_t resolution = 100;
  char rendered[resolution];

  // Compute the total region size to render over
  size_t total_region_size = 0;
  for (const auto& region : region_manager_.regions()) {
    total_region_size += region.memory_size();
  }

  if (total_region_size == 0) {
    return "<allocator contains no memory>";
  }

  // Start out with everything empty
  RenderRegion(rendered, resolution, total_region_size, 0, nullptr, nullptr,
               total_region_size, '_');

  size_t region_offset = 0;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    // Then render each chunk left to right.
    while (h != kInvalidChunkHandle) {
      Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        // Render the wasted space
        size_t wasted = c->size - c->requested_size;
        if (wasted > 0) {
          RenderRegion(rendered, resolution, total_region_size,
                       region_offset + c->requested_size, region.ptr(), c->ptr,
                       wasted, 'x');
        }
        // Then the occupied space
        RenderRegion(rendered, resolution, total_region_size, region_offset,
                     region.ptr(), c->ptr, c->requested_size, '*');
      }
      h = c->next;
    }
    region_offset += region.memory_size();
  }

  return StringPiece(rendered, resolution).ToString();
}

// Logs a detailed snapshot of allocator state: per-bin summaries, the free
// chunks of the bin the failed request of `num_bytes` would have used, every
// chunk in every region, and aggregate in-use totals.
void BFCAllocator::DumpMemoryLog(size_t num_bytes) {
  const std::array<BinDebugInfo, kNumBins> bin_infos = get_bin_debug_info();
  for (BinNum bin_num = 0; bin_num < kNumBins; bin_num++) {
    Bin* b = BinFromIndex(bin_num);
    const BinDebugInfo& bin_info = bin_infos[bin_num];
    // The bin's free set and the per-chunk scan must agree on free counts.
    CHECK_EQ(b->free_chunks.size(),
             bin_info.total_chunks_in_bin - bin_info.total_chunks_in_use);

    LOG(INFO) << "Bin (" << b->bin_size
              << "): \tTotal Chunks: " << bin_info.total_chunks_in_bin
              << ", Chunks in use: " << bin_info.total_chunks_in_use << ". "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_bin)
              << " allocated for chunks. "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_use)
              << " in use in bin. "
              << strings::HumanReadableNumBytes(
                     bin_info.total_requested_bytes_in_use)
              << " client-requested in use in bin.";
  }

  // Find the bin that we would have liked to allocate in, so we
  // can get some further analysis about fragmentation.
  Bin* b = BinForSize(num_bytes);

  LOG(INFO) << "Bin for " << strings::HumanReadableNumBytes(num_bytes)
            << " was " << strings::HumanReadableNumBytes(b->bin_size)
            << ", Chunk State: ";

  for (ChunkHandle h : b->free_chunks) {
    Chunk* c = ChunkFromHandle(h);
    LOG(INFO) << c->DebugString(this, true);
  }

  // Next show the chunks that are in use, and also summarize their
  // number by size.
  std::map<size_t, int> in_use_by_size;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        in_use_by_size[c->size]++;
      }
      LOG(INFO) << (c->in_use() ? "Chunk" : "Free ") << " at " << c->ptr
                << " of size " << c->size;
      h = c->next;
    }
  }

  LOG(INFO) << "     Summary of in-use Chunks by size: ";
  size_t total_bytes = 0;
  for (auto& it : in_use_by_size) {
    LOG(INFO) << it.second << " Chunks of size " << it.first << " totalling "
              << strings::HumanReadableNumBytes(it.first * it.second);
    total_bytes += (it.first * it.second);
  }
  LOG(INFO) << "Sum Total of in-use chunks: "
            << strings::HumanReadableNumBytes(total_bytes);
  LOG(INFO) << "Stats: \n" << stats_.DebugString();
}

void BFCAllocator::GetStats(AllocatorStats* stats) {
  mutex_lock l(lock_);
  *stats = stats_;
}

// Resets counters but keeps high-water marks consistent with current usage.
void BFCAllocator::ClearStats() {
  mutex_lock l(lock_);
  stats_.num_allocs = 0;
  stats_.max_bytes_in_use = stats_.bytes_in_use;
  stats_.max_alloc_size = 0;
}

// Walks every chunk in every region and accumulates per-bin debug totals.
// As a side effect, CHECKs internal consistency: each free chunk must be
// present in exactly the bin its size maps to.
std::array<BFCAllocator::BinDebugInfo, BFCAllocator::kNumBins>
BFCAllocator::get_bin_debug_info() {
  std::array<BinDebugInfo, kNumBins> bin_infos;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      BinNum bin_num = BinNumForSize(c->size);
      BinDebugInfo& bin_info = bin_infos[bin_num];
      bin_info.total_bytes_in_bin += c->size;
      bin_info.total_chunks_in_bin++;
      if (c->in_use()) {
        bin_info.total_bytes_in_use += c->size;
        bin_info.total_requested_bytes_in_use += c->requested_size;
        bin_info.total_chunks_in_use++;
      } else {
        Bin* bin = BinFromIndex(bin_num);
        CHECK_EQ(bin->free_chunks.count(h), 1);
        CHECK_EQ(c->bin_num, bin_num);
      }
      h = c->next;
    }
  }
  return bin_infos;
}

}  // namespace tensorflow