1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
// A simple CPU allocator that intercepts malloc/free calls from the MKL
// library and redirects them to the TensorFlow allocator.
18
19#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_
20#define TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_
21
22#ifdef INTEL_MKL
23
24#include <cstdlib>
25#include <string>
26#include "tensorflow/core/common_runtime/bfc_allocator.h"
27#include "tensorflow/core/common_runtime/visitable_allocator.h"
28#include "tensorflow/core/lib/strings/numbers.h"
29#include "tensorflow/core/lib/strings/str_util.h"
30#include "tensorflow/core/platform/mem.h"
31
32#include "i_malloc.h"
33
34namespace tensorflow {
35
36class MklSubAllocator : public SubAllocator {
37 public:
38  ~MklSubAllocator() override {}
39
40  void* Alloc(size_t alignment, size_t num_bytes) override {
41    return port::AlignedMalloc(num_bytes, alignment);
42  }
43  void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); }
44};
45
46/// CPU allocator for MKL that wraps BFC allocator and intercepts
47/// and redirects memory allocation calls from MKL.
48class MklCPUAllocator : public VisitableAllocator {
49 public:
50  // Constructor and other standard functions
51
52  /// Environment variable that user can set to upper bound on memory allocation
53  static constexpr const char* kMaxLimitStr = "TF_MKL_ALLOC_MAX_BYTES";
54
55  /// Default upper limit on allocator size - 64GB
56  static const size_t kDefaultMaxLimit = 64LL << 30;
57
58  MklCPUAllocator() { TF_CHECK_OK(Initialize()); }
59
60  ~MklCPUAllocator() override { delete allocator_; }
61
62  Status Initialize() {
63    VLOG(2) << "MklCPUAllocator: In MklCPUAllocator";
64
65    // Set upper bound on memory allocation to physical RAM available on the
66    // CPU unless explicitly specified by user
67    uint64 max_mem_bytes = kDefaultMaxLimit;
68#if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
69    max_mem_bytes =
70        (uint64)sysconf(_SC_PHYS_PAGES) * (uint64)sysconf(_SC_PAGESIZE);
71#endif
72    char* user_mem_bytes = getenv(kMaxLimitStr);
73
74    if (user_mem_bytes != NULL) {
75      uint64 user_val = 0;
76      if (!strings::safe_strtou64(user_mem_bytes, &user_val)) {
77        return errors::InvalidArgument("Invalid memory limit (", user_mem_bytes,
78                                       ") specified for MKL allocator through ",
79                                       kMaxLimitStr);
80      }
81#if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
82      if (user_val > max_mem_bytes) {
83        LOG(WARNING) << "The user specified a memory limit " << kMaxLimitStr
84                     << "=" << user_val
85                     << " greater than available physical memory: "
86                     << max_mem_bytes
87                     << ". This could significantly reduce performance!";
88      }
89#endif
90      max_mem_bytes = user_val;
91    }
92
93    VLOG(1) << "MklCPUAllocator: Setting max_mem_bytes: " << max_mem_bytes;
94    allocator_ = new BFCAllocator(new MklSubAllocator, max_mem_bytes,
95                                  kAllowGrowth, kName);
96
97    // For redirecting all allocations from MKL to this allocator
98    // From: http://software.intel.com/en-us/node/528565
99    i_malloc = MallocHook;
100    i_calloc = CallocHook;
101    i_realloc = ReallocHook;
102    i_free = FreeHook;
103
104    return Status::OK();
105  }
106
107  inline string Name() override { return kName; }
108
109  inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
110    return allocator_->AllocateRaw(alignment, num_bytes);
111  }
112
113  inline void DeallocateRaw(void* ptr) override {
114    allocator_->DeallocateRaw(ptr);
115  }
116
117  void GetStats(AllocatorStats* stats) override { allocator_->GetStats(stats); }
118
119  void ClearStats() override { allocator_->ClearStats(); }
120
121  void AddAllocVisitor(Visitor visitor) override {
122    allocator_->AddAllocVisitor(visitor);
123  }
124
125  void AddFreeVisitor(Visitor visitor) override {
126    allocator_->AddFreeVisitor(visitor);
127  }
128
129 private:
130  // Hooks provided by this allocator for memory allocation routines from MKL
131
132  static inline void* MallocHook(size_t size) {
133    VLOG(3) << "MklCPUAllocator: In MallocHook";
134    return cpu_allocator()->AllocateRaw(kAlignment, size);
135  }
136
137  static inline void FreeHook(void* ptr) {
138    VLOG(3) << "MklCPUAllocator: In FreeHook";
139    cpu_allocator()->DeallocateRaw(ptr);
140  }
141
142  static inline void* CallocHook(size_t num, size_t size) {
143    Status s = Status(error::Code::UNIMPLEMENTED,
144                      "Unimplemented case for hooking MKL function.");
145    TF_CHECK_OK(s);  // way to assert with an error message
146  }
147
148  static inline void* ReallocHook(void* ptr, size_t size) {
149    Status s = Status(error::Code::UNIMPLEMENTED,
150                      "Unimplemented case for hooking MKL function.");
151    TF_CHECK_OK(s);  // way to assert with an error message
152  }
153
154  /// Do we allow growth in BFC Allocator
155  static const bool kAllowGrowth = true;
156
157  /// Name
158  static constexpr const char* kName = "mklcpu";
159
160  /// The alignment that we need for the allocations
161  static const size_t kAlignment = 64;
162
163  VisitableAllocator* allocator_;  // owned by this class
164};
165
166}  // namespace tensorflow
167
168#endif  // INTEL_MKL
169
170#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_
171