1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
17#define TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
18
19#if GOOGLE_CUDA
20
#include <algorithm>
#include <cstdlib>
#include <limits>
#include <unordered_map>

#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/stream_executor.h"
28
29namespace tensorflow {
30
31template <typename T>
32inline perftools::gputools::DeviceMemory<T> AsDeviceMemory(const T* cuda_memory,
33                                                           uint64 size) {
34  perftools::gputools::DeviceMemoryBase wrapped(const_cast<T*>(cuda_memory),
35                                                size * sizeof(T));
36  perftools::gputools::DeviceMemory<T> typed(wrapped);
37  return typed;
38}
39
40// A helper class that looks up the best autotuned config from parameters.
41// Due to the noisy nature of autotune, especially with multiple devices, it
42// only accepts a config if its margin exceeds a threshold.
43// For the same shape configs, if a new best config matches the previous best,
44// they get promoted; otherwise, the winner gets demoted. This process stops
45// when the winner's score exceeds the threshold.
46// In a bad case when two configs are very close to each other and flips
47// back and forth randomly, the expected number of experiments before autotune
48// settles is O(threshold ^ 2). So we recommend that number of warmup runs
49// for any benchmarks.
50template <typename Parameters, typename Config>
51class AutoTuneMap {
52 public:
53  bool Find(const Parameters& params, Config* config) const {
54    mutex_lock lock(mu_);
55    auto iter = params_config_map_.find(params);
56    if (iter == params_config_map_.end() ||
57        (iter->second.score < min_score_threshold_ &&
58         iter->second.count <= max_autotune_count_)) {
59      return false;
60    }
61    *config = iter->second.config;
62    return true;
63  }
64  void Insert(const Parameters& params, const Config& config) {
65    mutex_lock lock(mu_);
66    auto iter = params_config_map_.find(params);
67    int new_score = 0;
68    if (iter == params_config_map_.end()) {
69      // Create a new entry if params is new.
70      VLOG(1) << GetActionSummary("creates", params, config);
71      params_config_map_.insert(
72          std::make_pair(params, ValueType{config, 1, 1}));
73      new_score = 1;
74    } else if (iter->second.score < min_score_threshold_ &&
75               iter->second.count <= max_autotune_count_) {
76      DCHECK_GT(iter->second.score, 0);
77      if (iter->second.config != config) {
78        // If it is different from the current winner, demotes the winner.
79        VLOG(1) << GetActionSummary("demotes", params, config);
80        new_score = --iter->second.score;
81        ++iter->second.count;
82        if (new_score <= 0) {
83          VLOG(1) << GetActionSummary("erases", params, config);
84          params_config_map_.erase(iter);
85        }
86      } else {
87        // If it is the same as the current winner, promotes the winner.
88        VLOG(1) << GetActionSummary("promotes", params, config);
89        new_score = ++iter->second.score;
90        ++iter->second.count;
91      }
92    }
93    if (new_score >= min_score_threshold_) {
94      VLOG(1) << GetActionSummary("accepts", params, config);
95    }
96  }
97
98 private:
99  AutoTuneMap(const string& name) : name_(name) {
100    min_score_threshold_ = 1;
101    int min_warmup_iterations = 10;
102    const char* threshold_str = getenv("TF_AUTOTUNE_THRESHOLD");
103    if (threshold_str != nullptr) {
104      strings::safe_strto32(threshold_str, &min_score_threshold_);
105    }
106    const char* min_warmup_iteration_str =
107        getenv("TF_AUTOTUNE_MIN_WARMUP_ITERATIONS");
108    if (min_warmup_iteration_str != nullptr) {
109      strings::safe_strto32(min_warmup_iteration_str, &min_warmup_iterations);
110    }
111    min_score_threshold_ = std::max(min_score_threshold_, 1);
112    max_autotune_count_ = std::max(
113        5 * min_score_threshold_ * min_score_threshold_, min_warmup_iterations);
114  }
115
116  template <class Group, class Params, class Cfg>
117  friend class AutoTuneSingleton;
118
119  struct Hasher {
120    std::size_t operator()(const Parameters& parameter) const {
121      return parameter.hash();
122    }
123  };
124
125  string GetActionSummary(StringPiece action, const Parameters& params,
126                          const Config& config) {
127    return strings::Printf("autotune_map %s %s: %s -> (%s)", name_.c_str(),
128                           action.ToString().c_str(), params.ToString().c_str(),
129                           config.ToString().c_str());
130  }
131
132  mutable mutex mu_;
133  struct ValueType {
134    Config config;
135    int32 score;
136    int32 count;
137  };
138  std::unordered_map<Parameters, ValueType, Hasher> params_config_map_
139      GUARDED_BY(mu_);
140  string name_;
141  int32 min_score_threshold_;
142  int32 max_autotune_count_;
143
144  TF_DISALLOW_COPY_AND_ASSIGN(AutoTuneMap);
145};
146
147// A Singleton helper that manages the global autotune results by groups.
148// The caller specified arbitrary Group type that can distinguish between
149// different autotune results, even if their Parameters and Configs are the
150// same.
151template <class Group, typename Parameters, typename Config>
152class AutoTuneSingleton {
153 public:
154  typedef AutoTuneMap<Parameters, Config> AutoTuneType;
155  static AutoTuneType* GetInstance() {
156    static AutoTuneType* instance = new AutoTuneType(Group::name());
157    return instance;
158  }
159};
160
161}  // namespace tensorflow
162
163#endif  // GOOGLE_CUDA
164
165#endif  // TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
166