/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
1596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
1696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/grappler/costs/utils.h"
1796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
1896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include <stddef.h>
1996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include <utility>
2096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
2196cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "third_party/eigen3/Eigen/Core"
2296cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
2396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#if GOOGLE_CUDA
2496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "cuda/include/cuda.h"
2596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "cuda/include/cuda_runtime_api.h"
2696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "cuda/include/cudnn.h"
2796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#endif
2896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
29d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/framework/allocation_description.pb.h"
3013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/framework/attr_value.pb.h"
3113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/framework/op.h"
3213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/framework/op_def.pb.h"
33d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/framework/step_stats.pb.h"
34e85d3df92deb9d717befdf173966a2913ac2aea0Geoffrey Irving#include "tensorflow/core/framework/tensor.pb.h"
35d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/framework/tensor_description.pb.h"
3696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/framework/tensor_shape.pb.h"
3796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/framework/types.pb.h"
38a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou#include "tensorflow/core/graph/graph.h"
39a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou#include "tensorflow/core/graph/tensor_id.h"
401937df84a17f8f520ab2d45d083d65188afd9c62Benoit Steiner#include "tensorflow/core/grappler/clusters/utils.h"
4124c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou#include "tensorflow/core/grappler/utils.h"
42d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/lib/core/bits.h"
4396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/lib/strings/numbers.h"
4496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/lib/strings/strcat.h"
4596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/platform/cpu_info.h"
4613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/platform/env.h"
47d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/platform/logging.h"
4813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/platform/protobuf.h"
49d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/protobuf/config.pb.h"
5096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/util/device_name_utils.h"
5196cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
5296cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steinernamespace tensorflow {
5396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steinernamespace grappler {
5496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
55a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhoustatic OpInfo::TensorProperties UnknownInput() {
56a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou  OpInfo::TensorProperties input;
57a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou  input.set_dtype(DataType::DT_INVALID);
58a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou  input.mutable_shape()->set_unknown_rank(true);
59a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou  return input;
60a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou}
61a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou
62a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhoustatic std::vector<TensorProto> ExtractTensors(const AttrValue& attr_value) {
63a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou  std::vector<TensorProto> tensors;
64a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou  switch (attr_value.value_case()) {
65a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    case AttrValue::kTensor: {
66a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou      tensors.push_back(attr_value.tensor());
67a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou      break;
68a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    }
69a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    case AttrValue::kList: {
70a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou      for (const auto& tensor_proto : attr_value.list().tensor()) {
71a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou        tensors.push_back(tensor_proto);
72a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou      }
73a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou      break;
74a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    }
75a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    default: {}
76a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou  }
77a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou  return tensors;
78a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou}
79a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou
80513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner// Annotate the op_info inputs with extra information when possible (e.g. the
81513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner// input value if it's known statically).
8213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhoustatic void ExtractExtraProperties(
8313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    const NodeDef& node,
8413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    const std::unordered_map<string, const NodeDef*>& name_to_node,
85513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner    OpInfo* op_info) {
8613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  OpRegistry* op_registry = OpRegistry::Global();
87337748d9800717c50195f05e5e1027426b965408Benoit Steiner  const OpDef* op_def = nullptr;
8813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  auto s = op_registry->LookUpOpDef(node.op(), &op_def);
8913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  if (!s.ok()) {
9013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    op_def = nullptr;
9113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  }
9213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
9313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  for (int i = 0; i < node.input_size(); ++i) {
9413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    const string input_name = node.input(i);
9513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    CHECK(!input_name.empty());
9624c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou    if (IsControlInput(input_name)) {
9724c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou      continue;
9824c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou    }
9913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    TensorId input_tensor_id = ParseTensorName(input_name);
10013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    const string input_node_name = input_tensor_id.first.ToString();
10113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
10213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    auto iter = name_to_node.find(input_node_name);
10313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    if (iter == name_to_node.end()) continue;
10413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    const NodeDef* input_node = iter->second;
10513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
10624c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou    if (i >= op_info->inputs_size()) {
10724c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou      LOG(ERROR) << "OpInfo's inputs doesn't match the graph! OpInfo: "
10824c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou                 << op_info->DebugString()
10924c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou                 << "\nCurrent node: " << node.DebugString()
11024c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou                 << "\nInput node: " << input_node->DebugString();
11124c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou    }
11224c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou
11313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    // The value attribute in Const input is useful for cost prediction.
11424c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou    if (input_node->op() == "Const" && i < op_info->inputs_size()) {
11513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      auto it = input_node->attr().find("value");
11613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      if (it == input_node->attr().end()) continue;
11713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
11813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      const AttrValue& attr_value = it->second;
11913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      std::vector<TensorProto> tensors = ExtractTensors(attr_value);
12013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      if (tensors.empty()) continue;
12113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
12213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      const TensorProto& t = tensors[0];
123513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner      OpInfo::TensorProperties* input = op_info->mutable_inputs(i);
124513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner      *(input->mutable_value()) = t;
12513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
12613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      // For filename input, the file size can also be useful.
127337748d9800717c50195f05e5e1027426b965408Benoit Steiner      if (op_def && i < op_def->input_arg_size() &&
12813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou          op_def->input_arg(i).name().find("filename") != std::string::npos) {
12913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou        Tensor tensor;
130337748d9800717c50195f05e5e1027426b965408Benoit Steiner        if (!tensor.FromProto(t)) {
131337748d9800717c50195f05e5e1027426b965408Benoit Steiner          continue;
132337748d9800717c50195f05e5e1027426b965408Benoit Steiner        }
133337748d9800717c50195f05e5e1027426b965408Benoit Steiner        if (tensor.NumElements() != 1) {
134337748d9800717c50195f05e5e1027426b965408Benoit Steiner          continue;
135337748d9800717c50195f05e5e1027426b965408Benoit Steiner        }
13613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou        const string filename = tensor.scalar<string>()();
13713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
13813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou        Env* env = Env::Default();
13913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou        FileStatistics stat;
14013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou        Status s = env->Stat(filename, &stat);
141337748d9800717c50195f05e5e1027426b965408Benoit Steiner        if (!s.ok()) {
142337748d9800717c50195f05e5e1027426b965408Benoit Steiner          continue;
14313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou        }
144337748d9800717c50195f05e5e1027426b965408Benoit Steiner        AttrValue attr;
145337748d9800717c50195f05e5e1027426b965408Benoit Steiner        attr.set_i(stat.length);
146337748d9800717c50195f05e5e1027426b965408Benoit Steiner        string attr_key = strings::StrCat("input_", i, "_filesize");
147513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner        (*op_info->mutable_attr())[attr_key] = attr;
14813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      }
14913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    }
15013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
15113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    // When the input is a handle (e.g. look up table handle), the information
15213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    // in the op itself is not sufficient to predict the op memory.
153337748d9800717c50195f05e5e1027426b965408Benoit Steiner    if (op_def && i < op_def->input_arg_size() &&
15413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou        op_def->input_arg(i).name().find("handle") != std::string::npos) {
15513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      string new_key = strings::StrCat("parent_", i, "_op");
15613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      AttrValue attr;
15713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      attr.set_s(input_node->op());
158513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner      (*op_info->mutable_attr())[new_key] = attr;
15913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      // TODO(yuefengz): Only parent node's op name is copied. Copy inputs
16013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou      // and attributes when necessary.
16113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    }
16213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  }
16313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou}
16413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
16596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steinerstd::vector<OpInfo::TensorProperties> FindInputFeatures(
16696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner    const NodeDef& node,
167a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    const std::unordered_map<string, const CostGraphDef::Node*>& name_to_cost,
168a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    const std::unordered_map<string, const NodeDef*>& name_to_node) {
16996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner  std::vector<OpInfo::TensorProperties> inputs;
17096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner  for (const auto& input_name : node.input()) {
17196cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner    CHECK(!input_name.empty());
172a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    TensorId input_tensor_id = ParseTensorName(input_name);
173a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    const string input_node_name = input_tensor_id.first.ToString();
174a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    const int output_index = input_tensor_id.second;
175a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou
176a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    // Skip control inputs.
177a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    if (output_index == Graph::kControlSlot) {
17896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner      continue;
17996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner    }
18096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
181a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou    auto it = name_to_cost.find(input_node_name);
18296cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner    if (it == name_to_cost.end() || output_index < 0) {
183a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou      inputs.push_back(UnknownInput());
18496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner    } else {
18596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner      const CostGraphDef::Node* input_cost = it->second;
186e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang      if (input_cost->output_info_size() == 0) {
187e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang        inputs.push_back(UnknownInput());
188e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang      } else {
189e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang        const CostGraphDef::Node::OutputInfo& output =
190e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang            input_cost->output_info(output_index);
191e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang        OpInfo::TensorProperties input;
192e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang        input.set_dtype(output.dtype());
193e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang        *input.mutable_shape() = output.shape();
194e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang        inputs.push_back(input);
195e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang      }
19696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner    }
19796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner  }
19896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
19996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner  return inputs;
20096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner}
20196cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
20213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng ZhouDeviceProperties GetDeviceInfo(const string& device_str) {
20396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner  DeviceNameUtils::ParsedName parsed;
20413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  if (DeviceNameUtils::ParseFullName(device_str, &parsed)) {
20596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner    if (parsed.type == "GPU") {
20696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner      return GetLocalGPUInfo(parsed.id);
20796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner    } else if (parsed.type == "CPU") {
20896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner      return GetLocalCPUInfo();
20996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner    }
21096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner  }
21187ba9f5370c0f7068760f9536979d9183f6dfe9cBenoit Steiner  DeviceProperties device;
21296cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner  device.set_type("UNKNOWN");
21396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner  return device;
21496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner}
21596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner
21613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng ZhouDeviceProperties GetDeviceInfo(const CostGraphDef::Node& node) {
21713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  return GetDeviceInfo(node.device());
21813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou}
21913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
2200cc851c08f9ff4c084c98e28e72d3f99a78d161fYuefeng ZhouOpInfo BuildOpInfoWithoutDevice(
2210cc851c08f9ff4c084c98e28e72d3f99a78d161fYuefeng Zhou    const NodeDef& node,
22213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    const std::unordered_map<string, const NodeDef*>& name_to_node,
22313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    const std::vector<OpInfo::TensorProperties>& inputs) {
22413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  OpInfo op_info;
22513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  op_info.set_op(node.op());
22613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  *op_info.mutable_attr() = node.attr();
22713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  for (auto& input : inputs) {
22813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou    *op_info.add_inputs() = input;
22913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  }
230513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner  ExtractExtraProperties(node, name_to_node, &op_info);
23113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou  return op_info;
23213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou}
23313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou
2344a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlowerstring GetOpDescription(const OpInfo& op_info) {
2354a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower  string description = "[";
2364a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower  description += "Op=" + op_info.op() + ", ";
2374a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower  description += "input_shapes=[";
2384a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower  for (auto const& input : op_info.inputs()) {
2394a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower    description += PartialTensorShape::DebugString(input.shape());
2404a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower  }
2414a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower  description += "]";
2424a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower  return description;
2434a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower}
2444a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower
2452251633a509d6807d309bff60013c86650bb891dBenoit SteinerOpPerformanceList CostGraphToOpPerformanceData(const CostGraphDef& cost_graph,
2462251633a509d6807d309bff60013c86650bb891dBenoit Steiner                                               const GraphDef& graph) {
2472251633a509d6807d309bff60013c86650bb891dBenoit Steiner  OpPerformanceList ret;
2482251633a509d6807d309bff60013c86650bb891dBenoit Steiner  std::unordered_map<string, const CostGraphDef::Node*> name_to_cost;
2492251633a509d6807d309bff60013c86650bb891dBenoit Steiner  std::unordered_map<string, const NodeDef*> name_to_node;
2502251633a509d6807d309bff60013c86650bb891dBenoit Steiner  for (auto& node : cost_graph.node()) {
2512251633a509d6807d309bff60013c86650bb891dBenoit Steiner    name_to_cost[node.name()] = &node;
2522251633a509d6807d309bff60013c86650bb891dBenoit Steiner  }
2532251633a509d6807d309bff60013c86650bb891dBenoit Steiner  for (auto& node : graph.node()) {
2542251633a509d6807d309bff60013c86650bb891dBenoit Steiner    name_to_node[node.name()] = &node;
2552251633a509d6807d309bff60013c86650bb891dBenoit Steiner  }
2562251633a509d6807d309bff60013c86650bb891dBenoit Steiner
2572251633a509d6807d309bff60013c86650bb891dBenoit Steiner  for (const auto& node : graph.node()) {
2582251633a509d6807d309bff60013c86650bb891dBenoit Steiner    // Skip the nodes that are not in the cost graph: these are nodes that
2592251633a509d6807d309bff60013c86650bb891dBenoit Steiner    // aren't run, because they aren't in the intersection of transitive
2602251633a509d6807d309bff60013c86650bb891dBenoit Steiner    // fan-in of a fetch node and the transitive fan-out of an input, or nodes
2612251633a509d6807d309bff60013c86650bb891dBenoit Steiner    // that were optimized away by the optimizer. Since they don't contribute
2622251633a509d6807d309bff60013c86650bb891dBenoit Steiner    // to the execution time we simply discard them.
2632251633a509d6807d309bff60013c86650bb891dBenoit Steiner    auto it = name_to_cost.find(node.name());
2642251633a509d6807d309bff60013c86650bb891dBenoit Steiner    if (it == name_to_cost.end()) {
2652251633a509d6807d309bff60013c86650bb891dBenoit Steiner      continue;
2662251633a509d6807d309bff60013c86650bb891dBenoit Steiner    }
2672251633a509d6807d309bff60013c86650bb891dBenoit Steiner    const CostGraphDef::Node* cost_node = it->second;
2682251633a509d6807d309bff60013c86650bb891dBenoit Steiner
2692251633a509d6807d309bff60013c86650bb891dBenoit Steiner    OpPerformance* perf = ret.add_op_performance();
2702251633a509d6807d309bff60013c86650bb891dBenoit Steiner    perf->set_node(node.name());
2712251633a509d6807d309bff60013c86650bb891dBenoit Steiner
2722251633a509d6807d309bff60013c86650bb891dBenoit Steiner    std::vector<OpInfo::TensorProperties> inputs =
2732251633a509d6807d309bff60013c86650bb891dBenoit Steiner        FindInputFeatures(node, name_to_cost, name_to_node);
2740cc851c08f9ff4c084c98e28e72d3f99a78d161fYuefeng Zhou    *perf->mutable_op() = BuildOpInfoWithoutDevice(node, name_to_node, inputs);
2750cc851c08f9ff4c084c98e28e72d3f99a78d161fYuefeng Zhou    *perf->mutable_op()->mutable_device() = GetDeviceInfo(cost_node->device());
2762251633a509d6807d309bff60013c86650bb891dBenoit Steiner
2772251633a509d6807d309bff60013c86650bb891dBenoit Steiner    perf->set_temporary_memory_size(cost_node->temporary_memory_size());
2782251633a509d6807d309bff60013c86650bb891dBenoit Steiner    // Note that CostGraphDef::Node::compute_cost is microseconds, while
2792251633a509d6807d309bff60013c86650bb891dBenoit Steiner    // OpPerformance.compute_cost is nanoseconds.
2802251633a509d6807d309bff60013c86650bb891dBenoit Steiner    perf->set_compute_cost(cost_node->compute_cost() * 1000);
2812251633a509d6807d309bff60013c86650bb891dBenoit Steiner    perf->set_compute_time(cost_node->compute_time() * 1000);
2822251633a509d6807d309bff60013c86650bb891dBenoit Steiner    perf->set_memory_time(cost_node->memory_time() * 1000);
2832251633a509d6807d309bff60013c86650bb891dBenoit Steiner
2842251633a509d6807d309bff60013c86650bb891dBenoit Steiner    for (const auto& output_info : cost_node->output_info()) {
2852251633a509d6807d309bff60013c86650bb891dBenoit Steiner      perf->mutable_op_memory()->add_output_memory(output_info.size());
2862251633a509d6807d309bff60013c86650bb891dBenoit Steiner    }
2872251633a509d6807d309bff60013c86650bb891dBenoit Steiner
288e86fc00d097cb8cc118435ebd464497825bcad1eYuefeng Zhou    perf->mutable_op_memory()->set_temp_memory(
289e86fc00d097cb8cc118435ebd464497825bcad1eYuefeng Zhou        cost_node->temporary_memory_size());
290e86fc00d097cb8cc118435ebd464497825bcad1eYuefeng Zhou    perf->mutable_op_memory()->set_persistent_memory(
291e86fc00d097cb8cc118435ebd464497825bcad1eYuefeng Zhou        cost_node->persistent_memory_size());
2922251633a509d6807d309bff60013c86650bb891dBenoit Steiner  }
2932251633a509d6807d309bff60013c86650bb891dBenoit Steiner  return ret;
2942251633a509d6807d309bff60013c86650bb891dBenoit Steiner}
2952251633a509d6807d309bff60013c86650bb891dBenoit Steiner
296d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowervoid TensorSizeHistogram::Add(const uint64 value) {
297d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  num_elem_++;
298d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  sum_elem_ += value;
299d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  min_ = std::min(min_, value);
300d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  max_ = std::max(max_, value);
301d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  buckets_[Index(value)]++;
302d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower}
303d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
304d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowervoid TensorSizeHistogram::Merge(const TensorSizeHistogram& src) {
305d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  num_elem_ += src.num_elem_;
306d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  sum_elem_ += src.sum_elem_;
307d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  min_ = std::min(min_, src.min_);
308d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  max_ = std::max(max_, src.max_);
309d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  std::transform(buckets_.begin(), buckets_.end(), src.buckets_.begin(),
310d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower                 buckets_.begin(), std::plus<uint64>());
311d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower}
312d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
313d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowerstd::string TensorSizeHistogram::ToString() const {
314d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  std::string r;
315d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  char buf[200];
316d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  snprintf(buf, sizeof(buf), "Count: %lld, Average: ", num_elem_);
317d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  r.append(buf);
318d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  r.append(strings::HumanReadableNumBytes(Average()));
319d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  r.append(", Min: ");
320d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  r.append(strings::HumanReadableNumBytes(min_));
321d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  r.append(", Max: ");
322d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  r.append(strings::HumanReadableNumBytes(max_));
323d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  r.append("\n------------------------------------------------------\n");
324d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  const double mult = num_elem_ > 0 ? 100.0 / num_elem_ : 0.0;
325d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  uint64 cumul_sum = 0;
326d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
327d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  const int size_string_width = 12;
328d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  for (int i = 0; i < buckets_.size(); i++) {
329d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    if (buckets_[i] == 0) continue;
330d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    cumul_sum += buckets_[i];
331d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    r.append("[ ");
332d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    if (i == 0) {
333d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      r.append(size_string_width - 2, ' ');
334d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      r.append("0B");
335d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    } else {
336d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      uint64 left = 1ULL << (i - 1);
337d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      const auto left_string = strings::HumanReadableNumBytes(left);
338d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      r.append(size_string_width - left_string.size(), ' ');
339d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      r.append(left_string);
340d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    }
341d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    r.append(", ");
342d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    uint64 right = 1ULL << i;
343d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto right_string = strings::HumanReadableNumBytes(right);
344d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    r.append(size_string_width - right_string.size(), ' ');
345d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    r.append(right_string);
346d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    snprintf(buf, sizeof(buf), ") %7lld %7.3f%% %7.3f%% ",
347d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower             buckets_[i],         // count
348d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower             mult * buckets_[i],  // percentage
349d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower             mult * cumul_sum);   // cum percentage
350d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    r.append(buf);
351d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
352d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    // Add hash marks based on percentage; 40 marks for 100%.
353d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    auto marks = static_cast<int>(
354d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower        (static_cast<double>(40 * buckets_[i] + (num_elem_ >> 1)) / num_elem_));
355d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    r.append(marks, '#');
356d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    r.push_back('\n');
357d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  }
358d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  return r;
359d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower}
360d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
361d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowerconst int TensorSizeHistogram::Index(const uint64 value) const {
362d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  // Log2Floor64 returns -1 for 0, 0 for 1, 1 for 2-3, 2 for 4-7, ...
363d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  const auto index = Log2Floor64(value) + 1;
364d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  return std::min(index, kMaxBuckets - 1);
365d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower}
366d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
367d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowerstring GetDeviceClassForNonChannelDevice(const string& device_name) {
368d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  DeviceNameUtils::ParsedName parsed_name;
369d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  bool parsed = DeviceNameUtils::ParseFullName(device_name, &parsed_name);
3704241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner  if (!parsed) {
3714241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    string name = str_util::StringReplace(device_name, "/job_", "/job:", true);
3724241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    name = str_util::StringReplace(name, "/replica_", "/replica:", true);
3734241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    name = str_util::StringReplace(name, "/task_", "/task:", true);
3744241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    name = str_util::StringReplace(name, "/device_", "/device:", true);
3754241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    name = str_util::StringReplace(name, "GPU_", "GPU:", true);
3764241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    name = str_util::StringReplace(name, "CPU_", "CPU:", true);
3774241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    name = str_util::StringReplace(name, "gpu_", "gpu:", true);
3784241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    name = str_util::StringReplace(name, "cpu_", "cpu:", true);
3794241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    parsed = DeviceNameUtils::ParseFullName(name, &parsed_name);
3804241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner  }
381d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  if (parsed) {
3824241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    const string jobname = parsed_name.has_job ? parsed_name.job : "";
383d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    return strings::StrCat("/", jobname, "/", parsed_name.type);
384d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  } else {
385d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    return "Unclassified";
386d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  }
387d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower}
388d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
389d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowerstring GetDeviceClass(const string& device_name) {
390d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  // TODO(dyoon): channel device name follows the convention we currently have
391d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  // in VirtualScheduler. This should be revised with VirtualScheduler as well
392d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  // as VirtualPlacer in the future.
393d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  if (device_name.find("Channel") != string::npos) {
3944241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    const string from = "_from_";
3954241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner    const string to = "_to_";
396d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto from_loc = device_name.find(from);
397d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto to_loc = device_name.find(to);
398d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto src_device_full = device_name.substr(
399d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower        from_loc + from.size(), to_loc - (from_loc + from.size()));
400d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto dst_device_full = device_name.substr(to_loc + to.size());
401d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    return strings::StrCat(
402d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower        "Channel", ": ", GetDeviceClassForNonChannelDevice(src_device_full),
403d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower        " -> ", GetDeviceClassForNonChannelDevice(dst_device_full));
404d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  } else {
405d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    return GetDeviceClassForNonChannelDevice(device_name);
406d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  }
407d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower}
408d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
409d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowerstring GetStatsStringFromRunMetadata(const RunMetadata& run_metadata,
410d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower                                     bool verbosity) {
411d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  // TODO(dyoon): print out other stats as needed.
412d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  std::ostringstream output;
413d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
414d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  // Tensor size histogram:
415d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  // if verbosity, it outputs per-device histogram,
416d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  // otherwise, only per-class histogram.
417d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  std::unordered_map<string, TensorSizeHistogram> device_to_hist_map;
418d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  const auto& step_stats = run_metadata.step_stats();
419d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  for (const auto& dev_stat : step_stats.dev_stats()) {
420d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto& device_name = dev_stat.device();
421d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    auto& hist = device_to_hist_map[device_name];
422d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    for (const auto& node_stat : dev_stat.node_stats()) {
423d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      for (const auto& node_output : node_stat.output()) {
424d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower        // TODO(dyoon): Calculate tensor size from tensor_description's dtype
425d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower        // and shape, instead of using optional allocation_description.
426d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower        const auto size = node_output.tensor_description()
427d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower                              .allocation_description()
428d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower                              .allocated_bytes();
429d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower        hist.Add(size);
430d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      }
431d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    }
432d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  }
433d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  if (verbosity) {
434d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    output << "\n";
435d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    output << "Per device tensor size histogram.\n";
436d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  }
437d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
438d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  std::unordered_map<string, TensorSizeHistogram> device_class_to_hist_map;
439d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  for (const auto& device_hist : device_to_hist_map) {
440d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto& device_name = device_hist.first;
441d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto& hist = device_hist.second;
442d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    if (verbosity) {
443d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      output << "Device: " << device_name << "\n" << hist.ToString() << "\n";
444d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    }
445d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto device_class = GetDeviceClass(device_name);
446d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    auto it = device_class_to_hist_map.find(device_class);
447d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    if (it == device_class_to_hist_map.end()) {
448d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      device_class_to_hist_map.emplace(device_class, TensorSizeHistogram(hist));
449d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    } else {
450d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower      it->second.Merge(hist);
451d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    }
452d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  }
453d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  output << "\n";
454d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  output << "Aggregated per device / channel type tensor size histogram:\n";
455d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  for (const auto& device_hist : device_class_to_hist_map) {
456d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto& device_name = device_hist.first;
457d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    const auto& hist = device_hist.second;
458d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower    output << "Device: " << device_name << "\n" << hist.ToString() << "\n";
459d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  }
460d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  output << "\n";
461d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
462d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower  return output.str();
463d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower}
464d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower
46596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner}  // end namespace grappler
46696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner}  // end namespace tensorflow
467