/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
1496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner==============================================================================*/ 1596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 1696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/grappler/costs/utils.h" 1796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 1896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include <stddef.h> 1996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include <utility> 2096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 2196cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "third_party/eigen3/Eigen/Core" 2296cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 2396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#if GOOGLE_CUDA 2496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "cuda/include/cuda.h" 2596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "cuda/include/cuda_runtime_api.h" 2696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "cuda/include/cudnn.h" 2796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#endif 2896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 29d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/framework/allocation_description.pb.h" 3013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/framework/attr_value.pb.h" 3113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/framework/op.h" 3213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/framework/op_def.pb.h" 33d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/framework/step_stats.pb.h" 34e85d3df92deb9d717befdf173966a2913ac2aea0Geoffrey Irving#include "tensorflow/core/framework/tensor.pb.h" 35d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlower#include "tensorflow/core/framework/tensor_description.pb.h" 3696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/framework/tensor_shape.pb.h" 3796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/framework/types.pb.h" 38a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou#include "tensorflow/core/graph/graph.h" 39a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou#include "tensorflow/core/graph/tensor_id.h" 401937df84a17f8f520ab2d45d083d65188afd9c62Benoit Steiner#include "tensorflow/core/grappler/clusters/utils.h" 4124c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou#include "tensorflow/core/grappler/utils.h" 42d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/lib/core/bits.h" 4396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/lib/strings/numbers.h" 4496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/lib/strings/strcat.h" 4596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/platform/cpu_info.h" 4613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/platform/env.h" 47d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower#include "tensorflow/core/platform/logging.h" 4813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou#include "tensorflow/core/platform/protobuf.h" 49d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlower#include "tensorflow/core/protobuf/config.pb.h" 5096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner#include "tensorflow/core/util/device_name_utils.h" 5196cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 5296cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steinernamespace tensorflow { 5396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steinernamespace grappler { 5496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 55a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhoustatic OpInfo::TensorProperties UnknownInput() { 56a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou OpInfo::TensorProperties input; 57a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou input.set_dtype(DataType::DT_INVALID); 58a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou input.mutable_shape()->set_unknown_rank(true); 59a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou return input; 60a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou} 61a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou 62a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhoustatic std::vector<TensorProto> ExtractTensors(const AttrValue& attr_value) { 63a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou std::vector<TensorProto> tensors; 64a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou switch (attr_value.value_case()) { 65a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou case AttrValue::kTensor: { 66a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou tensors.push_back(attr_value.tensor()); 67a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou break; 68a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou } 69a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou case AttrValue::kList: { 70a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou for (const auto& tensor_proto : attr_value.list().tensor()) { 71a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou tensors.push_back(tensor_proto); 72a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou } 
73a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou break; 74a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou } 75a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou default: {} 76a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou } 77a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou return tensors; 78a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou} 79a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou 80513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner// Annotate the op_info inputs with extra information when possible (e.g. the 81513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner// input value if it's known statically). 8213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhoustatic void ExtractExtraProperties( 8313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const NodeDef& node, 8413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const std::unordered_map<string, const NodeDef*>& name_to_node, 85513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner OpInfo* op_info) { 8613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou OpRegistry* op_registry = OpRegistry::Global(); 87337748d9800717c50195f05e5e1027426b965408Benoit Steiner const OpDef* op_def = nullptr; 8813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou auto s = op_registry->LookUpOpDef(node.op(), &op_def); 8913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou if (!s.ok()) { 9013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou op_def = nullptr; 9113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou } 9213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 9313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou for (int i = 0; i < node.input_size(); ++i) { 9413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const string input_name = node.input(i); 9513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou CHECK(!input_name.empty()); 9624c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou if (IsControlInput(input_name)) { 9724c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou 
continue; 9824c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou } 9913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou TensorId input_tensor_id = ParseTensorName(input_name); 10013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const string input_node_name = input_tensor_id.first.ToString(); 10113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 10213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou auto iter = name_to_node.find(input_node_name); 10313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou if (iter == name_to_node.end()) continue; 10413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const NodeDef* input_node = iter->second; 10513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 10624c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou if (i >= op_info->inputs_size()) { 10724c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou LOG(ERROR) << "OpInfo's inputs doesn't match the graph! OpInfo: " 10824c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou << op_info->DebugString() 10924c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou << "\nCurrent node: " << node.DebugString() 11024c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou << "\nInput node: " << input_node->DebugString(); 11124c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou } 11224c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou 11313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou // The value attribute in Const input is useful for cost prediction. 
11424c5f7784889a5ecf1b3fe42ebb08709590abafdYuefeng Zhou if (input_node->op() == "Const" && i < op_info->inputs_size()) { 11513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou auto it = input_node->attr().find("value"); 11613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou if (it == input_node->attr().end()) continue; 11713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 11813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const AttrValue& attr_value = it->second; 11913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou std::vector<TensorProto> tensors = ExtractTensors(attr_value); 12013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou if (tensors.empty()) continue; 12113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 12213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const TensorProto& t = tensors[0]; 123513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner OpInfo::TensorProperties* input = op_info->mutable_inputs(i); 124513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner *(input->mutable_value()) = t; 12513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 12613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou // For filename input, the file size can also be useful. 
127337748d9800717c50195f05e5e1027426b965408Benoit Steiner if (op_def && i < op_def->input_arg_size() && 12813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou op_def->input_arg(i).name().find("filename") != std::string::npos) { 12913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou Tensor tensor; 130337748d9800717c50195f05e5e1027426b965408Benoit Steiner if (!tensor.FromProto(t)) { 131337748d9800717c50195f05e5e1027426b965408Benoit Steiner continue; 132337748d9800717c50195f05e5e1027426b965408Benoit Steiner } 133337748d9800717c50195f05e5e1027426b965408Benoit Steiner if (tensor.NumElements() != 1) { 134337748d9800717c50195f05e5e1027426b965408Benoit Steiner continue; 135337748d9800717c50195f05e5e1027426b965408Benoit Steiner } 13613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const string filename = tensor.scalar<string>()(); 13713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 13813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou Env* env = Env::Default(); 13913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou FileStatistics stat; 14013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou Status s = env->Stat(filename, &stat); 141337748d9800717c50195f05e5e1027426b965408Benoit Steiner if (!s.ok()) { 142337748d9800717c50195f05e5e1027426b965408Benoit Steiner continue; 14313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou } 144337748d9800717c50195f05e5e1027426b965408Benoit Steiner AttrValue attr; 145337748d9800717c50195f05e5e1027426b965408Benoit Steiner attr.set_i(stat.length); 146337748d9800717c50195f05e5e1027426b965408Benoit Steiner string attr_key = strings::StrCat("input_", i, "_filesize"); 147513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner (*op_info->mutable_attr())[attr_key] = attr; 14813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou } 14913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou } 15013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 15113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou // When the input is a handle (e.g. 
look up table handle), the information 15213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou // in the op itself is not sufficient to predict the op memory. 153337748d9800717c50195f05e5e1027426b965408Benoit Steiner if (op_def && i < op_def->input_arg_size() && 15413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou op_def->input_arg(i).name().find("handle") != std::string::npos) { 15513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou string new_key = strings::StrCat("parent_", i, "_op"); 15613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou AttrValue attr; 15713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou attr.set_s(input_node->op()); 158513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner (*op_info->mutable_attr())[new_key] = attr; 15913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou // TODO(yuefengz): Only parent node's op name is copied. Copy inputs 16013b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou // and attributes when necessary. 16113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou } 16213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou } 16313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou} 16413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 16596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steinerstd::vector<OpInfo::TensorProperties> FindInputFeatures( 16696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner const NodeDef& node, 167a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou const std::unordered_map<string, const CostGraphDef::Node*>& name_to_cost, 168a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou const std::unordered_map<string, const NodeDef*>& name_to_node) { 16996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner std::vector<OpInfo::TensorProperties> inputs; 17096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner for (const auto& input_name : node.input()) { 17196cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner CHECK(!input_name.empty()); 172a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou 
TensorId input_tensor_id = ParseTensorName(input_name); 173a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou const string input_node_name = input_tensor_id.first.ToString(); 174a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou const int output_index = input_tensor_id.second; 175a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou 176a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou // Skip control inputs. 177a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou if (output_index == Graph::kControlSlot) { 17896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner continue; 17996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner } 18096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 181a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou auto it = name_to_cost.find(input_node_name); 18296cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner if (it == name_to_cost.end() || output_index < 0) { 183a88053349afdd85f9d0efca7917fdb8eedbcc647Yuefeng Zhou inputs.push_back(UnknownInput()); 18496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner } else { 18596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner const CostGraphDef::Node* input_cost = it->second; 186e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang if (input_cost->output_info_size() == 0) { 187e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang inputs.push_back(UnknownInput()); 188e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang } else { 189e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang const CostGraphDef::Node::OutputInfo& output = 190e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang input_cost->output_info(output_index); 191e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang OpInfo::TensorProperties input; 192e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang input.set_dtype(output.dtype()); 193e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang *input.mutable_shape() = output.shape(); 194e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang inputs.push_back(input); 
195e5088cb823964216adfba3155965e0f6f2c7bf7cYao Zhang } 19696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner } 19796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner } 19896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 19996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner return inputs; 20096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner} 20196cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 20213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng ZhouDeviceProperties GetDeviceInfo(const string& device_str) { 20396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner DeviceNameUtils::ParsedName parsed; 20413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou if (DeviceNameUtils::ParseFullName(device_str, &parsed)) { 20596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner if (parsed.type == "GPU") { 20696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner return GetLocalGPUInfo(parsed.id); 20796cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner } else if (parsed.type == "CPU") { 20896cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner return GetLocalCPUInfo(); 20996cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner } 21096cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner } 21187ba9f5370c0f7068760f9536979d9183f6dfe9cBenoit Steiner DeviceProperties device; 21296cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner device.set_type("UNKNOWN"); 21396cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner return device; 21496cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner} 21596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner 21613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng ZhouDeviceProperties GetDeviceInfo(const CostGraphDef::Node& node) { 21713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou return GetDeviceInfo(node.device()); 21813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou} 21913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 2200cc851c08f9ff4c084c98e28e72d3f99a78d161fYuefeng ZhouOpInfo BuildOpInfoWithoutDevice( 
2210cc851c08f9ff4c084c98e28e72d3f99a78d161fYuefeng Zhou const NodeDef& node, 22213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const std::unordered_map<string, const NodeDef*>& name_to_node, 22313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou const std::vector<OpInfo::TensorProperties>& inputs) { 22413b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou OpInfo op_info; 22513b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou op_info.set_op(node.op()); 22613b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou *op_info.mutable_attr() = node.attr(); 22713b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou for (auto& input : inputs) { 22813b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou *op_info.add_inputs() = input; 22913b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou } 230513def0bb27e4a7c29f6ff533d8ca150b2ab78b4Benoit Steiner ExtractExtraProperties(node, name_to_node, &op_info); 23113b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou return op_info; 23213b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou} 23313b552e332698ed6282aad3b702c52cc619a8c37Yuefeng Zhou 2344a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlowerstring GetOpDescription(const OpInfo& op_info) { 2354a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower string description = "["; 2364a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower description += "Op=" + op_info.op() + ", "; 2374a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower description += "input_shapes=["; 2384a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower for (auto const& input : op_info.inputs()) { 2394a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower description += PartialTensorShape::DebugString(input.shape()); 2404a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower } 2414a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower description += "]"; 2424a09e96797fdcf55b308fade4fd719ef77497d0dA. 
Unique TensorFlower return description; 2434a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower} 2444a09e96797fdcf55b308fade4fd719ef77497d0dA. Unique TensorFlower 2452251633a509d6807d309bff60013c86650bb891dBenoit SteinerOpPerformanceList CostGraphToOpPerformanceData(const CostGraphDef& cost_graph, 2462251633a509d6807d309bff60013c86650bb891dBenoit Steiner const GraphDef& graph) { 2472251633a509d6807d309bff60013c86650bb891dBenoit Steiner OpPerformanceList ret; 2482251633a509d6807d309bff60013c86650bb891dBenoit Steiner std::unordered_map<string, const CostGraphDef::Node*> name_to_cost; 2492251633a509d6807d309bff60013c86650bb891dBenoit Steiner std::unordered_map<string, const NodeDef*> name_to_node; 2502251633a509d6807d309bff60013c86650bb891dBenoit Steiner for (auto& node : cost_graph.node()) { 2512251633a509d6807d309bff60013c86650bb891dBenoit Steiner name_to_cost[node.name()] = &node; 2522251633a509d6807d309bff60013c86650bb891dBenoit Steiner } 2532251633a509d6807d309bff60013c86650bb891dBenoit Steiner for (auto& node : graph.node()) { 2542251633a509d6807d309bff60013c86650bb891dBenoit Steiner name_to_node[node.name()] = &node; 2552251633a509d6807d309bff60013c86650bb891dBenoit Steiner } 2562251633a509d6807d309bff60013c86650bb891dBenoit Steiner 2572251633a509d6807d309bff60013c86650bb891dBenoit Steiner for (const auto& node : graph.node()) { 2582251633a509d6807d309bff60013c86650bb891dBenoit Steiner // Skip the nodes that are not in the cost graph: these are nodes that 2592251633a509d6807d309bff60013c86650bb891dBenoit Steiner // aren't run, because they aren't in the intersection of transitive 2602251633a509d6807d309bff60013c86650bb891dBenoit Steiner // fan-in of a fetch node and the transitive fan-out of an input, or nodes 2612251633a509d6807d309bff60013c86650bb891dBenoit Steiner // that were optimized away by the optimizer. Since they don't contribute 2622251633a509d6807d309bff60013c86650bb891dBenoit Steiner // to the execution time we simply discard them. 
2632251633a509d6807d309bff60013c86650bb891dBenoit Steiner auto it = name_to_cost.find(node.name()); 2642251633a509d6807d309bff60013c86650bb891dBenoit Steiner if (it == name_to_cost.end()) { 2652251633a509d6807d309bff60013c86650bb891dBenoit Steiner continue; 2662251633a509d6807d309bff60013c86650bb891dBenoit Steiner } 2672251633a509d6807d309bff60013c86650bb891dBenoit Steiner const CostGraphDef::Node* cost_node = it->second; 2682251633a509d6807d309bff60013c86650bb891dBenoit Steiner 2692251633a509d6807d309bff60013c86650bb891dBenoit Steiner OpPerformance* perf = ret.add_op_performance(); 2702251633a509d6807d309bff60013c86650bb891dBenoit Steiner perf->set_node(node.name()); 2712251633a509d6807d309bff60013c86650bb891dBenoit Steiner 2722251633a509d6807d309bff60013c86650bb891dBenoit Steiner std::vector<OpInfo::TensorProperties> inputs = 2732251633a509d6807d309bff60013c86650bb891dBenoit Steiner FindInputFeatures(node, name_to_cost, name_to_node); 2740cc851c08f9ff4c084c98e28e72d3f99a78d161fYuefeng Zhou *perf->mutable_op() = BuildOpInfoWithoutDevice(node, name_to_node, inputs); 2750cc851c08f9ff4c084c98e28e72d3f99a78d161fYuefeng Zhou *perf->mutable_op()->mutable_device() = GetDeviceInfo(cost_node->device()); 2762251633a509d6807d309bff60013c86650bb891dBenoit Steiner 2772251633a509d6807d309bff60013c86650bb891dBenoit Steiner perf->set_temporary_memory_size(cost_node->temporary_memory_size()); 2782251633a509d6807d309bff60013c86650bb891dBenoit Steiner // Note that CostGraphDef::Node::compute_cost is microseconds, while 2792251633a509d6807d309bff60013c86650bb891dBenoit Steiner // OpPerformance.compute_cost is nanoseconds. 
2802251633a509d6807d309bff60013c86650bb891dBenoit Steiner perf->set_compute_cost(cost_node->compute_cost() * 1000); 2812251633a509d6807d309bff60013c86650bb891dBenoit Steiner perf->set_compute_time(cost_node->compute_time() * 1000); 2822251633a509d6807d309bff60013c86650bb891dBenoit Steiner perf->set_memory_time(cost_node->memory_time() * 1000); 2832251633a509d6807d309bff60013c86650bb891dBenoit Steiner 2842251633a509d6807d309bff60013c86650bb891dBenoit Steiner for (const auto& output_info : cost_node->output_info()) { 2852251633a509d6807d309bff60013c86650bb891dBenoit Steiner perf->mutable_op_memory()->add_output_memory(output_info.size()); 2862251633a509d6807d309bff60013c86650bb891dBenoit Steiner } 2872251633a509d6807d309bff60013c86650bb891dBenoit Steiner 288e86fc00d097cb8cc118435ebd464497825bcad1eYuefeng Zhou perf->mutable_op_memory()->set_temp_memory( 289e86fc00d097cb8cc118435ebd464497825bcad1eYuefeng Zhou cost_node->temporary_memory_size()); 290e86fc00d097cb8cc118435ebd464497825bcad1eYuefeng Zhou perf->mutable_op_memory()->set_persistent_memory( 291e86fc00d097cb8cc118435ebd464497825bcad1eYuefeng Zhou cost_node->persistent_memory_size()); 2922251633a509d6807d309bff60013c86650bb891dBenoit Steiner } 2932251633a509d6807d309bff60013c86650bb891dBenoit Steiner return ret; 2942251633a509d6807d309bff60013c86650bb891dBenoit Steiner} 2952251633a509d6807d309bff60013c86650bb891dBenoit Steiner 296d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowervoid TensorSizeHistogram::Add(const uint64 value) { 297d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower num_elem_++; 298d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower sum_elem_ += value; 299d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower min_ = std::min(min_, value); 300d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower max_ = std::max(max_, value); 301d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlower buckets_[Index(value)]++; 302d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower} 303d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 304d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowervoid TensorSizeHistogram::Merge(const TensorSizeHistogram& src) { 305d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower num_elem_ += src.num_elem_; 306d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower sum_elem_ += src.sum_elem_; 307d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower min_ = std::min(min_, src.min_); 308d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower max_ = std::max(max_, src.max_); 309d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower std::transform(buckets_.begin(), buckets_.end(), src.buckets_.begin(), 310d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower buckets_.begin(), std::plus<uint64>()); 311d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower} 312d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 313d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowerstd::string TensorSizeHistogram::ToString() const { 314d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower std::string r; 315d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower char buf[200]; 316d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower snprintf(buf, sizeof(buf), "Count: %lld, Average: ", num_elem_); 317d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(buf); 318d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(strings::HumanReadableNumBytes(Average())); 319d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(", Min: "); 320d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(strings::HumanReadableNumBytes(min_)); 321d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlower r.append(", Max: "); 322d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(strings::HumanReadableNumBytes(max_)); 323d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append("\n------------------------------------------------------\n"); 324d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const double mult = num_elem_ > 0 ? 100.0 / num_elem_ : 0.0; 325d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower uint64 cumul_sum = 0; 326d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 327d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const int size_string_width = 12; 328d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower for (int i = 0; i < buckets_.size(); i++) { 329d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower if (buckets_[i] == 0) continue; 330d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower cumul_sum += buckets_[i]; 331d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append("[ "); 332d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower if (i == 0) { 333d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(size_string_width - 2, ' '); 334d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append("0B"); 335d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } else { 336d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower uint64 left = 1ULL << (i - 1); 337d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto left_string = strings::HumanReadableNumBytes(left); 338d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(size_string_width - left_string.size(), ' '); 339d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(left_string); 340d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 341d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlower r.append(", "); 342d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower uint64 right = 1ULL << i; 343d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto right_string = strings::HumanReadableNumBytes(right); 344d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(size_string_width - right_string.size(), ' '); 345d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(right_string); 346d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower snprintf(buf, sizeof(buf), ") %7lld %7.3f%% %7.3f%% ", 347d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower buckets_[i], // count 348d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower mult * buckets_[i], // percentage 349d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower mult * cumul_sum); // cum percentage 350d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(buf); 351d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 352d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // Add hash marks based on percentage; 40 marks for 100%. 353d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower auto marks = static_cast<int>( 354d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower (static_cast<double>(40 * buckets_[i] + (num_elem_ >> 1)) / num_elem_)); 355d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.append(marks, '#'); 356d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower r.push_back('\n'); 357d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 358d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower return r; 359d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower} 360d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 361d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlowerconst int TensorSizeHistogram::Index(const uint64 value) const { 362d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // Log2Floor64 returns -1 for 0, 0 for 1, 1 for 2-3, 2 for 4-7, ... 363d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto index = Log2Floor64(value) + 1; 364d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower return std::min(index, kMaxBuckets - 1); 365d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower} 366d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 367d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowerstring GetDeviceClassForNonChannelDevice(const string& device_name) { 368d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower DeviceNameUtils::ParsedName parsed_name; 369d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower bool parsed = DeviceNameUtils::ParseFullName(device_name, &parsed_name); 3704241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner if (!parsed) { 3714241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner string name = str_util::StringReplace(device_name, "/job_", "/job:", true); 3724241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner name = str_util::StringReplace(name, "/replica_", "/replica:", true); 3734241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner name = str_util::StringReplace(name, "/task_", "/task:", true); 3744241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner name = str_util::StringReplace(name, "/device_", "/device:", true); 3754241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner name = str_util::StringReplace(name, "GPU_", "GPU:", true); 3764241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner name = str_util::StringReplace(name, "CPU_", "CPU:", true); 3774241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner name = str_util::StringReplace(name, "gpu_", "gpu:", true); 3784241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner name = str_util::StringReplace(name, "cpu_", "cpu:", 
true); 3794241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner parsed = DeviceNameUtils::ParseFullName(name, &parsed_name); 3804241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner } 381d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower if (parsed) { 3824241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner const string jobname = parsed_name.has_job ? parsed_name.job : ""; 383d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower return strings::StrCat("/", jobname, "/", parsed_name.type); 384d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } else { 385d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower return "Unclassified"; 386d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 387d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower} 388d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 389d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowerstring GetDeviceClass(const string& device_name) { 390d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // TODO(dyoon): channel device name follows the convention we currently have 391d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // in VirtualScheduler. This should be revised with VirtualScheduler as well 392d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // as VirtualPlacer in the future. 393d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower if (device_name.find("Channel") != string::npos) { 3944241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner const string from = "_from_"; 3954241b86dc8da0f8ba23cb832c090469635bf09a9Benoit Steiner const string to = "_to_"; 396d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto from_loc = device_name.find(from); 397d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto to_loc = device_name.find(to); 398d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlower const auto src_device_full = device_name.substr( 399d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower from_loc + from.size(), to_loc - (from_loc + from.size())); 400d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto dst_device_full = device_name.substr(to_loc + to.size()); 401d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower return strings::StrCat( 402d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower "Channel", ": ", GetDeviceClassForNonChannelDevice(src_device_full), 403d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower " -> ", GetDeviceClassForNonChannelDevice(dst_device_full)); 404d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } else { 405d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower return GetDeviceClassForNonChannelDevice(device_name); 406d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 407d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower} 408d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 409d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlowerstring GetStatsStringFromRunMetadata(const RunMetadata& run_metadata, 410d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower bool verbosity) { 411d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // TODO(dyoon): print out other stats as needed. 412d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower std::ostringstream output; 413d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 414d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // Tensor size histogram: 415d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // if verbosity, it outputs per-device histogram, 416d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // otherwise, only per-class histogram. 417d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlower std::unordered_map<string, TensorSizeHistogram> device_to_hist_map; 418d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto& step_stats = run_metadata.step_stats(); 419d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower for (const auto& dev_stat : step_stats.dev_stats()) { 420d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto& device_name = dev_stat.device(); 421d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower auto& hist = device_to_hist_map[device_name]; 422d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower for (const auto& node_stat : dev_stat.node_stats()) { 423d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower for (const auto& node_output : node_stat.output()) { 424d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // TODO(dyoon): Calculate tensor size from tensor_description's dtype 425d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower // and shape, instead of using optional allocation_description. 426d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto size = node_output.tensor_description() 427d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower .allocation_description() 428d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower .allocated_bytes(); 429d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower hist.Add(size); 430d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 431d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 432d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 433d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower if (verbosity) { 434d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower output << "\n"; 435d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower output << "Per device tensor size histogram.\n"; 436d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlower } 437d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 438d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower std::unordered_map<string, TensorSizeHistogram> device_class_to_hist_map; 439d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower for (const auto& device_hist : device_to_hist_map) { 440d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto& device_name = device_hist.first; 441d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto& hist = device_hist.second; 442d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower if (verbosity) { 443d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower output << "Device: " << device_name << "\n" << hist.ToString() << "\n"; 444d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 445d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto device_class = GetDeviceClass(device_name); 446d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower auto it = device_class_to_hist_map.find(device_class); 447d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower if (it == device_class_to_hist_map.end()) { 448d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower device_class_to_hist_map.emplace(device_class, TensorSizeHistogram(hist)); 449d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } else { 450d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower it->second.Merge(hist); 451d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 452d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 453d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower output << "\n"; 454d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower output << "Aggregated per device / channel type tensor size histogram:\n"; 455d08cb107e6eeedd74c44f0d3654753b141cfa645A. 
Unique TensorFlower for (const auto& device_hist : device_class_to_hist_map) { 456d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto& device_name = device_hist.first; 457d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower const auto& hist = device_hist.second; 458d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower output << "Device: " << device_name << "\n" << hist.ToString() << "\n"; 459d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower } 460d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower output << "\n"; 461d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 462d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower return output.str(); 463d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower} 464d08cb107e6eeedd74c44f0d3654753b141cfa645A. Unique TensorFlower 46596cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner} // end namespace grappler 46696cb8f886ad84202e363c5a9da56cdbce4eaf408Benoit Steiner} // end namespace tensorflow 467