1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#include "tensorflow/core/grappler/costs/virtual_placer.h"
17#include "tensorflow/core/framework/node_def.pb.h"
18#include "tensorflow/core/grappler/clusters/cluster.h"
19#include "tensorflow/core/grappler/devices.h"
20#include "tensorflow/core/lib/strings/str_util.h"
21#include "tensorflow/core/util/device_name_utils.h"
22
23namespace tensorflow {
24namespace grappler {
25
26VirtualPlacer::VirtualPlacer(const Cluster* cluster) {
27  CHECK(cluster);
28
29  // Default job name for canonical device name. Needs to be set before the
30  // first call to to_lfqn_or_empty()
31  default_job_name_lowercase_ = "localhost";
32
33  devices_ = cluster->GetDevices();
34  lfqn_map_.reserve(devices_.size());
35  for (const auto& kv : devices_) {
36    const auto lfqn = to_lfqn_or_empty(kv.first);
37    if (lfqn.empty()) {
38      LOG(ERROR) << "VirtualPlacer couldn't parse device name from cluster: "
39                 << kv.first;
40    } else {
41      lfqn_map_[lfqn] = kv.first;
42    }
43  }
44
45  if (devices_.empty()) {
46    // If there are no devices in the cluster, add a single device, "UNKNOWN" to
47    // the cluster.
48    default_device_name_ = "UNKNOWN";
49    DeviceProperties& prop = devices_["UNKNOWN"];
50    prop.set_type("UNKNOWN");
51  } else if (devices_.size() == 1) {
52    // If there is only one device in the cluster, use it as default device,
53    // whatever it is.
54    default_device_name_ = devices_.begin()->first;
55  } else {
56    // Default device is set from the devices in the cluster in the following
57    // priority: /gpu:0, /cpu:0, or any device.
58    // TODO(dyoon): This logic assumes single machine with CPU and GPU devices.
59    // Make it more general to support multiple machines, job types, and devices
60    // other than CPU and GPU.
61    std::map<int, string> cpu_devices;  // CPU device map: id -> device name.
62    std::map<int, string> gpu_devices;  // GPU device map: id -> device name.
63    for (const auto& kv : lfqn_map_) {
64      const auto& lfqn = kv.first;
65      const auto& cluster_device_name = kv.second;
66      DeviceNameUtils::ParsedName parsed_name;
67      bool parsed = DeviceNameUtils::ParseFullName(lfqn, &parsed_name);
68      if (parsed) {
69        // Parsed devices are stored to cpu_devices or gpu_devices map,
70        // addressed (and ordered) by device id.
71        const auto type = str_util::Lowercase(parsed_name.type);
72        if (type == "gpu") {
73          gpu_devices[parsed_name.id] = cluster_device_name;
74        } else if (type == "cpu") {
75          cpu_devices[parsed_name.id] = cluster_device_name;
76        }
77      }
78    }
79
80    if (!gpu_devices.empty()) {
81      // GPU:0 (or GPU with smallest device id).
82      default_device_name_ = gpu_devices.begin()->second;
83    } else if (!cpu_devices.empty()) {
84      // CPU:0 (or CPU with smallest device id).
85      default_device_name_ = cpu_devices.begin()->second;
86    } else {
87      default_device_name_ = devices_.begin()->first;  // Any device.
88    }
89  }
90
91  // Scan the device names from the cluster, and if there is one job name used,
92  // use it for canonical device name.
93  std::unordered_set<string> job_names_from_cluster;
94  for (const auto& device : lfqn_map_) {
95    const auto& lfqn = device.first;
96    DeviceNameUtils::ParsedName parsed_name;
97    bool parsed = DeviceNameUtils::ParseFullName(lfqn, &parsed_name);
98    if (parsed && !parsed_name.job.empty()) {
99      job_names_from_cluster.insert(parsed_name.job);
100      if (job_names_from_cluster.size() > 1) {
101        break;
102      }
103    }
104  }
105  // If there is only  type of job name in all the devices in the cluster, use
106  // that one as default job name; otherwise, use localhost.
107  // TODO(dyoon): this should be improved, especially when the cluster is
108  // composed of multiple worker, PS, and other types of jobs.
109  if (job_names_from_cluster.size() == 1) {
110    auto it = job_names_from_cluster.begin();
111    default_job_name_lowercase_ = *it;
112  }
113}
114
115const DeviceProperties& VirtualPlacer::get_device(const NodeDef& node) const {
116  string device = get_canonical_device_name(node);
117  VLOG(3) << "node.name=" << node.name() << " node.device=" << node.device()
118          << " is placed on: " << device;
119  auto it = devices_.find(device);
120  DCHECK(it != devices_.end());
121  return it->second;
122}
123
124string VirtualPlacer::get_canonical_device_name(const NodeDef& node) const {
125  if (node.device().empty()) {
126    return default_device_name_;
127  }
128
129  const auto lfqn = to_lfqn_or_empty(node.device());
130  if (lfqn.empty()) {
131    return default_device_name_;
132  }
133
134  const auto it = lfqn_map_.find(lfqn);
135  if (it != lfqn_map_.end()) {
136    return it->second;
137  }
138
139  return default_device_name_;
140}
141
142string VirtualPlacer::to_lfqn_or_empty(const string& device_name) const {
143  DeviceNameUtils::ParsedName parsed_name;
144  const auto lowercase_name = str_util::Lowercase(device_name);
145  bool parsed = DeviceNameUtils::ParseFullName(lowercase_name, &parsed_name);
146  if (!parsed) {
147    parsed = DeviceNameUtils::ParseLocalName(lowercase_name, &parsed_name);
148    parsed_name.job = "localhost";
149  }
150  if (!parsed) {
151    if (lowercase_name == "gpu" || lowercase_name == "cpu") {
152      parsed_name.job = "localhost";
153      parsed_name.type = lowercase_name;
154      parsed = true;
155    }
156  }
157  if (!parsed) {
158    return {};
159  }
160
161  if (parsed_name.job.empty()) {
162    parsed_name.job = default_job_name_lowercase_;
163  }
164
165  // Have to do this, because parser returns uppercase types for CPU and GPU.
166  parsed_name.type = str_util::Lowercase(parsed_name.type);
167
168  string lfqn = strings::StrCat(
169      "/job:", parsed_name.job, "/replica:", parsed_name.replica,
170      "/task:", parsed_name.task, "/device:", parsed_name.type, ":",
171      parsed_name.id);
172  return lfqn;
173}
174
175}  // end namespace grappler
176}  // end namespace tensorflow
177