human_readable_profile_builder.h revision 7d433077006a8e775f1b1b449f11a0582966a2e5
1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_
17#define TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_
18
19#include <vector>
20
21#include "tensorflow/compiler/xla/types.h"
22#include "tensorflow/core/lib/core/stringpiece.h"
23#include "tensorflow/core/platform/logging.h"
24#include "tensorflow/core/platform/types.h"
25
26namespace xla {
27
28// HumanReadableProfileBuilder helps you create a textual profile of a
29// computation, suitable for consumption by humans.
30class HumanReadableProfileBuilder {
31 public:
32  explicit HumanReadableProfileBuilder(tensorflow::StringPiece computation_name,
33                                       int64 total_cycles,
34                                       double clock_rate_ghz)
35      : computation_name_(computation_name.ToString()),
36        total_cycles_(total_cycles),
37        clock_rate_ghz_(clock_rate_ghz) {
38    CHECK_GE(clock_rate_ghz, 1e-9);
39  }
40
41  int64 total_cycles() const { return total_cycles_; }
42
43  // Adds an operation to the profile.  If you don't know the number of
44  // floating-point ops or bytes touched by the op, pass -1 for that param.
45  void AddOp(tensorflow::StringPiece op_name,
46             tensorflow::StringPiece short_name,
47             tensorflow::StringPiece category, int64 cycles, int64 flop_count,
48             int64 transcendental_count, int64 bytes_accessed,
49             float optimal_seconds) {
50    op_infos_.push_back(
51        {op_name.ToString(), short_name.ToString(), category.ToString(), cycles,
52         flop_count, transcendental_count, bytes_accessed, optimal_seconds});
53  }
54
55  // Gets the human-readable profile.
56  string ToString() const;
57
58 private:
59  struct OpInfo {
60    string name;
61    string short_name;
62    string category;
63    int64 cycles;
64    int64 flop_count;
65    int64 transcendental_count;
66    int64 bytes_accessed;
67    float optimal_seconds;
68  };
69
70  double CyclesToSeconds(int64 cycles) const {
71    return cycles / clock_rate_ghz_ / 1e9;
72  }
73  double CyclesToMicroseconds(int64 cycles) const {
74    return cycles / clock_rate_ghz_ / 1000.0;
75  }
76
77  string computation_name_;
78  int64 total_cycles_;
79  double clock_rate_ghz_;
80  std::vector<OpInfo> op_infos_;
81};
82
83}  // namespace xla
84
85#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_
86