10f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
20f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
30f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenLicensed under the Apache License, Version 2.0 (the "License");
40f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Wardenyou may not use this file except in compliance with the License.
50f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenYou may obtain a copy of the License at
60f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
70f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    http://www.apache.org/licenses/LICENSE-2.0
80f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
90f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenUnless required by applicable law or agreed to in writing, software
100f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Wardendistributed under the License is distributed on an "AS IS" BASIS,
110f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
120f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenSee the License for the specific language governing permissions and
130f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Wardenlimitations under the License.
140f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden==============================================================================*/
150f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
160f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#define EIGEN_USE_THREADS
170f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
180f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/core/common_runtime/constant_folding.h"
190f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/core/common_runtime/threadpool_device.h"
200f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/core/graph/graph_constructor.h"
210f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/core/graph/node_builder.h"
220f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/core/graph/subgraph.h"
230f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/core/kernels/quantization_utils.h"
240f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/core/platform/init_main.h"
250f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/core/public/session.h"
260f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/core/util/command_line_flags.h"
270f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden#include "tensorflow/tools/graph_transforms/transform_utils.h"
280f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Wardennamespace tensorflow {
300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Wardennamespace graph_transforms {
310f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// Holds the information we need to translate from a float version of this op
330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// into the quantized equivalent.
340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Wardenstruct QuantizedOpInfo {
350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // The name of the float op.
360f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  string float_name;
370f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Which attributes to copy directly over.
380f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::vector<string> attrs_to_copy;
390f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Extra data type attributes we need to set.
400f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::vector<std::pair<string, DataType>> dtypes_to_set;
410f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // What depth of inputs the op can read in.
420f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  DataType input_bit_depth;
430f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // The depth of the op's quantized outputs.
440f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  DataType output_bit_depth;
450f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Which inputs (e.g. shapes) aren't involved in the quantization process.
460f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::set<int32> unquantized_inputs;
470f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // How the outputs are arranged, either
480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // [input0, input1, min0, max0, min1, max1] for contiguous, or
490f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // [input0, input1, min0, min1, max0, max1] for separate.
500f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // The separate order is needed because it's the only way to specify unknown
510f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // numbers of inputs for ops like Concat.
520f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  enum { CONTIGUOUS_MIN_MAX, SEPARATE_MIN_MAX } min_max_order;
530f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden};
540f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
550f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// Every op that has a quantized equivalent should be listed here, so that the
560f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// conversion process can transform them.
570f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Wardenconst std::vector<QuantizedOpInfo>& GetQuantizedOpList() {
580f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  static const std::vector<QuantizedOpInfo> op_list = {
59625ce1ac462292fb4bc76a06343f170871cb428cA. Unique TensorFlower      {"Add",
60625ce1ac462292fb4bc76a06343f170871cb428cA. Unique TensorFlower       {},
61625ce1ac462292fb4bc76a06343f170871cb428cA. Unique TensorFlower       {{"T1", DT_QUINT8}, {"T2", DT_QUINT8}, {"Toutput", DT_QINT32}},
62625ce1ac462292fb4bc76a06343f170871cb428cA. Unique TensorFlower       DT_QUINT8,
63625ce1ac462292fb4bc76a06343f170871cb428cA. Unique TensorFlower       DT_QINT32,
64625ce1ac462292fb4bc76a06343f170871cb428cA. Unique TensorFlower       {},
65625ce1ac462292fb4bc76a06343f170871cb428cA. Unique TensorFlower       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
660f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"AvgPool",
670f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {"ksize", "strides", "padding"},
680f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {{"T", DT_QUINT8}},
690f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
700f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
710f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
720f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
730f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"BiasAdd",
740f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
750f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {{"T1", DT_QUINT8}, {"T2", DT_QUINT8}, {"out_type", DT_QINT32}},
760f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
770f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QINT32,
780f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
790f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
800f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"Concat",
810f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {"N"},
820f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {{"T", DT_QUINT8}},
830f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
840f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
850f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {0},
860f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       QuantizedOpInfo::SEPARATE_MIN_MAX},
870f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"Conv2D",
880f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {"strides", "padding"},
890f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {{"Tinput", DT_QUINT8}, {"Tfilter", DT_QUINT8}, {"out_type", DT_QINT32}},
900f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
910f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QINT32,
920f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
930f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
940f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"MatMul",
950f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {"transpose_a", "transpose_b"},
960f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {{"T1", DT_QUINT8}, {"T2", DT_QUINT8}, {"Toutput", DT_QINT32}},
970f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
980f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QINT32,
990f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
1000f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
1010f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"MaxPool",
1020f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {"ksize", "strides", "padding"},
1030f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {{"T", DT_QUINT8}},
1040f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
1050f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
1060f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
1070f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
10843a0c04b42f7160642b9f164b45c16b5dbeef6d9Pete Warden      {"Mul",
10943a0c04b42f7160642b9f164b45c16b5dbeef6d9Pete Warden       {},
11043a0c04b42f7160642b9f164b45c16b5dbeef6d9Pete Warden       {{"T1", DT_QUINT8}, {"T2", DT_QUINT8}, {"Toutput", DT_QINT32}},
11143a0c04b42f7160642b9f164b45c16b5dbeef6d9Pete Warden       DT_QUINT8,
11243a0c04b42f7160642b9f164b45c16b5dbeef6d9Pete Warden       DT_QINT32,
11343a0c04b42f7160642b9f164b45c16b5dbeef6d9Pete Warden       {},
11443a0c04b42f7160642b9f164b45c16b5dbeef6d9Pete Warden       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
1150f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"Relu",
1160f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
1170f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {{"Tinput", DT_QUINT8}},
1180f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
1190f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
1200f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
1210f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
1227e3e768d264fdbfd8ce9dbafe0ea08c671c8f7f0A. Unique TensorFlower      {"ResizeBilinear",
1237e3e768d264fdbfd8ce9dbafe0ea08c671c8f7f0A. Unique TensorFlower       {"align_corners"},
1247e3e768d264fdbfd8ce9dbafe0ea08c671c8f7f0A. Unique TensorFlower       {{"T", DT_QUINT8}},
1257e3e768d264fdbfd8ce9dbafe0ea08c671c8f7f0A. Unique TensorFlower       DT_QUINT8,
1267e3e768d264fdbfd8ce9dbafe0ea08c671c8f7f0A. Unique TensorFlower       DT_QUINT8,
1277e3e768d264fdbfd8ce9dbafe0ea08c671c8f7f0A. Unique TensorFlower       {1},
1287e3e768d264fdbfd8ce9dbafe0ea08c671c8f7f0A. Unique TensorFlower       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
1290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"Relu6",
1300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
1310f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {{"Tinput", DT_QUINT8}},
1320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
1330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
1340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
1350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
1360f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"Reshape",
1370f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {},
1380f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {{"T", DT_QUINT8}},
1390f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
1400f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       DT_QUINT8,
1410f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       {1},
1420f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden       QuantizedOpInfo::CONTIGUOUS_MIN_MAX},
1430f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  };
1440f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  return op_list;
1450f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
1460f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
1470f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Wardennamespace {
1480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// Replaces invalid characters in input names to get a unique node name.
1490f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Wardenstring UniqueNodeNameFromInput(const string& input_name) {
1500f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  string prefix;
1510f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  string node_name;
1520f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  string suffix;
1530f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  NodeNamePartsFromInput(input_name, &prefix, &node_name, &suffix);
1540f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  string result;
1550f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  if (prefix == "^") {
1560f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    result += "__hat__";
1570f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  }
1580f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  result += node_name;
1597280dafca161eb3413ea120d3dd07c63e5254e72A. Unique TensorFlower  if (!suffix.empty()) {
1600f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    result += "__port__" + suffix.substr(1, suffix.size() - 1);
1610f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  }
1620f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  return result;
1630f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
1640f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
1650f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// Pulls two float values from the named parameters, with a lot of checking.
1660f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenStatus ExtractRangeFromParams(const TransformFuncContext& context,
1670f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                              const string& min_name, const string& max_name,
1680f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                              float* min_value, float* max_value,
1690f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                              bool* has_range) {
1700f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // See if we've been given quantized inputs with a known range.
1710f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  const bool has_min = (context.params.count(min_name) != 0);
1720f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  const bool has_max = (context.params.count(max_name) != 0);
1730f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  *has_range = (has_min || has_max);
1740f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  if (!*has_range) {
1750f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    return Status::OK();
1760f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  }
1770f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  if (!has_min || !has_max) {
1780f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    return errors::InvalidArgument("You must pass both ", min_name, " and ",
1790f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                   max_name, " into quantize_nodes");
1800f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  }
18180679a6a741d2caa00c9b8bd0fa309f9cbbe2905Pete Warden  TF_RETURN_IF_ERROR(context.GetOneFloatParameter(min_name, 0.0f, min_value));
18280679a6a741d2caa00c9b8bd0fa309f9cbbe2905Pete Warden  TF_RETURN_IF_ERROR(context.GetOneFloatParameter(max_name, 0.0f, max_value));
1830f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  return Status::OK();
1840f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
1850f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
1860f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}  // namespace
1870f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
1880f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// Analyzes all the nodes in the graph to figure out which ones are duplicates
1890f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// apart from their names. This commonly includes identical Const nodes, but can
1900f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// also be simple operations that are repeated on multiple outputs of a
1910f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// particular node. The complexity is managed using a hash function that avoids
1920f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// the need for any O(n^2) algorithms when identifying duplicates.
1930f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenStatus MergeDuplicateNodes(const GraphDef& input_graph_def,
1940f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                           const TransformFuncContext& context,
1950f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                           GraphDef* output_graph_def) {
1960f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Make sure we can look up inputs and outputs quickly.
1970f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::set<string> input_names(context.input_names.begin(),
1980f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                               context.input_names.end());
1990f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::set<string> output_names(context.output_names.begin(),
2000f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                context.output_names.end());
2010f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef current_graph_def = input_graph_def;
2020f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Keep running the merging until no more duplicates are found.
2030f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  bool any_duplicates_found;
2040f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  do {
2050f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    any_duplicates_found = false;
2060f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    // First arrange all of the nodes by a hash of their contents.
2070f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    std::map<uint64, std::vector<const NodeDef*>> hashed_nodes;
2080f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    for (const NodeDef& node : current_graph_def.node()) {
2090f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      NodeDef nameless_node = node;
2100f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      // The name matters if it's being used as an input or output node,
2110f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      // otherwise ignore it when looking for duplicates.
2120f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      if (!input_names.count(node.name()) && !output_names.count(node.name())) {
2130f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        nameless_node.set_name("");
2140f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      }
2150f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      const uint64 hash = HashNodeDef(nameless_node);
2160f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      hashed_nodes[hash].push_back(&node);
2170f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    }
2180f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    // If we have multiple nodes with the same hash, then we know they're
2190f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    // duplicates and can be removed, unless they're stateful.
2200f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    std::map<string, string> inputs_to_rename;
2210f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    GraphDef merged_graph_def;
2220f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    for (const std::pair<uint64, std::vector<const NodeDef*>> hashed_node_info :
2230f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden         hashed_nodes) {
2240f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      const std::vector<const NodeDef*>& hash_node_list =
2250f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          hashed_node_info.second;
2260f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      for (int i = 0; i < hash_node_list.size(); ++i) {
2270f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef* current_node = hash_node_list[i];
2280f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const OpDef* op_def = nullptr;
2290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        TF_RETURN_IF_ERROR(
2300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            OpRegistry::Global()->LookUpOpDef(current_node->op(), &op_def));
2310f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const bool is_duplicate = ((!op_def->is_stateful()) && (i > 0));
2320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        if (is_duplicate) {
2330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          const string original_name = hash_node_list[0]->name();
2340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          inputs_to_rename[current_node->name() + ":*"] = original_name;
2350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          any_duplicates_found = true;
2360f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        } else {
2370f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeDef* new_node = merged_graph_def.mutable_node()->Add();
2380f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          *new_node = *current_node;
2390f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
2400f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      }
2410f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    }
2420f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    // Update the graph so that any nodes that referred to removed inputs now
2430f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    // pull from the remaining duplicate.
244bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins    TF_RETURN_IF_ERROR(RenameNodeInputs(merged_graph_def, inputs_to_rename,
245bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins                                        std::unordered_set<string>(),
246bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins                                        &current_graph_def));
2470f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  } while (any_duplicates_found);
2480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
2490f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  *output_graph_def = current_graph_def;
2500f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
2510f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  return Status::OK();
2520f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
2530f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
2540f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// Looks for the patterns that indicate there are two eight-bit ops feeding into
2550f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// each other, separated by a conversion up to float and back again. These occur
2560f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// during the initial conversion of ops to their quantized forms. Because we're
2570f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// only looking at an individual op in that phase and don't know if its inputs
2580f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// and outputs are eight-bit-capable, we start by converting the actual op into
2590f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// quantized form, but add float conversions before and after. This pass gets
2600f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// rid of those conversions if it turns out we do have adjacent ops capable of
2610f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// eight-bit processing.
2620f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenStatus RemoveRedundantQuantizations(const GraphDef& input_graph_def,
2630f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                    const TransformFuncContext& context,
2640f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                    GraphDef* output_graph_def) {
2650f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::set<string> graph_outputs;
2660f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  for (const string& output_name : context.output_names) {
2670f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    graph_outputs.insert(NodeNameFromInput(output_name));
2680f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  }
2690f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::map<string, string> inputs_to_rename;
2700f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef replaced_graph_def;
2710f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes(
2720f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      input_graph_def,  // clang-format off
2730f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"QuantizeV2",
2740f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        {
2750f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"Dequantize"},
2760f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"Min"},
2770f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"Max"},
2780f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
2790f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      },  // clang-format on
2800f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      [&inputs_to_rename, &graph_outputs](const NodeMatch& match,
2810f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                          const std::set<string>& input_nodes,
2820f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                          const std::set<string>& output_nodes,
2830f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                          std::vector<NodeDef>* new_nodes) {
2840f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& quantize_node = match.node;
2850f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& dequantize_node = match.inputs[0].node;
2860f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        inputs_to_rename[quantize_node.name() + ":0"] =
2870f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            dequantize_node.input(0);
2880f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        inputs_to_rename[quantize_node.name() + ":1"] =
2890f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            dequantize_node.input(1);
2900f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        inputs_to_rename[quantize_node.name() + ":2"] =
2910f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            dequantize_node.input(2);
2920f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
2930f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        // Are other sub-graphs using the float intermediate result? If so,
2940f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        // preserve it, but the input renaming still rewires the eight-bit ops
2950f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        // so they don't go through float.
2960f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        if (output_nodes.count(dequantize_node.name()) ||
2970f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            graph_outputs.count(dequantize_node.name())) {
2980f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          CopyOriginalMatch(match, new_nodes);
2990f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
3000f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3010f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        return Status::OK();
3020f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      },
3030f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {true}, &replaced_graph_def));
3040f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
305bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins  return RenameNodeInputs(replaced_graph_def, inputs_to_rename,
306bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins                          std::unordered_set<string>(), output_graph_def);
3070f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
3080f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3090f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// If the user has passed in the input_min and input_max args, then we need to
3100f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// convert any input placeholders from float to eight bit, so quantized inputs
3110f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// can be fed directly into the graph.
3120f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenStatus QuantizePlaceholders(const GraphDef& input_graph_def,
3130f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                            const TransformFuncContext& context,
3140f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                            GraphDef* output_graph_def) {
3150f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  float input_min;
3160f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  float input_max;
3170f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  bool has_input_range;
3180f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(ExtractRangeFromParams(context, "input_min", "input_max",
3190f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                            &input_min, &input_max,
3200f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                            &has_input_range));
3210f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  if (!has_input_range) {
3220f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    *output_graph_def = input_graph_def;
3230f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    return Status::OK();
3240f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  }
3250f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::map<string, string> inputs_to_rename_first_pass;
3260f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::map<string, string> inputs_to_rename_second_pass;
3270f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef placeholder_graph_def;
3280f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  placeholder_graph_def.Clear();
3290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  for (const NodeDef& node : input_graph_def.node()) {
3300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    if (node.op() != "Placeholder") {
3319501c4104125fb8c2c2d2e837fc2dd8a24034d52A. Unique TensorFlower      *(placeholder_graph_def.mutable_node()->Add()) = node;
3320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    } else {
3330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      string namespace_prefix = node.name() + "_eightbit";
3340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      NodeDef quantized_placeholder;
3369501c4104125fb8c2c2d2e837fc2dd8a24034d52A. Unique TensorFlower      quantized_placeholder = node;
3370f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      SetNodeAttr("dtype", DT_QUINT8, &quantized_placeholder);
3389501c4104125fb8c2c2d2e837fc2dd8a24034d52A. Unique TensorFlower      *(placeholder_graph_def.mutable_node()->Add()) = quantized_placeholder;
3390f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3400f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      NodeDef min_node;
3410f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      min_node.set_op("Const");
3420f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      min_node.set_name(namespace_prefix + "/min");
3430f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      SetNodeAttr("dtype", DT_FLOAT, &min_node);
3440f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      Tensor min_tensor(DT_FLOAT, {});
3450f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      min_tensor.flat<float>()(0) = input_min;
3460f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      SetNodeTensorAttr<float>("value", min_tensor, &min_node);
3479501c4104125fb8c2c2d2e837fc2dd8a24034d52A. Unique TensorFlower      *(placeholder_graph_def.mutable_node()->Add()) = min_node;
3480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3490f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      NodeDef max_node;
3500f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      max_node.set_op("Const");
3510f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      max_node.set_name(namespace_prefix + "/max");
3520f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      SetNodeAttr("dtype", DT_FLOAT, &max_node);
3530f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      Tensor max_tensor(DT_FLOAT, {});
3540f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      max_tensor.flat<float>()(0) = input_max;
3550f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      SetNodeTensorAttr<float>("value", max_tensor, &max_node);
3569501c4104125fb8c2c2d2e837fc2dd8a24034d52A. Unique TensorFlower      *(placeholder_graph_def.mutable_node()->Add()) = max_node;
3570f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3580f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      const string rename_suffix = "__RENAMED_PLACEHOLDER__";
3590f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      NodeDef dequantize_node;
3600f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      dequantize_node.set_op("Dequantize");
3610f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      dequantize_node.set_name(namespace_prefix + "/dequantize");
3620f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      SetNodeAttr("T", DT_QUINT8, &dequantize_node);
3630f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      SetNodeAttr("mode", "MIN_FIRST", &dequantize_node);
3640f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      AddNodeInput(node.name() + rename_suffix, &dequantize_node);
3650f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      AddNodeInput(min_node.name(), &dequantize_node);
3660f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      AddNodeInput(max_node.name(), &dequantize_node);
3679501c4104125fb8c2c2d2e837fc2dd8a24034d52A. Unique TensorFlower      *(placeholder_graph_def.mutable_node()->Add()) = dequantize_node;
3680f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3690f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      // First make sure that any internal references to the old placeholder
3700f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      // now point to the dequantize result.
3710f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      inputs_to_rename_first_pass[node.name()] = dequantize_node.name();
3720f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      // Then fix up the dequantize op so that it really points to the
3730f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      // placeholder.
3740f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      inputs_to_rename_second_pass[node.name() + rename_suffix] = node.name();
3750f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    }
3760f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  }
3770f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3780f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef first_pass_graph_def;
379bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins  TF_RETURN_IF_ERROR(
380bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins      RenameNodeInputs(placeholder_graph_def, inputs_to_rename_first_pass,
381bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins                       std::unordered_set<string>(), &first_pass_graph_def));
382bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins  TF_RETURN_IF_ERROR(
383bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins      RenameNodeInputs(first_pass_graph_def, inputs_to_rename_second_pass,
384bc225bfaa534acc25047fe844f19edc333b7a76aPeter Hawkins                       std::unordered_set<string>(), output_graph_def));
3850f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3860f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  return Status::OK();
3870f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
3880f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
3890f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// During training, FakeQuantWithMinMaxVars ops capture a good min/max range for
3900f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// an activation layer. To use these during inference, this pass converts those
3910f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// ops into Requantizes with the trained min/maxes as constant inputs.
3920f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenStatus ConvertFakeQuantsToRequantize(const GraphDef& input_graph_def,
3930f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                     const TransformFuncContext& context,
3940f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                     GraphDef* output_graph_def) {
3950f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes(
3960f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      input_graph_def,  // clang-format off
3970f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"FakeQuantWithMinMaxVars",
3980f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        {
3990f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"*"},
4000f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"Const"},
4010f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"Const"},
4020f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
4030f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      },  // clang-format on
4040f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      [](const NodeMatch& match, const std::set<string>& input_nodes,
4050f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden         const std::set<string>& output_nodes,
4060f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden         std::vector<NodeDef>* new_nodes) {
4070f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& fake_quant_node = match.node;
4080f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& original_op_node = match.inputs[0].node;
4090f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& fake_quant_min_node = match.inputs[1].node;
4100f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& fake_quant_max_node = match.inputs[2].node;
4110f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
4120f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        string namespace_prefix = fake_quant_node.name() + "_eightbit";
4130f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
4140f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(original_op_node);
4150f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(fake_quant_min_node);
4160f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(fake_quant_max_node);
4170f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
4180f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        NodeDef quantize_node;
4190f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        quantize_node.set_op("QuantizeV2");
4200f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        quantize_node.set_name(namespace_prefix + "/quantize");
4210f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        SetNodeAttr("T", DT_QINT32, &quantize_node);
4220f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        SetNodeAttr("mode", "MIN_FIRST", &quantize_node);
4230f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(fake_quant_node.input(0), &quantize_node);
4240f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(fake_quant_min_node.name(), &quantize_node);
4250f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(fake_quant_max_node.name(), &quantize_node);
4260f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(quantize_node);
4270f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
4280f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        NodeDef requantize_node;
4290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        requantize_node.set_op("Requantize");
4300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        requantize_node.set_name(namespace_prefix + "/requantize");
4310f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        SetNodeAttr("Tinput", DT_QINT32, &requantize_node);
4320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        SetNodeAttr("out_type", DT_QUINT8, &requantize_node);
4330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(quantize_node.name() + ":0", &requantize_node);
4340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(quantize_node.name() + ":1", &requantize_node);
4350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(quantize_node.name() + ":2", &requantize_node);
4360f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(fake_quant_min_node.name(), &requantize_node);
4370f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(fake_quant_max_node.name(), &requantize_node);
4380f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(requantize_node);
4390f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
4400f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        // Convert the 8-bit result back into float for the final output.
4410f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        NodeDef dequantize_node;
4420f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        dequantize_node.set_op("Dequantize");
4430f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        dequantize_node.set_name(fake_quant_node.name());
4440f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        SetNodeAttr("T", DT_QUINT8, &dequantize_node);
4450f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        SetNodeAttr("mode", "MIN_FIRST", &dequantize_node);
4460f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(requantize_node.name() + ":0", &dequantize_node);
4470f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(requantize_node.name() + ":1", &dequantize_node);
4480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(requantize_node.name() + ":2", &dequantize_node);
4490f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(dequantize_node);
4500f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
4510f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        return Status::OK();
4520f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      },
4530f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {}, output_graph_def));
4540f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
4550f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  return Status::OK();
4560f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
4570f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
4580f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// We always generate Requantize ops driven by dynamic RequantizationRange
4590f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// calculations when we produce quantized ops like Conv2D or BiasAdd with
4600f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// 32-bit results. If there were FakeQuant ops already for those activation
4610f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// layers, then there will be a later Requantize op with constant min/max
4620f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// inputs, which is preferable for fast inference. This pass looks for those
4630f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// later Requantize ops, and replaces the dynamic version with them.
4640f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenStatus MergeAdjacentRequantizes(const GraphDef& input_graph_def,
4650f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                const TransformFuncContext& context,
4660f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                GraphDef* output_graph_def) {
4670f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes(
4680f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      input_graph_def,  // clang-format off
4690f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {"Requantize",
4700f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        {
4710f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"QuantizeV2",
4720f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            {
4730f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden              {"Dequantize",
4740f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                {
4750f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                  {"Requantize",
4760f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                    {
4770f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                      {"*"},
4780f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                      {"*"},
4790f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                      {"*"},
4800f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                      {"RequantizationRange"},
4810f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                      {"RequantizationRange"},
4820f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                    }
4830f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                  },
4840f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                  {"Requantize"},
4850f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                  {"Requantize"},
4860f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                }
4870f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden              },
4880f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden              {"Const"},
4890f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden              {"Const"},
4900f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            },
4910f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          },
4920f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"QuantizeV2"},
4930f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"QuantizeV2"},
4940f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"Const"},
4950f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          {"Const"},
4960f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
4970f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      },  // clang-format on
4980f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      [](const NodeMatch& match, const std::set<string>& input_nodes,
4990f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden         const std::set<string>& output_nodes,
5000f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden         std::vector<NodeDef>* new_nodes) {
5010f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& fake_requantize_node = match.node;
5020f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& original_op_node =
5030f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            match.inputs[0].inputs[0].inputs[0].inputs[0].node;
5040f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& fake_requantize_min_node = match.inputs[3].node;
5050f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& fake_requantize_max_node = match.inputs[4].node;
5060f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5070f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(original_op_node);
5080f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(fake_requantize_min_node);
5090f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(fake_requantize_max_node);
5100f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5110f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        NodeDef requantize_node;
5129501c4104125fb8c2c2d2e837fc2dd8a24034d52A. Unique TensorFlower        requantize_node = fake_requantize_node;
5130f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        requantize_node.mutable_input()->Clear();
5140f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(original_op_node.name() + ":0", &requantize_node);
5150f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(original_op_node.name() + ":1", &requantize_node);
5160f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(original_op_node.name() + ":2", &requantize_node);
5170f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(fake_requantize_min_node.name(), &requantize_node);
5180f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(fake_requantize_max_node.name(), &requantize_node);
5190f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(requantize_node);
5200f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5210f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        return Status::OK();
5220f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      },
5230f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {}, output_graph_def));
5240f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5250f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  return Status::OK();
5260f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
5270f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5280f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// Sometimes FakeQuantWithMinMaxVars ops are added at the end of a chain of
5290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// linear ops like Relu, MaxPool, etc, several steps from the Conv2D or BiasAdd
5300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// op that we want to apply the trained constant conversions to. This pass tries
5310f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// to move FakeQuant ops up the input chain, so they're as close as possible to
5320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// the 32-bit conversion, and so can be easily merged into the automatic dynamic
5330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// Requantizes.
5340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenStatus HoistFakeQuants(const GraphDef& input_graph_def,
5350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                       const TransformFuncContext& context,
5360f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                       GraphDef* output_graph_def) {
5370f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef current_graph_def = input_graph_def;
5380f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  const int max_depth = 3;
5390f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  for (int depth = max_depth; depth > 0; --depth) {
5400f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    OpTypePattern pattern = {"*"};
5410f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    for (int i = 0; i < depth; ++i) {
5420f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      pattern = {"*", {pattern}};
5430f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    }
5440f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    pattern = {"FakeQuantWithMinMaxVars", {pattern, {"Const"}, {"Const"}}};
5450f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    GraphDef hoisted_graph_def;
5460f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes(
5470f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        current_graph_def, pattern,
5480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        [depth](const NodeMatch& match, const std::set<string>& input_nodes,
5490f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                const std::set<string>& output_nodes,
5500f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                std::vector<NodeDef>* new_nodes) {
5510f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          const NodeDef& fake_quant_node = match.node;
5520f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          const NodeDef& fake_quant_min_node = match.inputs[1].node;
5530f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          const NodeDef& fake_quant_max_node = match.inputs[2].node;
5540f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          std::vector<NodeDef> linear_nodes;
5550f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeMatch current_match = match;
5560f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          for (int i = 0; i <= depth; ++i) {
5570f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            linear_nodes.push_back(current_match.inputs[0].node);
5580f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            current_match = current_match.inputs[0];
5590f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          }
5600f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeDef new_fake_quant_node;
5619501c4104125fb8c2c2d2e837fc2dd8a24034d52A. Unique TensorFlower          new_fake_quant_node = fake_quant_node;
5620f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_fake_quant_node.set_name(fake_quant_node.name() + "_hoisted");
5630f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_fake_quant_node.set_input(
5640f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden              0, linear_nodes[linear_nodes.size() - 2].input(0));
5650f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(new_fake_quant_node);
5660f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5670f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(fake_quant_min_node);
5680f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(fake_quant_max_node);
5690f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5700f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          linear_nodes[linear_nodes.size() - 2].set_input(
5710f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden              0, new_fake_quant_node.name());
5720f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          linear_nodes.front().set_name(fake_quant_node.name());
5730f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          for (const NodeDef& linear_node : linear_nodes) {
5740f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            new_nodes->push_back(linear_node);
5750f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          }
5760f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5770f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          return Status::OK();
5780f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        },
5790f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        {}, &hoisted_graph_def));
5800f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden    current_graph_def = hoisted_graph_def;
5810f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  }
5820f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  *output_graph_def = current_graph_def;
5830f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5840f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  return Status::OK();
5850f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
5860f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
5870f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// Converts any float ops that have eight-bit equivalents into their quantized
5880f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// forms, so that as much calculation as possible is done in the lower-precision
5890f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden// format.
5900f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenStatus QuantizeNodes(const GraphDef& input_graph_def,
5910f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                     const TransformFuncContext& context,
5920f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                     GraphDef* output_graph_def) {
5930f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Loop through all of the quantizable op types, and replace any occurrences
5940f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // with equivalent sub-graphs with quantized ops at their core. For example
5950f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // this one-input operation:
5960f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //
5970f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //            Input(float)
5980f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //                |
5990f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //                v
6000f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //            Operation
6010f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //                |
6020f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //                v
6030f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //             (float)
6040f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //
6050f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Will be turned into its quantized equivalent:
6060f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //
6070f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //      Input(float)          ReshapeDims
6080f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         +------v v-------------+
6090f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         |    Reshape
6100f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         |      |
6110f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         |      |          ReductionDims
6120f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         |      +-----+         |
6130f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         |      | +---c---------+
6140f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         |      v v   v v-------+
6150f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         |      Min   Max
6160f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         |  +----+      |
6170f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         v  v  v--------+
6180f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //        Quantize
6190f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //            |
6200f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //            v
6210f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //     QuantizedOperation
6220f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //        |   |   |
6230f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //        v   v   v
6240f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //        Dequantize
6250f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //            |
6260f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //            v
6270f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //         (float)
6280f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  //
6290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // This keeps the inputs and outputs visible to the rest of the graph in
6300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // float
6310f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // and converts them down to quantized buffers internally for the
6320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // computation.
6330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // The result will end up with a lot of redundant dequantize/quantize pairs
6340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // between adjacent quantized ops, but a later pass removes these where it
6350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // can.
6365902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower
6375902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower  std::set<string> ops_to_ignore;
6385902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower  if (context.params.count("ignore_op") > 0) {
6395902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower    for (const string& name : context.params.at("ignore_op")) {
6405902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower      ops_to_ignore.insert(name);
6415902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower    }
6425902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower  }
6435902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower
6440f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  const std::vector<QuantizedOpInfo>& op_list = GetQuantizedOpList();
6450f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  string op_pattern;
6460f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  bool is_first = true;
6470f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  std::map<string, QuantizedOpInfo> op_map;
6480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  for (const QuantizedOpInfo& op_info : op_list) {
6495902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower    if (ops_to_ignore.count(op_info.float_name) == 0) {
6505902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower      strings::StrAppend(&op_pattern, (is_first ? "" : "|"),
6515902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower                         op_info.float_name);
6525902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower      op_map.insert({op_info.float_name, op_info});
6535902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower      is_first = false;
6545902c73c9cee4388fcc5017f4b235eb2a13a2e99A. Unique TensorFlower    }
6550f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  }
6560f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
6570f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // If input_min and input_max have been passed in, then we convert all float
6580f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Placeholder nodes into quantized versions, with the supplied values as
6590f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // their range.
6600f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef placeholder_graph_def;
6610f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(
6620f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      QuantizePlaceholders(input_graph_def, context, &placeholder_graph_def));
6630f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(IsGraphValid(placeholder_graph_def));
6640f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
6650f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // If there are any FakeQuantWithMinMaxVars at the end of a chain of linear
6660f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // operations like Relu or MaxPool, move them up so that they're as close as
6670f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // possible to ops with 32-bit outputs like BiasAdd or Conv2D.
6680f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef hoisted_graph_def;
6690f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(
6700f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      HoistFakeQuants(placeholder_graph_def, context, &hoisted_graph_def));
6710f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(IsGraphValid(hoisted_graph_def));
6720f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
6730f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Convert any FakeQuantWithMinMaxVars, which hold the trained ranges of
6740f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // activation layers, into Requantize ops with those ranges instead. This
6750f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // makes it easier to replace the dynamic range calculations that are used
6760f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // by default.
6770f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef converted_graph_def;
6780f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(ConvertFakeQuantsToRequantize(hoisted_graph_def, context,
6790f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                                   &converted_graph_def));
6800f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(IsGraphValid(converted_graph_def));
6810f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
6820f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // If fallback_min and fallback_max are set, then we'll use hardwired ranges
6830f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // for all the 32-bit to 8-bit requantizations.
6840f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  float fallback_min;
6850f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  float fallback_max;
6860f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  bool has_fallback_range;
6870f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(ExtractRangeFromParams(
6880f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      context, "fallback_min", "fallback_max", &fallback_min, &fallback_max,
6890f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      &has_fallback_range));
6900f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
6910f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Replace all occurrences of the current float op with its quantized
6920f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // equivalent.
6930f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef quantized_graph_def;
6940f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes(
6950f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      converted_graph_def, {op_pattern},
6960f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      [&op_map, fallback_min, fallback_max, has_fallback_range](
6970f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          const NodeMatch& match, const std::set<string>& input_nodes,
6980f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          const std::set<string>& output_nodes,
6990f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          std::vector<NodeDef>* new_nodes) {
7000f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const NodeDef& float_node = match.node;
7010f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        const QuantizedOpInfo& op_info = op_map[float_node.op()];
7020f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
7034b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        DataTypeVector input_types;
7044b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        DataTypeVector output_types;
7054b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        TF_RETURN_IF_ERROR(
7064b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden            GetInOutTypes(float_node, &input_types, &output_types));
7074b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        bool are_all_float = true;
7084b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        for (int i = 0; i < float_node.input_size(); ++i) {
7094b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden          // Skip any known non-float inputs.
7104b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden          if (op_info.unquantized_inputs.count(i)) {
7114b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden            continue;
7124b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden          }
7134b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden          if (input_types[i] != DT_FLOAT) {
7144b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden            are_all_float = false;
7154b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden          }
7164b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        }
7174b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        for (const DataType& output_type : output_types) {
7184b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden          if (output_type != DT_FLOAT) {
7194b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden            are_all_float = false;
7204b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden          }
7214b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        }
7224b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        // This isn't a float op, so don't quantize it.
7234b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        if (!are_all_float) {
7244b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden          CopyOriginalMatch(match, new_nodes);
7254b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden          return Status::OK();
7264b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden        }
7274b3d59a771252506cc34e66ebf2cd93be2564229Pete Warden
7280f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        string namespace_prefix = float_node.name() + "_eightbit";
7290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
7300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        // Quantize all of the inputs.
7310f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        std::vector<string> quantized_input_names;
7320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        for (int i = 0; i < float_node.input_size(); ++i) {
7330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          // Skip any non-float inputs.
7340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          if (op_info.unquantized_inputs.count(i)) {
7350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            continue;
7360f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          }
7370f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
7380f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          const string& input_name = float_node.input(i);
7390f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          string unique_input_name =
7400f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden              namespace_prefix + "/" + UniqueNodeNameFromInput(input_name);
7410f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
7420f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          // Add some common constants we need for reshaping inputs.
7430f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeDef reshape_dims;
7440f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          reshape_dims.set_op("Const");
7450f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          reshape_dims.set_name(unique_input_name + "/reshape_dims");
746e4532d20973c4c00854492362665317551661c18A. Unique TensorFlower          AddNodeInput("^" + NodeNameFromInput(input_name), &reshape_dims);
7470f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("dtype", DT_INT32, &reshape_dims);
7480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          Tensor reshape_dims_tensor(DT_INT32, {1});
7490f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          reshape_dims_tensor.flat<int32>()(0) = -1;
7500f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeTensorAttr<int32>("value", reshape_dims_tensor, &reshape_dims);
7510f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(reshape_dims);
7520f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
7530f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeDef reduction_dims;
7540f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          reduction_dims.set_op("Const");
7550f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          reduction_dims.set_name(unique_input_name + "/reduction_dims");
756e4532d20973c4c00854492362665317551661c18A. Unique TensorFlower          AddNodeInput("^" + NodeNameFromInput(input_name), &reduction_dims);
7570f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("dtype", DT_INT32, &reduction_dims);
7580f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          Tensor reduction_dims_tensor(DT_INT32, {1});
7590f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          reduction_dims_tensor.flat<int32>()(0) = 0;
7600f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeTensorAttr<int32>("value", reduction_dims_tensor,
7610f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                   &reduction_dims);
7620f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(reduction_dims);
7630f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
7640f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeDef reshape_node;
7650f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          reshape_node.set_op("Reshape");
7660f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          reshape_node.set_name(unique_input_name + "/reshape");
7670f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("T", DT_FLOAT, &reshape_node);
7680f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(input_name, &reshape_node);
7690f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(reshape_dims.name(), &reshape_node);
7700f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(reshape_node);
7710f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
7720f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeDef min_node;
7730f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          min_node.set_op("Min");
7740f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          min_node.set_name(unique_input_name + "/min");
7750f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("T", DT_FLOAT, &min_node);
7760f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("keep_dims", false, &min_node);
7770f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(reshape_node.name(), &min_node);
7780f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(reduction_dims.name(), &min_node);
7790f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(min_node);
7800f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
7810f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeDef max_node;
7820f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          max_node.set_op("Max");
7830f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          max_node.set_name(unique_input_name + "/max");
7840f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("T", DT_FLOAT, &max_node);
7850f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("keep_dims", false, &max_node);
7860f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(reshape_node.name(), &max_node);
7870f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(reduction_dims.name(), &max_node);
7880f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(max_node);
7890f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
7900f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeDef quantize_node;
7910f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          quantize_node.set_op("QuantizeV2");
7920f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          quantize_node.set_name(unique_input_name + "/quantize");
7930f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("T", DT_QUINT8, &quantize_node);
7940f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("mode", "MIN_FIRST", &quantize_node);
7950f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(input_name, &quantize_node);
7960f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(min_node.name(), &quantize_node);
7970f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(max_node.name(), &quantize_node);
7980f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(quantize_node);
7990f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          quantized_input_names.push_back(quantize_node.name());
8000f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
8010f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
8020f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        // Set up the quantized version of the current op.
8030f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        NodeDef quantized_main_node;
8040f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        quantized_main_node.set_op("Quantized" + float_node.op());
8050f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        quantized_main_node.set_name(float_node.name() + "/eightbit");
8060f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        for (const string& attr_to_copy : op_info.attrs_to_copy) {
8070f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          CopyNodeAttr(float_node, attr_to_copy, attr_to_copy,
8080f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                       &quantized_main_node);
8090f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
8100f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        for (const std::pair<string, DataType>& dtype_to_set :
8110f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden             op_info.dtypes_to_set) {
8120f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr(dtype_to_set.first, dtype_to_set.second,
8130f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                      &quantized_main_node);
8140f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
8150f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        int quantized_input_index = 0;
8160f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        for (int i = 0; i < float_node.input_size(); ++i) {
8170f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          if (op_info.unquantized_inputs.count(i)) {
8180f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            AddNodeInput(float_node.input(i), &quantized_main_node);
8190f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          } else {
8200f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            const string& quantized_input_name =
8210f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                quantized_input_names[quantized_input_index];
8220f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            AddNodeInput(quantized_input_name + ":0", &quantized_main_node);
8230f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            ++quantized_input_index;
8240f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          }
8250f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
8260f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        if (op_info.min_max_order == QuantizedOpInfo::CONTIGUOUS_MIN_MAX) {
8270f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          for (const string& quantized_input_name : quantized_input_names) {
8280f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            AddNodeInput(quantized_input_name + ":1", &quantized_main_node);
8290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            AddNodeInput(quantized_input_name + ":2", &quantized_main_node);
8300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          }
8310f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        } else {
8320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          for (const string& quantized_input_name : quantized_input_names) {
8330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            AddNodeInput(quantized_input_name + ":1", &quantized_main_node);
8340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          }
8350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          for (const string& quantized_input_name : quantized_input_names) {
8360f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            AddNodeInput(quantized_input_name + ":2", &quantized_main_node);
8370f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          }
8380f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
8390f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(quantized_main_node);
8400f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
8410f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        string eight_bit_node_name;
8420f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        if (op_info.output_bit_depth == DT_QINT32) {
8430f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          // Shrink the range of the output down from 32 bits to 8.
8440f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          string requantize_min_input;
8450f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          string requantize_max_input;
8460f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          if (has_fallback_range) {
8470f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            // Use constant values for the min/max range if they were given.
8480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            NodeDef fallback_min_node;
8490f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            fallback_min_node.set_op("Const");
8500f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            fallback_min_node.set_name(quantized_main_node.name() +
8510f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                       "/fallback_min");
8520f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            SetNodeAttr("dtype", DT_FLOAT, &fallback_min_node);
8530f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            Tensor fallback_min_tensor(DT_FLOAT, {});
8540f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            fallback_min_tensor.flat<float>()(0) = fallback_min;
8550f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            SetNodeTensorAttr<float>("value", fallback_min_tensor,
8560f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                     &fallback_min_node);
8570f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            new_nodes->push_back(fallback_min_node);
8580f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
8590f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            NodeDef fallback_max_node;
8600f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            fallback_max_node.set_op("Const");
8610f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            fallback_max_node.set_name(quantized_main_node.name() +
8620f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                       "/fallback_max");
8630f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            SetNodeAttr("dtype", DT_FLOAT, &fallback_max_node);
8640f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            Tensor fallback_max_tensor(DT_FLOAT, {});
8650f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            fallback_max_tensor.flat<float>()(0) = fallback_max;
8660f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            SetNodeTensorAttr<float>("value", fallback_max_tensor,
8670f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                     &fallback_max_node);
8680f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            new_nodes->push_back(fallback_max_node);
8690f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
8700f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            requantize_min_input = fallback_min_node.name();
8710f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            requantize_max_input = fallback_max_node.name();
8720f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          } else {
8730f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            // Otherwise dynamically measure the range each time.
8740f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            NodeDef requant_range_node;
8750f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            requant_range_node.set_op("RequantizationRange");
8760f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            requant_range_node.set_name(quantized_main_node.name() +
8770f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                        "/requant_range");
8780f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            SetNodeAttr("Tinput", DT_QINT32, &requant_range_node);
8790f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            AddNodeInput(quantized_main_node.name() + ":0",
8800f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                         &requant_range_node);
8810f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            AddNodeInput(quantized_main_node.name() + ":1",
8820f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                         &requant_range_node);
8830f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            AddNodeInput(quantized_main_node.name() + ":2",
8840f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                         &requant_range_node);
8850f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            new_nodes->push_back(requant_range_node);
8860f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
8870f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            requantize_min_input = requant_range_node.name() + ":0";
8880f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden            requantize_max_input = requant_range_node.name() + ":1";
8890f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          }
8900f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          NodeDef requantize_node;
8910f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          requantize_node.set_op("Requantize");
8920f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          requantize_node.set_name(quantized_main_node.name() + "/requantize");
8930f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("Tinput", DT_QINT32, &requantize_node);
8940f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          SetNodeAttr("out_type", DT_QUINT8, &requantize_node);
8950f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(quantized_main_node.name() + ":0", &requantize_node);
8960f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(quantized_main_node.name() + ":1", &requantize_node);
8970f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(quantized_main_node.name() + ":2", &requantize_node);
8980f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(requantize_min_input, &requantize_node);
8990f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          AddNodeInput(requantize_max_input, &requantize_node);
9000f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          new_nodes->push_back(requantize_node);
9010f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          eight_bit_node_name = requantize_node.name();
9020f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        } else {
9030f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden          eight_bit_node_name = quantized_main_node.name();
9040f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        }
9050f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
9060f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        // Convert the 8-bit result back into float for the final output.
9070f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        NodeDef dequantize_node;
9080f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        dequantize_node.set_op("Dequantize");
9090f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        dequantize_node.set_name(float_node.name());
9100f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        SetNodeAttr("T", DT_QUINT8, &dequantize_node);
9110f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        SetNodeAttr("mode", "MIN_FIRST", &dequantize_node);
9120f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(eight_bit_node_name + ":0", &dequantize_node);
9130f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(eight_bit_node_name + ":1", &dequantize_node);
9140f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        AddNodeInput(eight_bit_node_name + ":2", &dequantize_node);
9150f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        new_nodes->push_back(dequantize_node);
9160f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
9170f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden        return Status::OK();
9180f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      },
9190f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      {}, &quantized_graph_def));
9200f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(IsGraphValid(quantized_graph_def));
9210f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
9220f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // If we've ended up with two Requantize ops in a row (for example if there
9230f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // was a Conv2D feeding into a FakeQuantWithMinMaxVars) merge them together,
9240f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // using the trained range from the second op.
9250f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef merged_graph_def;
9260f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(MergeAdjacentRequantizes(quantized_graph_def, context,
9270f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                              &merged_graph_def));
9280f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(IsGraphValid(merged_graph_def));
9290f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
9300f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // There can be duplicate quantize nodes if multiple ops pull from a single
9310f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // input, which makes it harder to remove redundant ones, so strip them out.
9320f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  GraphDef deduped_graph_def;
9330f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(
9340f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden      MergeDuplicateNodes(merged_graph_def, context, &deduped_graph_def));
9350f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(IsGraphValid(deduped_graph_def));
9360f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
9370f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // Look for Dequantizes that immediately go into Quantizes, and remove them
9380f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // since the two together cancel each other out. This allows us to keep the
9390f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // data flow in eight bit where two adjacent ops are in eight bit, but still
9400f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  // keep interoperability with float ops.
9410f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  TF_RETURN_IF_ERROR(RemoveRedundantQuantizations(deduped_graph_def, context,
9420f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden                                                  output_graph_def));
943d0697156736ff137a8d8f6bcd934aa935bf89001Rohan Jain  TF_RETURN_IF_ERROR(IsGraphValid(*output_graph_def));
9440f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
9450f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden  return Status::OK();
9460f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}
9470f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
// Associates the transform name "quantize_nodes" with the QuantizeNodes
// function defined above.
// NOTE(review): REGISTER_GRAPH_TRANSFORM is declared outside this file;
// presumably it makes the function selectable by this name when transforms
// are requested -- confirm against the macro's definition.
9480f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenREGISTER_GRAPH_TRANSFORM("quantize_nodes", QuantizeNodes);
9490f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
// Associates the transform name "merge_duplicate_nodes" with
// MergeDuplicateNodes, the same routine QuantizeNodes calls to strip
// duplicate quantize nodes before removing redundant quantizations.
9500f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete WardenREGISTER_GRAPH_TRANSFORM("merge_duplicate_nodes", MergeDuplicateNodes);
9510f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden
9520f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}  // namespace graph_transforms
9530f0e29e7ba06c50fe4a1a7718e63731b96563a8dPete Warden}  // namespace tensorflow
954