# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15"""Benchmark for fused conv2d bias and activation op."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20import time
21
22from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op
23from tensorflow.python.client import session as session_lib
24from tensorflow.python.framework import ops
25from tensorflow.python.ops import control_flow_ops
26from tensorflow.python.ops import nn_ops
27from tensorflow.python.ops import random_ops
28from tensorflow.python.ops import variables
29from tensorflow.python.platform import test
30
31
def build_conv_bias_relu_graph(device, input_shape, filter_shape, strides,
                               padding, num_iters, data_format):
  """Builds a graph containing a chain of conv2d + bias_add + relu ops.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor, given in NHWC order.
    filter_shape: Shape of the filter tensor.
    strides: A list of ints. 1-D of length 4. The stride of the sliding
             window for each dimension of input.
    padding: A string from: "SAME", "VALID". The type of padding
             algorithm to use.
    num_iters: Number of chained conv2d + bias_add + relu iterations to
               build into the graph.
    data_format: data format string of input, 'NHWC' and 'NCHW' are
                 supported.

  Returns:
    A grouped op; running it executes every conv2d + bias_add + relu in
    the chain.
  """
  if data_format == "NCHW":
    # input_shape is supplied in NHWC order; permute it to NCHW.
    input_shape = [
        input_shape[0], input_shape[3], input_shape[1], input_shape[2]
    ]
  with ops.device("/%s:0" % device):
    inp = variables.Variable(random_ops.truncated_normal(input_shape))
    filt = variables.Variable(random_ops.truncated_normal(filter_shape))
    # One bias value per output channel (last dim of the filter).
    bias_shape = [filter_shape[-1]]
    bias = variables.Variable(random_ops.truncated_normal(bias_shape))

    def _conv_bias_relu():
      """One conv2d -> bias_add -> relu step over the shared variables."""
      conv2d_out = nn_ops.conv2d(
          inp, filt, strides, padding, data_format=data_format)
      bias_out = nn_ops.bias_add(conv2d_out, bias, data_format=data_format)
      return nn_ops.relu(bias_out)

    outputs = []
    relu_out = _conv_bias_relu()
    outputs.append(relu_out)
    for _ in range(1, num_iters):
      # Serialize the iterations via control dependencies so they cannot
      # run concurrently and skew the per-iteration timing.
      with ops.control_dependencies([relu_out]):
        relu_out = _conv_bias_relu()
        outputs.append(relu_out)
    return control_flow_ops.group(*outputs)
75
76
def build_fused_conv_bias_relu_graph(device, input_shape, filter_shape, strides,
                                     padding, num_iters, data_format):
  """Builds a graph containing a chain of fused conv2d/bias/relu ops.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor, given in NHWC order.
    filter_shape: Shape of the filter tensor.
    strides: A list of ints. 1-D of length 4. The stride of the sliding
             window for each dimension of input.
    padding: A string from: "SAME", "VALID". The type of padding
             algorithm to use.
    num_iters: Number of chained fused_conv2d_bias_activation iterations
               to build into the graph.
    data_format: data format string of input, 'NHWC' and 'NCHW' are
                 supported.

  Returns:
    A grouped op; running it executes every fused conv2d/bias/relu op in
    the chain.
  """
  if data_format == "NCHW":
    # input_shape is supplied in NHWC order; permute it to NCHW.
    input_shape = [
        input_shape[0], input_shape[3], input_shape[1], input_shape[2]
    ]
  with ops.device("/%s:0" % device):
    inp = variables.Variable(random_ops.truncated_normal(input_shape))
    filt = variables.Variable(random_ops.truncated_normal(filter_shape))
    # One bias value per output channel (last dim of the filter).
    bias_shape = [filter_shape[-1]]
    bias = variables.Variable(random_ops.truncated_normal(bias_shape))

    def _fused_conv_bias_relu():
      """One fused conv2d + bias + relu op over the shared variables."""
      return fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          inp,
          filt,
          bias,
          strides,
          padding,
          data_format=data_format,
          activation_mode="Relu")

    outputs = []
    fused_out = _fused_conv_bias_relu()
    outputs.append(fused_out)
    for _ in range(1, num_iters):
      # Serialize the iterations via control dependencies so they cannot
      # run concurrently and skew the per-iteration timing.
      with ops.control_dependencies([fused_out]):
        fused_out = _fused_conv_bias_relu()
        outputs.append(fused_out)
    return control_flow_ops.group(*outputs)
129
130
class FusedConv2DBiasActivationBenchmark(test.Benchmark):
  """Benchmark fused conv2d + bias + activation."""

  def _run_graph(self, device, input_shape, filter_shape, strides, padding,
                 num_iters, data_format):
    """Runs the graph and prints its execution time.

    Args:
      device: String, the device to run on.
      input_shape: Shape of the input tensor.
      filter_shape: Shape of the filter tensor.
      strides: A list of ints. 1-D of length 4. The stride of the sliding
               window for each dimension of input.
      padding: A string from: "SAME", "VALID". The type of padding
               algorithm to use.
      num_iters: Number of iterations to run the benchmark.
      data_format: data format string of input, 'NHWC' and 'NCHW' are
                   supported.

    Returns:
      The average duration of one iteration in seconds.
    """
    graph = ops.Graph()
    with graph.as_default():
      outputs = build_fused_conv_bias_relu_graph(device, input_shape,
                                                 filter_shape, strides, padding,
                                                 num_iters, data_format)
      with session_lib.Session(graph=graph) as session:
        variables.global_variables_initializer().run()
        # Warmup run: excludes one-time startup costs from the timing.
        session.run(outputs)

        start_time = time.time()
        session.run(outputs)
        # The graph chains num_iters serialized iterations, so one run of
        # `outputs` amortizes to the per-iteration time below.
        duration = (time.time() - start_time) / num_iters

        print("%s inputshape:%s filtershape:%s strides:%s padding:%s "
              "%d iters: %.8f sec" % (device, str(input_shape).replace(" ", ""),
                                      str(filter_shape).replace(" ", ""),
                                      str(strides).replace(" ", ""), padding,
                                      num_iters, duration))
    name_template = (
        "conv2d_{device}_input_shape_{inputshape}_filter_shape_{filtershape}_"
        "strides_{strides}_padding_{padding}")

    self.report_benchmark(
        name=name_template.format(
            device=device,
            inputshape=str(input_shape).replace(" ", ""),
            filtershape=str(filter_shape).replace(" ", ""),
            strides=str(strides).replace(" ", ""),
            padding=padding).replace(" ", ""),
        iters=num_iters,
        wall_time=duration)

    return duration

  def _benchmark_shape_list(self, input_shapes, filter_shapes, stride, paddings,
                            data_formats):
    """Runs the benchmark over every shape/padding/data-format combination."""
    for ishape, fshape in zip(input_shapes, filter_shapes):
      for padding in paddings:
        for data_format in data_formats:
          self._run_graph("gpu", ishape, fshape, stride, padding, 80,
                          data_format)

  def benchmark_fused_conv2d_bias_activation(self):
    """Benchmarks fused conv/bias/relu on ResNet-50 and Inception-v3 shapes."""
    stride = [1, 1, 1, 1]
    paddings = ["VALID", "SAME"]
    data_formats = ["NHWC", "NCHW"]

    # Input shapes are NHWC; each pairs with the filter shape at the same
    # index in the list below.
    resnet50_input_shapes = [
        [64, 14, 14, 256], [64, 14, 14, 256], [64, 14, 14, 1024],
        [64, 55, 55, 64], [64, 28, 28, 128], [64, 28, 28, 128],
        [64, 55, 55, 64], [64, 7, 7, 512], [64, 7, 7, 512],
        [64, 28, 28, 512], [64, 55, 55, 256], [64, 7, 7, 2048]
    ]

    resnet50_filter_shapes = [
        [1, 1, 256, 1024], [3, 3, 256, 256], [1, 1, 1024, 256],
        [1, 1, 64, 256], [1, 1, 128, 512], [3, 3, 128, 128],
        [3, 3, 64, 64], [3, 3, 512, 512], [1, 1, 512, 2048],
        [1, 1, 512, 128], [1, 1, 256, 64], [1, 1, 2048, 512]
    ]

    inception3_input_shapes = [
        [64, 17, 17, 768], [64, 35, 35, 96], [64, 35, 35, 288],
        [64, 8, 8, 384], [64, 8, 8, 384], [64, 17, 17, 192],
        [64, 35, 35, 64], [64, 17, 17, 192], [64, 17, 17, 160],
        [64, 17, 17, 160], [64, 17, 17, 768], [64, 35, 35, 256],
        [64, 35, 35, 48], [64, 35, 35, 192], [64, 17, 17, 128],
        [64, 17, 17, 160], [64, 8, 8, 448], [64, 17, 17, 128],
        [64, 17, 17, 768], [64, 17, 17, 160]
    ]
    inception3_filter_shapes = [
        [1, 1, 768, 192], [3, 3, 96, 96], [1, 1, 288, 64],
        [1, 3, 384, 384], [3, 1, 384, 384], [7, 1, 192, 192],
        [3, 3, 64, 96], [1, 7, 192, 192], [7, 1, 160, 160],
        [1, 7, 160, 160], [1, 1, 768, 160], [1, 1, 256, 64],
        [5, 5, 48, 64], [1, 1, 192, 64], [1, 7, 128, 128],
        [1, 7, 160, 192], [3, 3, 448, 384], [7, 1, 128, 128],
        [1, 1, 768, 128], [7, 1, 160, 192]
    ]

    print("fused conv2d bias activation benchmark using resnet50's shapes:")
    self._benchmark_shape_list(resnet50_input_shapes, resnet50_filter_shapes,
                               stride, paddings, data_formats)
    print("fused conv2d bias activation benchmark using inception3's shapes:")
    self._benchmark_shape_list(inception3_input_shapes,
                               inception3_filter_shapes, stride, paddings,
                               data_formats)
240
241
# Run the benchmark suite when the module is executed directly.
if __name__ == "__main__":
  test.main()
244