1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Benchmarks for Cudnn RNN models."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import time
22
23from six.moves import xrange  # pylint: disable=redefined-builtin
24from tensorflow.contrib import rnn as contrib_rnn
25from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
26from tensorflow.contrib.rnn.python.ops import lstm_ops
27from tensorflow.python.client import session
28from tensorflow.python.framework import dtypes
29from tensorflow.python.framework import ops
30from tensorflow.python.ops import array_ops
31from tensorflow.python.ops import control_flow_ops
32from tensorflow.python.ops import gradients_impl
33from tensorflow.python.ops import rnn
34from tensorflow.python.ops import variables
35from tensorflow.python.platform import test
36
37
class CudnnRNNBenchmark(test.Benchmark):
  """Benchmarks Cudnn LSTM and other related models.

  Every public `benchmark*` method builds, for each entry of
  `_GetTestConfig()`, a fresh graph placed on "/device:GPU:0" containing a
  forward pass plus its gradients, and times it with `_BenchmarkOp`.
  """

  def _GetTestConfig(self):
    """Returns the benchmark configurations keyed by config name.

    Returns:
      A dict mapping a config name ("large", "medium", "small") to a dict with
      the integer entries "num_layers", "num_units", "seq_length" and
      "batch_size".
    """
    return {
        "large": {
            "num_layers": 4,
            "num_units": 1024,
            "seq_length": 50,
            "batch_size": 64,
        },
        "medium": {
            "num_layers": 4,
            "num_units": 512,
            "seq_length": 50,
            "batch_size": 64,
        },
        "small": {
            "num_layers": 4,
            "num_units": 128,
            "seq_length": 50,
            "batch_size": 64,
        },
    }

  def _GetConfigDesc(self, config):
    """Returns a compact one-token description of `config`.

    Args:
      config: A dict with the integer entries "num_layers", "num_units",
        "batch_size" and "seq_length".

    Returns:
      A string such as "y4_u1024_b64_q50" (layers, units, batch size,
      sequence length, in that order).
    """
    num_layers = config["num_layers"]
    num_units = config["num_units"]
    batch_size = config["batch_size"]
    seq_length = config["seq_length"]

    return "y%d_u%d_b%d_q%d" % (num_layers, num_units, batch_size, seq_length)

  def _BenchmarkOp(self, op, desc):
    """Times repeated runs of `op`, prints the result and reports it.

    A new session is opened, all global variables are initialized, `op` is run
    for a few untimed burn-in steps and then for `benchmark_steps` timed
    steps. The callers in this class invoke this method while the graph that
    holds `op` is still the default graph.

    Args:
      op: The fetch passed to `Session.run` on every step.
      desc: Label used in the printed line and as the reported benchmark name.
    """
    burn_in_steps = 10
    benchmark_steps = 20
    with session.Session() as sess:
      sess.run(variables.global_variables_initializer())
      # Warm-up runs are executed first and kept out of the measurement.
      for _ in xrange(burn_in_steps):
        sess.run(op)
      start_time = time.time()
      for _ in xrange(benchmark_steps):
        sess.run(op)
      total_time = time.time() - start_time
      step_time = total_time / benchmark_steps
      print("%s takes %.4f sec/step" % (desc, step_time))
      # NOTE(review): wall_time is the total over all timed steps, not the
      # per-step time printed above -- confirm this is what report_benchmark
      # consumers expect.
      self.report_benchmark(
          name=desc, iters=benchmark_steps, wall_time=total_time)

  def _BenchmarkDynamicRNNTraining(self, cell_fn, desc_prefix):
    """Benchmarks gradients of a `dynamic_rnn` over stacked `cell_fn` cells.

    For every test config, builds a `MultiRNNCell` of `num_layers` cells, runs
    `rnn.dynamic_rnn` on an all-zeros float32 input of shape
    [batch_size, seq_length, num_units], and times the grouped gradients of
    the outputs and final state with respect to the graph's trainable
    variables.

    Args:
      cell_fn: Callable invoked as `cell_fn(num_units)` that returns one RNN
        cell; called once per layer.
      desc_prefix: First token of the reported benchmark name.
    """
    for config_name, config in self._GetTestConfig().items():
      num_layers = config["num_layers"]
      num_units = config["num_units"]
      batch_size = config["batch_size"]
      seq_length = config["seq_length"]

      with ops.Graph().as_default(), ops.device("/device:GPU:0"):
        inputs = array_ops.zeros([batch_size, seq_length, num_units],
                                 dtypes.float32)

        multi_cell = contrib_rnn.MultiRNNCell(
            [cell_fn(num_units) for _ in range(num_layers)])
        outputs, final_state = rnn.dynamic_rnn(
            multi_cell, inputs, dtype=dtypes.float32)
        trainable_variables = ops.get_collection(
            ops.GraphKeys.TRAINABLE_VARIABLES)
        grads = gradients_impl.gradients([outputs, final_state],
                                         trainable_variables)
        training_op = control_flow_ops.group(*grads)
        self._BenchmarkOp(training_op, "%s %s %s" %
                          (desc_prefix, config_name,
                           self._GetConfigDesc(config)))

  def benchmarkCudnnLSTMTraining(self):
    """Benchmarks forward plus gradient computation of `CudnnLSTM`.

    All inputs (data, initial h, initial c, parameters) are variables filled
    with ones; the timed op groups the gradients of the three model outputs
    with respect to those four variables.
    """
    for config_name, config in self._GetTestConfig().items():
      num_layers = config["num_layers"]
      num_units = config["num_units"]
      batch_size = config["batch_size"]
      seq_length = config["seq_length"]

      with ops.Graph().as_default(), ops.device("/device:GPU:0"):
        # num_units is passed twice; the last argument is presumably the input
        # size -- confirm against the CudnnLSTM constructor.
        model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, num_units)
        params_size_t = model.params_size()
        input_data = variables.Variable(
            array_ops.ones([seq_length, batch_size, num_units]))
        input_h = variables.Variable(
            array_ops.ones([num_layers, batch_size, num_units]))
        input_c = variables.Variable(
            array_ops.ones([num_layers, batch_size, num_units]))
        # The parameter buffer is sized by the value returned from
        # params_size(); shape validation is disabled for this variable.
        params = variables.Variable(
            array_ops.ones([params_size_t]), validate_shape=False)
        output, output_h, output_c = model(
            is_training=True,
            input_data=input_data,
            input_h=input_h,
            input_c=input_c,
            params=params)
        all_grads = gradients_impl.gradients(
            [output, output_h, output_c],
            [params, input_data, input_h, input_c])
        training_op = control_flow_ops.group(*all_grads)
        self._BenchmarkOp(training_op, "cudnn_lstm %s %s" %
                          (config_name, self._GetConfigDesc(config)))

  def benchmarkTfRNNLSTMTraining(self):
    """Benchmarks `dynamic_rnn` training with stacked `BasicLSTMCell`s."""
    self._BenchmarkDynamicRNNTraining(contrib_rnn.BasicLSTMCell, "tf_rnn_lstm")

  def benchmarkTfRNNLSTMBlockCellTraining(self):
    """Benchmarks `dynamic_rnn` training with stacked `LSTMBlockCell`s."""
    self._BenchmarkDynamicRNNTraining(lstm_ops.LSTMBlockCell,
                                      "tf_rnn_lstm_block_cell")
167
168
# When executed as a script, hand control to the main() entry point of
# tensorflow.python.platform.test.
if __name__ == "__main__":
  test.main()
171