1# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Benchmarks for Cudnn RNN models.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import time 22 23from six.moves import xrange # pylint: disable=redefined-builtin 24from tensorflow.contrib import rnn as contrib_rnn 25from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops 26from tensorflow.contrib.rnn.python.ops import lstm_ops 27from tensorflow.python.client import session 28from tensorflow.python.framework import dtypes 29from tensorflow.python.framework import ops 30from tensorflow.python.ops import array_ops 31from tensorflow.python.ops import control_flow_ops 32from tensorflow.python.ops import gradients_impl 33from tensorflow.python.ops import rnn 34from tensorflow.python.ops import variables 35from tensorflow.python.platform import test 36 37 38class CudnnRNNBenchmark(test.Benchmark): 39 """Benchmarks Cudnn LSTM and other related models. 40 """ 41 42 def _GetTestConfig(self): 43 return { 44 "large": { 45 "num_layers": 4, 46 "num_units": 1024, 47 "seq_length": 50, 48 "batch_size": 64, 49 }, 50 "medium": { 51 "num_layers": 4, 52 "num_units": 512, 53 "seq_length": 50, 54 "batch_size": 64, 55 }, 56 "small": { 57 "num_layers": 4, 58 "num_units": 128, 59 "seq_length": 50, 60 "batch_size": 64, 61 }, 62 } 63 64 def _GetConfigDesc(self, config): 65 num_layers = config["num_layers"] 66 num_units = config["num_units"] 67 batch_size = config["batch_size"] 68 seq_length = config["seq_length"] 69 70 return "y%d_u%d_b%d_q%d" % (num_layers, num_units, batch_size, seq_length) 71 72 def _BenchmarkOp(self, op, desc): 73 burn_in_steps = 10 74 benchmark_steps = 20 75 with session.Session() as sess: 76 sess.run(variables.global_variables_initializer()) 77 for i in xrange(burn_in_steps + benchmark_steps): 78 if i == burn_in_steps: 79 start_time = time.time() 80 sess.run(op) 81 total_time = time.time() - start_time 82 step_time = total_time / benchmark_steps 83 print("%s takes %.4f sec/step" % (desc, step_time)) 84 self.report_benchmark( 85 name=desc, iters=benchmark_steps, wall_time=total_time) 86 87 def benchmarkCudnnLSTMTraining(self): 88 test_configs = self._GetTestConfig() 89 for config_name, config in test_configs.items(): 90 config = test_configs[config_name] 91 num_layers = config["num_layers"] 92 num_units = config["num_units"] 93 batch_size = config["batch_size"] 94 seq_length = config["seq_length"] 95 96 with ops.Graph().as_default(), ops.device("/device:GPU:0"): 97 model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, num_units) 98 params_size_t = model.params_size() 99 input_data = variables.Variable( 100 array_ops.ones([seq_length, batch_size, num_units])) 101 input_h = variables.Variable( 102 array_ops.ones([num_layers, batch_size, num_units])) 103 input_c = variables.Variable( 104 array_ops.ones([num_layers, batch_size, num_units])) 105 params = variables.Variable( 106 array_ops.ones([params_size_t]), validate_shape=False) 107 output, output_h, output_c = model( 108 is_training=True, 109 input_data=input_data, 110 input_h=input_h, 111 input_c=input_c, 112 params=params) 113 all_grads = gradients_impl.gradients( 114 [output, output_h, output_c], 115 [params, input_data, input_h, input_c]) 116 training_op = control_flow_ops.group(*all_grads) 117 self._BenchmarkOp(training_op, "cudnn_lstm %s %s" % 118 (config_name, self._GetConfigDesc(config))) 119 120 def benchmarkTfRNNLSTMTraining(self): 121 test_configs = self._GetTestConfig() 122 for config_name, config in test_configs.items(): 123 num_layers = config["num_layers"] 124 num_units = config["num_units"] 125 batch_size = config["batch_size"] 126 seq_length = config["seq_length"] 127 128 with ops.Graph().as_default(), ops.device("/device:GPU:0"): 129 inputs = array_ops.zeros([batch_size, seq_length, num_units], 130 dtypes.float32) 131 132 multi_cell = contrib_rnn.MultiRNNCell( 133 [contrib_rnn.BasicLSTMCell(num_units) for _ in range(num_layers)]) 134 outputs, final_state = rnn.dynamic_rnn( 135 multi_cell, inputs, dtype=dtypes.float32) 136 trainable_variables = ops.get_collection( 137 ops.GraphKeys.TRAINABLE_VARIABLES) 138 gradients = gradients_impl.gradients([outputs, final_state], 139 trainable_variables) 140 training_op = control_flow_ops.group(*gradients) 141 self._BenchmarkOp(training_op, "tf_rnn_lstm %s %s" % 142 (config_name, self._GetConfigDesc(config))) 143 144 def benchmarkTfRNNLSTMBlockCellTraining(self): 145 test_configs = self._GetTestConfig() 146 for config_name, config in test_configs.items(): 147 num_layers = config["num_layers"] 148 num_units = config["num_units"] 149 batch_size = config["batch_size"] 150 seq_length = config["seq_length"] 151 152 with ops.Graph().as_default(), ops.device("/device:GPU:0"): 153 inputs = array_ops.zeros([batch_size, seq_length, num_units], 154 dtypes.float32) 155 156 multi_cell = contrib_rnn.MultiRNNCell( 157 [lstm_ops.LSTMBlockCell(num_units) for _ in range(num_layers)]) 158 outputs, final_state = rnn.dynamic_rnn( 159 multi_cell, inputs, dtype=dtypes.float32) 160 trainable_variables = ops.get_collection( 161 ops.GraphKeys.TRAINABLE_VARIABLES) 162 gradients = gradients_impl.gradients([outputs, final_state], 163 trainable_variables) 164 training_op = control_flow_ops.group(*gradients) 165 self._BenchmarkOp(training_op, "tf_rnn_lstm_block_cell %s %s" % 166 (config_name, self._GetConfigDesc(config))) 167 168 169if __name__ == "__main__": 170 test.main() 171