# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark for Matmul operator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools
import time

import numpy as np

from tensorflow.python.client import session as session_lib
from tensorflow.python.framework import ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test


def build_graph(device, n, m, k, transpose_a, transpose_b, dtype):
  """Build a graph containing a single matmul operation.

  Args:
    device: String, the device to run on.
    n: tensor A's first dimension size.
    m: tensor A's second dimension size.
    k: tensor B's second dimension size.
    transpose_a: boolean value to show if tensor A is transposed.
    transpose_b: boolean value to show if tensor B is transposed.
    dtype: numpy data type of the input tensor.

  Returns:
    A matmul operation to run()
  """
  with ops.device('%s' % device):
    # When an operand is flagged as transposed, allocate it with swapped
    # dimensions so the logical (n x m) x (m x k) product is unchanged.
    if not transpose_a:
      x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype))
    else:
      x = variables.Variable(random_ops.random_uniform([m, n], dtype=dtype))
    if not transpose_b:
      y = variables.Variable(random_ops.random_uniform([m, k], dtype=dtype))
    else:
      y = variables.Variable(random_ops.random_uniform([k, m], dtype=dtype))

    z = math_ops.matmul(x, y, transpose_a=transpose_a, transpose_b=transpose_b)
    # Group the result so session.run executes the matmul without
    # fetching (transferring) the product back to the host.
    return control_flow_ops.group(z)


class MatmulBenchmark(test.Benchmark):
  """Benchmark matmul!"""

  def run_graph(self, device, n, m, k, transpose_a, transpose_b, num_iters,
                dtype):
    """Run the graph and print its execution time.

    Args:
      device: String, the device to run on.
      n: tensor A's first dimension size.
      m: tensor A's second dimension size.
      k: tensor B's second dimension size.
      transpose_a: boolean value to show if tensor A is transposed.
      transpose_b: boolean value to show if tensor B is transposed.
      num_iters: number of iterations to run the benchmark.
      dtype: numpy data type of the input tensor.

    Returns:
      The duration of the run in seconds.
    """
    graph = ops.Graph()
    with graph.as_default():
      output = build_graph(device, n, m, k, transpose_a, transpose_b, dtype)
      with session_lib.Session(graph=graph) as session:
        variables.global_variables_initializer().run()
        # Warm-up runs exclude one-time costs (autotuning, memory
        # allocation, kernel compilation) from the timed loop.
        for _ in range(500):
          session.run(output)
        start_time = time.time()
        for _ in range(num_iters):
          session.run(output)
        duration = (time.time() - start_time)
        # A matmul of (n x m) by (m x k) issues ~2*n*m*k multiply-add
        # items; throughput is reported in Gitems/s.
        num_items = n * m * k * 2
        throughput = num_items * num_iters / duration / 1e9
        # Build the shape/transpose label once so the printed line and the
        # reported benchmark name stay consistent. (Previously the printed
        # label used '.tb:' while report_benchmark used ',tb:'.)
        input_info = '%dx%dx%d,ta:%s,tb:%s' % (n, m, k, str(transpose_a),
                                               str(transpose_b))
        print('%s %s input_info:%s %d %.4fsec, %.4fGitems/s.' %
              (device, str(dtype), input_info, num_iters, duration,
               throughput))

      name_template = 'matmul_{device}_{dtype}_input_info_{inputinfo}'

      self.report_benchmark(
          name=name_template.format(
              device=device,
              dtype=str(dtype).replace(' ', ''),
              inputinfo=input_info).replace(' ', ''),
          iters=num_iters,
          wall_time=duration)
      return duration

  def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters):
    """Run one benchmark configuration on the GPU device."""
    self.run_graph(test.gpu_device_name(), n, m, k, transpose_a, transpose_b,
                   num_iters, dtype)

  def test_round(self, num_iters):
    """Sweep a fixed set of matmul shapes for float32 and float64.

    Args:
      num_iters: number of timed iterations per configuration.
    """
    dtypes = [np.float32, np.float64]
    for dtype in dtypes:
      # Square-ish cases: B's second dimension equals A's first (k == n).
      for n, m, (transpose_a, transpose_b) in itertools.product(
          [512, 1024], [1, 8, 16, 128], [(False, False), (True, False),
                                         (False, True)]):
        k = n
        self.run_test_gpu(n, m, k, transpose_a, transpose_b, dtype, num_iters)

      # Tall/skinny cases with a large inner dimension.
      for n, m, k, (transpose_a, transpose_b) in itertools.product(
          [200], [1, 8, 20], [10000], [(False, False), (True, False),
                                       (False, True)]):
        self.run_test_gpu(n, m, k, transpose_a, transpose_b, dtype, num_iters)

      # Hand-picked extreme shapes.
      for (n, m, k), (transpose_a, transpose_b) in itertools.product(
          [(200, 20, 20000), (1, 10000, 200)], [(False, False), (True, False),
                                                (False, True)]):
        self.run_test_gpu(n, m, k, transpose_a, transpose_b, dtype, num_iters)

  def benchmark_matmul(self):
    """Entry point discovered by the benchmark runner."""
    self.test_round(num_iters=200)


if __name__ == '__main__':
  test.main()