# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15"""Benchmark for Matmul operator."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import itertools
22import time
23
24import numpy as np
25
26from tensorflow.python.client import session as session_lib
27from tensorflow.python.framework import ops
28from tensorflow.python.ops import control_flow_ops
29from tensorflow.python.ops import math_ops
30from tensorflow.python.ops import random_ops
31from tensorflow.python.ops import variables
32from tensorflow.python.platform import test
33
34
def build_graph(device, n, m, k, transpose_a, transpose_b, dtype):
  """Build a graph containing a single matmul operation.

  Args:
    device: String, the device to run on.
    n: tensor A's first dimension size.
    m: tensor A's second dimension size.
    k: tensor B's second dimension size.
    transpose_a: boolean, whether tensor A is transposed.
    transpose_b: boolean, whether tensor B is transposed.
    dtype: numpy data type of the input tensors.

  Returns:
    An op that groups the matmul, suitable for passing to session.run().
  """
  with ops.device(device):
    if not transpose_a:
      x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype))
    else:
      x = variables.Variable(random_ops.random_uniform([m, n], dtype=dtype))
    if not transpose_b:
      y = variables.Variable(random_ops.random_uniform([m, k], dtype=dtype))
    else:
      y = variables.Variable(random_ops.random_uniform([k, m], dtype=dtype))

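    # Group the matmul so that session.run() executes it without fetching the
    # result tensor back to the client.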
    z = math_ops.matmul(x, y, transpose_a=transpose_a, transpose_b=transpose_b)
    return control_flow_ops.group(z)


class MatmulBenchmark(test.Benchmark):
  """Benchmark matmul across shapes, dtypes, and transpose settings."""

  def run_graph(self, device, n, m, k, transpose_a, transpose_b, num_iters,
                dtype):
    """Run the graph and print its execution time.

    Args:
      device: String, the device to run on.
      n: tensor A's first dimension size.
      m: tensor A's second dimension size.
      k: tensor B's second dimension size.
      transpose_a: boolean, whether tensor A is transposed.
      transpose_b: boolean, whether tensor B is transposed.
      num_iters: number of iterations to run the benchmark.
      dtype: numpy data type of the input tensors.

    Returns:
      The duration of the run in seconds.
    """
    graph = ops.Graph()
    with graph.as_default():
      output = build_graph(device, n, m, k, transpose_a, transpose_b, dtype)
      with session_lib.Session(graph=graph) as session:
        variables.global_variables_initializer().run()
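        # Warm-up: run the op 500 times first so that one-time costs such as
        # memory allocation and kernel selection do not skew the timed loop.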
        for _ in range(500):
          session.run(output)
        start_time = time.time()
        for _ in range(num_iters):
          session.run(output)
        duration = time.time() - start_time
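        # A matmul of an (n x m) matrix by an (m x k) matrix performs n*m*k
        # multiply-add pairs, i.e. 2*n*m*k floating-point operations per
        # iteration, so the throughput below is effectively GFLOP/s.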
        num_items = n * m * k * 2
        throughput = num_items * num_iters / duration / 1e9
        print('%s %s input_info:%s %d %.4fsec, %.4fGitems/s.' %
              (device, str(dtype), str(n) + 'x' + str(m) + 'x' + str(k) +
               ',ta:' + str(transpose_a) + ',tb:' + str(transpose_b), num_iters,
               duration, throughput))

    name_template = 'matmul_{device}_{dtype}_input_info_{inputinfo}'

    self.report_benchmark(
        name=name_template.format(
            device=device,
            dtype=str(dtype).replace(' ', ''),
            inputinfo=str(n) + 'x' + str(m) + 'x' + str(k) + ',ta:' +
            str(transpose_a) + ',tb:' + str(transpose_b)).replace(' ', ''),
        iters=num_iters,
        wall_time=duration)
    return duration

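  # test.gpu_device_name() returns the name of the first available GPU, or an
  # empty string when none is present, in which case the ops fall back to the
  # default device.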
  def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters):
    self.run_graph(test.gpu_device_name(), n, m, k, transpose_a, transpose_b,
                   num_iters, dtype)

  def test_round(self, num_iters):
    dtypes = [np.float32, np.float64]
    for dtype in dtypes:
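      # Square-ish shapes: k is tied to n, sweeping small-to-medium m.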
      for n, m, (transpose_a, transpose_b) in itertools.product(
          [512, 1024], [1, 8, 16, 128], [(False, False), (True, False),
                                         (False, True)]):
        k = n
        self.run_test_gpu(n, m, k, transpose_a, transpose_b, dtype, num_iters)

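      # Shapes with a small inner dimension m and a large output dimension k.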
      for n, m, k, (transpose_a, transpose_b) in itertools.product(
          [200], [1, 8, 20], [10000], [(False, False), (True, False),
                                       (False, True)]):
        self.run_test_gpu(n, m, k, transpose_a, transpose_b, dtype, num_iters)

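      # A pair of hand-picked extreme shapes.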
      for (n, m, k), (transpose_a, transpose_b) in itertools.product(
          [(200, 20, 20000), (1, 10000, 200)], [(False, False), (True, False),
                                                (False, True)]):
        self.run_test_gpu(n, m, k, transpose_a, transpose_b, dtype, num_iters)

  def benchmark_matmul(self):
    self.test_round(num_iters=200)


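# Note: as with other TensorFlow benchmarks, the benchmark_* methods are
# typically selected with the --benchmarks regex flag when this file is run
# directly, e.g. (assuming a local GPU): --benchmarks=MatmulBenchmark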
if __name__ == '__main__':
  test.main()