1// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// compute.h: the central stage of the Gemm computation, operates
16// on already-packed LHS and RHS blocks and calls the Gemm kernel
17// to compute a block of the product.
18
19#ifndef GEMMLOWP_INTERNAL_COMPUTE_H_
20#define GEMMLOWP_INTERNAL_COMPUTE_H_
21
22#include "block_params.h"
23#include "kernel.h"
24#include "pack.h"
25
26namespace gemmlowp {
27
28template <typename PackedLhs, typename PackedRhs, typename PackedResult>
29class ComputeImpl {
30  typedef typename PackedLhs::KernelSideFormat KernelLhsFormat;
31  typedef typename PackedRhs::KernelSideFormat KernelRhsFormat;
32  typedef KernelFormat<KernelLhsFormat, KernelRhsFormat> Format;
33
34  const KernelBase& kernel_;
35  const BlockParams& block_params_;
36
37  PackedResult* const packed_result_;
38  const PackedLhs& packed_lhs_;
39  const PackedRhs& packed_rhs_;
40
41 public:
42  ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params,
43              PackedResult* _packed_result, const PackedLhs& _packed_lhs,
44              const PackedRhs& _packed_rhs)
45      : kernel_(_kernel),
46        block_params_(_block_params),
47        packed_result_(_packed_result),
48        packed_lhs_(_packed_lhs),
49        packed_rhs_(_packed_rhs) {}
50
51  void Compute(int depth) {
52    depth = RoundUp<Format::kDepth>(depth);
53    assert(depth <= block_params_.l2_depth);
54    for (int d = 0; d < depth; d += block_params_.l1_depth) {
55      int ds = std::min(block_params_.l1_depth, depth - d);
56
57      for (int r = 0; r < block_params_.l2_rows; r += block_params_.l1_rows) {
58        int rs = std::min(block_params_.l1_rows, block_params_.l2_rows - r);
59
60        ComputeL1(r, rs, 0, block_params_.l2_cols, d, ds);
61      }
62    }
63  }
64
65 private:
66  void ComputeRun(int start_row, int start_col, int start_depth,
67                  int depth) GEMMLOWP_NOINLINE {
68    packed_lhs_.seek_run(start_row, start_depth);
69    packed_rhs_.seek_run(start_col, start_depth);
70    auto packed_result_block = packed_result_->Map().block(
71        start_row, start_col, Format::kRows, Format::kCols);
72    kernel_.Run(packed_result_block.data(), packed_result_block.rows_stride(),
73                packed_result_block.cols_stride(), packed_lhs_.current_data(),
74                packed_rhs_.current_data(), start_depth, depth);
75  }
76
77  void ComputeL1(int start_row, int rows, int start_col, int cols,
78                 int start_depth, int depth) {
79    assert(rows % Format::kRows == 0);
80    assert(cols % Format::kCols == 0);
81    assert(depth % Format::kDepth == 0);
82
83    for (int c = 0; c < cols; c += Format::kCols) {
84      for (int r = 0; r < rows; r += Format::kRows) {
85        ComputeRun(start_row + r, start_col + c, start_depth, depth);
86      }
87    }
88  }
89};
90
91template <typename PackedLhs, typename PackedRhs, typename PackedResult>
92void Compute(const KernelBase& kernel, const BlockParams& block_params,
93             PackedResult* packed_result, const PackedLhs& packed_lhs,
94             const PackedRhs& packed_rhs, int depth) {
95  ScopedProfilingLabel label("compute");
96  ComputeImpl<PackedLhs, PackedRhs, PackedResult> impl(
97      kernel, block_params, packed_result, packed_lhs, packed_rhs);
98
99  impl.Compute(depth);
100}
101
102}  // namespace gemmlowp
103
104#endif  // GEMMLOWP_INTERNAL_COMPUTE_H_
105