1// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// compute.h: the central stage of the Gemm computation, operates 16// on already-packed LHS and RHS blocks and calls the Gemm kernel 17// to compute a block of the product. 18 19#ifndef GEMMLOWP_INTERNAL_COMPUTE_H_ 20#define GEMMLOWP_INTERNAL_COMPUTE_H_ 21 22#include "block_params.h" 23#include "kernel.h" 24#include "pack.h" 25 26namespace gemmlowp { 27 28template <typename PackedLhs, typename PackedRhs, typename PackedResult> 29class ComputeImpl { 30 typedef typename PackedLhs::KernelSideFormat KernelLhsFormat; 31 typedef typename PackedRhs::KernelSideFormat KernelRhsFormat; 32 typedef KernelFormat<KernelLhsFormat, KernelRhsFormat> Format; 33 34 const KernelBase& kernel_; 35 const BlockParams& block_params_; 36 37 PackedResult* const packed_result_; 38 const PackedLhs& packed_lhs_; 39 const PackedRhs& packed_rhs_; 40 41 public: 42 ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params, 43 PackedResult* _packed_result, const PackedLhs& _packed_lhs, 44 const PackedRhs& _packed_rhs) 45 : kernel_(_kernel), 46 block_params_(_block_params), 47 packed_result_(_packed_result), 48 packed_lhs_(_packed_lhs), 49 packed_rhs_(_packed_rhs) {} 50 51 void Compute(int depth) { 52 depth = RoundUp<Format::kDepth>(depth); 53 assert(depth <= block_params_.l2_depth); 54 for (int d = 0; d < depth; d += block_params_.l1_depth) { 55 int ds = std::min(block_params_.l1_depth, depth - d); 56 57 for (int r = 0; r < block_params_.l2_rows; r += block_params_.l1_rows) { 58 int rs = std::min(block_params_.l1_rows, block_params_.l2_rows - r); 59 60 ComputeL1(r, rs, 0, block_params_.l2_cols, d, ds); 61 } 62 } 63 } 64 65 private: 66 void ComputeRun(int start_row, int start_col, int start_depth, 67 int depth) GEMMLOWP_NOINLINE { 68 packed_lhs_.seek_run(start_row, start_depth); 69 packed_rhs_.seek_run(start_col, start_depth); 70 auto packed_result_block = packed_result_->Map().block( 71 start_row, start_col, Format::kRows, Format::kCols); 72 kernel_.Run(packed_result_block.data(), packed_result_block.rows_stride(), 73 packed_result_block.cols_stride(), packed_lhs_.current_data(), 74 packed_rhs_.current_data(), start_depth, depth); 75 } 76 77 void ComputeL1(int start_row, int rows, int start_col, int cols, 78 int start_depth, int depth) { 79 assert(rows % Format::kRows == 0); 80 assert(cols % Format::kCols == 0); 81 assert(depth % Format::kDepth == 0); 82 83 for (int c = 0; c < cols; c += Format::kCols) { 84 for (int r = 0; r < rows; r += Format::kRows) { 85 ComputeRun(start_row + r, start_col + c, start_depth, depth); 86 } 87 } 88 } 89}; 90 91template <typename PackedLhs, typename PackedRhs, typename PackedResult> 92void Compute(const KernelBase& kernel, const BlockParams& block_params, 93 PackedResult* packed_result, const PackedLhs& packed_lhs, 94 const PackedRhs& packed_rhs, int depth) { 95 ScopedProfilingLabel label("compute"); 96 ComputeImpl<PackedLhs, PackedRhs, PackedResult> impl( 97 kernel, block_params, packed_result, packed_lhs, packed_rhs); 98 99 impl.Compute(depth); 100} 101 102} // namespace gemmlowp 103 104#endif // GEMMLOWP_INTERNAL_COMPUTE_H_ 105