1a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// Licensed under the Apache License, Version 2.0 (the "License");
475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// you may not use this file except in compliance with the License.
575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// You may obtain a copy of the License at
675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//     http://www.apache.org/licenses/LICENSE-2.0
875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// Unless required by applicable law or agreed to in writing, software
1075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// distributed under the License is distributed on an "AS IS" BASIS,
1175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// See the License for the specific language governing permissions and
1375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// limitations under the License.
1475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
1575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// kernel.h: general definitions for kernels.
1675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
1775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob#ifndef GEMMLOWP_INTERNAL_KERNEL_H_
1875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob#define GEMMLOWP_INTERNAL_KERNEL_H_
1975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
200a70f98b4be89f51cdd54bf739c953e82ec7fb55Miao Wang#include "../public/bit_depth.h"
217b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#include "common.h"
2275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
2375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacobnamespace gemmlowp {
2475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
2575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// Explanation of general gemmlowp terminology
2675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// ===========================================
2775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
2875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// We use the following abbreviations:
2975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// LHS = "left-hand side"
3075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// RHS = "right-hand side"
3175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// Sometimes when referring to either LHS or RHS, we just say a "Side".
3275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
3375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// In a matrix product of a MxK matrix times a KxN matrix,
3475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// we call K the 'depth'. Note that M is the number of rows
3575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// of the result (and of the LHS), and N is the number of columns
3675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// of the result (and of the RHS).
3775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
3875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// In each of the LHS and RHS matrices, we call 'width' the
3975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// other dimension, besides the depth. So in the LHS, 'width'
4075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// is the number of rows, while in the RHS, 'width' is the number
4175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// of columns.
4275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
// So in the LHS MxK matrix, the depth is K and the width is M.
// And in the RHS KxN matrix, the depth is K and the width is N.
4575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
4675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// This is illustrated in this picture:
4775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
4875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                             RHS width
4975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                        <----------------->
5075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                        +-----------------+ ^
5175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                        |       RHS       | | Depth
5275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                        +-----------------+ v
5375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                 ^ +--+ +-----------------+
5475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                 | |L | |                 |
5575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//       LHS width | |H | |      Result     |
5675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                 | |S | |                 |
5775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                 v +--+ +-----------------+
5875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                   <-->
5975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//                   Depth
6075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
6175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// Explanation of gemmlowp kernel formats and "cells"
6275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// ==================================================
6375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
6475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// Kernels operate on small LHS and RHS blocks that fit in registers.
6575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// These blocks are stored contiguously in memory, but not always
6675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// in a traditional column-major or row-major order; instead,
6775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// they consist of a number of sub-blocks, which we call "cells",
6875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// that are stored in column-major or row-major order. However,
6975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// what really matters to us is not so much rows vs columns, but
7075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// rather width vs depth. So we refer to "width-major" and "depth-major"
7175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// storage orders. In the LHS, width-major means row-major,
7275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// while in the RHS, width-major means column-major.
7375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// There is also a third possibility, "diagonal order",
7475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// which is unused at the moment.
7575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
7675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// We aim to treat both sides, LHS and RHS, on an equal footing,
7775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// so we call them both 'sides'. A KernelFormat thus is just a pair
7875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// of KernelSideFormat's, one for LHS and one for RHS; each KernelSideFormat
7975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// contains a CellFormat and a number of cells; cells are only ever
8075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// stacked in the width dimension, which means stacked vertically in the
// LHS and stacked horizontally in the RHS.
8275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
8375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// Example
8475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// =======
8575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
8675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// Let's work out the data layout expected by a kernel having the
8775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// following format (the struct names here are defined below in this file):
8875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
8975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// KernelFormat<
9075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//   KernelSideFormat<CellFormat<3, 4>, 3>,
9175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//   KernelSideFormat<CellFormat<5, 4>, 2>
9275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// >
9375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
9475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// The LHS format, KernelSideFormat<CellFormat<3, 4>, 3>, means:
9575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 3 cells, each cell having dimensions (width=3, depth=4), laid out in
9675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// DepthMajor order (the default value, see CellFormat). In the LHS,
9775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// DepthMajor means column-major, so the LHS cells are of size 3x4 in
9875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// column-major order, so the LHS layout is:
9975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
10075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 0  3  6  9
101041e4a5d64d58e5b7348e69f89b845880aae8577Benoit Jacob// 1  4  7  10
102041e4a5d64d58e5b7348e69f89b845880aae8577Benoit Jacob// 2  5  8  11
10375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 12 15 18 21
10475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 13 16 19 22
10575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 14 17 20 23
10675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 24 27 30 33
10775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 25 28 31 34
10875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 26 29 32 35
10975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
11075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// The RHS format, KernelSideFormat<CellFormat<5, 4>, 2>, means:
11175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 2 cells each having dimensions (width=5, depth=4), laid out in
11275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// DepthMajor order (the default value, see CellFormat). In the RHS,
11375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// DepthMajor means row-major, so the RHS cells are of size 4x5 in
11475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// row-major order, so the RHS layout is:
11575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob//
11675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 0  1  2  3  4  20 21 22 23 24
11775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 5  6  7  8  9  25 26 27 28 29
11875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 10 11 12 13 14 30 31 32 33 34
11975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// 15 16 17 18 19 35 36 37 38 39
12075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
// CellOrder lists the storage orders (i.e. memory layouts) that a cell
// may use; see the explanation of width vs. depth above.
enum class CellOrder {
  // Default order of CellFormat. Per the explanation above, this is
  // column-major in the LHS and row-major in the RHS.
  DepthMajor,
  // Per the explanation above, this is row-major in the LHS and
  // column-major in the RHS.
  WidthMajor,
  // Diagonal order; OffsetIntoCell asserts that the cell is square
  // (kWidth == kDepth) when this order is used.
  Diagonal
};
12475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
12575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// CellFormat describes how data is laid
12675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// out in a cell. That is, a CellOrder together with actual dimensions.
12775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacobtemplate <int tWidth, int tDepth, CellOrder tOrder = CellOrder::DepthMajor>
12875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacobstruct CellFormat {
12975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob  static const int kWidth = tWidth;
13075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob  static const int kDepth = tDepth;
13175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob  static const CellOrder kOrder = tOrder;
13275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
13375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob  static const int kSize = kWidth * kDepth;
13475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob};
13575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
// KernelSideFormat describes how data is laid out in a kernel side
// (i.e. LHS or RHS). That is, a CellFormat together with a number of
// cells. These cells are always stacked in the Width dimension.
// For example, in the LHS case, the Width dimension is the rows dimension,
// so we're saying that in the LHS, cells are stacked vertically.
// We never stack cells in the Depth dimension.
//
// Template parameters:
//   tCellFormat: the format of each cell; must expose kWidth and kDepth.
//   tCells: the number of cells stacked along the width dimension.
//
// The constants are constexpr rather than plain 'static const': under C++17
// that makes them implicitly inline, so they can be ODR-used (for example
// bound to a const reference, as std::min/std::max do) without needing a
// separate out-of-class definition.
template <typename tCellFormat, int tCells>
struct KernelSideFormat {
  using Cell = tCellFormat;
  static constexpr int kCells = tCells;
  // Total width of the side: all cells laid next to each other.
  static constexpr int kWidth = kCells * Cell::kWidth;
  // Cells are never stacked along depth, so the side is one cell deep.
  static constexpr int kDepth = Cell::kDepth;
  // Scalar type associated with this side format.
  using Scalar = std::uint8_t;
};
150a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
151a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename tCellFormat, int tCells>
152a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangstruct KernelSideFormatInt8 : KernelSideFormat<tCellFormat, tCells> {
153a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  typedef std::int8_t Scalar;
15475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob};
15575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
// KernelFormat describes fully the input data layout that a kernel expects.
// It consists of two KernelSideFormat's, one for LHS and one for RHS.
//
// Template parameters:
//   tLhs: side format of the LHS; must expose Cell (with kWidth, kDepth)
//         and kCells.
//   tRhs: side format of the RHS; same requirements as tLhs.
//
// The constants are constexpr rather than plain 'static const': under C++17
// that makes them implicitly inline, so they can be ODR-used (for example
// bound to a const reference, as std::min/std::max do) without needing a
// separate out-of-class definition.
template <typename tLhs, typename tRhs>
struct KernelFormat {
  using Lhs = tLhs;
  using Rhs = tRhs;

  // Both sides share the depth dimension, so their cells must agree on it.
  static_assert(Lhs::Cell::kDepth == Rhs::Cell::kDepth,
                "KernelFormat: LHS and RHS cells must have the same depth");
  static constexpr int kDepth = Lhs::Cell::kDepth;
  // Rows of the kernel block: the total LHS width.
  static constexpr int kRows = Lhs::Cell::kWidth * Lhs::kCells;
  // Columns of the kernel block: the total RHS width.
  static constexpr int kCols = Rhs::Cell::kWidth * Rhs::kCells;
};
16875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
16975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacobinline const char* CellOrderName(CellOrder o) {
17075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob  switch (o) {
17175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob    case CellOrder::DepthMajor:
17275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      return "DepthMajor";
17375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob    case CellOrder::WidthMajor:
17475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      return "WidthMajor";
17575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob    case CellOrder::Diagonal:
17675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      return "Diagonal";
17775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob    default:
17875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      assert(false);
17975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      return nullptr;
18075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob  }
18175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob}
18275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
18375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob// Returns the offset into a cell, at which a given coefficient is stored.
18475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacobtemplate <typename CellFormat>
18575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacobinline int OffsetIntoCell(int w, int d) {
1861963df9ac4a0424674e72ef5da522b5d830605fdMiao Wang  const int size = CellFormat::kWidth;
18775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob  switch (CellFormat::kOrder) {
18875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob    case CellOrder::DepthMajor:
18975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      return w + d * CellFormat::kWidth;
19075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob    case CellOrder::WidthMajor:
19175c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      return d + w * CellFormat::kDepth;
19275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob    case CellOrder::Diagonal:
19375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      assert(CellFormat::kWidth == CellFormat::kDepth);
1940a70f98b4be89f51cdd54bf739c953e82ec7fb55Miao Wang      return ((size + w - d) * size + d) % (size * size);
19575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob    default:
19675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      assert(false);
19775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob      return 0;
19875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob  }
19975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob}
20075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
// KernelBase is the abstract base class that every kernel derives from.
// Thanks to it, the rest of the code does not have to be templatized on
// the exact kernel type, only on the kernel format: kernels that share a
// format can share the same packing/unpacking code.
struct KernelBase {
  // Returns the kernel's name.
  virtual const char* Name() const = 0;

  // The kernel implementation proper. Throughout gemmlowp, 'run' denotes
  // an inner loop whose implementation is supplied by a separate,
  // optimized function.
  //
  // NOTE(review): the parameter meanings below are inferred from their
  // names only -- confirm against the kernel implementations:
  //   dst_ptr: destination buffer of 32-bit values.
  //   dst_row_stride, dst_col_stride: strides of the destination.
  //   lhs_ptr, rhs_ptr: LHS and RHS input data.
  //   start_depth, run_depth: first depth index and depth extent to process.
  virtual void Run(std::int32_t* dst_ptr,
                   std::size_t dst_row_stride,
                   std::size_t dst_col_stride,
                   const std::uint8_t* lhs_ptr,
                   const std::uint8_t* rhs_ptr,
                   std::size_t start_depth,
                   std::size_t run_depth) const = 0;

  // Virtual, so that kernels can be destroyed through a KernelBase pointer.
  virtual ~KernelBase() = default;
};
21875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
// ZeroPointInputValue<T>::kValue is a per-scalar-type constant of type
// std::uint8_t. The primary template below has no kValue member, so
// referring to kValue for a scalar type without a specialization is a
// compile error.
template <typename ScalarT>
struct ZeroPointInputValue {};

// Kernels whose scalar type is std::uint8_t: the value is 0.
template <>
struct ZeroPointInputValue<std::uint8_t> {
  static constexpr std::uint8_t kValue = 0;
};

// Kernels whose scalar type is std::int8_t: the value is 128. Note that
// it is still stored as a std::uint8_t.
// NOTE(review): presumably 128 is the offset between the int8 and uint8
// encodings of the same input -- confirm against the users of kValue.
template <>
struct ZeroPointInputValue<std::int8_t> {
  static constexpr std::uint8_t kValue = 128;
};
231a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
23275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob}  // namespace gemmlowp
23375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob
23475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob#endif  // GEMMLOWP_INTERNAL_KERNEL_H_
235