// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// kernel.h: general definitions for kernels.

#ifndef GEMMLOWP_INTERNAL_KERNEL_H_
#define GEMMLOWP_INTERNAL_KERNEL_H_

#include "../public/bit_depth.h"
#include "common.h"

namespace gemmlowp {

// Explanation of general gemmlowp terminology
// ===========================================
//
// We use the following abbreviations:
// LHS = "left-hand side"
// RHS = "right-hand side"
// Sometimes when referring to either LHS or RHS, we just say a "Side".
//
// In a matrix product of a MxK matrix times a KxN matrix,
// we call K the 'depth'. Note that M is the number of rows
// of the result (and of the LHS), and N is the number of columns
// of the result (and of the RHS).
//
// In each of the LHS and RHS matrices, we call 'width' the
// other dimension, besides the depth. So in the LHS, 'width'
// is the number of rows, while in the RHS, 'width' is the number
// of columns.
//
// So in the LHS MxK matrix, the depth is K and the width is M.
// And in the RHS KxN matrix, the depth is K and the width is N.
//
// This is illustrated in this picture:
//
//                             RHS width
//                        <----------------->
//                        +-----------------+ ^
//                        |       RHS       | | Depth
//                        +-----------------+ v
//                 ^ +--+ +-----------------+
//                 | |L | |                 |
//       LHS width | |H | |      Result     |
//                 | |S | |                 |
//                 v +--+ +-----------------+
//                   <-->
//                   Depth

// Explanation of gemmlowp kernel formats and "cells"
// ==================================================
//
// Kernels operate on small LHS and RHS blocks that fit in registers.
// These blocks are stored contiguously in memory, but not always
// in a traditional column-major or row-major order; instead,
// they consist of a number of sub-blocks, which we call "cells",
// that are stored in column-major or row-major order. However,
// what really matters to us is not so much rows vs columns, but
// rather width vs depth. So we refer to "width-major" and "depth-major"
// storage orders. In the LHS, width-major means row-major,
// while in the RHS, width-major means column-major.
// There is also a third possibility, "diagonal order",
// which is unused at the moment.
//
// We aim to treat both sides, LHS and RHS, on an equal footing,
// so we call them both 'sides'.
// A KernelFormat thus is just a pair
// of KernelSideFormat's, one for LHS and one for RHS; each KernelSideFormat
// contains a CellFormat and a number of cells; cells are only ever
// stacked in the width dimension, which means stacked vertically in the
// LHS and stacked horizontally in the RHS.
//
// Example
// =======
//
// Let's work out the data layout expected by a kernel having the
// following format (the struct names here are defined below in this file):
//
//   KernelFormat<
//     KernelSideFormat<CellFormat<3, 4>, 3>,
//     KernelSideFormat<CellFormat<5, 4>, 2>
//   >
//
// The LHS format, KernelSideFormat<CellFormat<3, 4>, 3>, means:
// 3 cells, each cell having dimensions (width=3, depth=4), laid out in
// DepthMajor order (the default value, see CellFormat).
// In the LHS,
// DepthMajor means column-major, so the LHS cells are of size 3x4 in
// column-major order, so the LHS layout is:
//
//   0  3  6  9
//   1  4  7 10
//   2  5  8 11
//  12 15 18 21
//  13 16 19 22
//  14 17 20 23
//  24 27 30 33
//  25 28 31 34
//  26 29 32 35
//
// The RHS format, KernelSideFormat<CellFormat<5, 4>, 2>, means:
// 2 cells each having dimensions (width=5, depth=4), laid out in
// DepthMajor order (the default value, see CellFormat).
// In the RHS,
// DepthMajor means row-major, so the RHS cells are of size 4x5 in
// row-major order, so the RHS layout is:
//
//   0  1  2  3  4 20 21 22 23 24
//   5  6  7  8  9 25 26 27 28 29
//  10 11 12 13 14 30 31 32 33 34
//  15 16 17 18 19 35 36 37 38 39

// CellOrder enumerates the possible storage orders (=layouts) for
// a cell (see explanation above).
enum class CellOrder { DepthMajor, WidthMajor, Diagonal };

// CellFormat describes how data is laid
// out in a cell. That is, a CellOrder together with actual dimensions.
//
// Template parameters:
//   tWidth: extent of the cell along the width dimension.
//   tDepth: extent of the cell along the depth dimension.
//   tOrder: storage order of the cell; defaults to CellOrder::DepthMajor.
template <int tWidth, int tDepth, CellOrder tOrder = CellOrder::DepthMajor>
struct CellFormat {
  static const int kWidth = tWidth;
  static const int kDepth = tDepth;
  static const CellOrder kOrder = tOrder;

  // Number of entries in one cell.
  static const int kSize = kWidth * kDepth;
};

// KernelSideFormat describes how data is laid out in a kernel side
// (i.e. LHS or RHS). That is, a CellFormat together with a number of
// cells. These cells are always stacked in the Width dimension.
// For example, in the LHS case, the Width dimension is the rows dimension,
// so we're saying that in the LHS, cells are stacked vertically.
// We never stack cells in the Depth dimension.
//
// Template parameters:
//   tCellFormat: the format of each cell; must expose kWidth and kDepth.
//   tCells: how many cells are stacked along the width dimension.
template <typename tCellFormat, int tCells>
struct KernelSideFormat {
  typedef tCellFormat Cell;
  static const int kCells = tCells;
  // Total width of the side: all cells stacked along the width dimension.
  static const int kWidth = kCells * Cell::kWidth;
  // Cells are never stacked in depth, so the side depth is the cell depth.
  static const int kDepth = Cell::kDepth;
  typedef std::uint8_t Scalar;
};

// KernelSideFormatInt8 has the same layout constants as the
// KernelSideFormat it derives from; the only difference is that its
// Scalar type is std::int8_t instead of std::uint8_t.
template <typename tCellFormat, int tCells>
struct KernelSideFormatInt8 : KernelSideFormat<tCellFormat, tCells> {
  typedef std::int8_t Scalar;
};

// KernelFormat describes fully the input data layout that a kernel expects.
// It consists of two KernelSideFormat's, one for LHS and one for RHS.
//
// Both sides must use cells of the same depth; this is enforced at
// compile time.
template <typename tLhs, typename tRhs>
struct KernelFormat {
  typedef tLhs Lhs;
  typedef tRhs Rhs;

  static_assert(Lhs::Cell::kDepth == Rhs::Cell::kDepth,
                "LHS and RHS cells must have the same depth");
  static const int kDepth = Lhs::Cell::kDepth;
  // Rows of the result block = total LHS width.
  static const int kRows = Lhs::Cell::kWidth * Lhs::kCells;
  // Columns of the result block = total RHS width.
  static const int kCols = Rhs::Cell::kWidth * Rhs::kCells;
};

// Returns the name of the given CellOrder value as a string literal.
// An unrecognized value triggers an assertion failure; when assertions
// are compiled out, nullptr is returned instead.
inline const char* CellOrderName(CellOrder o) {
  switch (o) {
    case CellOrder::DepthMajor:
      return "DepthMajor";
    case CellOrder::WidthMajor:
      return "WidthMajor";
    case CellOrder::Diagonal:
      return "Diagonal";
    default:
      assert(false);
      return nullptr;
  }
}

// Returns the offset into a cell, at which a given coefficient is stored.
//
// w is the coordinate along the cell's width dimension and d the
// coordinate along its depth dimension. The mapping depends on
// CellFormat::kOrder:
//   DepthMajor: w + d * kWidth
//   WidthMajor: d + w * kDepth
//   Diagonal:   ((size + w - d) * size + d) modulo (size * size), where
//               size = kWidth; the cell is asserted to be square.
// An unrecognized order triggers an assertion failure; when assertions
// are compiled out, 0 is returned instead.
template <typename CellFormat>
inline int OffsetIntoCell(int w, int d) {
  const int size = CellFormat::kWidth;
  switch (CellFormat::kOrder) {
    case CellOrder::DepthMajor:
      return w + d * CellFormat::kWidth;
    case CellOrder::WidthMajor:
      return d + w * CellFormat::kDepth;
    case CellOrder::Diagonal:
      // The diagonal formula uses a single 'size', so it only makes sense
      // for square cells.
      assert(CellFormat::kWidth == CellFormat::kDepth);
      // Adding 'size' before subtracting d keeps the left operand of the
      // modulo non-negative for in-range coordinates.
      return ((size + w - d) * size + d) % (size * size);
    default:
      assert(false);
      return 0;
  }
}

// KernelBase is the virtual base class below all kernels.
// The idea is that we don't need to templatize all our code on the exact
// kernel type; we only need to templatize on kernel format. Kernels
// sharing the same format can thus share the same packing/unpacking code.
20575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacobstruct KernelBase { 20675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob virtual const char* Name() const = 0; 20775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob 20875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob // This is the kernel implementation. We use the word 'run' consistently 20975c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob // throughout gemmlowp to mean an inner loop, the implementation of which 21075c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob // is to be provided by a separate optimized function. 2117b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang virtual void Run(std::int32_t* dst_ptr, std::size_t dst_row_stride, 2127b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang std::size_t dst_col_stride, const std::uint8_t* lhs_ptr, 2137b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang const std::uint8_t* rhs_ptr, std::size_t start_depth, 2147b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang std::size_t run_depth) const = 0; 21575c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob 21675c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob virtual ~KernelBase() {} 21775c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob}; 21875c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob 219a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename KernelScalarType> 220a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangstruct ZeroPointInputValue {}; 221a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 222a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <> 223a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangstruct ZeroPointInputValue<std::uint8_t> { 224a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static constexpr std::uint8_t kValue = 0; 225a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 226a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 227a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <> 228a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangstruct 
ZeroPointInputValue<std::int8_t> { 229a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static constexpr std::uint8_t kValue = 128; 230a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 231a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 23275c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob} // namespace gemmlowp 23375c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob 23475c4ec0ba4dd86e4f763a54e01002ff29f1d57aBenoit Jacob#endif // GEMMLOWP_INTERNAL_KERNEL_H_ 235