1a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// Copyright 2016 The Gemmlowp Authors. All Rights Reserved. 2a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// 3a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// Licensed under the Apache License, Version 2.0 (the "License"); 4a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// you may not use this file except in compliance with the License. 5a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// You may obtain a copy of the License at 6a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// 7a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// http://www.apache.org/licenses/LICENSE-2.0 8a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// 9a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// Unless required by applicable law or agreed to in writing, software 10a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// distributed under the License is distributed on an "AS IS" BASIS, 11a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// See the License for the specific language governing permissions and 13a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang// limitations under the License. 14a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 15a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#ifndef GEMMLOWP_META_BASE_H_ 16a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#define GEMMLOWP_META_BASE_H_ 17a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 18a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#include <cassert> 19a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#include <cstdint> 20a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 21a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#include "../internal/common.h" 22a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 23a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangnamespace gemmlowp { 24a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangnamespace meta { 25a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 26a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <int align> 27a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wanginline int AlignTo(int value) { 28a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang return ((value + align - 1) / align) * align; 29a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang} 30a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 31a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wanginline int AlignTo(int align, int value) { 32a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang return ((value + align - 1) / align) * align; 33a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang} 34a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 35a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename Kernel_, typename OutputStream_> 36a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangstruct FusedKernelParams { 37a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang public: 38a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef Kernel_ Kernel; 39a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef OutputStream_ OutputStream; 40a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 41a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang Kernel kernel; 42a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang OutputStream output_stream; 43a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 44a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 45a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename InType_, typename OutType_, typename LeftStream_, 46a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typename RightStream_, typename Kernel_, typename OutputStream_> 47a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangstruct GemmParams { 48a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang public: 49a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef InType_ InType; 50a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef OutType_ OutType; 51a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef LeftStream_ LeftStream; 52a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef RightStream_ RightStream; 53a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef Kernel_ Kernel; 54a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef OutputStream_ OutputStream; 55a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 56a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef FusedKernelParams<Kernel, OutputStream> FusedKernel; 57a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 58a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang // Common parameters. 59a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 60a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang int m; 61a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang int n; 62a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang int k; 63a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 64a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang const InType* lhs; 65a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang const InType* rhs; 66a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang OutType* result; 67a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang std::uint8_t* scratch; 68a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 69a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang // Specialized parameters. 70a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 71a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang LeftStream left_stream; 72a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang RightStream right_stream; 73a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang FusedKernel fused_kernel; 74a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 75a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 76a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename InType, int lanes_count, int pack_size, int leftovers, 77a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typename StreamParams> 78a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangclass Stream { 79a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang public: 80a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static void Pack(const InType* in, const StreamParams& params, InType* out); 81a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 82a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static int UnpackedAdvance(const StreamParams& params); 83a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 84a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static int PackedAdvance(const StreamParams& params); 85a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 86a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static int UnpackedStride(const StreamParams& params); 87a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 88a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static int PackedStride(const StreamParams& params); 89a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 90a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 91a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename InType, typename StreamType> 92a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangclass StreamUtil { 93a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang public: 94a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static const InType* Offset(const StreamType& params, const InType* source, 95a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang int offset_stride, int offset_advance); 96a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 97a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static int Scratch(const StreamType& params, int lanes); 98a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 99a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 100a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename InType, typename OutType, typename Kernel, 101a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typename OutputStream, int kernel_m, int kernel_n, int pack_size> 102a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangclass MulKernel { 103a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang public: 104a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static void Multiply(const InType* lhs, const InType* rhs, 105a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang const FusedKernelParams<Kernel, OutputStream>& params, 106a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang OutType* result); 107a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 108a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 109a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename InType_, typename OutType_, typename Kernel_> 110a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangstruct Transform1DParams { 111a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef InType_ InType; 112a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef OutType_ OutType; 113a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang typedef Kernel_ Kernel; 114a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 115a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang const InType* input; 116a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang OutType* output; 117a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang std::uint8_t* scratch; 118a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 119a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang Kernel kernel; 120a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 121a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 122a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename InType, typename OutType, typename Kernel, int kernel_size, 123a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang int leftovers> 124a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangclass Transform1DKernel { 125a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang public: 126a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static void Transform(const InType* input, const Kernel& params, 127a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang OutType* output); 128a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 129a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 130a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename InType, typename OutType, typename Transform> 131a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangclass Transform1DUtil { 132a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang public: 133a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static int EstimateComputeCost(const Transform& params); 134a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 135a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static const InType* OffsetInput(const Transform& params, const InType* input, 136a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang int offset); 137a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 138a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang static OutType* OffsetOutput(const Transform& params, OutType* output, 139a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang int offset); 140a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}; 141a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 142a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang} // namespace meta 143a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang} // namespace gemmlowp 144a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang 145a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#endif // GEMMLOWP_META_BASE_H_ 146