// Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
15a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#ifndef GEMMLOWP_META_BASE_H_
16a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#define GEMMLOWP_META_BASE_H_
17a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
18a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#include <cassert>
19a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#include <cstdint>
20a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
21a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#include "../internal/common.h"
22a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
23a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangnamespace gemmlowp {
24a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangnamespace meta {
25a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
// Rounds `value` up to a multiple of the compile-time constant `align`.
//
// For non-negative `value` the result is the smallest multiple of `align`
// that is greater than or equal to `value`. Negative values are not rounded
// up consistently because the integer division truncates toward zero.
//
// `align` must be strictly positive; this is enforced at compile time so that
// a zero alignment cannot reach the division below.
template <int align>
inline int AlignTo(int value) {
  static_assert(align > 0, "AlignTo requires a strictly positive alignment.");
  return ((value + align - 1) / align) * align;
}
30a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
// Rounds `value` up to a multiple of `align`, with the alignment supplied at
// run time.
//
// For non-negative `value` the result is the smallest multiple of `align`
// that is greater than or equal to `value`. Negative values are not rounded
// up consistently because the integer division truncates toward zero.
//
// `align` must be strictly positive: zero would divide by zero. The
// precondition is checked in debug builds only.
inline int AlignTo(int align, int value) {
  assert(align > 0);
  return ((value + align - 1) / align) * align;
}
34a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
// Pairs one `Kernel_` value with one `OutputStream_` value so both can be
// handed around as a single object.
//
// Both template arguments are re-exported as nested aliases and stored by
// value, in declaration order.
template <typename Kernel_, typename OutputStream_>
struct FusedKernelParams {
  using Kernel = Kernel_;
  using OutputStream = OutputStream_;

  Kernel kernel;               // The kernel half of the pair.
  OutputStream output_stream;  // The output stream half of the pair.
};
44a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
45a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename InType_, typename OutType_, typename LeftStream_,
46a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang          typename RightStream_, typename Kernel_, typename OutputStream_>
47a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangstruct GemmParams {
48a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang public:
49a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  typedef InType_ InType;
50a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  typedef OutType_ OutType;
51a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  typedef LeftStream_ LeftStream;
52a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  typedef RightStream_ RightStream;
53a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  typedef Kernel_ Kernel;
54a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  typedef OutputStream_ OutputStream;
55a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
56a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  typedef FusedKernelParams<Kernel, OutputStream> FusedKernel;
57a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
58a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  // Common parameters.
59a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
60a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  int m;
61a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  int n;
62a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  int k;
63a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
64a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  const InType* lhs;
65a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  const InType* rhs;
66a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  OutType* result;
67a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  std::uint8_t* scratch;
68a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
69a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  // Specialized parameters.
70a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
71a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  LeftStream left_stream;
72a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  RightStream right_stream;
73a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  FusedKernel fused_kernel;
74a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang};
75a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
// Primary template for the stream interface.
//
// Only declarations appear here: every member is static and receives the
// `StreamParams` object explicitly. No definitions are visible in this
// header; they are presumably supplied by specializations elsewhere.
template <typename InType, int lanes_count, int pack_size, int leftovers,
          typename StreamParams>
class Stream {
 public:
  // Takes a read-only source `in` and a writable destination `out` of the
  // same element type, plus the stream parameters.
  static void Pack(const InType* in, const StreamParams& params, InType* out);

  // Advance queries for the unpacked and packed forms.
  // NOTE(review): units (elements vs. bytes) are not visible here -- confirm.
  static int UnpackedAdvance(const StreamParams& params);
  static int PackedAdvance(const StreamParams& params);

  // Stride queries for the unpacked and packed forms.
  // NOTE(review): units (elements vs. bytes) are not visible here -- confirm.
  static int UnpackedStride(const StreamParams& params);
  static int PackedStride(const StreamParams& params);
};
90a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
// Primary template declaring helper queries for a stream parameter type.
//
// Declarations only; no definitions are visible in this header, so they are
// presumably provided by specializations elsewhere.
template <typename InType, typename StreamType>
class StreamUtil {
 public:
  // Returns a read-only pointer of the same element type as `source`, given
  // a stride offset and an advance offset.
  // NOTE(review): how the two offsets combine is not visible here -- confirm.
  static const InType* Offset(const StreamType& params, const InType* source,
                              int offset_stride, int offset_advance);

  // Returns an int for `params` and a lane count.
  // NOTE(review): presumably a scratch-space requirement -- confirm the units.
  static int Scratch(const StreamType& params, int lanes);
};
99a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
100a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangtemplate <typename InType, typename OutType, typename Kernel,
101a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang          typename OutputStream, int kernel_m, int kernel_n, int pack_size>
102a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wangclass MulKernel {
103a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang public:
104a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang  static void Multiply(const InType* lhs, const InType* rhs,
105a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang                       const FusedKernelParams<Kernel, OutputStream>& params,
106a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang                       OutType* result);
107a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang};
108a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
// Aggregate holding the configuration of a one-dimensional transform: an
// input pointer, an output pointer, a scratch byte buffer and a `Kernel_`
// value.
//
// The three template arguments are re-exported as nested aliases.
template <typename InType_, typename OutType_, typename Kernel_>
struct Transform1DParams {
  using InType = InType_;
  using OutType = OutType_;
  using Kernel = Kernel_;

  const InType* input;    // Source data; read-only through this pointer.
  OutType* output;        // Destination data.
  std::uint8_t* scratch;  // Byte buffer; presumably working memory.

  Kernel kernel;  // Kernel-specific parameters, stored by value.
};
121a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
// Primary template for the one-dimensional transform kernel interface.
//
// Declares a single static entry point; no definition is visible in this
// header, so it is presumably provided by specializations elsewhere.
template <typename InType, typename OutType, typename Kernel, int kernel_size,
          int leftovers>
class Transform1DKernel {
 public:
  // Takes a read-only `input`, the kernel parameters, and a writable
  // `output` pointer.
  static void Transform(const InType* input, const Kernel& params,
                        OutType* output);
};
129a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
// Primary template declaring helper queries for a one-dimensional transform.
//
// Declarations only; no definitions are visible in this header, so they are
// presumably provided by specializations elsewhere.
template <typename InType, typename OutType, typename Transform>
class Transform1DUtil {
 public:
  // Returns an int cost figure for `params`.
  // NOTE(review): the unit of the estimate is not visible here -- confirm.
  static int EstimateComputeCost(const Transform& params);

  // Returns a read-only input pointer for the given `input` and `offset`.
  static const InType* OffsetInput(const Transform& params, const InType* input,
                                   int offset);

  // Returns a writable output pointer for the given `output` and `offset`.
  static OutType* OffsetOutput(const Transform& params, OutType* output,
                               int offset);
};
141a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
142a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}  // namespace meta
143a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang}  // namespace gemmlowp
144a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang
145a9fd919a0080e2c3c7ed1ce451c85a4d86f2f8c1Miao Wang#endif  // GEMMLOWP_META_BASE_H_
146