1// Copyright 2016 The Gemmlowp Authors. All Rights Reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#ifndef GEMMLOWP_META_STREAMS_H_ 16#define GEMMLOWP_META_STREAMS_H_ 17 18#include <iostream> 19#include <typeinfo> 20#include "base.h" 21 22namespace gemmlowp { 23namespace meta { 24 25struct RowMajor { 26 public: 27 int count; 28 int stride; 29}; 30 31struct RowMajorWithSum { 32 public: 33 int count; 34 int stride; 35 int multiplicative_sum_offset; 36 int additive_sum_offset; 37}; 38 39struct ColumnMajorWithSum { 40 public: 41 int count; 42 int stride; 43 int multiplicative_sum_offset; 44 int additive_sum_offset; 45}; 46 47template <typename InType> 48class StreamUtil<InType, RowMajor> { 49 public: 50 static const InType* Offset(const RowMajor& params, const InType* source, 51 int offset_stride, int offset_advance) { 52 return reinterpret_cast<const InType*>( 53 reinterpret_cast<const std::uint8_t*>(source) + 54 offset_stride * params.stride + offset_advance * sizeof(InType)); 55 } 56 57 static InType* Offset(const RowMajor& params, InType* source, 58 int offset_stride, int offset_advance) { 59 return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) + 60 offset_stride * params.stride + 61 offset_advance * sizeof(InType)); 62 } 63 64 static int Scratch(const RowMajor& params, int lanes_count, int pack_size) { 65 return AlignTo<64>(lanes_count * AlignTo(pack_size, params.stride)); 66 } 67}; 68 69template <typename InType> 70class StreamUtil<InType, RowMajorWithSum> { 71 public: 72 static const InType* Offset(const RowMajorWithSum& params, 73 const InType* source, int offset_stride, 74 int offset_advance) { 75 return reinterpret_cast<const InType*>( 76 reinterpret_cast<const std::uint8_t*>(source) + 77 offset_stride * params.stride + offset_advance * sizeof(InType)); 78 } 79 80 static InType* Offset(const RowMajorWithSum& params, InType* source, 81 int offset_stride, int offset_advance) { 82 return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) + 83 offset_stride * params.stride + 84 offset_advance * sizeof(InType)); 85 } 86 87 static int Scratch(const RowMajorWithSum& params, int lanes_count, 88 int pack_size) { 89 return 32 + AlignTo<32>(sizeof(InType) * lanes_count * 90 AlignTo(pack_size, params.count)); 91 } 92}; 93 94template <typename InType> 95class StreamUtil<InType, ColumnMajorWithSum> { 96 public: 97 static const InType* Offset(const ColumnMajorWithSum& params, 98 const InType* source, int offset_stride, 99 int offset_advance) { 100 return reinterpret_cast<const InType*>( 101 reinterpret_cast<const std::uint8_t*>(source) + 102 params.stride * offset_advance + offset_stride * sizeof(InType)); 103 } 104 105 static const InType* Offset(const ColumnMajorWithSum& params, InType* source, 106 int offset_stride, int offset_advance) { 107 return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) + 108 params.stride * offset_advance + 109 offset_stride * sizeof(InType)); 110 } 111 112 static int Scratch(const ColumnMajorWithSum& params, int lanes_count, 113 int pack_size) { 114 return 32 + AlignTo<32>(sizeof(InType) * lanes_count * 115 AlignTo(pack_size, params.count)); 116 } 117}; 118 119template <typename InType, int lanes_count, int pack_size, int leftovers> 120class Stream<InType, lanes_count, pack_size, leftovers, RowMajor> { 121 public: 122 static void Pack(const InType* in, const RowMajor& params, InType* out) { 123#ifdef DEBUG 124#ifdef DEBUG_METAGEMM_VERBOSE 125 std::cout << "RowMajor(" << std::string(typeid(InType).name()) 126 << ")::Pack() -- " << lanes_count << "x" << pack_size << " + " 127 << leftovers << std::endl; 128#endif 129#else 130 if (lanes_count != 0) { 131 std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl; 132 std::exit(1); 133 } 134#endif 135 } 136 137 static int UnpackedAdvance(const RowMajor& params) { 138 return sizeof(InType) * pack_size; 139 } 140 141 static int PackedAdvance(const RowMajor& params) { 142 return sizeof(InType) * pack_size * lanes_count; 143 } 144 145 static int UnpackedStride(const RowMajor& params) { 146 return lanes_count * params.stride; 147 } 148 149 static int PackedStride(const RowMajor& params) { 150 return AlignTo<32>(lanes_count * AlignTo<pack_size>(params.stride)); 151 } 152 153 static int Scratch(const RowMajor& params) { return PackedStride(params); } 154 155#ifdef DEBUG 156#ifdef DEBUG_METAGEMM_VERBOSE 157 static void Debug(const RowMajor& params) { 158 std::cout << "RowMajor(" << typeid(InType).name() << ")" << std::endl; 159 std::cout << " dims: " << lanes_count << "x" << pack_size << " + " 160 << leftovers << std::endl; 161 std::cout << " scratch: " << Scratch(params) << std::endl; 162 std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl; 163 std::cout << " packed advance: " << PackedAdvance(params) << std::endl; 164 std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl; 165 std::cout << " packed stride: " << PackedStride(params) << std::endl; 166 std::cout << " params:" << std::endl; 167 std::cout << " count: " << params.count << std::endl; 168 std::cout << " stride: " << params.stride << std::endl; 169 } 170#endif 171#endif 172}; 173 174template <typename InType, int lanes_count, int pack_size, int leftovers> 175class Stream<InType, lanes_count, pack_size, leftovers, RowMajorWithSum> { 176 public: 177 static void Pack(const InType* in, const RowMajorWithSum& params, 178 InType* out) { 179#ifdef DEBUG 180#ifdef DEBUG_METAGEMM_VERBOSE 181 std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")::Pack() -- " 182 << lanes_count << "x" << pack_size << " + " << leftovers 183 << std::endl; 184#endif 185#else 186 if (lanes_count != 0) { 187 std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl; 188 std::exit(1); 189 } 190#endif 191 } 192 193 static int UnpackedAdvance(const RowMajorWithSum& params) { 194 return sizeof(InType) * pack_size; 195 } 196 197 static int PackedAdvance(const RowMajorWithSum& params) { 198 return sizeof(InType) * pack_size * lanes_count; 199 } 200 201 static int UnpackedStride(const RowMajorWithSum& params) { 202 return sizeof(InType) * lanes_count * params.stride; 203 } 204 205 static int PackedStride(const RowMajorWithSum& params) { 206 return 32 + AlignTo<32>(sizeof(InType) * lanes_count * 207 AlignTo<pack_size>(params.count)); 208 } 209 210 static int Scratch(const RowMajorWithSum& params) { 211 return PackedStride(params); 212 } 213 214#ifdef DEBUG 215#ifdef DEBUG_METAGEMM_VERBOSE 216 static void Debug(const RowMajorWithSum& params) { 217 std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")" 218 << std::endl; 219 std::cout << " dims: " << lanes_count << "x" << pack_size << " + " 220 << leftovers << std::endl; 221 std::cout << " scratch: " << Scratch(params) << std::endl; 222 std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl; 223 std::cout << " packed advance: " << PackedAdvance(params) << std::endl; 224 std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl; 225 std::cout << " packed stride: " << PackedStride(params) << std::endl; 226 std::cout << " params:" << std::endl; 227 std::cout << " count: " << params.count << std::endl; 228 std::cout << " stride: " << params.stride << std::endl; 229 std::cout << " multiplicative_sum_offset: " 230 << params.multiplicative_sum_offset << std::endl; 231 std::cout << " additive_sum_offset: " << params.additive_sum_offset 232 << std::endl; 233 } 234#endif 235#endif 236}; 237 238template <typename InType, int lanes_count, int pack_size, int leftovers> 239class Stream<InType, lanes_count, pack_size, leftovers, ColumnMajorWithSum> { 240 public: 241 static void Pack(const InType* in, const ColumnMajorWithSum& params, 242 InType* out) { 243#ifdef DEBUG 244#ifdef DEBUG_METAGEMM_VERBOSE 245 std::cout << "ColumnMajorWithSum(" << typeid(InType).name() 246 << ")::Pack() -- " << lanes_count << "x" << pack_size << " + " 247 << leftovers << std::endl; 248#endif 249#else 250 if (lanes_count != 0) { 251 std::cerr << "FATAL: ColumnMajorWithSum::Pack not implemented." 252 << std::endl; 253 std::exit(1); 254 } 255#endif 256 } 257 258 static int UnpackedAdvance(const ColumnMajorWithSum& params) { 259 return sizeof(InType) * pack_size * params.stride; 260 } 261 262 static int PackedAdvance(const ColumnMajorWithSum& params) { 263 return sizeof(InType) * pack_size * lanes_count; 264 } 265 266 static int UnpackedStride(const ColumnMajorWithSum& params) { 267 return sizeof(InType) * lanes_count; 268 } 269 270 static int PackedStride(const ColumnMajorWithSum& params) { 271 return 32 + AlignTo<32>(sizeof(InType) * lanes_count * 272 AlignTo<pack_size>(params.count)); 273 } 274 275 static int Scratch(const ColumnMajorWithSum& params) { 276 return PackedStride(params); 277 } 278 279#ifdef DEBUG 280#ifdef DEBUG_METAGEMM_VERBOSE 281 static void Debug(const ColumnMajorWithSum& params) { 282 std::cout << "ColumnMajorWithSum(" << typeid(InType).name() << ")" 283 << std::endl; 284 std::cout << " dims: " << lanes_count << "x" << pack_size << " + " 285 << leftovers << std::endl; 286 std::cout << " scratch: " << Scratch(params) << std::endl; 287 std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl; 288 std::cout << " packed advance: " << PackedAdvance(params) << std::endl; 289 std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl; 290 std::cout << " packed stride: " << PackedStride(params) << std::endl; 291 std::cout << " params:" << std::endl; 292 std::cout << " count: " << params.count << std::endl; 293 std::cout << " stride: " << params.stride << std::endl; 294 std::cout << " multiplicative_sum_offset: " 295 << params.multiplicative_sum_offset << std::endl; 296 std::cout << " additive_sum_offset: " << params.additive_sum_offset 297 << std::endl; 298 } 299#endif 300#endif 301}; 302 303} // namespace meta 304} // namespace gemmlowp 305 306#ifdef GEMMLOWP_NEON_32 307#include "streams_arm_32.h" 308#elif defined(GEMMLOWP_NEON_64) 309#include "streams_arm_64.h" 310#endif 311 312#endif // GEMMLOWP_META_STREAMS_H_ 313