1// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// kernel_default.h: Chooses default GEMM and GEMV kernels for the 16// host platform. 17 18#ifndef GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 19#define GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 20 21#ifndef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK 22#define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK 23#endif 24 25#include "../public/bit_depth.h" 26#include "common.h" 27#include "kernel_reference.h" 28 29namespace gemmlowp { 30 31template <bool MaxProductIsLessThan4096, 32 bool LhsAlwaysNonzero> 33struct DefaultKernelImpl {}; 34 35// Partial specialization implementing the logic that if we want to use 36// a kernel for LhsAlwaysNonzero but do not have such a kernel, then we fall 37// back to a generic kernel not taking advantage of LhsAlwaysNonzero. 38template <bool LhsAlwaysNonzero> 39struct DefaultKernelImpl<true, LhsAlwaysNonzero> 40 : DefaultKernelImpl<false, LhsAlwaysNonzero> {}; 41 42// Partial specialization implementing the logic that if we want to use 43// a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we 44// fall back to a generic kernel not taking advantage of 45// MaxProductIsLessThan4096. 46template <bool MaxProductIsLessThan4096> 47struct DefaultKernelImpl<MaxProductIsLessThan4096, true> 48 : DefaultKernelImpl<MaxProductIsLessThan4096, false> {}; 49 50template <typename BitDepthParams> 51struct DefaultKernel 52 : DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue * 53 BitDepthParams::RhsRange::kMaxValue < 54 4096), 55 (BitDepthParams::LhsRange::kMinValue > 0)> {}; 56 57} // end namespace gemmlowp 58 59#define GEMMLOWP_SET_DEFAULT_KERNEL(MaxProductIsLessThan4096, \ 60 LhsAlwaysNonzero, Kernel) \ 61 namespace gemmlowp { \ 62 template <> \ 63 struct DefaultKernelImpl<MaxProductIsLessThan4096, \ 64 LhsAlwaysNonzero> : Kernel {}; \ 65 } 66 67#if defined GEMMLOWP_NEON_32 68#include "kernel_neon.h" 69GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_32_Kernel12x4Depth2) 70GEMMLOWP_SET_DEFAULT_KERNEL(true, false, 71 NEON_32_Kernel12x4Depth2Assuming12BitProducts) 72GEMMLOWP_SET_DEFAULT_KERNEL(false, true, 73 NEON_32bit_GEMM_Int8Operands_LhsNonzero) 74#elif defined GEMMLOWP_NEON_64 75#include "kernel_neon.h" 76GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_64_Kernel12x8Depth2) 77GEMMLOWP_SET_DEFAULT_KERNEL(false, true, 78 NEON_64bit_GEMM_Int8Operands_LhsNonzero) 79#elif defined GEMMLOWP_SSE4_32 80#include "kernel_sse.h" 81GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_32_Kernel4x4Depth2) 82#elif defined GEMMLOWP_SSE4_64 83#include "kernel_sse.h" 84GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_64_Kernel12x4Depth2) 85#else 86#ifndef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK 87#if defined __ARM_ARCH_5TE__ 88// SIMD is not available on this platform. The slow fallback will be used. 89// Don't require GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK because there's nothing 90// the user can do about it. 91#else 92#error \ 93 "SIMD not enabled, you'd be getting a slow software fallback. Consider \ 94enabling SIMD extensions (for example using -msse4 if you're on modern x86). \ 95If that's not an option, and you would like to continue with the \ 96slow fallback, define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK." 97#endif 98#endif 99#include "kernel_reference.h" 100namespace gemmlowp { 101typedef ReferenceKernel<KernelFormat< 102 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1>, 103 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > > 104 DefaultReferenceKernel; 105} 106GEMMLOWP_SET_DEFAULT_KERNEL(false, false, DefaultReferenceKernel) 107#endif 108 109#endif // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 110