Conv2D.cpp revision 9447966379a4b3fba92f4dfda65aaba6b0482f1a
1eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang/* 2eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * Copyright (C) 2017 The Android Open Source Project 3eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * 4eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * Licensed under the Apache License, Version 2.0 (the "License"); 5eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * you may not use this file except in compliance with the License. 6eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * You may obtain a copy of the License at 7eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * 8eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * http://www.apache.org/licenses/LICENSE-2.0 9eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * 10eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * Unless required by applicable law or agreed to in writing, software 11eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * distributed under the License is distributed on an "AS IS" BASIS, 12eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * See the License for the specific language governing permissions and 14eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * limitations under the License. 15eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang */ 16eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 17eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang#include "Operations.h" 18d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang#include "CpuOperationUtils.h" 19eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 20d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" 21eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 22eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangnamespace android { 23eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangnamespace nn { 24eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 25eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang// If possible we will use this static buffer for the tensor. 269447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniakstatic constexpr size_t kStaticBufferSize = 1605632; 27eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangstatic char static_scratch_buffer[kStaticBufferSize]; 28eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 2927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang#define ANDROID_NN_CONV_PARAMETERS(Type) \ 3027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t height = getSizeOfDimension(inputShape, 1); \ 3127e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t width = getSizeOfDimension(inputShape, 2); \ 3227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \ 3327e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t filterWidth = getSizeOfDimension(filterShape, 2); \ 3427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t outHeight = getSizeOfDimension(outputShape, 1); \ 3527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t outWidth = getSizeOfDimension(outputShape, 2); \ 3627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t inDepth = getSizeOfDimension(inputShape, 3); \ 3727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang \ 386cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang uint32_t paddingHeight = (uint32_t)padding_top; \ 396cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang uint32_t paddingWidth = (uint32_t)padding_left; \ 4027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang \ 41d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang tflite::Dims<4> im2colDim; \ 4227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.sizes[3] = (int)getSizeOfDimension(outputShape, 0); \ 4327e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.sizes[2] = (int)getSizeOfDimension(outputShape, 1); \ 4427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.sizes[1] = (int)getSizeOfDimension(outputShape, 2); \ 4527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.sizes[0] = (int)inDepth * filterHeight * filterWidth; \ 4627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang \ 4727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.strides[0] = 1; \ 4827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang for (int i=1; i<4; i++) { \ 4927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.strides[i] = im2colDim.strides[i-1] * im2colDim.sizes[i-1]; \ 5027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang } \ 5127e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang \ 5227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang Type* im2colData = nullptr; \ 539447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak uint64_t im2colByteSize = sizeof(Type); \ 549447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak std::unique_ptr<Type[]> im2colGuard; \ 5527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang for (int i=0; i<4; i++) { \ 5627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colByteSize *= im2colDim.sizes[i]; \ 5727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang } \ 589447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak \ 599447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak if (sizeof(size_t) == 4 && (im2colByteSize / sizeof(Type)) > 0xFFFFFFFF) { \ 609447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak LOG(ERROR) << "Conv size is too large, not enough memory"; \ 619447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak return false; \ 629447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak } \ 6327e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang if (im2colByteSize <= kStaticBufferSize) { \ 6427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ 6527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang } else { \ 6627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ 679447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak if (im2colData == nullptr) { \ 689447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak LOG(ERROR) << "Conv size is too large, not enough memory"; \ 699447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak return false; \ 709447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak } \ 71eea11acb2a183642f0c5687b283f975844ea99c2Przemyslaw Szczepaniak im2colGuard.reset(im2colData); \ 7227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang } 7327e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 74eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangbool convFloat32(const float* inputData, const Shape& inputShape, 75eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang const float* filterData, const Shape& filterShape, 76eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang const float* biasData, const Shape& biasShape, 776cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t padding_left, int32_t padding_right, 786cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t padding_top, int32_t padding_bottom, 796cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t stride_width, int32_t stride_height, 806cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t activation, 81eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang float* outputData, const Shape& outputShape) { 82eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 8327e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang ANDROID_NN_CONV_PARAMETERS(float) 84eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 85d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang float output_activation_min, output_activation_max; 86d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang CalculateActivationRangeFloat(activation, &output_activation_min, 87d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang &output_activation_max); 88eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 89d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang tflite::optimized_ops::Conv( 90d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang inputData, convertShapeToDims(inputShape), 91d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang filterData, convertShapeToDims(filterShape), 92d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang biasData, convertShapeToDims(biasShape), 93d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang stride_width, stride_height, paddingWidth, paddingHeight, 94d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang output_activation_min, output_activation_max, 95d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang outputData, convertShapeToDims(outputShape), 96d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang im2colData, im2colDim); 97eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang return true; 98eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang} 99eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 10027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wangbool convQuant8(const uint8_t* inputData, const Shape& inputShape, 10127e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang const uint8_t* filterData, const Shape& filterShape, 10227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang const int32_t* biasData, const Shape& biasShape, 1036cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t padding_left, int32_t padding_right, 1046cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t padding_top, int32_t padding_bottom, 1056cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t stride_width, int32_t stride_height, 1066cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t activation, 10727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint8_t* outputData, const Shape& outputShape) { 10827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 10927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang ANDROID_NN_CONV_PARAMETERS(uint8_t) 11027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 1118eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang int32_t inputOffset = -inputShape.offset; 1128eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang int32_t filterOffset = -filterShape.offset; 1138eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang int32_t outputOffset = outputShape.offset; 1148eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang 11527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang float real_multiplier = 0.0; 11627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang int32_t output_multiplier = 0; 11727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang int32_t output_shift = 0; 11827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang int32_t output_activation_min = 0; 11927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang int32_t output_activation_max = 0; 12027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 121be2b22578baf949d7be42ba002cee94304daf53cMiao Wang if (!GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, 122be2b22578baf949d7be42ba002cee94304daf53cMiao Wang outputShape, &real_multiplier) || 123be2b22578baf949d7be42ba002cee94304daf53cMiao Wang !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, 124be2b22578baf949d7be42ba002cee94304daf53cMiao Wang &output_shift)){ 125be2b22578baf949d7be42ba002cee94304daf53cMiao Wang return false; 126be2b22578baf949d7be42ba002cee94304daf53cMiao Wang } 12727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang CalculateActivationRangeUint8(activation, outputShape, 12827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang &output_activation_min, 12927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang &output_activation_max); 13027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 13127e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang static gemmlowp::GemmContext gemm_context; 1328eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang // Alow gemmlowp automatcally decide how many threads to use. 1338eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang gemm_context.set_max_num_threads(0); 13427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 135d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang tflite::optimized_ops::Conv( 136d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang inputData, convertShapeToDims(inputShape), inputOffset, 137d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang filterData, convertShapeToDims(filterShape), filterOffset, 138d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang biasData, convertShapeToDims(biasShape), 139d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang stride_width, stride_height, paddingWidth, paddingHeight, 140d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang outputOffset, output_multiplier, output_shift, 141d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang output_activation_min, output_activation_max, 142d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang outputData, convertShapeToDims(outputShape), 143d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang im2colData, im2colDim, &gemm_context); 14427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang return true; 14527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang} 14627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 14727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang#undef ANDROID_NN_CONV_PARAMETERS 148eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang} // namespace nn 149eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang} // namespace android 150