1eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang/* 2eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * Copyright (C) 2017 The Android Open Source Project 3eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * 4eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * Licensed under the Apache License, Version 2.0 (the "License"); 5eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * you may not use this file except in compliance with the License. 6eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * You may obtain a copy of the License at 7eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * 8eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * http://www.apache.org/licenses/LICENSE-2.0 9eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * 10eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * Unless required by applicable law or agreed to in writing, software 11eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * distributed under the License is distributed on an "AS IS" BASIS, 12eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * See the License for the specific language governing permissions and 14eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang * limitations under the License. 15eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang */ 16eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 17eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang#include "Operations.h" 18d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang#include "CpuOperationUtils.h" 19eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 20d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" 21eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 22eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangnamespace android { 23eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangnamespace nn { 24eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 25eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang// If possible we will use this static buffer for the tensor. 269447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniakstatic constexpr size_t kStaticBufferSize = 1605632; 27eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangstatic char static_scratch_buffer[kStaticBufferSize]; 28eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 29c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang// executionMutex is used to protect concurrent access of the static_scratch_buffer 30c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang// and other non-threadsafe resources like gemmlowp::GemmContext. 31c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang// std::mutex is safe for pthreads on Android. 32c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wangstatic std::mutex executionMutex; 33c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang 3427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang#define ANDROID_NN_CONV_PARAMETERS(Type) \ 3527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t height = getSizeOfDimension(inputShape, 1); \ 3627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t width = getSizeOfDimension(inputShape, 2); \ 3727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \ 3827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t filterWidth = getSizeOfDimension(filterShape, 2); \ 3927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t outHeight = getSizeOfDimension(outputShape, 1); \ 4027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t outWidth = getSizeOfDimension(outputShape, 2); \ 4127e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint32_t inDepth = getSizeOfDimension(inputShape, 3); \ 4227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang \ 436cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang uint32_t paddingHeight = (uint32_t)padding_top; \ 446cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang uint32_t paddingWidth = (uint32_t)padding_left; \ 4527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang \ 46d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang tflite::Dims<4> im2colDim; \ 4727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.sizes[3] = (int)getSizeOfDimension(outputShape, 0); \ 4827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.sizes[2] = (int)getSizeOfDimension(outputShape, 1); \ 4927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.sizes[1] = (int)getSizeOfDimension(outputShape, 2); \ 5027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.sizes[0] = (int)inDepth * filterHeight * filterWidth; \ 5127e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang \ 5227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.strides[0] = 1; \ 5327e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang for (int i=1; i<4; i++) { \ 5427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colDim.strides[i] = im2colDim.strides[i-1] * im2colDim.sizes[i-1]; \ 5527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang } \ 5627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang \ 5727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang Type* im2colData = nullptr; \ 589447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak uint64_t im2colByteSize = sizeof(Type); \ 599447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak std::unique_ptr<Type[]> im2colGuard; \ 6027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang for (int i=0; i<4; i++) { \ 6127e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colByteSize *= im2colDim.sizes[i]; \ 6227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang } \ 636e1812de773691c1a42dbc1d1906ca158a5d4951Przemyslaw Szczepaniak /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ 646e1812de773691c1a42dbc1d1906ca158a5d4951Przemyslaw Szczepaniak if (im2colByteSize >= 0x7fffffff) { \ 659447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak LOG(ERROR) << "Conv size is too large, not enough memory"; \ 669447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak return false; \ 679447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak } \ 6827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang if (im2colByteSize <= kStaticBufferSize) { \ 6927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ 7027e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang } else { \ 7127e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ 729447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak if (im2colData == nullptr) { \ 739447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak LOG(ERROR) << "Conv size is too large, not enough memory"; \ 749447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak return false; \ 759447966379a4b3fba92f4dfda65aaba6b0482f1aPrzemyslaw Szczepaniak } \ 76eea11acb2a183642f0c5687b283f975844ea99c2Przemyslaw Szczepaniak im2colGuard.reset(im2colData); \ 7727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang } 7827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 79eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wangbool convFloat32(const float* inputData, const Shape& inputShape, 80eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang const float* filterData, const Shape& filterShape, 81eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang const float* biasData, const Shape& biasShape, 826cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t padding_left, int32_t padding_right, 836cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t padding_top, int32_t padding_bottom, 846cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t stride_width, int32_t stride_height, 856cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t activation, 86eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang float* outputData, const Shape& outputShape) { 87eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 8827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang ANDROID_NN_CONV_PARAMETERS(float) 89eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 90d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang float output_activation_min, output_activation_max; 91d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang CalculateActivationRangeFloat(activation, &output_activation_min, 92d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang &output_activation_max); 93eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 94c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang // Prevent concurrent executions that may access the scratch buffer. 95c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang std::unique_lock<std::mutex> lock(executionMutex); 96d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang tflite::optimized_ops::Conv( 97d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang inputData, convertShapeToDims(inputShape), 98d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang filterData, convertShapeToDims(filterShape), 99d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang biasData, convertShapeToDims(biasShape), 100d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang stride_width, stride_height, paddingWidth, paddingHeight, 101d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang output_activation_min, output_activation_max, 102d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang outputData, convertShapeToDims(outputShape), 103d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang im2colData, im2colDim); 104eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang return true; 105eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang} 106eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang 10727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wangbool convQuant8(const uint8_t* inputData, const Shape& inputShape, 10827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang const uint8_t* filterData, const Shape& filterShape, 10927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang const int32_t* biasData, const Shape& biasShape, 1106cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t padding_left, int32_t padding_right, 1116cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t padding_top, int32_t padding_bottom, 1126cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t stride_width, int32_t stride_height, 1136cd685f64bd82c003b8d0943fc6b7b8e0730b939Miao Wang int32_t activation, 11427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang uint8_t* outputData, const Shape& outputShape) { 11527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 11627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang ANDROID_NN_CONV_PARAMETERS(uint8_t) 11727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 1188eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang int32_t inputOffset = -inputShape.offset; 1198eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang int32_t filterOffset = -filterShape.offset; 1208eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang int32_t outputOffset = outputShape.offset; 1218eb598abd0e77333688e97f7ed89b0dd60d144faMiao Wang 12227e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang float real_multiplier = 0.0; 12327e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang int32_t output_multiplier = 0; 12427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang int32_t output_shift = 0; 12527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang int32_t output_activation_min = 0; 12627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang int32_t output_activation_max = 0; 12727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 128be2b22578baf949d7be42ba002cee94304daf53cMiao Wang if (!GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, 129be2b22578baf949d7be42ba002cee94304daf53cMiao Wang outputShape, &real_multiplier) || 130be2b22578baf949d7be42ba002cee94304daf53cMiao Wang !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, 131be2b22578baf949d7be42ba002cee94304daf53cMiao Wang &output_shift)){ 132be2b22578baf949d7be42ba002cee94304daf53cMiao Wang return false; 133be2b22578baf949d7be42ba002cee94304daf53cMiao Wang } 13427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang CalculateActivationRangeUint8(activation, outputShape, 13527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang &output_activation_min, 13627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang &output_activation_max); 13727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 13827e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang static gemmlowp::GemmContext gemm_context; 13927e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 140c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang // Prevent concurrent executions that may access the scratch buffer and 141c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang // gemm_context. 142c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang std::unique_lock<std::mutex> lock(executionMutex); 143c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang // Alow gemmlowp automatically decide how many threads to use. 144c3fb81d018487e2e85dcdfa8abbb6ab76f1ceafeMiao Wang gemm_context.set_max_num_threads(0); 145d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang tflite::optimized_ops::Conv( 146d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang inputData, convertShapeToDims(inputShape), inputOffset, 147d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang filterData, convertShapeToDims(filterShape), filterOffset, 148d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang biasData, convertShapeToDims(biasShape), 149d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang stride_width, stride_height, paddingWidth, paddingHeight, 150d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang outputOffset, output_multiplier, output_shift, 151d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang output_activation_min, output_activation_max, 152d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang outputData, convertShapeToDims(outputShape), 153d9c5ba866bb0575cbb894c672e0a800844ccf6f8Miao Wang im2colData, im2colDim, &gemm_context); 15427e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang return true; 15527e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang} 15627e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang 15727e9be3904b034e422ee9b6ab70b35ea994d2b39Miao Wang#undef ANDROID_NN_CONV_PARAMETERS 158eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang} // namespace nn 159eb1f88846f147d1d80ee0d688fe4635b89a40ffaMiao Wang} // namespace android 160