16523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei/* 26523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * cl_gauss_handler.cpp - CL gauss handler 36523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * 4a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan * Copyright (c) 2016 Intel Corporation 56523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * 66523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * Licensed under the Apache License, Version 2.0 (the "License"); 76523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * you may not use this file except in compliance with the License. 86523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * You may obtain a copy of the License at 96523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * 106523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * http://www.apache.org/licenses/LICENSE-2.0 116523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * 126523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * Unless required by applicable law or agreed to in writing, software 136523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * distributed under the License is distributed on an "AS IS" BASIS, 146523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 156523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * See the License for the specific language governing permissions and 166523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * limitations under the License. 176523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * 186523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei * Author: wangfei <feix.w.wang@intel.com> 19a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan * Wind Yuan <feng.yuan@intel.com> 206523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei */ 216b5f71042b969ae41362e07660181a676685702bYinhang Liu 226b5f71042b969ae41362e07660181a676685702bYinhang Liu#include "cl_utils.h" 236523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei#include "cl_gauss_handler.h" 246523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei#include <algorithm> 256523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 26ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan#define XCAM_GAUSS_SCALE(radius) ((radius) * 2 + 1) 27ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan 286523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeinamespace XCam { 296523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 30be505049e0d0cd218324c728b840652ac54bd19fWind Yuanconst static XCamKernelInfo kernel_gauss_info = { 31be505049e0d0cd218324c728b840652ac54bd19fWind Yuan "kernel_gauss", 32be505049e0d0cd218324c728b840652ac54bd19fWind Yuan#include "kernel_gauss.clx" 33be505049e0d0cd218324c728b840652ac54bd19fWind Yuan , 0, 34be505049e0d0cd218324c728b840652ac54bd19fWind Yuan}; 35be505049e0d0cd218324c728b840652ac54bd19fWind Yuan 36be505049e0d0cd218324c728b840652ac54bd19fWind Yuanclass CLGaussImageKernelImpl 37be505049e0d0cd218324c728b840652ac54bd19fWind Yuan : public CLGaussImageKernel 38be505049e0d0cd218324c728b840652ac54bd19fWind Yuan{ 39be505049e0d0cd218324c728b840652ac54bd19fWind Yuanpublic: 40be505049e0d0cd218324c728b840652ac54bd19fWind Yuan CLGaussImageKernelImpl ( 41be505049e0d0cd218324c728b840652ac54bd19fWind Yuan SmartPtr<CLGaussImageHandler> &handler, 42be505049e0d0cd218324c728b840652ac54bd19fWind Yuan const SmartPtr<CLContext> &context, uint32_t radius, float sigma); 43be505049e0d0cd218324c728b840652ac54bd19fWind Yuan 446b5f71042b969ae41362e07660181a676685702bYinhang Liu virtual SmartPtr<VideoBuffer> get_input_buf (); 456b5f71042b969ae41362e07660181a676685702bYinhang Liu virtual SmartPtr<VideoBuffer> get_output_buf (); 46be505049e0d0cd218324c728b840652ac54bd19fWind Yuanprivate: 47be505049e0d0cd218324c728b840652ac54bd19fWind Yuan SmartPtr<CLGaussImageHandler> _handler; 48be505049e0d0cd218324c728b840652ac54bd19fWind Yuan}; 49be505049e0d0cd218324c728b840652ac54bd19fWind Yuan 50be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernelImpl::CLGaussImageKernelImpl ( 51be505049e0d0cd218324c728b840652ac54bd19fWind Yuan SmartPtr<CLGaussImageHandler> &handler, 52be505049e0d0cd218324c728b840652ac54bd19fWind Yuan const SmartPtr<CLContext> &context, 53be505049e0d0cd218324c728b840652ac54bd19fWind Yuan uint32_t radius, 54be505049e0d0cd218324c728b840652ac54bd19fWind Yuan float sigma 55be505049e0d0cd218324c728b840652ac54bd19fWind Yuan) 56be505049e0d0cd218324c728b840652ac54bd19fWind Yuan : CLGaussImageKernel (context, radius, sigma) 57be505049e0d0cd218324c728b840652ac54bd19fWind Yuan , _handler (handler) 58be505049e0d0cd218324c728b840652ac54bd19fWind Yuan{ 59be505049e0d0cd218324c728b840652ac54bd19fWind Yuan} 60be505049e0d0cd218324c728b840652ac54bd19fWind Yuan 616b5f71042b969ae41362e07660181a676685702bYinhang LiuSmartPtr<VideoBuffer> 62be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernelImpl::get_input_buf () 63be505049e0d0cd218324c728b840652ac54bd19fWind Yuan{ 64be505049e0d0cd218324c728b840652ac54bd19fWind Yuan return _handler->get_input_buf (); 65be505049e0d0cd218324c728b840652ac54bd19fWind Yuan} 666b5f71042b969ae41362e07660181a676685702bYinhang LiuSmartPtr<VideoBuffer> 67be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernelImpl::get_output_buf () 68be505049e0d0cd218324c728b840652ac54bd19fWind Yuan{ 69be505049e0d0cd218324c728b840652ac54bd19fWind Yuan return _handler->get_output_buf ();; 70be505049e0d0cd218324c728b840652ac54bd19fWind Yuan} 71be505049e0d0cd218324c728b840652ac54bd19fWind Yuan 72be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernel::CLGaussImageKernel ( 73be505049e0d0cd218324c728b840652ac54bd19fWind Yuan const SmartPtr<CLContext> &context, uint32_t radius, float sigma) 746523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei : CLImageKernel (context, "kernel_gauss") 75ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan , _g_radius (radius) 76ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan , _g_table (NULL) 77ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan{ 78ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan set_gaussian(radius, sigma); 79ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan} 80ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan 81ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind YuanCLGaussImageKernel::~CLGaussImageKernel () 826523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{ 83ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan xcam_free (_g_table); 846523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei} 856523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 866523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeibool 87ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind YuanCLGaussImageKernel::set_gaussian (uint32_t radius, float sigma) 886523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{ 89ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan uint32_t i, j; 90ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan uint32_t scale = XCAM_GAUSS_SCALE (radius); 91ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan float dis = 0.0f, sum = 0.0f; 92be505049e0d0cd218324c728b840652ac54bd19fWind Yuan uint32_t scale_size = scale * scale * sizeof (_g_table[0]); 93ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan 94ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan xcam_free (_g_table); 95ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan _g_table_buffer.release (); 96ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan _g_radius = radius; 97be505049e0d0cd218324c728b840652ac54bd19fWind Yuan _g_table = (float*) xcam_malloc0 (scale_size); 989193453aab8ce0c830bb256c670c9097b3ccc75bYinhang Liu XCAM_ASSERT (_g_table); 99ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan 100ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan for(i = 0; i < scale; i++) { 101ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan for(j = 0; j < scale; j++) { 102ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan dis = ((float)i - radius) * ((float)i - radius) + ((float)j - radius) * ((float)j - radius); 103ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan _g_table[i * scale + j] = exp(-dis / (2.0f * sigma * sigma)); 104ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan sum += _g_table[i * scale + j]; 1056523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei } 1066523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei } 1076523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 108be505049e0d0cd218324c728b840652ac54bd19fWind Yuan for(i = 0; i < scale * scale; i++) { 1096523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei _g_table[i] = _g_table[i] / sum; 110be505049e0d0cd218324c728b840652ac54bd19fWind Yuan } 1116523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 112be505049e0d0cd218324c728b840652ac54bd19fWind Yuan _g_table_buffer = new CLBuffer( 113be505049e0d0cd218324c728b840652ac54bd19fWind Yuan get_context (), scale_size, 114be505049e0d0cd218324c728b840652ac54bd19fWind Yuan CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR , _g_table); 1156523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 116be505049e0d0cd218324c728b840652ac54bd19fWind Yuan return true; 117a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan} 1186523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 1196523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeiXCamReturn 120be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernel::prepare_arguments (CLArgList &args, CLWorkSize &work_size) 1216523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{ 1226523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei SmartPtr<CLContext> context = get_context (); 1236b5f71042b969ae41362e07660181a676685702bYinhang Liu SmartPtr<VideoBuffer> input = get_input_buf (); 1246b5f71042b969ae41362e07660181a676685702bYinhang Liu SmartPtr<VideoBuffer> output = get_output_buf (); 125a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan 126a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan XCAM_FAIL_RETURN ( 127a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan WARNING, 1286b5f71042b969ae41362e07660181a676685702bYinhang Liu input.ptr () && output.ptr (), 129a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan XCAM_RETURN_ERROR_MEM, 130a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan "cl image kernel(%s) get input/output buffer failed", get_kernel_name ()); 131a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan 1326b5f71042b969ae41362e07660181a676685702bYinhang Liu const VideoBufferInfo & video_info_in = input->get_video_info (); 1336b5f71042b969ae41362e07660181a676685702bYinhang Liu const VideoBufferInfo & video_info_out = output->get_video_info (); 134a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan CLImageDesc cl_desc_in, cl_desc_out; 135a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan 136a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_in.format.image_channel_data_type = CL_UNORM_INT8; 137a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_in.format.image_channel_order = CL_R; 138a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_in.width = video_info_in.width; 139a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_in.height = video_info_in.height; 140a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_in.row_pitch = video_info_in.strides[0]; 1416b5f71042b969ae41362e07660181a676685702bYinhang Liu SmartPtr<CLImage> image_in = convert_to_climage (context, input, cl_desc_in, video_info_in.offsets[0]); 142a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan 143a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_out.format.image_channel_data_type = CL_UNORM_INT8; 144a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_out.format.image_channel_order = CL_RGBA; 145a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_out.width = video_info_out.width / 4; 146a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_out.height = video_info_out.height; 147a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan cl_desc_out.row_pitch = video_info_out.strides[0]; 1486b5f71042b969ae41362e07660181a676685702bYinhang Liu SmartPtr<CLImage> image_out = convert_to_climage (context, output, cl_desc_out, video_info_out.offsets[0]); 1496523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 1506523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei XCAM_FAIL_RETURN ( 1516523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei WARNING, 152be505049e0d0cd218324c728b840652ac54bd19fWind Yuan image_in->is_valid () && image_out->is_valid (), 1536523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei XCAM_RETURN_ERROR_MEM, 1546523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei "cl image kernel(%s) in/out memory not available", get_kernel_name ()); 1556523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 1566523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei //set args; 157be505049e0d0cd218324c728b840652ac54bd19fWind Yuan args.push_back (new CLMemArgument (image_in)); 158be505049e0d0cd218324c728b840652ac54bd19fWind Yuan args.push_back (new CLMemArgument (image_out)); 159be505049e0d0cd218324c728b840652ac54bd19fWind Yuan args.push_back (new CLMemArgument (_g_table_buffer)); 1606523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 1616523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei work_size.dim = XCAM_DEFAULT_IMAGE_DIM; 162a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan work_size.global[0] = XCAM_ALIGN_UP(cl_desc_out.width, 8); 163a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan work_size.global[1] = XCAM_ALIGN_UP (cl_desc_out.height / 2, 4); 164a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan work_size.local[0] = 8; 1656523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei work_size.local[1] = 4; 1666523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 1676523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei return XCAM_RETURN_NO_ERROR; 1686523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei} 1696523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 170be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageHandler::CLGaussImageHandler (const SmartPtr<CLContext> &context, const char *name) 171be505049e0d0cd218324c728b840652ac54bd19fWind Yuan : CLImageHandler (context, name) 1726523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{ 1736523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei} 1746523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 1756523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeibool 1766523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeiCLGaussImageHandler::set_gaussian_table (int size, float sigma) 1776523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{ 1786523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei _gauss_kernel->set_gaussian (size, sigma); 1796523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei return true; 1806523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei} 1816523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 1826523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeibool 1836523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeiCLGaussImageHandler::set_gauss_kernel(SmartPtr<CLGaussImageKernel> &kernel) 1846523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{ 1856523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei SmartPtr<CLImageKernel> image_kernel = kernel; 1866523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei add_kernel (image_kernel); 1876523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei _gauss_kernel = kernel; 1886523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei return true; 1896523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei} 1906523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 1916523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeiSmartPtr<CLImageHandler> 192be505049e0d0cd218324c728b840652ac54bd19fWind Yuancreate_cl_gauss_image_handler (const SmartPtr<CLContext> &context, uint32_t radius, float sigma) 1936523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{ 1946523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei SmartPtr<CLGaussImageHandler> gauss_handler; 1956523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei SmartPtr<CLGaussImageKernel> gauss_kernel; 196be505049e0d0cd218324c728b840652ac54bd19fWind Yuan char build_options[1024]; 1976523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 198be505049e0d0cd218324c728b840652ac54bd19fWind Yuan xcam_mem_clear (build_options); 199be505049e0d0cd218324c728b840652ac54bd19fWind Yuan snprintf (build_options, sizeof (build_options), " -DGAUSS_RADIUS=%d ", radius); 200be505049e0d0cd218324c728b840652ac54bd19fWind Yuan 201be505049e0d0cd218324c728b840652ac54bd19fWind Yuan gauss_handler = new CLGaussImageHandler (context, "cl_handler_gauss"); 202be505049e0d0cd218324c728b840652ac54bd19fWind Yuan gauss_kernel = new CLGaussImageKernelImpl (gauss_handler, context, radius, sigma); 203be505049e0d0cd218324c728b840652ac54bd19fWind Yuan XCAM_ASSERT (gauss_kernel.ptr ()); 204be505049e0d0cd218324c728b840652ac54bd19fWind Yuan XCAM_FAIL_RETURN ( 205be505049e0d0cd218324c728b840652ac54bd19fWind Yuan ERROR, gauss_kernel->build_kernel (kernel_gauss_info, build_options) == XCAM_RETURN_NO_ERROR, NULL, 206be505049e0d0cd218324c728b840652ac54bd19fWind Yuan "build gaussian kernel(%s) failed", kernel_gauss_info.kernel_name); 207ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan 2086523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei XCAM_ASSERT (gauss_kernel->is_valid ()); 2096523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei gauss_handler->set_gauss_kernel (gauss_kernel); 2106523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 2116523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei return gauss_handler; 2126523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei} 2136523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei 2146523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei} 215