/*
 * cl_gauss_handler.cpp - CL gauss handler
 *
 *  Copyright (c) 2016 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Author: wangfei <feix.w.wang@intel.com>
 *         Wind Yuan <feng.yuan@intel.com>
 */
216b5f71042b969ae41362e07660181a676685702bYinhang Liu
226b5f71042b969ae41362e07660181a676685702bYinhang Liu#include "cl_utils.h"
236523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei#include "cl_gauss_handler.h"
246523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei#include <algorithm>
256523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
// Side length of the square gaussian table for a given radius: 2 * radius + 1.
#define XCAM_GAUSS_SCALE(radius) (2 * (radius) + 1)
27ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan
286523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeinamespace XCam {
296523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
30be505049e0d0cd218324c728b840652ac54bd19fWind Yuanconst static XCamKernelInfo kernel_gauss_info = {
31be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    "kernel_gauss",
32be505049e0d0cd218324c728b840652ac54bd19fWind Yuan#include "kernel_gauss.clx"
33be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    , 0,
34be505049e0d0cd218324c728b840652ac54bd19fWind Yuan};
35be505049e0d0cd218324c728b840652ac54bd19fWind Yuan
36be505049e0d0cd218324c728b840652ac54bd19fWind Yuanclass CLGaussImageKernelImpl
37be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    : public CLGaussImageKernel
38be505049e0d0cd218324c728b840652ac54bd19fWind Yuan{
39be505049e0d0cd218324c728b840652ac54bd19fWind Yuanpublic:
40be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    CLGaussImageKernelImpl (
41be505049e0d0cd218324c728b840652ac54bd19fWind Yuan        SmartPtr<CLGaussImageHandler> &handler,
42be505049e0d0cd218324c728b840652ac54bd19fWind Yuan        const SmartPtr<CLContext> &context, uint32_t radius, float sigma);
43be505049e0d0cd218324c728b840652ac54bd19fWind Yuan
446b5f71042b969ae41362e07660181a676685702bYinhang Liu    virtual SmartPtr<VideoBuffer> get_input_buf ();
456b5f71042b969ae41362e07660181a676685702bYinhang Liu    virtual SmartPtr<VideoBuffer> get_output_buf ();
46be505049e0d0cd218324c728b840652ac54bd19fWind Yuanprivate:
47be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    SmartPtr<CLGaussImageHandler> _handler;
48be505049e0d0cd218324c728b840652ac54bd19fWind Yuan};
49be505049e0d0cd218324c728b840652ac54bd19fWind Yuan
50be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernelImpl::CLGaussImageKernelImpl (
51be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    SmartPtr<CLGaussImageHandler> &handler,
52be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    const SmartPtr<CLContext> &context,
53be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    uint32_t radius,
54be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    float sigma
55be505049e0d0cd218324c728b840652ac54bd19fWind Yuan)
56be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    : CLGaussImageKernel (context, radius, sigma)
57be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    , _handler (handler)
58be505049e0d0cd218324c728b840652ac54bd19fWind Yuan{
59be505049e0d0cd218324c728b840652ac54bd19fWind Yuan}
60be505049e0d0cd218324c728b840652ac54bd19fWind Yuan
616b5f71042b969ae41362e07660181a676685702bYinhang LiuSmartPtr<VideoBuffer>
62be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernelImpl::get_input_buf ()
63be505049e0d0cd218324c728b840652ac54bd19fWind Yuan{
64be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    return _handler->get_input_buf ();
65be505049e0d0cd218324c728b840652ac54bd19fWind Yuan}
666b5f71042b969ae41362e07660181a676685702bYinhang LiuSmartPtr<VideoBuffer>
67be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernelImpl::get_output_buf ()
68be505049e0d0cd218324c728b840652ac54bd19fWind Yuan{
69be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    return _handler->get_output_buf ();;
70be505049e0d0cd218324c728b840652ac54bd19fWind Yuan}
71be505049e0d0cd218324c728b840652ac54bd19fWind Yuan
72be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernel::CLGaussImageKernel (
73be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    const SmartPtr<CLContext> &context, uint32_t radius, float sigma)
746523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    : CLImageKernel (context, "kernel_gauss")
75ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    , _g_radius (radius)
76ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    , _g_table (NULL)
77ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan{
78ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    set_gaussian(radius, sigma);
79ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan}
80ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan
81ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind YuanCLGaussImageKernel::~CLGaussImageKernel ()
826523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{
83ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    xcam_free (_g_table);
846523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei}
856523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
866523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeibool
87ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind YuanCLGaussImageKernel::set_gaussian (uint32_t radius, float sigma)
886523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{
89ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    uint32_t i, j;
90ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    uint32_t scale = XCAM_GAUSS_SCALE (radius);
91ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    float dis = 0.0f, sum = 0.0f;
92be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    uint32_t scale_size = scale * scale * sizeof (_g_table[0]);
93ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan
94ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    xcam_free (_g_table);
95ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    _g_table_buffer.release ();
96ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    _g_radius = radius;
97be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    _g_table = (float*) xcam_malloc0 (scale_size);
989193453aab8ce0c830bb256c670c9097b3ccc75bYinhang Liu    XCAM_ASSERT (_g_table);
99ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan
100ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan    for(i = 0; i < scale; i++)  {
101ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan        for(j = 0; j < scale; j++) {
102ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan            dis = ((float)i - radius) * ((float)i - radius) + ((float)j - radius) * ((float)j - radius);
103ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan            _g_table[i * scale + j] = exp(-dis / (2.0f * sigma * sigma));
104ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan            sum += _g_table[i * scale + j];
1056523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei        }
1066523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    }
1076523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
108be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    for(i = 0; i < scale * scale; i++) {
1096523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei        _g_table[i] = _g_table[i] / sum;
110be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    }
1116523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
112be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    _g_table_buffer = new CLBuffer(
113be505049e0d0cd218324c728b840652ac54bd19fWind Yuan        get_context (), scale_size,
114be505049e0d0cd218324c728b840652ac54bd19fWind Yuan        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR , _g_table);
1156523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
116be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    return true;
117a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan}
1186523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
1196523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeiXCamReturn
120be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageKernel::prepare_arguments (CLArgList &args, CLWorkSize &work_size)
1216523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{
1226523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    SmartPtr<CLContext> context = get_context ();
1236b5f71042b969ae41362e07660181a676685702bYinhang Liu    SmartPtr<VideoBuffer> input = get_input_buf ();
1246b5f71042b969ae41362e07660181a676685702bYinhang Liu    SmartPtr<VideoBuffer> output = get_output_buf ();
125a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan
126a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    XCAM_FAIL_RETURN (
127a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan        WARNING,
1286b5f71042b969ae41362e07660181a676685702bYinhang Liu        input.ptr () && output.ptr (),
129a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan        XCAM_RETURN_ERROR_MEM,
130a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan        "cl image kernel(%s) get input/output buffer failed", get_kernel_name ());
131a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan
1326b5f71042b969ae41362e07660181a676685702bYinhang Liu    const VideoBufferInfo & video_info_in = input->get_video_info ();
1336b5f71042b969ae41362e07660181a676685702bYinhang Liu    const VideoBufferInfo & video_info_out = output->get_video_info ();
134a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    CLImageDesc cl_desc_in, cl_desc_out;
135a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan
136a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_in.format.image_channel_data_type = CL_UNORM_INT8;
137a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_in.format.image_channel_order = CL_R;
138a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_in.width = video_info_in.width;
139a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_in.height = video_info_in.height;
140a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_in.row_pitch = video_info_in.strides[0];
1416b5f71042b969ae41362e07660181a676685702bYinhang Liu    SmartPtr<CLImage> image_in = convert_to_climage (context, input, cl_desc_in, video_info_in.offsets[0]);
142a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan
143a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_out.format.image_channel_data_type = CL_UNORM_INT8;
144a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_out.format.image_channel_order = CL_RGBA;
145a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_out.width = video_info_out.width / 4;
146a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_out.height = video_info_out.height;
147a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    cl_desc_out.row_pitch = video_info_out.strides[0];
1486b5f71042b969ae41362e07660181a676685702bYinhang Liu    SmartPtr<CLImage> image_out = convert_to_climage (context, output, cl_desc_out, video_info_out.offsets[0]);
1496523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
1506523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    XCAM_FAIL_RETURN (
1516523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei        WARNING,
152be505049e0d0cd218324c728b840652ac54bd19fWind Yuan        image_in->is_valid () && image_out->is_valid (),
1536523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei        XCAM_RETURN_ERROR_MEM,
1546523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei        "cl image kernel(%s) in/out memory not available", get_kernel_name ());
1556523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
1566523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    //set args;
157be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    args.push_back (new CLMemArgument (image_in));
158be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    args.push_back (new CLMemArgument (image_out));
159be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    args.push_back (new CLMemArgument (_g_table_buffer));
1606523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
1616523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
162a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    work_size.global[0] = XCAM_ALIGN_UP(cl_desc_out.width, 8);
163a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    work_size.global[1] = XCAM_ALIGN_UP (cl_desc_out.height / 2, 4);
164a65d83352b0fc3ad9103ed3aa055e036ef3022a0Wind Yuan    work_size.local[0] = 8;
1656523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    work_size.local[1] = 4;
1666523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
1676523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    return XCAM_RETURN_NO_ERROR;
1686523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei}
1696523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
170be505049e0d0cd218324c728b840652ac54bd19fWind YuanCLGaussImageHandler::CLGaussImageHandler (const SmartPtr<CLContext> &context, const char *name)
171be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    : CLImageHandler (context, name)
1726523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{
1736523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei}
1746523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
1756523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeibool
1766523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeiCLGaussImageHandler::set_gaussian_table (int size, float sigma)
1776523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{
1786523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    _gauss_kernel->set_gaussian (size, sigma);
1796523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    return true;
1806523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei}
1816523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
1826523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeibool
1836523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeiCLGaussImageHandler::set_gauss_kernel(SmartPtr<CLGaussImageKernel> &kernel)
1846523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{
1856523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    SmartPtr<CLImageKernel> image_kernel = kernel;
1866523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    add_kernel (image_kernel);
1876523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    _gauss_kernel = kernel;
1886523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    return true;
1896523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei}
1906523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
1916523286e77ad1338a3362fe1f6b7a1c115168ecfWangfeiSmartPtr<CLImageHandler>
192be505049e0d0cd218324c728b840652ac54bd19fWind Yuancreate_cl_gauss_image_handler (const SmartPtr<CLContext> &context, uint32_t radius, float sigma)
1936523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei{
1946523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    SmartPtr<CLGaussImageHandler> gauss_handler;
1956523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    SmartPtr<CLGaussImageKernel> gauss_kernel;
196be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    char build_options[1024];
1976523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
198be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    xcam_mem_clear (build_options);
199be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    snprintf (build_options, sizeof (build_options), " -DGAUSS_RADIUS=%d ", radius);
200be505049e0d0cd218324c728b840652ac54bd19fWind Yuan
201be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    gauss_handler = new CLGaussImageHandler (context, "cl_handler_gauss");
202be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    gauss_kernel = new CLGaussImageKernelImpl (gauss_handler, context, radius, sigma);
203be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    XCAM_ASSERT (gauss_kernel.ptr ());
204be505049e0d0cd218324c728b840652ac54bd19fWind Yuan    XCAM_FAIL_RETURN (
205be505049e0d0cd218324c728b840652ac54bd19fWind Yuan        ERROR, gauss_kernel->build_kernel (kernel_gauss_info, build_options) == XCAM_RETURN_NO_ERROR, NULL,
206be505049e0d0cd218324c728b840652ac54bd19fWind Yuan        "build gaussian kernel(%s) failed", kernel_gauss_info.kernel_name);
207ad51bba4a74c44b2c8874c4fd49ca083ebcc6f71Wind Yuan
2086523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    XCAM_ASSERT (gauss_kernel->is_valid ());
2096523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    gauss_handler->set_gauss_kernel (gauss_kernel);
2106523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
2116523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei    return gauss_handler;
2126523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei}
2136523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei
2146523286e77ad1338a3362fe1f6b7a1c115168ecfWangfei}
215