1/*
2 * cl_newtonemapping_handler.cpp - CL tonemapping handler
3 *
4 *  Copyright (c) 2015 Intel Corporation
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *      http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 *  Author: Wu Junkai <junkai.wu@intel.com>
19 */
20
21#include "cl_utils.h"
22#include "cl_newtonemapping_handler.h"
23
24namespace XCam {
25
// Descriptor handed to build_kernel() for the "kernel_newtonemapping" OpenCL
// kernel. The initializer(s) between the kernel name and the trailing 0 are
// pulled in textually from kernel_newtonemapping.clx (presumably the kernel
// source generated at build time -- confirm against the build rules).
static const XCamKernelInfo kernel_tone_mapping_pipe_info = {
    "kernel_newtonemapping",
#include "kernel_newtonemapping.clx"
    , 0,
};
31
32CLNewTonemappingImageKernel::CLNewTonemappingImageKernel (
33    const SmartPtr<CLContext> &context, const char *name)
34    : CLImageKernel (context, name)
35{
36}
37
/**
 * Recursively splits the bin range [left, right] and records each split point.
 *
 * For the current range the split is placed halfway between the arithmetic
 * midpoint of the range and the median sample of the range (taken from the
 * sorted sample array y via the cumulative histogram hist). When the median
 * sample is 0 the plain midpoint is used. The rounded split is stored in
 * hist_leq at the midpoint of [index_left, index_right], and both halves are
 * then processed the same way until level exceeds 5.
 *
 * @param y            samples sorted by bin; indexed by cumulative count
 * @param hist         cumulative histogram (hist[i] = samples in bins 0..i)
 * @param hist_leq     output table receiving the split points
 * @param left         first bin of the range
 * @param right        last bin of the range
 * @param level        current recursion depth (recursion stops once > 5)
 * @param index_left   first output slot covered by this range
 * @param index_right  last output slot covered by this range
 */
static void
haleq(int *y, int *hist, int *hist_leq, int left, int right, int level, int index_left, int index_right)
{
    const int midpoint = (left + right) / 2;

    // Samples in bins below `left`, and samples inside [left, right].
    const int below = (left > 0) ? hist[left - 1] : 0;
    const int span_count = hist[right] - below;
    const float median = y[below + span_count / 2];

    // Default to the plain midpoint; pull it halfway towards the median
    // whenever the median sample is non-zero.
    float split = midpoint;
    if (median != 0)
        split = 0.5f * (median - midpoint) + midpoint;

    const int cut = (int)(split + 0.5f);
    const int slot = (index_left + index_right) / 2;
    hist_leq[slot] = cut;

    if (level > 5)
        return;

    haleq (y, hist, hist_leq, left, cut, level + 1, index_left, slot);
    haleq (y, hist, hist_leq, cut + 1, right, level + 1, slot + 1, index_right);
}
66
67static void
68block_split_haleq(int* hist, int hist_bin_count, int pixel_num, int block_start_index, float* y_max, float* y_avg, float* map_hist)
69{
70    int block_id = block_start_index / hist_bin_count;
71
72    for(int i = hist_bin_count - 1; i >= 0; i--)
73    {
74        if(hist[i] > 0)
75        {
76            y_max[block_id] = i;
77            break;
78        }
79    }
80
81    for(int i = 0; i < hist_bin_count; i++)
82    {
83        y_avg[block_id] += i * hist[i];
84    }
85
86    y_max[block_id] = y_max[block_id] + 1;
87    y_avg[block_id] = y_avg[block_id] / pixel_num;
88
89    int *hist_log = (int *) xcam_malloc0 (hist_bin_count * sizeof (int));
90    int *sort_y = (int *) xcam_malloc0 ((pixel_num + 1) * sizeof (int));
91    int *map_index_leq = (int *) xcam_malloc0 (hist_bin_count * sizeof (int));
92    int *map_index_log = (int *) xcam_malloc0 (hist_bin_count * sizeof (int));
93    XCAM_ASSERT (hist_log && sort_y && map_index_leq && map_index_log);
94
95    int thres = (int)(1500 * 1500 / (y_avg[block_id] * y_avg[block_id] + 1) * 600);
96    int y_max0 = (y_max[block_id] > thres) ? thres : y_max[block_id];
97    int y_max1 = (y_max[block_id] - thres) > 0 ? (y_max[block_id] - thres) : 0;
98
99    float t0 = 0.01f * y_max0 + 0.001f;
100    float t1 = 0.001f * y_max1 + 0.001f;
101    float max0_log = log(y_max0 + t0);
102    float max1_log = log(y_max1 + t1);
103    float t0_log = log(t0);
104    float t1_log = log(t1);
105    float factor0;
106
107    if(y_max[block_id] < thres)
108    {
109        factor0 = (hist_bin_count - 1) / (max0_log - t0_log + 0.001f);
110    }
111    else
112        factor0 = y_max0 / (max0_log - t0_log + 0.001f);
113
114    float factor1 = y_max1 / (max1_log - t1_log + 0.001f);
115
116    if(y_max[block_id] < thres)
117    {
118        for(int i = 0; i < y_max[block_id]; i++)
119        {
120            int index = (int)((log(i + t0) - t0_log) * factor0 + 0.5f);
121            hist_log[index] += hist[i];
122            map_index_log[i] = index;
123        }
124    }
125    else
126    {
127        for(int i = 0; i < y_max0; i++)
128        {
129            int index = (int)((log(i + t0) - t0_log) * factor0 + 0.5f);
130            hist_log[index] += hist[i];
131            map_index_log[i] = index;
132        }
133
134        for(int i = y_max0; i < y_max[block_id]; i++)
135        {
136            int r = y_max[block_id] - i;
137            int index = (int)((log(r + t1) - t1_log) * factor1 + 0.5f);
138            index = y_max[block_id] - index;
139            hist_log[index] += hist[i];
140            map_index_log[i] = index;
141        }
142    }
143
144    for(int i = y_max[block_id]; i < hist_bin_count; i++)
145    {
146        hist_log[map_index_log[(int)y_max[block_id] - 1]] += hist[i];
147        map_index_log[i] = map_index_log[(int)y_max[block_id] - 1];
148    }
149
150    int sort_index = 1;
151    for(int i = 0; i < hist_bin_count; i++)
152    {
153        for(int l = 0; l < hist_log[i]; l++)
154        {
155            sort_y[sort_index] = i;
156            sort_index++;
157        }
158    }
159    sort_y[0] = 0;
160
161    for(int i = 1; i < hist_bin_count; i++)
162    {
163        hist_log[i] += hist_log[i - 1];
164    }
165
166    int map_leq_index[256];
167
168    haleq(sort_y, hist_log, map_leq_index, 0, hist_bin_count - 1, 0, 0, 255);
169
170    map_leq_index[255] = hist_bin_count;
171    map_leq_index[0] = 0;
172
173    for(int i = 1; i < 255; i++)
174    {
175        if(i % 2 == 0) map_leq_index[i] = (map_leq_index[i - 1] + map_leq_index[i + 1]) / 2;
176        if(map_leq_index[i] < map_leq_index[i - 1])
177            map_leq_index[i] = map_leq_index[i - 1];
178    }
179
180    for(int i = 0; i < 255; i++)
181    {
182        for(int k = map_leq_index[i]; k < map_leq_index[i + 1]; k++)
183        {
184            map_index_leq[k] = (float)i;
185        }
186    }
187
188    for(int i = 0; i < hist_bin_count; i++)
189    {
190        map_hist[i + block_start_index] = map_index_leq[map_index_log[i]] / 255.0f;
191    }
192
193    y_max[block_id] = y_max[block_id] / hist_bin_count;
194    y_avg[block_id] = y_avg[block_id] / hist_bin_count;
195
196    xcam_free (hist_log);
197    hist_log = NULL;
198    xcam_free (map_index_leq);
199    map_index_leq = NULL;
200    xcam_free (map_index_log);
201    map_index_log = NULL;
202    xcam_free (sort_y);
203    sort_y = NULL;
204}
205
206CLNewTonemappingImageHandler::CLNewTonemappingImageHandler (
207    const SmartPtr<CLContext> &context, const char *name)
208    : CLImageHandler (context, name)
209    , _output_format (XCAM_PIX_FMT_SGRBG16_planar)
210    , _block_factor (4)
211{
212    for(int i = 0; i < 65536; i++)
213    {
214        _map_hist[i] = i;
215    }
216
217    for(int i = 0; i < 4 * 4; i++)
218    {
219        _y_max[i] = 0.0f;
220        _y_avg[i] = 0.0f;
221    }
222}
223
224bool
225CLNewTonemappingImageHandler::set_tonemapping_kernel(SmartPtr<CLNewTonemappingImageKernel> &kernel)
226{
227    SmartPtr<CLImageKernel> image_kernel = kernel;
228    add_kernel (image_kernel);
229    _tonemapping_kernel = kernel;
230    return true;
231}
232
233XCamReturn
234CLNewTonemappingImageHandler::prepare_buffer_pool_video_info (
235    const VideoBufferInfo &input,
236    VideoBufferInfo &output)
237{
238    bool format_inited = output.init (_output_format, input.width, input.height);
239
240    XCAM_FAIL_RETURN (
241        WARNING,
242        format_inited,
243        XCAM_RETURN_ERROR_PARAM,
244        "CL image handler(%s) output format(%s) unsupported",
245        get_name (), xcam_fourcc_to_string (_output_format));
246
247    return XCAM_RETURN_NO_ERROR;
248}
249
250XCamReturn
251CLNewTonemappingImageHandler::prepare_parameters (
252    SmartPtr<VideoBuffer> &input, SmartPtr<VideoBuffer> &output)
253{
254    SmartPtr<CLContext> context = get_context ();
255    const VideoBufferInfo &video_info = input->get_video_info ();
256    CLArgList args;
257    CLWorkSize work_size;
258
259    XCAM_ASSERT (_tonemapping_kernel.ptr ());
260
261    CLImageDesc desc;
262    desc.format.image_channel_order = CL_RGBA;
263    desc.format.image_channel_data_type = CL_UNORM_INT16;
264    desc.width = video_info.aligned_width / 4;
265    desc.height = video_info.aligned_height * 4;
266    desc.row_pitch = video_info.strides[0];
267    desc.array_size = 4;
268    desc.slice_pitch = video_info.strides [0] * video_info.aligned_height;
269
270    SmartPtr<CLImage> image_in = convert_to_climage (context, input, desc);
271    SmartPtr<CLImage> image_out = convert_to_climage (context, output, desc);
272    int image_width = video_info.aligned_width;
273    int image_height = video_info.aligned_height;
274
275    XCAM_FAIL_RETURN (
276        WARNING,
277        image_in->is_valid () && image_out->is_valid (),
278        XCAM_RETURN_ERROR_MEM,
279        "cl image handler(%s) in/out memory not available", XCAM_STR (get_name ()));
280
281    SmartPtr<X3aStats> stats;
282    SmartPtr<CLVideoBuffer> cl_buf = input.dynamic_cast_ptr<CLVideoBuffer> ();
283    if (cl_buf.ptr ()) {
284        stats = cl_buf->find_3a_stats ();
285    }
286#if HAVE_LIBDRM
287    else {
288        SmartPtr<DrmBoBuffer> bo_buf = input.dynamic_cast_ptr<DrmBoBuffer> ();
289        stats = bo_buf->find_3a_stats ();
290    }
291#endif
292    XCAM_FAIL_RETURN (
293        ERROR, stats.ptr (), XCAM_RETURN_ERROR_MEM,
294        "new tonemapping handler prepare_arguments find_3a_stats failed");
295
296    XCam3AStats *stats_ptr = stats->get_stats ();
297    XCAM_FAIL_RETURN (
298        ERROR, stats_ptr, XCAM_RETURN_ERROR_MEM,
299        "new tonemapping handler prepare_arguments get_stats failed");
300
301    int block_factor = 4;
302    int width_per_block = stats_ptr->info.width / block_factor;
303    int height_per_block = stats_ptr->info.height / block_factor;
304    int height_last_block = height_per_block + stats_ptr->info.height % block_factor;
305    int hist_bin_count = 1 << stats_ptr->info.bit_depth;
306
307    int *hist_per_block = (int *) xcam_malloc0 (hist_bin_count * sizeof (int));
308    XCAM_ASSERT (hist_per_block);
309
310    for(int block_row = 0; block_row < block_factor; block_row++)
311    {
312        for(int block_col = 0; block_col < block_factor; block_col++)
313        {
314            int block_start_index = (block_row * block_factor + block_col) * hist_bin_count;
315            int start_index = block_row * height_per_block * stats_ptr->info.width + block_col * width_per_block;
316
317            for(int i = 0; i < hist_bin_count; i++)
318            {
319                hist_per_block[i] = 0;
320            }
321
322            if(block_row == block_factor - 1)
323            {
324                height_per_block = height_last_block;
325            }
326
327            int block_totalnum = width_per_block * height_per_block;
328            for(int i = 0; i < height_per_block; i++)
329            {
330                for(int j = 0; j < width_per_block; j++)
331                {
332                    int y = stats_ptr->stats[start_index + i * stats_ptr->info.width + j].avg_y;
333                    hist_per_block[y]++;
334                }
335            }
336
337            block_split_haleq (hist_per_block, hist_bin_count, block_totalnum, block_start_index, _y_max, _y_avg, _map_hist);
338        }
339    }
340
341    xcam_free (hist_per_block);
342    hist_per_block = NULL;
343
344    SmartPtr<CLBuffer> y_max_buffer = new CLBuffer(
345        context, sizeof(float) * block_factor * block_factor,
346        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &_y_max);
347
348    SmartPtr<CLBuffer> y_avg_buffer = new CLBuffer(
349        context, sizeof(float) * block_factor * block_factor,
350        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &_y_avg);
351
352    SmartPtr<CLBuffer> map_hist_buffer = new CLBuffer(
353        context, sizeof(float) * hist_bin_count * block_factor * block_factor,
354        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, &_map_hist);
355
356    //set args;
357    args.push_back (new CLMemArgument (image_in));
358    args.push_back (new CLMemArgument (image_out));
359    args.push_back (new CLMemArgument (y_max_buffer));
360    args.push_back (new CLMemArgument (y_avg_buffer));
361    args.push_back (new CLMemArgument (map_hist_buffer));
362    args.push_back (new CLArgumentT<int> (image_width));
363    args.push_back (new CLArgumentT<int> (image_height));
364
365    const CLImageDesc out_info = image_out->get_image_desc ();
366    work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
367    work_size.global[0] = out_info.width;
368    work_size.global[1] = out_info.height / 4;
369    work_size.local[0] = 8;
370    work_size.local[1] = 8;
371
372    XCAM_ASSERT (_tonemapping_kernel.ptr ());
373    XCamReturn ret = _tonemapping_kernel->set_arguments (args, work_size);
374    XCAM_FAIL_RETURN (
375        WARNING, ret == XCAM_RETURN_NO_ERROR, ret,
376        "new tone mapping kernel set arguments failed.");
377
378    return XCAM_RETURN_NO_ERROR;
379}
380
381
382SmartPtr<CLImageHandler>
383create_cl_newtonemapping_image_handler (const SmartPtr<CLContext> &context)
384{
385    SmartPtr<CLNewTonemappingImageHandler> tonemapping_handler;
386    SmartPtr<CLNewTonemappingImageKernel> tonemapping_kernel;
387
388    tonemapping_kernel = new CLNewTonemappingImageKernel (context, "kernel_newtonemapping");
389    XCAM_ASSERT (tonemapping_kernel.ptr ());
390    XCAM_FAIL_RETURN (
391        ERROR, tonemapping_kernel->build_kernel (kernel_tone_mapping_pipe_info, NULL) == XCAM_RETURN_NO_ERROR, NULL,
392        "build new tonemapping kernel(%s) failed", kernel_tone_mapping_pipe_info.kernel_name);
393
394    XCAM_ASSERT (tonemapping_kernel->is_valid ());
395    tonemapping_handler = new CLNewTonemappingImageHandler(context, "cl_handler_newtonemapping");
396    tonemapping_handler->set_tonemapping_kernel(tonemapping_kernel);
397
398    return tonemapping_handler;
399}
400
401};
402