kernel_bayer_pipe.cl revision 66a677ec4817859937a8886ff3f48fc79daa8430
1/* 2 * function: kernel_bayer_pipe 3 * params: 4 * input: image2d_t as read only 5 * output: image2d_t as write only 6 * blc_config: black level correction configuration 7 * wb_config: whitebalance configuration 8 * gamma_table: RGGB table 9 * stats_output: 3a stats output 10 */ 11 12 13#define WORKGROUP_CELL_WIDTH 64 14#define WORKGROUP_CELL_HEIGHT 4 15 16#define DEMOSAIC_X_CELL_PER_WORKITEM 2 17 18#define PIXEL_PER_CELL 2 19 20#define SLM_CELL_X_OFFSET 4 21#define SLM_CELL_Y_OFFSET 1 22 23// 8x8 24#define SLM_CELL_X_VALID_SIZE WORKGROUP_CELL_WIDTH 25#define SLM_CELL_Y_VALID_SIZE WORKGROUP_CELL_HEIGHT 26 27// 10x10 28#define SLM_CELL_X_SIZE (SLM_CELL_X_VALID_SIZE + SLM_CELL_X_OFFSET * 2) 29#define SLM_CELL_Y_SIZE (SLM_CELL_Y_VALID_SIZE + SLM_CELL_Y_OFFSET * 2) 30 31#define GUASS_DELTA_S_1 1.031739f 32#define GUASS_DELTA_S_1_5 1.072799f 33#define GUASS_DELTA_S_2 1.133173f 34#define GUASS_DELTA_S_2_5 1.215717f 35 36typedef struct 37{ 38 float ee_gain; 39 float ee_threshold; 40 float nr_gain; 41} CLEeConfig; 42 43inline int get_shared_pos_x (int i) 44{ 45 return i % SLM_CELL_X_SIZE; 46} 47 48inline int get_shared_pos_y (int i) 49{ 50 return i / SLM_CELL_X_SIZE; 51} 52 53inline int shared_pos (int x, int y) 54{ 55 return mad24(y, SLM_CELL_X_SIZE, x); 56} 57 58/* BA10=> GRBG */ 59inline void grbg_slm_load ( 60 __local float *px, __local float *py, __local float *pz, __local float *pw, 61 int index, __read_only image2d_t input, int x_start, int y_start 62) 63{ 64 sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; 65 float4 data1, data2, line1, line2; 66 int x0 = (get_shared_pos_x (index) + x_start) / 4; 67 int y0 = get_shared_pos_y (index) + y_start; 68 int2 pos = (int2)(x0, y0); 69 float4 gr, r, b, gb; 70 71 gr = read_imagef (input, sampler, (int2)(x0, y0)); 72 r = read_imagef (input, sampler, (int2)(x0, y0 + 544)); 73 b = read_imagef (input, sampler, (int2)(x0, y0 + 544 * 2)); 74 gb = read_imagef (input, sampler, (int2)(x0, y0 + 544 * 3)); 75 76 (*(__local float4 *)(px + index)) = gr; 77 (*(__local float4 *)(py + index)) = r; 78 (*(__local float4 *)(pz + index)) = b; 79 (*(__local float4 *)(pw + index)) = gb; 80} 81 82#define MAX_DELTA_COFF 5.0f 83#define MIN_DELTA_COFF 1.0f 84#define DEFAULT_DELTA_COFF 4.0f 85 86inline float2 delta_coff (float2 in, __local float *table) 87{ 88 float2 out; 89 out.x = table[(int)(fabs(in.x * 64.0f))]; 90 out.y = table[(int)(fabs(in.y * 64.0f))]; 91 92 return out; 93} 94 95inline float2 dot_denoise (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float gain) 96{ 97 float2 coff0, coff1, coff2, coff3, coff4, coff5; 98 coff0 = delta_coff (0.0, table) * gain; 99 coff1 = delta_coff (in1 - value, table); 100 coff2 = delta_coff (in2 - value, table); 101 coff3 = delta_coff (in3 - value, table); 102 coff4 = delta_coff (in4 - value, table); 103 //(in1 * coff1 + in2 * coff2 + in3 * coff3 + in4 * coff4 + value * coff0) 104 float2 sum1 = (mad (in1, coff1, 105 mad (in2, coff2, 106 mad (in3, coff3, 107 mad (in4, coff4, value * coff0))))); 108 return sum1 / (coff0 + coff1 + coff2 + coff3 + coff4); 109} 110 111inline float2 dot_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, float2 out, CLEeConfig ee_config, float2 *egain) 112{ 113 float2 eH = mad(in1, -0.5f, value); 114 eH = mad(in3, -0.5f, eH); 115 float2 eV = mad(in2, -0.5f, value); 116 eV = mad(in4, -0.5f, eV); 117 118 eH = fmax(eH, eV); 119 120 eH = eH > ee_config.ee_threshold ? eH : 0.0f; 121 122 egain[0] = mad(eH, ee_config.ee_gain, out) / out; 123 124 return out * egain[0]; 125} 126 127inline float2 dot_denoise_ee (float2 value, float2 in1, float2 in2, float2 in3, float2 in4, __local float *table, float gain, float2 *egain, CLEeConfig ee_config) 128{ 129 float2 out = dot_denoise(value, in1, in2, in3, in4, table, gain); 130 return dot_ee(value, in1, in2, in3, in4, out, ee_config, egain); 131} 132 133void demosaic_2_cell ( 134 __local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in, 135 int in_x, int in_y, 136 __write_only image2d_t out, uint out_height, int out_x, int out_y) 137{ 138 float4 out_data; 139 float2 value; 140 int index; 141 { 142 float3 R_y[2]; 143 index = shared_pos (in_x - 1, in_y); 144 R_y[0] = *(__local float3*)(y_data_in + index); 145 index = shared_pos (in_x - 1, in_y + 1); 146 R_y[1] = *(__local float3*)(y_data_in + index); 147 148 out_data.s02 = (R_y[0].s01 + R_y[0].s12) * 0.5f; 149 out_data.s13 = R_y[0].s12; 150 write_imagef (out, (int2)(out_x, out_y), out_data); 151 152 out_data.s02 = (R_y[0].s01 + R_y[0].s12 + R_y[1].s01 + R_y[1].s12) * 0.25f; 153 out_data.s13 = (R_y[0].s12 + R_y[1].s12) * 0.5f; 154 write_imagef (out, (int2)(out_x, out_y + 1), out_data); 155 } 156 157 { 158 float3 B_z[2]; 159 index = shared_pos (in_x, in_y - 1); 160 B_z[0] = *(__local float3*)(z_data_in + index); 161 index = shared_pos (in_x, in_y); 162 B_z[1] = *(__local float3*)(z_data_in + index); 163 164 out_data.s02 = (B_z[0].s01 + B_z[1].s01) * 0.5f; 165 out_data.s13 = (B_z[0].s01 + B_z[0].s12 + B_z[1].s01 + B_z[1].s12) * 0.25f; 166 write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data); 167 168 out_data.s02 = B_z[1].s01; 169 out_data.s13 = (B_z[1].s01 + B_z[1].s12) * 0.5f; 170 write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data); 171 } 172 173 { 174 float3 Gr_x[2], Gb_w[2]; 175 index = shared_pos (in_x, in_y); 176 Gr_x[0] = *(__local float3*)(x_data_in + index); 177 index = shared_pos (in_x, in_y + 1); 178 Gr_x[1] = *(__local float3*)(x_data_in + index); 179 180 index = shared_pos (in_x - 1, in_y - 1); 181 Gb_w[0] = *(__local float3*)(w_data_in + index); 182 index = shared_pos (in_x - 1, in_y); 183 Gb_w[1] = *(__local float3*)(w_data_in + index); 184 185 out_data.s02 = (Gr_x[0].s01 * 4.0f + Gb_w[0].s01 + 186 Gb_w[0].s12 + Gb_w[1].s01 + Gb_w[1].s12) * 0.125f; 187 out_data.s13 = (Gr_x[0].s01 + Gr_x[0].s12 + Gb_w[0].s12 + Gb_w[1].s12) * 0.25f; 188 write_imagef (out, (int2)(out_x, out_y + out_height), out_data); 189 190 out_data.s02 = (Gr_x[0].s01 + Gr_x[1].s01 + Gb_w[1].s01 + Gb_w[1].s12) * 0.25f; 191 192 out_data.s13 = (Gb_w[1].s12 * 4.0f + Gr_x[0].s01 + 193 Gr_x[0].s12 + Gr_x[1].s01 + Gr_x[1].s12) * 0.125f; 194 write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data); 195 } 196} 197 198void demosaic_denoise_2_cell ( 199 __local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in, 200 int in_x, int in_y, 201 __write_only image2d_t out, uint out_height, int out_x, int out_y, __local float *table, CLEeConfig ee_config) 202{ 203 float4 out_data[2]; 204 float2 value; 205 int index; 206 float2 egain[4]; 207 208 ///////////////////////////////////////G/////////////////////////////////// 209 { 210 float3 Gr_x[2], Gb_w[2]; 211 index = shared_pos (in_x - 1, in_y - 1); 212 Gb_w[0] = *(__local float3*)(w_data_in + index); 213 index = shared_pos (in_x - 1, in_y); 214 Gb_w[1] = *(__local float3*)(w_data_in + index); 215 216 index = shared_pos (in_x, in_y); 217 Gr_x[0] = *(__local float3*)(x_data_in + index); 218 index = shared_pos (in_x, in_y + 1); 219 Gr_x[1] = *(__local float3*)(x_data_in + index); 220 221 value = mad (Gr_x[0].s01, 4.0f, (Gb_w[0].s01 + 222 Gb_w[0].s12 + Gb_w[1].s01 + Gb_w[1].s12)) * 0.125f; 223 out_data[0].s02 = dot_denoise_ee (value, Gb_w[0].s01, Gb_w[0].s12, Gb_w[1].s01, Gb_w[1].s12, table, GUASS_DELTA_S_1_5, &egain[0], ee_config); 224 value = (Gr_x[0].s01 + Gr_x[0].s12 + 225 Gb_w[0].s12 + Gb_w[1].s12) * 0.25f; 226 out_data[0].s13 = dot_denoise_ee(value, Gr_x[0].s01, Gr_x[0].s12, Gb_w[0].s12, Gb_w[1].s12, table, GUASS_DELTA_S_1, &egain[1], ee_config); 227 228 value = (Gr_x[0].s01 + Gr_x[1].s01 + 229 Gb_w[1].s01 + Gb_w[1].s12) * 0.25f; 230 out_data[1].s02 = dot_denoise_ee (value, Gr_x[0].s01, Gr_x[1].s01, Gb_w[1].s01, Gb_w[1].s12, table, GUASS_DELTA_S_1, &egain[2], ee_config); 231 232 value = mad (Gb_w[1].s12, 4.0f, (Gr_x[0].s01 + 233 Gr_x[0].s12 + Gr_x[1].s01 + Gr_x[1].s12)) * 0.125f; 234 out_data[1].s13 = dot_denoise_ee (value, Gr_x[0].s01, Gr_x[0].s12, Gr_x[1].s01, Gr_x[1].s12, table, GUASS_DELTA_S_1_5, &egain[3], ee_config); 235 236 write_imagef (out, (int2)(out_x, out_y + out_height), out_data[0]); 237 write_imagef (out, (int2)(out_x, out_y + 1 + out_height), out_data[1]); 238 } 239 240 ////////////////////////////////R////////////////////////////////////////// 241 { 242 float4 R_y[3]; 243 index = shared_pos (in_x - 1, in_y - 1); 244 R_y[0] = *(__local float4*)(y_data_in + index); 245 index = shared_pos (in_x - 1, in_y); 246 R_y[1] = *(__local float4*)(y_data_in + index); 247 index = shared_pos (in_x - 1, in_y + 1); 248 R_y[2] = *(__local float4*)(y_data_in + index); 249 250 value = (R_y[1].s01 + R_y[1].s12) * 0.5f; 251 out_data[0].s02 = dot_denoise (value, R_y[0].s01, R_y[0].s12, R_y[2].s01, R_y[2].s12, table, GUASS_DELTA_S_2_5) * egain[0]; 252 253 value = R_y[1].s12; 254 out_data[0].s13 = dot_denoise (value, R_y[0].s12, R_y[1].s01, R_y[1].s23, R_y[2].s12, table, GUASS_DELTA_S_2) * egain[1]; 255 256 value = (R_y[1].s01 + R_y[1].s12 + 257 R_y[2].s01 + R_y[2].s12) * 0.25f; 258 out_data[1].s02 = dot_denoise (value, R_y[1].s01, R_y[1].s12, R_y[2].s01, R_y[2].s12, table, GUASS_DELTA_S_1_5) * egain[2]; 259 260 value = (R_y[1].s12 + R_y[2].s12) * 0.5f; 261 out_data[1].s13 = dot_denoise (value, R_y[1].s01, R_y[1].s23, R_y[2].s01, R_y[2].s23, table, GUASS_DELTA_S_2_5) * egain[3]; 262 263 write_imagef (out, (int2)(out_x, out_y), out_data[0]); 264 write_imagef (out, (int2)(out_x, out_y + 1), out_data[1]); 265 266 } 267 ////////////////////////////////B////////////////////////////////////////// 268 { 269 float4 B_z[3]; 270 index = shared_pos (in_x - 1, in_y - 1); 271 B_z[0] = *(__local float4*)(z_data_in + index); 272 index = shared_pos (in_x - 1, in_y); 273 B_z[1] = *(__local float4*)(z_data_in + index); 274 index = shared_pos (in_x - 1, in_y + 1); 275 B_z[2] = *(__local float4*)(z_data_in + index); 276 277 value = (B_z[0].s12 + B_z[1].s12) * 0.5f; 278 out_data[0].s02 = dot_denoise (value, B_z[0].s01, B_z[0].s23, B_z[1].s01, B_z[1].s23, table, GUASS_DELTA_S_2_5) * egain[0]; 279 280 value = (B_z[0].s12 + B_z[0].s23 + 281 B_z[1].s12 + B_z[1].s23) * 0.25f; 282 out_data[0].s13 = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[1].s12, B_z[1].s23, table, GUASS_DELTA_S_1_5) * egain[1]; 283 284 value = B_z[1].s12; 285 out_data[1].s02 = dot_denoise (value, B_z[0].s12, B_z[1].s01, B_z[1].s23, B_z[2].s12, table, GUASS_DELTA_S_2) * egain[2]; 286 287 value = (B_z[1].s12 + B_z[1].s23) * 0.5f; 288 out_data[1].s13 = dot_denoise (value, B_z[0].s12, B_z[0].s23, B_z[2].s12, B_z[2].s23, table, GUASS_DELTA_S_2_5) * egain[3]; 289 290 write_imagef (out, (int2)(out_x, out_y + out_height * 2), out_data[0]); 291 write_imagef (out, (int2)(out_x, out_y + 1 + out_height * 2), out_data[1]); 292 } 293} 294 295void shared_demosaic ( 296 __local float *x_data_in, __local float *y_data_in, __local float *z_data_in, __local float *w_data_in, 297 int in_x, int in_y, 298 __write_only image2d_t out, uint output_height, int out_x, int out_y, 299 uint has_denoise, __local float *table, CLEeConfig ee_config) 300{ 301 if (has_denoise) { 302 demosaic_denoise_2_cell ( 303 x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y, 304 out, output_height, out_x, out_y, table, ee_config); 305 } else { 306 demosaic_2_cell ( 307 x_data_in, y_data_in, z_data_in, w_data_in, in_x, in_y, 308 out, output_height, out_x, out_y); 309 } 310} 311 312__kernel void kernel_bayer_pipe (__read_only image2d_t input, 313 uint input_height, 314 __write_only image2d_t output, 315 uint output_height, 316 __global float * bnr_table, 317 uint has_denoise, 318 CLEeConfig ee_config 319 ) 320{ 321 int g_id_x = get_global_id (0); 322 int g_id_y = get_global_id (1); 323 int g_size_x = get_global_size (0); 324 int g_size_y = get_global_size (1); 325 326 int l_id_x = get_local_id(0); 327 int l_id_y = get_local_id(1); 328 int l_size_x = get_local_size (0); 329 int l_size_y = get_local_size (1); 330 331 __local float p1_x[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_y[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_z[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE], p1_w[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE]; 332 __local float4 p2[SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE]; 333 __local float4 *stats_cache = p2; 334 __local float SLM_delta_coef_table[64]; 335 336 int out_x_start, out_y_start; 337 int x_start = get_group_id (0) * WORKGROUP_CELL_WIDTH; 338 int y_start = get_group_id (1) * WORKGROUP_CELL_HEIGHT; 339 int i = mad24 (l_id_y, l_size_x, l_id_x); 340 int j = i; 341 342 i *= 4; 343 for (; i < SLM_CELL_X_SIZE * SLM_CELL_Y_SIZE; i += (l_size_x * l_size_y) * 4) { 344 grbg_slm_load (p1_x, p1_y, p1_z, p1_w, i, 345 input, 346 x_start - SLM_CELL_X_OFFSET, y_start - SLM_CELL_Y_OFFSET); 347 } 348 for(; j < 64; j += l_size_x * l_size_y) 349 SLM_delta_coef_table[j] = bnr_table[j]; 350 351 barrier(CLK_LOCAL_MEM_FENCE); 352 353 i = mad24 (l_id_y, l_size_x, l_id_x); 354 int workitem_x_size = (SLM_CELL_X_VALID_SIZE / DEMOSAIC_X_CELL_PER_WORKITEM); 355 int input_x = (i % workitem_x_size) * DEMOSAIC_X_CELL_PER_WORKITEM; 356 int input_y = i / workitem_x_size; 357 358 shared_demosaic ( 359 p1_x, p1_y, p1_z, p1_w, 360 input_x + SLM_CELL_X_OFFSET, input_y + SLM_CELL_Y_OFFSET, 361 output, output_height, 362 (input_x + x_start) * PIXEL_PER_CELL / 4, (input_y + y_start) * PIXEL_PER_CELL, has_denoise, SLM_delta_coef_table, ee_config); 363} 364 365