143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler// This file is auto-generated. Do not edit! 243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler 343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler#include "precomp.hpp" 443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler#include "opencl_kernels_video.hpp" 543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler 643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Preslernamespace cv 743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler{ 843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Preslernamespace ocl 943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler{ 1043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Preslernamespace video 1143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler{ 1243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler 1343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Preslerconst struct ProgramEntry bgfg_mog2={"bgfg_mog2", 1443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#if CN==1\n" 1543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define T_MEAN float\n" 1643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define F_ZERO (0.0f)\n" 1743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define cnMode 1\n" 1843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define frameToMean(a, b) (b) = *(a);\n" 1943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define meanToFrame(a, b) *b = convert_uchar_sat(a);\n" 2043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline float sum(float val)\n" 2143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 2243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return val;\n" 2343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 2443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#else\n" 2543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define T_MEAN float4\n" 2643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)\n" 2743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define cnMode 4\n" 2843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define meanToFrame(a, b)\\\n" 2943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b[0] = convert_uchar_sat(a.x); \\\n" 3043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b[1] = convert_uchar_sat(a.y); \\\n" 3143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b[2] = convert_uchar_sat(a.z);\n" 3243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define frameToMean(a, b)\\\n" 3343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b.x = a[0]; \\\n" 3443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b.y = a[1]; \\\n" 3543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b.z = a[2]; \\\n" 3643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b.w = 0.0f;\n" 3743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline float sum(const float4 val)\n" 3843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 3943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return (val.x + val.y + val.z);\n" 4043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 4143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 4243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void mog2_kernel(__global const uchar* frame, int frame_step, int frame_offset, int frame_row, int frame_col,\n" 4343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global uchar* modesUsed,\n" 4443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global uchar* weight,\n" 4543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global uchar* mean,\n" 4643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global uchar* variance,\n" 4743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global uchar* fgmask, int fgmask_step, int fgmask_offset,\n" 4843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float alphaT, float alpha1, float prune,\n" 4943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float c_Tb, float c_TB, float c_Tg, float c_varMin,\n" 5043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float c_varMax, float c_varInit, float c_tau\n" 5143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#ifdef SHADOW_DETECT\n" 5243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler", uchar c_shadowVal\n" 5343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 5443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler")\n" 5543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 5643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int x = get_global_id(0);\n" 5743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int y = get_global_id(1);\n" 5843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if( x < frame_col && y < frame_row)\n" 5943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 6043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const uchar* _frame = (frame + mad24(y, frame_step, mad24(x, CN, frame_offset)));\n" 6143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"T_MEAN pix;\n" 6243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"frameToMean(_frame, pix);\n" 6343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"uchar foreground = 255;\n" 6443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"bool fitsPDF = false;\n" 6543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int pt_idx = mad24(y, frame_col, x);\n" 6643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int idx_step = frame_row * frame_col;\n" 6743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global uchar* _modesUsed = modesUsed + pt_idx;\n" 6843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"uchar nmodes = _modesUsed[0];\n" 6943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float totalWeight = 0.0f;\n" 7043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* _weight = (__global float*)(weight);\n" 7143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* _variance = (__global float*)(variance);\n" 7243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global T_MEAN* _mean = (__global T_MEAN*)(mean);\n" 7343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"uchar mode = 0;\n" 7443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (; mode < nmodes; ++mode)\n" 7543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 7643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int mode_idx = mad24(mode, idx_step, pt_idx);\n" 7743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float c_weight = mad(alpha1, _weight[mode_idx], prune);\n" 7843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float c_var = _variance[mode_idx];\n" 7943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"T_MEAN c_mean = _mean[mode_idx];\n" 8043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"T_MEAN diff = c_mean - pix;\n" 8143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float dist2 = dot(diff, diff);\n" 8243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (totalWeight < c_TB && dist2 < c_Tb * c_var)\n" 8343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"foreground = 0;\n" 8443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (dist2 < c_Tg * c_var)\n" 8543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 8643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"fitsPDF = true;\n" 8743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"c_weight += alphaT;\n" 8843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float k = alphaT / c_weight;\n" 8943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"T_MEAN mean_new = mad((T_MEAN)-k, diff, c_mean);\n" 9043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float variance_new = clamp(mad(k, (dist2 - c_var), c_var), c_varMin, c_varMax);\n" 9143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int i = mode; i > 0; --i)\n" 9243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 9343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int prev_idx = mode_idx - idx_step;\n" 9443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (c_weight < _weight[prev_idx])\n" 9543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 9643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_weight[mode_idx] = _weight[prev_idx];\n" 9743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_variance[mode_idx] = _variance[prev_idx];\n" 9843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_mean[mode_idx] = _mean[prev_idx];\n" 9943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"mode_idx = prev_idx;\n" 10043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 10143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_mean[mode_idx] = mean_new;\n" 10243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_variance[mode_idx] = variance_new;\n" 10343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_weight[mode_idx] = c_weight;\n" 10443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"totalWeight += c_weight;\n" 10543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"mode ++;\n" 10643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 10743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 10843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (c_weight < -prune)\n" 10943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"c_weight = 0.0f;\n" 11043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_weight[mode_idx] = c_weight;\n" 11143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"totalWeight += c_weight;\n" 11243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 11343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (; mode < nmodes; ++mode)\n" 11443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 11543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int mode_idx = mad24(mode, idx_step, pt_idx);\n" 11643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float c_weight = mad(alpha1, _weight[mode_idx], prune);\n" 11743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (c_weight < -prune)\n" 11843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 11943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"c_weight = 0.0f;\n" 12043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"nmodes = mode;\n" 12143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 12243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 12343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_weight[mode_idx] = c_weight;\n" 12443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"totalWeight += c_weight;\n" 12543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 12643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (0.f < totalWeight)\n" 12743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 12843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"totalWeight = 1.f / totalWeight;\n" 12943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int mode = 0; mode < nmodes; ++mode)\n" 13043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_weight[mad24(mode, idx_step, pt_idx)] *= totalWeight;\n" 13143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 13243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (!fitsPDF)\n" 13343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 13443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"uchar mode = nmodes == (NMIXTURES) ? (NMIXTURES) - 1 : nmodes++;\n" 13543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int mode_idx = mad24(mode, idx_step, pt_idx);\n" 13643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (nmodes == 1)\n" 13743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_weight[mode_idx] = 1.f;\n" 13843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else\n" 13943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 14043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_weight[mode_idx] = alphaT;\n" 14143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int i = pt_idx; i < mode_idx; i += idx_step)\n" 14243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_weight[i] *= alpha1;\n" 14343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 14443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int i = nmodes - 1; i > 0; --i)\n" 14543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 14643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int prev_idx = mode_idx - idx_step;\n" 14743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (alphaT < _weight[prev_idx])\n" 14843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 14943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_weight[mode_idx] = _weight[prev_idx];\n" 15043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_variance[mode_idx] = _variance[prev_idx];\n" 15143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_mean[mode_idx] = _mean[prev_idx];\n" 15243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"mode_idx = prev_idx;\n" 15343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 15443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_mean[mode_idx] = pix;\n" 15543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_variance[mode_idx] = c_varInit;\n" 15643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 15743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"_modesUsed[0] = nmodes;\n" 15843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#ifdef SHADOW_DETECT\n" 15943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (foreground)\n" 16043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 16143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float tWeight = 0.0f;\n" 16243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (uchar mode = 0; mode < nmodes; ++mode)\n" 16343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 16443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int mode_idx = mad24(mode, idx_step, pt_idx);\n" 16543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"T_MEAN c_mean = _mean[mode_idx];\n" 16643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"T_MEAN pix_mean = pix * c_mean;\n" 16743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float numerator = sum(pix_mean);\n" 16843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float denominator = dot(c_mean, c_mean);\n" 16943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (denominator == 0)\n" 17043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 17143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (numerator <= denominator && numerator >= c_tau * denominator)\n" 17243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 17343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float a = numerator / denominator;\n" 17443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"T_MEAN dD = mad(a, c_mean, -pix);\n" 17543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (dot(dD, dD) < c_Tb * _variance[mode_idx] * a * a)\n" 17643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 17743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"foreground = c_shadowVal;\n" 17843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 17943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 18043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 18143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"tWeight += _weight[mode_idx];\n" 18243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tWeight > c_TB)\n" 18343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 18443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 18543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 18643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 18743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global uchar* _fgmask = fgmask + mad24(y, fgmask_step, x + fgmask_offset);\n" 18843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*_fgmask = (uchar)foreground;\n" 18943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 19043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 19143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void getBackgroundImage2_kernel(__global const uchar* modesUsed,\n" 19243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const uchar* weight,\n" 19343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const uchar* mean,\n" 19443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global uchar* dst, int dst_step, int dst_offset, int dst_row, int dst_col,\n" 19543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float c_TB)\n" 19643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 19743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int x = get_global_id(0);\n" 19843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int y = get_global_id(1);\n" 19943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(x < dst_col && y < dst_row)\n" 20043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 20143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int pt_idx = mad24(y, dst_col, x);\n" 20243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const uchar* _modesUsed = modesUsed + pt_idx;\n" 20343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"uchar nmodes = _modesUsed[0];\n" 20443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"T_MEAN meanVal = (T_MEAN)F_ZERO;\n" 20543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float totalWeight = 0.0f;\n" 20643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* _weight = (__global const float*)weight;\n" 20743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const T_MEAN* _mean = (__global const T_MEAN*)(mean);\n" 20843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int idx_step = dst_row * dst_col;\n" 20943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (uchar mode = 0; mode < nmodes; ++mode)\n" 21043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 21143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int mode_idx = mad24(mode, idx_step, pt_idx);\n" 21243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float c_weight = _weight[mode_idx];\n" 21343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"T_MEAN c_mean = _mean[mode_idx];\n" 21443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"meanVal = mad(c_weight, c_mean, meanVal);\n" 21543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"totalWeight += c_weight;\n" 21643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (totalWeight > c_TB)\n" 21743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 21843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 21943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (0.f < totalWeight)\n" 22043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"meanVal = meanVal / totalWeight;\n" 22143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else\n" 22243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"meanVal = (T_MEAN)(0.f);\n" 22343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global uchar* _dst = dst + mad24(y, dst_step, mad24(x, CN, dst_offset));\n" 22443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"meanToFrame(meanVal, _dst);\n" 22543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 22643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 22743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler, "b6e3850899862b7f0ab67cb32f1d52e9"}; 22843a3f2149b5d3417cc5dc843032ecf05a890c147Noah PreslerProgramSource bgfg_mog2_oclsrc(bgfg_mog2.programStr); 22943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Preslerconst struct ProgramEntry optical_flow_farneback={"optical_flow_farneback", 23043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define tx (int)get_local_id(0)\n" 23143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define ty get_local_id(1)\n" 23243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define bx get_group_id(0)\n" 23343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define bdx (int)get_local_size(0)\n" 23443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define BORDER_SIZE 5\n" 23543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define MAX_KSIZE_HALF 100\n" 23643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#ifndef polyN\n" 23743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define polyN 5\n" 23843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 23943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#if USE_DOUBLE\n" 24043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#ifdef cl_amd_fp64\n" 24143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma OPENCL EXTENSION cl_amd_fp64:enable\n" 24243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#elif defined (cl_khr_fp64)\n" 24343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma OPENCL EXTENSION cl_khr_fp64:enable\n" 24443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 24543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define TYPE double\n" 24643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define VECTYPE double4\n" 24743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#else\n" 24843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define TYPE float\n" 24943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define VECTYPE float4\n" 25043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 25143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void polynomialExpansion(__global __const float * src, int srcStep,\n" 25243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float * dst, int dstStep,\n" 25343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int rows, const int cols,\n" 25443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global __const float * c_g,\n" 25543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global __const float * c_xg,\n" 25643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global __const float * c_xxg,\n" 25743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float * smem,\n" 25843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const VECTYPE ig)\n" 25943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 26043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int y = get_global_id(1);\n" 26143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int x = bx * (bdx - 2*polyN) + tx - polyN;\n" 26243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int xWarped;\n" 26343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float *row = smem + tx;\n" 26443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows && y >= 0)\n" 26543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 26643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xWarped = min(max(x, 0), cols - 1);\n" 26743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[0] = src[mad24(y, srcStep, xWarped)] * c_g[0];\n" 26843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[bdx] = 0.f;\n" 26943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[2*bdx] = 0.f;\n" 27043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 27143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 1; k <= polyN; ++k)\n" 27243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 27343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float t0 = src[mad24(max(y - k, 0), srcStep, xWarped)];\n" 27443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float t1 = src[mad24(min(y + k, rows - 1), srcStep, xWarped)];\n" 27543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[0] += c_g[k] * (t0 + t1);\n" 27643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[bdx] += c_xg[k] * (t1 - t0);\n" 27743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[2*bdx] += c_xxg[k] * (t0 + t1);\n" 27843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 27943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 28043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 28143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows && y >= 0 && tx >= polyN && tx + polyN < bdx && x < cols)\n" 28243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 28343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"TYPE b1 = c_g[0] * row[0];\n" 28443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"TYPE b3 = c_g[0] * row[bdx];\n" 28543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"TYPE b5 = c_g[0] * row[2*bdx];\n" 28643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"TYPE b2 = 0, b4 = 0, b6 = 0;\n" 28743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 28843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 1; k <= polyN; ++k)\n" 28943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 29043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b1 += (row[k] + row[-k]) * c_g[k];\n" 29143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b4 += (row[k] + row[-k]) * c_xxg[k];\n" 29243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b2 += (row[k] - row[-k]) * c_xg[k];\n" 29343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b3 += (row[k + bdx] + row[-k + bdx]) * c_g[k];\n" 29443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b6 += (row[k + bdx] - row[-k + bdx]) * c_xg[k];\n" 29543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b5 += (row[k + 2*bdx] + row[-k + 2*bdx]) * c_g[k];\n" 29643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 29743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dst[mad24(y, dstStep, xWarped)] = (float)(b3*ig.s0);\n" 29843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dst[mad24(rows + y, dstStep, xWarped)] = (float)(b2*ig.s0);\n" 29943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dst[mad24(2*rows + y, dstStep, xWarped)] = (float)(b1*ig.s1 + b5*ig.s2);\n" 30043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dst[mad24(3*rows + y, dstStep, xWarped)] = (float)(b1*ig.s1 + b4*ig.s2);\n" 30143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dst[mad24(4*rows + y, dstStep, xWarped)] = (float)(b6*ig.s3);\n" 30243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 30343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 30443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline int idx_row_low(const int y, const int last_row)\n" 30543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 30643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return abs(y) % (last_row + 1);\n" 30743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 30843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline int idx_row_high(const int y, const int last_row)\n" 30943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 31043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return abs(last_row - abs(last_row - y)) % (last_row + 1);\n" 31143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 31243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline int idx_col_low(const int x, const int last_col)\n" 31343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 31443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return abs(x) % (last_col + 1);\n" 31543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 31643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline int idx_col_high(const int x, const int last_col)\n" 31743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 31843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return abs(last_col - abs(last_col - x)) % (last_col + 1);\n" 31943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 32043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline int idx_col(const int x, const int last_col)\n" 32143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 32243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return idx_col_low(idx_col_high(x, last_col), last_col);\n" 32343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 32443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void gaussianBlur(__global const float * src, int srcStep,\n" 32543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float * dst, int dstStep, const int rows, const int cols,\n" 32643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float * c_gKer, const int ksizeHalf,\n" 32743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float * smem)\n" 32843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 32943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int y = get_global_id(1);\n" 33043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int x = get_global_id(0);\n" 33143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float *row = smem + ty * (bdx + 2*ksizeHalf);\n" 33243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows)\n" 33343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 33443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)\n" 33543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 33643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int xExt = (int)(bx * bdx) + i - ksizeHalf;\n" 33743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xExt = idx_col(xExt, cols - 1);\n" 33843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[i] = src[mad24(y, srcStep, xExt)] * c_gKer[0];\n" 33943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int j = 1; j <= ksizeHalf; ++j)\n" 34043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[i] += (src[mad24(idx_row_low(y - j, rows - 1), srcStep, xExt)]\n" 34143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"+ src[mad24(idx_row_high(y + j, rows - 1), srcStep, xExt)]) * c_gKer[j];\n" 34243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 34343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 34443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 34543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows && y >= 0 && x < cols && x >= 0)\n" 34643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 34743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row += tx + ksizeHalf;\n" 34843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float res = row[0] * c_gKer[0];\n" 34943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int i = 1; i <= ksizeHalf; ++i)\n" 35043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"res += (row[-i] + row[i]) * c_gKer[i];\n" 35143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dst[mad24(y, dstStep, x)] = res;\n" 35243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 35343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 35443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void gaussianBlur5(__global const float * src, int srcStep,\n" 35543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float * dst, int dstStep,\n" 35643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int rows, const int cols,\n" 35743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float * c_gKer, const int ksizeHalf,\n" 35843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float * smem)\n" 35943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 36043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int y = get_global_id(1);\n" 36143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int x = get_global_id(0);\n" 36243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int smw = bdx + 2*ksizeHalf;\n" 36343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local volatile float *row = smem + 5 * ty * smw;\n" 36443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows)\n" 36543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 36643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)\n" 36743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 36843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int xExt = (int)(bx * bdx) + i - ksizeHalf;\n" 36943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xExt = idx_col(xExt, cols - 1);\n" 37043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 37143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 37243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[k*smw + i] = src[mad24(k*rows + y, srcStep, xExt)] * c_gKer[0];\n" 37343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int j = 1; j <= ksizeHalf; ++j)\n" 37443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 37543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 37643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[k*smw + i] +=\n" 37743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"(src[mad24(k*rows + idx_row_low(y - j, rows - 1), srcStep, xExt)] +\n" 37843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"src[mad24(k*rows + idx_row_high(y + j, rows - 1), srcStep, xExt)]) * c_gKer[j];\n" 37943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 38043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 38143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 38243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows && y >= 0 && x < cols && x >= 0)\n" 38343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 38443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row += tx + ksizeHalf;\n" 38543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float res[5];\n" 38643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 38743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 38843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"res[k] = row[k*smw] * c_gKer[0];\n" 38943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int i = 1; i <= ksizeHalf; ++i)\n" 39043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 39143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 39243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"res[k] += (row[k*smw - i] + row[k*smw + i]) * c_gKer[i];\n" 39343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 39443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 39543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dst[mad24(k*rows + y, dstStep, x)] = res[k];\n" 39643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 39743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 39843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__constant float c_border[BORDER_SIZE + 1] = { 0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f };\n" 39943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void updateMatrices(__global const float * flowx, int xStep,\n" 40043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float * flowy, int yStep,\n" 40143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int rows, const int cols,\n" 40243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float * R0, int R0Step,\n" 40343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float * R1, int R1Step,\n" 40443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float * M, int mStep)\n" 40543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 40643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int y = get_global_id(1);\n" 40743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int x = get_global_id(0);\n" 40843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows && y >= 0 && x < cols && x >= 0)\n" 40943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 41043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float dx = flowx[mad24(y, xStep, x)];\n" 41143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float dy = flowy[mad24(y, yStep, x)];\n" 41243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float fx = x + dx;\n" 41343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float fy = y + dy;\n" 41443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int x1 = convert_int(floor(fx));\n" 41543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int y1 = convert_int(floor(fy));\n" 41643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"fx -= x1;\n" 41743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"fy -= y1;\n" 41843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float r2, r3, r4, r5, r6;\n" 41943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (x1 >= 0 && y1 >= 0 && x1 < cols - 1 && y1 < rows - 1)\n" 42043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 42143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float a00 = (1.f - fx) * (1.f - fy);\n" 42243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float a01 = fx * (1.f - fy);\n" 42343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float a10 = (1.f - fx) * fy;\n" 42443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float a11 = fx * fy;\n" 42543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r2 = a00 * R1[mad24(y1, R1Step, x1)] +\n" 42643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a01 * R1[mad24(y1, R1Step, x1 + 1)] +\n" 42743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a10 * R1[mad24(y1 + 1, R1Step, x1)] +\n" 42843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a11 * R1[mad24(y1 + 1, R1Step, x1 + 1)];\n" 42943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r3 = a00 * R1[mad24(rows + y1, R1Step, x1)] +\n" 43043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a01 * R1[mad24(rows + y1, R1Step, x1 + 1)] +\n" 43143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a10 * R1[mad24(rows + y1 + 1, R1Step, x1)] +\n" 43243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a11 * R1[mad24(rows + y1 + 1, R1Step, x1 + 1)];\n" 43343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r4 = a00 * R1[mad24(2*rows + y1, R1Step, x1)] +\n" 43443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a01 * R1[mad24(2*rows + y1, R1Step, x1 + 1)] +\n" 43543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a10 * R1[mad24(2*rows + y1 + 1, R1Step, x1)] +\n" 43643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a11 * R1[mad24(2*rows + y1 + 1, R1Step, x1 + 1)];\n" 43743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r5 = a00 * R1[mad24(3*rows + y1, R1Step, x1)] +\n" 43843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a01 * R1[mad24(3*rows + y1, R1Step, x1 + 1)] +\n" 43943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a10 * R1[mad24(3*rows + y1 + 1, R1Step, x1)] +\n" 44043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a11 * R1[mad24(3*rows + y1 + 1, R1Step, x1 + 1)];\n" 44143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r6 = a00 * R1[mad24(4*rows + y1, R1Step, x1)] +\n" 44243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a01 * R1[mad24(4*rows + y1, R1Step, x1 + 1)] +\n" 44343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a10 * R1[mad24(4*rows + y1 + 1, R1Step, x1)] +\n" 44443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"a11 * R1[mad24(4*rows + y1 + 1, R1Step, x1 + 1)];\n" 44543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r4 = (R0[mad24(2*rows + y, R0Step, x)] + r4) * 0.5f;\n" 44643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r5 = (R0[mad24(3*rows + y, R0Step, x)] + r5) * 0.5f;\n" 44743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r6 = (R0[mad24(4*rows + y, R0Step, x)] + r6) * 0.25f;\n" 44843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 44943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else\n" 45043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 45143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r2 = r3 = 0.f;\n" 45243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r4 = R0[mad24(2*rows + y, R0Step, x)];\n" 45343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r5 = R0[mad24(3*rows + y, R0Step, x)];\n" 45443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r6 = R0[mad24(4*rows + y, R0Step, x)] * 0.5f;\n" 45543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 45643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r2 = (R0[mad24(y, R0Step, x)] - r2) * 0.5f;\n" 45743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r3 = (R0[mad24(rows + y, R0Step, x)] - r3) * 0.5f;\n" 45843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r2 += r4*dy + r6*dx;\n" 45943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r3 += r6*dy + r5*dx;\n" 46043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float scale =\n" 46143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"c_border[min(x, BORDER_SIZE)] *\n" 46243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"c_border[min(y, BORDER_SIZE)] *\n" 46343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"c_border[min(cols - x - 1, BORDER_SIZE)] *\n" 46443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"c_border[min(rows - y - 1, BORDER_SIZE)];\n" 46543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r2 *= scale;\n" 46643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r3 *= scale;\n" 46743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r4 *= scale;\n" 46843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r5 *= scale;\n" 46943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"r6 *= scale;\n" 47043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"M[mad24(y, mStep, x)] = r4*r4 + r6*r6;\n" 47143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"M[mad24(rows + y, mStep, x)] = (r4 + r5)*r6;\n" 47243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"M[mad24(2*rows + y, mStep, x)] = r5*r5 + r6*r6;\n" 47343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"M[mad24(3*rows + y, mStep, x)] = r4*r2 + r6*r3;\n" 47443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"M[mad24(4*rows + y, mStep, x)] = r6*r2 + r5*r3;\n" 47543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 47643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 47743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void boxFilter5(__global const float * src, int srcStep,\n" 47843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float * dst, int dstStep,\n" 47943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int rows, const int cols,\n" 48043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int ksizeHalf,\n" 48143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float * smem)\n" 48243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 48343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int y = get_global_id(1);\n" 48443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int x = get_global_id(0);\n" 48543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));\n" 48643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int smw = bdx + 2*ksizeHalf;\n" 48743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float *row = smem + 5 * ty * smw;\n" 48843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows)\n" 48943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 49043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int i = tx; i < bdx + 2*ksizeHalf; i += bdx)\n" 49143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 49243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int xExt = (int)(bx * bdx) + i - ksizeHalf;\n" 49343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xExt = min(max(xExt, 0), cols - 1);\n" 49443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 49543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 49643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[k*smw + i] = src[mad24(k*rows + y, srcStep, xExt)];\n" 49743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int j = 1; j <= ksizeHalf; ++j)\n" 49843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 49943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 50043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row[k*smw + i] +=\n" 50143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"src[mad24(k*rows + max(y - j, 0), srcStep, xExt)] +\n" 50243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"src[mad24(k*rows + min(y + j, rows - 1), srcStep, xExt)];\n" 50343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 50443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 50543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 50643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows && y >= 0 && x < cols && x >= 0)\n" 50743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 50843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"row += tx + ksizeHalf;\n" 50943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float res[5];\n" 51043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 51143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 51243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"res[k] = row[k*smw];\n" 51343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int i = 1; i <= ksizeHalf; ++i)\n" 51443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 51543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 51643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"res[k] += row[k*smw - i] + row[k*smw + i];\n" 51743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#pragma unroll\n" 51843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int k = 0; k < 5; ++k)\n" 51943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dst[mad24(k*rows + y, dstStep, x)] = res[k] * boxAreaInv;\n" 52043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 52143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 52243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void updateFlow(__global const float * M, int mStep,\n" 52343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float * flowx, int xStep,\n" 52443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float * flowy, int yStep,\n" 52543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int rows, const int cols)\n" 52643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 52743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int y = get_global_id(1);\n" 52843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int x = get_global_id(0);\n" 52943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y < rows && y >= 0 && x < cols && x >= 0)\n" 53043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 53143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float g11 = M[mad24(y, mStep, x)];\n" 53243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float g12 = M[mad24(rows + y, mStep, x)];\n" 53343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float g22 = M[mad24(2*rows + y, mStep, x)];\n" 53443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float h1 = M[mad24(3*rows + y, mStep, x)];\n" 53543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float h2 = M[mad24(4*rows + y, mStep, x)];\n" 53643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float detInv = 1.f / (g11*g22 - g12*g12 + 1e-3f);\n" 53743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"flowx[mad24(y, xStep, x)] = (g11*h2 - g12*h1) * detInv;\n" 53843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"flowy[mad24(y, yStep, x)] = (g22*h1 - g12*h2) * detInv;\n" 53943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 54043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 54143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler, "529300e6242f574f83d11a089cc120c0"}; 54243a3f2149b5d3417cc5dc843032ecf05a890c147Noah PreslerProgramSource optical_flow_farneback_oclsrc(optical_flow_farneback.programStr); 54343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Preslerconst struct ProgramEntry optical_flow_tvl1={"optical_flow_tvl1", 54443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void centeredGradientKernel(__global const float* src_ptr, int src_col, int src_row, int src_step,\n" 54543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* dx, __global float* dy, int d_step)\n" 54643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 54743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int x = get_global_id(0);\n" 54843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int y = get_global_id(1);\n" 54943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if((x < src_col)&&(y < src_row))\n" 55043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 55143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);\n" 55243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int src_x2 = (x - 1) > 0 ? (x -1) : 0;\n" 55343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dx[y * d_step+ x] = 0.5f * (src_ptr[y * src_step + src_x1] - src_ptr[y * src_step+ src_x2]);\n" 55443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);\n" 55543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int src_y2 = (y - 1) > 0 ? (y - 1) : 0;\n" 55643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dy[y * d_step+ x] = 0.5f * (src_ptr[src_y1 * src_step + x] - src_ptr[src_y2 * src_step+ x]);\n" 55743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 55843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 55943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline float bicubicCoeff(float x_)\n" 56043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 56143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float x = fabs(x_);\n" 56243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (x <= 1.0f)\n" 56343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return x * x * (1.5f * x - 2.5f) + 1.0f;\n" 56443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else if (x < 2.0f)\n" 56543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;\n" 56643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else\n" 56743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return 0.0f;\n" 56843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 56943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,\n" 57043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,\n" 57143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* u1, int u1_step,\n" 57243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* u2,\n" 57343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* I1w,\n" 57443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* I1wx, \n" 57543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* I1wy, \n" 57643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* grad, \n" 57743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* rho,\n" 57843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int I1w_step,\n" 57943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u2_step,\n" 58043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u1_offset_x,\n" 58143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u1_offset_y,\n" 58243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u2_offset_x,\n" 58343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u2_offset_y)\n" 58443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 58543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int x = get_global_id(0);\n" 58643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int y = get_global_id(1);\n" 58743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(x < I0_col&&y < I0_row)\n" 58843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 58943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];\n" 59043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];\n" 59143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float wx = x + u1Val;\n" 59243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float wy = y + u2Val;\n" 59343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int xmin = ceil(wx - 2.0f);\n" 59443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int xmax = floor(wx + 2.0f);\n" 59543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int ymin = ceil(wy - 2.0f);\n" 59643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int ymax = floor(wy + 2.0f);\n" 59743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float sum = 0.0f;\n" 59843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float sumx = 0.0f;\n" 59943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float sumy = 0.0f;\n" 60043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float wsum = 0.0f;\n" 60143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n" 60243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int cy = ymin; cy <= ymax; ++cy)\n" 60343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 60443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int cx = xmin; cx <= xmax; ++cx)\n" 60543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 60643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);\n" 60743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int2 cood = (int2)(cx, cy);\n" 60843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"sum += w * read_imagef(tex_I1, sampleri, cood).x;\n" 60943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"sumx += w * read_imagef(tex_I1x, sampleri, cood).x;\n" 61043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"sumy += w * read_imagef(tex_I1y, sampleri, cood).x;\n" 61143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"wsum += w;\n" 61243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 61343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 61443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float coeff = 1.0f / wsum;\n" 61543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I1wVal = sum * coeff;\n" 61643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I1wxVal = sumx * coeff;\n" 61743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I1wyVal = sumy * coeff;\n" 61843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"I1w[y * I1w_step + x] = I1wVal;\n" 61943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"I1wx[y * I1w_step + x] = I1wxVal;\n" 62043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"I1wy[y * I1w_step + x] = I1wyVal;\n" 62143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float Ix2 = I1wxVal * I1wxVal;\n" 62243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float Iy2 = I1wyVal * I1wyVal;\n" 62343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"grad[y * I1w_step + x] = Ix2 + Iy2;\n" 62443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I0Val = I0[y * I0_step + x];\n" 62543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;\n" 62643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 62743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 62843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline float readImage(__global float *image, int x, int y, int rows, int cols, int elemCntPerRow)\n" 62943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 63043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int i0 = clamp(x, 0, cols - 1);\n" 63143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int j0 = clamp(y, 0, rows - 1);\n" 63243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return image[j0 * elemCntPerRow + i0];\n" 63343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 63443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row,\n" 63543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,\n" 63643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* u1, int u1_step,\n" 63743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* u2,\n" 63843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* I1w,\n" 63943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* I1wx, \n" 64043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* I1wy, \n" 64143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* grad, \n" 64243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* rho,\n" 64343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int I1w_step,\n" 64443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u2_step,\n" 64543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int I1_step,\n" 64643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int I1x_step)\n" 64743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 64843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int x = get_global_id(0);\n" 64943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int y = get_global_id(1);\n" 65043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(x < I0_col&&y < I0_row)\n" 65143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 65243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u1Val = u1[y * u1_step + x];\n" 65343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u2Val = u2[y * u2_step + x];\n" 65443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float wx = x + u1Val;\n" 65543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float wy = y + u2Val;\n" 65643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int xmin = ceil(wx - 2.0f);\n" 65743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int xmax = floor(wx + 2.0f);\n" 65843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int ymin = ceil(wy - 2.0f);\n" 65943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int ymax = floor(wy + 2.0f);\n" 66043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float sum = 0.0f;\n" 66143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float sumx = 0.0f;\n" 66243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float sumy = 0.0f;\n" 66343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float wsum = 0.0f;\n" 66443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int cy = ymin; cy <= ymax; ++cy)\n" 66543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 66643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (int cx = xmin; cx <= xmax; ++cx)\n" 66743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 66843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);\n" 66943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int2 cood = (int2)(cx, cy);\n" 67043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);\n" 67143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step);\n" 67243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step);\n" 67343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"wsum += w;\n" 67443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 67543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 67643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float coeff = 1.0f / wsum;\n" 67743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I1wVal = sum * coeff;\n" 67843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I1wxVal = sumx * coeff;\n" 67943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I1wyVal = sumy * coeff;\n" 68043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"I1w[y * I1w_step + x] = I1wVal;\n" 68143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"I1wx[y * I1w_step + x] = I1wxVal;\n" 68243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"I1wy[y * I1w_step + x] = I1wyVal;\n" 68343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float Ix2 = I1wxVal * I1wxVal;\n" 68443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float Iy2 = I1wyVal * I1wyVal;\n" 68543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"grad[y * I1w_step + x] = Ix2 + Iy2;\n" 68643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I0Val = I0[y * I0_step + x];\n" 68743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;\n" 68843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 68943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 69043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,\n" 69143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* u2,\n" 69243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* p11, int p11_step,\n" 69343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* p12,\n" 69443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* p21,\n" 69543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* p22,\n" 69643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float taut,\n" 69743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u2_step,\n" 69843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u1_offset_x,\n" 69943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u1_offset_y,\n" 70043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u2_offset_x,\n" 70143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u2_offset_y)\n" 70243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 70343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int x = get_global_id(0);\n" 70443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int y = get_global_id(1);\n" 70543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(x < u1_col && y < u1_row)\n" 70643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 70743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);\n" 70843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];\n" 70943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);\n" 71043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];\n" 71143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);\n" 71243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];\n" 71343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);\n" 71443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];\n" 71543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float g1 = hypot(u1x, u1y);\n" 71643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float g2 = hypot(u2x, u2y);\n" 71743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float ng1 = 1.0f + taut * g1;\n" 71843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float ng2 = 1.0f + taut * g2;\n" 71943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1;\n" 72043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1;\n" 72143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2;\n" 72243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2;\n" 72343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 72443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 72543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)\n" 72643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 72743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (x > 0 && y > 0)\n" 72843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 72943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];\n" 73043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];\n" 73143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return v1x + v2y;\n" 73243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 73343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else\n" 73443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 73543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (y > 0)\n" 73643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return v1[y * v1_step + 0] + v2[y * v2_step + 0] - v2[(y - 1) * v2_step + 0];\n" 73743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else\n" 73843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 73943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (x > 0)\n" 74043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return v1[0 * v1_step + x] - v1[0 * v1_step + x - 1] + v2[0 * v2_step + x];\n" 74143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else\n" 74243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return v1[0 * v1_step + 0] + v2[0 * v2_step + 0];\n" 74343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 74443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 74543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 74643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step,\n" 74743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* I1wy, \n" 74843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* grad, \n" 74943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* rho_c, \n" 75043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* p11, \n" 75143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* p12, \n" 75243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* p21, \n" 75343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float* p22, \n" 75443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* u1, int u1_step,\n" 75543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* u2,\n" 75643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global float* error, float l_t, float theta, int u2_step,\n" 75743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u1_offset_x,\n" 75843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u1_offset_y,\n" 75943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u2_offset_x,\n" 76043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int u2_offset_y,\n" 76143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"char calc_error)\n" 76243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 76343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int x = get_global_id(0);\n" 76443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int y = get_global_id(1);\n" 76543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(x < I1wx_col && y < I1wx_row)\n" 76643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 76743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I1wxVal = I1wx[y * I1wx_step + x];\n" 76843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I1wyVal = I1wy[y * I1wx_step + x];\n" 76943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float gradVal = grad[y * I1wx_step + x];\n" 77043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];\n" 77143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];\n" 77243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);\n" 77343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float d1 = 0.0f;\n" 77443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float d2 = 0.0f;\n" 77543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (rho < -l_t * gradVal)\n" 77643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 77743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"d1 = l_t * I1wxVal;\n" 77843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"d2 = l_t * I1wyVal;\n" 77943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 78043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else if (rho > l_t * gradVal)\n" 78143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 78243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"d1 = -l_t * I1wxVal;\n" 78343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"d2 = -l_t * I1wyVal;\n" 78443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 78543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"else if (gradVal > 1.192092896e-07f)\n" 78643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 78743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float fi = -rho / gradVal;\n" 78843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"d1 = fi * I1wxVal;\n" 78943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"d2 = fi * I1wyVal;\n" 79043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 79143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float v1 = u1OldVal + d1;\n" 79243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float v2 = u2OldVal + d2;\n" 79343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);\n" 79443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);\n" 79543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u1NewVal = v1 + theta * div_p1;\n" 79643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float u2NewVal = v2 + theta * div_p2;\n" 79743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;\n" 79843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;\n" 79943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(calc_error)\n" 80043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 80143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);\n" 80243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);\n" 80343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"error[y * I1wx_step + x] = n1 + n2;\n" 80443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 80543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 80643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 80743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler, "a9d306a49b405703820fae23312ebd28"}; 80843a3f2149b5d3417cc5dc843032ecf05a890c147Noah PreslerProgramSource optical_flow_tvl1_oclsrc(optical_flow_tvl1.programStr); 80943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Preslerconst struct ProgramEntry pyrlk={"pyrlk", 81043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define GRIDSIZE 3\n" 81143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define LSx 8\n" 81243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define LSy 8\n" 81343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define LM_W (LSx*GRIDSIZE+2)\n" 81443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define LM_H (LSy*GRIDSIZE+2)\n" 81543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define BUFFER (LSx*LSy)\n" 81643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define BUFFER2 BUFFER>>1\n" 81743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#ifndef WAVE_SIZE\n" 81843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define WAVE_SIZE 1\n" 81943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 82043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#ifdef CPU\n" 82143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)\n" 82243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 82343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] = val1;\n" 82443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] = val2;\n" 82543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem3[tid] = val3;\n" 82643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 82743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for(int i = BUFFER2; i > 0; i >>= 1)\n" 82843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 82943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(tid < i)\n" 83043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 83143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] += smem1[tid + i];\n" 83243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] += smem2[tid + i];\n" 83343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem3[tid] += smem3[tid + i];\n" 83443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 83543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 83643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 83743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 83843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)\n" 83943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 84043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] = val1;\n" 84143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] = val2;\n" 84243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 84343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for(int i = BUFFER2; i > 0; i >>= 1)\n" 84443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 84543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(tid < i)\n" 84643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 84743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] += smem1[tid + i];\n" 84843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] += smem2[tid + i];\n" 84943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 85043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 85143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 85243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 85343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline void reduce1(float val1, volatile __local float* smem1, int tid)\n" 85443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 85543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] = val1;\n" 85643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 85743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for(int i = BUFFER2; i > 0; i >>= 1)\n" 85843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 85943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(tid < i)\n" 86043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 86143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] += smem1[tid + i];\n" 86243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 86343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 86443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 86543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 86643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#else\n" 86743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline void reduce3(float val1, float val2, float val3,\n" 86843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)\n" 86943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 87043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] = val1;\n" 87143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] = val2;\n" 87243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem3[tid] = val3;\n" 87343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 87443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid < 32)\n" 87543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 87643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] += smem1[tid + 32];\n" 87743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] += smem2[tid + 32];\n" 87843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem3[tid] += smem3[tid + 32];\n" 87943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#if WAVE_SIZE < 32\n" 88043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 88143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 88243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid < 16)\n" 88343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 88443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 88543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] += smem1[tid + 16];\n" 88643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] += smem2[tid + 16];\n" 88743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem3[tid] += smem3[tid + 16];\n" 88843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#if WAVE_SIZE <16\n" 88943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 89043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 89143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid<1)\n" 89243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 89343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 89443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"local float8* m1 = (local float8*)smem1;\n" 89543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"local float8* m2 = (local float8*)smem2;\n" 89643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"local float8* m3 = (local float8*)smem3;\n" 89743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float8 t1 = m1[0]+m1[1];\n" 89843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float8 t2 = m2[0]+m2[1];\n" 89943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float8 t3 = m3[0]+m3[1];\n" 90043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float4 t14 = t1.lo + t1.hi;\n" 90143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float4 t24 = t2.lo + t2.hi;\n" 90243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float4 t34 = t3.lo + t3.hi;\n" 90343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[0] = t14.x+t14.y+t14.z+t14.w;\n" 90443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[0] = t24.x+t24.y+t24.z+t24.w;\n" 90543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem3[0] = t34.x+t34.y+t34.z+t34.w;\n" 90643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 90743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 90843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 90943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)\n" 91043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 91143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] = val1;\n" 91243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] = val2;\n" 91343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 91443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid < 32)\n" 91543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 91643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] += smem1[tid + 32];\n" 91743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] += smem2[tid + 32];\n" 91843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#if WAVE_SIZE < 32\n" 91943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 92043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 92143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid < 16)\n" 92243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 92343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 92443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] += smem1[tid + 16];\n" 92543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[tid] += smem2[tid + 16];\n" 92643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#if WAVE_SIZE <16\n" 92743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 92843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 92943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid<1)\n" 93043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 93143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 93243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"local float8* m1 = (local float8*)smem1;\n" 93343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"local float8* m2 = (local float8*)smem2;\n" 93443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float8 t1 = m1[0]+m1[1];\n" 93543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float8 t2 = m2[0]+m2[1];\n" 93643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float4 t14 = t1.lo + t1.hi;\n" 93743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float4 t24 = t2.lo + t2.hi;\n" 93843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[0] = t14.x+t14.y+t14.z+t14.w;\n" 93943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem2[0] = t24.x+t24.y+t24.z+t24.w;\n" 94043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 94143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 94243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 94343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline void reduce1(float val1, __local volatile float* smem1, int tid)\n" 94443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 94543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] = val1;\n" 94643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 94743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid < 32)\n" 94843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 94943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] += smem1[tid + 32];\n" 95043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#if WAVE_SIZE < 32\n" 95143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 95243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 95343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid < 16)\n" 95443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 95543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 95643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[tid] += smem1[tid + 16];\n" 95743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#if WAVE_SIZE <16\n" 95843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 95943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 96043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid<1)\n" 96143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 96243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 96343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"local float8* m1 = (local float8*)smem1;\n" 96443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float8 t1 = m1[0]+m1[1];\n" 96543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float4 t14 = t1.lo + t1.hi;\n" 96643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"smem1[0] = t14.x+t14.y+t14.z+t14.w;\n" 96743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 96843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 96943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 97043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#endif\n" 97143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define SCALE (1.0f / (1 << 20))\n" 97243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define THRESHOLD 0.01f\n" 97343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;\n" 97443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define VAL(_y,_x,_yy,_xx) (IPatchLocal[(yid+((_y)*LSy)+1+(_yy))*LM_W+(xid+((_x)*LSx)+1+(_xx))])\n" 97543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline void SetPatch(local float* IPatchLocal, int TileY, int TileX,\n" 97643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float* Pch, float* Dx, float* Dy,\n" 97743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float* A11, float* A12, float* A22, float w)\n" 97843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 97943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"unsigned int xid=get_local_id(0);\n" 98043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"unsigned int yid=get_local_id(1);\n" 98143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*Pch = VAL(TileY,TileX,0,0);\n" 98243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float dIdx = (3.0f*VAL(TileY,TileX,-1,1)+10.0f*VAL(TileY,TileX,0,1)+3.0f*VAL(TileY,TileX,+1,1))-(3.0f*VAL(TileY,TileX,-1,-1)+10.0f*VAL(TileY,TileX,0,-1)+3.0f*VAL(TileY,TileX,+1,-1));\n" 98343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float dIdy = (3.0f*VAL(TileY,TileX,1,-1)+10.0f*VAL(TileY,TileX,1,0)+3.0f*VAL(TileY,TileX,1,+1))-(3.0f*VAL(TileY,TileX,-1,-1)+10.0f*VAL(TileY,TileX,-1,0)+3.0f*VAL(TileY,TileX,-1,+1));\n" 98443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dIdx *= w;\n" 98543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"dIdy *= w;\n" 98643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*Dx = dIdx;\n" 98743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*Dy = dIdy;\n" 98843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*A11 += dIdx * dIdx;\n" 98943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*A12 += dIdx * dIdy;\n" 99043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*A22 += dIdy * dIdy;\n" 99143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 99243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#undef VAL\n" 99343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline void GetPatch(image2d_t J, float x, float y,\n" 99443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float* Pch, float* Dx, float* Dy,\n" 99543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float* b1, float* b2)\n" 99643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 99743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float J_val = read_imagef(J, sampler, (float2)(x, y)).x;\n" 99843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float diff = (J_val - *Pch) * 32.0f;\n" 99943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*b1 += diff**Dx;\n" 100043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*b2 += diff**Dy;\n" 100143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 100243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"inline void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval)\n" 100343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 100443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch;\n" 100543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"*errval += fabs(diff);\n" 100643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 100743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#define READI(_y,_x) IPatchLocal[(yid+((_y)*LSy))*LM_W+(xid+((_x)*LSx))] = read_imagef(I, sampler, (float2)(Point.x + xid+(_x)*LSx + 0.5f-1, Point.y + yid+(_y)*LSy+ 0.5f-1)).x;\n" 100843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"void ReadPatchIToLocalMem(image2d_t I, float2 Point, local float* IPatchLocal)\n" 100943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 101043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"unsigned int xid=get_local_id(0);\n" 101143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"unsigned int yid=get_local_id(1);\n" 101243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"READI(0,0);READI(0,1);READI(0,2);\n" 101343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"READI(1,0);READI(1,1);READI(1,2);\n" 101443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"READI(2,0);READI(2,1);READI(2,2);\n" 101543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(xid<2)\n" 101643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 101743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"READI(0,3);\n" 101843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"READI(1,3);\n" 101943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"READI(2,3);\n" 102043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 102143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(yid<2)\n" 102243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 102343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"READI(3,0);READI(3,1);READI(3,2);\n" 102443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 102543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(yid<2 && xid<2)\n" 102643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 102743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"READI(3,3);\n" 102843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 102943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 103043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 103143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"#undef READI\n" 103243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__attribute__((reqd_work_group_size(LSx, LSy, 1)))\n" 103343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__kernel void lkSparse(image2d_t I, image2d_t J,\n" 103443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__global const float2* prevPts, __global float2* nextPts, __global uchar* status, __global float* err,\n" 103543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)\n" 103643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 103743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float smem1[BUFFER];\n" 103843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float smem2[BUFFER];\n" 103943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"__local float smem3[BUFFER];\n" 104043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"unsigned int xid=get_local_id(0);\n" 104143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"unsigned int yid=get_local_id(1);\n" 104243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"unsigned int gid=get_group_id(0);\n" 104343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"unsigned int xsize=get_local_size(0);\n" 104443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"unsigned int ysize=get_local_size(1);\n" 104543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"int xBase, yBase, k;\n" 104643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float wx = ((xid+2*xsize)<c_winSize_x)?1:0;\n" 104743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float wy = ((yid+2*ysize)<c_winSize_y)?1:0;\n" 104843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);\n" 104943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"const int tid = mad24(yid, xsize, xid);\n" 105043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float2 prevPt = prevPts[gid] / (float2)(1 << level);\n" 105143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (prevPt.x < 0 || prevPt.x >= cols || prevPt.y < 0 || prevPt.y >= rows)\n" 105243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 105343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid == 0 && level == 0)\n" 105443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 105543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"status[gid] = 0;\n" 105643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 105743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return;\n" 105843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 105943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"prevPt -= c_halfWin;\n" 106043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float A11 = 0;\n" 106143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float A12 = 0;\n" 106243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float A22 = 0;\n" 106343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float I_patch[GRIDSIZE][GRIDSIZE];\n" 106443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float dIdx_patch[GRIDSIZE][GRIDSIZE];\n" 106543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float dIdy_patch[GRIDSIZE][GRIDSIZE];\n" 106643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"local float IPatchLocal[LM_W*LM_H];\n" 106743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"ReadPatchIToLocalMem(I,prevPt,IPatchLocal);\n" 106843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 106943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"SetPatch(IPatchLocal, 0, 0,\n" 107043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],\n" 107143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&A11, &A12, &A22,1);\n" 107243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"SetPatch(IPatchLocal, 0, 1,\n" 107343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],\n" 107443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&A11, &A12, &A22,1);\n" 107543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"SetPatch(IPatchLocal, 0, 2,\n" 107643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],\n" 107743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&A11, &A12, &A22,wx);\n" 107843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 107943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 108043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"SetPatch(IPatchLocal, 1, 0,\n" 108143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],\n" 108243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&A11, &A12, &A22,1);\n" 108343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"SetPatch(IPatchLocal, 1,1,\n" 108443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],\n" 108543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&A11, &A12, &A22,1);\n" 108643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"SetPatch(IPatchLocal, 1,2,\n" 108743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],\n" 108843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&A11, &A12, &A22,wx);\n" 108943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 109043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 109143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"SetPatch(IPatchLocal, 2,0,\n" 109243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],\n" 109343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&A11, &A12, &A22,wy);\n" 109443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"SetPatch(IPatchLocal, 2,1,\n" 109543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],\n" 109643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&A11, &A12, &A22,wy);\n" 109743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"SetPatch(IPatchLocal, 2,2,\n" 109843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],\n" 109943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&A11, &A12, &A22,wx*wy);\n" 110043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 110143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"reduce3(A11, A12, A22, smem1, smem2, smem3, tid);\n" 110243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"A11 = smem1[0];\n" 110343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"A12 = smem2[0];\n" 110443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"A22 = smem3[0];\n" 110543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 110643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float D = A11 * A22 - A12 * A12;\n" 110743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (D < 1.192092896e-07f)\n" 110843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 110943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid == 0 && level == 0)\n" 111043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"status[gid] = 0;\n" 111143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"return;\n" 111243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 111343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"A11 /= D;\n" 111443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"A12 /= D;\n" 111543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"A22 /= D;\n" 111643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"prevPt = nextPts[gid] * 2.0f - c_halfWin;\n" 111743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"for (k = 0; k < c_iters; ++k)\n" 111843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 111943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (prevPt.x < -c_halfWin.x || prevPt.x >= cols || prevPt.y < -c_halfWin.y || prevPt.y >= rows)\n" 112043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 112143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid == 0 && level == 0)\n" 112243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"status[gid] = 0;\n" 112343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 112443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 112543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float b1 = 0;\n" 112643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float b2 = 0;\n" 112743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"yBase=yid;\n" 112843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 112943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase=xid;\n" 113043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 113143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[0][0], &dIdx_patch[0][0], &dIdy_patch[0][0],\n" 113243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&b1, &b2);\n" 113343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 113443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 113543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[0][1], &dIdx_patch[0][1], &dIdy_patch[0][1],\n" 113643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&b1, &b2);\n" 113743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 113843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 113943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[0][2], &dIdx_patch[0][2], &dIdy_patch[0][2],\n" 114043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&b1, &b2);\n" 114143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 114243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"yBase+=ysize;\n" 114343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 114443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase=xid;\n" 114543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 114643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[1][0], &dIdx_patch[1][0], &dIdy_patch[1][0],\n" 114743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&b1, &b2);\n" 114843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 114943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 115043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[1][1], &dIdx_patch[1][1], &dIdy_patch[1][1],\n" 115143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&b1, &b2);\n" 115243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 115343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 115443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[1][2], &dIdx_patch[1][2], &dIdy_patch[1][2],\n" 115543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&b1, &b2);\n" 115643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 115743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"yBase+=ysize;\n" 115843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 115943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase=xid;\n" 116043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 116143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[2][0], &dIdx_patch[2][0], &dIdy_patch[2][0],\n" 116243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&b1, &b2);\n" 116343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 116443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 116543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[2][1], &dIdx_patch[2][1], &dIdy_patch[2][1],\n" 116643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&b1, &b2);\n" 116743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 116843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetPatch(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 116943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[2][2], &dIdx_patch[2][2], &dIdy_patch[2][2],\n" 117043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&b1, &b2);\n" 117143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 117243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"reduce2(b1, b2, smem1, smem2, tid);\n" 117343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b1 = smem1[0];\n" 117443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"b2 = smem2[0];\n" 117543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"barrier(CLK_LOCAL_MEM_FENCE);\n" 117643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"float2 delta;\n" 117743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"delta.x = A12 * b2 - A22 * b1;\n" 117843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"delta.y = A12 * b1 - A11 * b2;\n" 117943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"prevPt += delta;\n" 118043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (fabs(delta.x) < THRESHOLD && fabs(delta.y) < THRESHOLD)\n" 118143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"break;\n" 118243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 118343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"D = 0.0f;\n" 118443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (calcErr)\n" 118543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 118643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"yBase=yid;\n" 118743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 118843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase=xid;\n" 118943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 119043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[0][0], &D);\n" 119143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 119243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 119343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[0][1], &D);\n" 119443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 119543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(xBase<c_winSize_x)\n" 119643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 119743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[0][2], &D);\n" 119843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 119943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"yBase+=ysize;\n" 120043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 120143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase=xid;\n" 120243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 120343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[1][0], &D);\n" 120443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 120543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 120643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[1][1], &D);\n" 120743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 120843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(xBase<c_winSize_x)\n" 120943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 121043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[1][2], &D);\n" 121143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 121243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"yBase+=ysize;\n" 121343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(yBase<c_winSize_y)\n" 121443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 121543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase=xid;\n" 121643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 121743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[2][0], &D);\n" 121843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 121943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 122043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[2][1], &D);\n" 122143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"xBase+=xsize;\n" 122243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if(xBase<c_winSize_x)\n" 122343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"GetError(J, prevPt.x + xBase + 0.5f, prevPt.y + yBase + 0.5f,\n" 122443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"&I_patch[2][2], &D);\n" 122543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 122643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"reduce1(D, smem1, tid);\n" 122743a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 122843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (tid == 0)\n" 122943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"{\n" 123043a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"prevPt += c_halfWin;\n" 123143a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"nextPts[gid] = prevPt;\n" 123243a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"if (calcErr)\n" 123343a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);\n" 123443a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 123543a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler"}\n" 123643a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler, "b7099fcbc60bd5528dacc491eadd88c1"}; 123743a3f2149b5d3417cc5dc843032ecf05a890c147Noah PreslerProgramSource pyrlk_oclsrc(pyrlk.programStr); 123843a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler} 123943a3f2149b5d3417cc5dc843032ecf05a890c147Noah Presler}} 1240