Lines Matching defs:src1

84             static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
87 dst.x = op(src1.x, src2.x);
102 static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
105 dst.x = op(src1.x, src2.x);
107 dst.y = op(src1.y, src2.y);
124 static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
127 dst.x = op(src1.x, src2.x);
129 dst.y = op(src1.y, src2.y);
131 dst.z = op(src1.z, src2.z);
150 static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
153 dst.x = op(src1.x, src2.x);
155 dst.y = op(src1.y, src2.y);
157 dst.z = op(src1.z, src2.z);
159 dst.w = op(src1.w, src2.w);
186 static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
189 dst.a0 = op(src1.a0, src2.a0);
191 dst.a1 = op(src1.a1, src2.a1);
193 dst.a2 = op(src1.a2, src2.a2);
195 dst.a3 = op(src1.a3, src2.a3);
197 dst.a4 = op(src1.a4, src2.a4);
199 dst.a5 = op(src1.a5, src2.a5);
201 dst.a6 = op(src1.a6, src2.a6);
203 dst.a7 = op(src1.a7, src2.a7);
270 const T1* src1 = src1_.ptr(y);
276 const read_type1 src1_n_el = ((const read_type1*)src1)[x];
289 dst[real_x] = op(src1[real_x], src2[real_x]);
296 static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
302 if (x < src1.cols && y < src1.rows && mask(y, x))
304 const T1 src1_data = src1.ptr(y)[x];
329 static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
334 const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
336 transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
370 static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
376 if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
380 TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
385 const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
387 transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);