Searched refs:src_lane (Results 1 - 6 of 6) sorted by relevance

/external/tensorflow/tensorflow/core/util/
H A Dcuda_device_functions.h122 // defined value, assuming the calling lane will read from src_lane as part of
125 // Specifically, returns true iff mask has the calling lane bit and the src_lane
126 // bit set, and the src_lane calls this function with the same mask value
133 unsigned src_lane) {
134 unsigned src_dst_mask = 1u << CudaLaneId() | 1u << src_lane;
136 unsigned src_lane_mask = __shfl_sync(mask, mask, src_lane);
138 unsigned src_lane_mask = __shfl(mask, src_lane);
144 __device__ inline unsigned CudaShuffleGetSrcLane(int src_lane, int width) {
147 int lane_offset = src_lane & width - 1;
173 int src_lane
[all...]
H A Dcuda_kernel_helper_test.cu.cc101 for (int src_lane = -warpSize; src_lane <= warpSize; ++src_lane) {
102 unsigned actual_lane = detail::CudaShuffleGetSrcLane(src_lane, width);
104 CudaShuffleSync(kCudaWarpAll, lane_id, src_lane, width);
105 check_result("Shuffle", src_lane, actual_lane, expect_lane);
H A Dcuda_kernel_helper.h69 int src_lane,
72 CudaShuffleSync(mask, static_cast<uint16>(value), src_lane, width));
68 CudaShuffleSync(unsigned mask, Eigen::half value, int src_lane, int width = warpSize) argument
/external/v8/src/arm/
H A Dmacro-assembler-arm.h567 void ReplaceLane(QwNeonRegister dst, QwNeonRegister src, Register src_lane,
570 SwVfpRegister src_lane, Register scratch, int lane);
H A Dmacro-assembler-arm.cc1166 Register src_lane, NeonDataType dt, int lane) {
1175 vmov(dt, double_dst, double_lane, src_lane);
1179 SwVfpRegister src_lane, Register scratch,
1183 VmovExtended(s_code, src_lane.code(), scratch);
/external/vixl/src/aarch64/
H A Dlogic-aarch64.cc1720 uint64_t src_lane = src.Uint(vform, i); local
1722 uint64_t shifted = src_lane << shift;
1772 uint64_t src_lane = src.Uint(vform, i); local
1780 shifted = src_lane >> shift;

Completed in 406 milliseconds