bucketize_op_gpu.cu.cc revision ad7eeec1cc06d7fdba6ee404f03a35fab9cd3e6a
1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15 16#if GOOGLE_CUDA 17 18#define EIGEN_USE_GPU 19 20#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 21 22#include "tensorflow/core/framework/op_kernel.h" 23#include "tensorflow/core/framework/register_types.h" 24#include "tensorflow/core/framework/tensor.h" 25#include "tensorflow/core/framework/tensor_shape.h" 26#include "tensorflow/core/kernels/bucketize_op.h" 27#include "tensorflow/core/kernels/cuda_device_array.h" 28#include "tensorflow/core/platform/logging.h" 29#include "tensorflow/core/platform/types.h" 30#include "tensorflow/core/util/cuda_kernel_helper.h" 31 32namespace tensorflow { 33 34typedef Eigen::GpuDevice GPUDevice; 35 36template <typename T> 37__global__ void BucketizeCustomKernel( 38 const int32 size_in, const T* in, const int32 size_boundaries, 39 CudaDeviceArrayStruct<float> boundaries_array, int32* out) { 40 const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array); 41 CUDA_1D_KERNEL_LOOP(i, size_in) { 42 T value = in[i]; 43 int32 bucket = 0; 44 int32 count = size_boundaries; 45 while (count > 0) { 46 int32 l = bucket; 47 int32 step = count / 2; 48 l += step; 49 if (!(value < static_cast<T>(boundaries[l]))) { 50 bucket = ++l; 51 count -= step + 1; 52 } else { 53 count = step; 54 } 55 } 56 out[i] = bucket; 57 } 58} 59 60namespace functor { 61 62template <typename T> 63struct BucketizeFunctor<GPUDevice, T> { 64 // PRECONDITION: boundaries_vector must be sorted. 65 static Status Compute(OpKernelContext* context, 66 const typename TTypes<T, 1>::ConstTensor& input, 67 const std::vector<float>& boundaries_vector, 68 typename TTypes<int32, 1>::Tensor& output) { 69 const GPUDevice& d = context->eigen_device<GPUDevice>(); 70 71 CudaDeviceArrayOnHost<float> boundaries_array(context, 72 boundaries_vector.size()); 73 TF_RETURN_IF_ERROR(boundaries_array.Init()); 74 for (int i = 0; i < boundaries_vector.size(); ++i) { 75 boundaries_array.Set(i, boundaries_vector[i]); 76 } 77 TF_RETURN_IF_ERROR(boundaries_array.Finalize()); 78 79 CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d); 80 BucketizeCustomKernel< 81 T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>( 82 input.size(), input.data(), boundaries_vector.size(), 83 boundaries_array.data(), output.data()); 84 85 return Status::OK(); 86 } 87}; 88} // namespace functor 89 90#define REGISTER_GPU_SPEC(type) \ 91 template struct functor::BucketizeFunctor<GPUDevice, type>; 92 93REGISTER_GPU_SPEC(int32); 94REGISTER_GPU_SPEC(int64); 95REGISTER_GPU_SPEC(float); 96REGISTER_GPU_SPEC(double); 97#undef REGISTER_GPU_SPEC 98 99} // namespace tensorflow 100 101#endif // GOOGLE_CUDA 102