bucketize_op_gpu.cu.cc revision ad7eeec1cc06d7fdba6ee404f03a35fab9cd3e6a
1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#if GOOGLE_CUDA
17
18#define EIGEN_USE_GPU
19
20#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
21
22#include "tensorflow/core/framework/op_kernel.h"
23#include "tensorflow/core/framework/register_types.h"
24#include "tensorflow/core/framework/tensor.h"
25#include "tensorflow/core/framework/tensor_shape.h"
26#include "tensorflow/core/kernels/bucketize_op.h"
27#include "tensorflow/core/kernels/cuda_device_array.h"
28#include "tensorflow/core/platform/logging.h"
29#include "tensorflow/core/platform/types.h"
30#include "tensorflow/core/util/cuda_kernel_helper.h"
31
32namespace tensorflow {
33
34typedef Eigen::GpuDevice GPUDevice;
35
36template <typename T>
37__global__ void BucketizeCustomKernel(
38    const int32 size_in, const T* in, const int32 size_boundaries,
39    CudaDeviceArrayStruct<float> boundaries_array, int32* out) {
40  const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array);
41  CUDA_1D_KERNEL_LOOP(i, size_in) {
42    T value = in[i];
43    int32 bucket = 0;
44    int32 count = size_boundaries;
45    while (count > 0) {
46      int32 l = bucket;
47      int32 step = count / 2;
48      l += step;
49      if (!(value < static_cast<T>(boundaries[l]))) {
50        bucket = ++l;
51        count -= step + 1;
52      } else {
53        count = step;
54      }
55    }
56    out[i] = bucket;
57  }
58}
59
60namespace functor {
61
62template <typename T>
63struct BucketizeFunctor<GPUDevice, T> {
64  // PRECONDITION: boundaries_vector must be sorted.
65  static Status Compute(OpKernelContext* context,
66                        const typename TTypes<T, 1>::ConstTensor& input,
67                        const std::vector<float>& boundaries_vector,
68                        typename TTypes<int32, 1>::Tensor& output) {
69    const GPUDevice& d = context->eigen_device<GPUDevice>();
70
71    CudaDeviceArrayOnHost<float> boundaries_array(context,
72                                                  boundaries_vector.size());
73    TF_RETURN_IF_ERROR(boundaries_array.Init());
74    for (int i = 0; i < boundaries_vector.size(); ++i) {
75      boundaries_array.Set(i, boundaries_vector[i]);
76    }
77    TF_RETURN_IF_ERROR(boundaries_array.Finalize());
78
79    CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
80    BucketizeCustomKernel<
81        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
82        input.size(), input.data(), boundaries_vector.size(),
83        boundaries_array.data(), output.data());
84
85    return Status::OK();
86  }
87};
88}  // namespace functor
89
90#define REGISTER_GPU_SPEC(type) \
91  template struct functor::BucketizeFunctor<GPUDevice, type>;
92
93REGISTER_GPU_SPEC(int32);
94REGISTER_GPU_SPEC(int64);
95REGISTER_GPU_SPEC(float);
96REGISTER_GPU_SPEC(double);
97#undef REGISTER_GPU_SPEC
98
99}  // namespace tensorflow
100
101#endif  // GOOGLE_CUDA
102