1/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15 16#ifndef TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_ 17#define TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_ 18 19// Connected component analysis. The op is described in ../ops/image_ops.cc. A 20// description of the algorithm appears below. 21 22#define EIGEN_USE_THREADS 23 24#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 25#include "tensorflow/core/framework/op_kernel.h" 26#include "tensorflow/core/framework/tensor_types.h" 27#include "tensorflow/core/platform/types.h" 28#include "tensorflow/core/util/work_sharder.h" 29 30namespace tensorflow { 31 32namespace functor { 33 34template <typename T> 35bool is_nonzero(T value) { 36 return value != T(0); 37} 38 39template <> 40bool is_nonzero(string value) { 41 return value.size() != 0; 42} 43 44// Processes each pixel of an image for union-find, in parallel blocks. This is 45// loosely based on the algorithm in "GPU Computing Gems" by Ondrej Stava and 46// Bedrich Benes, available here: 47// http://hpcg.purdue.edu/bbenes/papers/Stava2011CCL.pdf 48// The bulk of the process uses blocks of each image, which have each been 49// processed separately. As long as there are multiple blocks in the image, we 50// double the height and width of the blocks, creating new blocks which each 51// consist of 2x2 previous sub-blocks. On each new block, we process adjacent 52// pixels from the previous sub-blocks serially. However, the new blocks are not 53// connected, so we can process each block in parallel. 54// The GPU algorithm first processes blocks of a fixed size in GPU shared 55// memory, with one image block per CUDA thread block. On the CPU, we just start 56// with a block size of a single pixel, and borrow the rest of the algorithm 57// unchanged. 58template <typename T> 59class BlockedImageUnionFindFunctor { 60 public: 61 using OutputType = int64; 62 63 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlockedImageUnionFindFunctor( 64 const T* images, const int64 num_rows, const int64 num_cols, 65 OutputType* forest, OutputType* rank) 66 : images_(images), 67 num_rows_(num_rows), 68 num_cols_(num_cols), 69 block_height_(1), 70 block_width_(1), 71 forest_(forest), 72 rank_(rank) {} 73 74 // Returns the root of the tree that the pixel at the given index belongs to. 75 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType 76 find(OutputType index) const { 77 while (forest_[index] != index) { 78 index = forest_[index]; 79 } 80 return index; 81 } 82 83 // Returns the number of blocks along the y axis. 84 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks_vertically() const { 85 return (num_rows_ + block_height_ - 1) / block_height_; 86 } 87 88 // Returns the number of blocks along the x axis. 89 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks_horizontally() const { 90 return (num_cols_ + block_width_ - 1) / block_width_; 91 } 92 93 // Returns the total number of blocks in each image. 94 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks() const { 95 return num_blocks_vertically() * num_blocks_horizontally(); 96 } 97 98 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 block_height() const { 99 return block_height_; 100 } 101 102 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 block_width() const { 103 return block_width_; 104 } 105 106 // Returns whether we may merge again (the image contains more than one 107 // block). 108 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool can_merge() const { 109 return block_height_ < num_rows_ || block_width_ < num_cols_; 110 } 111 112 // Doubles the block size. After this method, you must call 113 // `merge_internal_block_edges` for each image and each *new* block's xy 114 // coordinates (typically in parallel). 115 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void merge_blocks() { 116 block_height_ *= 2; 117 block_width_ *= 2; 118 } 119 120 // Processes pairs of pixels within the block which were adjacent in the four 121 // sub-blocks. This must be done at each stage so that the connected 122 // components in each block are joined correctly. 123 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void merge_internal_block_edges( 124 int64 image_index, int64 block_vertical_index, 125 int64 block_horizontal_index) const { 126 int64 block_start_y = block_vertical_index * block_height_; 127 int64 block_start_x = block_horizontal_index * block_width_; 128 // Merge the 4 sub-blocks horizontally (fixing the vertical seam). 129 int64 block_center_x = block_start_x + block_width_ / 2 - 1; 130 if (0 <= block_center_x && block_center_x + 1 < num_cols_) { 131 int64 merge_blocks_limit_y = 132 std::min(num_rows_, block_start_y + block_height_); 133 for (int64 y = block_start_y; y < merge_blocks_limit_y; y++) { 134 union_right(image_index, y, block_center_x); 135 } 136 } 137 // Merge the 4 sub-blocks vertically (fixing the horizontal seam). 138 int64 block_center_y = block_start_y + block_height_ / 2 - 1; 139 if (0 <= block_center_y && block_center_y + 1 < num_rows_) { 140 int64 merge_blocks_limit_x = 141 std::min(num_cols_, block_start_x + block_width_); 142 for (int64 x = block_start_x; x < merge_blocks_limit_x; x++) { 143 union_down(image_index, block_center_y, x); 144 } 145 } 146 } 147 148 private: 149 // The input image(s). 150 const T* const images_; 151 const int64 num_rows_; 152 const int64 num_cols_; 153 // Current height of each sub-block of the image. 154 int64 block_height_; 155 // Current width of each sub-block of the image. 156 int64 block_width_; 157 // Union-find forest. This has the same size as `images_`, and each entry 158 // holds the index of its parent in `images_` (roots hold their own index). 159 // Cycles should not occur. 160 OutputType* const forest_; 161 // Union-find rank of each pixel. 162 OutputType* const rank_; 163 164 // Unions the pixel with the pixel below it if applicable (both pixels are 165 // true, and the pixel is not in the last row). 166 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void union_down(OutputType batch, 167 OutputType row, 168 OutputType col) const { 169 T pixel = read_pixel(batch, row, col); 170 if (is_nonzero<T>(pixel)) { 171 const int64 index_a = col + num_cols_ * (row + num_rows_ * batch); 172 if (row + 1 < num_rows_ && read_pixel(batch, row + 1, col) == pixel) { 173 const int64 index_b = col + num_cols_ * (row + 1 + num_rows_ * batch); 174 do_union(index_a, index_b); 175 } 176 } 177 } 178 179 // Unions the pixel with the pixel to the right of it if applicable. 180 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void union_right(OutputType batch, 181 OutputType row, 182 OutputType col) const { 183 T pixel = read_pixel(batch, row, col); 184 if (is_nonzero<T>(pixel)) { 185 const int64 index_a = col + num_cols_ * (row + num_rows_ * batch); 186 if (col + 1 < num_cols_ && read_pixel(batch, row, col + 1) == pixel) { 187 const int64 index_b = col + 1 + num_cols_ * (row + num_rows_ * batch); 188 do_union(index_a, index_b); 189 } 190 } 191 } 192 193 // Reads a pixel value in the images. 194 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T 195 read_pixel(const OutputType batch, const OutputType row, 196 const OutputType col) const { 197 return images_[col + num_cols_ * (row + num_rows_ * batch)]; 198 } 199 200 // Unions the trees that the two pixels belong to, using their index in the 201 // `images_` array. 202 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void do_union( 203 OutputType index_a, OutputType index_b) const { 204 // Find the roots of index_a and index_b in the forest, and make one the 205 // child of the other. 206 index_a = find(index_a); 207 index_b = find(index_b); 208 const OutputType rank_a = rank_[index_a]; 209 const OutputType rank_b = rank_[index_b]; 210 OutputType parent, child; 211 if (index_a == index_b) { 212 return; 213 } else if (rank_a < rank_b) { 214 parent = index_a; 215 child = index_b; 216 } else { 217 parent = index_b; 218 child = index_a; 219 rank_[parent]++; 220 } 221 forest_[child] = parent; 222 } 223}; 224 225// Runs the ImageUnionFindFunctor on all pixels. Will require different CPU and 226// GPU implementations. 227template <typename Device, typename T> 228class ImageConnectedComponentsFunctor { 229 public: 230 using OutputType = typename BlockedImageUnionFindFunctor<T>::OutputType; 231 232 void operator()(OpKernelContext* ctx, 233 typename TTypes<T, 3>::ConstTensor images, 234 typename TTypes<OutputType, 3>::Tensor forest, 235 typename TTypes<OutputType, 3>::Tensor rank); 236}; 237 238// Fills a flat Tensor with indices from 0 to n - 1. 239template <typename Device> 240class TensorRangeFunctor { 241 public: 242 using OutputType = typename BlockedImageUnionFindFunctor<bool>::OutputType; 243 244 void operator()(const Device& device, 245 typename TTypes<OutputType>::Flat tensor) { 246 tensor.device(device) = tensor.generate(TensorRangeGenerator()); 247 } 248 249 private: 250 class TensorRangeGenerator { 251 public: 252 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType 253 operator()(const Eigen::array<Eigen::DenseIndex, 1>& coords) const { 254 return coords[0]; 255 } 256 }; 257}; 258 259// Given the union-find forest, generates the root index for each node. This 260// gives us arbitrary, usually non-consecutive ids for each connected component. 261// The ids are massaged in Python to get deterministic, consecutive ids. 262template <typename Device, typename T> 263class FindRootFunctor { 264 public: 265 using OutputType = typename BlockedImageUnionFindFunctor<T>::OutputType; 266 267 void operator()(const Device& device, 268 typename TTypes<OutputType>::Flat component_ids, 269 const T* images, 270 const BlockedImageUnionFindFunctor<T>& union_find) { 271 component_ids.device(device) = 272 component_ids.generate(FindRootGenerator(images, union_find)); 273 } 274 275 private: 276 class FindRootGenerator { 277 const T* const images_; 278 const BlockedImageUnionFindFunctor<T> union_find_; 279 280 public: 281 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE FindRootGenerator( 282 const T* images, BlockedImageUnionFindFunctor<T> union_find) 283 : images_(images), union_find_(union_find) {} 284 285 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType 286 operator()(const Eigen::array<Eigen::DenseIndex, 1>& coords) const { 287 if (is_nonzero<T>(images_[coords[0]])) { 288 // True pixels have an arbitrary segment id > 0. The segment ids will be 289 // made contiguous later. 290 return union_find_.find(coords[0]) + 1; 291 } else { 292 // False pixels have a segment of 0. 293 return 0; 294 } 295 } 296 }; 297}; 298 299} // end namespace functor 300 301} // namespace tensorflow 302 303#endif // TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_ 304