1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15 16#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_ 17#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_ 18 19#include <stdint.h> 20 21#include "tensorflow/examples/android/jni/object_tracking/geom.h" 22#include "tensorflow/examples/android/jni/object_tracking/utils.h" 23 24// TODO(andrewharp): Make this a cast to uint32_t if/when we go unsigned for 25// operations. 26#define ZERO 0 27 28#ifdef SANITY_CHECKS 29 #define CHECK_PIXEL(IMAGE, X, Y) {\ 30 SCHECK((IMAGE)->ValidPixel((X), (Y)), \ 31 "CHECK_PIXEL(%d,%d) in %dx%d image.", \ 32 static_cast<int>(X), static_cast<int>(Y), \ 33 (IMAGE)->GetWidth(), (IMAGE)->GetHeight());\ 34 } 35 36 #define CHECK_PIXEL_INTERP(IMAGE, X, Y) {\ 37 SCHECK((IMAGE)->validInterpPixel((X), (Y)), \ 38 "CHECK_PIXEL_INTERP(%.2f, %.2f) in %dx%d image.", \ 39 static_cast<float>(X), static_cast<float>(Y), \ 40 (IMAGE)->GetWidth(), (IMAGE)->GetHeight());\ 41 } 42#else 43 #define CHECK_PIXEL(image, x, y) {} 44 #define CHECK_PIXEL_INTERP(IMAGE, X, Y) {} 45#endif 46 47namespace tf_tracking { 48 49#ifdef SANITY_CHECKS 50// Class which exists solely to provide bounds checking for array-style image 51// data access. 52template <typename T> 53class RowData { 54 public: 55 RowData(T* const row_data, const int max_col) 56 : row_data_(row_data), max_col_(max_col) {} 57 58 inline T& operator[](const int col) const { 59 SCHECK(InRange(col, 0, max_col_), 60 "Column out of range: %d (%d max)", col, max_col_); 61 return row_data_[col]; 62 } 63 64 inline operator T*() const { 65 return row_data_; 66 } 67 68 private: 69 T* const row_data_; 70 const int max_col_; 71}; 72#endif 73 74// Naive templated sorting function. 75template <typename T> 76int Comp(const void* a, const void* b) { 77 const T val1 = *reinterpret_cast<const T*>(a); 78 const T val2 = *reinterpret_cast<const T*>(b); 79 80 if (val1 == val2) { 81 return 0; 82 } else if (val1 < val2) { 83 return -1; 84 } else { 85 return 1; 86 } 87} 88 89// TODO(andrewharp): Make explicit which operations support negative numbers or 90// struct/class types in image data (possibly create fast multi-dim array class 91// for data where pixel arithmetic does not make sense). 92 93// Image class optimized for working on numeric arrays as grayscale image data. 94// Supports other data types as a 2D array class, so long as no pixel math 95// operations are called (convolution, downsampling, etc). 96template <typename T> 97class Image { 98 public: 99 Image(const int width, const int height); 100 explicit Image(const Size& size); 101 102 // Constructor that creates an image from preallocated data. 103 // Note: The image takes ownership of the data lifecycle, unless own_data is 104 // set to false. 105 Image(const int width, const int height, T* const image_data, 106 const bool own_data = true); 107 108 ~Image(); 109 110 // Extract a pixel patch from this image, starting at a subpixel location. 111 // Uses 16:16 fixed point format for representing real values and doing the 112 // bilinear interpolation. 113 // 114 // Arguments fp_x and fp_y tell the subpixel position in fixed point format, 115 // patchwidth/patchheight give the size of the patch in pixels and 116 // to_data must be a valid pointer to a *contiguous* destination data array. 117 template<class DstType> 118 bool ExtractPatchAtSubpixelFixed1616(const int fp_x, 119 const int fp_y, 120 const int patchwidth, 121 const int patchheight, 122 DstType* to_data) const; 123 124 Image<T>* Crop( 125 const int left, const int top, const int right, const int bottom) const; 126 127 inline int GetWidth() const { return width_; } 128 inline int GetHeight() const { return height_; } 129 130 // Bilinearly sample a value between pixels. Values must be within the image. 131 inline float GetPixelInterp(const float x, const float y) const; 132 133 // Bilinearly sample a pixels at a subpixel position using fixed point 134 // arithmetic. 135 // Avoids float<->int conversions. 136 // Values must be within the image. 137 // Arguments fp_x and fp_y tell the subpixel position in 138 // 16:16 fixed point format. 139 // 140 // Important: This function only makes sense for integer-valued images, such 141 // as Image<uint8_t> or Image<int> etc. 142 inline T GetPixelInterpFixed1616(const int fp_x_whole, 143 const int fp_y_whole) const; 144 145 // Returns true iff the pixel is in the image's boundaries. 146 inline bool ValidPixel(const int x, const int y) const; 147 148 inline BoundingBox GetContainingBox() const; 149 150 inline bool Contains(const BoundingBox& bounding_box) const; 151 152 inline T GetMedianValue() { 153 qsort(image_data_, data_size_, sizeof(image_data_[0]), Comp<T>); 154 return image_data_[data_size_ >> 1]; 155 } 156 157 // Returns true iff the pixel is in the image's boundaries for interpolation 158 // purposes. 159 // TODO(andrewharp): check in interpolation follow-up change. 160 inline bool ValidInterpPixel(const float x, const float y) const; 161 162 // Safe lookup with boundary enforcement. 163 inline T GetPixelClipped(const int x, const int y) const { 164 return (*this)[Clip(y, ZERO, height_less_one_)] 165 [Clip(x, ZERO, width_less_one_)]; 166 } 167 168#ifdef SANITY_CHECKS 169 inline RowData<T> operator[](const int row) { 170 SCHECK(InRange(row, 0, height_less_one_), 171 "Row out of range: %d (%d max)", row, height_less_one_); 172 return RowData<T>(image_data_ + row * stride_, width_less_one_); 173 } 174 175 inline const RowData<T> operator[](const int row) const { 176 SCHECK(InRange(row, 0, height_less_one_), 177 "Row out of range: %d (%d max)", row, height_less_one_); 178 return RowData<T>(image_data_ + row * stride_, width_less_one_); 179 } 180#else 181 inline T* operator[](const int row) { 182 return image_data_ + row * stride_; 183 } 184 185 inline const T* operator[](const int row) const { 186 return image_data_ + row * stride_; 187 } 188#endif 189 190 const T* data() const { return image_data_; } 191 192 inline int stride() const { return stride_; } 193 194 // Clears image to a single value. 195 inline void Clear(const T& val) { 196 memset(image_data_, val, sizeof(*image_data_) * data_size_); 197 } 198 199#ifdef __ARM_NEON 200 void Downsample2x32ColumnsNeon(const uint8_t* const original, 201 const int stride, const int orig_x); 202 203 void Downsample4x32ColumnsNeon(const uint8_t* const original, 204 const int stride, const int orig_x); 205 206 void DownsampleAveragedNeon(const uint8_t* const original, const int stride, 207 const int factor); 208#endif 209 210 // Naive downsampler that reduces image size by factor by averaging pixels in 211 // blocks of size factor x factor. 212 void DownsampleAveraged(const T* const original, const int stride, 213 const int factor); 214 215 // Naive downsampler that reduces image size by factor by averaging pixels in 216 // blocks of size factor x factor. 217 inline void DownsampleAveraged(const Image<T>& original, const int factor) { 218 DownsampleAveraged(original.data(), original.GetWidth(), factor); 219 } 220 221 // Native downsampler that reduces image size using nearest interpolation 222 void DownsampleInterpolateNearest(const Image<T>& original); 223 224 // Native downsampler that reduces image size using fixed-point bilinear 225 // interpolation 226 void DownsampleInterpolateLinear(const Image<T>& original); 227 228 // Relatively efficient downsampling of an image by a factor of two with a 229 // low-pass 3x3 smoothing operation thrown in. 230 void DownsampleSmoothed3x3(const Image<T>& original); 231 232 // Relatively efficient downsampling of an image by a factor of two with a 233 // low-pass 5x5 smoothing operation thrown in. 234 void DownsampleSmoothed5x5(const Image<T>& original); 235 236 // Optimized Scharr filter on a single pixel in the X direction. 237 // Scharr filters are like central-difference operators, but have more 238 // rotational symmetry in their response because they also consider the 239 // diagonal neighbors. 240 template <typename U> 241 inline T ScharrPixelX(const Image<U>& original, 242 const int center_x, const int center_y) const; 243 244 // Optimized Scharr filter on a single pixel in the X direction. 245 // Scharr filters are like central-difference operators, but have more 246 // rotational symmetry in their response because they also consider the 247 // diagonal neighbors. 248 template <typename U> 249 inline T ScharrPixelY(const Image<U>& original, 250 const int center_x, const int center_y) const; 251 252 // Convolve the image with a Scharr filter in the X direction. 253 // Much faster than an equivalent generic convolution. 254 template <typename U> 255 inline void ScharrX(const Image<U>& original); 256 257 // Convolve the image with a Scharr filter in the Y direction. 258 // Much faster than an equivalent generic convolution. 259 template <typename U> 260 inline void ScharrY(const Image<U>& original); 261 262 static inline T HalfDiff(int32_t first, int32_t second) { 263 return (second - first) / 2; 264 } 265 266 template <typename U> 267 void DerivativeX(const Image<U>& original); 268 269 template <typename U> 270 void DerivativeY(const Image<U>& original); 271 272 // Generic function for convolving pixel with 3x3 filter. 273 // Filter pixels should be in row major order. 274 template <typename U> 275 inline T ConvolvePixel3x3(const Image<U>& original, 276 const int* const filter, 277 const int center_x, const int center_y, 278 const int total) const; 279 280 // Generic function for convolving an image with a 3x3 filter. 281 // TODO(andrewharp): Generalize this for any size filter. 282 template <typename U> 283 inline void Convolve3x3(const Image<U>& original, 284 const int32_t* const filter); 285 286 // Load this image's data from a data array. The data at pixels is assumed to 287 // have dimensions equivalent to this image's dimensions * factor. 288 inline void FromArray(const T* const pixels, const int stride, 289 const int factor = 1); 290 291 // Copy the image back out to an appropriately sized data array. 292 inline void ToArray(T* const pixels) const { 293 // If not subsampling, memcpy should be faster. 294 memcpy(pixels, this->image_data_, data_size_ * sizeof(T)); 295 } 296 297 // Precompute these for efficiency's sake as they're used by a lot of 298 // clipping code and loop code. 299 // TODO(andrewharp): make these only accessible by other Images. 300 const int width_less_one_; 301 const int height_less_one_; 302 303 // The raw size of the allocated data. 304 const int data_size_; 305 306 private: 307 inline void Allocate() { 308 image_data_ = new T[data_size_]; 309 if (image_data_ == NULL) { 310 LOGE("Couldn't allocate image data!"); 311 } 312 } 313 314 T* image_data_; 315 316 bool own_data_; 317 318 const int width_; 319 const int height_; 320 321 // The image stride (offset to next row). 322 // TODO(andrewharp): Make sure that stride is honored in all code. 323 const int stride_; 324 325 TF_DISALLOW_COPY_AND_ASSIGN(Image); 326}; 327 328template <typename t> 329inline std::ostream& operator<<(std::ostream& stream, const Image<t>& image) { 330 for (int y = 0; y < image.GetHeight(); ++y) { 331 for (int x = 0; x < image.GetWidth(); ++x) { 332 stream << image[y][x] << " "; 333 } 334 stream << std::endl; 335 } 336 return stream; 337} 338 339} // namespace tf_tracking 340 341#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_ 342