1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_
17#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_
18
19#include <stdint.h>
20
21#include "tensorflow/examples/android/jni/object_tracking/geom.h"
22#include "tensorflow/examples/android/jni/object_tracking/utils.h"
23
// Zero constant used as the lower bound in clipping code.
// TODO(andrewharp): Make this a cast to uint32_t if/when we go unsigned for
// operations.
#define ZERO 0

// Debug-only pixel coordinate checks.
//
// CHECK_PIXEL(IMAGE, X, Y) verifies IMAGE->ValidPixel(X, Y);
// CHECK_PIXEL_INTERP(IMAGE, X, Y) verifies IMAGE->ValidInterpPixel(X, Y).
// IMAGE must be a pointer-like expression. When SANITY_CHECKS is not defined
// both macros expand to an empty block and their arguments are NOT evaluated,
// so arguments must not carry side effects.
//
// SCHECK is not defined in this file (presumably provided by utils.h).
#ifdef SANITY_CHECKS
  #define CHECK_PIXEL(IMAGE, X, Y) {\
    SCHECK((IMAGE)->ValidPixel((X), (Y)), \
          "CHECK_PIXEL(%d,%d) in %dx%d image.", \
          static_cast<int>(X), static_cast<int>(Y), \
          (IMAGE)->GetWidth(), (IMAGE)->GetHeight());\
  }

  // Note: the method is spelled ValidInterpPixel (capital V) on Image.
  #define CHECK_PIXEL_INTERP(IMAGE, X, Y) {\
    SCHECK((IMAGE)->ValidInterpPixel((X), (Y)), \
          "CHECK_PIXEL_INTERP(%.2f, %.2f) in %dx%d image.", \
          static_cast<float>(X), static_cast<float>(Y), \
          (IMAGE)->GetWidth(), (IMAGE)->GetHeight());\
  }
#else
  #define CHECK_PIXEL(IMAGE, X, Y) {}
  #define CHECK_PIXEL_INTERP(IMAGE, X, Y) {}
#endif
46
47namespace tf_tracking {
48
#ifdef SANITY_CHECKS
// Thin wrapper around a pointer to one row of image data. It exists only so
// that array-style column access can be range checked in SANITY_CHECKS builds;
// the whole class is compiled out otherwise.
template <typename T>
class RowData {
 public:
  // row_data: pointer to the first element of the row (not owned).
  // max_col:  upper bound handed to InRange() when validating a column index.
  RowData(T* const row_data, const int max_col)
      : row_data_(row_data), max_col_(max_col) {}

  // Implicit conversion back to the raw row pointer, bypassing any checking.
  inline operator T*() const { return row_data_; }

  // Checked element access. The column is validated with SCHECK before the
  // element reference is produced.
  inline T& operator[](const int col) const {
    SCHECK(InRange(col, 0, max_col_),
          "Column out of range: %d (%d max)", col, max_col_);
    T* const element = row_data_ + col;
    return *element;
  }

 private:
  T* const row_data_;
  const int max_col_;
};
#endif
73
// Templated three-way comparator with the signature expected by qsort().
// Both arguments must point at objects of type T. Returns 0 when the two
// values compare equal, -1 when the first is less than the second, and 1
// otherwise.
template <typename T>
int Comp(const void* a, const void* b) {
  const T lhs = *static_cast<const T*>(a);
  const T rhs = *static_cast<const T*>(b);

  // Equality is tested first, then less-than; anything else counts as greater.
  return (lhs == rhs) ? 0 : ((lhs < rhs) ? -1 : 1);
}
88
89// TODO(andrewharp): Make explicit which operations support negative numbers or
90// struct/class types in image data (possibly create fast multi-dim array class
91// for data where pixel arithmetic does not make sense).
92
93// Image class optimized for working on numeric arrays as grayscale image data.
94// Supports other data types as a 2D array class, so long as no pixel math
95// operations are called (convolution, downsampling, etc).
96template <typename T>
97class Image {
98 public:
99  Image(const int width, const int height);
100  explicit Image(const Size& size);
101
102  // Constructor that creates an image from preallocated data.
103  // Note: The image takes ownership of the data lifecycle, unless own_data is
104  // set to false.
105  Image(const int width, const int height, T* const image_data,
106        const bool own_data = true);
107
108  ~Image();
109
110  // Extract a pixel patch from this image, starting at a subpixel location.
111  // Uses 16:16 fixed point format for representing real values and doing the
112  // bilinear interpolation.
113  //
114  // Arguments fp_x and fp_y tell the subpixel position in fixed point format,
115  // patchwidth/patchheight give the size of the patch in pixels and
116  // to_data must be a valid pointer to a *contiguous* destination data array.
117  template<class DstType>
118  bool ExtractPatchAtSubpixelFixed1616(const int fp_x,
119                                       const int fp_y,
120                                       const int patchwidth,
121                                       const int patchheight,
122                                       DstType* to_data) const;
123
124  Image<T>* Crop(
125      const int left, const int top, const int right, const int bottom) const;
126
127  inline int GetWidth() const { return width_; }
128  inline int GetHeight() const { return height_; }
129
130  // Bilinearly sample a value between pixels.  Values must be within the image.
131  inline float GetPixelInterp(const float x, const float y) const;
132
133  // Bilinearly sample a pixels at a subpixel position using fixed point
134  // arithmetic.
135  // Avoids float<->int conversions.
136  // Values must be within the image.
137  // Arguments fp_x and fp_y tell the subpixel position in
138  // 16:16 fixed point format.
139  //
140  // Important: This function only makes sense for integer-valued images, such
141  // as Image<uint8_t> or Image<int> etc.
142  inline T GetPixelInterpFixed1616(const int fp_x_whole,
143                                   const int fp_y_whole) const;
144
145  // Returns true iff the pixel is in the image's boundaries.
146  inline bool ValidPixel(const int x, const int y) const;
147
148  inline BoundingBox GetContainingBox() const;
149
150  inline bool Contains(const BoundingBox& bounding_box) const;
151
152  inline T GetMedianValue() {
153    qsort(image_data_, data_size_, sizeof(image_data_[0]), Comp<T>);
154    return image_data_[data_size_ >> 1];
155  }
156
157  // Returns true iff the pixel is in the image's boundaries for interpolation
158  // purposes.
159  // TODO(andrewharp): check in interpolation follow-up change.
160  inline bool ValidInterpPixel(const float x, const float y) const;
161
162  // Safe lookup with boundary enforcement.
163  inline T GetPixelClipped(const int x, const int y) const {
164    return (*this)[Clip(y, ZERO, height_less_one_)]
165                  [Clip(x, ZERO, width_less_one_)];
166  }
167
168#ifdef SANITY_CHECKS
169  inline RowData<T> operator[](const int row) {
170    SCHECK(InRange(row, 0, height_less_one_),
171          "Row out of range: %d (%d max)", row, height_less_one_);
172    return RowData<T>(image_data_ + row * stride_, width_less_one_);
173  }
174
175  inline const RowData<T> operator[](const int row) const {
176    SCHECK(InRange(row, 0, height_less_one_),
177          "Row out of range: %d (%d max)", row, height_less_one_);
178    return RowData<T>(image_data_ + row * stride_, width_less_one_);
179  }
180#else
181  inline T* operator[](const int row) {
182    return image_data_ + row * stride_;
183  }
184
185  inline const T* operator[](const int row) const {
186    return image_data_ + row * stride_;
187  }
188#endif
189
190  const T* data() const { return image_data_; }
191
192  inline int stride() const { return stride_; }
193
194  // Clears image to a single value.
195  inline void Clear(const T& val) {
196    memset(image_data_, val, sizeof(*image_data_) * data_size_);
197  }
198
199#ifdef __ARM_NEON
200  void Downsample2x32ColumnsNeon(const uint8_t* const original,
201                                 const int stride, const int orig_x);
202
203  void Downsample4x32ColumnsNeon(const uint8_t* const original,
204                                 const int stride, const int orig_x);
205
206  void DownsampleAveragedNeon(const uint8_t* const original, const int stride,
207                              const int factor);
208#endif
209
210  // Naive downsampler that reduces image size by factor by averaging pixels in
211  // blocks of size factor x factor.
212  void DownsampleAveraged(const T* const original, const int stride,
213                          const int factor);
214
215  // Naive downsampler that reduces image size by factor by averaging pixels in
216  // blocks of size factor x factor.
217  inline void DownsampleAveraged(const Image<T>& original, const int factor) {
218    DownsampleAveraged(original.data(), original.GetWidth(), factor);
219  }
220
221  // Native downsampler that reduces image size using nearest interpolation
222  void DownsampleInterpolateNearest(const Image<T>& original);
223
224  // Native downsampler that reduces image size using fixed-point bilinear
225  // interpolation
226  void DownsampleInterpolateLinear(const Image<T>& original);
227
228  // Relatively efficient downsampling of an image by a factor of two with a
229  // low-pass 3x3 smoothing operation thrown in.
230  void DownsampleSmoothed3x3(const Image<T>& original);
231
232  // Relatively efficient downsampling of an image by a factor of two with a
233  // low-pass 5x5 smoothing operation thrown in.
234  void DownsampleSmoothed5x5(const Image<T>& original);
235
236  // Optimized Scharr filter on a single pixel in the X direction.
237  // Scharr filters are like central-difference operators, but have more
238  // rotational symmetry in their response because they also consider the
239  // diagonal neighbors.
240  template <typename U>
241  inline T ScharrPixelX(const Image<U>& original,
242                        const int center_x, const int center_y) const;
243
244  // Optimized Scharr filter on a single pixel in the X direction.
245  // Scharr filters are like central-difference operators, but have more
246  // rotational symmetry in their response because they also consider the
247  // diagonal neighbors.
248  template <typename U>
249  inline T ScharrPixelY(const Image<U>& original,
250                        const int center_x, const int center_y) const;
251
252  // Convolve the image with a Scharr filter in the X direction.
253  // Much faster than an equivalent generic convolution.
254  template <typename U>
255  inline void ScharrX(const Image<U>& original);
256
257  // Convolve the image with a Scharr filter in the Y direction.
258  // Much faster than an equivalent generic convolution.
259  template <typename U>
260  inline void ScharrY(const Image<U>& original);
261
262  static inline T HalfDiff(int32_t first, int32_t second) {
263    return (second - first) / 2;
264  }
265
266  template <typename U>
267  void DerivativeX(const Image<U>& original);
268
269  template <typename U>
270  void DerivativeY(const Image<U>& original);
271
272  // Generic function for convolving pixel with 3x3 filter.
273  // Filter pixels should be in row major order.
274  template <typename U>
275  inline T ConvolvePixel3x3(const Image<U>& original,
276                            const int* const filter,
277                            const int center_x, const int center_y,
278                            const int total) const;
279
280  // Generic function for convolving an image with a 3x3 filter.
281  // TODO(andrewharp): Generalize this for any size filter.
282  template <typename U>
283  inline void Convolve3x3(const Image<U>& original,
284                          const int32_t* const filter);
285
286  // Load this image's data from a data array. The data at pixels is assumed to
287  // have dimensions equivalent to this image's dimensions * factor.
288  inline void FromArray(const T* const pixels, const int stride,
289                        const int factor = 1);
290
291  // Copy the image back out to an appropriately sized data array.
292  inline void ToArray(T* const pixels) const {
293    // If not subsampling, memcpy should be faster.
294    memcpy(pixels, this->image_data_, data_size_ * sizeof(T));
295  }
296
297  // Precompute these for efficiency's sake as they're used by a lot of
298  // clipping code and loop code.
299  // TODO(andrewharp): make these only accessible by other Images.
300  const int width_less_one_;
301  const int height_less_one_;
302
303  // The raw size of the allocated data.
304  const int data_size_;
305
306 private:
307  inline void Allocate() {
308    image_data_ = new T[data_size_];
309    if (image_data_ == NULL) {
310      LOGE("Couldn't allocate image data!");
311    }
312  }
313
314  T* image_data_;
315
316  bool own_data_;
317
318  const int width_;
319  const int height_;
320
321  // The image stride (offset to next row).
322  // TODO(andrewharp): Make sure that stride is honored in all code.
323  const int stride_;
324
325  TF_DISALLOW_COPY_AND_ASSIGN(Image);
326};
327
328template <typename t>
329inline std::ostream& operator<<(std::ostream& stream, const Image<t>& image) {
330  for (int y = 0; y < image.GetHeight(); ++y) {
331    for (int x = 0; x < image.GetWidth(); ++x) {
332      stream << image[y][x] << " ";
333    }
334    stream << std::endl;
335  }
336  return stream;
337}
338
339}  // namespace tf_tracking
340
341#endif  // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_
342