1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
17#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
18
#include <math.h>

#include <ostream>

#include "tensorflow/examples/android/jni/object_tracking/geom.h"
22
23namespace tf_tracking {
24
// Arbitrary ids used to label where a tracked keypoint originated.
// The enumerators take the implicit sequential values noted beside them.
enum KeypointType {
  KEYPOINT_TYPE_DEFAULT,  // == 0
  KEYPOINT_TYPE_FAST,     // == 1
  KEYPOINT_TYPE_INTEREST  // == 2
};
31
// Struct that can be used to more richly store the results of a detection
// than a single number, while still maintaining comparability.
struct MatchScore {
  // Creates a score of 0.0.
  MatchScore() : value(0.0) {}

  // Wraps a raw score. Explicit, so a bare number never silently converts
  // into a MatchScore.
  explicit MatchScore(double val) : value(val) {}

  // The raw score.
  double value;

  // Adds rhs into this score in place and returns this score.
  MatchScore& operator+=(const MatchScore& rhs) {
    value += rhs.value;
    return *this;
  }

  // Returns the sum of the two scores as a new MatchScore. Neither operand
  // is modified, so this also works when the left-hand side is const.
  MatchScore operator+(const MatchScore& rhs) const {
    return MatchScore(value + rhs.value);
  }

  // Streams the raw score value.
  friend std::ostream& operator<<(std::ostream& stream,
                                  const MatchScore& detection) {
    stream << detection.value;
    return stream;
  }
};
51inline bool operator< (const MatchScore& cC1, const MatchScore& cC2) {
52    return cC1.value < cC2.value;
53}
54inline bool operator> (const MatchScore& cC1, const MatchScore& cC2) {
55    return cC1.value > cC2.value;
56}
57inline bool operator>= (const MatchScore& cC1, const MatchScore& cC2) {
58    return cC1.value >= cC2.value;
59}
60inline bool operator<= (const MatchScore& cC1, const MatchScore& cC2) {
61    return cC1.value <= cC2.value;
62}
63
// One fixed seed, shared by all random number generators.
static constexpr int kRandomNumberSeed = 11111;

// TODO(andrewharp): Move as many of these settings as possible into a settings
// object which can be passed in from Java at runtime.

// When true, ESM is used instead of LK flow.
static constexpr bool kUseEsm = false;

// Value added to the diagonal of the Hessian before solving for translation
// in 2dof ESM. It ensures better behavior, especially in the absence of
// strong texture.
static constexpr int kEsmRegularizer = 20;

// Whether each keypoint patch is brightness-normalized when its flow is
// computed using ESM.
static constexpr bool kDoBrightnessNormalize = true;

// Set to 1 to use fixed-point interpolated pixel lookups in optical flow.
#define USE_FIXED_POINT_FLOW 1

// Set to 1 to normalize keypoint windows for intensity in LK optical flow.
// Kept as a define for now because it helps keep the code streamlined.
#define NORMALIZE 1
89
// How many keypoints are stored per frame.
static constexpr int kMaxKeypoints = 76;

// Keypoint-detection limit on temporary keypoints.
// NOTE(review): presumably the capacity for candidates before they are
// reduced to kMaxKeypoints -- confirm against the keypoint detector.
static constexpr int kMaxTempKeypoints = 1024;

// How many floats a single keypoint occupies when exported to an array.
static constexpr int kKeypointStep = 7;

// How many frame deltas are kept around in the circular queue.
static constexpr int kNumFrames = 512;

// How many tracking iterations are run on each keypoint at each pyramid
// level.
static constexpr int kNumIterations = 3;
104
// Cache branching factor: each bin of the previous cache level is divided
// into this many bins on a side. Higher numbers will decrease performance by
// increasing cache misses, but mean that cache hits are more locally
// relevant.
static constexpr int kCacheBranchFactor = 2;

// How many levels the cache has.
// Every cache level is a square grid of bins whose side length is
// branch_factor^(level - 1).
//
// The value may exceed kNumPyramidLevels. A value of 0 means that caching
// is disabled.
static constexpr int kNumCacheLevels = 3;

// Level at which the cache pyramid is cut off and replaced by a matrix
// transform, provided such a matrix has been given to the cache.
static constexpr int kCacheCutoff = 1;

// Number of pyramid levels (upper bound of the range documented on
// kMinNumPyramidLevelsToUseForAdjustment below).
static constexpr int kNumPyramidLevels = 4;

// Keypoint count limit for an object's area.
// NOTE(review): the original comment called this "the minimum number of
// keypoints needed in an object's area", which disagrees with the "Max" in
// the name -- confirm against usage which of the two is intended.
static constexpr int kMaxKeypointsForObject = 16;

// Minimum number of pyramid levels to use after getting a cached value.
// This permits fine-scale adjustment starting from the cached value, which
// is taken from the center of the corresponding top cache level box.
// Valid range: [0, kNumPyramidLevels).
static constexpr int kMinNumPyramidLevelsToUseForAdjustment = 1;
132
// Size of the window integrated over to find the local image derivative.
// This is a half-width: the full window spans
// (2 * kFlowIntegrationWindowSize + 1) pixels per side (see kFlowArraySize).
static constexpr int kFlowIntegrationWindowSize = 3;

// Total area, in pixels, of one integration window.
static constexpr int kFlowArraySize =
    (2 * kFlowIntegrationWindowSize + 1) * (2 * kFlowIntegrationWindowSize + 1);

// Tracking aborts early once the error is at or below this "good enough"
// level.
static constexpr float kTrackingAbortThreshold = 0.03f;

// For region-based queries: the maximum number of deviations a
// keypoint-correspondence delta may lie from the weighted average before it
// is thrown out.
static constexpr float kNumDeviations = 2.0f;

// Allowed length of the delta between the forward and the backward flow
// deltas, expressed in terms of the length of the forward flow vector.
static constexpr float kMaxForwardBackwardErrorAllowed = 0.5f;
150
// Difference threshold above which pixels are considered different.
static constexpr int kFastDiffAmount = 10;

// Distance from the edge of the frame at which the search for FAST keypoints
// stops.
static constexpr int kFastBorderBuffer = 10;

// Whether non-detected arbitrary keypoints should be added to regions.
// Helpful when no keypoints have been detected in the region yet.
static constexpr bool kAddArbitraryKeypoints = true;

// Number of arbitrary keypoints added along each axis as candidates for each
// region.
static constexpr int kNumToAddAsCandidates = 1;

// Closest spacing allowed between neighboring keypoints, in terms of region
// dimensions.
static constexpr float kClosestPercent = 0.6f;

// Number of FAST qualifying pixels that must be connected to a pixel for it
// to be considered a candidate keypoint for Harris filtering.
static constexpr int kMinNumConnectedForFastKeypoint = 8;

// Size of the window integrated over for Harris filtering.
// Compare to kFlowIntegrationWindowSize.
static constexpr int kHarrisWindowSize = 2;
176
177
178// DETECTOR PARAMETERS
179
180// Before relocalizing, make sure the new proposed position is better than
181// the existing position by a small amount to prevent thrashing.
182static const MatchScore kMatchScoreBuffer(0.01f);
183
184// Minimum score a tracked object can have and still be considered a match.
185// TODO(andrewharp): Make this a per detector thing.
186static const MatchScore kMinimumMatchScore(0.5f);
187
188static const float kMinimumCorrelationForTracking = 0.4f;
189
190static const MatchScore kMatchScoreForImmediateTermination(0.0f);
191
// The detector runs once every this many frames.
static constexpr int kDetectEveryNFrames = 4;

// Number of features contained in each feature_set.
static constexpr int kFeaturesPerFeatureSet = 10;

// Number of FeatureSets managed by the object detector.
// Using more FeatureSets can increase recall at the cost of performance.
static constexpr int kNumFeatureSets = 7;

// Number of FeatureSets that must respond affirmatively before a candidate
// descriptor and position is given more thorough attention.
static constexpr int kNumFeatureSetsForCandidate = 2;

// Side length of the thumbnails used for correlation validation; applies to
// both width and height.
static constexpr int kNormalizedThumbnailSize = 11;

// Bounding-box overlap (area of intersection divided by area of union) that
// tells if this tracking has slipped enough to invalidate all unlocked
// examples.
static constexpr float kPositionOverlapThreshold = 0.6f;

// Number of detection failures tolerated before an object goes invisible.
// Tracking still occurs, so if the object is actually still being tracked
// and returns to a detectable position, it is likely to be found.
static constexpr int kMaxNumDetectionFailures = 4;
218
219
// Minimum square size to scan with sliding window.
static constexpr float kScanMinSquareSize = 16.0f;

// Maximum square size to scan with sliding window.
static constexpr float kScanMaxSquareSize = 64.0f;

// Scale difference for consecutive scans of the sliding window.
// Not constexpr: sqrtf is not guaranteed to be usable in constant
// expressions.
static const float kScanScaleFactor = sqrtf(2.0f);

// Step size for sliding window.
static constexpr int kScanStepSize = 10;
231
232
// Packing tightness of the descriptor boxes for confirmed exemplars.
static const float kLockedScaleFactor = 1.0f / sqrtf(2.0f);

// Packing tightness of the descriptor boxes for unconfirmed exemplars.
static constexpr float kUnlockedScaleFactor = 1.0f / 2.0f;

// Packing tightness of the boxes scanned around the last known position
// (the boxes are centered there).
static const float kLastKnownPositionScaleFactor = 1.0f / sqrtf(2.0f);

// Lower and upper bounds on how close a new object example must be to
// existing object examples for detection to be valid.
static constexpr float kMinCorrelationForNewExample = 0.75f;
static constexpr float kMaxCorrelationForNewExample = 0.99f;


// Number of safe tries an exemplar gets after being created before missed
// detections count against it.
static constexpr int kFreeTries = 5;

// Number of missed detections that one false positive is worth.
static constexpr int kFalsePositivePenalty = 5;
255
256struct ObjectDetectorConfig {
257  const Size image_size;
258
259  explicit ObjectDetectorConfig(const Size& image_size)
260      : image_size(image_size) {}
261  virtual ~ObjectDetectorConfig() = default;
262};
263
264struct KeypointDetectorConfig {
265  const Size image_size;
266
267  bool detect_skin;
268
269  explicit KeypointDetectorConfig(const Size& image_size)
270      : image_size(image_size),
271        detect_skin(false) {}
272};
273
274
275struct OpticalFlowConfig {
276  const Size image_size;
277
278  explicit OpticalFlowConfig(const Size& image_size)
279      : image_size(image_size) {}
280};
281
282struct TrackerConfig {
283  const Size image_size;
284  KeypointDetectorConfig keypoint_detector_config;
285  OpticalFlowConfig flow_config;
286  bool always_track;
287
288  float object_box_scale_factor_for_features;
289
290  explicit TrackerConfig(const Size& image_size)
291      : image_size(image_size),
292        keypoint_detector_config(image_size),
293        flow_config(image_size),
294        always_track(false),
295        object_box_scale_factor_for_features(1.0f) {}
296};
297
298}  // namespace tf_tracking
299
300#endif  // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
301