1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3Licensed under the Apache License, Version 2.0 (the "License"); 4you may not use this file except in compliance with the License. 5You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9Unless required by applicable law or agreed to in writing, software 10distributed under the License is distributed on an "AS IS" BASIS, 11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12See the License for the specific language governing permissions and 13limitations under the License. 14==============================================================================*/ 15 16#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ 17#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ 18 19#include <math.h> 20 21#include "tensorflow/examples/android/jni/object_tracking/geom.h" 22 23namespace tf_tracking { 24 25// Arbitrary keypoint type ids for labeling the origin of tracked keypoints. 26enum KeypointType { 27 KEYPOINT_TYPE_DEFAULT = 0, 28 KEYPOINT_TYPE_FAST = 1, 29 KEYPOINT_TYPE_INTEREST = 2 30}; 31 32// Struct that can be used to more richly store the results of a detection 33// than a single number, while still maintaining comparability. 
struct MatchScore {
  // Wraps a raw score. Explicit so that plain doubles are never silently
  // converted into a MatchScore.
  explicit MatchScore(double val) : value(val) {}

  // A default-constructed score has value 0.0.
  MatchScore() : value(0.0) {}

  // The underlying score. Every comparison operator below looks at this
  // field only.
  double value;

  // Returns a new score whose value is the sum of both operands. Neither
  // operand is modified, so this is usable on const scores and `a + b` has
  // no side effect on `a`. Use operator+= to accumulate in place.
  MatchScore operator+(const MatchScore& rhs) const {
    return MatchScore(value + rhs.value);
  }

  // Adds rhs.value to this score in place and returns *this.
  MatchScore& operator+=(const MatchScore& rhs) {
    value += rhs.value;
    return *this;
  }

  // Streams just the numeric value.
  friend std::ostream& operator<<(std::ostream& stream,
                                  const MatchScore& detection) {
    stream << detection.value;
    return stream;
  }
};

// Ordering of scores is the ordering of their `value` fields.
inline bool operator<(const MatchScore& lhs, const MatchScore& rhs) {
  return lhs.value < rhs.value;
}
inline bool operator>(const MatchScore& lhs, const MatchScore& rhs) {
  return lhs.value > rhs.value;
}
inline bool operator>=(const MatchScore& lhs, const MatchScore& rhs) {
  return lhs.value >= rhs.value;
}
inline bool operator<=(const MatchScore& lhs, const MatchScore& rhs) {
  return lhs.value <= rhs.value;
}

// Fixed seed used for all random number generators.
static constexpr int kRandomNumberSeed = 11111;

// TODO(andrewharp): Move as many of these settings as possible into a settings
// object which can be passed in from Java at runtime.

// Whether or not to use ESM instead of LK flow.
static constexpr bool kUseEsm = false;

// This constant gets added to the diagonal of the Hessian
// before solving for translation in 2dof ESM.
// It ensures better behavior especially in the absence of
// strong texture.
static constexpr int kEsmRegularizer = 20;

// Do we want to brightness-normalize each keypoint patch when we compute
// its flow using ESM?
static constexpr bool kDoBrightnessNormalize = true;

// Whether or not to use fixed-point interpolated pixel lookups in optical flow.
#define USE_FIXED_POINT_FLOW 1

// Whether to normalize keypoint windows for intensity in LK optical flow.
// This is a define for now because it helps keep the code streamlined.
#define NORMALIZE 1

// Number of keypoints to store per frame.
static constexpr int kMaxKeypoints = 76;

// Keypoint detection.
static constexpr int kMaxTempKeypoints = 1024;

// When keypoints are exported to an array, each one occupies this many floats.
static constexpr int kKeypointStep = 7;

// How many frame deltas the circular queue holds on to.
static constexpr int kNumFrames = 512;

// Tracking iterations performed per keypoint at every pyramid level.
static constexpr int kNumIterations = 3;

// Each bin of the previous cache level is subdivided into this many bins per
// side. Larger values cost performance through extra cache misses, but make
// cache hits more locally relevant.
static constexpr int kCacheBranchFactor = 2;

// How many levels the cache has.
// Every cache level is a square grid of bins whose side length is
// branch_factor^(level - 1).
//
// This is allowed to exceed kNumPyramidLevels. A value of 0 disables caching.
static constexpr int kNumCacheLevels = 3;

// Level at which the cache pyramid is cut off and a matrix transform is used
// instead, provided such a matrix has been given to the cache.
static constexpr int kCacheCutoff = 1;

// Pyramid level count; it is the exclusive upper bound for
// kMinNumPyramidLevelsToUseForAdjustment below.
static constexpr int kNumPyramidLevels = 4;

// The minimum number of keypoints needed in an object's area.
// NOTE(review): the identifier says "Max" while this description says
// "minimum"; confirm which one is intended against the code that uses it.
static constexpr int kMaxKeypointsForObject = 16;

// Fewest pyramid levels to run after a cached value has been fetched.
// The cached value comes from the center of the matching top cache level box,
// so these levels provide the fine-scale adjustment.
// Valid range: [0, kNumPyramidLevels).
static constexpr int kMinNumPyramidLevelsToUseForAdjustment = 1;

// Size of the window integrated over when finding the local image derivative.
static constexpr int kFlowIntegrationWindowSize = 3;

// Total area of integration windows.
137static const int kFlowArraySize = 138 (2 * kFlowIntegrationWindowSize + 1) * (2 * kFlowIntegrationWindowSize + 1); 139 140// Error that's considered good enough to early abort tracking. 141static const float kTrackingAbortThreshold = 0.03f; 142 143// Maximum number of deviations a keypoint-correspondence delta can be from the 144// weighted average before being thrown out for region-based queries. 145static const float kNumDeviations = 2.0f; 146 147// The length of the allowed delta between the forward and the backward 148// flow deltas in terms of the length of the forward flow vector. 149static const float kMaxForwardBackwardErrorAllowed = 0.5f; 150 151// Threshold for pixels to be considered different. 152static const int kFastDiffAmount = 10; 153 154// How far from edge of frame to stop looking for FAST keypoints. 155static const int kFastBorderBuffer = 10; 156 157// Determines if non-detected arbitrary keypoints should be added to regions. 158// This will help if no keypoints have been detected in the region yet. 159static const bool kAddArbitraryKeypoints = true; 160 161// How many arbitrary keypoints to add along each axis as candidates for each 162// region? 163static const int kNumToAddAsCandidates = 1; 164 165// In terms of region dimensions, how closely can we place keypoints 166// next to each other? 167static const float kClosestPercent = 0.6f; 168 169// How many FAST qualifying pixels must be connected to a pixel for it to be 170// considered a candidate keypoint for Harris filtering. 171static const int kMinNumConnectedForFastKeypoint = 8; 172 173// Size of the window to integrate over for Harris filtering. 174// Compare to kFlowIntegrationWindowSize. 175static const int kHarrisWindowSize = 2; 176 177 178// DETECTOR PARAMETERS 179 180// Before relocalizing, make sure the new proposed position is better than 181// the existing position by a small amount to prevent thrashing. 
182static const MatchScore kMatchScoreBuffer(0.01f); 183 184// Minimum score a tracked object can have and still be considered a match. 185// TODO(andrewharp): Make this a per detector thing. 186static const MatchScore kMinimumMatchScore(0.5f); 187 188static const float kMinimumCorrelationForTracking = 0.4f; 189 190static const MatchScore kMatchScoreForImmediateTermination(0.0f); 191 192// Run the detector every N frames. 193static const int kDetectEveryNFrames = 4; 194 195// How many features does each feature_set contain? 196static const int kFeaturesPerFeatureSet = 10; 197 198// The number of FeatureSets managed by the object detector. 199// More FeatureSets can increase recall at the cost of performance. 200static const int kNumFeatureSets = 7; 201 202// How many FeatureSets must respond affirmatively for a candidate descriptor 203// and position to be given more thorough attention? 204static const int kNumFeatureSetsForCandidate = 2; 205 206// How large the thumbnails used for correlation validation are. Used for both 207// width and height. 208static const int kNormalizedThumbnailSize = 11; 209 210// The area of intersection divided by union for the bounding boxes that tells 211// if this tracking has slipped enough to invalidate all unlocked examples. 212static const float kPositionOverlapThreshold = 0.6f; 213 214// The number of detection failures allowed before an object goes invisible. 215// Tracking will still occur, so if it is actually still being tracked and 216// comes back into a detectable position, it's likely to be found. 217static const int kMaxNumDetectionFailures = 4; 218 219 220// Minimum square size to scan with sliding window. 221static const float kScanMinSquareSize = 16.0f; 222 223// Minimum square size to scan with sliding window. 224static const float kScanMaxSquareSize = 64.0f; 225 226// Scale difference for consecutive scans of the sliding window. 
// (Left as a plain const: sqrtf is not a constant expression in standard C++.)
static const float kScanScaleFactor = sqrtf(2.0f);

// Sliding-window step size.
static constexpr int kScanStepSize = 10;


// Packing tightness of the descriptor boxes for confirmed exemplars.
static const float kLockedScaleFactor = 1.0f / sqrtf(2.0f);

// Packing tightness of the descriptor boxes for unconfirmed exemplars.
static constexpr float kUnlockedScaleFactor = 0.5f;

// Packing tightness of the boxes scanned around the last known position.
static const float kLastKnownPositionScaleFactor = 1.0f / sqrtf(2.0f);

// Bounds on how close a new object example has to be to the existing object
// examples for a detection to be valid.
static constexpr float kMinCorrelationForNewExample = 0.75f;
static constexpr float kMaxCorrelationForNewExample = 0.99f;


// Safe tries granted to a freshly created exemplar before missed detections
// start counting against it.
static constexpr int kFreeTries = 5;

// A false positive is worth this many missed detections.
254static const int kFalsePositivePenalty = 5; 255 256struct ObjectDetectorConfig { 257 const Size image_size; 258 259 explicit ObjectDetectorConfig(const Size& image_size) 260 : image_size(image_size) {} 261 virtual ~ObjectDetectorConfig() = default; 262}; 263 264struct KeypointDetectorConfig { 265 const Size image_size; 266 267 bool detect_skin; 268 269 explicit KeypointDetectorConfig(const Size& image_size) 270 : image_size(image_size), 271 detect_skin(false) {} 272}; 273 274 275struct OpticalFlowConfig { 276 const Size image_size; 277 278 explicit OpticalFlowConfig(const Size& image_size) 279 : image_size(image_size) {} 280}; 281 282struct TrackerConfig { 283 const Size image_size; 284 KeypointDetectorConfig keypoint_detector_config; 285 OpticalFlowConfig flow_config; 286 bool always_track; 287 288 float object_box_scale_factor_for_features; 289 290 explicit TrackerConfig(const Size& image_size) 291 : image_size(image_size), 292 keypoint_detector_config(image_size), 293 flow_config(image_size), 294 always_track(false), 295 object_box_scale_factor_for_features(1.0f) {} 296}; 297 298} // namespace tf_tracking 299 300#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ 301