/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// A minimal but useful C++ example showing how to load an Imagenet-style object
// recognition TensorFlow model, prepare input images for it, run them through
// the graph, and interpret the results.
//
// It has been stripped down from the tensorflow/examples/label_image sample
// code to remove features and ops not included in the mobile/embedded core
// library available on the Raspberry Pi.
//
// Full build instructions are at tensorflow/contrib/pi_examples/README.md.
25 26#include <jpeglib.h> 27#include <setjmp.h> 28#include <stdio.h> 29#include <fstream> 30#include <vector> 31 32#include "tensorflow/core/framework/graph.pb.h" 33#include "tensorflow/core/framework/tensor.h" 34#include "tensorflow/core/graph/default_device.h" 35#include "tensorflow/core/graph/graph_def_builder.h" 36#include "tensorflow/core/lib/core/errors.h" 37#include "tensorflow/core/lib/core/stringpiece.h" 38#include "tensorflow/core/lib/core/threadpool.h" 39#include "tensorflow/core/lib/io/path.h" 40#include "tensorflow/core/lib/strings/stringprintf.h" 41#include "tensorflow/core/platform/init_main.h" 42#include "tensorflow/core/platform/logging.h" 43#include "tensorflow/core/platform/types.h" 44#include "tensorflow/core/public/session.h" 45#include "tensorflow/core/util/command_line_flags.h" 46 47// These are all common classes it's handy to reference with no namespace. 48using tensorflow::Flag; 49using tensorflow::int32; 50using tensorflow::Status; 51using tensorflow::string; 52using tensorflow::Tensor; 53 54// Takes a file name, and loads a list of labels from it, one per line, and 55// returns a vector of the strings. It pads with empty strings so the length 56// of the result is a multiple of 16, because our model expects that. 57Status ReadLabelsFile(string file_name, std::vector<string>* result, 58 size_t* found_label_count) { 59 std::ifstream file(file_name); 60 if (!file) { 61 return tensorflow::errors::NotFound("Labels file ", file_name, 62 " not found."); 63 } 64 result->clear(); 65 string line; 66 while (std::getline(file, line)) { 67 result->push_back(line); 68 } 69 *found_label_count = result->size(); 70 const int padding = 16; 71 while (result->size() % padding) { 72 result->emplace_back(); 73 } 74 return Status::OK(); 75} 76 77// Error handling for JPEG decoding. 
void CatchError(j_common_ptr cinfo) {
  // Emit the library's formatted message for the error that occurred.
  (*cinfo->err->output_message)(cinfo);
  // LoadJpegFile stored a pointer to its jmp_buf in client_data; use it to
  // unwind back to the setjmp() recovery point after freeing libjpeg state.
  jmp_buf* jpeg_jmpbuf = reinterpret_cast<jmp_buf*>(cinfo->client_data);
  jpeg_destroy(cinfo);
  longjmp(*jpeg_jmpbuf, 1);
}

// Decompresses a JPEG file from disk.
// On success fills `data` with (*height * *width * *channels) bytes of pixel
// data in scanline order, and sets the three dimension out-params from the
// decoder's output. Returns NotFound if the file can't be opened, or Unknown
// if libjpeg reports a decode failure.
Status LoadJpegFile(string file_name, std::vector<tensorflow::uint8>* data,
                    int* width, int* height, int* channels) {
  struct jpeg_decompress_struct cinfo;
  FILE* infile;
  JSAMPARRAY buffer;
  int row_stride;

  if ((infile = fopen(file_name.c_str(), "rb")) == NULL) {
    LOG(ERROR) << "Can't open " << file_name;
    return tensorflow::errors::NotFound("JPEG file ", file_name, " not found");
  }

  // Install our own error handler: by default libjpeg calls exit() on error,
  // so CatchError longjmps back to the setjmp() below instead.
  struct jpeg_error_mgr jerr;
  jmp_buf jpeg_jmpbuf;  // recovery point in case of error
  cinfo.err = jpeg_std_error(&jerr);
  cinfo.client_data = &jpeg_jmpbuf;
  jerr.error_exit = CatchError;
  if (setjmp(jpeg_jmpbuf)) {
    // Control arrives here via CatchError's longjmp if decoding fails;
    // CatchError has already destroyed the decompress struct, so only the
    // file handle remains to be cleaned up.
    fclose(infile);
    return tensorflow::errors::Unknown("JPEG decoding failed");
  }

  jpeg_create_decompress(&cinfo);
  jpeg_stdio_src(&cinfo, infile);
  jpeg_read_header(&cinfo, TRUE);
  jpeg_start_decompress(&cinfo);
  *width = cinfo.output_width;
  *height = cinfo.output_height;
  *channels = cinfo.output_components;
  data->resize((*height) * (*width) * (*channels));

  // Decode one scanline at a time through a libjpeg-managed buffer, copying
  // each row into the caller's contiguous byte vector. output_scanline is
  // the index of the next row to be read, so compute the destination address
  // before calling jpeg_read_scanlines (which advances it).
  row_stride = cinfo.output_width * cinfo.output_components;
  buffer = (*cinfo.mem->alloc_sarray)((j_common_ptr)&cinfo, JPOOL_IMAGE,
                                      row_stride, 1);
  while (cinfo.output_scanline < cinfo.output_height) {
    tensorflow::uint8* row_address =
        &((*data)[cinfo.output_scanline * row_stride]);
    jpeg_read_scanlines(&cinfo, buffer, 1);
    memcpy(row_address, buffer[0], row_stride);
  }

  jpeg_finish_decompress(&cinfo);
  jpeg_destroy_decompress(&cinfo);
  fclose(infile);
  return Status::OK();
}

// Given an image file name, read in the data, try to decode it as an image,
// resize it to the requested size, and then scale the values as desired.
135Status ReadTensorFromImageFile(string file_name, const int wanted_height, 136 const int wanted_width, const float input_mean, 137 const float input_std, 138 std::vector<Tensor>* out_tensors) { 139 std::vector<tensorflow::uint8> image_data; 140 int image_width; 141 int image_height; 142 int image_channels; 143 TF_RETURN_IF_ERROR(LoadJpegFile(file_name, &image_data, &image_width, 144 &image_height, &image_channels)); 145 LOG(INFO) << "Loaded JPEG: " << image_width << "x" << image_height << "x" 146 << image_channels; 147 const int wanted_channels = 3; 148 if (image_channels < wanted_channels) { 149 return tensorflow::errors::FailedPrecondition( 150 "Image needs to have at least ", wanted_channels, " but only has ", 151 image_channels); 152 } 153 // In these loops, we convert the eight-bit data in the image into float, 154 // resize it using bilinear filtering, and scale it numerically to the float 155 // range that the model expects (given by input_mean and input_std). 156 tensorflow::Tensor image_tensor( 157 tensorflow::DT_FLOAT, 158 tensorflow::TensorShape( 159 {1, wanted_height, wanted_width, wanted_channels})); 160 auto image_tensor_mapped = image_tensor.tensor<float, 4>(); 161 tensorflow::uint8* in = image_data.data(); 162 float* out = image_tensor_mapped.data(); 163 const size_t image_rowlen = image_width * image_channels; 164 const float width_scale = static_cast<float>(image_width) / wanted_width; 165 const float height_scale = static_cast<float>(image_height) / wanted_height; 166 for (int y = 0; y < wanted_height; ++y) { 167 const float in_y = y * height_scale; 168 const int top_y_index = static_cast<int>(floorf(in_y)); 169 const int bottom_y_index = 170 std::min(static_cast<int>(ceilf(in_y)), (image_height - 1)); 171 const float y_lerp = in_y - top_y_index; 172 tensorflow::uint8* in_top_row = in + (top_y_index * image_rowlen); 173 tensorflow::uint8* in_bottom_row = in + (bottom_y_index * image_rowlen); 174 float* out_row = out + (y * wanted_width * 
wanted_channels); 175 for (int x = 0; x < wanted_width; ++x) { 176 const float in_x = x * width_scale; 177 const int left_x_index = static_cast<int>(floorf(in_x)); 178 const int right_x_index = 179 std::min(static_cast<int>(ceilf(in_x)), (image_width - 1)); 180 tensorflow::uint8* in_top_left_pixel = 181 in_top_row + (left_x_index * wanted_channels); 182 tensorflow::uint8* in_top_right_pixel = 183 in_top_row + (right_x_index * wanted_channels); 184 tensorflow::uint8* in_bottom_left_pixel = 185 in_bottom_row + (left_x_index * wanted_channels); 186 tensorflow::uint8* in_bottom_right_pixel = 187 in_bottom_row + (right_x_index * wanted_channels); 188 const float x_lerp = in_x - left_x_index; 189 float* out_pixel = out_row + (x * wanted_channels); 190 for (int c = 0; c < wanted_channels; ++c) { 191 const float top_left((in_top_left_pixel[c] - input_mean) / input_std); 192 const float top_right((in_top_right_pixel[c] - input_mean) / input_std); 193 const float bottom_left((in_bottom_left_pixel[c] - input_mean) / 194 input_std); 195 const float bottom_right((in_bottom_right_pixel[c] - input_mean) / 196 input_std); 197 const float top = top_left + (top_right - top_left) * x_lerp; 198 const float bottom = 199 bottom_left + (bottom_right - bottom_left) * x_lerp; 200 out_pixel[c] = top + (bottom - top) * y_lerp; 201 } 202 } 203 } 204 205 out_tensors->push_back(image_tensor); 206 return Status::OK(); 207} 208 209// Reads a model graph definition from disk, and creates a session object you 210// can use to run it. 
211Status LoadGraph(string graph_file_name, 212 std::unique_ptr<tensorflow::Session>* session) { 213 tensorflow::GraphDef graph_def; 214 Status load_graph_status = 215 ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def); 216 if (!load_graph_status.ok()) { 217 return tensorflow::errors::NotFound("Failed to load compute graph at '", 218 graph_file_name, "'"); 219 } 220 session->reset(tensorflow::NewSession(tensorflow::SessionOptions())); 221 Status session_create_status = (*session)->Create(graph_def); 222 if (!session_create_status.ok()) { 223 return session_create_status; 224 } 225 return Status::OK(); 226} 227 228// Analyzes the output of the Inception graph to retrieve the highest scores and 229// their positions in the tensor, which correspond to categories. 230Status GetTopLabels(const std::vector<Tensor>& outputs, int how_many_labels, 231 Tensor* out_indices, Tensor* out_scores) { 232 const Tensor& unsorted_scores_tensor = outputs[0]; 233 auto unsorted_scores_flat = unsorted_scores_tensor.flat<float>(); 234 std::vector<std::pair<int, float>> scores; 235 for (int i = 0; i < unsorted_scores_flat.size(); ++i) { 236 scores.push_back(std::pair<int, float>({i, unsorted_scores_flat(i)})); 237 } 238 std::sort(scores.begin(), scores.end(), 239 [](const std::pair<int, float>& left, 240 const std::pair<int, float>& right) { 241 return left.second > right.second; 242 }); 243 scores.resize(how_many_labels); 244 Tensor sorted_indices(tensorflow::DT_INT32, {scores.size()}); 245 Tensor sorted_scores(tensorflow::DT_FLOAT, {scores.size()}); 246 for (int i = 0; i < scores.size(); ++i) { 247 sorted_indices.flat<int>()(i) = scores[i].first; 248 sorted_scores.flat<float>()(i) = scores[i].second; 249 } 250 *out_indices = sorted_indices; 251 *out_scores = sorted_scores; 252 return Status::OK(); 253} 254 255// Given the output of a model run, and the name of a file containing the labels 256// this prints out the top five highest-scoring values. 
Status PrintTopLabels(const std::vector<Tensor>& outputs,
                      string labels_file_name) {
  std::vector<string> labels;
  size_t label_count;
  Status read_labels_status =
      ReadLabelsFile(labels_file_name, &labels, &label_count);
  if (!read_labels_status.ok()) {
    LOG(ERROR) << read_labels_status;
    return read_labels_status;
  }
  // Show at most five results — fewer if the labels file is shorter.
  const int how_many_labels = std::min(5, static_cast<int>(label_count));
  Tensor indices;
  Tensor scores;
  TF_RETURN_IF_ERROR(GetTopLabels(outputs, how_many_labels, &indices, &scores));
  tensorflow::TTypes<float>::Flat scores_flat = scores.flat<float>();
  tensorflow::TTypes<int32>::Flat indices_flat = indices.flat<int32>();
  for (int pos = 0; pos < how_many_labels; ++pos) {
    const int label_index = indices_flat(pos);
    const float score = scores_flat(pos);
    LOG(INFO) << labels[label_index] << " (" << label_index << "): " << score;
  }
  return Status::OK();
}

// This is a testing function that returns whether the top label index is the
// one that's expected.
Status CheckTopLabel(const std::vector<Tensor>& outputs, int expected,
                     bool* is_expected) {
  *is_expected = false;
  Tensor indices;
  Tensor scores;
  // Only the single best-scoring label matters for this check.
  const int how_many_labels = 1;
  TF_RETURN_IF_ERROR(GetTopLabels(outputs, how_many_labels, &indices, &scores));
  tensorflow::TTypes<int32>::Flat indices_flat = indices.flat<int32>();
  if (indices_flat(0) != expected) {
    LOG(ERROR) << "Expected label #" << expected << " but got #"
               << indices_flat(0);
    *is_expected = false;
  } else {
    *is_expected = true;
  }
  return Status::OK();
}

int main(int argc, char* argv[]) {
  // These are the command-line flags the program can understand.
  // They define where the graph and input data is located, and what kind of
  // input the model expects. If you train your own model, or use something
  // other than GoogLeNet you'll need to update these.
  string image =
      "tensorflow/contrib/pi_examples/label_image/data/"
      "grace_hopper.jpg";
  string graph =
      "tensorflow/contrib/pi_examples/label_image/data/"
      "tensorflow_inception_stripped.pb";
  string labels =
      "tensorflow/contrib/pi_examples/label_image/data/"
      "imagenet_comp_graph_label_strings.txt";
  // Defaults match the stripped Inception v3 graph shipped with this example.
  int32 input_width = 299;
  int32 input_height = 299;
  int32 input_mean = 128;
  int32 input_std = 128;
  string input_layer = "Mul";
  string output_layer = "softmax";
  bool self_test = false;
  string root_dir = "";
  std::vector<tensorflow::Flag> flag_list = {
      Flag("image", &image, "image to be processed"),
      Flag("graph", &graph, "graph to be executed"),
      Flag("labels", &labels, "name of file containing labels"),
      Flag("input_width", &input_width, "resize image to this width in pixels"),
      Flag("input_height", &input_height,
           "resize image to this height in pixels"),
      Flag("input_mean", &input_mean, "scale pixel values to this mean"),
      Flag("input_std", &input_std, "scale pixel values to this std deviation"),
      Flag("input_layer", &input_layer, "name of input layer"),
      Flag("output_layer", &output_layer, "name of output layer"),
      Flag("self_test", &self_test, "run a self test"),
      Flag("root_dir", &root_dir,
           "interpret image and graph file names relative to this directory"),
  };
  string usage = tensorflow::Flags::Usage(argv[0], flag_list);
  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
  if (!parse_result) {
    LOG(ERROR) << "\n" << usage;
    return -1;
  }

  // We need to call this to set up global state for TensorFlow.
  tensorflow::port::InitMain(usage.c_str(), &argc, &argv);
  if (argc > 1) {
    // Flags::Parse consumed everything it recognized; anything left over is
    // an unsupported or mistyped argument.
    LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
    return -1;
  }

  // First we load and initialize the model.
  std::unique_ptr<tensorflow::Session> session;
  string graph_path = tensorflow::io::JoinPath(root_dir, graph);
  Status load_graph_status = LoadGraph(graph_path, &session);
  if (!load_graph_status.ok()) {
    LOG(ERROR) << load_graph_status;
    return -1;
  }

  // Get the image from disk as a float array of numbers, resized and normalized
  // to the specifications the main graph expects.
  std::vector<Tensor> resized_tensors;
  string image_path = tensorflow::io::JoinPath(root_dir, image);
  Status read_tensor_status =
      ReadTensorFromImageFile(image_path, input_height, input_width, input_mean,
                              input_std, &resized_tensors);
  if (!read_tensor_status.ok()) {
    LOG(ERROR) << read_tensor_status;
    return -1;
  }
  // ReadTensorFromImageFile pushes exactly one tensor on success.
  const Tensor& resized_tensor = resized_tensors[0];

  // Actually run the image through the model.
  std::vector<Tensor> outputs;
  Status run_status = session->Run({{input_layer, resized_tensor}},
                                   {output_layer}, {}, &outputs);
  if (!run_status.ok()) {
    LOG(ERROR) << "Running model failed: " << run_status;
    return -1;
  } else {
    LOG(INFO) << "Running model succeeded!";
  }

  // This is for automated testing to make sure we get the expected result with
  // the default settings. We know that label 866 (military uniform) should be
  // the top label for the Admiral Hopper image.
  if (self_test) {
    bool expected_matches;
    Status check_status = CheckTopLabel(outputs, 866, &expected_matches);
    if (!check_status.ok()) {
      LOG(ERROR) << "Running check failed: " << check_status;
      return -1;
    }
    if (!expected_matches) {
      LOG(ERROR) << "Self-test failed!";
      return -1;
    }
  }

  // Do something interesting with the results we've generated.
  Status print_status = PrintTopLabels(outputs, labels);
  if (!print_status.ok()) {
    LOG(ERROR) << "Running print failed: " << print_status;
    return -1;
  }

  return 0;
}