1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16// A minimal but useful C++ example showing how to load an Imagenet-style object
17// recognition TensorFlow model, prepare input images for it, run them through
18// the graph, and interpret the results.
19//
20// It has been stripped down from the tensorflow/examples/label_image sample
21// code to remove features and ops not included in the mobile/embedded core
22// library available on the Raspberry Pi.
23//
24// Full build instructions are at tensorflow/contrib/pi_examples/README.md.
25
#include <jpeglib.h>
#include <math.h>
#include <setjmp.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <fstream>
#include <vector>

#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"
46
47// These are all common classes it's handy to reference with no namespace.
48using tensorflow::Flag;
49using tensorflow::int32;
50using tensorflow::Status;
51using tensorflow::string;
52using tensorflow::Tensor;
53
54// Takes a file name, and loads a list of labels from it, one per line, and
55// returns a vector of the strings. It pads with empty strings so the length
56// of the result is a multiple of 16, because our model expects that.
57Status ReadLabelsFile(string file_name, std::vector<string>* result,
58                      size_t* found_label_count) {
59  std::ifstream file(file_name);
60  if (!file) {
61    return tensorflow::errors::NotFound("Labels file ", file_name,
62                                        " not found.");
63  }
64  result->clear();
65  string line;
66  while (std::getline(file, line)) {
67    result->push_back(line);
68  }
69  *found_label_count = result->size();
70  const int padding = 16;
71  while (result->size() % padding) {
72    result->emplace_back();
73  }
74  return Status::OK();
75}
76
77// Error handling for JPEG decoding.
78void CatchError(j_common_ptr cinfo) {
79  (*cinfo->err->output_message)(cinfo);
80  jmp_buf* jpeg_jmpbuf = reinterpret_cast<jmp_buf*>(cinfo->client_data);
81  jpeg_destroy(cinfo);
82  longjmp(*jpeg_jmpbuf, 1);
83}
84
85// Decompresses a JPEG file from disk.
// Decompresses a JPEG file from disk.
//
// On success, fills |data| with tightly-packed interleaved 8-bit samples and
// reports the decoded |width|, |height|, and |channels| as produced by
// libjpeg's default output conversion.
Status LoadJpegFile(string file_name, std::vector<tensorflow::uint8>* data,
                    int* width, int* height, int* channels) {
  struct jpeg_decompress_struct cinfo;
  FILE* infile;
  JSAMPARRAY buffer;
  int row_stride;

  if ((infile = fopen(file_name.c_str(), "rb")) == NULL) {
    LOG(ERROR) << "Can't open " << file_name;
    return tensorflow::errors::NotFound("JPEG file ", file_name, " not found");
  }

  struct jpeg_error_mgr jerr;
  jmp_buf jpeg_jmpbuf;  // recovery point in case of error
  cinfo.err = jpeg_std_error(&jerr);
  // CatchError retrieves this jmp_buf from client_data and longjmps to it on
  // any libjpeg fatal error.
  cinfo.client_data = &jpeg_jmpbuf;
  jerr.error_exit = CatchError;
  if (setjmp(jpeg_jmpbuf)) {
    // We only get here after CatchError fired; it has already destroyed the
    // decompress object, so just release the file handle.
    fclose(infile);
    return tensorflow::errors::Unknown("JPEG decoding failed");
  }

  jpeg_create_decompress(&cinfo);
  jpeg_stdio_src(&cinfo, infile);
  jpeg_read_header(&cinfo, TRUE);
  jpeg_start_decompress(&cinfo);
  *width = cinfo.output_width;
  *height = cinfo.output_height;
  *channels = cinfo.output_components;
  data->resize((*height) * (*width) * (*channels));

  // libjpeg decodes one scanline at a time into its own row buffer; copy each
  // row into the caller's contiguous output vector.
  row_stride = cinfo.output_width * cinfo.output_components;
  buffer = (*cinfo.mem->alloc_sarray)((j_common_ptr)&cinfo, JPOOL_IMAGE,
                                      row_stride, 1);
  while (cinfo.output_scanline < cinfo.output_height) {
    // output_scanline is advanced by jpeg_read_scanlines, so compute the
    // destination address before the read.
    tensorflow::uint8* row_address =
        &((*data)[cinfo.output_scanline * row_stride]);
    jpeg_read_scanlines(&cinfo, buffer, 1);
    memcpy(row_address, buffer[0], row_stride);
  }

  jpeg_finish_decompress(&cinfo);
  jpeg_destroy_decompress(&cinfo);
  fclose(infile);
  return Status::OK();
}
132
133// Given an image file name, read in the data, try to decode it as an image,
134// resize it to the requested size, and then scale the values as desired.
135Status ReadTensorFromImageFile(string file_name, const int wanted_height,
136                               const int wanted_width, const float input_mean,
137                               const float input_std,
138                               std::vector<Tensor>* out_tensors) {
139  std::vector<tensorflow::uint8> image_data;
140  int image_width;
141  int image_height;
142  int image_channels;
143  TF_RETURN_IF_ERROR(LoadJpegFile(file_name, &image_data, &image_width,
144                                  &image_height, &image_channels));
145  LOG(INFO) << "Loaded JPEG: " << image_width << "x" << image_height << "x"
146            << image_channels;
147  const int wanted_channels = 3;
148  if (image_channels < wanted_channels) {
149    return tensorflow::errors::FailedPrecondition(
150        "Image needs to have at least ", wanted_channels, " but only has ",
151        image_channels);
152  }
153  // In these loops, we convert the eight-bit data in the image into float,
154  // resize it using bilinear filtering, and scale it numerically to the float
155  // range that the model expects (given by input_mean and input_std).
156  tensorflow::Tensor image_tensor(
157      tensorflow::DT_FLOAT,
158      tensorflow::TensorShape(
159          {1, wanted_height, wanted_width, wanted_channels}));
160  auto image_tensor_mapped = image_tensor.tensor<float, 4>();
161  tensorflow::uint8* in = image_data.data();
162  float* out = image_tensor_mapped.data();
163  const size_t image_rowlen = image_width * image_channels;
164  const float width_scale = static_cast<float>(image_width) / wanted_width;
165  const float height_scale = static_cast<float>(image_height) / wanted_height;
166  for (int y = 0; y < wanted_height; ++y) {
167    const float in_y = y * height_scale;
168    const int top_y_index = static_cast<int>(floorf(in_y));
169    const int bottom_y_index =
170        std::min(static_cast<int>(ceilf(in_y)), (image_height - 1));
171    const float y_lerp = in_y - top_y_index;
172    tensorflow::uint8* in_top_row = in + (top_y_index * image_rowlen);
173    tensorflow::uint8* in_bottom_row = in + (bottom_y_index * image_rowlen);
174    float* out_row = out + (y * wanted_width * wanted_channels);
175    for (int x = 0; x < wanted_width; ++x) {
176      const float in_x = x * width_scale;
177      const int left_x_index = static_cast<int>(floorf(in_x));
178      const int right_x_index =
179          std::min(static_cast<int>(ceilf(in_x)), (image_width - 1));
180      tensorflow::uint8* in_top_left_pixel =
181          in_top_row + (left_x_index * wanted_channels);
182      tensorflow::uint8* in_top_right_pixel =
183          in_top_row + (right_x_index * wanted_channels);
184      tensorflow::uint8* in_bottom_left_pixel =
185          in_bottom_row + (left_x_index * wanted_channels);
186      tensorflow::uint8* in_bottom_right_pixel =
187          in_bottom_row + (right_x_index * wanted_channels);
188      const float x_lerp = in_x - left_x_index;
189      float* out_pixel = out_row + (x * wanted_channels);
190      for (int c = 0; c < wanted_channels; ++c) {
191        const float top_left((in_top_left_pixel[c] - input_mean) / input_std);
192        const float top_right((in_top_right_pixel[c] - input_mean) / input_std);
193        const float bottom_left((in_bottom_left_pixel[c] - input_mean) /
194                                input_std);
195        const float bottom_right((in_bottom_right_pixel[c] - input_mean) /
196                                 input_std);
197        const float top = top_left + (top_right - top_left) * x_lerp;
198        const float bottom =
199            bottom_left + (bottom_right - bottom_left) * x_lerp;
200        out_pixel[c] = top + (bottom - top) * y_lerp;
201      }
202    }
203  }
204
205  out_tensors->push_back(image_tensor);
206  return Status::OK();
207}
208
209// Reads a model graph definition from disk, and creates a session object you
210// can use to run it.
211Status LoadGraph(string graph_file_name,
212                 std::unique_ptr<tensorflow::Session>* session) {
213  tensorflow::GraphDef graph_def;
214  Status load_graph_status =
215      ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def);
216  if (!load_graph_status.ok()) {
217    return tensorflow::errors::NotFound("Failed to load compute graph at '",
218                                        graph_file_name, "'");
219  }
220  session->reset(tensorflow::NewSession(tensorflow::SessionOptions()));
221  Status session_create_status = (*session)->Create(graph_def);
222  if (!session_create_status.ok()) {
223    return session_create_status;
224  }
225  return Status::OK();
226}
227
228// Analyzes the output of the Inception graph to retrieve the highest scores and
229// their positions in the tensor, which correspond to categories.
230Status GetTopLabels(const std::vector<Tensor>& outputs, int how_many_labels,
231                    Tensor* out_indices, Tensor* out_scores) {
232  const Tensor& unsorted_scores_tensor = outputs[0];
233  auto unsorted_scores_flat = unsorted_scores_tensor.flat<float>();
234  std::vector<std::pair<int, float>> scores;
235  for (int i = 0; i < unsorted_scores_flat.size(); ++i) {
236    scores.push_back(std::pair<int, float>({i, unsorted_scores_flat(i)}));
237  }
238  std::sort(scores.begin(), scores.end(),
239            [](const std::pair<int, float>& left,
240               const std::pair<int, float>& right) {
241              return left.second > right.second;
242            });
243  scores.resize(how_many_labels);
244  Tensor sorted_indices(tensorflow::DT_INT32, {scores.size()});
245  Tensor sorted_scores(tensorflow::DT_FLOAT, {scores.size()});
246  for (int i = 0; i < scores.size(); ++i) {
247    sorted_indices.flat<int>()(i) = scores[i].first;
248    sorted_scores.flat<float>()(i) = scores[i].second;
249  }
250  *out_indices = sorted_indices;
251  *out_scores = sorted_scores;
252  return Status::OK();
253}
254
255// Given the output of a model run, and the name of a file containing the labels
256// this prints out the top five highest-scoring values.
257Status PrintTopLabels(const std::vector<Tensor>& outputs,
258                      string labels_file_name) {
259  std::vector<string> labels;
260  size_t label_count;
261  Status read_labels_status =
262      ReadLabelsFile(labels_file_name, &labels, &label_count);
263  if (!read_labels_status.ok()) {
264    LOG(ERROR) << read_labels_status;
265    return read_labels_status;
266  }
267  const int how_many_labels = std::min(5, static_cast<int>(label_count));
268  Tensor indices;
269  Tensor scores;
270  TF_RETURN_IF_ERROR(GetTopLabels(outputs, how_many_labels, &indices, &scores));
271  tensorflow::TTypes<float>::Flat scores_flat = scores.flat<float>();
272  tensorflow::TTypes<int32>::Flat indices_flat = indices.flat<int32>();
273  for (int pos = 0; pos < how_many_labels; ++pos) {
274    const int label_index = indices_flat(pos);
275    const float score = scores_flat(pos);
276    LOG(INFO) << labels[label_index] << " (" << label_index << "): " << score;
277  }
278  return Status::OK();
279}
280
281// This is a testing function that returns whether the top label index is the
282// one that's expected.
283Status CheckTopLabel(const std::vector<Tensor>& outputs, int expected,
284                     bool* is_expected) {
285  *is_expected = false;
286  Tensor indices;
287  Tensor scores;
288  const int how_many_labels = 1;
289  TF_RETURN_IF_ERROR(GetTopLabels(outputs, how_many_labels, &indices, &scores));
290  tensorflow::TTypes<int32>::Flat indices_flat = indices.flat<int32>();
291  if (indices_flat(0) != expected) {
292    LOG(ERROR) << "Expected label #" << expected << " but got #"
293               << indices_flat(0);
294    *is_expected = false;
295  } else {
296    *is_expected = true;
297  }
298  return Status::OK();
299}
300
301int main(int argc, char* argv[]) {
302  // These are the command-line flags the program can understand.
303  // They define where the graph and input data is located, and what kind of
304  // input the model expects. If you train your own model, or use something
305  // other than GoogLeNet you'll need to update these.
306  string image =
307      "tensorflow/contrib/pi_examples/label_image/data/"
308      "grace_hopper.jpg";
309  string graph =
310      "tensorflow/contrib/pi_examples/label_image/data/"
311      "tensorflow_inception_stripped.pb";
312  string labels =
313      "tensorflow/contrib/pi_examples/label_image/data/"
314      "imagenet_comp_graph_label_strings.txt";
315  int32 input_width = 299;
316  int32 input_height = 299;
317  int32 input_mean = 128;
318  int32 input_std = 128;
319  string input_layer = "Mul";
320  string output_layer = "softmax";
321  bool self_test = false;
322  string root_dir = "";
323  std::vector<tensorflow::Flag> flag_list = {
324      Flag("image", &image, "image to be processed"),
325      Flag("graph", &graph, "graph to be executed"),
326      Flag("labels", &labels, "name of file containing labels"),
327      Flag("input_width", &input_width, "resize image to this width in pixels"),
328      Flag("input_height", &input_height,
329           "resize image to this height in pixels"),
330      Flag("input_mean", &input_mean, "scale pixel values to this mean"),
331      Flag("input_std", &input_std, "scale pixel values to this std deviation"),
332      Flag("input_layer", &input_layer, "name of input layer"),
333      Flag("output_layer", &output_layer, "name of output layer"),
334      Flag("self_test", &self_test, "run a self test"),
335      Flag("root_dir", &root_dir,
336           "interpret image and graph file names relative to this directory"),
337  };
338  string usage = tensorflow::Flags::Usage(argv[0], flag_list);
339  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
340  if (!parse_result) {
341    LOG(ERROR) << "\n" << usage;
342    return -1;
343  }
344
345  // We need to call this to set up global state for TensorFlow.
346  tensorflow::port::InitMain(usage.c_str(), &argc, &argv);
347  if (argc > 1) {
348    LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
349    return -1;
350  }
351
352  // First we load and initialize the model.
353  std::unique_ptr<tensorflow::Session> session;
354  string graph_path = tensorflow::io::JoinPath(root_dir, graph);
355  Status load_graph_status = LoadGraph(graph_path, &session);
356  if (!load_graph_status.ok()) {
357    LOG(ERROR) << load_graph_status;
358    return -1;
359  }
360
361  // Get the image from disk as a float array of numbers, resized and normalized
362  // to the specifications the main graph expects.
363  std::vector<Tensor> resized_tensors;
364  string image_path = tensorflow::io::JoinPath(root_dir, image);
365  Status read_tensor_status =
366      ReadTensorFromImageFile(image_path, input_height, input_width, input_mean,
367                              input_std, &resized_tensors);
368  if (!read_tensor_status.ok()) {
369    LOG(ERROR) << read_tensor_status;
370    return -1;
371  }
372  const Tensor& resized_tensor = resized_tensors[0];
373
374  // Actually run the image through the model.
375  std::vector<Tensor> outputs;
376  Status run_status = session->Run({{input_layer, resized_tensor}},
377                                   {output_layer}, {}, &outputs);
378  if (!run_status.ok()) {
379    LOG(ERROR) << "Running model failed: " << run_status;
380    return -1;
381  } else {
382    LOG(INFO) << "Running model succeeded!";
383  }
384
385  // This is for automated testing to make sure we get the expected result with
386  // the default settings. We know that label 866 (military uniform) should be
387  // the top label for the Admiral Hopper image.
388  if (self_test) {
389    bool expected_matches;
390    Status check_status = CheckTopLabel(outputs, 866, &expected_matches);
391    if (!check_status.ok()) {
392      LOG(ERROR) << "Running check failed: " << check_status;
393      return -1;
394    }
395    if (!expected_matches) {
396      LOG(ERROR) << "Self-test failed!";
397      return -1;
398    }
399  }
400
401  // Do something interesting with the results we've generated.
402  Status print_status = PrintTopLabels(outputs, labels);
403  if (!print_status.ok()) {
404    LOG(ERROR) << "Running print failed: " << print_status;
405    return -1;
406  }
407
408  return 0;
409}
410