1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16// Full build instructions are at tensorflow/contrib/pi_examples/README.md.
17
#include <errno.h>
#include <fcntl.h>
#include <libv4l2.h>
#include <linux/videodev2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/types.h>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <vector>
31
32#include "tensorflow/core/framework/graph.pb.h"
33#include "tensorflow/core/framework/tensor.h"
34#include "tensorflow/core/graph/default_device.h"
35#include "tensorflow/core/graph/graph_def_builder.h"
36#include "tensorflow/core/lib/core/errors.h"
37#include "tensorflow/core/lib/core/stringpiece.h"
38#include "tensorflow/core/lib/core/threadpool.h"
39#include "tensorflow/core/lib/io/path.h"
40#include "tensorflow/core/lib/strings/stringprintf.h"
41#include "tensorflow/core/platform/init_main.h"
42#include "tensorflow/core/platform/logging.h"
43#include "tensorflow/core/platform/types.h"
44#include "tensorflow/core/public/session.h"
45#include "tensorflow/core/util/command_line_flags.h"
46
47// These are all common classes it's handy to reference with no namespace.
48using tensorflow::Flag;
49using tensorflow::int32;
50using tensorflow::Status;
51using tensorflow::string;
52using tensorflow::Tensor;
53
// Used to store the memory-mapped buffers we use for capture.
// Both fields are filled in from the driver's reply to VIDIOC_QUERYBUF:
// `start` is the address returned by v4l2_mmap() and `length` is the size of
// that mapping in bytes.
struct CameraBuffer {
  void* start;    // Address of the memory-mapped frame buffer.
  size_t length;  // Size of the mapping, in bytes.
};
59
60// Wrapper around camera command sending.
61Status SendCameraCommand(int fh, int request, void* arg) {
62  int r;
63  do {
64    r = v4l2_ioctl(fh, request, arg);
65  } while (r == -1 && ((errno == EINTR) || (errno == EAGAIN)));
66  if (r == -1) {
67    LOG(ERROR) << "SendCameraCommand error " << errno << " (" << strerror(errno)
68               << ")";
69    return tensorflow::errors::Unknown("SendCameraCommand error ", errno,
70                                       strerror(errno));
71  }
72  return Status::OK();
73}
74
75Status OpenCamera(int* camera_handle) {
76  const char* dev_name = "/dev/video0";
77  int fd = v4l2_open(dev_name, O_RDWR | O_NONBLOCK, 0);
78  if (fd < 0) {
79    LOG(ERROR) << "Cannot open camera device";
80    return tensorflow::errors::NotFound("V4L2 camera device not found");
81  }
82  *camera_handle = fd;
83  return Status::OK();
84}
85
// Closes a camera file descriptor previously obtained from OpenCamera.
// Always reports success; the v4l2_close return value is not checked.
Status CloseCamera(int camera_handle) {
  v4l2_close(camera_handle);
  return Status::OK();
}
90
91Status SetCameraFormat(int camera_handle, int wanted_width, int wanted_height) {
92  struct v4l2_format fmt;
93  memset(&fmt, 0, sizeof(fmt));
94  fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
95  fmt.fmt.pix.width = wanted_width;
96  fmt.fmt.pix.height = wanted_height;
97  fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB24;
98  fmt.fmt.pix.field = V4L2_FIELD_INTERLACED;
99  Status set_format_status =
100      SendCameraCommand(camera_handle, VIDIOC_S_FMT, &fmt);
101  if (!set_format_status.ok()) {
102    LOG(ERROR) << "Setting format failed with " << set_format_status;
103    return set_format_status;
104  }
105  if (fmt.fmt.pix.pixelformat != V4L2_PIX_FMT_RGB24) {
106    LOG(ERROR) << "Libv4l didn't accept RGB24 format. Can't proceed.";
107    return tensorflow::errors::Unknown("Libv4l didn't accept RGB24 format");
108  }
109  if ((fmt.fmt.pix.width != wanted_width) ||
110      (fmt.fmt.pix.height != wanted_height)) {
111    LOG(WARNING) << "Warning: driver is sending image at " << fmt.fmt.pix.width
112                 << "x" << fmt.fmt.pix.height;
113  }
114  return Status::OK();
115}
116
117Status StartCameraCapture(int camera_handle, int buffer_count,
118                          CameraBuffer** buffers) {
119  struct v4l2_requestbuffers req;
120  memset(&req, 0, sizeof(req));
121  req.count = buffer_count;
122  req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
123  req.memory = V4L2_MEMORY_MMAP;
124  Status request_buffers_status =
125      SendCameraCommand(camera_handle, VIDIOC_REQBUFS, &req);
126  if (!request_buffers_status.ok()) {
127    LOG(ERROR) << "Request buffers failed with " << request_buffers_status;
128    return request_buffers_status;
129  }
130
131  *buffers = (CameraBuffer*)(calloc(buffer_count, sizeof(*buffers)));
132  for (int n_buffers = 0; n_buffers < buffer_count; ++n_buffers) {
133    struct v4l2_buffer buf;
134    memset(&buf, 0, sizeof(buf));
135    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
136    buf.memory = V4L2_MEMORY_MMAP;
137    buf.index = n_buffers;
138    Status query_buffer_status =
139        SendCameraCommand(camera_handle, VIDIOC_QUERYBUF, &buf);
140    if (!query_buffer_status.ok()) {
141      LOG(ERROR) << "Query buffer failed with " << query_buffer_status;
142      return query_buffer_status;
143    }
144    (*buffers)[n_buffers].length = buf.length;
145    (*buffers)[n_buffers].start =
146        v4l2_mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED,
147                  camera_handle, buf.m.offset);
148
149    if (MAP_FAILED == (*buffers)[n_buffers].start) {
150      LOG(ERROR) << "Memory-mapping buffer failed";
151      return tensorflow::errors::Unknown("Memory-mapping buffer failed");
152    }
153  }
154
155  for (int i = 0; i < buffer_count; ++i) {
156    struct v4l2_buffer buf;
157    memset(&buf, 0, sizeof(buf));
158    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
159    buf.memory = V4L2_MEMORY_MMAP;
160    buf.index = i;
161    Status set_buffer_status =
162        SendCameraCommand(camera_handle, VIDIOC_QBUF, &buf);
163    if (!set_buffer_status.ok()) {
164      LOG(ERROR) << "Set buffer failed with " << set_buffer_status;
165      return set_buffer_status;
166    }
167  }
168
169  enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
170  Status stream_on_status =
171      SendCameraCommand(camera_handle, VIDIOC_STREAMON, &type);
172  if (!stream_on_status.ok()) {
173    LOG(ERROR) << "Turning stream on failed with " << stream_on_status;
174    return stream_on_status;
175  }
176  return Status::OK();
177}
178
179Status EndCameraCapture(int camera_handle, CameraBuffer* buffers,
180                        int buffer_count) {
181  enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
182  Status stream_off_status =
183      SendCameraCommand(camera_handle, VIDIOC_STREAMOFF, &type);
184  if (!stream_off_status.ok()) {
185    LOG(ERROR) << "Turning stream off failed with " << stream_off_status;
186    return stream_off_status;
187  }
188  for (int i = 0; i < buffer_count; ++i)
189    v4l2_munmap(buffers[i].start, buffers[i].length);
190  return Status::OK();
191}
192
193Status CaptureNextFrame(int camera_handle, CameraBuffer* buffers,
194                        uint8_t** frame_data, int* frame_data_size,
195                        v4l2_buffer* buf) {
196  int r;
197  do {
198    fd_set fds;
199    FD_ZERO(&fds);
200    FD_SET(camera_handle, &fds);
201    struct timeval tv;
202    tv.tv_sec = 2;
203    tv.tv_usec = 0;
204    r = select(camera_handle + 1, &fds, NULL, NULL, &tv);
205  } while ((r == -1 && (errno = EINTR)));
206  if (r == -1) {
207    LOG(ERROR) << "select() failed while waiting for the camera with " << errno;
208    return tensorflow::errors::Unknown(
209        "CaptureCameraFrame: select() failed with", errno);
210  }
211
212  memset(buf, 0, sizeof(*buf));
213  buf->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
214  buf->memory = V4L2_MEMORY_MMAP;
215  Status get_buffer_status =
216      SendCameraCommand(camera_handle, VIDIOC_DQBUF, buf);
217  if (!get_buffer_status.ok()) {
218    LOG(ERROR) << "Get buffer failed with " << get_buffer_status;
219    return get_buffer_status;
220  }
221
222  *frame_data = static_cast<uint8_t*>(buffers[buf->index].start);
223  *frame_data_size = buf->bytesused;
224
225  return Status::OK();
226}
227
228Status ReleaseFrame(int camera_handle, v4l2_buffer* buf) {
229  Status release_buffer_status =
230      SendCameraCommand(camera_handle, VIDIOC_QBUF, buf);
231  if (!release_buffer_status.ok()) {
232    LOG(ERROR) << "Release buffer failed with " << release_buffer_status;
233    return release_buffer_status;
234  }
235}
236
237// Reads a model graph definition from disk, and creates a session object you
238// can use to run it.
239Status LoadGraph(string graph_file_name,
240                 std::unique_ptr<tensorflow::Session>* session) {
241  tensorflow::GraphDef graph_def;
242  Status load_graph_status =
243      ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def);
244  if (!load_graph_status.ok()) {
245    return tensorflow::errors::NotFound("Failed to load compute graph at '",
246                                        graph_file_name, "'");
247  }
248  session->reset(tensorflow::NewSession(tensorflow::SessionOptions()));
249  Status session_create_status = (*session)->Create(graph_def);
250  if (!session_create_status.ok()) {
251    return session_create_status;
252  }
253  return Status::OK();
254}
255
256// Analyzes the output of the Inception graph to retrieve the highest scores and
257// their positions in the tensor, which correspond to categories.
258Status GetTopLabels(const std::vector<Tensor>& outputs, int how_many_labels,
259                    Tensor* out_indices, Tensor* out_scores) {
260  const Tensor& unsorted_scores_tensor = outputs[0];
261  auto unsorted_scores_flat = unsorted_scores_tensor.flat<float>();
262  std::vector<std::pair<int, float>> scores;
263  for (int i = 0; i < unsorted_scores_flat.size(); ++i) {
264    scores.push_back(std::pair<int, float>({i, unsorted_scores_flat(i)}));
265  }
266  std::sort(scores.begin(), scores.end(),
267            [](const std::pair<int, float>& left,
268               const std::pair<int, float>& right) {
269              return left.second > right.second;
270            });
271  scores.resize(how_many_labels);
272  Tensor sorted_indices(tensorflow::DT_INT32, {scores.size()});
273  Tensor sorted_scores(tensorflow::DT_FLOAT, {scores.size()});
274  for (int i = 0; i < scores.size(); ++i) {
275    sorted_indices.flat<int>()(i) = scores[i].first;
276    sorted_scores.flat<float>()(i) = scores[i].second;
277  }
278  *out_indices = sorted_indices;
279  *out_scores = sorted_scores;
280  return Status::OK();
281}
282
283// Takes a file name, and loads a list of labels from it, one per line, and
284// returns a vector of the strings. It pads with empty strings so the length
285// of the result is a multiple of 16, because our model expects that.
286Status ReadLabelsFile(string file_name, std::vector<string>* result,
287                      size_t* found_label_count) {
288  std::ifstream file(file_name);
289  if (!file) {
290    return tensorflow::errors::NotFound("Labels file ", file_name,
291                                        " not found.");
292  }
293  result->clear();
294  string line;
295  while (std::getline(file, line)) {
296    result->push_back(line);
297  }
298  *found_label_count = result->size();
299  const int padding = 16;
300  while (result->size() % padding) {
301    result->emplace_back();
302  }
303  return Status::OK();
304}
305
306// Given the output of a model run, and the name of a file containing the labels
307// this prints out the top five highest-scoring values.
308Status PrintTopLabels(const std::vector<Tensor>& outputs,
309                      const std::vector<string>& labels, int label_count,
310                      float print_threshold) {
311  const int how_many_labels = std::min(5, static_cast<int>(label_count));
312  Tensor indices;
313  Tensor scores;
314  TF_RETURN_IF_ERROR(GetTopLabels(outputs, how_many_labels, &indices, &scores));
315  tensorflow::TTypes<float>::Flat scores_flat = scores.flat<float>();
316  tensorflow::TTypes<int32>::Flat indices_flat = indices.flat<int32>();
317  for (int pos = 0; pos < how_many_labels; ++pos) {
318    const int label_index = indices_flat(pos);
319    const float score = scores_flat(pos);
320    LOG(INFO) << labels[label_index] << " (" << label_index << "): " << score;
321    // Print the top label to stdout if it's above a threshold.
322    if ((pos == 0) && (score > print_threshold)) {
323      std::cout << labels[label_index] << std::endl;
324    }
325  }
326  return Status::OK();
327}
328
329// Given an image buffer, resize it to the requested size, and then scale the
330// values as desired.
331Status TensorFromFrame(uint8_t* image_data, int image_width, int image_height,
332                       int image_channels, const int wanted_height,
333                       const int wanted_width, const float input_mean,
334                       const float input_std,
335                       std::vector<Tensor>* out_tensors) {
336  const int wanted_channels = 3;
337  if (image_channels < wanted_channels) {
338    return tensorflow::errors::FailedPrecondition(
339        "Image needs to have at least ", wanted_channels, " but only has ",
340        image_channels);
341  }
342  // In these loops, we convert the eight-bit data in the image into float,
343  // resize it using bilinear filtering, and scale it numerically to the float
344  // range that the model expects (given by input_mean and input_std).
345  tensorflow::Tensor image_tensor(
346      tensorflow::DT_FLOAT,
347      tensorflow::TensorShape(
348          {1, wanted_height, wanted_width, wanted_channels}));
349  auto image_tensor_mapped = image_tensor.tensor<float, 4>();
350  tensorflow::uint8* in = image_data;
351  float* out = image_tensor_mapped.data();
352  const size_t image_rowlen = image_width * image_channels;
353  const float width_scale = static_cast<float>(image_width) / wanted_width;
354  const float height_scale = static_cast<float>(image_height) / wanted_height;
355  for (int y = 0; y < wanted_height; ++y) {
356    const float in_y = y * height_scale;
357    const int top_y_index = static_cast<int>(floorf(in_y));
358    const int bottom_y_index =
359        std::min(static_cast<int>(ceilf(in_y)), (image_height - 1));
360    const float y_lerp = in_y - top_y_index;
361    tensorflow::uint8* in_top_row = in + (top_y_index * image_rowlen);
362    tensorflow::uint8* in_bottom_row = in + (bottom_y_index * image_rowlen);
363    float* out_row = out + (y * wanted_width * wanted_channels);
364    for (int x = 0; x < wanted_width; ++x) {
365      const float in_x = x * width_scale;
366      const int left_x_index = static_cast<int>(floorf(in_x));
367      const int right_x_index =
368          std::min(static_cast<int>(ceilf(in_x)), (image_width - 1));
369      tensorflow::uint8* in_top_left_pixel =
370          in_top_row + (left_x_index * wanted_channels);
371      tensorflow::uint8* in_top_right_pixel =
372          in_top_row + (right_x_index * wanted_channels);
373      tensorflow::uint8* in_bottom_left_pixel =
374          in_bottom_row + (left_x_index * wanted_channels);
375      tensorflow::uint8* in_bottom_right_pixel =
376          in_bottom_row + (right_x_index * wanted_channels);
377      const float x_lerp = in_x - left_x_index;
378      float* out_pixel = out_row + (x * wanted_channels);
379      for (int c = 0; c < wanted_channels; ++c) {
380        const float top_left((in_top_left_pixel[c] - input_mean) / input_std);
381        const float top_right((in_top_right_pixel[c] - input_mean) / input_std);
382        const float bottom_left((in_bottom_left_pixel[c] - input_mean) /
383                                input_std);
384        const float bottom_right((in_bottom_right_pixel[c] - input_mean) /
385                                 input_std);
386        const float top = top_left + (top_right - top_left) * x_lerp;
387        const float bottom =
388            bottom_left + (bottom_right - bottom_left) * x_lerp;
389        out_pixel[c] = top + (bottom - top) * y_lerp;
390      }
391    }
392  }
393
394  out_tensors->push_back(image_tensor);
395  return Status::OK();
396}
397
398int main(int argc, char** argv) {
399  string graph =
400      "tensorflow/contrib/pi_examples/label_image/data/"
401      "tensorflow_inception_stripped.pb";
402  string labels_file_name =
403      "tensorflow/contrib/pi_examples/label_image/data/"
404      "imagenet_comp_graph_label_strings.txt";
405  int32 input_width = 299;
406  int32 input_height = 299;
407  int32 input_mean = 128;
408  int32 input_std = 128;
409  string input_layer = "Mul";
410  string output_layer = "softmax";
411  int32 video_width = 640;
412  int32 video_height = 480;
413  int print_threshold = 50;
414  string root_dir = "";
415  std::vector<Flag> flag_list = {
416      Flag("graph", &graph, "graph file name"),
417      Flag("labels", &labels_file_name, "labels file name"),
418      Flag("input_width", &input_width, "image input width"),
419      Flag("input_height", &input_height, "image input height"),
420      Flag("input_mean", &input_mean, "transformed mean of input pixels"),
421      Flag("input_std", &input_std, "transformed std dev of input pixels"),
422      Flag("input_layer", &input_layer, "input layer name"),
423      Flag("output_layer", &output_layer, "output layer name"),
424      Flag("video_width", &video_width, "video width expected from device"),
425      Flag("video_height", &video_height, "video height expected from device"),
426      Flag("print_threshold", &print_threshold,
427           "print labels with scoe exceeding this"),
428      Flag("root_dir", &root_dir,
429           "interpret graph file name relative to this directory")};
430  string usage = tensorflow::Flags::Usage(argv[0], flag_list);
431  const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
432
433  if (!parse_result || argc != 1) {
434    LOG(ERROR) << "\n" << usage;
435    return -1;
436  }
437
438  // First we load and initialize the model.
439  std::unique_ptr<tensorflow::Session> session;
440  string graph_path = tensorflow::io::JoinPath(root_dir, graph);
441  Status load_graph_status = LoadGraph(graph_path, &session);
442  if (!load_graph_status.ok()) {
443    LOG(ERROR) << load_graph_status;
444    return -1;
445  }
446
447  std::vector<string> labels;
448  size_t label_count;
449  Status read_labels_status =
450      ReadLabelsFile(labels_file_name, &labels, &label_count);
451  if (!read_labels_status.ok()) {
452    LOG(ERROR) << read_labels_status;
453    return -1;
454  }
455
456  int camera_handle;
457  Status open_status = OpenCamera(&camera_handle);
458  if (!open_status.ok()) {
459    LOG(ERROR) << "OpenCamera failed with " << open_status;
460    return -1;
461  }
462
463  Status format_status =
464      SetCameraFormat(camera_handle, video_width, video_height);
465  if (!format_status.ok()) {
466    LOG(ERROR) << "SetCameraFormat failed with " << format_status;
467    return -1;
468  }
469
470  const int how_many_buffers = 2;
471  CameraBuffer* buffers;
472  Status start_capture_status =
473      StartCameraCapture(camera_handle, how_many_buffers, &buffers);
474  if (!start_capture_status.ok()) {
475    LOG(ERROR) << "StartCameraCapture failed with " << start_capture_status;
476    return -1;
477  }
478
479  for (int i = 0; i < 200; i++) {
480    uint8_t* frame_data;
481    int frame_data_size;
482    v4l2_buffer buf;
483    Status capture_next_status = CaptureNextFrame(
484        camera_handle, buffers, &frame_data, &frame_data_size, &buf);
485    if (!capture_next_status.ok()) {
486      LOG(ERROR) << "CaptureNextFrame failed with " << capture_next_status;
487      return -1;
488    }
489
490    std::vector<Tensor> resized_tensors;
491    Status tensor_from_frame_status =
492        TensorFromFrame(frame_data, video_width, video_height, 3, input_height,
493                        input_width, input_mean, input_std, &resized_tensors);
494    if (!tensor_from_frame_status.ok()) {
495      LOG(ERROR) << tensor_from_frame_status;
496      return -1;
497    }
498    const Tensor& resized_tensor = resized_tensors[0];
499
500    Status release_frame_status = ReleaseFrame(camera_handle, &buf);
501    if (!release_frame_status.ok()) {
502      LOG(ERROR) << "ReleaseFrame failed with " << release_frame_status;
503      return -1;
504    }
505
506    // Actually run the image through the model.
507    std::vector<Tensor> outputs;
508    Status run_status = session->Run({{input_layer, resized_tensor}},
509                                     {output_layer}, {}, &outputs);
510    if (!run_status.ok()) {
511      LOG(ERROR) << "Running model failed: " << run_status;
512      return -1;
513    }
514
515    // Do something interesting with the results we've generated.
516    Status print_status =
517        PrintTopLabels(outputs, labels, label_count, print_threshold * 0.01f);
518    if (!print_status.ok()) {
519      LOG(ERROR) << "Running print failed: " << print_status;
520      return -1;
521    }
522  }
523
524  Status end_capture_status =
525      EndCameraCapture(camera_handle, buffers, how_many_buffers);
526  if (!end_capture_status.ok()) {
527    LOG(ERROR) << "EndCameraCapture failed with " << end_capture_status;
528    return -1;
529  }
530
531  Status close_status = CloseCamera(camera_handle);
532  if (!close_status.ok()) {
533    LOG(ERROR) << "CloseCamera failed with " << open_status;
534    return -1;
535  }
536
537  return 0;
538}
539