/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
151e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
161e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/text_literal_reader.h"
171e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
181e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <limits>
191e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <string>
201e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <utility>
211e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include <vector>
221e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
231e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/literal_util.h"
241e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/ptr_util.h"
251e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/shape_util.h"
261e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/status_macros.h"
271e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/types.h"
281e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/util.h"
291e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/compiler/xla/xla_data.pb.h"
301e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/core/stringpiece.h"
311e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/io/buffered_inputstream.h"
321e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/io/random_inputstream.h"
331e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/lib/strings/str_util.h"
341e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/platform/protobuf.h"
351e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins#include "tensorflow/core/platform/types.h"
361e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
371e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsnamespace xla {
381e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
391e67c90e2caceeff82d09793d1ef5fa0300d219bPeter HawkinsStatusOr<std::unique_ptr<Literal>> TextLiteralReader::ReadPath(
401e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    tensorflow::StringPiece path) {
411e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  CHECK(!path.ends_with(".gz"))
421e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      << "TextLiteralReader no longer supports reading .gz files";
431e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::unique_ptr<tensorflow::RandomAccessFile> file;
441e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  Status s =
451e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      tensorflow::Env::Default()->NewRandomAccessFile(path.ToString(), &file);
461e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  if (!s.ok()) {
471e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    return s;
481e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
491e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
501e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  TextLiteralReader reader(file.release());
511e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  return reader.ReadAllLines();
521e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins}
531e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
541e67c90e2caceeff82d09793d1ef5fa0300d219bPeter HawkinsTextLiteralReader::TextLiteralReader(tensorflow::RandomAccessFile* file)
551e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    : file_(file) {}
561e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
571e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsnamespace {
581e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// This is an optimized version of tensorflow::str_util::Split which uses
591e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// StringPiece for the delimited strings and uses an out parameter for the
601e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins// result to avoid vector creation/destruction.
611e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkinsvoid SplitByDelimToStringPieces(tensorflow::StringPiece text, char delim,
621e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins                                std::vector<tensorflow::StringPiece>* result) {
631e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  result->clear();
641e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
651e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  if (text.empty()) {
661e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    return;
671e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
681e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
691e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // The following loop is a little strange: its bound is text.size() + 1
701e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // instead of the more typical text.size().
711e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // The final iteration of the loop (when i is equal to text.size()) handles
721e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  // the trailing token.
731e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  size_t token_start = 0;
741e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  for (size_t i = 0; i < text.size() + 1; i++) {
751e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    if (i == text.size() || text[i] == delim) {
761e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      tensorflow::StringPiece token(text.data() + token_start, i - token_start);
771e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      result->push_back(token);
781e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      token_start = i + 1;
791e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    }
801e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
811e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins}
821e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins}  // namespace
831e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
841e67c90e2caceeff82d09793d1ef5fa0300d219bPeter HawkinsStatusOr<std::unique_ptr<Literal>> TextLiteralReader::ReadAllLines() {
851e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  tensorflow::io::RandomAccessInputStream stream(file_.get());
861e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  tensorflow::io::BufferedInputStream buf(&stream, 65536);
871e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  string shape_string;
881e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  Status s = buf.ReadLine(&shape_string);
891e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  if (!s.ok()) {
901e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    return s;
911e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
921e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
931e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  tensorflow::StringPiece sp(shape_string);
941e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  if (tensorflow::str_util::RemoveWhitespaceContext(&sp) > 0) {
951e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    string tmp = sp.ToString();
961e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    shape_string = tmp;
971e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
981e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  TF_ASSIGN_OR_RETURN(Shape shape, ShapeUtil::ParseShapeString(shape_string));
991e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  if (shape.element_type() != F32) {
1001e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    return Unimplemented(
1011e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins        "unsupported element type for text literal reading: %s",
1021e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins        ShapeUtil::HumanString(shape).c_str());
1031e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
1041e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1057d64e124103c8334b7d8b127cd2eff786959d185Mark Heffernan  auto result = MakeUnique<Literal>(shape);
1061e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  const float fill = std::numeric_limits<float>::quiet_NaN();
1077d64e124103c8334b7d8b127cd2eff786959d185Mark Heffernan  result->PopulateWithValue<float>(fill);
1081e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::vector<tensorflow::StringPiece> pieces;
1091e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::vector<tensorflow::StringPiece> coordinates;
1101e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  std::vector<int64> coordinate_values;
1111e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  string line;
1121e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  while (buf.ReadLine(&line).ok()) {
1131e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    SplitByDelimToStringPieces(line, ':', &pieces);
1141e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    tensorflow::StringPiece coordinates_string = pieces[0];
1151e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    tensorflow::StringPiece value_string = pieces[1];
1161e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    tensorflow::str_util::RemoveWhitespaceContext(&coordinates_string);
1171e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    tensorflow::str_util::RemoveWhitespaceContext(&value_string);
1181e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    if (!coordinates_string.Consume("(")) {
1191e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      return InvalidArgument(
1201e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins          "expected '(' at the beginning of coordinates: \"%s\"", line.c_str());
1211e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    }
1221e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    if (!tensorflow::str_util::ConsumeSuffix(&coordinates_string, ")")) {
1231e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      return InvalidArgument("expected ')' at the end of coordinates: \"%s\"",
1241e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins                             line.c_str());
1251e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    }
1261e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    float value;
1271e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    if (!tensorflow::strings::safe_strtof(value_string.ToString().c_str(),
1281e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins                                          &value)) {
1291e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      return InvalidArgument("could not parse value as float: \"%s\"",
1301e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins                             value_string.ToString().c_str());
1311e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    }
1321e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    SplitByDelimToStringPieces(coordinates_string, ',', &coordinates);
1331e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    coordinate_values.clear();
1341e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    for (tensorflow::StringPiece piece : coordinates) {
1351e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      int64 coordinate_value;
1361e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      if (!tensorflow::strings::safe_strto64(piece, &coordinate_value)) {
1371e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins        return InvalidArgument(
1381e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins            "could not parse coordinate member as int64: \"%s\"",
1391e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins            piece.ToString().c_str());
1401e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      }
1411e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      coordinate_values.push_back(coordinate_value);
1421e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    }
1431e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    if (coordinate_values.size() != shape.dimensions_size()) {
1441e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins      return InvalidArgument(
1451e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins          "line did not have expected number of coordinates; want %d got %zu: "
1461e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins          "\"%s\"",
1471e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins          shape.dimensions_size(), coordinate_values.size(), line.c_str());
1481e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins    }
1495478d53adf4e7af05449cae92fd4f7146caa3ccfA. Unique TensorFlower    result->Set<float>(coordinate_values, value);
1501e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  }
1511e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins  return std::move(result);
1521e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins}
1531e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins
1541e67c90e2caceeff82d09793d1ef5fa0300d219bPeter Hawkins}  // namespace xla
155