/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
17bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#ifndef LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
18bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#define LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
19bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
20bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#include "common/memory_image/data-store.h"
21bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#include "common/task-spec.pb.h"
22bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#include "util/strings/stringpiece.h"
23bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
24bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifinamespace libtextclassifier {
25bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifinamespace nlp_core {
26bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
27bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// In-memory representation of data for a Saft model.  Provides access to a
28bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// TaskSpec object (produced by the "spec" stage of the Saft training model) and
29bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// to the bytes of the TaskInputs mentioned in that spec (all these bytes are in
30bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// memory, no file I/O required).
31bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi//
32bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// Technically, an InMemoryModelData is a DataStore that maps the special string
33bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// kTaskSpecDataStoreEntryName to the binary serialization of a TaskSpec.  For
34bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// each TaskInput (of the TaskSpec) with a file_pattern that starts with
35bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// kFilePatternPrefix (see below), the same DataStore maps file_pattern to some
36bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// content bytes.  This way, it is possible to have all TaskInputs in memory,
37bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// while still allowing classic, on-disk TaskInputs.
38bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharificlass InMemoryModelData {
39bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi public:
40bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // Name for the DataStore entry that stores the serialized TaskSpec for the
41bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // entire model.
42bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  static const char kTaskSpecDataStoreEntryName[];
43bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
44bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // Returns prefix for TaskInput::Part::file_pattern, to distinguish those
45bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // "files" from other files.
46bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  static const char kFilePatternPrefix[];
47bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
48bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // Constructs an InMemoryModelData based on a chunk of bytes.  Those bytes
49bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // should have been produced by a DataStoreBuilder.
50bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  explicit InMemoryModelData(StringPiece bytes) : data_store_(bytes) {}
51bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
52bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // Fills *task_spec with a TaskSpec similar to the one used by
53bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // DataStoreBuilder (when building the bytes used to construct this
54bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // InMemoryModelData) except that each file name
55bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // (TaskInput::Part::file_pattern) is replaced with a name that can be used to
56bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // retrieve the corresponding file content bytes via GetBytesForInputFile().
57bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  //
58bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // Returns true on success, false otherwise.
59bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  bool GetTaskSpec(TaskSpec *task_spec) const;
60bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
61bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // Gets content bytes for a file.  The file_name argument should be the
62bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // file_pattern for a TaskInput from the TaskSpec (see GetTaskSpec()).
63bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // Returns a StringPiece indicating a memory area with the content bytes.  On
64bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  // error, returns StringPiece(nullptr, 0).
65bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  StringPiece GetBytesForInputFile(const std::string &file_name) const;
66bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
67bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi private:
68bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi  const memory_image::DataStore data_store_;
69bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi};
70bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
71bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi}  // namespace nlp_core
72bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi}  // namespace libtextclassifier
73bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi
74bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#endif  // LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
75