1bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi/* 2bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * Copyright (C) 2017 The Android Open Source Project 3bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * 4bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * Licensed under the Apache License, Version 2.0 (the "License"); 5bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * you may not use this file except in compliance with the License. 6bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * You may obtain a copy of the License at 7bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * 8bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * http://www.apache.org/licenses/LICENSE-2.0 9bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * 10bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * Unless required by applicable law or agreed to in writing, software 11bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * distributed under the License is distributed on an "AS IS" BASIS, 12bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * See the License for the specific language governing permissions and 14bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi * limitations under the License. 15bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi */ 16bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 17bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#ifndef LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_ 18bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#define LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_ 19bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 20bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#include "common/memory_image/data-store.h" 21bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#include "common/task-spec.pb.h" 22bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#include "util/strings/stringpiece.h" 23bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 24bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifinamespace libtextclassifier { 25bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifinamespace nlp_core { 26bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 27bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// In-memory representation of data for a Saft model. Provides access to a 28bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// TaskSpec object (produced by the "spec" stage of the Saft training model) and 29bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// to the bytes of the TaskInputs mentioned in that spec (all these bytes are in 30bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// memory, no file I/O required). 31bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// 32bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// Technically, an InMemoryModelData is a DataStore that maps the special string 33bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// kTaskSpecDataStoreEntryName to the binary serialization of a TaskSpec. For 34bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// each TaskInput (of the TaskSpec) with a file_pattern that starts with 35bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// kFilePatternPrefix (see below), the same DataStore maps file_pattern to some 36bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// content bytes. This way, it is possible to have all TaskInputs in memory, 37bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi// while still allowing classic, on-disk TaskInputs. 38bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharificlass InMemoryModelData { 39bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi public: 40bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // Name for the DataStore entry that stores the serialized TaskSpec for the 41bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // entire model. 42bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi static const char kTaskSpecDataStoreEntryName[]; 43bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 44bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // Returns prefix for TaskInput::Part::file_pattern, to distinguish those 45bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // "files" from other files. 46bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi static const char kFilePatternPrefix[]; 47bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 48bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // Constructs an InMemoryModelData based on a chunk of bytes. Those bytes 49bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // should have been produced by a DataStoreBuilder. 50bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi explicit InMemoryModelData(StringPiece bytes) : data_store_(bytes) {} 51bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 52bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // Fills *task_spec with a TaskSpec similar to the one used by 53bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // DataStoreBuilder (when building the bytes used to construct this 54bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // InMemoryModelData) except that each file name 55bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // (TaskInput::Part::file_pattern) is replaced with a name that can be used to 56bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // retrieve the corresponding file content bytes via GetBytesForInputFile(). 57bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // 58bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // Returns true on success, false otherwise. 59bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi bool GetTaskSpec(TaskSpec *task_spec) const; 60bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 61bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // Gets content bytes for a file. The file_name argument should be the 62bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // file_pattern for a TaskInput from the TaskSpec (see GetTaskSpec()). 63bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // Returns a StringPiece indicating a memory area with the content bytes. On 64bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi // error, returns StringPiece(nullptr, 0). 65bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi StringPiece GetBytesForInputFile(const std::string &file_name) const; 66bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 67bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi private: 68bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi const memory_image::DataStore data_store_; 69bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi}; 70bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 71bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi} // namespace nlp_core 72bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi} // namespace libtextclassifier 73bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi 74bda09f1da39ce38a5ece4757b82a64776e53214cMatt Sharifi#endif // LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_ 75