delta_diff_generator.h revision 52490e776a9d34a94fb18ab1fe7d416425e94dda
1// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef UPDATE_ENGINE_PAYLOAD_GENERATOR_DELTA_DIFF_GENERATOR_H_
6#define UPDATE_ENGINE_PAYLOAD_GENERATOR_DELTA_DIFF_GENERATOR_H_
7
8#include <set>
9#include <string>
10#include <vector>
11
12#include <base/macros.h>
13#include <chromeos/secure_blob.h>
14
15#include "update_engine/payload_constants.h"
16#include "update_engine/payload_generator/extent_utils.h"
17#include "update_engine/payload_generator/graph_types.h"
18#include "update_engine/payload_generator/operations_generator.h"
19#include "update_engine/payload_generator/payload_generation_config.h"
20#include "update_engine/update_metadata.pb.h"
21
22// There is one function in DeltaDiffGenerator of importance to users
23// of the class: GenerateDeltaUpdateFile(). Before calling it,
24// the old and new images must be mounted. Call GenerateDeltaUpdateFile()
25// with both the mount-points of the images in addition to the paths of
26// the images (both old and new). A delta from old to new will be
27// generated and stored in output_path.
28
29namespace chromeos_update_engine {
30
31extern const char* const kEmptyPath;
32extern const size_t kBlockSize;
33extern const size_t kRootFSPartitionSize;
34
35class DeltaDiffGenerator : public OperationsGenerator {
36 public:
37  DeltaDiffGenerator() = default;
38
39  // Represents a disk block on the install partition.
40  struct Block {
41    // During install, each block on the install partition will be written
42    // and some may be read (in all likelihood, many will be read).
43    // The reading and writing will be performed by InstallOperations,
44    // each of which has a corresponding vertex in a graph.
45    // A Block object tells which vertex will read or write this block
46    // at install time.
47    // Generally, there will be a vector of Block objects whose length
48    // is the number of blocks on the install partition.
49    Block() : reader(Vertex::kInvalidIndex), writer(Vertex::kInvalidIndex) {}
50    Vertex::Index reader;
51    Vertex::Index writer;
52  };
53
54  // These functions are public so that the unit tests can access them:
55
56  // Generate the update payload operations for the kernel and rootfs using
57  // SOURCE_* operations, used to generate deltas for the minor version
58  // kSourceMinorPayloadVersion. This function will generate operations in the
59  // rootfs that will read blocks from the source partition in random order and
60  // write the new image on the target partition, also possibly in random order.
61  // The rootfs operations are stored in |rootfs_ops| and should be executed in
62  // that order. The kernel operations are stored in |kernel_ops|. All
63  // the offsets in the operations reference the data written to |data_file_fd|.
64  // The total amount of data written to that file is stored in
65  // |data_file_size|.
66  bool GenerateOperations(
67      const PayloadGenerationConfig& config,
68      int data_file_fd,
69      off_t* data_file_size,
70      std::vector<AnnotatedOperation>* rootfs_ops,
71      std::vector<AnnotatedOperation>* kernel_ops) override;
72
73  // For each regular file within new_root, creates a node in the graph,
74  // determines the best way to compress it (REPLACE, REPLACE_BZ, COPY, BSDIFF),
75  // and writes any necessary data to the end of data_fd.
76  static bool DeltaReadFiles(Graph* graph,
77                             std::vector<Block>* blocks,
78                             const std::string& old_part,
79                             const std::string& new_part,
80                             const std::string& old_root,
81                             const std::string& new_root,
82                             off_t chunk_size,
83                             int data_fd,
84                             off_t* data_file_size,
85                             bool src_ops_allowed);
86
87  // For a given regular file which must exist at new_root + path, and
88  // may exist at old_root + path, creates a new InstallOperation and
89  // adds it to the graph. Also, populates the |blocks| array as
90  // necessary, if |blocks| is non-null.  Also, writes the data
91  // necessary to send the file down to the client into data_fd, which
92  // has length *data_file_size. *data_file_size is updated
93  // appropriately. If |existing_vertex| is no kInvalidIndex, use that
94  // rather than allocating a new vertex. Returns true on success.
95  static bool DeltaReadFile(Graph* graph,
96                            Vertex::Index existing_vertex,
97                            std::vector<Block>* blocks,
98                            const std::string& old_part,
99                            const std::string& new_part,
100                            const std::string& old_root,
101                            const std::string& new_root,
102                            const std::string& path,
103                            off_t chunk_offset,
104                            off_t chunk_size,
105                            int data_fd,
106                            off_t* data_file_size,
107                            bool src_ops_allowed);
108
109  // Reads old_filename (if it exists) and a new_filename and determines
110  // the smallest way to encode this file for the diff. It reads extents from
111  // |old_part| and |new_part|. It stores necessary data in out_data and fills
112  // in out_op. If there's no change in old and new files, it creates a MOVE
113  // operation. If there is a change, or the old file doesn't exist,
114  // the smallest of REPLACE, REPLACE_BZ, or BSDIFF wins.
115  // new_filename must contain at least one byte.
116  // |new_filename| is read starting at |chunk_offset|.
117  // If |chunk_size| is not -1, only up to |chunk_size| bytes are diffed.
118  // If |src_ops_allowed| is true, it will emit SOURCE_COPY and SOURCE_BSDIFF
119  // operations instead of MOVE and BSDIFF, respectively.
120  // Returns true on success.
121  static bool ReadFileToDiff(const std::string& old_part,
122                             const std::string& new_part,
123                             off_t chunk_offset,
124                             off_t chunk_size,
125                             bool bsdiff_allowed,
126                             chromeos::Blob* out_data,
127                             DeltaArchiveManifest_InstallOperation* out_op,
128                             bool gather_extents,
129                             bool src_ops_allowed,
130                             const std::string& old_filename,
131                             const std::string& new_filename);
132
133  // Delta compresses a kernel partition |new_kernel_part| with knowledge of the
134  // old kernel partition |old_kernel_part|. If |old_kernel_part| is an empty
135  // string, generates a full update of the partition.
136  static bool DeltaCompressKernelPartition(
137      const std::string& old_kernel_part,
138      const std::string& new_kernel_part,
139      std::vector<AnnotatedOperation>* ops,
140      int blobs_fd,
141      off_t* blobs_length,
142      bool src_ops_allowed);
143
144  // Reads blocks from image_path that are not yet marked as being written in
145  // the blocks array. These blocks that remain are either unchanged files or
146  // non-file-data blocks.  We compare each of them to the old image, and
147  // compress the ones that changed into a single REPLACE_BZ operation. This
148  // updates a newly created node in the graph to write these blocks and writes
149  // the appropriate blob to blobs_fd. Reads and updates blobs_length.
150  static bool ReadUnwrittenBlocks(
151      const std::vector<Block>& blocks,
152      int blobs_fd,
153      off_t* blobs_length,
154      const std::string& old_image_path,
155      const uint64_t old_image_size,
156      const std::string& new_image_path,
157      Vertex* vertex,
158      uint32_t minor_version);
159
160  // Stores all Extents in 'extents' into 'out'.
161  static void StoreExtents(const std::vector<Extent>& extents,
162                           google::protobuf::RepeatedPtrField<Extent>* out);
163
164  // Stores all extents in |extents| into |out_vector|.
165  static void ExtentsToVector(
166    const google::protobuf::RepeatedPtrField<Extent>& extents,
167    std::vector<Extent>* out_vector);
168
169  // Install operations in the manifest may reference data blobs, which
170  // are in data_blobs_path. This function creates a new data blobs file
171  // with the data blobs in the same order as the referencing install
172  // operations in the manifest. E.g. if manifest[0] has a data blob
173  // "X" at offset 1, manifest[1] has a data blob "Y" at offset 0,
174  // and data_blobs_path's file contains "YX", new_data_blobs_path
175  // will set to be a file that contains "XY".
176  static bool ReorderDataBlobs(DeltaArchiveManifest* manifest,
177                               const std::string& data_blobs_path,
178                               const std::string& new_data_blobs_path);
179
180  // Computes a SHA256 hash of the given buf and sets the hash value in the
181  // operation so that update_engine could verify. This hash should be set
182  // for all operations that have a non-zero data blob. One exception is the
183  // dummy operation for signature blob because the contents of the signature
184  // blob will not be available at payload creation time. So, update_engine will
185  // gracefully ignore the dummy signature operation.
186  static bool AddOperationHash(DeltaArchiveManifest_InstallOperation* op,
187                               const chromeos::Blob& buf);
188
189
190  // Returns true if |op| is a no-op operation that doesn't do any useful work
191  // (e.g., a move operation that copies blocks onto themselves).
192  static bool IsNoopOperation(const DeltaArchiveManifest_InstallOperation& op);
193
194  // Filters all the operations that are no-op, maintaining the relative order
195  // of the rest of the operations.
196  static void FilterNoopOperations(std::vector<AnnotatedOperation>* ops);
197
198  static bool InitializePartitionInfo(bool is_kernel,
199                                      const std::string& partition,
200                                      PartitionInfo* info);
201
202  // Runs the bsdiff tool on two files and returns the resulting delta in
203  // |out|. Returns true on success.
204  static bool BsdiffFiles(const std::string& old_file,
205                          const std::string& new_file,
206                          chromeos::Blob* out);
207
208  // Adds to |manifest| a dummy operation that points to a signature blob
209  // located at the specified offset/length.
210  static void AddSignatureOp(uint64_t signature_blob_offset,
211                             uint64_t signature_blob_length,
212                             DeltaArchiveManifest* manifest);
213
214  // Takes a collection (vector or RepeatedPtrField) of Extent and
215  // returns a vector of the blocks referenced, in order.
216  template<typename T>
217  static std::vector<uint64_t> ExpandExtents(const T& extents) {
218    std::vector<uint64_t> ret;
219    for (size_t i = 0, e = static_cast<size_t>(extents.size()); i != e; ++i) {
220      const Extent extent = GetElement(extents, i);
221      if (extent.start_block() == kSparseHole) {
222        ret.resize(ret.size() + extent.num_blocks(), kSparseHole);
223      } else {
224        for (uint64_t block = extent.start_block();
225             block < (extent.start_block() + extent.num_blocks()); block++) {
226          ret.push_back(block);
227        }
228      }
229    }
230    return ret;
231  }
232
233  // Takes a vector of extents and removes extents that begin in a sparse hole.
234  static void ClearSparseHoles(std::vector<Extent>* extents);
235
236  // Takes a vector of AnnotatedOperations |aops| and fragments those operations
237  // such that there is only one dst extent per operation. Sets |aops| to a
238  // vector of the new fragmented operations.
239  static bool FragmentOperations(std::vector<AnnotatedOperation>* aops,
240                                 const std::string& target_rootfs_part,
241                                 int data_fd,
242                                 off_t* data_file_size);
243
244  // Takes a vector of AnnotatedOperations |aops| and sorts them by the first
245  // start block in their destination extents. Sets |aops| to a vector of the
246  // sorted operations.
247  static void SortOperationsByDestination(
248      std::vector<AnnotatedOperation>* aops);
249
250  // Takes an SOURCE_COPY install operation, |aop|, and adds one operation for
251  // each dst extent in |aop| to |ops|. The new operations added to |ops| will
252  // have only one dst extent. The src extents are split so the number of blocks
253  // in the src and dst extents are equal.
254  // E.g. we have a SOURCE_COPY operation:
255  //   src extents: [(1, 3), (5, 1), (7, 1)], dst extents: [(2, 2), (6, 3)]
256  // Then we will get 2 new operations:
257  //   1. src extents: [(1, 2)], dst extents: [(2, 2)]
258  //   2. src extents: [(3, 1),(5, 1),(7, 1)], dst extents: [(6, 3)]
259  static bool SplitSourceCopy(const AnnotatedOperation& original_aop,
260                              std::vector<AnnotatedOperation>* result_aops);
261
262  // Takes a REPLACE/REPLACE_BZ operation |aop|, and adds one operation for each
263  // dst extent in |aop| to |ops|. The new operations added to |ops| will have
264  // only one dst extent each, and may be either a REPLACE or REPLACE_BZ
265  // depending on whether compression is advantageous.
266  static bool SplitReplaceOrReplaceBz(
267      const AnnotatedOperation& original_aop,
268      std::vector<AnnotatedOperation>* result_aops,
269      const std::string& target_part,
270      int data_fd,
271      off_t* data_file_size);
272
273  // Takes a sorted (by first destination extent) vector of operations |aops|
274  // and merges SOURCE_COPY, REPLACE, and REPLACE_BZ operations in that vector.
275  // It will merge two operations if:
276  //   - They are of the same type.
277  //   - They are contiguous.
278  //   - Their combined blocks do not exceed |chunk_size|.
279  static bool MergeOperations(std::vector<AnnotatedOperation>* aops,
280                              off_t chunk_size,
281                              const std::string& target_part,
282                              int data_fd,
283                              off_t* data_file_size);
284
285  // Takes a pointer to extents |extents| and extents |extents_to_add|, and
286  // merges them by adding |extents_to_add| to |extents| and normalizing.
287  static void ExtendExtents(
288    google::protobuf::RepeatedPtrField<Extent>* extents,
289    const google::protobuf::RepeatedPtrField<Extent>& extents_to_add);
290
291 private:
292  // Adds the data payload for a REPLACE/REPLACE_BZ operation |aop| by reading
293  // its output extents from |target_part_path| and appending a corresponding
294  // data blob to |data_fd|. The blob will be compressed if this is smaller than
295  // the uncompressed form, and the operation type will be set accordingly.
296  // |*data_file_size| will be updated as well. If the operation happens to have
297  // the right type and already points to a data blob, we check whether its
298  // content is identical to the new one, in which case nothing is written.
299  static bool AddDataAndSetType(AnnotatedOperation* aop,
300                                const std::string& target_part_path,
301                                int data_fd,
302                                off_t* data_file_size);
303
304  DISALLOW_COPY_AND_ASSIGN(DeltaDiffGenerator);
305};
306
307// This is the only function that external users of this module should call.
308// The |config| describes the payload generation request, describing both
309// old and new images for delta payloads and only the new image for full
310// payloads.
311// For delta payloads, the images should be already mounted read-only at
312// the respective rootfs_mountpt.
313// |private_key_path| points to a private key used to sign the update.
314// Pass empty string to not sign the update.
315// |output_path| is the filename where the delta update should be written.
316// Returns true on success. Also writes the size of the metadata into
317// |metadata_size|.
318bool GenerateUpdatePayloadFile(const PayloadGenerationConfig& config,
319                               const std::string& output_path,
320                               const std::string& private_key_path,
321                               uint64_t* metadata_size);
322
323
324};  // namespace chromeos_update_engine
325
326#endif  // UPDATE_ENGINE_PAYLOAD_GENERATOR_DELTA_DIFF_GENERATOR_H_
327