delta_diff_generator.h revision 52490e776a9d34a94fb18ab1fe7d416425e94dda
1// Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef UPDATE_ENGINE_PAYLOAD_GENERATOR_DELTA_DIFF_GENERATOR_H_ 6#define UPDATE_ENGINE_PAYLOAD_GENERATOR_DELTA_DIFF_GENERATOR_H_ 7 8#include <set> 9#include <string> 10#include <vector> 11 12#include <base/macros.h> 13#include <chromeos/secure_blob.h> 14 15#include "update_engine/payload_constants.h" 16#include "update_engine/payload_generator/extent_utils.h" 17#include "update_engine/payload_generator/graph_types.h" 18#include "update_engine/payload_generator/operations_generator.h" 19#include "update_engine/payload_generator/payload_generation_config.h" 20#include "update_engine/update_metadata.pb.h" 21 22// There is one function in DeltaDiffGenerator of importance to users 23// of the class: GenerateDeltaUpdateFile(). Before calling it, 24// the old and new images must be mounted. Call GenerateDeltaUpdateFile() 25// with both the mount-points of the images in addition to the paths of 26// the images (both old and new). A delta from old to new will be 27// generated and stored in output_path. 28 29namespace chromeos_update_engine { 30 31extern const char* const kEmptyPath; 32extern const size_t kBlockSize; 33extern const size_t kRootFSPartitionSize; 34 35class DeltaDiffGenerator : public OperationsGenerator { 36 public: 37 DeltaDiffGenerator() = default; 38 39 // Represents a disk block on the install partition. 40 struct Block { 41 // During install, each block on the install partition will be written 42 // and some may be read (in all likelihood, many will be read). 43 // The reading and writing will be performed by InstallOperations, 44 // each of which has a corresponding vertex in a graph. 45 // A Block object tells which vertex will read or write this block 46 // at install time. 47 // Generally, there will be a vector of Block objects whose length 48 // is the number of blocks on the install partition. 49 Block() : reader(Vertex::kInvalidIndex), writer(Vertex::kInvalidIndex) {} 50 Vertex::Index reader; 51 Vertex::Index writer; 52 }; 53 54 // These functions are public so that the unit tests can access them: 55 56 // Generate the update payload operations for the kernel and rootfs using 57 // SOURCE_* operations, used to generate deltas for the minor version 58 // kSourceMinorPayloadVersion. This function will generate operations in the 59 // rootfs that will read blocks from the source partition in random order and 60 // write the new image on the target partition, also possibly in random order. 61 // The rootfs operations are stored in |rootfs_ops| and should be executed in 62 // that order. The kernel operations are stored in |kernel_ops|. All 63 // the offsets in the operations reference the data written to |data_file_fd|. 64 // The total amount of data written to that file is stored in 65 // |data_file_size|. 66 bool GenerateOperations( 67 const PayloadGenerationConfig& config, 68 int data_file_fd, 69 off_t* data_file_size, 70 std::vector<AnnotatedOperation>* rootfs_ops, 71 std::vector<AnnotatedOperation>* kernel_ops) override; 72 73 // For each regular file within new_root, creates a node in the graph, 74 // determines the best way to compress it (REPLACE, REPLACE_BZ, COPY, BSDIFF), 75 // and writes any necessary data to the end of data_fd. 76 static bool DeltaReadFiles(Graph* graph, 77 std::vector<Block>* blocks, 78 const std::string& old_part, 79 const std::string& new_part, 80 const std::string& old_root, 81 const std::string& new_root, 82 off_t chunk_size, 83 int data_fd, 84 off_t* data_file_size, 85 bool src_ops_allowed); 86 87 // For a given regular file which must exist at new_root + path, and 88 // may exist at old_root + path, creates a new InstallOperation and 89 // adds it to the graph. Also, populates the |blocks| array as 90 // necessary, if |blocks| is non-null. Also, writes the data 91 // necessary to send the file down to the client into data_fd, which 92 // has length *data_file_size. *data_file_size is updated 93 // appropriately. If |existing_vertex| is no kInvalidIndex, use that 94 // rather than allocating a new vertex. Returns true on success. 95 static bool DeltaReadFile(Graph* graph, 96 Vertex::Index existing_vertex, 97 std::vector<Block>* blocks, 98 const std::string& old_part, 99 const std::string& new_part, 100 const std::string& old_root, 101 const std::string& new_root, 102 const std::string& path, 103 off_t chunk_offset, 104 off_t chunk_size, 105 int data_fd, 106 off_t* data_file_size, 107 bool src_ops_allowed); 108 109 // Reads old_filename (if it exists) and a new_filename and determines 110 // the smallest way to encode this file for the diff. It reads extents from 111 // |old_part| and |new_part|. It stores necessary data in out_data and fills 112 // in out_op. If there's no change in old and new files, it creates a MOVE 113 // operation. If there is a change, or the old file doesn't exist, 114 // the smallest of REPLACE, REPLACE_BZ, or BSDIFF wins. 115 // new_filename must contain at least one byte. 116 // |new_filename| is read starting at |chunk_offset|. 117 // If |chunk_size| is not -1, only up to |chunk_size| bytes are diffed. 118 // If |src_ops_allowed| is true, it will emit SOURCE_COPY and SOURCE_BSDIFF 119 // operations instead of MOVE and BSDIFF, respectively. 120 // Returns true on success. 121 static bool ReadFileToDiff(const std::string& old_part, 122 const std::string& new_part, 123 off_t chunk_offset, 124 off_t chunk_size, 125 bool bsdiff_allowed, 126 chromeos::Blob* out_data, 127 DeltaArchiveManifest_InstallOperation* out_op, 128 bool gather_extents, 129 bool src_ops_allowed, 130 const std::string& old_filename, 131 const std::string& new_filename); 132 133 // Delta compresses a kernel partition |new_kernel_part| with knowledge of the 134 // old kernel partition |old_kernel_part|. If |old_kernel_part| is an empty 135 // string, generates a full update of the partition. 136 static bool DeltaCompressKernelPartition( 137 const std::string& old_kernel_part, 138 const std::string& new_kernel_part, 139 std::vector<AnnotatedOperation>* ops, 140 int blobs_fd, 141 off_t* blobs_length, 142 bool src_ops_allowed); 143 144 // Reads blocks from image_path that are not yet marked as being written in 145 // the blocks array. These blocks that remain are either unchanged files or 146 // non-file-data blocks. We compare each of them to the old image, and 147 // compress the ones that changed into a single REPLACE_BZ operation. This 148 // updates a newly created node in the graph to write these blocks and writes 149 // the appropriate blob to blobs_fd. Reads and updates blobs_length. 150 static bool ReadUnwrittenBlocks( 151 const std::vector<Block>& blocks, 152 int blobs_fd, 153 off_t* blobs_length, 154 const std::string& old_image_path, 155 const uint64_t old_image_size, 156 const std::string& new_image_path, 157 Vertex* vertex, 158 uint32_t minor_version); 159 160 // Stores all Extents in 'extents' into 'out'. 161 static void StoreExtents(const std::vector<Extent>& extents, 162 google::protobuf::RepeatedPtrField<Extent>* out); 163 164 // Stores all extents in |extents| into |out_vector|. 165 static void ExtentsToVector( 166 const google::protobuf::RepeatedPtrField<Extent>& extents, 167 std::vector<Extent>* out_vector); 168 169 // Install operations in the manifest may reference data blobs, which 170 // are in data_blobs_path. This function creates a new data blobs file 171 // with the data blobs in the same order as the referencing install 172 // operations in the manifest. E.g. if manifest[0] has a data blob 173 // "X" at offset 1, manifest[1] has a data blob "Y" at offset 0, 174 // and data_blobs_path's file contains "YX", new_data_blobs_path 175 // will set to be a file that contains "XY". 176 static bool ReorderDataBlobs(DeltaArchiveManifest* manifest, 177 const std::string& data_blobs_path, 178 const std::string& new_data_blobs_path); 179 180 // Computes a SHA256 hash of the given buf and sets the hash value in the 181 // operation so that update_engine could verify. This hash should be set 182 // for all operations that have a non-zero data blob. One exception is the 183 // dummy operation for signature blob because the contents of the signature 184 // blob will not be available at payload creation time. So, update_engine will 185 // gracefully ignore the dummy signature operation. 186 static bool AddOperationHash(DeltaArchiveManifest_InstallOperation* op, 187 const chromeos::Blob& buf); 188 189 190 // Returns true if |op| is a no-op operation that doesn't do any useful work 191 // (e.g., a move operation that copies blocks onto themselves). 192 static bool IsNoopOperation(const DeltaArchiveManifest_InstallOperation& op); 193 194 // Filters all the operations that are no-op, maintaining the relative order 195 // of the rest of the operations. 196 static void FilterNoopOperations(std::vector<AnnotatedOperation>* ops); 197 198 static bool InitializePartitionInfo(bool is_kernel, 199 const std::string& partition, 200 PartitionInfo* info); 201 202 // Runs the bsdiff tool on two files and returns the resulting delta in 203 // |out|. Returns true on success. 204 static bool BsdiffFiles(const std::string& old_file, 205 const std::string& new_file, 206 chromeos::Blob* out); 207 208 // Adds to |manifest| a dummy operation that points to a signature blob 209 // located at the specified offset/length. 210 static void AddSignatureOp(uint64_t signature_blob_offset, 211 uint64_t signature_blob_length, 212 DeltaArchiveManifest* manifest); 213 214 // Takes a collection (vector or RepeatedPtrField) of Extent and 215 // returns a vector of the blocks referenced, in order. 216 template<typename T> 217 static std::vector<uint64_t> ExpandExtents(const T& extents) { 218 std::vector<uint64_t> ret; 219 for (size_t i = 0, e = static_cast<size_t>(extents.size()); i != e; ++i) { 220 const Extent extent = GetElement(extents, i); 221 if (extent.start_block() == kSparseHole) { 222 ret.resize(ret.size() + extent.num_blocks(), kSparseHole); 223 } else { 224 for (uint64_t block = extent.start_block(); 225 block < (extent.start_block() + extent.num_blocks()); block++) { 226 ret.push_back(block); 227 } 228 } 229 } 230 return ret; 231 } 232 233 // Takes a vector of extents and removes extents that begin in a sparse hole. 234 static void ClearSparseHoles(std::vector<Extent>* extents); 235 236 // Takes a vector of AnnotatedOperations |aops| and fragments those operations 237 // such that there is only one dst extent per operation. Sets |aops| to a 238 // vector of the new fragmented operations. 239 static bool FragmentOperations(std::vector<AnnotatedOperation>* aops, 240 const std::string& target_rootfs_part, 241 int data_fd, 242 off_t* data_file_size); 243 244 // Takes a vector of AnnotatedOperations |aops| and sorts them by the first 245 // start block in their destination extents. Sets |aops| to a vector of the 246 // sorted operations. 247 static void SortOperationsByDestination( 248 std::vector<AnnotatedOperation>* aops); 249 250 // Takes an SOURCE_COPY install operation, |aop|, and adds one operation for 251 // each dst extent in |aop| to |ops|. The new operations added to |ops| will 252 // have only one dst extent. The src extents are split so the number of blocks 253 // in the src and dst extents are equal. 254 // E.g. we have a SOURCE_COPY operation: 255 // src extents: [(1, 3), (5, 1), (7, 1)], dst extents: [(2, 2), (6, 3)] 256 // Then we will get 2 new operations: 257 // 1. src extents: [(1, 2)], dst extents: [(2, 2)] 258 // 2. src extents: [(3, 1),(5, 1),(7, 1)], dst extents: [(6, 3)] 259 static bool SplitSourceCopy(const AnnotatedOperation& original_aop, 260 std::vector<AnnotatedOperation>* result_aops); 261 262 // Takes a REPLACE/REPLACE_BZ operation |aop|, and adds one operation for each 263 // dst extent in |aop| to |ops|. The new operations added to |ops| will have 264 // only one dst extent each, and may be either a REPLACE or REPLACE_BZ 265 // depending on whether compression is advantageous. 266 static bool SplitReplaceOrReplaceBz( 267 const AnnotatedOperation& original_aop, 268 std::vector<AnnotatedOperation>* result_aops, 269 const std::string& target_part, 270 int data_fd, 271 off_t* data_file_size); 272 273 // Takes a sorted (by first destination extent) vector of operations |aops| 274 // and merges SOURCE_COPY, REPLACE, and REPLACE_BZ operations in that vector. 275 // It will merge two operations if: 276 // - They are of the same type. 277 // - They are contiguous. 278 // - Their combined blocks do not exceed |chunk_size|. 279 static bool MergeOperations(std::vector<AnnotatedOperation>* aops, 280 off_t chunk_size, 281 const std::string& target_part, 282 int data_fd, 283 off_t* data_file_size); 284 285 // Takes a pointer to extents |extents| and extents |extents_to_add|, and 286 // merges them by adding |extents_to_add| to |extents| and normalizing. 287 static void ExtendExtents( 288 google::protobuf::RepeatedPtrField<Extent>* extents, 289 const google::protobuf::RepeatedPtrField<Extent>& extents_to_add); 290 291 private: 292 // Adds the data payload for a REPLACE/REPLACE_BZ operation |aop| by reading 293 // its output extents from |target_part_path| and appending a corresponding 294 // data blob to |data_fd|. The blob will be compressed if this is smaller than 295 // the uncompressed form, and the operation type will be set accordingly. 296 // |*data_file_size| will be updated as well. If the operation happens to have 297 // the right type and already points to a data blob, we check whether its 298 // content is identical to the new one, in which case nothing is written. 299 static bool AddDataAndSetType(AnnotatedOperation* aop, 300 const std::string& target_part_path, 301 int data_fd, 302 off_t* data_file_size); 303 304 DISALLOW_COPY_AND_ASSIGN(DeltaDiffGenerator); 305}; 306 307// This is the only function that external users of this module should call. 308// The |config| describes the payload generation request, describing both 309// old and new images for delta payloads and only the new image for full 310// payloads. 311// For delta payloads, the images should be already mounted read-only at 312// the respective rootfs_mountpt. 313// |private_key_path| points to a private key used to sign the update. 314// Pass empty string to not sign the update. 315// |output_path| is the filename where the delta update should be written. 316// Returns true on success. Also writes the size of the metadata into 317// |metadata_size|. 318bool GenerateUpdatePayloadFile(const PayloadGenerationConfig& config, 319 const std::string& output_path, 320 const std::string& private_key_path, 321 uint64_t* metadata_size); 322 323 324}; // namespace chromeos_update_engine 325 326#endif // UPDATE_ENGINE_PAYLOAD_GENERATOR_DELTA_DIFF_GENERATOR_H_ 327