delta_diff_utils.cc revision 3cd4df127c29eb90c0e203373edfbc9534c79169
1// 2// Copyright (C) 2015 The Android Open Source Project 3// 4// Licensed under the Apache License, Version 2.0 (the "License"); 5// you may not use this file except in compliance with the License. 6// You may obtain a copy of the License at 7// 8// http://www.apache.org/licenses/LICENSE-2.0 9// 10// Unless required by applicable law or agreed to in writing, software 11// distributed under the License is distributed on an "AS IS" BASIS, 12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13// See the License for the specific language governing permissions and 14// limitations under the License. 15// 16 17#include "update_engine/payload_generator/delta_diff_utils.h" 18 19#include <endian.h> 20#if defined(__clang__) 21// TODO(*): Remove these pragmas when b/35721782 is fixed. 22#pragma clang diagnostic push 23#pragma clang diagnostic ignored "-Wmacro-redefined" 24#endif 25#include <ext2fs/ext2fs.h> 26#if defined(__clang__) 27#pragma clang diagnostic pop 28#endif 29#include <unistd.h> 30 31#include <algorithm> 32#include <map> 33#include <memory> 34#include <utility> 35 36#include <base/files/file_util.h> 37#include <base/format_macros.h> 38#include <base/strings/string_util.h> 39#include <base/strings/stringprintf.h> 40#include <base/threading/simple_thread.h> 41#include <bsdiff/bsdiff.h> 42 43#include "update_engine/common/hash_calculator.h" 44#include "update_engine/common/subprocess.h" 45#include "update_engine/common/utils.h" 46#include "update_engine/payload_generator/block_mapping.h" 47#include "update_engine/payload_generator/bzip.h" 48#include "update_engine/payload_generator/deflate_utils.h" 49#include "update_engine/payload_generator/delta_diff_generator.h" 50#include "update_engine/payload_generator/extent_ranges.h" 51#include "update_engine/payload_generator/extent_utils.h" 52#include "update_engine/payload_generator/squashfs_filesystem.h" 53#include "update_engine/payload_generator/xz.h" 54 55using std::map; 56using std::string; 57using std::vector; 58 59namespace chromeos_update_engine { 60namespace { 61 62// The maximum destination size allowed for bsdiff. In general, bsdiff should 63// work for arbitrary big files, but the payload generation and payload 64// application requires a significant amount of RAM. We put a hard-limit of 65// 200 MiB that should not affect any released board, but will limit the 66// Chrome binary in ASan builders. 67const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024; // bytes 68 69// The maximum destination size allowed for puffdiff. In general, puffdiff 70// should work for arbitrary big files, but the payload application is quite 71// memory intensive, so we limit these operations to 150 MiB. 72const uint64_t kMaxPuffdiffDestinationSize = 150 * 1024 * 1024; // bytes 73 74// Process a range of blocks from |range_start| to |range_end| in the extent at 75// position |*idx_p| of |extents|. If |do_remove| is true, this range will be 76// removed, which may cause the extent to be trimmed, split or removed entirely. 77// The value of |*idx_p| is updated to point to the next extent to be processed. 78// Returns true iff the next extent to process is a new or updated one. 79bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p, 80 const bool do_remove, uint64_t range_start, 81 uint64_t range_end) { 82 size_t idx = *idx_p; 83 uint64_t start_block = (*extents)[idx].start_block(); 84 uint64_t num_blocks = (*extents)[idx].num_blocks(); 85 uint64_t range_size = range_end - range_start; 86 87 if (do_remove) { 88 if (range_size == num_blocks) { 89 // Remove the entire extent. 90 extents->erase(extents->begin() + idx); 91 } else if (range_end == num_blocks) { 92 // Trim the end of the extent. 93 (*extents)[idx].set_num_blocks(num_blocks - range_size); 94 idx++; 95 } else if (range_start == 0) { 96 // Trim the head of the extent. 97 (*extents)[idx].set_start_block(start_block + range_size); 98 (*extents)[idx].set_num_blocks(num_blocks - range_size); 99 } else { 100 // Trim the middle, splitting the remainder into two parts. 101 (*extents)[idx].set_num_blocks(range_start); 102 Extent e; 103 e.set_start_block(start_block + range_end); 104 e.set_num_blocks(num_blocks - range_end); 105 idx++; 106 extents->insert(extents->begin() + idx, e); 107 } 108 } else if (range_end == num_blocks) { 109 // Done with this extent. 110 idx++; 111 } else { 112 return false; 113 } 114 115 *idx_p = idx; 116 return true; 117} 118 119// Remove identical corresponding block ranges in |src_extents| and 120// |dst_extents|. Used for preventing moving of blocks onto themselves during 121// MOVE operations. The value of |total_bytes| indicates the actual length of 122// content; this may be slightly less than the total size of blocks, in which 123// case the last block is only partly occupied with data. Returns the total 124// number of bytes removed. 125size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents, 126 vector<Extent>* dst_extents, 127 const size_t total_bytes) { 128 size_t src_idx = 0; 129 size_t dst_idx = 0; 130 uint64_t src_offset = 0, dst_offset = 0; 131 size_t removed_bytes = 0, nonfull_block_bytes; 132 bool do_remove = false; 133 while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) { 134 do_remove = ((*src_extents)[src_idx].start_block() + src_offset == 135 (*dst_extents)[dst_idx].start_block() + dst_offset); 136 137 uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks(); 138 uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks(); 139 uint64_t min_num_blocks = std::min(src_num_blocks - src_offset, 140 dst_num_blocks - dst_offset); 141 uint64_t prev_src_offset = src_offset; 142 uint64_t prev_dst_offset = dst_offset; 143 src_offset += min_num_blocks; 144 dst_offset += min_num_blocks; 145 146 bool new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove, 147 prev_src_offset, src_offset); 148 bool new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove, 149 prev_dst_offset, dst_offset); 150 if (new_src) { 151 src_offset = 0; 152 } 153 if (new_dst) { 154 dst_offset = 0; 155 } 156 157 if (do_remove) 158 removed_bytes += min_num_blocks * kBlockSize; 159 } 160 161 // If we removed the last block and this block is only partly used by file 162 // content, deduct the unused portion from the total removed byte count. 163 if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize)) 164 removed_bytes -= kBlockSize - nonfull_block_bytes; 165 166 return removed_bytes; 167} 168 169} // namespace 170 171namespace diff_utils { 172 173// This class encapsulates a file delta processing thread work. The 174// processor computes the delta between the source and target files; 175// and write the compressed delta to the blob. 176class FileDeltaProcessor : public base::DelegateSimpleThread::Delegate { 177 public: 178 FileDeltaProcessor(const string& old_part, 179 const string& new_part, 180 const PayloadVersion& version, 181 const vector<Extent>& old_extents, 182 const vector<Extent>& new_extents, 183 const vector<puffin::BitExtent>& old_deflates, 184 const vector<puffin::BitExtent>& new_deflates, 185 const string& name, 186 ssize_t chunk_blocks, 187 BlobFileWriter* blob_file) 188 : old_part_(old_part), 189 new_part_(new_part), 190 version_(version), 191 old_extents_(old_extents), 192 new_extents_(new_extents), 193 old_deflates_(old_deflates), 194 new_deflates_(new_deflates), 195 name_(name), 196 chunk_blocks_(chunk_blocks), 197 blob_file_(blob_file) {} 198 199 FileDeltaProcessor(FileDeltaProcessor&& processor) = default; 200 201 ~FileDeltaProcessor() override = default; 202 203 // Overrides DelegateSimpleThread::Delegate. 204 // Calculate the list of operations and write their corresponding deltas to 205 // the blob_file. 206 void Run() override; 207 208 // Merge each file processor's ops list to aops. 209 void MergeOperation(vector<AnnotatedOperation>* aops); 210 211 private: 212 const string old_part_; 213 const string new_part_; 214 const PayloadVersion& version_; 215 216 // The block ranges of the old/new file within the src/tgt image 217 const vector<Extent> old_extents_; 218 const vector<Extent> new_extents_; 219 const vector<puffin::BitExtent> old_deflates_; 220 const vector<puffin::BitExtent> new_deflates_; 221 const string name_; 222 // Block limit of one aop. 223 ssize_t chunk_blocks_; 224 BlobFileWriter* blob_file_; 225 226 // The list of ops to reach the new file from the old file. 227 vector<AnnotatedOperation> file_aops_; 228 229 DISALLOW_COPY_AND_ASSIGN(FileDeltaProcessor); 230}; 231 232void FileDeltaProcessor::Run() { 233 TEST_AND_RETURN(blob_file_ != nullptr); 234 235 LOG(INFO) << "Encoding file " << name_ << " (" 236 << BlocksInExtents(new_extents_) << " blocks)"; 237 238 if (!DeltaReadFile(&file_aops_, 239 old_part_, 240 new_part_, 241 old_extents_, 242 new_extents_, 243 old_deflates_, 244 new_deflates_, 245 name_, 246 chunk_blocks_, 247 version_, 248 blob_file_)) { 249 LOG(ERROR) << "Failed to generate delta for " << name_ << " (" 250 << BlocksInExtents(new_extents_) << " blocks)"; 251 } 252} 253 254void FileDeltaProcessor::MergeOperation(vector<AnnotatedOperation>* aops) { 255 aops->reserve(aops->size() + file_aops_.size()); 256 std::move(file_aops_.begin(), file_aops_.end(), std::back_inserter(*aops)); 257} 258 259bool DeltaReadPartition(vector<AnnotatedOperation>* aops, 260 const PartitionConfig& old_part, 261 const PartitionConfig& new_part, 262 ssize_t hard_chunk_blocks, 263 size_t soft_chunk_blocks, 264 const PayloadVersion& version, 265 BlobFileWriter* blob_file) { 266 ExtentRanges old_visited_blocks; 267 ExtentRanges new_visited_blocks; 268 269 TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks( 270 aops, 271 old_part.path, 272 new_part.path, 273 old_part.size / kBlockSize, 274 new_part.size / kBlockSize, 275 soft_chunk_blocks, 276 version, 277 blob_file, 278 &old_visited_blocks, 279 &new_visited_blocks)); 280 281 bool puffdiff_allowed = version.OperationAllowed(InstallOperation::PUFFDIFF); 282 map<string, FilesystemInterface::File> old_files_map; 283 if (old_part.fs_interface) { 284 vector<FilesystemInterface::File> old_files; 285 TEST_AND_RETURN_FALSE(deflate_utils::PreprocessParitionFiles( 286 old_part, &old_files, puffdiff_allowed)); 287 for (const FilesystemInterface::File& file : old_files) 288 old_files_map[file.name] = file; 289 } 290 291 TEST_AND_RETURN_FALSE(new_part.fs_interface); 292 vector<FilesystemInterface::File> new_files; 293 TEST_AND_RETURN_FALSE(deflate_utils::PreprocessParitionFiles( 294 new_part, &new_files, puffdiff_allowed)); 295 296 vector<FileDeltaProcessor> file_delta_processors; 297 298 // The processing is very straightforward here, we generate operations for 299 // every file (and pseudo-file such as the metadata) in the new filesystem 300 // based on the file with the same name in the old filesystem, if any. 301 // Files with overlapping data blocks (like hardlinks or filesystems with tail 302 // packing or compression where the blocks store more than one file) are only 303 // generated once in the new image, but are also used only once from the old 304 // image due to some simplifications (see below). 305 for (const FilesystemInterface::File& new_file : new_files) { 306 // Ignore the files in the new filesystem without blocks. Symlinks with 307 // data blocks (for example, symlinks bigger than 60 bytes in ext2) are 308 // handled as normal files. We also ignore blocks that were already 309 // processed by a previous file. 310 vector<Extent> new_file_extents = FilterExtentRanges( 311 new_file.extents, new_visited_blocks); 312 new_visited_blocks.AddExtents(new_file_extents); 313 314 if (new_file_extents.empty()) 315 continue; 316 317 // We can't visit each dst image inode more than once, as that would 318 // duplicate work. Here, we avoid visiting each source image inode 319 // more than once. Technically, we could have multiple operations 320 // that read the same blocks from the source image for diffing, but 321 // we choose not to avoid complexity. Eventually we will move away 322 // from using a graph/cycle detection/etc to generate diffs, and at that 323 // time, it will be easy (non-complex) to have many operations read 324 // from the same source blocks. At that time, this code can die. -adlr 325 auto old_file = old_files_map[new_file.name]; 326 vector<Extent> old_file_extents = 327 FilterExtentRanges(old_file.extents, old_visited_blocks); 328 old_visited_blocks.AddExtents(old_file_extents); 329 330 file_delta_processors.emplace_back(old_part.path, 331 new_part.path, 332 version, 333 std::move(old_file_extents), 334 std::move(new_file_extents), 335 old_file.deflates, 336 new_file.deflates, 337 new_file.name, // operation name 338 hard_chunk_blocks, 339 blob_file); 340 } 341 342 size_t max_threads = GetMaxThreads(); 343 base::DelegateSimpleThreadPool thread_pool("incremental-update-generator", 344 max_threads); 345 thread_pool.Start(); 346 for (auto& processor : file_delta_processors) { 347 thread_pool.AddWork(&processor); 348 } 349 thread_pool.JoinAll(); 350 351 for (auto& processor : file_delta_processors) { 352 processor.MergeOperation(aops); 353 } 354 355 // Process all the blocks not included in any file. We provided all the unused 356 // blocks in the old partition as available data. 357 vector<Extent> new_unvisited = { 358 ExtentForRange(0, new_part.size / kBlockSize)}; 359 new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks); 360 if (new_unvisited.empty()) 361 return true; 362 363 vector<Extent> old_unvisited; 364 if (old_part.fs_interface) { 365 old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize)); 366 old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks); 367 } 368 369 LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited) 370 << " unwritten blocks using chunk size of " 371 << soft_chunk_blocks << " blocks."; 372 // We use the soft_chunk_blocks limit for the <non-file-data> as we don't 373 // really know the structure of this data and we should not expect it to have 374 // redundancy between partitions. 375 TEST_AND_RETURN_FALSE(DeltaReadFile(aops, 376 old_part.path, 377 new_part.path, 378 old_unvisited, 379 new_unvisited, 380 {}, // old_deflates, 381 {}, // new_deflates 382 "<non-file-data>", // operation name 383 soft_chunk_blocks, 384 version, 385 blob_file)); 386 387 return true; 388} 389 390bool DeltaMovedAndZeroBlocks(vector<AnnotatedOperation>* aops, 391 const string& old_part, 392 const string& new_part, 393 size_t old_num_blocks, 394 size_t new_num_blocks, 395 ssize_t chunk_blocks, 396 const PayloadVersion& version, 397 BlobFileWriter* blob_file, 398 ExtentRanges* old_visited_blocks, 399 ExtentRanges* new_visited_blocks) { 400 vector<BlockMapping::BlockId> old_block_ids; 401 vector<BlockMapping::BlockId> new_block_ids; 402 TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part, 403 new_part, 404 old_num_blocks * kBlockSize, 405 new_num_blocks * kBlockSize, 406 kBlockSize, 407 &old_block_ids, 408 &new_block_ids)); 409 410 // If the update is inplace, we map all the blocks that didn't move, 411 // regardless of the contents since they are already copied and no operation 412 // is required. 413 if (version.InplaceUpdate()) { 414 uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks); 415 for (uint64_t block = 0; block < num_blocks; block++) { 416 if (old_block_ids[block] == new_block_ids[block] && 417 !old_visited_blocks->ContainsBlock(block) && 418 !new_visited_blocks->ContainsBlock(block)) { 419 old_visited_blocks->AddBlock(block); 420 new_visited_blocks->AddBlock(block); 421 } 422 } 423 } 424 425 // A mapping from the block_id to the list of block numbers with that block id 426 // in the old partition. This is used to lookup where in the old partition 427 // is a block from the new partition. 428 map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map; 429 430 for (uint64_t block = old_num_blocks; block-- > 0; ) { 431 if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block)) 432 old_blocks_map[old_block_ids[block]].push_back(block); 433 434 // Mark all zeroed blocks in the old image as "used" since it doesn't make 435 // any sense to spend I/O to read zeros from the source partition and more 436 // importantly, these could sometimes be blocks discarded in the SSD which 437 // would read non-zero values. 438 if (old_block_ids[block] == 0) 439 old_visited_blocks->AddBlock(block); 440 } 441 442 // The collection of blocks in the new partition with just zeros. This is a 443 // common case for free-space that's also problematic for bsdiff, so we want 444 // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of 445 // just zeros is so small that it doesn't make sense to spend the I/O reading 446 // zeros from the old partition. 447 vector<Extent> new_zeros; 448 449 vector<Extent> old_identical_blocks; 450 vector<Extent> new_identical_blocks; 451 452 for (uint64_t block = 0; block < new_num_blocks; block++) { 453 // Only produce operations for blocks that were not yet visited. 454 if (new_visited_blocks->ContainsBlock(block)) 455 continue; 456 if (new_block_ids[block] == 0) { 457 AppendBlockToExtents(&new_zeros, block); 458 continue; 459 } 460 461 auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]); 462 // Check if the block exists in the old partition at all. 463 if (old_blocks_map_it == old_blocks_map.end() || 464 old_blocks_map_it->second.empty()) 465 continue; 466 467 AppendBlockToExtents(&old_identical_blocks, 468 old_blocks_map_it->second.back()); 469 AppendBlockToExtents(&new_identical_blocks, block); 470 // We can't reuse source blocks in minor version 1 because the cycle 471 // breaking algorithm used in the in-place update doesn't support that. 472 if (version.InplaceUpdate()) 473 old_blocks_map_it->second.pop_back(); 474 } 475 476 // Produce operations for the zero blocks split per output extent. 477 // TODO(deymo): Produce ZERO operations instead of calling DeltaReadFile(). 478 size_t num_ops = aops->size(); 479 new_visited_blocks->AddExtents(new_zeros); 480 for (const Extent& extent : new_zeros) { 481 TEST_AND_RETURN_FALSE(DeltaReadFile(aops, 482 "", 483 new_part, 484 vector<Extent>(), // old_extents 485 vector<Extent>{extent}, // new_extents 486 {}, // old_deflates 487 {}, // new_deflates 488 "<zeros>", 489 chunk_blocks, 490 version, 491 blob_file)); 492 } 493 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for " 494 << BlocksInExtents(new_zeros) << " zeroed blocks"; 495 496 // Produce MOVE/SOURCE_COPY operations for the moved blocks. 497 num_ops = aops->size(); 498 if (chunk_blocks == -1) 499 chunk_blocks = new_num_blocks; 500 uint64_t used_blocks = 0; 501 old_visited_blocks->AddExtents(old_identical_blocks); 502 new_visited_blocks->AddExtents(new_identical_blocks); 503 for (const Extent& extent : new_identical_blocks) { 504 // We split the operation at the extent boundary or when bigger than 505 // chunk_blocks. 506 for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks(); 507 op_block_offset += chunk_blocks) { 508 aops->emplace_back(); 509 AnnotatedOperation* aop = &aops->back(); 510 aop->name = "<identical-blocks>"; 511 aop->op.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY) 512 ? InstallOperation::SOURCE_COPY 513 : InstallOperation::MOVE); 514 515 uint64_t chunk_num_blocks = 516 std::min(static_cast<uint64_t>(extent.num_blocks()) - op_block_offset, 517 static_cast<uint64_t>(chunk_blocks)); 518 519 // The current operation represents the move/copy operation for the 520 // sublist starting at |used_blocks| of length |chunk_num_blocks| where 521 // the src and dst are from |old_identical_blocks| and 522 // |new_identical_blocks| respectively. 523 StoreExtents( 524 ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks), 525 aop->op.mutable_src_extents()); 526 527 Extent* op_dst_extent = aop->op.add_dst_extents(); 528 op_dst_extent->set_start_block(extent.start_block() + op_block_offset); 529 op_dst_extent->set_num_blocks(chunk_num_blocks); 530 CHECK( 531 vector<Extent>{*op_dst_extent} == // NOLINT(whitespace/braces) 532 ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks)); 533 534 used_blocks += chunk_num_blocks; 535 } 536 } 537 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for " 538 << used_blocks << " identical blocks moved"; 539 540 return true; 541} 542 543bool DeltaReadFile(vector<AnnotatedOperation>* aops, 544 const string& old_part, 545 const string& new_part, 546 const vector<Extent>& old_extents, 547 const vector<Extent>& new_extents, 548 const vector<puffin::BitExtent>& old_deflates, 549 const vector<puffin::BitExtent>& new_deflates, 550 const string& name, 551 ssize_t chunk_blocks, 552 const PayloadVersion& version, 553 BlobFileWriter* blob_file) { 554 brillo::Blob data; 555 InstallOperation operation; 556 557 uint64_t total_blocks = BlocksInExtents(new_extents); 558 if (chunk_blocks == -1) 559 chunk_blocks = total_blocks; 560 561 for (uint64_t block_offset = 0; block_offset < total_blocks; 562 block_offset += chunk_blocks) { 563 // Split the old/new file in the same chunks. Note that this could drop 564 // some information from the old file used for the new chunk. If the old 565 // file is smaller (or even empty when there's no old file) the chunk will 566 // also be empty. 567 vector<Extent> old_extents_chunk = ExtentsSublist( 568 old_extents, block_offset, chunk_blocks); 569 vector<Extent> new_extents_chunk = ExtentsSublist( 570 new_extents, block_offset, chunk_blocks); 571 NormalizeExtents(&old_extents_chunk); 572 NormalizeExtents(&new_extents_chunk); 573 574 TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part, 575 new_part, 576 old_extents_chunk, 577 new_extents_chunk, 578 old_deflates, 579 new_deflates, 580 version, 581 &data, 582 &operation)); 583 584 // Check if the operation writes nothing. 585 if (operation.dst_extents_size() == 0) { 586 if (operation.type() == InstallOperation::MOVE) { 587 LOG(INFO) << "Empty MOVE operation (" 588 << name << "), skipping"; 589 continue; 590 } else { 591 LOG(ERROR) << "Empty non-MOVE operation"; 592 return false; 593 } 594 } 595 596 // Now, insert into the list of operations. 597 AnnotatedOperation aop; 598 aop.name = name; 599 if (static_cast<uint64_t>(chunk_blocks) < total_blocks) { 600 aop.name = base::StringPrintf("%s:%" PRIu64, 601 name.c_str(), block_offset / chunk_blocks); 602 } 603 aop.op = operation; 604 605 // Write the data 606 TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file)); 607 aops->emplace_back(aop); 608 } 609 return true; 610} 611 612bool GenerateBestFullOperation(const brillo::Blob& new_data, 613 const PayloadVersion& version, 614 brillo::Blob* out_blob, 615 InstallOperation_Type* out_type) { 616 if (new_data.empty()) 617 return false; 618 619 if (version.OperationAllowed(InstallOperation::ZERO) && 620 std::all_of( 621 new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) { 622 // The read buffer is all zeros, so produce a ZERO operation. No need to 623 // check other types of operations in this case. 624 *out_blob = brillo::Blob(); 625 *out_type = InstallOperation::ZERO; 626 return true; 627 } 628 629 bool out_blob_set = false; 630 631 // Try compressing |new_data| with xz first. 632 if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) { 633 brillo::Blob new_data_xz; 634 if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) { 635 *out_type = InstallOperation::REPLACE_XZ; 636 *out_blob = std::move(new_data_xz); 637 out_blob_set = true; 638 } 639 } 640 641 // Try compressing it with bzip2. 642 if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) { 643 brillo::Blob new_data_bz; 644 // TODO(deymo): Implement some heuristic to determine if it is worth trying 645 // to compress the blob with bzip2 if we already have a good REPLACE_XZ. 646 if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() && 647 (!out_blob_set || out_blob->size() > new_data_bz.size())) { 648 // A REPLACE_BZ is better or nothing else was set. 649 *out_type = InstallOperation::REPLACE_BZ; 650 *out_blob = std::move(new_data_bz); 651 out_blob_set = true; 652 } 653 } 654 655 // If nothing else worked or it was badly compressed we try a REPLACE. 656 if (!out_blob_set || out_blob->size() >= new_data.size()) { 657 *out_type = InstallOperation::REPLACE; 658 // This needs to make a copy of the data in the case bzip or xz didn't 659 // compress well, which is not the common case so the performance hit is 660 // low. 661 *out_blob = new_data; 662 } 663 return true; 664} 665 666bool ReadExtentsToDiff(const string& old_part, 667 const string& new_part, 668 const vector<Extent>& old_extents, 669 const vector<Extent>& new_extents, 670 const vector<puffin::BitExtent>& old_deflates, 671 const vector<puffin::BitExtent>& new_deflates, 672 const PayloadVersion& version, 673 brillo::Blob* out_data, 674 InstallOperation* out_op) { 675 InstallOperation operation; 676 677 // We read blocks from old_extents and write blocks to new_extents. 678 uint64_t blocks_to_read = BlocksInExtents(old_extents); 679 uint64_t blocks_to_write = BlocksInExtents(new_extents); 680 681 // Disable bsdiff, and puffdiff when the data is too big. 682 bool bsdiff_allowed = 683 version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) || 684 version.OperationAllowed(InstallOperation::BSDIFF); 685 if (bsdiff_allowed && 686 blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) { 687 LOG(INFO) << "bsdiff blacklisted, data too big: " 688 << blocks_to_read * kBlockSize << " bytes"; 689 bsdiff_allowed = false; 690 } 691 692 bool puffdiff_allowed = version.OperationAllowed(InstallOperation::PUFFDIFF); 693 if (puffdiff_allowed && 694 blocks_to_read * kBlockSize > kMaxPuffdiffDestinationSize) { 695 LOG(INFO) << "puffdiff blacklisted, data too big: " 696 << blocks_to_read * kBlockSize << " bytes"; 697 puffdiff_allowed = false; 698 } 699 700 // Make copies of the extents so we can modify them. 701 vector<Extent> src_extents = old_extents; 702 vector<Extent> dst_extents = new_extents; 703 704 // Read in bytes from new data. 705 brillo::Blob new_data; 706 TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part, 707 new_extents, 708 &new_data, 709 kBlockSize * blocks_to_write, 710 kBlockSize)); 711 TEST_AND_RETURN_FALSE(!new_data.empty()); 712 713 // Data blob that will be written to delta file. 714 brillo::Blob data_blob; 715 716 // Try generating a full operation for the given new data, regardless of the 717 // old_data. 718 InstallOperation_Type op_type; 719 TEST_AND_RETURN_FALSE( 720 GenerateBestFullOperation(new_data, version, &data_blob, &op_type)); 721 operation.set_type(op_type); 722 723 brillo::Blob old_data; 724 if (blocks_to_read > 0) { 725 // Read old data. 726 TEST_AND_RETURN_FALSE( 727 utils::ReadExtents(old_part, src_extents, &old_data, 728 kBlockSize * blocks_to_read, kBlockSize)); 729 if (old_data == new_data) { 730 // No change in data. 731 operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY) 732 ? InstallOperation::SOURCE_COPY 733 : InstallOperation::MOVE); 734 data_blob = brillo::Blob(); 735 } else { 736 if (bsdiff_allowed) { 737 base::FilePath patch; 738 TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&patch)); 739 ScopedPathUnlinker unlinker(patch.value()); 740 741 brillo::Blob bsdiff_delta; 742 TEST_AND_RETURN_FALSE(0 == bsdiff::bsdiff(old_data.data(), 743 old_data.size(), 744 new_data.data(), 745 new_data.size(), 746 patch.value().c_str(), 747 nullptr)); 748 749 TEST_AND_RETURN_FALSE(utils::ReadFile(patch.value(), &bsdiff_delta)); 750 CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0)); 751 if (bsdiff_delta.size() < data_blob.size()) { 752 operation.set_type( 753 version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) 754 ? InstallOperation::SOURCE_BSDIFF 755 : InstallOperation::BSDIFF); 756 data_blob = std::move(bsdiff_delta); 757 } 758 } 759 if (puffdiff_allowed) { 760 // Find all deflate positions inside the given extents and then put all 761 // deflates together because we have already read all the extents into 762 // one buffer. 763 vector<puffin::BitExtent> src_deflates; 764 TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates( 765 src_extents, old_deflates, &src_deflates)); 766 767 vector<puffin::BitExtent> dst_deflates; 768 TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates( 769 dst_extents, new_deflates, &dst_deflates)); 770 771 // Remove equal deflates. TODO(*): We can do a N*N check using 772 // hashing. It will not reduce the payload size, but it will speeds up 773 // the puffing on the client device. 774 auto src = src_deflates.begin(); 775 auto dst = dst_deflates.begin(); 776 for (; src != src_deflates.end() && dst != dst_deflates.end();) { 777 auto src_in_bytes = deflate_utils::ExpandToByteExtent(*src); 778 auto dst_in_bytes = deflate_utils::ExpandToByteExtent(*dst); 779 if (src_in_bytes.length == dst_in_bytes.length && 780 !memcmp(old_data.data() + src_in_bytes.offset, 781 new_data.data() + dst_in_bytes.offset, 782 src_in_bytes.length)) { 783 src = src_deflates.erase(src); 784 dst = dst_deflates.erase(dst); 785 } else { 786 src++; 787 dst++; 788 } 789 } 790 791 // Only Puffdiff if both files have at least one deflate left. 792 if (!src_deflates.empty() && !dst_deflates.empty()) { 793 brillo::Blob puffdiff_delta; 794 string temp_file_path; 795 TEST_AND_RETURN_FALSE(utils::MakeTempFile( 796 "puffdiff-delta.XXXXXX", &temp_file_path, nullptr)); 797 ScopedPathUnlinker temp_file_unlinker(temp_file_path); 798 799 // Perform PuffDiff operation. 800 TEST_AND_RETURN_FALSE(puffin::PuffDiff(old_data, 801 new_data, 802 src_deflates, 803 dst_deflates, 804 temp_file_path, 805 &puffdiff_delta)); 806 TEST_AND_RETURN_FALSE(puffdiff_delta.size() > 0); 807 if (puffdiff_delta.size() < data_blob.size()) { 808 operation.set_type(InstallOperation::PUFFDIFF); 809 data_blob = std::move(puffdiff_delta); 810 } 811 } 812 } 813 } 814 } 815 816 // Remove identical src/dst block ranges in MOVE operations. 817 if (operation.type() == InstallOperation::MOVE) { 818 auto removed_bytes = RemoveIdenticalBlockRanges( 819 &src_extents, &dst_extents, new_data.size()); 820 operation.set_src_length(old_data.size() - removed_bytes); 821 operation.set_dst_length(new_data.size() - removed_bytes); 822 } 823 824 // Set legacy |src_length| and |dst_length| fields for both BSDIFF and 825 // SOURCE_BSDIFF as only these two use these parameters. 826 if (operation.type() == InstallOperation::BSDIFF || 827 operation.type() == InstallOperation::SOURCE_BSDIFF) { 828 operation.set_src_length(old_data.size()); 829 operation.set_dst_length(new_data.size()); 830 } 831 832 // Embed extents in the operation. Replace (all variants), zero and discard 833 // operations should not have source extents. 834 if (!IsNoSourceOperation(operation.type())) { 835 StoreExtents(src_extents, operation.mutable_src_extents()); 836 } 837 // All operations have dst_extents. 838 StoreExtents(dst_extents, operation.mutable_dst_extents()); 839 840 *out_data = std::move(data_blob); 841 *out_op = operation; 842 return true; 843} 844 845bool IsAReplaceOperation(InstallOperation_Type op_type) { 846 return (op_type == InstallOperation::REPLACE || 847 op_type == InstallOperation::REPLACE_BZ || 848 op_type == InstallOperation::REPLACE_XZ); 849} 850 851bool IsNoSourceOperation(InstallOperation_Type op_type) { 852 return (IsAReplaceOperation(op_type) || 853 op_type == InstallOperation::ZERO || 854 op_type == InstallOperation::DISCARD); 855} 856 857// Returns true if |op| is a no-op operation that doesn't do any useful work 858// (e.g., a move operation that copies blocks onto themselves). 859bool IsNoopOperation(const InstallOperation& op) { 860 return (op.type() == InstallOperation::MOVE && 861 ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents())); 862} 863 864void FilterNoopOperations(vector<AnnotatedOperation>* ops) { 865 ops->erase( 866 std::remove_if( 867 ops->begin(), ops->end(), 868 [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}), 869 ops->end()); 870} 871 872bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) { 873 info->set_size(part.size); 874 HashCalculator hasher; 875 TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) == 876 static_cast<off_t>(part.size)); 877 TEST_AND_RETURN_FALSE(hasher.Finalize()); 878 const brillo::Blob& hash = hasher.raw_hash(); 879 info->set_hash(hash.data(), hash.size()); 880 LOG(INFO) << part.path << ": size=" << part.size << " hash=" << hasher.hash(); 881 return true; 882} 883 884bool CompareAopsByDestination(AnnotatedOperation first_aop, 885 AnnotatedOperation second_aop) { 886 // We want empty operations to be at the end of the payload. 887 if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size()) 888 return ((!first_aop.op.dst_extents().size()) < 889 (!second_aop.op.dst_extents().size())); 890 uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block(); 891 uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block(); 892 return first_dst_start < second_dst_start; 893} 894 895bool IsExtFilesystem(const string& device) { 896 brillo::Blob header; 897 // See include/linux/ext2_fs.h for more details on the structure. We obtain 898 // ext2 constants from ext2fs/ext2fs.h header but we don't link with the 899 // library. 900 if (!utils::ReadFileChunk( 901 device, 0, SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE, &header) || 902 header.size() < SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE) 903 return false; 904 905 const uint8_t* superblock = header.data() + SUPERBLOCK_OFFSET; 906 907 // ext3_fs.h: ext3_super_block.s_blocks_count 908 uint32_t block_count = 909 *reinterpret_cast<const uint32_t*>(superblock + 1 * sizeof(int32_t)); 910 911 // ext3_fs.h: ext3_super_block.s_log_block_size 912 uint32_t log_block_size = 913 *reinterpret_cast<const uint32_t*>(superblock + 6 * sizeof(int32_t)); 914 915 // ext3_fs.h: ext3_super_block.s_magic 916 uint16_t magic = 917 *reinterpret_cast<const uint16_t*>(superblock + 14 * sizeof(int32_t)); 918 919 block_count = le32toh(block_count); 920 log_block_size = le32toh(log_block_size) + EXT2_MIN_BLOCK_LOG_SIZE; 921 magic = le16toh(magic); 922 923 if (magic != EXT2_SUPER_MAGIC) 924 return false; 925 926 // Sanity check the parameters. 927 TEST_AND_RETURN_FALSE(log_block_size >= EXT2_MIN_BLOCK_LOG_SIZE && 928 log_block_size <= EXT2_MAX_BLOCK_LOG_SIZE); 929 TEST_AND_RETURN_FALSE(block_count > 0); 930 return true; 931} 932 933// Return the number of CPUs on the machine, and 4 threads in minimum. 934size_t GetMaxThreads() { 935 return std::max(sysconf(_SC_NPROCESSORS_ONLN), 4L); 936} 937 938} // namespace diff_utils 939 940} // namespace chromeos_update_engine 941