delta_diff_utils.cc revision 06c6d088b89656a120d40c4b578200c87af5c4c8
1// 2// Copyright (C) 2015 The Android Open Source Project 3// 4// Licensed under the Apache License, Version 2.0 (the "License"); 5// you may not use this file except in compliance with the License. 6// You may obtain a copy of the License at 7// 8// http://www.apache.org/licenses/LICENSE-2.0 9// 10// Unless required by applicable law or agreed to in writing, software 11// distributed under the License is distributed on an "AS IS" BASIS, 12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13// See the License for the specific language governing permissions and 14// limitations under the License. 15// 16 17#include "update_engine/payload_generator/delta_diff_utils.h" 18 19#include <endian.h> 20#if defined(__clang__) 21// TODO(*): Remove these pragmas when b/35721782 is fixed. 22#pragma clang diagnostic push 23#pragma clang diagnostic ignored "-Wmacro-redefined" 24#endif 25#include <ext2fs/ext2fs.h> 26#if defined(__clang__) 27#pragma clang diagnostic pop 28#endif 29#include <unistd.h> 30 31#include <algorithm> 32#include <map> 33#include <memory> 34#include <utility> 35 36#include <base/files/file_util.h> 37#include <base/format_macros.h> 38#include <base/strings/string_util.h> 39#include <base/strings/stringprintf.h> 40#include <base/threading/simple_thread.h> 41#include <bsdiff/bsdiff.h> 42 43#include "update_engine/common/hash_calculator.h" 44#include "update_engine/common/subprocess.h" 45#include "update_engine/common/utils.h" 46#include "update_engine/payload_consumer/payload_constants.h" 47#include "update_engine/payload_generator/block_mapping.h" 48#include "update_engine/payload_generator/bzip.h" 49#include "update_engine/payload_generator/deflate_utils.h" 50#include "update_engine/payload_generator/delta_diff_generator.h" 51#include "update_engine/payload_generator/extent_ranges.h" 52#include "update_engine/payload_generator/extent_utils.h" 53#include "update_engine/payload_generator/squashfs_filesystem.h" 54#include "update_engine/payload_generator/xz.h" 55 56using std::map; 57using std::string; 58using std::vector; 59 60namespace chromeos_update_engine { 61namespace { 62 63// The maximum destination size allowed for bsdiff. In general, bsdiff should 64// work for arbitrary big files, but the payload generation and payload 65// application requires a significant amount of RAM. We put a hard-limit of 66// 200 MiB that should not affect any released board, but will limit the 67// Chrome binary in ASan builders. 68const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024; // bytes 69 70// The maximum destination size allowed for puffdiff. In general, puffdiff 71// should work for arbitrary big files, but the payload application is quite 72// memory intensive, so we limit these operations to 150 MiB. 73const uint64_t kMaxPuffdiffDestinationSize = 150 * 1024 * 1024; // bytes 74 75// Process a range of blocks from |range_start| to |range_end| in the extent at 76// position |*idx_p| of |extents|. If |do_remove| is true, this range will be 77// removed, which may cause the extent to be trimmed, split or removed entirely. 78// The value of |*idx_p| is updated to point to the next extent to be processed. 79// Returns true iff the next extent to process is a new or updated one. 80bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p, 81 const bool do_remove, uint64_t range_start, 82 uint64_t range_end) { 83 size_t idx = *idx_p; 84 uint64_t start_block = (*extents)[idx].start_block(); 85 uint64_t num_blocks = (*extents)[idx].num_blocks(); 86 uint64_t range_size = range_end - range_start; 87 88 if (do_remove) { 89 if (range_size == num_blocks) { 90 // Remove the entire extent. 91 extents->erase(extents->begin() + idx); 92 } else if (range_end == num_blocks) { 93 // Trim the end of the extent. 94 (*extents)[idx].set_num_blocks(num_blocks - range_size); 95 idx++; 96 } else if (range_start == 0) { 97 // Trim the head of the extent. 98 (*extents)[idx].set_start_block(start_block + range_size); 99 (*extents)[idx].set_num_blocks(num_blocks - range_size); 100 } else { 101 // Trim the middle, splitting the remainder into two parts. 102 (*extents)[idx].set_num_blocks(range_start); 103 Extent e; 104 e.set_start_block(start_block + range_end); 105 e.set_num_blocks(num_blocks - range_end); 106 idx++; 107 extents->insert(extents->begin() + idx, e); 108 } 109 } else if (range_end == num_blocks) { 110 // Done with this extent. 111 idx++; 112 } else { 113 return false; 114 } 115 116 *idx_p = idx; 117 return true; 118} 119 120// Remove identical corresponding block ranges in |src_extents| and 121// |dst_extents|. Used for preventing moving of blocks onto themselves during 122// MOVE operations. The value of |total_bytes| indicates the actual length of 123// content; this may be slightly less than the total size of blocks, in which 124// case the last block is only partly occupied with data. Returns the total 125// number of bytes removed. 126size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents, 127 vector<Extent>* dst_extents, 128 const size_t total_bytes) { 129 size_t src_idx = 0; 130 size_t dst_idx = 0; 131 uint64_t src_offset = 0, dst_offset = 0; 132 size_t removed_bytes = 0, nonfull_block_bytes; 133 bool do_remove = false; 134 while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) { 135 do_remove = ((*src_extents)[src_idx].start_block() + src_offset == 136 (*dst_extents)[dst_idx].start_block() + dst_offset); 137 138 uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks(); 139 uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks(); 140 uint64_t min_num_blocks = std::min(src_num_blocks - src_offset, 141 dst_num_blocks - dst_offset); 142 uint64_t prev_src_offset = src_offset; 143 uint64_t prev_dst_offset = dst_offset; 144 src_offset += min_num_blocks; 145 dst_offset += min_num_blocks; 146 147 bool new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove, 148 prev_src_offset, src_offset); 149 bool new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove, 150 prev_dst_offset, dst_offset); 151 if (new_src) { 152 src_offset = 0; 153 } 154 if (new_dst) { 155 dst_offset = 0; 156 } 157 158 if (do_remove) 159 removed_bytes += min_num_blocks * kBlockSize; 160 } 161 162 // If we removed the last block and this block is only partly used by file 163 // content, deduct the unused portion from the total removed byte count. 164 if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize)) 165 removed_bytes -= kBlockSize - nonfull_block_bytes; 166 167 return removed_bytes; 168} 169 170} // namespace 171 172namespace diff_utils { 173 174// This class encapsulates a file delta processing thread work. The 175// processor computes the delta between the source and target files; 176// and write the compressed delta to the blob. 177class FileDeltaProcessor : public base::DelegateSimpleThread::Delegate { 178 public: 179 FileDeltaProcessor(const string& old_part, 180 const string& new_part, 181 const PayloadVersion& version, 182 const vector<Extent>& old_extents, 183 const vector<Extent>& new_extents, 184 const vector<puffin::BitExtent>& old_deflates, 185 const vector<puffin::BitExtent>& new_deflates, 186 const string& name, 187 ssize_t chunk_blocks, 188 BlobFileWriter* blob_file) 189 : old_part_(old_part), 190 new_part_(new_part), 191 version_(version), 192 old_extents_(old_extents), 193 new_extents_(new_extents), 194 old_deflates_(old_deflates), 195 new_deflates_(new_deflates), 196 name_(name), 197 chunk_blocks_(chunk_blocks), 198 blob_file_(blob_file) {} 199 200 FileDeltaProcessor(FileDeltaProcessor&& processor) = default; 201 202 ~FileDeltaProcessor() override = default; 203 204 // Overrides DelegateSimpleThread::Delegate. 205 // Calculate the list of operations and write their corresponding deltas to 206 // the blob_file. 207 void Run() override; 208 209 // Merge each file processor's ops list to aops. 210 void MergeOperation(vector<AnnotatedOperation>* aops); 211 212 private: 213 const string old_part_; 214 const string new_part_; 215 const PayloadVersion& version_; 216 217 // The block ranges of the old/new file within the src/tgt image 218 const vector<Extent> old_extents_; 219 const vector<Extent> new_extents_; 220 const vector<puffin::BitExtent> old_deflates_; 221 const vector<puffin::BitExtent> new_deflates_; 222 const string name_; 223 // Block limit of one aop. 224 ssize_t chunk_blocks_; 225 BlobFileWriter* blob_file_; 226 227 // The list of ops to reach the new file from the old file. 228 vector<AnnotatedOperation> file_aops_; 229 230 DISALLOW_COPY_AND_ASSIGN(FileDeltaProcessor); 231}; 232 233void FileDeltaProcessor::Run() { 234 TEST_AND_RETURN(blob_file_ != nullptr); 235 236 LOG(INFO) << "Encoding file " << name_ << " (" 237 << BlocksInExtents(new_extents_) << " blocks)"; 238 239 if (!DeltaReadFile(&file_aops_, 240 old_part_, 241 new_part_, 242 old_extents_, 243 new_extents_, 244 old_deflates_, 245 new_deflates_, 246 name_, 247 chunk_blocks_, 248 version_, 249 blob_file_)) { 250 LOG(ERROR) << "Failed to generate delta for " << name_ << " (" 251 << BlocksInExtents(new_extents_) << " blocks)"; 252 } 253} 254 255void FileDeltaProcessor::MergeOperation(vector<AnnotatedOperation>* aops) { 256 aops->reserve(aops->size() + file_aops_.size()); 257 std::move(file_aops_.begin(), file_aops_.end(), std::back_inserter(*aops)); 258} 259 260bool DeltaReadPartition(vector<AnnotatedOperation>* aops, 261 const PartitionConfig& old_part, 262 const PartitionConfig& new_part, 263 ssize_t hard_chunk_blocks, 264 size_t soft_chunk_blocks, 265 const PayloadVersion& version, 266 BlobFileWriter* blob_file) { 267 ExtentRanges old_visited_blocks; 268 ExtentRanges new_visited_blocks; 269 270 TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks( 271 aops, 272 old_part.path, 273 new_part.path, 274 old_part.size / kBlockSize, 275 new_part.size / kBlockSize, 276 soft_chunk_blocks, 277 version, 278 blob_file, 279 &old_visited_blocks, 280 &new_visited_blocks)); 281 282 bool puffdiff_allowed = version.OperationAllowed(InstallOperation::PUFFDIFF); 283 map<string, FilesystemInterface::File> old_files_map; 284 if (old_part.fs_interface) { 285 vector<FilesystemInterface::File> old_files; 286 TEST_AND_RETURN_FALSE(deflate_utils::PreprocessParitionFiles( 287 old_part, &old_files, puffdiff_allowed)); 288 for (const FilesystemInterface::File& file : old_files) 289 old_files_map[file.name] = file; 290 } 291 292 TEST_AND_RETURN_FALSE(new_part.fs_interface); 293 vector<FilesystemInterface::File> new_files; 294 TEST_AND_RETURN_FALSE(deflate_utils::PreprocessParitionFiles( 295 new_part, &new_files, puffdiff_allowed)); 296 297 vector<FileDeltaProcessor> file_delta_processors; 298 299 // The processing is very straightforward here, we generate operations for 300 // every file (and pseudo-file such as the metadata) in the new filesystem 301 // based on the file with the same name in the old filesystem, if any. 302 // Files with overlapping data blocks (like hardlinks or filesystems with tail 303 // packing or compression where the blocks store more than one file) are only 304 // generated once in the new image, but are also used only once from the old 305 // image due to some simplifications (see below). 306 for (const FilesystemInterface::File& new_file : new_files) { 307 // Ignore the files in the new filesystem without blocks. Symlinks with 308 // data blocks (for example, symlinks bigger than 60 bytes in ext2) are 309 // handled as normal files. We also ignore blocks that were already 310 // processed by a previous file. 311 vector<Extent> new_file_extents = FilterExtentRanges( 312 new_file.extents, new_visited_blocks); 313 new_visited_blocks.AddExtents(new_file_extents); 314 315 if (new_file_extents.empty()) 316 continue; 317 318 // We can't visit each dst image inode more than once, as that would 319 // duplicate work. Here, we avoid visiting each source image inode 320 // more than once. Technically, we could have multiple operations 321 // that read the same blocks from the source image for diffing, but 322 // we choose not to avoid complexity. Eventually we will move away 323 // from using a graph/cycle detection/etc to generate diffs, and at that 324 // time, it will be easy (non-complex) to have many operations read 325 // from the same source blocks. At that time, this code can die. -adlr 326 auto old_file = old_files_map[new_file.name]; 327 vector<Extent> old_file_extents = 328 FilterExtentRanges(old_file.extents, old_visited_blocks); 329 old_visited_blocks.AddExtents(old_file_extents); 330 331 file_delta_processors.emplace_back(old_part.path, 332 new_part.path, 333 version, 334 std::move(old_file_extents), 335 std::move(new_file_extents), 336 old_file.deflates, 337 new_file.deflates, 338 new_file.name, // operation name 339 hard_chunk_blocks, 340 blob_file); 341 } 342 343 size_t max_threads = GetMaxThreads(); 344 base::DelegateSimpleThreadPool thread_pool("incremental-update-generator", 345 max_threads); 346 thread_pool.Start(); 347 for (auto& processor : file_delta_processors) { 348 thread_pool.AddWork(&processor); 349 } 350 thread_pool.JoinAll(); 351 352 for (auto& processor : file_delta_processors) { 353 processor.MergeOperation(aops); 354 } 355 356 // Process all the blocks not included in any file. We provided all the unused 357 // blocks in the old partition as available data. 358 vector<Extent> new_unvisited = { 359 ExtentForRange(0, new_part.size / kBlockSize)}; 360 new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks); 361 if (new_unvisited.empty()) 362 return true; 363 364 vector<Extent> old_unvisited; 365 if (old_part.fs_interface) { 366 old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize)); 367 old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks); 368 } 369 370 LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited) 371 << " unwritten blocks using chunk size of " 372 << soft_chunk_blocks << " blocks."; 373 // We use the soft_chunk_blocks limit for the <non-file-data> as we don't 374 // really know the structure of this data and we should not expect it to have 375 // redundancy between partitions. 376 TEST_AND_RETURN_FALSE(DeltaReadFile(aops, 377 old_part.path, 378 new_part.path, 379 old_unvisited, 380 new_unvisited, 381 {}, // old_deflates, 382 {}, // new_deflates 383 "<non-file-data>", // operation name 384 soft_chunk_blocks, 385 version, 386 blob_file)); 387 388 return true; 389} 390 391bool DeltaMovedAndZeroBlocks(vector<AnnotatedOperation>* aops, 392 const string& old_part, 393 const string& new_part, 394 size_t old_num_blocks, 395 size_t new_num_blocks, 396 ssize_t chunk_blocks, 397 const PayloadVersion& version, 398 BlobFileWriter* blob_file, 399 ExtentRanges* old_visited_blocks, 400 ExtentRanges* new_visited_blocks) { 401 vector<BlockMapping::BlockId> old_block_ids; 402 vector<BlockMapping::BlockId> new_block_ids; 403 TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part, 404 new_part, 405 old_num_blocks * kBlockSize, 406 new_num_blocks * kBlockSize, 407 kBlockSize, 408 &old_block_ids, 409 &new_block_ids)); 410 411 // If the update is inplace, we map all the blocks that didn't move, 412 // regardless of the contents since they are already copied and no operation 413 // is required. 414 if (version.InplaceUpdate()) { 415 uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks); 416 for (uint64_t block = 0; block < num_blocks; block++) { 417 if (old_block_ids[block] == new_block_ids[block] && 418 !old_visited_blocks->ContainsBlock(block) && 419 !new_visited_blocks->ContainsBlock(block)) { 420 old_visited_blocks->AddBlock(block); 421 new_visited_blocks->AddBlock(block); 422 } 423 } 424 } 425 426 // A mapping from the block_id to the list of block numbers with that block id 427 // in the old partition. This is used to lookup where in the old partition 428 // is a block from the new partition. 429 map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map; 430 431 for (uint64_t block = old_num_blocks; block-- > 0; ) { 432 if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block)) 433 old_blocks_map[old_block_ids[block]].push_back(block); 434 435 // Mark all zeroed blocks in the old image as "used" since it doesn't make 436 // any sense to spend I/O to read zeros from the source partition and more 437 // importantly, these could sometimes be blocks discarded in the SSD which 438 // would read non-zero values. 439 if (old_block_ids[block] == 0) 440 old_visited_blocks->AddBlock(block); 441 } 442 443 // The collection of blocks in the new partition with just zeros. This is a 444 // common case for free-space that's also problematic for bsdiff, so we want 445 // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of 446 // just zeros is so small that it doesn't make sense to spend the I/O reading 447 // zeros from the old partition. 448 vector<Extent> new_zeros; 449 450 vector<Extent> old_identical_blocks; 451 vector<Extent> new_identical_blocks; 452 453 for (uint64_t block = 0; block < new_num_blocks; block++) { 454 // Only produce operations for blocks that were not yet visited. 455 if (new_visited_blocks->ContainsBlock(block)) 456 continue; 457 if (new_block_ids[block] == 0) { 458 AppendBlockToExtents(&new_zeros, block); 459 continue; 460 } 461 462 auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]); 463 // Check if the block exists in the old partition at all. 464 if (old_blocks_map_it == old_blocks_map.end() || 465 old_blocks_map_it->second.empty()) 466 continue; 467 468 AppendBlockToExtents(&old_identical_blocks, 469 old_blocks_map_it->second.back()); 470 AppendBlockToExtents(&new_identical_blocks, block); 471 // We can't reuse source blocks in minor version 1 because the cycle 472 // breaking algorithm used in the in-place update doesn't support that. 473 if (version.InplaceUpdate()) 474 old_blocks_map_it->second.pop_back(); 475 } 476 477 // Produce operations for the zero blocks split per output extent. 478 // TODO(deymo): Produce ZERO operations instead of calling DeltaReadFile(). 479 size_t num_ops = aops->size(); 480 new_visited_blocks->AddExtents(new_zeros); 481 for (const Extent& extent : new_zeros) { 482 TEST_AND_RETURN_FALSE(DeltaReadFile(aops, 483 "", 484 new_part, 485 vector<Extent>(), // old_extents 486 vector<Extent>{extent}, // new_extents 487 {}, // old_deflates 488 {}, // new_deflates 489 "<zeros>", 490 chunk_blocks, 491 version, 492 blob_file)); 493 } 494 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for " 495 << BlocksInExtents(new_zeros) << " zeroed blocks"; 496 497 // Produce MOVE/SOURCE_COPY operations for the moved blocks. 498 num_ops = aops->size(); 499 if (chunk_blocks == -1) 500 chunk_blocks = new_num_blocks; 501 uint64_t used_blocks = 0; 502 old_visited_blocks->AddExtents(old_identical_blocks); 503 new_visited_blocks->AddExtents(new_identical_blocks); 504 for (const Extent& extent : new_identical_blocks) { 505 // We split the operation at the extent boundary or when bigger than 506 // chunk_blocks. 507 for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks(); 508 op_block_offset += chunk_blocks) { 509 aops->emplace_back(); 510 AnnotatedOperation* aop = &aops->back(); 511 aop->name = "<identical-blocks>"; 512 aop->op.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY) 513 ? InstallOperation::SOURCE_COPY 514 : InstallOperation::MOVE); 515 516 uint64_t chunk_num_blocks = 517 std::min(static_cast<uint64_t>(extent.num_blocks()) - op_block_offset, 518 static_cast<uint64_t>(chunk_blocks)); 519 520 // The current operation represents the move/copy operation for the 521 // sublist starting at |used_blocks| of length |chunk_num_blocks| where 522 // the src and dst are from |old_identical_blocks| and 523 // |new_identical_blocks| respectively. 524 StoreExtents( 525 ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks), 526 aop->op.mutable_src_extents()); 527 528 Extent* op_dst_extent = aop->op.add_dst_extents(); 529 op_dst_extent->set_start_block(extent.start_block() + op_block_offset); 530 op_dst_extent->set_num_blocks(chunk_num_blocks); 531 CHECK( 532 vector<Extent>{*op_dst_extent} == // NOLINT(whitespace/braces) 533 ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks)); 534 535 used_blocks += chunk_num_blocks; 536 } 537 } 538 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for " 539 << used_blocks << " identical blocks moved"; 540 541 return true; 542} 543 544bool DeltaReadFile(vector<AnnotatedOperation>* aops, 545 const string& old_part, 546 const string& new_part, 547 const vector<Extent>& old_extents, 548 const vector<Extent>& new_extents, 549 const vector<puffin::BitExtent>& old_deflates, 550 const vector<puffin::BitExtent>& new_deflates, 551 const string& name, 552 ssize_t chunk_blocks, 553 const PayloadVersion& version, 554 BlobFileWriter* blob_file) { 555 brillo::Blob data; 556 InstallOperation operation; 557 558 uint64_t total_blocks = BlocksInExtents(new_extents); 559 if (chunk_blocks == -1) 560 chunk_blocks = total_blocks; 561 562 for (uint64_t block_offset = 0; block_offset < total_blocks; 563 block_offset += chunk_blocks) { 564 // Split the old/new file in the same chunks. Note that this could drop 565 // some information from the old file used for the new chunk. If the old 566 // file is smaller (or even empty when there's no old file) the chunk will 567 // also be empty. 568 vector<Extent> old_extents_chunk = ExtentsSublist( 569 old_extents, block_offset, chunk_blocks); 570 vector<Extent> new_extents_chunk = ExtentsSublist( 571 new_extents, block_offset, chunk_blocks); 572 NormalizeExtents(&old_extents_chunk); 573 NormalizeExtents(&new_extents_chunk); 574 575 TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part, 576 new_part, 577 old_extents_chunk, 578 new_extents_chunk, 579 old_deflates, 580 new_deflates, 581 version, 582 &data, 583 &operation)); 584 585 // Check if the operation writes nothing. 586 if (operation.dst_extents_size() == 0) { 587 if (operation.type() == InstallOperation::MOVE) { 588 LOG(INFO) << "Empty MOVE operation (" 589 << name << "), skipping"; 590 continue; 591 } else { 592 LOG(ERROR) << "Empty non-MOVE operation"; 593 return false; 594 } 595 } 596 597 // Now, insert into the list of operations. 598 AnnotatedOperation aop; 599 aop.name = name; 600 if (static_cast<uint64_t>(chunk_blocks) < total_blocks) { 601 aop.name = base::StringPrintf("%s:%" PRIu64, 602 name.c_str(), block_offset / chunk_blocks); 603 } 604 aop.op = operation; 605 606 // Write the data 607 TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file)); 608 aops->emplace_back(aop); 609 } 610 return true; 611} 612 613bool GenerateBestFullOperation(const brillo::Blob& new_data, 614 const PayloadVersion& version, 615 brillo::Blob* out_blob, 616 InstallOperation_Type* out_type) { 617 if (new_data.empty()) 618 return false; 619 620 if (version.OperationAllowed(InstallOperation::ZERO) && 621 std::all_of( 622 new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) { 623 // The read buffer is all zeros, so produce a ZERO operation. No need to 624 // check other types of operations in this case. 625 *out_blob = brillo::Blob(); 626 *out_type = InstallOperation::ZERO; 627 return true; 628 } 629 630 bool out_blob_set = false; 631 632 // Try compressing |new_data| with xz first. 633 if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) { 634 brillo::Blob new_data_xz; 635 if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) { 636 *out_type = InstallOperation::REPLACE_XZ; 637 *out_blob = std::move(new_data_xz); 638 out_blob_set = true; 639 } 640 } 641 642 // Try compressing it with bzip2. 643 if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) { 644 brillo::Blob new_data_bz; 645 // TODO(deymo): Implement some heuristic to determine if it is worth trying 646 // to compress the blob with bzip2 if we already have a good REPLACE_XZ. 647 if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() && 648 (!out_blob_set || out_blob->size() > new_data_bz.size())) { 649 // A REPLACE_BZ is better or nothing else was set. 650 *out_type = InstallOperation::REPLACE_BZ; 651 *out_blob = std::move(new_data_bz); 652 out_blob_set = true; 653 } 654 } 655 656 // If nothing else worked or it was badly compressed we try a REPLACE. 657 if (!out_blob_set || out_blob->size() >= new_data.size()) { 658 *out_type = InstallOperation::REPLACE; 659 // This needs to make a copy of the data in the case bzip or xz didn't 660 // compress well, which is not the common case so the performance hit is 661 // low. 662 *out_blob = new_data; 663 } 664 return true; 665} 666 667bool ReadExtentsToDiff(const string& old_part, 668 const string& new_part, 669 const vector<Extent>& old_extents, 670 const vector<Extent>& new_extents, 671 const vector<puffin::BitExtent>& old_deflates, 672 const vector<puffin::BitExtent>& new_deflates, 673 const PayloadVersion& version, 674 brillo::Blob* out_data, 675 InstallOperation* out_op) { 676 InstallOperation operation; 677 678 // We read blocks from old_extents and write blocks to new_extents. 679 uint64_t blocks_to_read = BlocksInExtents(old_extents); 680 uint64_t blocks_to_write = BlocksInExtents(new_extents); 681 682 // Disable bsdiff, and puffdiff when the data is too big. 683 bool bsdiff_allowed = 684 version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) || 685 version.OperationAllowed(InstallOperation::BSDIFF); 686 if (bsdiff_allowed && 687 blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) { 688 LOG(INFO) << "bsdiff blacklisted, data too big: " 689 << blocks_to_read * kBlockSize << " bytes"; 690 bsdiff_allowed = false; 691 } 692 693 bool puffdiff_allowed = version.OperationAllowed(InstallOperation::PUFFDIFF); 694 if (puffdiff_allowed && 695 blocks_to_read * kBlockSize > kMaxPuffdiffDestinationSize) { 696 LOG(INFO) << "puffdiff blacklisted, data too big: " 697 << blocks_to_read * kBlockSize << " bytes"; 698 puffdiff_allowed = false; 699 } 700 701 // Make copies of the extents so we can modify them. 702 vector<Extent> src_extents = old_extents; 703 vector<Extent> dst_extents = new_extents; 704 705 // Read in bytes from new data. 706 brillo::Blob new_data; 707 TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part, 708 new_extents, 709 &new_data, 710 kBlockSize * blocks_to_write, 711 kBlockSize)); 712 TEST_AND_RETURN_FALSE(!new_data.empty()); 713 714 // Data blob that will be written to delta file. 715 brillo::Blob data_blob; 716 717 // Try generating a full operation for the given new data, regardless of the 718 // old_data. 719 InstallOperation_Type op_type; 720 TEST_AND_RETURN_FALSE( 721 GenerateBestFullOperation(new_data, version, &data_blob, &op_type)); 722 operation.set_type(op_type); 723 724 brillo::Blob old_data; 725 if (blocks_to_read > 0) { 726 // Read old data. 727 TEST_AND_RETURN_FALSE( 728 utils::ReadExtents(old_part, src_extents, &old_data, 729 kBlockSize * blocks_to_read, kBlockSize)); 730 if (old_data == new_data) { 731 // No change in data. 732 operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY) 733 ? InstallOperation::SOURCE_COPY 734 : InstallOperation::MOVE); 735 data_blob = brillo::Blob(); 736 } else { 737 if (bsdiff_allowed) { 738 base::FilePath patch; 739 TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&patch)); 740 ScopedPathUnlinker unlinker(patch.value()); 741 742 brillo::Blob bsdiff_delta; 743 TEST_AND_RETURN_FALSE(0 == bsdiff::bsdiff(old_data.data(), 744 old_data.size(), 745 new_data.data(), 746 new_data.size(), 747 patch.value().c_str(), 748 nullptr)); 749 750 TEST_AND_RETURN_FALSE(utils::ReadFile(patch.value(), &bsdiff_delta)); 751 CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0)); 752 if (bsdiff_delta.size() < data_blob.size()) { 753 operation.set_type( 754 version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) 755 ? InstallOperation::SOURCE_BSDIFF 756 : InstallOperation::BSDIFF); 757 data_blob = std::move(bsdiff_delta); 758 } 759 } 760 if (puffdiff_allowed) { 761 // Find all deflate positions inside the given extents and then put all 762 // deflates together because we have already read all the extents into 763 // one buffer. 764 vector<puffin::BitExtent> src_deflates; 765 TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates( 766 src_extents, old_deflates, &src_deflates)); 767 768 vector<puffin::BitExtent> dst_deflates; 769 TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates( 770 dst_extents, new_deflates, &dst_deflates)); 771 772 // Remove equal deflates. TODO(*): We can do a N*N check using 773 // hashing. It will not reduce the payload size, but it will speeds up 774 // the puffing on the client device. 775 auto src = src_deflates.begin(); 776 auto dst = dst_deflates.begin(); 777 for (; src != src_deflates.end() && dst != dst_deflates.end();) { 778 auto src_in_bytes = deflate_utils::ExpandToByteExtent(*src); 779 auto dst_in_bytes = deflate_utils::ExpandToByteExtent(*dst); 780 if (src_in_bytes.length == dst_in_bytes.length && 781 !memcmp(old_data.data() + src_in_bytes.offset, 782 new_data.data() + dst_in_bytes.offset, 783 src_in_bytes.length)) { 784 src = src_deflates.erase(src); 785 dst = dst_deflates.erase(dst); 786 } else { 787 src++; 788 dst++; 789 } 790 } 791 792 // Only Puffdiff if both files have at least one deflate left. 793 if (!src_deflates.empty() && !dst_deflates.empty()) { 794 brillo::Blob puffdiff_delta; 795 string temp_file_path; 796 TEST_AND_RETURN_FALSE(utils::MakeTempFile( 797 "puffdiff-delta.XXXXXX", &temp_file_path, nullptr)); 798 ScopedPathUnlinker temp_file_unlinker(temp_file_path); 799 800 // Perform PuffDiff operation. 801 TEST_AND_RETURN_FALSE(puffin::PuffDiff(old_data, 802 new_data, 803 src_deflates, 804 dst_deflates, 805 temp_file_path, 806 &puffdiff_delta)); 807 TEST_AND_RETURN_FALSE(puffdiff_delta.size() > 0); 808 if (puffdiff_delta.size() < data_blob.size()) { 809 operation.set_type(InstallOperation::PUFFDIFF); 810 data_blob = std::move(puffdiff_delta); 811 } 812 } 813 } 814 } 815 } 816 817 // Remove identical src/dst block ranges in MOVE operations. 818 if (operation.type() == InstallOperation::MOVE) { 819 auto removed_bytes = RemoveIdenticalBlockRanges( 820 &src_extents, &dst_extents, new_data.size()); 821 operation.set_src_length(old_data.size() - removed_bytes); 822 operation.set_dst_length(new_data.size() - removed_bytes); 823 } 824 825 // WARNING: We always set legacy |src_length| and |dst_length| fields for 826 // BSDIFF. For SOURCE_BSDIFF we only set them for minor version 3 and 827 // lower. This is needed because we used to use these two parameters in the 828 // SOURCE_BSDIFF for minor version 3 and lower, but we do not need them 829 // anymore in higher minor versions. This means if we stop adding these 830 // parameters for those minor versions, the delta payloads will be invalid. 831 if (operation.type() == InstallOperation::BSDIFF || 832 (operation.type() == InstallOperation::SOURCE_BSDIFF && 833 version.minor <= kOpSrcHashMinorPayloadVersion)) { 834 operation.set_src_length(old_data.size()); 835 operation.set_dst_length(new_data.size()); 836 } 837 838 // Embed extents in the operation. Replace (all variants), zero and discard 839 // operations should not have source extents. 840 if (!IsNoSourceOperation(operation.type())) { 841 StoreExtents(src_extents, operation.mutable_src_extents()); 842 } 843 // All operations have dst_extents. 844 StoreExtents(dst_extents, operation.mutable_dst_extents()); 845 846 *out_data = std::move(data_blob); 847 *out_op = operation; 848 return true; 849} 850 851bool IsAReplaceOperation(InstallOperation_Type op_type) { 852 return (op_type == InstallOperation::REPLACE || 853 op_type == InstallOperation::REPLACE_BZ || 854 op_type == InstallOperation::REPLACE_XZ); 855} 856 857bool IsNoSourceOperation(InstallOperation_Type op_type) { 858 return (IsAReplaceOperation(op_type) || 859 op_type == InstallOperation::ZERO || 860 op_type == InstallOperation::DISCARD); 861} 862 863// Returns true if |op| is a no-op operation that doesn't do any useful work 864// (e.g., a move operation that copies blocks onto themselves). 865bool IsNoopOperation(const InstallOperation& op) { 866 return (op.type() == InstallOperation::MOVE && 867 ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents())); 868} 869 870void FilterNoopOperations(vector<AnnotatedOperation>* ops) { 871 ops->erase( 872 std::remove_if( 873 ops->begin(), ops->end(), 874 [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}), 875 ops->end()); 876} 877 878bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) { 879 info->set_size(part.size); 880 HashCalculator hasher; 881 TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) == 882 static_cast<off_t>(part.size)); 883 TEST_AND_RETURN_FALSE(hasher.Finalize()); 884 const brillo::Blob& hash = hasher.raw_hash(); 885 info->set_hash(hash.data(), hash.size()); 886 LOG(INFO) << part.path << ": size=" << part.size << " hash=" << hasher.hash(); 887 return true; 888} 889 890bool CompareAopsByDestination(AnnotatedOperation first_aop, 891 AnnotatedOperation second_aop) { 892 // We want empty operations to be at the end of the payload. 893 if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size()) 894 return ((!first_aop.op.dst_extents().size()) < 895 (!second_aop.op.dst_extents().size())); 896 uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block(); 897 uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block(); 898 return first_dst_start < second_dst_start; 899} 900 901bool IsExtFilesystem(const string& device) { 902 brillo::Blob header; 903 // See include/linux/ext2_fs.h for more details on the structure. We obtain 904 // ext2 constants from ext2fs/ext2fs.h header but we don't link with the 905 // library. 906 if (!utils::ReadFileChunk( 907 device, 0, SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE, &header) || 908 header.size() < SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE) 909 return false; 910 911 const uint8_t* superblock = header.data() + SUPERBLOCK_OFFSET; 912 913 // ext3_fs.h: ext3_super_block.s_blocks_count 914 uint32_t block_count = 915 *reinterpret_cast<const uint32_t*>(superblock + 1 * sizeof(int32_t)); 916 917 // ext3_fs.h: ext3_super_block.s_log_block_size 918 uint32_t log_block_size = 919 *reinterpret_cast<const uint32_t*>(superblock + 6 * sizeof(int32_t)); 920 921 // ext3_fs.h: ext3_super_block.s_magic 922 uint16_t magic = 923 *reinterpret_cast<const uint16_t*>(superblock + 14 * sizeof(int32_t)); 924 925 block_count = le32toh(block_count); 926 log_block_size = le32toh(log_block_size) + EXT2_MIN_BLOCK_LOG_SIZE; 927 magic = le16toh(magic); 928 929 if (magic != EXT2_SUPER_MAGIC) 930 return false; 931 932 // Sanity check the parameters. 933 TEST_AND_RETURN_FALSE(log_block_size >= EXT2_MIN_BLOCK_LOG_SIZE && 934 log_block_size <= EXT2_MAX_BLOCK_LOG_SIZE); 935 TEST_AND_RETURN_FALSE(block_count > 0); 936 return true; 937} 938 939// Return the number of CPUs on the machine, and 4 threads in minimum. 940size_t GetMaxThreads() { 941 return std::max(sysconf(_SC_NPROCESSORS_ONLN), 4L); 942} 943 944} // namespace diff_utils 945 946} // namespace chromeos_update_engine 947