delta_diff_utils.cc revision c4ad1ebc33abc088aca2909ba5cbaf7ae5e5659f
1//
2// Copyright (C) 2015 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16
17#include "update_engine/payload_generator/delta_diff_utils.h"
18
19#include <endian.h>
20// TODO: Remove these pragmas when b/35721782 is fixed.
21#pragma clang diagnostic push
22#pragma clang diagnostic ignored "-Wmacro-redefined"
23#include <ext2fs/ext2fs.h>
24#pragma clang diagnostic pop
25#include <unistd.h>
26
27#include <algorithm>
28#include <map>
29
30#include <base/files/file_util.h>
31#include <base/format_macros.h>
32#include <base/strings/stringprintf.h>
33#include <base/threading/simple_thread.h>
34
35#include "update_engine/common/hash_calculator.h"
36#include "update_engine/common/subprocess.h"
37#include "update_engine/common/utils.h"
38#include "update_engine/payload_generator/block_mapping.h"
39#include "update_engine/payload_generator/bzip.h"
40#include "update_engine/payload_generator/delta_diff_generator.h"
41#include "update_engine/payload_generator/extent_ranges.h"
42#include "update_engine/payload_generator/extent_utils.h"
43#include "update_engine/payload_generator/xz.h"
44
45using std::map;
46using std::string;
47using std::vector;
48
49namespace chromeos_update_engine {
50namespace {
51
52const char* const kBsdiffPath = "bsdiff";
53const char* const kImgdiffPath = "imgdiff";
54
55// The maximum destination size allowed for bsdiff. In general, bsdiff should
56// work for arbitrary big files, but the payload generation and payload
57// application requires a significant amount of RAM. We put a hard-limit of
58// 200 MiB that should not affect any released board, but will limit the
59// Chrome binary in ASan builders.
60const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024;  // bytes
61
62// The maximum destination size allowed for imgdiff. In general, imgdiff should
63// work for arbitrary big files, but the payload application is quite memory
64// intensive, so we limit these operations to 50 MiB.
65const uint64_t kMaxImgdiffDestinationSize = 50 * 1024 * 1024;  // bytes
66
67// Process a range of blocks from |range_start| to |range_end| in the extent at
68// position |*idx_p| of |extents|. If |do_remove| is true, this range will be
69// removed, which may cause the extent to be trimmed, split or removed entirely.
70// The value of |*idx_p| is updated to point to the next extent to be processed.
71// Returns true iff the next extent to process is a new or updated one.
72bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p,
73                             const bool do_remove, uint64_t range_start,
74                             uint64_t range_end) {
75  size_t idx = *idx_p;
76  uint64_t start_block = (*extents)[idx].start_block();
77  uint64_t num_blocks = (*extents)[idx].num_blocks();
78  uint64_t range_size = range_end - range_start;
79
80  if (do_remove) {
81    if (range_size == num_blocks) {
82      // Remove the entire extent.
83      extents->erase(extents->begin() + idx);
84    } else if (range_end == num_blocks) {
85      // Trim the end of the extent.
86      (*extents)[idx].set_num_blocks(num_blocks - range_size);
87      idx++;
88    } else if (range_start == 0) {
89      // Trim the head of the extent.
90      (*extents)[idx].set_start_block(start_block + range_size);
91      (*extents)[idx].set_num_blocks(num_blocks - range_size);
92    } else {
93      // Trim the middle, splitting the remainder into two parts.
94      (*extents)[idx].set_num_blocks(range_start);
95      Extent e;
96      e.set_start_block(start_block + range_end);
97      e.set_num_blocks(num_blocks - range_end);
98      idx++;
99      extents->insert(extents->begin() + idx, e);
100    }
101  } else if (range_end == num_blocks) {
102    // Done with this extent.
103    idx++;
104  } else {
105    return false;
106  }
107
108  *idx_p = idx;
109  return true;
110}
111
112// Remove identical corresponding block ranges in |src_extents| and
113// |dst_extents|. Used for preventing moving of blocks onto themselves during
114// MOVE operations. The value of |total_bytes| indicates the actual length of
115// content; this may be slightly less than the total size of blocks, in which
116// case the last block is only partly occupied with data. Returns the total
117// number of bytes removed.
118size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents,
119                                  vector<Extent>* dst_extents,
120                                  const size_t total_bytes) {
121  size_t src_idx = 0;
122  size_t dst_idx = 0;
123  uint64_t src_offset = 0, dst_offset = 0;
124  size_t removed_bytes = 0, nonfull_block_bytes;
125  bool do_remove = false;
126  while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) {
127    do_remove = ((*src_extents)[src_idx].start_block() + src_offset ==
128                 (*dst_extents)[dst_idx].start_block() + dst_offset);
129
130    uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks();
131    uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks();
132    uint64_t min_num_blocks = std::min(src_num_blocks - src_offset,
133                                       dst_num_blocks - dst_offset);
134    uint64_t prev_src_offset = src_offset;
135    uint64_t prev_dst_offset = dst_offset;
136    src_offset += min_num_blocks;
137    dst_offset += min_num_blocks;
138
139    bool new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove,
140                                           prev_src_offset, src_offset);
141    bool new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove,
142                                           prev_dst_offset, dst_offset);
143    if (new_src) {
144      src_offset = 0;
145    }
146    if (new_dst) {
147      dst_offset = 0;
148    }
149
150    if (do_remove)
151      removed_bytes += min_num_blocks * kBlockSize;
152  }
153
154  // If we removed the last block and this block is only partly used by file
155  // content, deduct the unused portion from the total removed byte count.
156  if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize))
157    removed_bytes -= kBlockSize - nonfull_block_bytes;
158
159  return removed_bytes;
160}
161
162// Returns true if the given blob |data| contains gzip header magic.
163bool ContainsGZip(const brillo::Blob& data) {
164  const uint8_t kGZipMagic[] = {0x1f, 0x8b, 0x08, 0x00};
165  return std::search(data.begin(),
166                     data.end(),
167                     std::begin(kGZipMagic),
168                     std::end(kGZipMagic)) != data.end();
169}
170
171}  // namespace
172
173namespace diff_utils {
174
175// This class encapsulates a file delta processing thread work. The
176// processor computes the delta between the source and target files;
177// and write the compressed delta to the blob.
178class FileDeltaProcessor : public base::DelegateSimpleThread::Delegate {
179 public:
180  FileDeltaProcessor(const string& old_part,
181                     const string& new_part,
182                     const PayloadVersion& version,
183                     const vector<Extent>& old_extents,
184                     const vector<Extent>& new_extents,
185                     const string& name,
186                     ssize_t chunk_blocks,
187                     BlobFileWriter* blob_file)
188      : old_part_(old_part),
189        new_part_(new_part),
190        version_(version),
191        old_extents_(old_extents),
192        new_extents_(new_extents),
193        name_(name),
194        chunk_blocks_(chunk_blocks),
195        blob_file_(blob_file) {}
196
197  FileDeltaProcessor(FileDeltaProcessor&& processor) = default;
198
199  ~FileDeltaProcessor() override = default;
200
201  // Overrides DelegateSimpleThread::Delegate.
202  // Calculate the list of operations and write their corresponding deltas to
203  // the blob_file.
204  void Run() override;
205
206  // Merge each file processor's ops list to aops.
207  void MergeOperation(vector<AnnotatedOperation>* aops);
208
209 private:
210  const string& old_part_;
211  const string& new_part_;
212  const PayloadVersion& version_;
213
214  // The block ranges of the old/new file within the src/tgt image
215  const vector<Extent> old_extents_;
216  const vector<Extent> new_extents_;
217  const string name_;
218  // Block limit of one aop.
219  ssize_t chunk_blocks_;
220  BlobFileWriter* blob_file_;
221
222  // The list of ops to reach the new file from the old file.
223  vector<AnnotatedOperation> file_aops_;
224
225  DISALLOW_COPY_AND_ASSIGN(FileDeltaProcessor);
226};
227
228void FileDeltaProcessor::Run() {
229  TEST_AND_RETURN(blob_file_ != nullptr);
230
231  if (!DeltaReadFile(&file_aops_,
232                     old_part_,
233                     new_part_,
234                     old_extents_,
235                     new_extents_,
236                     name_,
237                     chunk_blocks_,
238                     version_,
239                     blob_file_)) {
240    LOG(ERROR) << "Failed to generate delta for " << name_ << " ("
241               << BlocksInExtents(new_extents_) << " blocks)";
242  }
243}
244
245void FileDeltaProcessor::MergeOperation(vector<AnnotatedOperation>* aops) {
246  aops->reserve(aops->size() + file_aops_.size());
247  std::move(file_aops_.begin(), file_aops_.end(), std::back_inserter(*aops));
248}
249
250bool DeltaReadPartition(vector<AnnotatedOperation>* aops,
251                        const PartitionConfig& old_part,
252                        const PartitionConfig& new_part,
253                        ssize_t hard_chunk_blocks,
254                        size_t soft_chunk_blocks,
255                        const PayloadVersion& version,
256                        BlobFileWriter* blob_file) {
257  ExtentRanges old_visited_blocks;
258  ExtentRanges new_visited_blocks;
259
260  TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks(
261      aops,
262      old_part.path,
263      new_part.path,
264      old_part.size / kBlockSize,
265      new_part.size / kBlockSize,
266      soft_chunk_blocks,
267      version,
268      blob_file,
269      &old_visited_blocks,
270      &new_visited_blocks));
271
272  map<string, vector<Extent>> old_files_map;
273  if (old_part.fs_interface) {
274    vector<FilesystemInterface::File> old_files;
275    old_part.fs_interface->GetFiles(&old_files);
276    for (const FilesystemInterface::File& file : old_files)
277      old_files_map[file.name] = file.extents;
278  }
279
280  TEST_AND_RETURN_FALSE(new_part.fs_interface);
281  vector<FilesystemInterface::File> new_files;
282  new_part.fs_interface->GetFiles(&new_files);
283
284  vector<FileDeltaProcessor> file_delta_processors;
285
286  // The processing is very straightforward here, we generate operations for
287  // every file (and pseudo-file such as the metadata) in the new filesystem
288  // based on the file with the same name in the old filesystem, if any.
289  // Files with overlapping data blocks (like hardlinks or filesystems with tail
290  // packing or compression where the blocks store more than one file) are only
291  // generated once in the new image, but are also used only once from the old
292  // image due to some simplifications (see below).
293  for (const FilesystemInterface::File& new_file : new_files) {
294    // Ignore the files in the new filesystem without blocks. Symlinks with
295    // data blocks (for example, symlinks bigger than 60 bytes in ext2) are
296    // handled as normal files. We also ignore blocks that were already
297    // processed by a previous file.
298    vector<Extent> new_file_extents = FilterExtentRanges(
299        new_file.extents, new_visited_blocks);
300    new_visited_blocks.AddExtents(new_file_extents);
301
302    if (new_file_extents.empty())
303      continue;
304
305    LOG(INFO) << "Encoding file " << new_file.name << " ("
306              << BlocksInExtents(new_file_extents) << " blocks)";
307
308    // We can't visit each dst image inode more than once, as that would
309    // duplicate work. Here, we avoid visiting each source image inode
310    // more than once. Technically, we could have multiple operations
311    // that read the same blocks from the source image for diffing, but
312    // we choose not to avoid complexity. Eventually we will move away
313    // from using a graph/cycle detection/etc to generate diffs, and at that
314    // time, it will be easy (non-complex) to have many operations read
315    // from the same source blocks. At that time, this code can die. -adlr
316    vector<Extent> old_file_extents = FilterExtentRanges(
317        old_files_map[new_file.name], old_visited_blocks);
318    old_visited_blocks.AddExtents(old_file_extents);
319
320    file_delta_processors.emplace_back(old_part.path,
321                                       new_part.path,
322                                       version,
323                                       std::move(old_file_extents),
324                                       std::move(new_file_extents),
325                                       new_file.name,  // operation name
326                                       hard_chunk_blocks,
327                                       blob_file);
328  }
329
330  size_t max_threads = GetMaxThreads();
331  base::DelegateSimpleThreadPool thread_pool("incremental-update-generator",
332                                             max_threads);
333  thread_pool.Start();
334  for (auto& processor : file_delta_processors) {
335    thread_pool.AddWork(&processor);
336  }
337  thread_pool.JoinAll();
338
339  for (auto& processor : file_delta_processors) {
340    processor.MergeOperation(aops);
341  }
342
343  // Process all the blocks not included in any file. We provided all the unused
344  // blocks in the old partition as available data.
345  vector<Extent> new_unvisited = {
346      ExtentForRange(0, new_part.size / kBlockSize)};
347  new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks);
348  if (new_unvisited.empty())
349    return true;
350
351  vector<Extent> old_unvisited;
352  if (old_part.fs_interface) {
353    old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize));
354    old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks);
355  }
356
357  LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited)
358            << " unwritten blocks using chunk size of "
359            << soft_chunk_blocks << " blocks.";
360  // We use the soft_chunk_blocks limit for the <non-file-data> as we don't
361  // really know the structure of this data and we should not expect it to have
362  // redundancy between partitions.
363  TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
364                                      old_part.path,
365                                      new_part.path,
366                                      old_unvisited,
367                                      new_unvisited,
368                                      "<non-file-data>",  // operation name
369                                      soft_chunk_blocks,
370                                      version,
371                                      blob_file));
372
373  return true;
374}
375
376bool DeltaMovedAndZeroBlocks(vector<AnnotatedOperation>* aops,
377                             const string& old_part,
378                             const string& new_part,
379                             size_t old_num_blocks,
380                             size_t new_num_blocks,
381                             ssize_t chunk_blocks,
382                             const PayloadVersion& version,
383                             BlobFileWriter* blob_file,
384                             ExtentRanges* old_visited_blocks,
385                             ExtentRanges* new_visited_blocks) {
386  vector<BlockMapping::BlockId> old_block_ids;
387  vector<BlockMapping::BlockId> new_block_ids;
388  TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part,
389                                           new_part,
390                                           old_num_blocks * kBlockSize,
391                                           new_num_blocks * kBlockSize,
392                                           kBlockSize,
393                                           &old_block_ids,
394                                           &new_block_ids));
395
396  // If the update is inplace, we map all the blocks that didn't move,
397  // regardless of the contents since they are already copied and no operation
398  // is required.
399  if (version.InplaceUpdate()) {
400    uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks);
401    for (uint64_t block = 0; block < num_blocks; block++) {
402      if (old_block_ids[block] == new_block_ids[block] &&
403          !old_visited_blocks->ContainsBlock(block) &&
404          !new_visited_blocks->ContainsBlock(block)) {
405        old_visited_blocks->AddBlock(block);
406        new_visited_blocks->AddBlock(block);
407      }
408    }
409  }
410
411  // A mapping from the block_id to the list of block numbers with that block id
412  // in the old partition. This is used to lookup where in the old partition
413  // is a block from the new partition.
414  map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map;
415
416  for (uint64_t block = old_num_blocks; block-- > 0; ) {
417    if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block))
418      old_blocks_map[old_block_ids[block]].push_back(block);
419
420    // Mark all zeroed blocks in the old image as "used" since it doesn't make
421    // any sense to spend I/O to read zeros from the source partition and more
422    // importantly, these could sometimes be blocks discarded in the SSD which
423    // would read non-zero values.
424    if (old_block_ids[block] == 0)
425      old_visited_blocks->AddBlock(block);
426  }
427
428  // The collection of blocks in the new partition with just zeros. This is a
429  // common case for free-space that's also problematic for bsdiff, so we want
430  // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of
431  // just zeros is so small that it doesn't make sense to spend the I/O reading
432  // zeros from the old partition.
433  vector<Extent> new_zeros;
434
435  vector<Extent> old_identical_blocks;
436  vector<Extent> new_identical_blocks;
437
438  for (uint64_t block = 0; block < new_num_blocks; block++) {
439    // Only produce operations for blocks that were not yet visited.
440    if (new_visited_blocks->ContainsBlock(block))
441      continue;
442    if (new_block_ids[block] == 0) {
443      AppendBlockToExtents(&new_zeros, block);
444      continue;
445    }
446
447    auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]);
448    // Check if the block exists in the old partition at all.
449    if (old_blocks_map_it == old_blocks_map.end() ||
450        old_blocks_map_it->second.empty())
451      continue;
452
453    AppendBlockToExtents(&old_identical_blocks,
454                         old_blocks_map_it->second.back());
455    AppendBlockToExtents(&new_identical_blocks, block);
456    // We can't reuse source blocks in minor version 1 because the cycle
457    // breaking algorithm used in the in-place update doesn't support that.
458    if (version.InplaceUpdate())
459      old_blocks_map_it->second.pop_back();
460  }
461
462  // Produce operations for the zero blocks split per output extent.
463  // TODO(deymo): Produce ZERO operations instead of calling DeltaReadFile().
464  size_t num_ops = aops->size();
465  new_visited_blocks->AddExtents(new_zeros);
466  for (const Extent& extent : new_zeros) {
467    TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
468                                        "",
469                                        new_part,
470                                        vector<Extent>(),        // old_extents
471                                        vector<Extent>{extent},  // new_extents
472                                        "<zeros>",
473                                        chunk_blocks,
474                                        version,
475                                        blob_file));
476  }
477  LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
478            << BlocksInExtents(new_zeros) << " zeroed blocks";
479
480  // Produce MOVE/SOURCE_COPY operations for the moved blocks.
481  num_ops = aops->size();
482  if (chunk_blocks == -1)
483    chunk_blocks = new_num_blocks;
484  uint64_t used_blocks = 0;
485  old_visited_blocks->AddExtents(old_identical_blocks);
486  new_visited_blocks->AddExtents(new_identical_blocks);
487  for (const Extent& extent : new_identical_blocks) {
488    // We split the operation at the extent boundary or when bigger than
489    // chunk_blocks.
490    for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks();
491         op_block_offset += chunk_blocks) {
492      aops->emplace_back();
493      AnnotatedOperation* aop = &aops->back();
494      aop->name = "<identical-blocks>";
495      aop->op.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
496                           ? InstallOperation::SOURCE_COPY
497                           : InstallOperation::MOVE);
498
499      uint64_t chunk_num_blocks =
500          std::min(static_cast<uint64_t>(extent.num_blocks()) - op_block_offset,
501                   static_cast<uint64_t>(chunk_blocks));
502
503      // The current operation represents the move/copy operation for the
504      // sublist starting at |used_blocks| of length |chunk_num_blocks| where
505      // the src and dst are from |old_identical_blocks| and
506      // |new_identical_blocks| respectively.
507      StoreExtents(
508          ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks),
509          aop->op.mutable_src_extents());
510
511      Extent* op_dst_extent = aop->op.add_dst_extents();
512      op_dst_extent->set_start_block(extent.start_block() + op_block_offset);
513      op_dst_extent->set_num_blocks(chunk_num_blocks);
514      CHECK(
515          vector<Extent>{*op_dst_extent} ==  // NOLINT(whitespace/braces)
516          ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks));
517
518      used_blocks += chunk_num_blocks;
519    }
520  }
521  LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
522            << used_blocks << " identical blocks moved";
523
524  return true;
525}
526
527bool DeltaReadFile(vector<AnnotatedOperation>* aops,
528                   const string& old_part,
529                   const string& new_part,
530                   const vector<Extent>& old_extents,
531                   const vector<Extent>& new_extents,
532                   const string& name,
533                   ssize_t chunk_blocks,
534                   const PayloadVersion& version,
535                   BlobFileWriter* blob_file) {
536  brillo::Blob data;
537  InstallOperation operation;
538
539  uint64_t total_blocks = BlocksInExtents(new_extents);
540  if (chunk_blocks == -1)
541    chunk_blocks = total_blocks;
542
543  for (uint64_t block_offset = 0; block_offset < total_blocks;
544      block_offset += chunk_blocks) {
545    // Split the old/new file in the same chunks. Note that this could drop
546    // some information from the old file used for the new chunk. If the old
547    // file is smaller (or even empty when there's no old file) the chunk will
548    // also be empty.
549    vector<Extent> old_extents_chunk = ExtentsSublist(
550        old_extents, block_offset, chunk_blocks);
551    vector<Extent> new_extents_chunk = ExtentsSublist(
552        new_extents, block_offset, chunk_blocks);
553    NormalizeExtents(&old_extents_chunk);
554    NormalizeExtents(&new_extents_chunk);
555
556    TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part,
557                                            new_part,
558                                            old_extents_chunk,
559                                            new_extents_chunk,
560                                            version,
561                                            &data,
562                                            &operation));
563
564    // Check if the operation writes nothing.
565    if (operation.dst_extents_size() == 0) {
566      if (operation.type() == InstallOperation::MOVE) {
567        LOG(INFO) << "Empty MOVE operation ("
568                  << name << "), skipping";
569        continue;
570      } else {
571        LOG(ERROR) << "Empty non-MOVE operation";
572        return false;
573      }
574    }
575
576    // Now, insert into the list of operations.
577    AnnotatedOperation aop;
578    aop.name = name;
579    if (static_cast<uint64_t>(chunk_blocks) < total_blocks) {
580      aop.name = base::StringPrintf("%s:%" PRIu64,
581                                    name.c_str(), block_offset / chunk_blocks);
582    }
583    aop.op = operation;
584
585    // Write the data
586    TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file));
587    aops->emplace_back(aop);
588  }
589  return true;
590}
591
592bool GenerateBestFullOperation(const brillo::Blob& new_data,
593                               const PayloadVersion& version,
594                               brillo::Blob* out_blob,
595                               InstallOperation_Type* out_type) {
596  if (new_data.empty())
597    return false;
598
599  if (version.OperationAllowed(InstallOperation::ZERO) &&
600      std::all_of(
601          new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) {
602    // The read buffer is all zeros, so produce a ZERO operation. No need to
603    // check other types of operations in this case.
604    *out_blob = brillo::Blob();
605    *out_type = InstallOperation::ZERO;
606    return true;
607  }
608
609  bool out_blob_set = false;
610
611  // Try compressing |new_data| with xz first.
612  if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) {
613    brillo::Blob new_data_xz;
614    if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) {
615      *out_type = InstallOperation::REPLACE_XZ;
616      *out_blob = std::move(new_data_xz);
617      out_blob_set = true;
618    }
619  }
620
621  // Try compressing it with bzip2.
622  if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) {
623    brillo::Blob new_data_bz;
624    // TODO(deymo): Implement some heuristic to determine if it is worth trying
625    // to compress the blob with bzip2 if we already have a good REPLACE_XZ.
626    if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() &&
627        (!out_blob_set || out_blob->size() > new_data_bz.size())) {
628      // A REPLACE_BZ is better or nothing else was set.
629      *out_type = InstallOperation::REPLACE_BZ;
630      *out_blob = std::move(new_data_bz);
631      out_blob_set = true;
632    }
633  }
634
635  // If nothing else worked or it was badly compressed we try a REPLACE.
636  if (!out_blob_set || out_blob->size() >= new_data.size()) {
637    *out_type = InstallOperation::REPLACE;
638    // This needs to make a copy of the data in the case bzip or xz didn't
639    // compress well, which is not the common case so the performance hit is
640    // low.
641    *out_blob = new_data;
642  }
643  return true;
644}
645
646bool ReadExtentsToDiff(const string& old_part,
647                       const string& new_part,
648                       const vector<Extent>& old_extents,
649                       const vector<Extent>& new_extents,
650                       const PayloadVersion& version,
651                       brillo::Blob* out_data,
652                       InstallOperation* out_op) {
653  InstallOperation operation;
654
655  // We read blocks from old_extents and write blocks to new_extents.
656  uint64_t blocks_to_read = BlocksInExtents(old_extents);
657  uint64_t blocks_to_write = BlocksInExtents(new_extents);
658
659  // Disable bsdiff and imgdiff when the data is too big.
660  bool bsdiff_allowed =
661      version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) ||
662      version.OperationAllowed(InstallOperation::BSDIFF);
663  if (bsdiff_allowed &&
664      blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) {
665    LOG(INFO) << "bsdiff blacklisted, data too big: "
666              << blocks_to_read * kBlockSize << " bytes";
667    bsdiff_allowed = false;
668  }
669
670  bool imgdiff_allowed = version.OperationAllowed(InstallOperation::IMGDIFF);
671  if (imgdiff_allowed &&
672      blocks_to_read * kBlockSize > kMaxImgdiffDestinationSize) {
673    LOG(INFO) << "imgdiff blacklisted, data too big: "
674              << blocks_to_read * kBlockSize << " bytes";
675    imgdiff_allowed = false;
676  }
677
678  // Make copies of the extents so we can modify them.
679  vector<Extent> src_extents = old_extents;
680  vector<Extent> dst_extents = new_extents;
681
682  // Read in bytes from new data.
683  brillo::Blob new_data;
684  TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part,
685                                           new_extents,
686                                           &new_data,
687                                           kBlockSize * blocks_to_write,
688                                           kBlockSize));
689  TEST_AND_RETURN_FALSE(!new_data.empty());
690
691  // Data blob that will be written to delta file.
692  brillo::Blob data_blob;
693
694  // Try generating a full operation for the given new data, regardless of the
695  // old_data.
696  InstallOperation_Type op_type;
697  TEST_AND_RETURN_FALSE(
698      GenerateBestFullOperation(new_data, version, &data_blob, &op_type));
699  operation.set_type(op_type);
700
701  brillo::Blob old_data;
702  if (blocks_to_read > 0) {
703    // Read old data.
704    TEST_AND_RETURN_FALSE(
705        utils::ReadExtents(old_part, src_extents, &old_data,
706                           kBlockSize * blocks_to_read, kBlockSize));
707    if (old_data == new_data) {
708      // No change in data.
709      operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
710                             ? InstallOperation::SOURCE_COPY
711                             : InstallOperation::MOVE);
712      data_blob = brillo::Blob();
713    } else if (bsdiff_allowed || imgdiff_allowed) {
714      // If the source file is considered bsdiff safe (no bsdiff bugs
715      // triggered), see if BSDIFF encoding is smaller.
716      base::FilePath old_chunk;
717      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&old_chunk));
718      ScopedPathUnlinker old_unlinker(old_chunk.value());
719      TEST_AND_RETURN_FALSE(utils::WriteFile(
720          old_chunk.value().c_str(), old_data.data(), old_data.size()));
721      base::FilePath new_chunk;
722      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&new_chunk));
723      ScopedPathUnlinker new_unlinker(new_chunk.value());
724      TEST_AND_RETURN_FALSE(utils::WriteFile(
725          new_chunk.value().c_str(), new_data.data(), new_data.size()));
726
727      if (bsdiff_allowed) {
728        brillo::Blob bsdiff_delta;
729        TEST_AND_RETURN_FALSE(DiffFiles(
730            kBsdiffPath, old_chunk.value(), new_chunk.value(), &bsdiff_delta));
731        CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
732        if (bsdiff_delta.size() < data_blob.size()) {
733          operation.set_type(
734              version.OperationAllowed(InstallOperation::SOURCE_BSDIFF)
735                  ? InstallOperation::SOURCE_BSDIFF
736                  : InstallOperation::BSDIFF);
737          data_blob = std::move(bsdiff_delta);
738        }
739      }
740      if (imgdiff_allowed && ContainsGZip(old_data) && ContainsGZip(new_data)) {
741        brillo::Blob imgdiff_delta;
742        // Imgdiff might fail in some cases, only use the result if it succeed,
743        // otherwise print the extents to analyze.
744        if (DiffFiles(kImgdiffPath,
745                      old_chunk.value(),
746                      new_chunk.value(),
747                      &imgdiff_delta) &&
748            imgdiff_delta.size() > 0) {
749          if (imgdiff_delta.size() < data_blob.size()) {
750            operation.set_type(InstallOperation::IMGDIFF);
751            data_blob = std::move(imgdiff_delta);
752          }
753        } else {
754          LOG(ERROR) << "Imgdiff failed with source extents: "
755                     << ExtentsToString(src_extents)
756                     << ", destination extents: "
757                     << ExtentsToString(dst_extents);
758        }
759      }
760    }
761  }
762
763  size_t removed_bytes = 0;
764  // Remove identical src/dst block ranges in MOVE operations.
765  if (operation.type() == InstallOperation::MOVE) {
766    removed_bytes = RemoveIdenticalBlockRanges(
767        &src_extents, &dst_extents, new_data.size());
768  }
769  // Set legacy src_length and dst_length fields.
770  operation.set_src_length(old_data.size() - removed_bytes);
771  operation.set_dst_length(new_data.size() - removed_bytes);
772
773  // Embed extents in the operation.
774  StoreExtents(src_extents, operation.mutable_src_extents());
775  StoreExtents(dst_extents, operation.mutable_dst_extents());
776
777  // Replace operations should not have source extents.
778  if (IsAReplaceOperation(operation.type())) {
779    operation.clear_src_extents();
780    operation.clear_src_length();
781  }
782
783  *out_data = std::move(data_blob);
784  *out_op = operation;
785
786  return true;
787}
788
789// Runs the bsdiff or imgdiff tool in |diff_path| on two files and returns the
790// resulting delta in |out|. Returns true on success.
791bool DiffFiles(const string& diff_path,
792               const string& old_file,
793               const string& new_file,
794               brillo::Blob* out) {
795  const string kPatchFile = "delta.patchXXXXXX";
796  string patch_file_path;
797
798  TEST_AND_RETURN_FALSE(
799      utils::MakeTempFile(kPatchFile, &patch_file_path, nullptr));
800
801  vector<string> cmd;
802  cmd.push_back(diff_path);
803  cmd.push_back(old_file);
804  cmd.push_back(new_file);
805  cmd.push_back(patch_file_path);
806
807  int rc = 1;
808  string stdout;
809  TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc, &stdout));
810  if (rc != 0) {
811    LOG(ERROR) << diff_path << " returned " << rc << std::endl << stdout;
812    return false;
813  }
814  TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
815  unlink(patch_file_path.c_str());
816  return true;
817}
818
819bool IsAReplaceOperation(InstallOperation_Type op_type) {
820  return (op_type == InstallOperation::REPLACE ||
821          op_type == InstallOperation::REPLACE_BZ ||
822          op_type == InstallOperation::REPLACE_XZ);
823}
824
825// Returns true if |op| is a no-op operation that doesn't do any useful work
826// (e.g., a move operation that copies blocks onto themselves).
827bool IsNoopOperation(const InstallOperation& op) {
828  return (op.type() == InstallOperation::MOVE &&
829          ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents()));
830}
831
832void FilterNoopOperations(vector<AnnotatedOperation>* ops) {
833  ops->erase(
834      std::remove_if(
835          ops->begin(), ops->end(),
836          [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}),
837      ops->end());
838}
839
840bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) {
841  info->set_size(part.size);
842  HashCalculator hasher;
843  TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) ==
844                        static_cast<off_t>(part.size));
845  TEST_AND_RETURN_FALSE(hasher.Finalize());
846  const brillo::Blob& hash = hasher.raw_hash();
847  info->set_hash(hash.data(), hash.size());
848  LOG(INFO) << part.path << ": size=" << part.size << " hash=" << hasher.hash();
849  return true;
850}
851
852bool CompareAopsByDestination(AnnotatedOperation first_aop,
853                              AnnotatedOperation second_aop) {
854  // We want empty operations to be at the end of the payload.
855  if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size())
856    return ((!first_aop.op.dst_extents().size()) <
857            (!second_aop.op.dst_extents().size()));
858  uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block();
859  uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block();
860  return first_dst_start < second_dst_start;
861}
862
863bool IsExtFilesystem(const string& device) {
864  brillo::Blob header;
865  // See include/linux/ext2_fs.h for more details on the structure. We obtain
866  // ext2 constants from ext2fs/ext2fs.h header but we don't link with the
867  // library.
868  if (!utils::ReadFileChunk(
869          device, 0, SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE, &header) ||
870      header.size() < SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE)
871    return false;
872
873  const uint8_t* superblock = header.data() + SUPERBLOCK_OFFSET;
874
875  // ext3_fs.h: ext3_super_block.s_blocks_count
876  uint32_t block_count =
877      *reinterpret_cast<const uint32_t*>(superblock + 1 * sizeof(int32_t));
878
879  // ext3_fs.h: ext3_super_block.s_log_block_size
880  uint32_t log_block_size =
881      *reinterpret_cast<const uint32_t*>(superblock + 6 * sizeof(int32_t));
882
883  // ext3_fs.h: ext3_super_block.s_magic
884  uint16_t magic =
885      *reinterpret_cast<const uint16_t*>(superblock + 14 * sizeof(int32_t));
886
887  block_count = le32toh(block_count);
888  log_block_size = le32toh(log_block_size) + EXT2_MIN_BLOCK_LOG_SIZE;
889  magic = le16toh(magic);
890
891  if (magic != EXT2_SUPER_MAGIC)
892    return false;
893
894  // Sanity check the parameters.
895  TEST_AND_RETURN_FALSE(log_block_size >= EXT2_MIN_BLOCK_LOG_SIZE &&
896                        log_block_size <= EXT2_MAX_BLOCK_LOG_SIZE);
897  TEST_AND_RETURN_FALSE(block_count > 0);
898  return true;
899}
900
901// Return the number of CPUs on the machine, and 4 threads in minimum.
902size_t GetMaxThreads() {
903  return std::max(sysconf(_SC_NPROCESSORS_ONLN), 4L);
904}
905
906}  // namespace diff_utils
907
908}  // namespace chromeos_update_engine
909