delta_diff_utils.cc revision dcb18612576aeb59fbd8ba066b1b7a3a7bca8490
//
// Copyright (C) 2015 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#include "update_engine/payload_generator/delta_diff_utils.h"

#include <endian.h>
// TODO: Remove these pragmas when b/35721782 is fixed.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmacro-redefined"
#include <ext2fs/ext2fs.h>
#pragma clang diagnostic pop

#include <algorithm>
#include <map>

#include <base/files/file_util.h>
#include <base/format_macros.h>
#include <base/strings/stringprintf.h>

#include "update_engine/common/hash_calculator.h"
#include "update_engine/common/subprocess.h"
#include "update_engine/common/utils.h"
#include "update_engine/payload_generator/block_mapping.h"
#include "update_engine/payload_generator/bzip.h"
#include "update_engine/payload_generator/delta_diff_generator.h"
#include "update_engine/payload_generator/extent_ranges.h"
#include "update_engine/payload_generator/extent_utils.h"
#include "update_engine/payload_generator/xz.h"

using std::map;
using std::string;
using std::vector;

namespace chromeos_update_engine {
namespace {

const char* const kBsdiffPath = "bsdiff";
const char* const kImgdiffPath = "imgdiff";

// The maximum destination size allowed for bsdiff. In general, bsdiff should
// work for arbitrarily big files, but the payload generation and payload
// application require a significant amount of RAM. We put a hard limit of
// 200 MiB that should not affect any released board, but will limit the
// Chrome binary in ASan builders.
const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024;  // bytes

// The maximum destination size allowed for imgdiff. In general, imgdiff should
// work for arbitrarily big files, but the payload application is quite memory
// intensive, so we limit these operations to 50 MiB.
const uint64_t kMaxImgdiffDestinationSize = 50 * 1024 * 1024;  // bytes

// Process a range of blocks from |range_start| to |range_end| in the extent at
// position |*idx_p| of |extents|. If |do_remove| is true, this range will be
// removed, which may cause the extent to be trimmed, split or removed entirely.
// The value of |*idx_p| is updated to point to the next extent to be processed.
// Returns true iff the next extent to process is a new or updated one.
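// Illustrative example (assumed values, not from the original code): removing
// the relative range [2, 5) from an extent {start_block: 10, num_blocks: 8}
// trims it to {10, 2} and inserts a new extent {15, 3} right after it, so
// blocks 12-14 are dropped while blocks 10-11 and 15-17 are kept.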
bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p,
                             const bool do_remove, uint64_t range_start,
                             uint64_t range_end) {
  size_t idx = *idx_p;
  uint64_t start_block = (*extents)[idx].start_block();
  uint64_t num_blocks = (*extents)[idx].num_blocks();
  uint64_t range_size = range_end - range_start;

  if (do_remove) {
    if (range_size == num_blocks) {
      // Remove the entire extent.
      extents->erase(extents->begin() + idx);
    } else if (range_end == num_blocks) {
      // Trim the end of the extent.
      (*extents)[idx].set_num_blocks(num_blocks - range_size);
      idx++;
    } else if (range_start == 0) {
      // Trim the head of the extent.
      (*extents)[idx].set_start_block(start_block + range_size);
      (*extents)[idx].set_num_blocks(num_blocks - range_size);
    } else {
      // Trim the middle, splitting the remainder into two parts.
      (*extents)[idx].set_num_blocks(range_start);
      Extent e;
      e.set_start_block(start_block + range_end);
      e.set_num_blocks(num_blocks - range_end);
      idx++;
      extents->insert(extents->begin() + idx, e);
    }
  } else if (range_end == num_blocks) {
    // Done with this extent.
    idx++;
  } else {
    return false;
  }

  *idx_p = idx;
  return true;
}

// Remove identical corresponding block ranges in |src_extents| and
// |dst_extents|. Used to prevent moving blocks onto themselves during MOVE
// operations. The value of |total_bytes| indicates the actual length of
// content; this may be slightly less than the total size of blocks, in which
// case the last block is only partly occupied with data. Returns the total
// number of bytes removed.
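// Worked example (assumed values, not from the original code): with
// src_extents = [{20, 4}] and dst_extents = [{20, 2}, {30, 2}], the first two
// blocks map onto themselves (20->20, 21->21) and are removed, leaving
// src_extents = [{22, 2}] and dst_extents = [{30, 2}] and returning
// 2 * kBlockSize (assuming |total_bytes| is block-aligned).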
size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents,
                                  vector<Extent>* dst_extents,
                                  const size_t total_bytes) {
  size_t src_idx = 0;
  size_t dst_idx = 0;
  uint64_t src_offset = 0, dst_offset = 0;
  size_t removed_bytes = 0, nonfull_block_bytes;
  bool do_remove = false;
  while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) {
    do_remove = ((*src_extents)[src_idx].start_block() + src_offset ==
                 (*dst_extents)[dst_idx].start_block() + dst_offset);

    uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks();
    uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks();
    uint64_t min_num_blocks = std::min(src_num_blocks - src_offset,
                                       dst_num_blocks - dst_offset);
    uint64_t prev_src_offset = src_offset;
    uint64_t prev_dst_offset = dst_offset;
    src_offset += min_num_blocks;
    dst_offset += min_num_blocks;

    bool new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove,
                                           prev_src_offset, src_offset);
    bool new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove,
                                           prev_dst_offset, dst_offset);
    if (new_src) {
      src_offset = 0;
    }
    if (new_dst) {
      dst_offset = 0;
    }

    if (do_remove)
      removed_bytes += min_num_blocks * kBlockSize;
  }

  // If we removed the last block and this block is only partly used by file
  // content, deduct the unused portion from the total removed byte count.
  if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize))
    removed_bytes -= kBlockSize - nonfull_block_bytes;

  return removed_bytes;
}

// Returns true if the given blob |data| contains gzip header magic.
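// The four bytes searched for are the gzip signature (0x1f 0x8b), the deflate
// compression method byte (0x08) and a zero flags byte.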
bool ContainsGZip(const brillo::Blob& data) {
  const uint8_t kGZipMagic[] = {0x1f, 0x8b, 0x08, 0x00};
  return std::search(data.begin(),
                     data.end(),
                     std::begin(kGZipMagic),
                     std::end(kGZipMagic)) != data.end();
}

}  // namespace

namespace diff_utils {

bool DeltaReadPartition(vector<AnnotatedOperation>* aops,
                        const PartitionConfig& old_part,
                        const PartitionConfig& new_part,
                        ssize_t hard_chunk_blocks,
                        size_t soft_chunk_blocks,
                        const PayloadVersion& version,
                        BlobFileWriter* blob_file) {
  ExtentRanges old_visited_blocks;
  ExtentRanges new_visited_blocks;

  TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks(
      aops,
      old_part.path,
      new_part.path,
      old_part.size / kBlockSize,
      new_part.size / kBlockSize,
      soft_chunk_blocks,
      version,
      blob_file,
      &old_visited_blocks,
      &new_visited_blocks));

  map<string, vector<Extent>> old_files_map;
  if (old_part.fs_interface) {
    vector<FilesystemInterface::File> old_files;
    old_part.fs_interface->GetFiles(&old_files);
    for (const FilesystemInterface::File& file : old_files)
      old_files_map[file.name] = file.extents;
  }

  TEST_AND_RETURN_FALSE(new_part.fs_interface);
  vector<FilesystemInterface::File> new_files;
  new_part.fs_interface->GetFiles(&new_files);

  // The processing here is straightforward: we generate operations for every
  // file (and pseudo-file such as the metadata) in the new filesystem based on
  // the file with the same name in the old filesystem, if any. Files with
  // overlapping data blocks (like hardlinks, or filesystems with tail packing
  // or compression where the blocks store more than one file) are only
  // generated once in the new image, but are also used only once from the old
  // image due to some simplifications (see below).
  for (const FilesystemInterface::File& new_file : new_files) {
    // Ignore files in the new filesystem that have no blocks. Symlinks with
    // data blocks (for example, symlinks bigger than 60 bytes in ext2) are
    // handled as normal files. We also ignore blocks that were already
    // processed by a previous file.
    vector<Extent> new_file_extents = FilterExtentRanges(
        new_file.extents, new_visited_blocks);
    new_visited_blocks.AddExtents(new_file_extents);

    if (new_file_extents.empty())
      continue;

    LOG(INFO) << "Encoding file " << new_file.name << " ("
              << BlocksInExtents(new_file_extents) << " blocks)";

    // We can't visit each dst image inode more than once, as that would
    // duplicate work. Here, we avoid visiting each source image inode
    // more than once. Technically, we could have multiple operations
    // that read the same blocks from the source image for diffing, but
    // we choose not to, to avoid complexity. Eventually we will move away
    // from using a graph/cycle detection/etc to generate diffs, and at that
    // time, it will be easy (non-complex) to have many operations read
    // from the same source blocks. At that time, this code can die. -adlr
    vector<Extent> old_file_extents = FilterExtentRanges(
        old_files_map[new_file.name], old_visited_blocks);
    old_visited_blocks.AddExtents(old_file_extents);

    TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
                                        old_part.path,
                                        new_part.path,
                                        old_file_extents,
                                        new_file_extents,
                                        new_file.name,  // operation name
                                        hard_chunk_blocks,
                                        version,
                                        blob_file));
  }
  // Process all the blocks not included in any file. We provide all the unused
  // blocks in the old partition as available data.
  vector<Extent> new_unvisited = {
      ExtentForRange(0, new_part.size / kBlockSize)};
  new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks);
  if (new_unvisited.empty())
    return true;

  vector<Extent> old_unvisited;
  if (old_part.fs_interface) {
    old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize));
    old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks);
  }

  LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited)
            << " unwritten blocks using chunk size of "
            << soft_chunk_blocks << " blocks.";
  // We use the soft_chunk_blocks limit for the <non-file-data> as we don't
  // really know the structure of this data and we should not expect it to have
  // redundancy between partitions.
  TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
                                      old_part.path,
                                      new_part.path,
                                      old_unvisited,
                                      new_unvisited,
                                      "<non-file-data>",  // operation name
                                      soft_chunk_blocks,
                                      version,
                                      blob_file));

  return true;
}

bool DeltaMovedAndZeroBlocks(vector<AnnotatedOperation>* aops,
                             const string& old_part,
                             const string& new_part,
                             size_t old_num_blocks,
                             size_t new_num_blocks,
                             ssize_t chunk_blocks,
                             const PayloadVersion& version,
                             BlobFileWriter* blob_file,
                             ExtentRanges* old_visited_blocks,
                             ExtentRanges* new_visited_blocks) {
  vector<BlockMapping::BlockId> old_block_ids;
  vector<BlockMapping::BlockId> new_block_ids;
  TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part,
                                           new_part,
                                           old_num_blocks * kBlockSize,
                                           new_num_blocks * kBlockSize,
                                           kBlockSize,
                                           &old_block_ids,
                                           &new_block_ids));

  // If the update is in-place, we map all the blocks that didn't move,
  // regardless of their contents, since they are already copied and no
  // operation is required.
  if (version.InplaceUpdate()) {
    uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks);
    for (uint64_t block = 0; block < num_blocks; block++) {
      if (old_block_ids[block] == new_block_ids[block] &&
          !old_visited_blocks->ContainsBlock(block) &&
          !new_visited_blocks->ContainsBlock(block)) {
        old_visited_blocks->AddBlock(block);
        new_visited_blocks->AddBlock(block);
      }
    }
  }

  // A mapping from the block_id to the list of block numbers with that block
  // id in the old partition. This is used to look up where in the old
  // partition a block from the new partition is located.
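  // For instance (hypothetical values), if old blocks 5 and 9 hold identical
  // content with BlockId 7, then old_blocks_map[7] == {9, 5}: the loop below
  // iterates from the last block down, so back() yields the lowest-numbered
  // candidate first.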
  map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map;

  for (uint64_t block = old_num_blocks; block-- > 0; ) {
    if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block))
      old_blocks_map[old_block_ids[block]].push_back(block);

    // Mark all zeroed blocks in the old image as "used" since it doesn't make
    // any sense to spend I/O reading zeros from the source partition and, more
    // importantly, these could sometimes be blocks discarded in the SSD which
    // would read non-zero values.
    if (old_block_ids[block] == 0)
      old_visited_blocks->AddBlock(block);
  }

  // The collection of blocks in the new partition with just zeros. This is a
  // common case for free space that's also problematic for bsdiff, so we want
  // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of
  // just zeros is so small that it doesn't make sense to spend the I/O reading
  // zeros from the old partition.
  vector<Extent> new_zeros;

  vector<Extent> old_identical_blocks;
  vector<Extent> new_identical_blocks;

  for (uint64_t block = 0; block < new_num_blocks; block++) {
    // Only produce operations for blocks that were not yet visited.
    if (new_visited_blocks->ContainsBlock(block))
      continue;
    if (new_block_ids[block] == 0) {
      AppendBlockToExtents(&new_zeros, block);
      continue;
    }

    auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]);
    // Check if the block exists in the old partition at all.
    if (old_blocks_map_it == old_blocks_map.end() ||
        old_blocks_map_it->second.empty())
      continue;

    AppendBlockToExtents(&old_identical_blocks,
                         old_blocks_map_it->second.back());
    AppendBlockToExtents(&new_identical_blocks, block);
    // We can't reuse source blocks in minor version 1 because the cycle
    // breaking algorithm used in the in-place update doesn't support that.
    if (version.InplaceUpdate())
      old_blocks_map_it->second.pop_back();
  }

  // Produce operations for the zero blocks split per output extent.
  // TODO(deymo): Produce ZERO operations instead of calling DeltaReadFile().
  size_t num_ops = aops->size();
  new_visited_blocks->AddExtents(new_zeros);
  for (const Extent& extent : new_zeros) {
    TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
                                        "",
                                        new_part,
                                        vector<Extent>(),        // old_extents
                                        vector<Extent>{extent},  // new_extents
                                        "<zeros>",
                                        chunk_blocks,
                                        version,
                                        blob_file));
  }
  LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
            << BlocksInExtents(new_zeros) << " zeroed blocks";

  // Produce MOVE/SOURCE_COPY operations for the moved blocks.
  num_ops = aops->size();
  if (chunk_blocks == -1)
    chunk_blocks = new_num_blocks;
  uint64_t used_blocks = 0;
  old_visited_blocks->AddExtents(old_identical_blocks);
  new_visited_blocks->AddExtents(new_identical_blocks);
  for (const Extent& extent : new_identical_blocks) {
    // We split the operation at extent boundaries or when it would be bigger
    // than chunk_blocks.
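    // For example (illustrative numbers), a 10-block extent with
    // chunk_blocks == 4 yields three operations covering 4, 4 and 2 blocks.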
    for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks();
         op_block_offset += chunk_blocks) {
      aops->emplace_back();
      AnnotatedOperation* aop = &aops->back();
      aop->name = "<identical-blocks>";
      aop->op.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
                           ? InstallOperation::SOURCE_COPY
                           : InstallOperation::MOVE);

      uint64_t chunk_num_blocks =
          std::min(static_cast<uint64_t>(extent.num_blocks()) - op_block_offset,
                   static_cast<uint64_t>(chunk_blocks));

      // The current operation represents the move/copy operation for the
      // sublist starting at |used_blocks| of length |chunk_num_blocks| where
      // the src and dst are from |old_identical_blocks| and
      // |new_identical_blocks| respectively.
      StoreExtents(
          ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks),
          aop->op.mutable_src_extents());

      Extent* op_dst_extent = aop->op.add_dst_extents();
      op_dst_extent->set_start_block(extent.start_block() + op_block_offset);
      op_dst_extent->set_num_blocks(chunk_num_blocks);
      CHECK(
          vector<Extent>{*op_dst_extent} ==  // NOLINT(whitespace/braces)
          ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks));

      used_blocks += chunk_num_blocks;
    }
  }
  LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
            << used_blocks << " identical blocks moved";

  return true;
}

bool DeltaReadFile(vector<AnnotatedOperation>* aops,
                   const string& old_part,
                   const string& new_part,
                   const vector<Extent>& old_extents,
                   const vector<Extent>& new_extents,
                   const string& name,
                   ssize_t chunk_blocks,
                   const PayloadVersion& version,
                   BlobFileWriter* blob_file) {
  brillo::Blob data;
  InstallOperation operation;

  uint64_t total_blocks = BlocksInExtents(new_extents);
  if (chunk_blocks == -1)
    chunk_blocks = total_blocks;

  for (uint64_t block_offset = 0; block_offset < total_blocks;
      block_offset += chunk_blocks) {
    // Split the old/new file into the same chunks. Note that this could drop
    // some information from the old file that could have been used for the new
    // chunk. If the old file is smaller (or even empty when there's no old
    // file), the corresponding old chunk will also be empty.
    vector<Extent> old_extents_chunk = ExtentsSublist(
        old_extents, block_offset, chunk_blocks);
    vector<Extent> new_extents_chunk = ExtentsSublist(
        new_extents, block_offset, chunk_blocks);
    NormalizeExtents(&old_extents_chunk);
    NormalizeExtents(&new_extents_chunk);

    TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part,
                                            new_part,
                                            old_extents_chunk,
                                            new_extents_chunk,
                                            version,
                                            &data,
                                            &operation));

    // Check if the operation writes nothing.
    if (operation.dst_extents_size() == 0) {
      if (operation.type() == InstallOperation::MOVE) {
        LOG(INFO) << "Empty MOVE operation ("
                  << name << "), skipping";
        continue;
      } else {
        LOG(ERROR) << "Empty non-MOVE operation";
        return false;
      }
    }

    // Now, insert into the list of operations.
    AnnotatedOperation aop;
    aop.name = name;
    if (static_cast<uint64_t>(chunk_blocks) < total_blocks) {
      aop.name = base::StringPrintf("%s:%" PRIu64,
                                    name.c_str(), block_offset / chunk_blocks);
    }
    aop.op = operation;

    // Write the data.
    TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file));
    aops->emplace_back(aop);
  }
  return true;
}

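// Picks the best full operation for |new_data|: a ZERO operation when the data
// is all zeros, otherwise the smallest blob among the REPLACE_XZ, REPLACE_BZ
// and plain REPLACE variants allowed by |version|.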
bool GenerateBestFullOperation(const brillo::Blob& new_data,
                               const PayloadVersion& version,
                               brillo::Blob* out_blob,
                               InstallOperation_Type* out_type) {
  if (new_data.empty())
    return false;

  if (version.OperationAllowed(InstallOperation::ZERO) &&
      std::all_of(
          new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) {
    // The read buffer is all zeros, so produce a ZERO operation. No need to
    // check other types of operations in this case.
    *out_blob = brillo::Blob();
    *out_type = InstallOperation::ZERO;
    return true;
  }

  bool out_blob_set = false;

  // Try compressing |new_data| with xz first.
  if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) {
    brillo::Blob new_data_xz;
    if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) {
      *out_type = InstallOperation::REPLACE_XZ;
      *out_blob = std::move(new_data_xz);
      out_blob_set = true;
    }
  }

  // Try compressing it with bzip2.
  if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) {
    brillo::Blob new_data_bz;
    // TODO(deymo): Implement some heuristic to determine if it is worth trying
    // to compress the blob with bzip2 if we already have a good REPLACE_XZ.
    if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() &&
        (!out_blob_set || out_blob->size() > new_data_bz.size())) {
      // A REPLACE_BZ is better or nothing else was set.
      *out_type = InstallOperation::REPLACE_BZ;
      *out_blob = std::move(new_data_bz);
      out_blob_set = true;
    }
  }

  // If nothing else worked, or the compressed blob is not smaller than the raw
  // data, we fall back to a plain REPLACE.
  if (!out_blob_set || out_blob->size() >= new_data.size()) {
    *out_type = InstallOperation::REPLACE;
    // This needs to make a copy of the data in case bzip2 or xz didn't
    // compress well, which is not the common case, so the performance hit is
    // low.
    *out_blob = new_data;
  }
  return true;
}

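// Strategy note: a best full operation is generated for the new extents first;
// when old data is available, it is replaced by a SOURCE_COPY (or MOVE) if the
// old and new data are identical, or by a (SOURCE_)BSDIFF or IMGDIFF delta
// whenever that yields a smaller blob.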
bool ReadExtentsToDiff(const string& old_part,
                       const string& new_part,
                       const vector<Extent>& old_extents,
                       const vector<Extent>& new_extents,
                       const PayloadVersion& version,
                       brillo::Blob* out_data,
                       InstallOperation* out_op) {
  InstallOperation operation;

  // We read blocks from old_extents and write blocks to new_extents.
  uint64_t blocks_to_read = BlocksInExtents(old_extents);
  uint64_t blocks_to_write = BlocksInExtents(new_extents);

  // Disable bsdiff and imgdiff when the data is too big.
  bool bsdiff_allowed =
      version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) ||
      version.OperationAllowed(InstallOperation::BSDIFF);
  if (bsdiff_allowed &&
      blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) {
    LOG(INFO) << "bsdiff blacklisted, data too big: "
              << blocks_to_read * kBlockSize << " bytes";
    bsdiff_allowed = false;
  }

  bool imgdiff_allowed = version.OperationAllowed(InstallOperation::IMGDIFF);
  if (imgdiff_allowed &&
      blocks_to_read * kBlockSize > kMaxImgdiffDestinationSize) {
    LOG(INFO) << "imgdiff blacklisted, data too big: "
              << blocks_to_read * kBlockSize << " bytes";
    imgdiff_allowed = false;
  }

  // Make copies of the extents so we can modify them.
  vector<Extent> src_extents = old_extents;
  vector<Extent> dst_extents = new_extents;

  // Read in bytes from new data.
  brillo::Blob new_data;
  TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part,
                                           new_extents,
                                           &new_data,
                                           kBlockSize * blocks_to_write,
                                           kBlockSize));
  TEST_AND_RETURN_FALSE(!new_data.empty());

  // Data blob that will be written to delta file.
  brillo::Blob data_blob;

  // Try generating a full operation for the given new data, regardless of the
  // old_data.
  InstallOperation_Type op_type;
  TEST_AND_RETURN_FALSE(
      GenerateBestFullOperation(new_data, version, &data_blob, &op_type));
  operation.set_type(op_type);

  brillo::Blob old_data;
  if (blocks_to_read > 0) {
    // Read old data.
    TEST_AND_RETURN_FALSE(
        utils::ReadExtents(old_part, src_extents, &old_data,
                           kBlockSize * blocks_to_read, kBlockSize));
    if (old_data == new_data) {
      // No change in data.
      operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
                             ? InstallOperation::SOURCE_COPY
                             : InstallOperation::MOVE);
      data_blob = brillo::Blob();
    } else if (bsdiff_allowed || imgdiff_allowed) {
      // If the source file is considered bsdiff safe (no bsdiff bugs
      // triggered), see if BSDIFF encoding is smaller.
      base::FilePath old_chunk;
      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&old_chunk));
      ScopedPathUnlinker old_unlinker(old_chunk.value());
      TEST_AND_RETURN_FALSE(utils::WriteFile(
          old_chunk.value().c_str(), old_data.data(), old_data.size()));
      base::FilePath new_chunk;
      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&new_chunk));
      ScopedPathUnlinker new_unlinker(new_chunk.value());
      TEST_AND_RETURN_FALSE(utils::WriteFile(
          new_chunk.value().c_str(), new_data.data(), new_data.size()));

      if (bsdiff_allowed) {
        brillo::Blob bsdiff_delta;
        TEST_AND_RETURN_FALSE(DiffFiles(
            kBsdiffPath, old_chunk.value(), new_chunk.value(), &bsdiff_delta));
        CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
        if (bsdiff_delta.size() < data_blob.size()) {
          operation.set_type(
              version.OperationAllowed(InstallOperation::SOURCE_BSDIFF)
                  ? InstallOperation::SOURCE_BSDIFF
                  : InstallOperation::BSDIFF);
          data_blob = std::move(bsdiff_delta);
        }
      }
      if (imgdiff_allowed && ContainsGZip(old_data) && ContainsGZip(new_data)) {
        brillo::Blob imgdiff_delta;
        // Imgdiff might fail in some cases; only use the result if it
        // succeeds, otherwise print the extents so they can be analyzed.
        if (DiffFiles(kImgdiffPath,
                      old_chunk.value(),
                      new_chunk.value(),
                      &imgdiff_delta) &&
            imgdiff_delta.size() > 0) {
          if (imgdiff_delta.size() < data_blob.size()) {
            operation.set_type(InstallOperation::IMGDIFF);
            data_blob = std::move(imgdiff_delta);
          }
        } else {
          LOG(ERROR) << "Imgdiff failed with source extents: "
                     << ExtentsToString(src_extents)
                     << ", destination extents: "
                     << ExtentsToString(dst_extents);
        }
      }
    }
  }

  size_t removed_bytes = 0;
  // Remove identical src/dst block ranges in MOVE operations.
  if (operation.type() == InstallOperation::MOVE) {
    removed_bytes = RemoveIdenticalBlockRanges(
        &src_extents, &dst_extents, new_data.size());
  }
  // Set legacy src_length and dst_length fields.
  operation.set_src_length(old_data.size() - removed_bytes);
  operation.set_dst_length(new_data.size() - removed_bytes);

  // Embed extents in the operation.
  StoreExtents(src_extents, operation.mutable_src_extents());
  StoreExtents(dst_extents, operation.mutable_dst_extents());

  // Replace operations should not have source extents.
  if (IsAReplaceOperation(operation.type())) {
    operation.clear_src_extents();
    operation.clear_src_length();
  }

  *out_data = std::move(data_blob);
  *out_op = operation;

  return true;
}

// Runs the bsdiff or imgdiff tool in |diff_path| on two files and returns the
// resulting delta in |out|. Returns true on success.
bool DiffFiles(const string& diff_path,
               const string& old_file,
               const string& new_file,
               brillo::Blob* out) {
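  // Note: the XXXXXX suffix below is an mkstemp()-style template that
  // utils::MakeTempFile is expected to replace with a unique value; the
  // resulting file is unlinked once its contents have been read back.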
  const string kPatchFile = "delta.patchXXXXXX";
  string patch_file_path;

  TEST_AND_RETURN_FALSE(
      utils::MakeTempFile(kPatchFile, &patch_file_path, nullptr));

  vector<string> cmd;
  cmd.push_back(diff_path);
  cmd.push_back(old_file);
  cmd.push_back(new_file);
  cmd.push_back(patch_file_path);

  int rc = 1;
  string stdout;
  TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc, &stdout));
  if (rc != 0) {
    LOG(ERROR) << diff_path << " returned " << rc << std::endl << stdout;
    return false;
  }
  TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
  unlink(patch_file_path.c_str());
  return true;
}

bool IsAReplaceOperation(InstallOperation_Type op_type) {
  return (op_type == InstallOperation::REPLACE ||
          op_type == InstallOperation::REPLACE_BZ ||
          op_type == InstallOperation::REPLACE_XZ);
}

// Returns true if |op| is a no-op operation that doesn't do any useful work
// (e.g., a move operation that copies blocks onto themselves).
bool IsNoopOperation(const InstallOperation& op) {
  return (op.type() == InstallOperation::MOVE &&
          ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents()));
}

void FilterNoopOperations(vector<AnnotatedOperation>* ops) {
  ops->erase(
      std::remove_if(
          ops->begin(), ops->end(),
          [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}),
      ops->end());
}

bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) {
  info->set_size(part.size);
  HashCalculator hasher;
  TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) ==
                        static_cast<off_t>(part.size));
  TEST_AND_RETURN_FALSE(hasher.Finalize());
  const brillo::Blob& hash = hasher.raw_hash();
  info->set_hash(hash.data(), hash.size());
  LOG(INFO) << part.path << ": size=" << part.size << " hash=" << hasher.hash();
  return true;
}

bool CompareAopsByDestination(AnnotatedOperation first_aop,
                              AnnotatedOperation second_aop) {
  // We want empty operations to be at the end of the payload.
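  // Note: !size() is 1 for an empty extent list and 0 otherwise, so an
  // operation without dst_extents compares as "greater" and sorts last.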
  if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size())
    return ((!first_aop.op.dst_extents().size()) <
            (!second_aop.op.dst_extents().size()));
  uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block();
  uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block();
  return first_dst_start < second_dst_start;
}

bool IsExtFilesystem(const string& device) {
  brillo::Blob header;
  // See include/linux/ext2_fs.h for more details on the structure. We obtain
  // the ext2 constants from the ext2fs/ext2fs.h header, but we don't link
  // against the library.
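  // The fields read below live at byte offsets 4 (s_blocks_count),
  // 24 (s_log_block_size) and 56 (s_magic) within the superblock, which itself
  // starts SUPERBLOCK_OFFSET bytes into the device.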
  if (!utils::ReadFileChunk(
          device, 0, SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE, &header) ||
      header.size() < SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE)
    return false;

  const uint8_t* superblock = header.data() + SUPERBLOCK_OFFSET;

  // ext3_fs.h: ext3_super_block.s_blocks_count
  uint32_t block_count =
      *reinterpret_cast<const uint32_t*>(superblock + 1 * sizeof(int32_t));

  // ext3_fs.h: ext3_super_block.s_log_block_size
  uint32_t log_block_size =
      *reinterpret_cast<const uint32_t*>(superblock + 6 * sizeof(int32_t));

  // ext3_fs.h: ext3_super_block.s_magic
  uint16_t magic =
      *reinterpret_cast<const uint16_t*>(superblock + 14 * sizeof(int32_t));

  block_count = le32toh(block_count);
  log_block_size = le32toh(log_block_size) + EXT2_MIN_BLOCK_LOG_SIZE;
  magic = le16toh(magic);

  if (magic != EXT2_SUPER_MAGIC)
    return false;

  // Sanity check the parameters.
  TEST_AND_RETURN_FALSE(log_block_size >= EXT2_MIN_BLOCK_LOG_SIZE &&
                        log_block_size <= EXT2_MAX_BLOCK_LOG_SIZE);
  TEST_AND_RETURN_FALSE(block_count > 0);
  return true;
}

}  // namespace diff_utils

}  // namespace chromeos_update_engine