delta_diff_generator.cc revision 58455ae7fe87a312ae648d871a92e1485d0e9989
1// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "update_engine/payload_generator/delta_diff_generator.h"
6
7#include <errno.h>
8#include <fcntl.h>
9#include <inttypes.h>
10#include <sys/stat.h>
11#include <sys/types.h>
12
13#include <algorithm>
14#include <map>
15#include <memory>
16#include <string>
17#include <utility>
18#include <vector>
19
20#include <base/files/file_path.h>
21#include <base/files/file_util.h>
22#include <base/logging.h>
23#include <base/strings/stringprintf.h>
24#include <base/strings/string_util.h>
25#include <bzlib.h>
26
27#include "update_engine/bzip.h"
28#include "update_engine/delta_performer.h"
29#include "update_engine/file_writer.h"
30#include "update_engine/omaha_hash_calculator.h"
31#include "update_engine/payload_constants.h"
32#include "update_engine/payload_generator/extent_mapper.h"
33#include "update_engine/payload_generator/filesystem_iterator.h"
34#include "update_engine/payload_generator/full_update_generator.h"
35#include "update_engine/payload_generator/graph_types.h"
36#include "update_engine/payload_generator/graph_utils.h"
37#include "update_engine/payload_generator/inplace_generator.h"
38#include "update_engine/payload_generator/metadata.h"
39#include "update_engine/payload_generator/payload_signer.h"
40#include "update_engine/payload_verifier.h"
41#include "update_engine/subprocess.h"
42#include "update_engine/update_metadata.pb.h"
43#include "update_engine/utils.h"
44
45using std::map;
46using std::max;
47using std::min;
48using std::set;
49using std::sort;
50using std::string;
51using std::unique_ptr;
52using std::vector;
53
54namespace {
55
// Major version number of the payload format.
const uint64_t kMajorVersionNumber = 1;

// Upper bound, in bytes, on the destination size for which bsdiff is
// attempted. bsdiff itself should cope with arbitrarily big files, but both
// payload generation and payload application need a significant amount of RAM.
// The hard limit of 200 MiB should not affect any released board, but will
// limit the Chrome binary in ASan builders.
const off_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024;  // bytes

// Printable names of the install operation types; the array is indexed with
// the numeric value of an operation's type.
const char* const kInstallOperationTypes[] = {
  "REPLACE",
  "REPLACE_BZ",
  "MOVE",
  "BSDIFF",
  "SOURCE_COPY",
  "SOURCE_BSDIFF",
};
73
74}  // namespace
75
76namespace chromeos_update_engine {
77
78typedef DeltaDiffGenerator::Block Block;
79typedef map<const DeltaArchiveManifest_InstallOperation*,
80            string> OperationNameMap;
81
82// bytes
83const size_t kRootFSPartitionSize = static_cast<size_t>(2) * 1024 * 1024 * 1024;
84const size_t kBlockSize = 4096;  // bytes
85const char* const kEmptyPath = "";
86const char* const kBsdiffPath = "bsdiff";
87
88// Needed for testing purposes, in case we can't use actual filesystem objects.
89// TODO(garnold) (chromium:331965) Replace this hack with a properly injected
90// parameter in form of a mockable abstract class.
91bool (*get_extents_with_chunk_func)(const string&, off_t, off_t,
92                                    vector<Extent>*) =
93    extent_mapper::ExtentsForFileChunkFibmap;
94
95namespace {
96
97bool IsSparseHole(const Extent &extent) {
98  return (extent.start_block() == kSparseHole);
99}
100
101// Stores all the extents of |path| into |extents|. Returns true on success.
102bool GatherExtents(const string& path,
103                   off_t chunk_offset,
104                   off_t chunk_size,
105                   vector<Extent>* extents) {
106  extents->clear();
107  TEST_AND_RETURN_FALSE(
108      get_extents_with_chunk_func(path, chunk_offset, chunk_size, extents));
109  return true;
110}
111
112// Writes the uint64_t passed in in host-endian to the file as big-endian.
113// Returns true on success.
114bool WriteUint64AsBigEndian(FileWriter* writer, const uint64_t value) {
115  uint64_t value_be = htobe64(value);
116  TEST_AND_RETURN_FALSE(writer->Write(&value_be, sizeof(value_be)));
117  return true;
118}
119
120// Adds each operation from |rootfs_ops| and |kernel_ops| to |out_manifest| in
121// the order they come in those vectors. reports the operations names
122void InstallOperationsToManifest(
123    const vector<AnnotatedOperation>& rootfs_ops,
124    const vector<AnnotatedOperation>& kernel_ops,
125    DeltaArchiveManifest* out_manifest,
126    OperationNameMap* out_op_name_map) {
127  for (const AnnotatedOperation& aop : rootfs_ops) {
128    if (DeltaDiffGenerator::IsNoopOperation(aop.op))
129      continue;
130    DeltaArchiveManifest_InstallOperation* new_op =
131        out_manifest->add_install_operations();
132    (*out_op_name_map)[new_op] = aop.name;
133    *new_op = aop.op;
134  }
135  for (const AnnotatedOperation& aop : kernel_ops) {
136    if (DeltaDiffGenerator::IsNoopOperation(aop.op))
137      continue;
138    DeltaArchiveManifest_InstallOperation* new_op =
139        out_manifest->add_kernel_install_operations();
140    (*out_op_name_map)[new_op] = aop.name;
141    *new_op = aop.op;
142  }
143}
144
// One row of the payload usage report: a named object, the numeric type of the
// operation that produced it (negative when it is not an operation) and its
// size in bytes.
struct DeltaObject {
  DeltaObject(const string& in_name, const int in_type, const off_t in_size)
      : name(in_name), type(in_type), size(in_size) {}

  // Orders objects by ascending size; objects of equal size are ordered by
  // name.
  bool operator <(const DeltaObject& object) const {
    if (size == object.size)
      return name < object.name;
    return size < object.size;
  }

  string name;
  int type;
  off_t size;
};
157
158void ReportPayloadUsage(const DeltaArchiveManifest& manifest,
159                        const int64_t manifest_metadata_size,
160                        const OperationNameMap& op_name_map) {
161  vector<DeltaObject> objects;
162  off_t total_size = 0;
163
164  // Rootfs install operations.
165  for (int i = 0; i < manifest.install_operations_size(); ++i) {
166    const DeltaArchiveManifest_InstallOperation& op =
167        manifest.install_operations(i);
168    objects.push_back(DeltaObject(op_name_map.find(&op)->second,
169                                  op.type(),
170                                  op.data_length()));
171    total_size += op.data_length();
172  }
173
174  // Kernel install operations.
175  for (int i = 0; i < manifest.kernel_install_operations_size(); ++i) {
176    const DeltaArchiveManifest_InstallOperation& op =
177        manifest.kernel_install_operations(i);
178    objects.push_back(DeltaObject(base::StringPrintf("<kernel-operation-%d>",
179                                                     i),
180                                  op.type(),
181                                  op.data_length()));
182    total_size += op.data_length();
183  }
184
185  objects.push_back(DeltaObject("<manifest-metadata>",
186                                -1,
187                                manifest_metadata_size));
188  total_size += manifest_metadata_size;
189
190  std::sort(objects.begin(), objects.end());
191
192  static const char kFormatString[] = "%6.2f%% %10jd %-10s %s\n";
193  for (const DeltaObject& object : objects) {
194    fprintf(stderr, kFormatString,
195            object.size * 100.0 / total_size,
196            static_cast<intmax_t>(object.size),
197            object.type >= 0 ? kInstallOperationTypes[object.type] : "-",
198            object.name.c_str());
199  }
200  fprintf(stderr, kFormatString,
201          100.0, static_cast<intmax_t>(total_size), "", "<total>");
202}
203
204// Process a range of blocks from |range_start| to |range_end| in the extent at
205// position |*idx_p| of |extents|. If |do_remove| is true, this range will be
206// removed, which may cause the extent to be trimmed, split or removed entirely.
207// The value of |*idx_p| is updated to point to the next extent to be processed.
208// Returns true iff the next extent to process is a new or updated one.
209bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p,
210                             const bool do_remove, uint64_t range_start,
211                             uint64_t range_end) {
212  size_t idx = *idx_p;
213  uint64_t start_block = (*extents)[idx].start_block();
214  uint64_t num_blocks = (*extents)[idx].num_blocks();
215  uint64_t range_size = range_end - range_start;
216
217  if (do_remove) {
218    if (range_size == num_blocks) {
219      // Remove the entire extent.
220      extents->erase(extents->begin() + idx);
221    } else if (range_end == num_blocks) {
222      // Trim the end of the extent.
223      (*extents)[idx].set_num_blocks(num_blocks - range_size);
224      idx++;
225    } else if (range_start == 0) {
226      // Trim the head of the extent.
227      (*extents)[idx].set_start_block(start_block + range_size);
228      (*extents)[idx].set_num_blocks(num_blocks - range_size);
229    } else {
230      // Trim the middle, splitting the remainder into two parts.
231      (*extents)[idx].set_num_blocks(range_start);
232      Extent e;
233      e.set_start_block(start_block + range_end);
234      e.set_num_blocks(num_blocks - range_end);
235      idx++;
236      extents->insert(extents->begin() + idx, e);
237    }
238  } else if (range_end == num_blocks) {
239    // Done with this extent.
240    idx++;
241  } else {
242    return false;
243  }
244
245  *idx_p = idx;
246  return true;
247}
248
249// Remove identical corresponding block ranges in |src_extents| and
250// |dst_extents|. Used for preventing moving of blocks onto themselves during
251// MOVE operations. The value of |total_bytes| indicates the actual length of
252// content; this may be slightly less than the total size of blocks, in which
253// case the last block is only partly occupied with data. Returns the total
254// number of bytes removed.
255size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents,
256                                  vector<Extent>* dst_extents,
257                                  const size_t total_bytes) {
258  size_t src_idx = 0;
259  size_t dst_idx = 0;
260  uint64_t src_offset = 0, dst_offset = 0;
261  bool new_src = true, new_dst = true;
262  size_t removed_bytes = 0, nonfull_block_bytes;
263  bool do_remove = false;
264  while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) {
265    if (new_src) {
266      src_offset = 0;
267      new_src = false;
268    }
269    if (new_dst) {
270      dst_offset = 0;
271      new_dst = false;
272    }
273
274    do_remove = ((*src_extents)[src_idx].start_block() + src_offset ==
275                 (*dst_extents)[dst_idx].start_block() + dst_offset);
276
277    uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks();
278    uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks();
279    uint64_t min_num_blocks = min(src_num_blocks - src_offset,
280                                  dst_num_blocks - dst_offset);
281    uint64_t prev_src_offset = src_offset;
282    uint64_t prev_dst_offset = dst_offset;
283    src_offset += min_num_blocks;
284    dst_offset += min_num_blocks;
285
286    new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove,
287                                      prev_src_offset, src_offset);
288    new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove,
289                                      prev_dst_offset, dst_offset);
290    if (do_remove)
291      removed_bytes += min_num_blocks * kBlockSize;
292  }
293
294  // If we removed the last block and this block is only partly used by file
295  // content, deduct the unused portion from the total removed byte count.
296  if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize))
297    removed_bytes -= kBlockSize - nonfull_block_bytes;
298
299  return removed_bytes;
300}
301
302// Compare two AnnotatedOperations by the start block of the first Extent in
303// their destination extents.
304bool CompareAopsByDestination(AnnotatedOperation first_aop,
305                              AnnotatedOperation second_aop) {
306  // We want empty operations to be at the end of the payload.
307  if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size())
308    return ((!first_aop.op.dst_extents().size()) <
309            (!second_aop.op.dst_extents().size()));
310  uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block();
311  uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block();
312  return first_dst_start < second_dst_start;
313}
314
315}  // namespace
316
317bool DeltaDiffGenerator::DeltaReadFiles(Graph* graph,
318                                        vector<Block>* blocks,
319                                        const string& old_part,
320                                        const string& new_part,
321                                        const string& old_root,
322                                        const string& new_root,
323                                        off_t chunk_size,
324                                        int data_fd,
325                                        off_t* data_file_size,
326                                        bool src_ops_allowed) {
327  set<ino_t> visited_inodes;
328  set<ino_t> visited_src_inodes;
329  for (FilesystemIterator fs_iter(new_root,
330                                  set<string>{"/lost+found"});
331       !fs_iter.IsEnd(); fs_iter.Increment()) {
332    // We never diff symlinks (here, we check that dst file is not a symlink).
333    if (!S_ISREG(fs_iter.GetStat().st_mode))
334      continue;
335
336    // Make sure we visit each inode only once.
337    if (utils::SetContainsKey(visited_inodes, fs_iter.GetStat().st_ino))
338      continue;
339    visited_inodes.insert(fs_iter.GetStat().st_ino);
340    off_t dst_size = fs_iter.GetFileSize();
341    if (dst_size == 0)
342      continue;
343
344    LOG(INFO) << "Encoding file " << fs_iter.GetPartialPath();
345
346    // We can't visit each dst image inode more than once, as that would
347    // duplicate work. Here, we avoid visiting each source image inode
348    // more than once. Technically, we could have multiple operations
349    // that read the same blocks from the source image for diffing, but
350    // we choose not to avoid complexity. Eventually we will move away
351    // from using a graph/cycle detection/etc to generate diffs, and at that
352    // time, it will be easy (non-complex) to have many operations read
353    // from the same source blocks. At that time, this code can die. -adlr
354    bool should_diff_from_source = false;
355    string src_path = old_root + fs_iter.GetPartialPath();
356    struct stat src_stbuf;
357    // We never diff symlinks (here, we check that src file is not a symlink).
358    if (0 == lstat(src_path.c_str(), &src_stbuf) &&
359        S_ISREG(src_stbuf.st_mode)) {
360      should_diff_from_source = !utils::SetContainsKey(visited_src_inodes,
361                                                       src_stbuf.st_ino);
362      visited_src_inodes.insert(src_stbuf.st_ino);
363    }
364
365    off_t size = chunk_size == -1 ? dst_size : chunk_size;
366    off_t step = size;
367    for (off_t offset = 0; offset < dst_size; offset += step) {
368      if (offset + size >= dst_size) {
369        size = -1;  // Read through the end of the file.
370      }
371      TEST_AND_RETURN_FALSE(DeltaDiffGenerator::DeltaReadFile(
372          graph,
373          Vertex::kInvalidIndex,
374          blocks,
375          old_part,
376          new_part,
377          (should_diff_from_source ? old_root : kEmptyPath),
378          new_root,
379          fs_iter.GetPartialPath(),
380          offset,
381          size,
382          data_fd,
383          data_file_size,
384          src_ops_allowed));
385    }
386  }
387  return true;
388}
389
390bool DeltaDiffGenerator::DeltaReadFile(Graph* graph,
391                                       Vertex::Index existing_vertex,
392                                       vector<Block>* blocks,
393                                       const string& old_part,
394                                       const string& new_part,
395                                       const string& old_root,
396                                       const string& new_root,
397                                       const string& path,  // within new_root
398                                       off_t chunk_offset,
399                                       off_t chunk_size,
400                                       int data_fd,
401                                       off_t* data_file_size,
402                                       bool src_ops_allowed) {
403  chromeos::Blob data;
404  DeltaArchiveManifest_InstallOperation operation;
405
406  // If bsdiff breaks again, blacklist the problem file by using:
407  //   bsdiff_allowed = (path != "/foo/bar")
408  //
409  // TODO(dgarrett): chromium-os:15274 connect this test to the command line.
410  bool bsdiff_allowed = true;
411
412  if (utils::FileSize(new_root + path) > kMaxBsdiffDestinationSize)
413    bsdiff_allowed = false;
414
415  if (!bsdiff_allowed)
416    LOG(INFO) << "bsdiff blacklisting: " << path;
417
418  string old_filename = (old_root == kEmptyPath) ? kEmptyPath : old_root + path;
419
420  TEST_AND_RETURN_FALSE(DeltaDiffGenerator::ReadFileToDiff(old_part,
421                                                           new_part,
422                                                           chunk_offset,
423                                                           chunk_size,
424                                                           bsdiff_allowed,
425                                                           &data,
426                                                           &operation,
427                                                           true,
428                                                           src_ops_allowed,
429                                                           old_filename,
430                                                           new_root + path));
431
432  // Check if the operation writes nothing.
433  if (operation.dst_extents_size() == 0) {
434    if (operation.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE) {
435      LOG(INFO) << "Empty MOVE operation ("
436                << new_root + path << "), skipping";
437      return true;
438    } else {
439      LOG(ERROR) << "Empty non-MOVE operation";
440      return false;
441    }
442  }
443
444  // Write the data
445  if (operation.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE &&
446      operation.type() !=
447          DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY) {
448    operation.set_data_offset(*data_file_size);
449    operation.set_data_length(data.size());
450  }
451
452  TEST_AND_RETURN_FALSE(utils::WriteAll(data_fd, data.data(), data.size()));
453  *data_file_size += data.size();
454
455  // Now, insert into graph and blocks vector
456  Vertex::Index vertex = existing_vertex;
457  if (vertex == Vertex::kInvalidIndex) {
458    graph->emplace_back();
459    vertex = graph->size() - 1;
460  }
461  (*graph)[vertex].op = operation;
462  CHECK((*graph)[vertex].op.has_type());
463  (*graph)[vertex].file_name = path;
464  (*graph)[vertex].chunk_offset = chunk_offset;
465  (*graph)[vertex].chunk_size = chunk_size;
466
467  if (blocks)
468    TEST_AND_RETURN_FALSE(InplaceGenerator::AddInstallOpToBlocksVector(
469        (*graph)[vertex].op,
470        *graph,
471        vertex,
472        blocks));
473  return true;
474}
475
476bool DeltaDiffGenerator::ReadFileToDiff(
477    const string& old_part,
478    const string& new_part,
479    off_t chunk_offset,
480    off_t chunk_size,
481    bool bsdiff_allowed,
482    chromeos::Blob* out_data,
483    DeltaArchiveManifest_InstallOperation* out_op,
484    bool gather_extents,
485    bool src_ops_allowed,
486    const string& old_filename,
487    const string& new_filename) {
488
489  // Do we have an original file to consider?
490  off_t old_size = 0;
491  bool original = !old_filename.empty();
492  if (original && (old_size = utils::FileSize(old_filename)) < 0) {
493    // If stat-ing the old file fails, it should be because it doesn't exist.
494    TEST_AND_RETURN_FALSE(!utils::FileExists(old_filename.c_str()));
495    original = false;
496  }
497
498  DeltaArchiveManifest_InstallOperation operation;
499  vector<Extent> src_extents, dst_extents;
500  // Gather source extents if we have an original file.
501  if (original) {
502    if (gather_extents) {
503      TEST_AND_RETURN_FALSE(
504          GatherExtents(old_filename, chunk_offset, chunk_size, &src_extents));
505      ClearSparseHoles(&src_extents);
506      if (src_extents.size() == 0) {
507        // Reading from sparse hole, do nothing.
508        operation.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
509        *out_op = operation;
510        return true;
511      }
512    } else {
513      // We have a kernel, so make one extent to cover it all.
514      Extent* src_extent = operation.add_src_extents();
515      src_extent->set_start_block(0);
516      src_extent->set_num_blocks(
517          (utils::FileSize(old_filename) + (kBlockSize - 1)) / kBlockSize);
518      src_extents.push_back(*src_extent);
519    }
520  }
521
522  // Gather destination extents.
523  if (gather_extents) {
524    TEST_AND_RETURN_FALSE(
525        GatherExtents(new_filename, chunk_offset, chunk_size, &dst_extents));
526    ClearSparseHoles(&dst_extents);
527    if (dst_extents.size() == 0) {
528      // Make an empty move operation.
529      operation.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
530      *out_op = operation;
531      return true;
532    }
533  } else {
534    Extent* dst_extent = operation.add_dst_extents();
535    dst_extent->set_start_block(0);
536    dst_extent->set_num_blocks(
537        (utils::FileSize(new_filename) + (kBlockSize - 1)) / kBlockSize);
538    dst_extents.push_back(*dst_extent);
539  }
540
541  NormalizeExtents(&src_extents);
542  NormalizeExtents(&dst_extents);
543
544  // Figure out how many blocks we need to write to dst_extents.
545  uint64_t blocks_to_write = 0;
546  for (uint32_t i = 0; i < dst_extents.size(); i++)
547    blocks_to_write += dst_extents[i].num_blocks();
548
549  // Figure out how many blocks we need to read to src_extents.
550  uint64_t blocks_to_read = 0;
551  for (uint32_t i = 0; i < src_extents.size(); i++)
552    blocks_to_read += src_extents[i].num_blocks();
553
554  // Read in bytes from new data.
555  chromeos::Blob new_data;
556  TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part,
557                                           dst_extents,
558                                           &new_data,
559                                           kBlockSize * blocks_to_write,
560                                           kBlockSize));
561
562  TEST_AND_RETURN_FALSE(!new_data.empty());
563  TEST_AND_RETURN_FALSE(chunk_size == -1 ||
564                        static_cast<off_t>(new_data.size()) <= chunk_size);
565
566  chromeos::Blob new_data_bz;
567  TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz));
568  CHECK(!new_data_bz.empty());
569  chromeos::Blob data;  // Data blob that will be written to delta file.
570
571  size_t current_best_size = 0;
572  if (new_data.size() <= new_data_bz.size()) {
573    operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
574    current_best_size = new_data.size();
575    data = new_data;
576  } else {
577    operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
578    current_best_size = new_data_bz.size();
579    data = new_data_bz;
580  }
581  chromeos::Blob old_data;
582  if (original) {
583    // Read old data.
584    TEST_AND_RETURN_FALSE(
585        utils::ReadExtents(old_part, src_extents, &old_data,
586                           kBlockSize * blocks_to_read, kBlockSize));
587    if (old_data == new_data) {
588      // No change in data.
589      if (src_ops_allowed) {
590        operation.set_type(
591            DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY);
592      } else {
593        operation.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
594      }
595      current_best_size = 0;
596      data.clear();
597    } else if (!old_data.empty() && bsdiff_allowed) {
598      // If the source file is considered bsdiff safe (no bsdiff bugs
599      // triggered), see if BSDIFF encoding is smaller.
600      base::FilePath old_chunk;
601      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&old_chunk));
602      ScopedPathUnlinker old_unlinker(old_chunk.value());
603      TEST_AND_RETURN_FALSE(
604          utils::WriteFile(old_chunk.value().c_str(),
605                           old_data.data(), old_data.size()));
606      base::FilePath new_chunk;
607      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&new_chunk));
608      ScopedPathUnlinker new_unlinker(new_chunk.value());
609      TEST_AND_RETURN_FALSE(
610          utils::WriteFile(new_chunk.value().c_str(),
611                           new_data.data(), new_data.size()));
612
613      chromeos::Blob bsdiff_delta;
614      TEST_AND_RETURN_FALSE(
615          BsdiffFiles(old_chunk.value(), new_chunk.value(), &bsdiff_delta));
616      CHECK_GT(bsdiff_delta.size(), static_cast<chromeos::Blob::size_type>(0));
617      if (bsdiff_delta.size() < current_best_size) {
618        if (src_ops_allowed) {
619          operation.set_type(
620              DeltaArchiveManifest_InstallOperation_Type_SOURCE_BSDIFF);
621        } else {
622          operation.set_type(DeltaArchiveManifest_InstallOperation_Type_BSDIFF);
623        }
624        current_best_size = bsdiff_delta.size();
625        data = bsdiff_delta;
626      }
627    }
628  }
629
630  operation.set_src_length(old_data.size());
631  operation.set_dst_length(new_data.size());
632
633  // Set parameters of the operations
634  CHECK_EQ(data.size(), current_best_size);
635
636  if (gather_extents) {
637    // Remove identical src/dst block ranges in MOVE operations.
638    if (operation.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE) {
639      size_t removed_bytes = RemoveIdenticalBlockRanges(
640          &src_extents, &dst_extents, new_data.size());
641
642      // Adjust the file length field accordingly.
643      if (removed_bytes) {
644        operation.set_src_length(old_data.size() - removed_bytes);
645        operation.set_dst_length(new_data.size() - removed_bytes);
646      }
647    }
648
649    // Embed extents in the operation.
650    StoreExtents(src_extents, operation.mutable_src_extents());
651    StoreExtents(dst_extents, operation.mutable_dst_extents());
652  }
653
654  // Replace operations should not have source extents.
655  if (operation.type() == DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
656      operation.type() ==
657          DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ) {
658    operation.clear_src_extents();
659    operation.clear_src_length();
660  }
661
662  out_data->swap(data);
663  *out_op = operation;
664
665  return true;
666}
667
668bool DeltaDiffGenerator::DeltaCompressKernelPartition(
669    const string& old_kernel_part,
670    const string& new_kernel_part,
671    vector<AnnotatedOperation>* kernel_ops,
672    int blobs_fd,
673    off_t* blobs_length,
674    bool src_ops_allowed) {
675  LOG(INFO) << "Delta compressing kernel partition...";
676  LOG_IF(INFO, old_kernel_part.empty()) << "Generating full kernel update...";
677
678  DeltaArchiveManifest_InstallOperation op;
679  chromeos::Blob data;
680  TEST_AND_RETURN_FALSE(
681      ReadFileToDiff(old_kernel_part,
682                     new_kernel_part,
683                     0,  // chunk_offset
684                     -1,  // chunk_size
685                     true,  // bsdiff_allowed
686                     &data,
687                     &op,
688                     false,  // gather_extents
689                     src_ops_allowed,
690                     old_kernel_part,  // Doesn't matter, kernel has no files.
691                     new_kernel_part));
692
693  // Check if the operation writes nothing.
694  if (op.dst_extents_size() == 0) {
695    if (op.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE) {
696      LOG(INFO) << "Empty MOVE operation, nothing to do.";
697      return true;
698    } else {
699      LOG(ERROR) << "Empty non-MOVE operation";
700      return false;
701    }
702  }
703
704  // Write the data.
705  if (op.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE &&
706      op.type() != DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY) {
707    op.set_data_offset(*blobs_length);
708    op.set_data_length(data.size());
709  }
710
711  // Add the new install operation.
712  kernel_ops->clear();
713  kernel_ops->emplace_back();
714  kernel_ops->back().op = op;
715  kernel_ops->back().name = "<kernel-delta-operation>";
716
717  TEST_AND_RETURN_FALSE(utils::WriteAll(blobs_fd, data.data(), data.size()));
718  *blobs_length += data.size();
719
720  LOG(INFO) << "Done delta compressing kernel partition: "
721            << kInstallOperationTypes[op.type()];
722  return true;
723}
724
725// TODO(deymo): Replace Vertex with AnnotatedOperation. This requires to move
726// out the code that adds the reader dependencies on the new vertex.
727bool DeltaDiffGenerator::ReadUnwrittenBlocks(
728    const vector<Block>& blocks,
729    int blobs_fd,
730    off_t* blobs_length,
731    const string& old_image_path,
732    const uint64_t old_image_size,
733    const string& new_image_path,
734    Vertex* vertex,
735    uint32_t minor_version) {
736  vertex->file_name = "<rootfs-non-file-data>";
737
738  DeltaArchiveManifest_InstallOperation* out_op = &vertex->op;
739  int new_image_fd = open(new_image_path.c_str(), O_RDONLY, 000);
740  TEST_AND_RETURN_FALSE_ERRNO(new_image_fd >= 0);
741  ScopedFdCloser new_image_fd_closer(&new_image_fd);
742  int old_image_fd = open(old_image_path.c_str(), O_RDONLY, 000);
743  TEST_AND_RETURN_FALSE_ERRNO(old_image_fd >= 0);
744  ScopedFdCloser old_image_fd_closer(&old_image_fd);
745
746  string temp_file_path;
747  TEST_AND_RETURN_FALSE(utils::MakeTempFile("CrAU_temp_data.XXXXXX",
748                                            &temp_file_path,
749                                            nullptr));
750
751  FILE* file = fopen(temp_file_path.c_str(), "w");
752  TEST_AND_RETURN_FALSE(file);
753  int err = BZ_OK;
754
755  BZFILE* bz_file = BZ2_bzWriteOpen(&err,
756                                    file,
757                                    9,  // max compression
758                                    0,  // verbosity
759                                    0);  // default work factor
760  TEST_AND_RETURN_FALSE(err == BZ_OK);
761
762  vector<Extent> extents;
763  vector<Block>::size_type block_count = 0;
764
765  LOG(INFO) << "Appending unwritten blocks to extents";
766  for (vector<Block>::size_type i = 0; i < blocks.size(); i++) {
767    if (blocks[i].writer != Vertex::kInvalidIndex)
768      continue;
769    graph_utils::AppendBlockToExtents(&extents, i);
770    block_count++;
771  }
772
773  // Code will handle buffers of any size that's a multiple of kBlockSize,
774  // so we arbitrarily set it to 1024 * kBlockSize.
775  chromeos::Blob new_buf(1024 * kBlockSize);
776  chromeos::Blob old_buf(1024 * kBlockSize);
777
778  LOG(INFO) << "Scanning " << block_count << " unwritten blocks";
779  vector<Extent> changed_extents;
780  vector<Block>::size_type changed_block_count = 0;
781  vector<Block>::size_type blocks_copied_count = 0;
782
783  // For each extent in extents, write the unchanged blocks into BZ2_bzWrite,
784  // which sends it to an output file.  We use the temporary buffers to hold the
785  // old and new data, which may be smaller than the extent, so in that case we
786  // have to loop to get the extent's data (that's the inner while loop).
787  for (const Extent& extent : extents) {
788    vector<Block>::size_type blocks_read = 0;
789    float printed_progress = -1;
790    while (blocks_read < extent.num_blocks()) {
791      const uint64_t copy_first_block = extent.start_block() + blocks_read;
792      const int copy_block_cnt =
793          min(new_buf.size() / kBlockSize,
794              static_cast<chromeos::Blob::size_type>(
795                  extent.num_blocks() - blocks_read));
796      const size_t count = copy_block_cnt * kBlockSize;
797      const off_t offset = copy_first_block * kBlockSize;
798      ssize_t rc = pread(new_image_fd, new_buf.data(), count, offset);
799      TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
800      TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) == count);
801
802      const off_t old_offset = min(offset, static_cast<off_t>(old_image_size));
803      const size_t old_count =
804          min(static_cast<size_t>(offset + count),
805              static_cast<size_t>(old_image_size)) - old_offset;
806      rc = pread(old_image_fd, old_buf.data(), old_count, old_offset);
807      TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
808      TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) == old_count);
809
810      // Compare each block in the buffer to its counterpart in the old image
811      // and only compress it if its content has changed.
812      int buf_offset = 0;
813      for (int i = 0; i < copy_block_cnt; ++i) {
814        int buf_end_offset = buf_offset + kBlockSize;
815        if (minor_version == kSourceMinorPayloadVersion ||
816            static_cast<size_t>(buf_end_offset) > old_count ||
817            !std::equal(new_buf.begin() + buf_offset,
818                        new_buf.begin() + buf_end_offset,
819                        old_buf.begin() + buf_offset)) {
820          BZ2_bzWrite(&err, bz_file, &new_buf[buf_offset], kBlockSize);
821          TEST_AND_RETURN_FALSE(err == BZ_OK);
822          const uint64_t block_idx = copy_first_block + i;
823          if (blocks[block_idx].reader != Vertex::kInvalidIndex) {
824            graph_utils::AddReadBeforeDep(vertex, blocks[block_idx].reader,
825                                          block_idx);
826          }
827          graph_utils::AppendBlockToExtents(&changed_extents, block_idx);
828          changed_block_count++;
829        }
830        buf_offset = buf_end_offset;
831      }
832
833      blocks_read += copy_block_cnt;
834      blocks_copied_count += copy_block_cnt;
835      float current_progress =
836          static_cast<float>(blocks_copied_count) / block_count;
837      if (printed_progress + 0.1 < current_progress ||
838          blocks_copied_count == block_count) {
839        LOG(INFO) << "progress: " << current_progress;
840        printed_progress = current_progress;
841      }
842    }
843  }
844  BZ2_bzWriteClose(&err, bz_file, 0, nullptr, nullptr);
845  TEST_AND_RETURN_FALSE(err == BZ_OK);
846  bz_file = nullptr;
847  TEST_AND_RETURN_FALSE_ERRNO(0 == fclose(file));
848  file = nullptr;
849
850  LOG(INFO) << "Compressed " << changed_block_count << " blocks ("
851            << block_count - changed_block_count << " blocks unchanged)";
852  chromeos::Blob compressed_data;
853  if (changed_block_count > 0) {
854    LOG(INFO) << "Reading compressed data off disk";
855    TEST_AND_RETURN_FALSE(utils::ReadFile(temp_file_path, &compressed_data));
856  }
857  TEST_AND_RETURN_FALSE(unlink(temp_file_path.c_str()) == 0);
858
859  // Add node to graph to write these blocks
860  out_op->set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
861  out_op->set_data_offset(*blobs_length);
862  out_op->set_data_length(compressed_data.size());
863  LOG(INFO) << "Rootfs non-data blocks compressed take up "
864            << compressed_data.size();
865  *blobs_length += compressed_data.size();
866  out_op->set_dst_length(kBlockSize * changed_block_count);
867  DeltaDiffGenerator::StoreExtents(changed_extents,
868                                   out_op->mutable_dst_extents());
869
870  TEST_AND_RETURN_FALSE(utils::WriteAll(blobs_fd,
871                                        compressed_data.data(),
872                                        compressed_data.size()));
873  LOG(INFO) << "Done processing unwritten blocks";
874  return true;
875}
876
877bool DeltaDiffGenerator::InitializePartitionInfo(bool is_kernel,
878                                                 const string& partition,
879                                                 PartitionInfo* info) {
880  int64_t size = 0;
881  if (is_kernel) {
882    size = utils::FileSize(partition);
883  } else {
884    int block_count = 0, block_size = 0;
885    TEST_AND_RETURN_FALSE(utils::GetFilesystemSize(partition,
886                                                   &block_count,
887                                                   &block_size));
888    size = static_cast<int64_t>(block_count) * block_size;
889  }
890  TEST_AND_RETURN_FALSE(size > 0);
891  info->set_size(size);
892  OmahaHashCalculator hasher;
893  TEST_AND_RETURN_FALSE(hasher.UpdateFile(partition, size) == size);
894  TEST_AND_RETURN_FALSE(hasher.Finalize());
895  const chromeos::Blob& hash = hasher.raw_hash();
896  info->set_hash(hash.data(), hash.size());
897  LOG(INFO) << partition << ": size=" << size << " hash=" << hasher.hash();
898  return true;
899}
900
901bool InitializePartitionInfos(const PayloadGenerationConfig& config,
902                              DeltaArchiveManifest* manifest) {
903  if (!config.source.kernel_part.empty()) {
904    TEST_AND_RETURN_FALSE(DeltaDiffGenerator::InitializePartitionInfo(
905        true,
906        config.source.kernel_part,
907        manifest->mutable_old_kernel_info()));
908  }
909  TEST_AND_RETURN_FALSE(DeltaDiffGenerator::InitializePartitionInfo(
910      true,
911      config.target.kernel_part,
912      manifest->mutable_new_kernel_info()));
913  if (!config.source.rootfs_part.empty()) {
914    TEST_AND_RETURN_FALSE(DeltaDiffGenerator::InitializePartitionInfo(
915        false,
916        config.source.rootfs_part,
917        manifest->mutable_old_rootfs_info()));
918  }
919  TEST_AND_RETURN_FALSE(DeltaDiffGenerator::InitializePartitionInfo(
920      false,
921      config.target.rootfs_part,
922      manifest->mutable_new_rootfs_info()));
923  return true;
924}
925
926// Stores all Extents in 'extents' into 'out'.
927void DeltaDiffGenerator::StoreExtents(
928    const vector<Extent>& extents,
929    google::protobuf::RepeatedPtrField<Extent>* out) {
930  for (const Extent& extent : extents) {
931    Extent* new_extent = out->Add();
932    *new_extent = extent;
933  }
934}
935
936// Stores all extents in |extents| into |out_vector|.
937void DeltaDiffGenerator::ExtentsToVector(
938    const google::protobuf::RepeatedPtrField<Extent>& extents,
939    vector<Extent>* out_vector) {
940  out_vector->clear();
941  for (int i = 0; i < extents.size(); i++) {
942    out_vector->push_back(extents.Get(i));
943  }
944}
945
946// Returns true if |op| is a no-op operation that doesn't do any useful work
947// (e.g., a move operation that copies blocks onto themselves).
948bool DeltaDiffGenerator::IsNoopOperation(
949    const DeltaArchiveManifest_InstallOperation& op) {
950  return (op.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE &&
951          ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents()));
952}
953
954void DeltaDiffGenerator::FilterNoopOperations(vector<AnnotatedOperation>* ops) {
955  ops->erase(
956      std::remove_if(
957          ops->begin(), ops->end(),
958          [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}),
959      ops->end());
960}
961
962bool DeltaDiffGenerator::ReorderDataBlobs(
963    DeltaArchiveManifest* manifest,
964    const string& data_blobs_path,
965    const string& new_data_blobs_path) {
966  int in_fd = open(data_blobs_path.c_str(), O_RDONLY, 0);
967  TEST_AND_RETURN_FALSE_ERRNO(in_fd >= 0);
968  ScopedFdCloser in_fd_closer(&in_fd);
969
970  DirectFileWriter writer;
971  TEST_AND_RETURN_FALSE(
972      writer.Open(new_data_blobs_path.c_str(),
973                  O_WRONLY | O_TRUNC | O_CREAT,
974                  0644) == 0);
975  ScopedFileWriterCloser writer_closer(&writer);
976  uint64_t out_file_size = 0;
977
978  for (int i = 0; i < (manifest->install_operations_size() +
979                       manifest->kernel_install_operations_size()); i++) {
980    DeltaArchiveManifest_InstallOperation* op = nullptr;
981    if (i < manifest->install_operations_size()) {
982      op = manifest->mutable_install_operations(i);
983    } else {
984      op = manifest->mutable_kernel_install_operations(
985          i - manifest->install_operations_size());
986    }
987    if (!op->has_data_offset())
988      continue;
989    CHECK(op->has_data_length());
990    chromeos::Blob buf(op->data_length());
991    ssize_t rc = pread(in_fd, buf.data(), buf.size(), op->data_offset());
992    TEST_AND_RETURN_FALSE(rc == static_cast<ssize_t>(buf.size()));
993
994    // Add the hash of the data blobs for this operation
995    TEST_AND_RETURN_FALSE(AddOperationHash(op, buf));
996
997    op->set_data_offset(out_file_size);
998    TEST_AND_RETURN_FALSE(writer.Write(buf.data(), buf.size()));
999    out_file_size += buf.size();
1000  }
1001  return true;
1002}
1003
1004bool DeltaDiffGenerator::AddOperationHash(
1005    DeltaArchiveManifest_InstallOperation* op,
1006    const chromeos::Blob& buf) {
1007  OmahaHashCalculator hasher;
1008  TEST_AND_RETURN_FALSE(hasher.Update(buf.data(), buf.size()));
1009  TEST_AND_RETURN_FALSE(hasher.Finalize());
1010  const chromeos::Blob& hash = hasher.raw_hash();
1011  op->set_data_sha256_hash(hash.data(), hash.size());
1012  return true;
1013}
1014
1015bool DeltaDiffGenerator::GenerateOperations(
1016    const PayloadGenerationConfig& config,
1017    int data_file_fd,
1018    off_t* data_file_size,
1019    vector<AnnotatedOperation>* rootfs_ops,
1020    vector<AnnotatedOperation>* kernel_ops) {
1021  // List of blocks in the target partition, with the operation that needs to
1022  // write it and the operation that needs to read it. This is used here to
1023  // keep track of the blocks that no operation is writing it.
1024  vector<Block> blocks(config.target.rootfs_size / config.block_size);
1025
1026  // TODO(deymo): DeltaReadFiles() should not use a graph to generate the
1027  // operations, either in the in-place or source uprate. Split out the
1028  // graph dependency generation.
1029  Graph graph;
1030  TEST_AND_RETURN_FALSE(DeltaReadFiles(&graph,
1031                                       &blocks,
1032                                       config.source.rootfs_part,
1033                                       config.target.rootfs_part,
1034                                       config.source.rootfs_mountpt,
1035                                       config.target.rootfs_mountpt,
1036                                       config.chunk_size,
1037                                       data_file_fd,
1038                                       data_file_size,
1039                                       true));  // src_ops_allowed
1040  rootfs_ops->clear();
1041  for (const Vertex& v : graph) {
1042    rootfs_ops->emplace_back();
1043    AnnotatedOperation& aop = rootfs_ops->back();
1044    aop.op = v.op;
1045    aop.SetNameFromFileAndChunk(v.file_name, v.chunk_offset, v.chunk_size);
1046  }
1047
1048  LOG(INFO) << "done reading normal files";
1049
1050  // Read kernel partition
1051  TEST_AND_RETURN_FALSE(
1052      DeltaCompressKernelPartition(config.source.kernel_part,
1053                                   config.target.kernel_part,
1054                                   kernel_ops,
1055                                   data_file_fd,
1056                                   data_file_size,
1057                                   true));  // src_ops_allowed
1058  LOG(INFO) << "done reading kernel";
1059
1060  Vertex unwritten_vertex;
1061  TEST_AND_RETURN_FALSE(ReadUnwrittenBlocks(blocks,
1062                                            data_file_fd,
1063                                            data_file_size,
1064                                            config.source.rootfs_part,
1065                                            config.source.rootfs_size,
1066                                            config.target.rootfs_part,
1067                                            &unwritten_vertex,
1068                                            config.minor_version));
1069  if (unwritten_vertex.op.data_length() == 0) {
1070    LOG(INFO) << "No unwritten blocks to write, omitting operation";
1071  } else {
1072    rootfs_ops->emplace_back();
1073    rootfs_ops->back().op = unwritten_vertex.op;
1074    rootfs_ops->back().name = unwritten_vertex.file_name;
1075  }
1076
1077  TEST_AND_RETURN_FALSE(FragmentOperations(rootfs_ops,
1078                                           config.target.rootfs_part,
1079                                           data_file_fd,
1080                                           data_file_size));
1081  TEST_AND_RETURN_FALSE(FragmentOperations(kernel_ops,
1082                                           config.target.kernel_part,
1083                                           data_file_fd,
1084                                           data_file_size));
1085  SortOperationsByDestination(rootfs_ops);
1086  SortOperationsByDestination(kernel_ops);
1087  // TODO(alliewood): Change merge operations to use config.chunk_size once
1088  // specifying chunk_size on the command line works. crbug/485397.
1089  TEST_AND_RETURN_FALSE(MergeOperations(rootfs_ops,
1090                                        kDefaultChunkSize,
1091                                        config.target.rootfs_part,
1092                                        data_file_fd,
1093                                        data_file_size));
1094  TEST_AND_RETURN_FALSE(MergeOperations(kernel_ops,
1095                                        kDefaultChunkSize,
1096                                        config.target.kernel_part,
1097                                        data_file_fd,
1098                                        data_file_size));
1099  return true;
1100}
1101
1102bool GenerateUpdatePayloadFile(
1103    const PayloadGenerationConfig& config,
1104    const string& output_path,
1105    const string& private_key_path,
1106    uint64_t* metadata_size) {
1107  if (config.is_delta) {
1108    LOG_IF(WARNING, config.source.rootfs_size != config.target.rootfs_size)
1109        << "Old and new images have different block counts.";
1110    // TODO(deymo): Our tools only support growing the filesystem size during
1111    // an update. Remove this check when that's fixed. crbug.com/192136
1112    LOG_IF(FATAL, config.source.rootfs_size > config.target.rootfs_size)
1113        << "Shirking the rootfs size is not supported at the moment.";
1114  }
1115
1116  // Sanity checks for the partition size.
1117  LOG(INFO) << "Rootfs partition size: " << config.rootfs_partition_size;
1118  LOG(INFO) << "Actual filesystem size: " << config.target.rootfs_size;
1119
1120  LOG(INFO) << "Invalid block index: " << Vertex::kInvalidIndex;
1121  LOG(INFO) << "Block count: "
1122            << config.target.rootfs_size / config.block_size;
1123
1124  const string kTempFileTemplate("CrAU_temp_data.XXXXXX");
1125  string temp_file_path;
1126  unique_ptr<ScopedPathUnlinker> temp_file_unlinker;
1127  off_t data_file_size = 0;
1128
1129  LOG(INFO) << "Reading files...";
1130
1131  // Create empty protobuf Manifest object
1132  DeltaArchiveManifest manifest;
1133  manifest.set_minor_version(config.minor_version);
1134
1135  vector<AnnotatedOperation> rootfs_ops;
1136  vector<AnnotatedOperation> kernel_ops;
1137
1138  // Select payload generation strategy based on the config.
1139  unique_ptr<OperationsGenerator> strategy;
1140  if (config.is_delta) {
1141    // We don't efficiently support deltas on squashfs. For now, we will
1142    // produce full operations in that case.
1143    if (utils::IsSquashfsFilesystem(config.target.rootfs_part)) {
1144      LOG(INFO) << "Using generator FullUpdateGenerator::Run for squashfs "
1145                   "deltas";
1146      strategy.reset(new FullUpdateGenerator());
1147    } else if (utils::IsExtFilesystem(config.target.rootfs_part)) {
1148      // Delta update (with possibly a full kernel update).
1149      if (config.minor_version == kInPlaceMinorPayloadVersion) {
1150        LOG(INFO) << "Using generator InplaceGenerator::GenerateInplaceDelta";
1151        strategy.reset(new InplaceGenerator());
1152      } else if (config.minor_version == kSourceMinorPayloadVersion) {
1153        LOG(INFO) << "Using generator DeltaDiffGenerator::GenerateSourceDelta";
1154        strategy.reset(new DeltaDiffGenerator());
1155      } else {
1156        LOG(ERROR) << "Unsupported minor version given for delta payload: "
1157                   << config.minor_version;
1158        return false;
1159      }
1160    } else {
1161      LOG(ERROR) << "Unsupported filesystem for delta payload in "
1162                 << config.target.rootfs_part;
1163      return false;
1164    }
1165  } else {
1166    // Full update.
1167    LOG(INFO) << "Using generator FullUpdateGenerator::Run";
1168    strategy.reset(new FullUpdateGenerator());
1169  }
1170
1171  {
1172    int data_file_fd;
1173    TEST_AND_RETURN_FALSE(
1174        utils::MakeTempFile(kTempFileTemplate, &temp_file_path, &data_file_fd));
1175    temp_file_unlinker.reset(new ScopedPathUnlinker(temp_file_path));
1176    TEST_AND_RETURN_FALSE(data_file_fd >= 0);
1177    ScopedFdCloser data_file_fd_closer(&data_file_fd);
1178
1179    // Generate the operations using the strategy we selected above.
1180    TEST_AND_RETURN_FALSE(strategy->GenerateOperations(config,
1181                                                       data_file_fd,
1182                                                       &data_file_size,
1183                                                       &rootfs_ops,
1184                                                       &kernel_ops));
1185  }
1186
1187  if (!config.source.ImageInfoIsEmpty())
1188    *(manifest.mutable_old_image_info()) = config.source.image_info;
1189
1190  if (!config.target.ImageInfoIsEmpty())
1191    *(manifest.mutable_new_image_info()) = config.target.image_info;
1192
1193  // Filter the no-operations. OperationsGenerators should not output this kind
1194  // of operations normally, but this is an extra step to fix that if
1195  // happened.
1196  DeltaDiffGenerator::FilterNoopOperations(&rootfs_ops);
1197  DeltaDiffGenerator::FilterNoopOperations(&kernel_ops);
1198
1199  OperationNameMap op_name_map;
1200  InstallOperationsToManifest(rootfs_ops, kernel_ops, &manifest, &op_name_map);
1201  manifest.set_block_size(config.block_size);
1202
1203  // Reorder the data blobs with the newly ordered manifest.
1204  string ordered_blobs_path;
1205  TEST_AND_RETURN_FALSE(utils::MakeTempFile(
1206      "CrAU_temp_data.ordered.XXXXXX",
1207      &ordered_blobs_path,
1208      nullptr));
1209  ScopedPathUnlinker ordered_blobs_unlinker(ordered_blobs_path);
1210  TEST_AND_RETURN_FALSE(
1211      DeltaDiffGenerator::ReorderDataBlobs(&manifest,
1212                                           temp_file_path,
1213                                           ordered_blobs_path));
1214  temp_file_unlinker.reset();
1215
1216  // Check that install op blobs are in order.
1217  uint64_t next_blob_offset = 0;
1218  {
1219    for (int i = 0; i < (manifest.install_operations_size() +
1220                         manifest.kernel_install_operations_size()); i++) {
1221      DeltaArchiveManifest_InstallOperation* op =
1222          i < manifest.install_operations_size() ?
1223          manifest.mutable_install_operations(i) :
1224          manifest.mutable_kernel_install_operations(
1225              i - manifest.install_operations_size());
1226      if (op->has_data_offset()) {
1227        if (op->data_offset() != next_blob_offset) {
1228          LOG(FATAL) << "bad blob offset! " << op->data_offset() << " != "
1229                     << next_blob_offset;
1230        }
1231        next_blob_offset += op->data_length();
1232      }
1233    }
1234  }
1235
1236  // Signatures appear at the end of the blobs. Note the offset in the
1237  // manifest
1238  if (!private_key_path.empty()) {
1239    uint64_t signature_blob_length = 0;
1240    TEST_AND_RETURN_FALSE(
1241        PayloadSigner::SignatureBlobLength(vector<string>(1, private_key_path),
1242                                           &signature_blob_length));
1243    DeltaDiffGenerator::AddSignatureOp(
1244        next_blob_offset, signature_blob_length, &manifest);
1245  }
1246
1247  TEST_AND_RETURN_FALSE(InitializePartitionInfos(config, &manifest));
1248
1249  // Serialize protobuf
1250  string serialized_manifest;
1251
1252  TEST_AND_RETURN_FALSE(manifest.AppendToString(&serialized_manifest));
1253
1254  LOG(INFO) << "Writing final delta file header...";
1255  DirectFileWriter writer;
1256  TEST_AND_RETURN_FALSE_ERRNO(writer.Open(output_path.c_str(),
1257                                          O_WRONLY | O_CREAT | O_TRUNC,
1258                                          0644) == 0);
1259  ScopedFileWriterCloser writer_closer(&writer);
1260
1261  // Write header
1262  TEST_AND_RETURN_FALSE(writer.Write(kDeltaMagic, strlen(kDeltaMagic)));
1263
1264  // Write major version number
1265  TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer, kMajorVersionNumber));
1266
1267  // Write protobuf length
1268  TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer,
1269                                               serialized_manifest.size()));
1270
1271  // Write protobuf
1272  LOG(INFO) << "Writing final delta file protobuf... "
1273            << serialized_manifest.size();
1274  TEST_AND_RETURN_FALSE(writer.Write(serialized_manifest.data(),
1275                                     serialized_manifest.size()));
1276
1277  // Append the data blobs
1278  LOG(INFO) << "Writing final delta file data blobs...";
1279  int blobs_fd = open(ordered_blobs_path.c_str(), O_RDONLY, 0);
1280  ScopedFdCloser blobs_fd_closer(&blobs_fd);
1281  TEST_AND_RETURN_FALSE(blobs_fd >= 0);
1282  for (;;) {
1283    vector<char> buf(config.block_size);
1284    ssize_t rc = read(blobs_fd, buf.data(), buf.size());
1285    if (0 == rc) {
1286      // EOF
1287      break;
1288    }
1289    TEST_AND_RETURN_FALSE_ERRNO(rc > 0);
1290    TEST_AND_RETURN_FALSE(writer.Write(buf.data(), rc));
1291  }
1292
1293  // Write signature blob.
1294  if (!private_key_path.empty()) {
1295    LOG(INFO) << "Signing the update...";
1296    chromeos::Blob signature_blob;
1297    TEST_AND_RETURN_FALSE(PayloadSigner::SignPayload(
1298        output_path,
1299        vector<string>(1, private_key_path),
1300        &signature_blob));
1301    TEST_AND_RETURN_FALSE(writer.Write(signature_blob.data(),
1302                                       signature_blob.size()));
1303  }
1304
1305  *metadata_size =
1306      strlen(kDeltaMagic) + 2 * sizeof(uint64_t) + serialized_manifest.size();
1307  ReportPayloadUsage(manifest, *metadata_size, op_name_map);
1308
1309  LOG(INFO) << "All done. Successfully created delta file with "
1310            << "metadata size = " << *metadata_size;
1311  return true;
1312}
1313
1314// Runs the bsdiff tool on two files and returns the resulting delta in
1315// 'out'. Returns true on success.
1316bool DeltaDiffGenerator::BsdiffFiles(const string& old_file,
1317                                     const string& new_file,
1318                                     chromeos::Blob* out) {
1319  const string kPatchFile = "delta.patchXXXXXX";
1320  string patch_file_path;
1321
1322  TEST_AND_RETURN_FALSE(
1323      utils::MakeTempFile(kPatchFile, &patch_file_path, nullptr));
1324
1325  vector<string> cmd;
1326  cmd.push_back(kBsdiffPath);
1327  cmd.push_back(old_file);
1328  cmd.push_back(new_file);
1329  cmd.push_back(patch_file_path);
1330
1331  int rc = 1;
1332  chromeos::Blob patch_file;
1333  TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc, nullptr));
1334  TEST_AND_RETURN_FALSE(rc == 0);
1335  TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
1336  unlink(patch_file_path.c_str());
1337  return true;
1338}
1339
1340void DeltaDiffGenerator::AddSignatureOp(uint64_t signature_blob_offset,
1341                                        uint64_t signature_blob_length,
1342                                        DeltaArchiveManifest* manifest) {
1343  LOG(INFO) << "Making room for signature in file";
1344  manifest->set_signatures_offset(signature_blob_offset);
1345  LOG(INFO) << "set? " << manifest->has_signatures_offset();
1346  // Add a dummy op at the end to appease older clients
1347  DeltaArchiveManifest_InstallOperation* dummy_op =
1348      manifest->add_kernel_install_operations();
1349  dummy_op->set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
1350  dummy_op->set_data_offset(signature_blob_offset);
1351  manifest->set_signatures_offset(signature_blob_offset);
1352  dummy_op->set_data_length(signature_blob_length);
1353  manifest->set_signatures_size(signature_blob_length);
1354  Extent* dummy_extent = dummy_op->add_dst_extents();
1355  // Tell the dummy op to write this data to a big sparse hole
1356  dummy_extent->set_start_block(kSparseHole);
1357  dummy_extent->set_num_blocks((signature_blob_length + kBlockSize - 1) /
1358                               kBlockSize);
1359}
1360
1361void DeltaDiffGenerator::ClearSparseHoles(vector<Extent>* extents) {
1362  extents->erase(std::remove_if(extents->begin(), extents->end(), IsSparseHole),
1363                 extents->end());
1364}
1365
1366void DeltaDiffGenerator::NormalizeExtents(vector<Extent>* extents) {
1367  vector<Extent> new_extents;
1368  for (const Extent& curr_ext : *extents) {
1369    if (new_extents.empty()) {
1370      new_extents.push_back(curr_ext);
1371      continue;
1372    }
1373    Extent& last_ext = new_extents.back();
1374    if (last_ext.start_block() + last_ext.num_blocks() ==
1375        curr_ext.start_block()) {
1376      // If the extents are touching, we want to combine them.
1377      last_ext.set_num_blocks(last_ext.num_blocks() + curr_ext.num_blocks());
1378    } else {
1379      // Otherwise just include the extent as is.
1380      new_extents.push_back(curr_ext);
1381    }
1382  }
1383  *extents = new_extents;
1384}
1385
1386bool DeltaDiffGenerator::FragmentOperations(
1387    vector<AnnotatedOperation>* aops,
1388    const string& target_part_path,
1389    int data_fd,
1390    off_t* data_file_size) {
1391  vector<AnnotatedOperation> fragmented_aops;
1392  for (const AnnotatedOperation& aop : *aops) {
1393    if (aop.op.type() ==
1394        DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY) {
1395      TEST_AND_RETURN_FALSE(SplitSourceCopy(aop, &fragmented_aops));
1396    } else if (aop.op.type() ==
1397               DeltaArchiveManifest_InstallOperation_Type_REPLACE) {
1398      TEST_AND_RETURN_FALSE(SplitReplace(aop, &fragmented_aops));
1399    } else if (aop.op.type() ==
1400               DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ) {
1401      TEST_AND_RETURN_FALSE(SplitReplaceBz(aop,
1402                                           &fragmented_aops,
1403                                           target_part_path,
1404                                           data_fd,
1405                                           data_file_size));
1406    } else {
1407      fragmented_aops.push_back(aop);
1408    }
1409  }
1410  *aops = fragmented_aops;
1411  return true;
1412}
1413
1414void DeltaDiffGenerator::SortOperationsByDestination(
1415    vector<AnnotatedOperation>* aops) {
1416  sort(aops->begin(), aops->end(), CompareAopsByDestination);
1417}
1418
1419bool DeltaDiffGenerator::SplitSourceCopy(
1420    const AnnotatedOperation& original_aop,
1421    vector<AnnotatedOperation>* result_aops) {
1422  DeltaArchiveManifest_InstallOperation original_op = original_aop.op;
1423  TEST_AND_RETURN_FALSE(original_op.type() ==
1424                        DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY);
1425  // Keeps track of the index of curr_src_ext.
1426  int curr_src_ext_index = 0;
1427  Extent curr_src_ext = original_op.src_extents(curr_src_ext_index);
1428  for (int i = 0; i < original_op.dst_extents_size(); i++) {
1429    Extent dst_ext = original_op.dst_extents(i);
1430    // The new operation which will have only one dst extent.
1431    DeltaArchiveManifest_InstallOperation new_op;
1432    uint64_t blocks_left = dst_ext.num_blocks();
1433    while (blocks_left > 0) {
1434      if (curr_src_ext.num_blocks() <= blocks_left) {
1435        // If the curr_src_ext is smaller than dst_ext, add it.
1436        blocks_left -= curr_src_ext.num_blocks();
1437        *(new_op.add_src_extents()) = curr_src_ext;
1438        if (curr_src_ext_index + 1 < original_op.src_extents().size()) {
1439          curr_src_ext = original_op.src_extents(++curr_src_ext_index);
1440        } else {
1441          break;
1442        }
1443      } else {
1444        // Split src_exts that are bigger than the dst_ext we're dealing with.
1445        Extent first_ext;
1446        first_ext.set_num_blocks(blocks_left);
1447        first_ext.set_start_block(curr_src_ext.start_block());
1448        *(new_op.add_src_extents()) = first_ext;
1449        // Keep the second half of the split op.
1450        curr_src_ext.set_num_blocks(curr_src_ext.num_blocks() - blocks_left);
1451        curr_src_ext.set_start_block(curr_src_ext.start_block() + blocks_left);
1452        blocks_left -= first_ext.num_blocks();
1453      }
1454    }
1455    // Fix up our new operation and add it to the results.
1456    new_op.set_type(DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY);
1457    *(new_op.add_dst_extents()) = dst_ext;
1458    new_op.set_src_length(dst_ext.num_blocks() * kBlockSize);
1459    new_op.set_dst_length(dst_ext.num_blocks() * kBlockSize);
1460
1461    AnnotatedOperation new_aop;
1462    new_aop.op = new_op;
1463    new_aop.name = base::StringPrintf("%s:%d", original_aop.name.c_str(), i);
1464    result_aops->push_back(new_aop);
1465  }
1466  if (curr_src_ext_index != original_op.src_extents().size() - 1) {
1467    LOG(FATAL) << "Incorrectly split SOURCE_COPY operation. Did not use all "
1468               << "source extents.";
1469  }
1470  return true;
1471}
1472
1473bool DeltaDiffGenerator::SplitReplace(const AnnotatedOperation& original_aop,
1474                                      vector<AnnotatedOperation>* result_aops) {
1475  DeltaArchiveManifest_InstallOperation original_op = original_aop.op;
1476  TEST_AND_RETURN_FALSE(original_op.type() ==
1477                        DeltaArchiveManifest_InstallOperation_Type_REPLACE);
1478  uint32_t data_offset = original_op.data_offset();
1479
1480  for (int i = 0; i < original_op.dst_extents_size(); i++) {
1481    Extent dst_ext = original_op.dst_extents(i);
1482    // Make a new operation with only one dst extent.
1483    DeltaArchiveManifest_InstallOperation new_op;
1484    *(new_op.add_dst_extents()) = dst_ext;
1485    new_op.set_type(original_op.type());
1486    uint32_t data_size = dst_ext.num_blocks() * kBlockSize;
1487    new_op.set_dst_length(data_size);
1488    new_op.set_data_length(data_size);
1489    new_op.set_data_offset(data_offset);
1490    data_offset += data_size;
1491
1492    AnnotatedOperation new_aop;
1493    new_aop.op = new_op;
1494    new_aop.name = base::StringPrintf("%s:%d", original_aop.name.c_str(), i);
1495    result_aops->push_back(new_aop);
1496  }
1497  if (data_offset != original_op.data_offset() + original_op.data_length()) {
1498    LOG(FATAL) << "Incorrectly split REPLACE operation. New data lengths do "
1499               << "not sum to original data length.";
1500  }
1501  return true;
1502}
1503
1504bool DeltaDiffGenerator::SplitReplaceBz(
1505    const AnnotatedOperation& original_aop,
1506    vector<AnnotatedOperation>* result_aops,
1507    const string& target_part_path,
1508    int data_fd,
1509    off_t* data_file_size) {
1510  DeltaArchiveManifest_InstallOperation original_op = original_aop.op;
1511  TEST_AND_RETURN_FALSE(original_op.type() ==
1512                        DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
1513
1514  int target_part_fd = open(target_part_path.c_str(), O_RDONLY, 000);
1515  TEST_AND_RETURN_FALSE_ERRNO(target_part_fd >= 0);
1516  ScopedFdCloser target_part_fd_closer(&target_part_fd);
1517
1518  for (int i = 0; i < original_op.dst_extents_size(); i++) {
1519    Extent dst_ext = original_op.dst_extents(i);
1520    // Make a new operation with only one dst extent.
1521    DeltaArchiveManifest_InstallOperation new_op;
1522    *(new_op.add_dst_extents()) = dst_ext;
1523    new_op.set_type(original_op.type());
1524    uint32_t uncompressed_data_size = dst_ext.num_blocks() * kBlockSize;
1525    new_op.set_dst_length(uncompressed_data_size);
1526
1527    // Get the original uncompressed data for this extent.
1528    ssize_t bytes_read;
1529    chromeos::Blob uncompressed_data(uncompressed_data_size);
1530    TEST_AND_RETURN_FALSE(utils::PReadAll(target_part_fd,
1531                                          uncompressed_data.data(),
1532                                          uncompressed_data_size,
1533                                          kBlockSize * dst_ext.start_block(),
1534                                          &bytes_read));
1535    TEST_AND_RETURN_FALSE(bytes_read ==
1536                          static_cast<ssize_t>(uncompressed_data_size));
1537
1538    chromeos::Blob new_data_bz;
1539    TEST_AND_RETURN_FALSE(BzipCompress(uncompressed_data, &new_data_bz));
1540    CHECK(!new_data_bz.empty());
1541
1542    AnnotatedOperation new_aop;
1543    new_aop.op = new_op;
1544    new_aop.SetOperationBlob(&new_data_bz, data_fd, data_file_size);
1545    new_aop.name = base::StringPrintf("%s:%d", original_aop.name.c_str(), i);
1546    result_aops->push_back(new_aop);
1547  }
1548  return true;
1549}
1550
1551bool DeltaDiffGenerator::MergeOperations(vector<AnnotatedOperation>* aops,
1552                                         off_t chunk_size,
1553                                         const string& target_part_path,
1554                                         int data_fd,
1555                                         off_t* data_file_size) {
1556  vector<AnnotatedOperation> new_aops;
1557  for (const AnnotatedOperation& curr_aop : *aops) {
1558    if (new_aops.empty()) {
1559      new_aops.push_back(curr_aop);
1560      continue;
1561    }
1562    AnnotatedOperation& last_aop = new_aops.back();
1563
1564    if (last_aop.op.dst_extents_size() <= 0 ||
1565        curr_aop.op.dst_extents_size() <= 0) {
1566      new_aops.push_back(curr_aop);
1567      continue;
1568    }
1569    uint32_t last_dst_idx = last_aop.op.dst_extents_size() - 1;
1570    uint32_t last_end_block =
1571        last_aop.op.dst_extents(last_dst_idx).start_block() +
1572        last_aop.op.dst_extents(last_dst_idx).num_blocks();
1573    uint32_t curr_start_block = curr_aop.op.dst_extents(0).start_block();
1574    uint32_t combined_block_count =
1575        last_aop.op.dst_extents(last_dst_idx).num_blocks() +
1576        curr_aop.op.dst_extents(0).num_blocks();
1577    bool good_op_type =
1578        curr_aop.op.type() ==
1579            DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY ||
1580        curr_aop.op.type() ==
1581            DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
1582        curr_aop.op.type() ==
1583            DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ;
1584    if (good_op_type &&
1585        last_aop.op.type() == curr_aop.op.type() &&
1586        last_end_block == curr_start_block &&
1587        static_cast<off_t>(combined_block_count * kBlockSize) <= chunk_size) {
1588      // If the operations have the same type (which is a type that we can
1589      // merge), are contiguous, are fragmented to have one destination extent,
1590      // and their combined block count would be less than chunk size, merge
1591      // them.
1592      last_aop.name = base::StringPrintf("%s,%s",
1593                                         last_aop.name.c_str(),
1594                                         curr_aop.name.c_str());
1595
1596      ExtendExtents(last_aop.op.mutable_src_extents(),
1597                    curr_aop.op.src_extents());
1598      last_aop.op.set_src_length(last_aop.op.src_length() +
1599                                 curr_aop.op.src_length());
1600      ExtendExtents(last_aop.op.mutable_dst_extents(),
1601                    curr_aop.op.dst_extents());
1602      last_aop.op.set_dst_length(last_aop.op.dst_length() +
1603                                 curr_aop.op.dst_length());
1604      // Set the data length to zero so we know to add the blob later.
1605      if (curr_aop.op.type() ==
1606          DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
1607          curr_aop.op.type() ==
1608          DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ) {
1609        last_aop.op.set_data_length(0);
1610      }
1611    } else {
1612      // Otherwise just include the extent as is.
1613      new_aops.push_back(curr_aop);
1614    }
1615  }
1616
1617  // Set the blobs for REPLACE/REPLACE_BZ operations that have been merged.
1618  for (AnnotatedOperation& curr_aop : new_aops) {
1619    if (curr_aop.op.data_length() == 0 &&
1620        (curr_aop.op.type() ==
1621            DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
1622         curr_aop.op.type() ==
1623            DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ)) {
1624      chromeos::Blob data(curr_aop.op.dst_length());
1625      vector<Extent> dst_extents;
1626      ExtentsToVector(curr_aop.op.dst_extents(), &dst_extents);
1627      TEST_AND_RETURN_FALSE(utils::ReadExtents(target_part_path,
1628                                               dst_extents,
1629                                               &data,
1630                                               data.size(),
1631                                               kBlockSize));
1632      if (curr_aop.op.type() ==
1633          DeltaArchiveManifest_InstallOperation_Type_REPLACE) {
1634        curr_aop.SetOperationBlob(&data, data_fd, data_file_size);
1635      } else if (curr_aop.op.type() ==
1636          DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ) {
1637        chromeos::Blob data_bz;
1638        TEST_AND_RETURN_FALSE(BzipCompress(data, &data_bz));
1639        curr_aop.SetOperationBlob(&data_bz, data_fd, data_file_size);
1640      }
1641    }
1642  }
1643
1644  *aops = new_aops;
1645  return true;
1646}
1647
1648void DeltaDiffGenerator::ExtendExtents(
1649    google::protobuf::RepeatedPtrField<Extent>* extents,
1650    const google::protobuf::RepeatedPtrField<Extent>& extents_to_add) {
1651  vector<Extent> extents_vector;
1652  vector<Extent> extents_to_add_vector;
1653  ExtentsToVector(*extents, &extents_vector);
1654  ExtentsToVector(extents_to_add, &extents_to_add_vector);
1655  extents_vector.insert(extents_vector.end(),
1656                        extents_to_add_vector.begin(),
1657                        extents_to_add_vector.end());
1658  NormalizeExtents(&extents_vector);
1659  extents->Clear();
1660  StoreExtents(extents_vector, extents);
1661}
1662
1663};  // namespace chromeos_update_engine
1664