delta_diff_generator.cc revision 46bd4e20c20d6cb1a2f18d39237b9345d576669e
1// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "update_engine/payload_generator/delta_diff_generator.h"
6
7#include <errno.h>
8#include <fcntl.h>
9#include <inttypes.h>
10#include <sys/stat.h>
11#include <sys/types.h>
12
13#include <algorithm>
14#include <map>
15#include <memory>
16#include <string>
17#include <utility>
18#include <vector>
19
20#include <base/files/file_path.h>
21#include <base/files/file_util.h>
22#include <base/logging.h>
23#include <base/strings/stringprintf.h>
24#include <base/strings/string_util.h>
25#include <bzlib.h>
26#include <chromeos/secure_blob.h>
27
28#include "update_engine/bzip.h"
29#include "update_engine/delta_performer.h"
30#include "update_engine/file_writer.h"
31#include "update_engine/omaha_hash_calculator.h"
32#include "update_engine/payload_constants.h"
33#include "update_engine/payload_generator/extent_mapper.h"
34#include "update_engine/payload_generator/filesystem_iterator.h"
35#include "update_engine/payload_generator/full_update_generator.h"
36#include "update_engine/payload_generator/graph_types.h"
37#include "update_engine/payload_generator/graph_utils.h"
38#include "update_engine/payload_generator/inplace_generator.h"
39#include "update_engine/payload_generator/metadata.h"
40#include "update_engine/payload_generator/payload_signer.h"
41#include "update_engine/payload_verifier.h"
42#include "update_engine/subprocess.h"
43#include "update_engine/update_metadata.pb.h"
44#include "update_engine/utils.h"
45
46using std::map;
47using std::max;
48using std::min;
49using std::set;
50using std::sort;
51using std::string;
52using std::unique_ptr;
53using std::vector;
54
namespace {

// Major version number constant.
// NOTE(review): presumably the major version of the payload format; its use
// is outside this section of the file -- confirm.
const uint64_t kMajorVersionNumber = 1;

// The maximum destination size allowed for bsdiff. In general, bsdiff should
// work for arbitrary big files, but the payload generation and payload
// application requires a significant amount of RAM. We put a hard-limit of
// 200 MiB that should not affect any released board, but will limit the
// Chrome binary in ASan builders.
const off_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024;  // bytes

// Printable names of the install operation types. The table is indexed with
// the numeric value of an operation's type, so the order of the entries
// matters (presumably it mirrors the enum in the manifest proto -- confirm
// when adding a type). Both the strings and the table itself are immutable;
// the anonymous namespace already gives the table internal linkage.
const char* const kInstallOperationTypes[] = {
  "REPLACE",
  "REPLACE_BZ",
  "MOVE",
  "BSDIFF",
  "SOURCE_COPY",
  "SOURCE_BSDIFF"
};

}  // namespace
76
77namespace chromeos_update_engine {
78
79typedef DeltaDiffGenerator::Block Block;
80typedef map<const DeltaArchiveManifest_InstallOperation*,
81            string> OperationNameMap;
82
83// bytes
84const size_t kRootFSPartitionSize = static_cast<size_t>(2) * 1024 * 1024 * 1024;
85const size_t kBlockSize = 4096;  // bytes
86const char* const kEmptyPath = "";
87const char* const kBsdiffPath = "bsdiff";
88
89// Needed for testing purposes, in case we can't use actual filesystem objects.
90// TODO(garnold) (chromium:331965) Replace this hack with a properly injected
91// parameter in form of a mockable abstract class.
92bool (*get_extents_with_chunk_func)(const string&, off_t, off_t,
93                                    vector<Extent>*) =
94    extent_mapper::ExtentsForFileChunkFibmap;
95
96namespace {
97
98bool IsSparseHole(const Extent &extent) {
99  return (extent.start_block() == kSparseHole);
100}
101
102// Stores all the extents of |path| into |extents|. Returns true on success.
103bool GatherExtents(const string& path,
104                   off_t chunk_offset,
105                   off_t chunk_size,
106                   vector<Extent>* extents) {
107  extents->clear();
108  TEST_AND_RETURN_FALSE(
109      get_extents_with_chunk_func(path, chunk_offset, chunk_size, extents));
110  return true;
111}
112
113// Writes the uint64_t passed in in host-endian to the file as big-endian.
114// Returns true on success.
115bool WriteUint64AsBigEndian(FileWriter* writer, const uint64_t value) {
116  uint64_t value_be = htobe64(value);
117  TEST_AND_RETURN_FALSE(writer->Write(&value_be, sizeof(value_be)));
118  return true;
119}
120
121// Adds each operation from |rootfs_ops| and |kernel_ops| to |out_manifest| in
122// the order they come in those vectors. reports the operations names
123void InstallOperationsToManifest(
124    const vector<AnnotatedOperation>& rootfs_ops,
125    const vector<AnnotatedOperation>& kernel_ops,
126    DeltaArchiveManifest* out_manifest,
127    OperationNameMap* out_op_name_map) {
128  for (const AnnotatedOperation& aop : rootfs_ops) {
129    if (DeltaDiffGenerator::IsNoopOperation(aop.op))
130      continue;
131    DeltaArchiveManifest_InstallOperation* new_op =
132        out_manifest->add_install_operations();
133    (*out_op_name_map)[new_op] = aop.name;
134    *new_op = aop.op;
135  }
136  for (const AnnotatedOperation& aop : kernel_ops) {
137    if (DeltaDiffGenerator::IsNoopOperation(aop.op))
138      continue;
139    DeltaArchiveManifest_InstallOperation* new_op =
140        out_manifest->add_kernel_install_operations();
141    (*out_op_name_map)[new_op] = aop.name;
142    *new_op = aop.op;
143  }
144}
145
// One row of the payload usage report: a named object, the numeric type of
// the operation behind it (negative when there is none) and its size in
// bytes.
struct DeltaObject {
  DeltaObject(const string& in_name, const int in_type, const off_t in_size)
      : name(in_name), type(in_type), size(in_size) {}

  // Orders objects by ascending size; objects of equal size are ordered by
  // name.
  bool operator <(const DeltaObject& object) const {
    if (size == object.size)
      return name < object.name;
    return size < object.size;
  }

  string name;
  int type;
  off_t size;
};
158
159void ReportPayloadUsage(const DeltaArchiveManifest& manifest,
160                        const int64_t manifest_metadata_size,
161                        const OperationNameMap& op_name_map) {
162  vector<DeltaObject> objects;
163  off_t total_size = 0;
164
165  // Rootfs install operations.
166  for (int i = 0; i < manifest.install_operations_size(); ++i) {
167    const DeltaArchiveManifest_InstallOperation& op =
168        manifest.install_operations(i);
169    objects.push_back(DeltaObject(op_name_map.find(&op)->second,
170                                  op.type(),
171                                  op.data_length()));
172    total_size += op.data_length();
173  }
174
175  // Kernel install operations.
176  for (int i = 0; i < manifest.kernel_install_operations_size(); ++i) {
177    const DeltaArchiveManifest_InstallOperation& op =
178        manifest.kernel_install_operations(i);
179    objects.push_back(DeltaObject(base::StringPrintf("<kernel-operation-%d>",
180                                                     i),
181                                  op.type(),
182                                  op.data_length()));
183    total_size += op.data_length();
184  }
185
186  objects.push_back(DeltaObject("<manifest-metadata>",
187                                -1,
188                                manifest_metadata_size));
189  total_size += manifest_metadata_size;
190
191  std::sort(objects.begin(), objects.end());
192
193  static const char kFormatString[] = "%6.2f%% %10jd %-10s %s\n";
194  for (const DeltaObject& object : objects) {
195    fprintf(stderr, kFormatString,
196            object.size * 100.0 / total_size,
197            static_cast<intmax_t>(object.size),
198            object.type >= 0 ? kInstallOperationTypes[object.type] : "-",
199            object.name.c_str());
200  }
201  fprintf(stderr, kFormatString,
202          100.0, static_cast<intmax_t>(total_size), "", "<total>");
203}
204
205// Process a range of blocks from |range_start| to |range_end| in the extent at
206// position |*idx_p| of |extents|. If |do_remove| is true, this range will be
207// removed, which may cause the extent to be trimmed, split or removed entirely.
208// The value of |*idx_p| is updated to point to the next extent to be processed.
209// Returns true iff the next extent to process is a new or updated one.
210bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p,
211                             const bool do_remove, uint64_t range_start,
212                             uint64_t range_end) {
213  size_t idx = *idx_p;
214  uint64_t start_block = (*extents)[idx].start_block();
215  uint64_t num_blocks = (*extents)[idx].num_blocks();
216  uint64_t range_size = range_end - range_start;
217
218  if (do_remove) {
219    if (range_size == num_blocks) {
220      // Remove the entire extent.
221      extents->erase(extents->begin() + idx);
222    } else if (range_end == num_blocks) {
223      // Trim the end of the extent.
224      (*extents)[idx].set_num_blocks(num_blocks - range_size);
225      idx++;
226    } else if (range_start == 0) {
227      // Trim the head of the extent.
228      (*extents)[idx].set_start_block(start_block + range_size);
229      (*extents)[idx].set_num_blocks(num_blocks - range_size);
230    } else {
231      // Trim the middle, splitting the remainder into two parts.
232      (*extents)[idx].set_num_blocks(range_start);
233      Extent e;
234      e.set_start_block(start_block + range_end);
235      e.set_num_blocks(num_blocks - range_end);
236      idx++;
237      extents->insert(extents->begin() + idx, e);
238    }
239  } else if (range_end == num_blocks) {
240    // Done with this extent.
241    idx++;
242  } else {
243    return false;
244  }
245
246  *idx_p = idx;
247  return true;
248}
249
250// Remove identical corresponding block ranges in |src_extents| and
251// |dst_extents|. Used for preventing moving of blocks onto themselves during
252// MOVE operations. The value of |total_bytes| indicates the actual length of
253// content; this may be slightly less than the total size of blocks, in which
254// case the last block is only partly occupied with data. Returns the total
255// number of bytes removed.
256size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents,
257                                  vector<Extent>* dst_extents,
258                                  const size_t total_bytes) {
259  size_t src_idx = 0;
260  size_t dst_idx = 0;
261  uint64_t src_offset = 0, dst_offset = 0;
262  bool new_src = true, new_dst = true;
263  size_t removed_bytes = 0, nonfull_block_bytes;
264  bool do_remove = false;
265  while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) {
266    if (new_src) {
267      src_offset = 0;
268      new_src = false;
269    }
270    if (new_dst) {
271      dst_offset = 0;
272      new_dst = false;
273    }
274
275    do_remove = ((*src_extents)[src_idx].start_block() + src_offset ==
276                 (*dst_extents)[dst_idx].start_block() + dst_offset);
277
278    uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks();
279    uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks();
280    uint64_t min_num_blocks = min(src_num_blocks - src_offset,
281                                  dst_num_blocks - dst_offset);
282    uint64_t prev_src_offset = src_offset;
283    uint64_t prev_dst_offset = dst_offset;
284    src_offset += min_num_blocks;
285    dst_offset += min_num_blocks;
286
287    new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove,
288                                      prev_src_offset, src_offset);
289    new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove,
290                                      prev_dst_offset, dst_offset);
291    if (do_remove)
292      removed_bytes += min_num_blocks * kBlockSize;
293  }
294
295  // If we removed the last block and this block is only partly used by file
296  // content, deduct the unused portion from the total removed byte count.
297  if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize))
298    removed_bytes -= kBlockSize - nonfull_block_bytes;
299
300  return removed_bytes;
301}
302
303// Compare two AnnotatedOperations by the start block of the first Extent in
304// their destination extents.
305bool CompareAopsByDestination(AnnotatedOperation first_aop,
306                              AnnotatedOperation second_aop) {
307  // We want empty operations to be at the end of the payload.
308  if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size())
309    return ((!first_aop.op.dst_extents().size()) <
310            (!second_aop.op.dst_extents().size()));
311  uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block();
312  uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block();
313  return first_dst_start < second_dst_start;
314}
315
316}  // namespace
317
318bool DeltaDiffGenerator::DeltaReadFiles(Graph* graph,
319                                        vector<Block>* blocks,
320                                        const string& old_part,
321                                        const string& new_part,
322                                        const string& old_root,
323                                        const string& new_root,
324                                        off_t chunk_size,
325                                        int data_fd,
326                                        off_t* data_file_size,
327                                        bool src_ops_allowed) {
328  set<ino_t> visited_inodes;
329  set<ino_t> visited_src_inodes;
330  for (FilesystemIterator fs_iter(new_root,
331                                  set<string>{"/lost+found"});
332       !fs_iter.IsEnd(); fs_iter.Increment()) {
333    // We never diff symlinks (here, we check that dst file is not a symlink).
334    if (!S_ISREG(fs_iter.GetStat().st_mode))
335      continue;
336
337    // Make sure we visit each inode only once.
338    if (utils::SetContainsKey(visited_inodes, fs_iter.GetStat().st_ino))
339      continue;
340    visited_inodes.insert(fs_iter.GetStat().st_ino);
341    off_t dst_size = fs_iter.GetFileSize();
342    if (dst_size == 0)
343      continue;
344
345    LOG(INFO) << "Encoding file " << fs_iter.GetPartialPath();
346
347    // We can't visit each dst image inode more than once, as that would
348    // duplicate work. Here, we avoid visiting each source image inode
349    // more than once. Technically, we could have multiple operations
350    // that read the same blocks from the source image for diffing, but
351    // we choose not to avoid complexity. Eventually we will move away
352    // from using a graph/cycle detection/etc to generate diffs, and at that
353    // time, it will be easy (non-complex) to have many operations read
354    // from the same source blocks. At that time, this code can die. -adlr
355    bool should_diff_from_source = false;
356    string src_path = old_root + fs_iter.GetPartialPath();
357    struct stat src_stbuf;
358    // We never diff symlinks (here, we check that src file is not a symlink).
359    if (0 == lstat(src_path.c_str(), &src_stbuf) &&
360        S_ISREG(src_stbuf.st_mode)) {
361      should_diff_from_source = !utils::SetContainsKey(visited_src_inodes,
362                                                       src_stbuf.st_ino);
363      visited_src_inodes.insert(src_stbuf.st_ino);
364    }
365
366    off_t size = chunk_size == -1 ? dst_size : chunk_size;
367    off_t step = size;
368    for (off_t offset = 0; offset < dst_size; offset += step) {
369      if (offset + size >= dst_size) {
370        size = -1;  // Read through the end of the file.
371      }
372      TEST_AND_RETURN_FALSE(DeltaDiffGenerator::DeltaReadFile(
373          graph,
374          Vertex::kInvalidIndex,
375          blocks,
376          old_part,
377          new_part,
378          (should_diff_from_source ? old_root : kEmptyPath),
379          new_root,
380          fs_iter.GetPartialPath(),
381          offset,
382          size,
383          data_fd,
384          data_file_size,
385          src_ops_allowed));
386    }
387  }
388  return true;
389}
390
391bool DeltaDiffGenerator::DeltaReadFile(Graph* graph,
392                                       Vertex::Index existing_vertex,
393                                       vector<Block>* blocks,
394                                       const string& old_part,
395                                       const string& new_part,
396                                       const string& old_root,
397                                       const string& new_root,
398                                       const string& path,  // within new_root
399                                       off_t chunk_offset,
400                                       off_t chunk_size,
401                                       int data_fd,
402                                       off_t* data_file_size,
403                                       bool src_ops_allowed) {
404  chromeos::Blob data;
405  DeltaArchiveManifest_InstallOperation operation;
406
407  // If bsdiff breaks again, blacklist the problem file by using:
408  //   bsdiff_allowed = (path != "/foo/bar")
409  //
410  // TODO(dgarrett): chromium-os:15274 connect this test to the command line.
411  bool bsdiff_allowed = true;
412
413  if (utils::FileSize(new_root + path) > kMaxBsdiffDestinationSize)
414    bsdiff_allowed = false;
415
416  if (!bsdiff_allowed)
417    LOG(INFO) << "bsdiff blacklisting: " << path;
418
419  string old_filename = (old_root == kEmptyPath) ? kEmptyPath : old_root + path;
420
421  TEST_AND_RETURN_FALSE(DeltaDiffGenerator::ReadFileToDiff(old_part,
422                                                           new_part,
423                                                           chunk_offset,
424                                                           chunk_size,
425                                                           bsdiff_allowed,
426                                                           &data,
427                                                           &operation,
428                                                           true,
429                                                           src_ops_allowed,
430                                                           old_filename,
431                                                           new_root + path));
432
433  // Check if the operation writes nothing.
434  if (operation.dst_extents_size() == 0) {
435    if (operation.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE) {
436      LOG(INFO) << "Empty MOVE operation ("
437                << new_root + path << "), skipping";
438      return true;
439    } else {
440      LOG(ERROR) << "Empty non-MOVE operation";
441      return false;
442    }
443  }
444
445  // Write the data
446  if (operation.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE &&
447      operation.type() !=
448          DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY) {
449    operation.set_data_offset(*data_file_size);
450    operation.set_data_length(data.size());
451  }
452
453  TEST_AND_RETURN_FALSE(utils::WriteAll(data_fd, data.data(), data.size()));
454  *data_file_size += data.size();
455
456  // Now, insert into graph and blocks vector
457  Vertex::Index vertex = existing_vertex;
458  if (vertex == Vertex::kInvalidIndex) {
459    graph->emplace_back();
460    vertex = graph->size() - 1;
461  }
462  (*graph)[vertex].op = operation;
463  CHECK((*graph)[vertex].op.has_type());
464  (*graph)[vertex].file_name = path;
465  (*graph)[vertex].chunk_offset = chunk_offset;
466  (*graph)[vertex].chunk_size = chunk_size;
467
468  if (blocks)
469    TEST_AND_RETURN_FALSE(InplaceGenerator::AddInstallOpToBlocksVector(
470        (*graph)[vertex].op,
471        *graph,
472        vertex,
473        blocks));
474  return true;
475}
476
477bool DeltaDiffGenerator::ReadFileToDiff(
478    const string& old_part,
479    const string& new_part,
480    off_t chunk_offset,
481    off_t chunk_size,
482    bool bsdiff_allowed,
483    chromeos::Blob* out_data,
484    DeltaArchiveManifest_InstallOperation* out_op,
485    bool gather_extents,
486    bool src_ops_allowed,
487    const string& old_filename,
488    const string& new_filename) {
489
490  // Do we have an original file to consider?
491  off_t old_size = 0;
492  bool original = !old_filename.empty();
493  if (original && (old_size = utils::FileSize(old_filename)) < 0) {
494    // If stat-ing the old file fails, it should be because it doesn't exist.
495    TEST_AND_RETURN_FALSE(!utils::FileExists(old_filename.c_str()));
496    original = false;
497  }
498
499  DeltaArchiveManifest_InstallOperation operation;
500  vector<Extent> src_extents, dst_extents;
501  // Gather source extents if we have an original file.
502  if (original) {
503    if (gather_extents) {
504      TEST_AND_RETURN_FALSE(
505          GatherExtents(old_filename, chunk_offset, chunk_size, &src_extents));
506      ClearSparseHoles(&src_extents);
507      if (src_extents.size() == 0) {
508        // Reading from sparse hole, do nothing.
509        operation.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
510        *out_op = operation;
511        return true;
512      }
513    } else {
514      // We have a kernel, so make one extent to cover it all.
515      Extent* src_extent = operation.add_src_extents();
516      src_extent->set_start_block(0);
517      src_extent->set_num_blocks(
518          (utils::FileSize(old_filename) + (kBlockSize - 1)) / kBlockSize);
519      src_extents.push_back(*src_extent);
520    }
521  }
522
523  // Gather destination extents.
524  if (gather_extents) {
525    TEST_AND_RETURN_FALSE(
526        GatherExtents(new_filename, chunk_offset, chunk_size, &dst_extents));
527    ClearSparseHoles(&dst_extents);
528    if (dst_extents.size() == 0) {
529      // Make an empty move operation.
530      operation.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
531      *out_op = operation;
532      return true;
533    }
534  } else {
535    Extent* dst_extent = operation.add_dst_extents();
536    dst_extent->set_start_block(0);
537    dst_extent->set_num_blocks(
538        (utils::FileSize(new_filename) + (kBlockSize - 1)) / kBlockSize);
539    dst_extents.push_back(*dst_extent);
540  }
541
542  NormalizeExtents(&src_extents);
543  NormalizeExtents(&dst_extents);
544
545  // Figure out how many blocks we need to write to dst_extents.
546  uint64_t blocks_to_write = 0;
547  for (uint32_t i = 0; i < dst_extents.size(); i++)
548    blocks_to_write += dst_extents[i].num_blocks();
549
550  // Figure out how many blocks we need to read to src_extents.
551  uint64_t blocks_to_read = 0;
552  for (uint32_t i = 0; i < src_extents.size(); i++)
553    blocks_to_read += src_extents[i].num_blocks();
554
555  // Read in bytes from new data.
556  chromeos::Blob new_data;
557  TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part,
558                                           dst_extents,
559                                           &new_data,
560                                           kBlockSize * blocks_to_write,
561                                           kBlockSize));
562
563  TEST_AND_RETURN_FALSE(!new_data.empty());
564  TEST_AND_RETURN_FALSE(chunk_size == -1 ||
565                        static_cast<off_t>(new_data.size()) <= chunk_size);
566
567  chromeos::Blob new_data_bz;
568  TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz));
569  CHECK(!new_data_bz.empty());
570  chromeos::Blob data;  // Data blob that will be written to delta file.
571
572  size_t current_best_size = 0;
573  if (new_data.size() <= new_data_bz.size()) {
574    operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
575    current_best_size = new_data.size();
576    data = new_data;
577  } else {
578    operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
579    current_best_size = new_data_bz.size();
580    data = new_data_bz;
581  }
582  chromeos::Blob old_data;
583  if (original) {
584    // Read old data.
585    TEST_AND_RETURN_FALSE(
586        utils::ReadExtents(old_part, src_extents, &old_data,
587                           kBlockSize * blocks_to_read, kBlockSize));
588    if (old_data == new_data) {
589      // No change in data.
590      if (src_ops_allowed) {
591        operation.set_type(
592            DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY);
593      } else {
594        operation.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
595      }
596      current_best_size = 0;
597      data.clear();
598    } else if (!old_data.empty() && bsdiff_allowed) {
599      // If the source file is considered bsdiff safe (no bsdiff bugs
600      // triggered), see if BSDIFF encoding is smaller.
601      base::FilePath old_chunk;
602      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&old_chunk));
603      ScopedPathUnlinker old_unlinker(old_chunk.value());
604      TEST_AND_RETURN_FALSE(
605          utils::WriteFile(old_chunk.value().c_str(),
606                           old_data.data(), old_data.size()));
607      base::FilePath new_chunk;
608      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&new_chunk));
609      ScopedPathUnlinker new_unlinker(new_chunk.value());
610      TEST_AND_RETURN_FALSE(
611          utils::WriteFile(new_chunk.value().c_str(),
612                           new_data.data(), new_data.size()));
613
614      chromeos::Blob bsdiff_delta;
615      TEST_AND_RETURN_FALSE(
616          BsdiffFiles(old_chunk.value(), new_chunk.value(), &bsdiff_delta));
617      CHECK_GT(bsdiff_delta.size(), static_cast<chromeos::Blob::size_type>(0));
618      if (bsdiff_delta.size() < current_best_size) {
619        if (src_ops_allowed) {
620          operation.set_type(
621              DeltaArchiveManifest_InstallOperation_Type_SOURCE_BSDIFF);
622        } else {
623          operation.set_type(DeltaArchiveManifest_InstallOperation_Type_BSDIFF);
624        }
625        current_best_size = bsdiff_delta.size();
626        data = bsdiff_delta;
627      }
628    }
629  }
630
631  operation.set_src_length(old_data.size());
632  operation.set_dst_length(new_data.size());
633
634  // Set parameters of the operations
635  CHECK_EQ(data.size(), current_best_size);
636
637  if (gather_extents) {
638    // Remove identical src/dst block ranges in MOVE operations.
639    if (operation.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE) {
640      size_t removed_bytes = RemoveIdenticalBlockRanges(
641          &src_extents, &dst_extents, new_data.size());
642
643      // Adjust the file length field accordingly.
644      if (removed_bytes) {
645        operation.set_src_length(old_data.size() - removed_bytes);
646        operation.set_dst_length(new_data.size() - removed_bytes);
647      }
648    }
649
650    // Embed extents in the operation.
651    StoreExtents(src_extents, operation.mutable_src_extents());
652    StoreExtents(dst_extents, operation.mutable_dst_extents());
653  }
654
655  // Replace operations should not have source extents.
656  if (operation.type() == DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
657      operation.type() ==
658          DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ) {
659    operation.clear_src_extents();
660    operation.clear_src_length();
661  }
662
663  out_data->swap(data);
664  *out_op = operation;
665
666  return true;
667}
668
669bool DeltaDiffGenerator::DeltaCompressKernelPartition(
670    const string& old_kernel_part,
671    const string& new_kernel_part,
672    vector<AnnotatedOperation>* kernel_ops,
673    int blobs_fd,
674    off_t* blobs_length,
675    bool src_ops_allowed) {
676  LOG(INFO) << "Delta compressing kernel partition...";
677  LOG_IF(INFO, old_kernel_part.empty()) << "Generating full kernel update...";
678
679  DeltaArchiveManifest_InstallOperation op;
680  chromeos::Blob data;
681  TEST_AND_RETURN_FALSE(
682      ReadFileToDiff(old_kernel_part,
683                     new_kernel_part,
684                     0,  // chunk_offset
685                     -1,  // chunk_size
686                     true,  // bsdiff_allowed
687                     &data,
688                     &op,
689                     false,  // gather_extents
690                     src_ops_allowed,
691                     old_kernel_part,  // Doesn't matter, kernel has no files.
692                     new_kernel_part));
693
694  // Check if the operation writes nothing.
695  if (op.dst_extents_size() == 0) {
696    if (op.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE) {
697      LOG(INFO) << "Empty MOVE operation, nothing to do.";
698      return true;
699    } else {
700      LOG(ERROR) << "Empty non-MOVE operation";
701      return false;
702    }
703  }
704
705  // Write the data.
706  if (op.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE &&
707      op.type() != DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY) {
708    op.set_data_offset(*blobs_length);
709    op.set_data_length(data.size());
710  }
711
712  // Add the new install operation.
713  kernel_ops->clear();
714  kernel_ops->emplace_back();
715  kernel_ops->back().op = op;
716  kernel_ops->back().name = "<kernel-delta-operation>";
717
718  TEST_AND_RETURN_FALSE(utils::WriteAll(blobs_fd, data.data(), data.size()));
719  *blobs_length += data.size();
720
721  LOG(INFO) << "Done delta compressing kernel partition: "
722            << kInstallOperationTypes[op.type()];
723  return true;
724}
725
726// TODO(deymo): Replace Vertex with AnnotatedOperation. This requires to move
727// out the code that adds the reader dependencies on the new vertex.
728bool DeltaDiffGenerator::ReadUnwrittenBlocks(
729    const vector<Block>& blocks,
730    int blobs_fd,
731    off_t* blobs_length,
732    const string& old_image_path,
733    const uint64_t old_image_size,
734    const string& new_image_path,
735    Vertex* vertex,
736    uint32_t minor_version) {
737  vertex->file_name = "<rootfs-non-file-data>";
738
739  DeltaArchiveManifest_InstallOperation* out_op = &vertex->op;
740  int new_image_fd = open(new_image_path.c_str(), O_RDONLY, 000);
741  TEST_AND_RETURN_FALSE_ERRNO(new_image_fd >= 0);
742  ScopedFdCloser new_image_fd_closer(&new_image_fd);
743  int old_image_fd = open(old_image_path.c_str(), O_RDONLY, 000);
744  TEST_AND_RETURN_FALSE_ERRNO(old_image_fd >= 0);
745  ScopedFdCloser old_image_fd_closer(&old_image_fd);
746
747  string temp_file_path;
748  TEST_AND_RETURN_FALSE(utils::MakeTempFile("CrAU_temp_data.XXXXXX",
749                                            &temp_file_path,
750                                            nullptr));
751
752  FILE* file = fopen(temp_file_path.c_str(), "w");
753  TEST_AND_RETURN_FALSE(file);
754  int err = BZ_OK;
755
756  BZFILE* bz_file = BZ2_bzWriteOpen(&err,
757                                    file,
758                                    9,  // max compression
759                                    0,  // verbosity
760                                    0);  // default work factor
761  TEST_AND_RETURN_FALSE(err == BZ_OK);
762
763  vector<Extent> extents;
764  vector<Block>::size_type block_count = 0;
765
766  LOG(INFO) << "Appending unwritten blocks to extents";
767  for (vector<Block>::size_type i = 0; i < blocks.size(); i++) {
768    if (blocks[i].writer != Vertex::kInvalidIndex)
769      continue;
770    graph_utils::AppendBlockToExtents(&extents, i);
771    block_count++;
772  }
773
774  // Code will handle buffers of any size that's a multiple of kBlockSize,
775  // so we arbitrarily set it to 1024 * kBlockSize.
776  chromeos::Blob new_buf(1024 * kBlockSize);
777  chromeos::Blob old_buf(1024 * kBlockSize);
778
779  LOG(INFO) << "Scanning " << block_count << " unwritten blocks";
780  vector<Extent> changed_extents;
781  vector<Block>::size_type changed_block_count = 0;
782  vector<Block>::size_type blocks_copied_count = 0;
783
784  // For each extent in extents, write the unchanged blocks into BZ2_bzWrite,
785  // which sends it to an output file.  We use the temporary buffers to hold the
786  // old and new data, which may be smaller than the extent, so in that case we
787  // have to loop to get the extent's data (that's the inner while loop).
788  for (const Extent& extent : extents) {
789    vector<Block>::size_type blocks_read = 0;
790    float printed_progress = -1;
791    while (blocks_read < extent.num_blocks()) {
792      const uint64_t copy_first_block = extent.start_block() + blocks_read;
793      const int copy_block_cnt =
794          min(new_buf.size() / kBlockSize,
795              static_cast<chromeos::Blob::size_type>(
796                  extent.num_blocks() - blocks_read));
797      const size_t count = copy_block_cnt * kBlockSize;
798      const off_t offset = copy_first_block * kBlockSize;
799      ssize_t rc = pread(new_image_fd, new_buf.data(), count, offset);
800      TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
801      TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) == count);
802
803      const off_t old_offset = min(offset, static_cast<off_t>(old_image_size));
804      const size_t old_count =
805          min(static_cast<size_t>(offset + count),
806              static_cast<size_t>(old_image_size)) - old_offset;
807      rc = pread(old_image_fd, old_buf.data(), old_count, old_offset);
808      TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
809      TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) == old_count);
810
811      // Compare each block in the buffer to its counterpart in the old image
812      // and only compress it if its content has changed.
813      int buf_offset = 0;
814      for (int i = 0; i < copy_block_cnt; ++i) {
815        int buf_end_offset = buf_offset + kBlockSize;
816        if (minor_version == kSourceMinorPayloadVersion ||
817            static_cast<size_t>(buf_end_offset) > old_count ||
818            !std::equal(new_buf.begin() + buf_offset,
819                        new_buf.begin() + buf_end_offset,
820                        old_buf.begin() + buf_offset)) {
821          BZ2_bzWrite(&err, bz_file, &new_buf[buf_offset], kBlockSize);
822          TEST_AND_RETURN_FALSE(err == BZ_OK);
823          const uint64_t block_idx = copy_first_block + i;
824          if (blocks[block_idx].reader != Vertex::kInvalidIndex) {
825            graph_utils::AddReadBeforeDep(vertex, blocks[block_idx].reader,
826                                          block_idx);
827          }
828          graph_utils::AppendBlockToExtents(&changed_extents, block_idx);
829          changed_block_count++;
830        }
831        buf_offset = buf_end_offset;
832      }
833
834      blocks_read += copy_block_cnt;
835      blocks_copied_count += copy_block_cnt;
836      float current_progress =
837          static_cast<float>(blocks_copied_count) / block_count;
838      if (printed_progress + 0.1 < current_progress ||
839          blocks_copied_count == block_count) {
840        LOG(INFO) << "progress: " << current_progress;
841        printed_progress = current_progress;
842      }
843    }
844  }
845  BZ2_bzWriteClose(&err, bz_file, 0, nullptr, nullptr);
846  TEST_AND_RETURN_FALSE(err == BZ_OK);
847  bz_file = nullptr;
848  TEST_AND_RETURN_FALSE_ERRNO(0 == fclose(file));
849  file = nullptr;
850
851  LOG(INFO) << "Compressed " << changed_block_count << " blocks ("
852            << block_count - changed_block_count << " blocks unchanged)";
853  chromeos::Blob compressed_data;
854  if (changed_block_count > 0) {
855    LOG(INFO) << "Reading compressed data off disk";
856    TEST_AND_RETURN_FALSE(utils::ReadFile(temp_file_path, &compressed_data));
857  }
858  TEST_AND_RETURN_FALSE(unlink(temp_file_path.c_str()) == 0);
859
860  // Add node to graph to write these blocks
861  out_op->set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
862  out_op->set_data_offset(*blobs_length);
863  out_op->set_data_length(compressed_data.size());
864  LOG(INFO) << "Rootfs non-data blocks compressed take up "
865            << compressed_data.size();
866  *blobs_length += compressed_data.size();
867  out_op->set_dst_length(kBlockSize * changed_block_count);
868  DeltaDiffGenerator::StoreExtents(changed_extents,
869                                   out_op->mutable_dst_extents());
870
871  TEST_AND_RETURN_FALSE(utils::WriteAll(blobs_fd,
872                                        compressed_data.data(),
873                                        compressed_data.size()));
874  LOG(INFO) << "Done processing unwritten blocks";
875  return true;
876}
877
878bool DeltaDiffGenerator::InitializePartitionInfo(bool is_kernel,
879                                                 const string& partition,
880                                                 PartitionInfo* info) {
881  int64_t size = 0;
882  if (is_kernel) {
883    size = utils::FileSize(partition);
884  } else {
885    int block_count = 0, block_size = 0;
886    TEST_AND_RETURN_FALSE(utils::GetFilesystemSize(partition,
887                                                   &block_count,
888                                                   &block_size));
889    size = static_cast<int64_t>(block_count) * block_size;
890  }
891  TEST_AND_RETURN_FALSE(size > 0);
892  info->set_size(size);
893  OmahaHashCalculator hasher;
894  TEST_AND_RETURN_FALSE(hasher.UpdateFile(partition, size) == size);
895  TEST_AND_RETURN_FALSE(hasher.Finalize());
896  const chromeos::Blob& hash = hasher.raw_hash();
897  info->set_hash(hash.data(), hash.size());
898  LOG(INFO) << partition << ": size=" << size << " hash=" << hasher.hash();
899  return true;
900}
901
902bool InitializePartitionInfos(const PayloadGenerationConfig& config,
903                              DeltaArchiveManifest* manifest) {
904  if (!config.source.kernel_part.empty()) {
905    TEST_AND_RETURN_FALSE(DeltaDiffGenerator::InitializePartitionInfo(
906        true,
907        config.source.kernel_part,
908        manifest->mutable_old_kernel_info()));
909  }
910  TEST_AND_RETURN_FALSE(DeltaDiffGenerator::InitializePartitionInfo(
911      true,
912      config.target.kernel_part,
913      manifest->mutable_new_kernel_info()));
914  if (!config.source.rootfs_part.empty()) {
915    TEST_AND_RETURN_FALSE(DeltaDiffGenerator::InitializePartitionInfo(
916        false,
917        config.source.rootfs_part,
918        manifest->mutable_old_rootfs_info()));
919  }
920  TEST_AND_RETURN_FALSE(DeltaDiffGenerator::InitializePartitionInfo(
921      false,
922      config.target.rootfs_part,
923      manifest->mutable_new_rootfs_info()));
924  return true;
925}
926
927// Stores all Extents in 'extents' into 'out'.
928void DeltaDiffGenerator::StoreExtents(
929    const vector<Extent>& extents,
930    google::protobuf::RepeatedPtrField<Extent>* out) {
931  for (const Extent& extent : extents) {
932    Extent* new_extent = out->Add();
933    *new_extent = extent;
934  }
935}
936
937// Stores all extents in |extents| into |out_vector|.
938void DeltaDiffGenerator::ExtentsToVector(
939    const google::protobuf::RepeatedPtrField<Extent>& extents,
940    vector<Extent>* out_vector) {
941  out_vector->clear();
942  for (int i = 0; i < extents.size(); i++) {
943    out_vector->push_back(extents.Get(i));
944  }
945}
946
947// Returns true if |op| is a no-op operation that doesn't do any useful work
948// (e.g., a move operation that copies blocks onto themselves).
949bool DeltaDiffGenerator::IsNoopOperation(
950    const DeltaArchiveManifest_InstallOperation& op) {
951  return (op.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE &&
952          ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents()));
953}
954
955void DeltaDiffGenerator::FilterNoopOperations(vector<AnnotatedOperation>* ops) {
956  ops->erase(
957      std::remove_if(
958          ops->begin(), ops->end(),
959          [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}),
960      ops->end());
961}
962
963bool DeltaDiffGenerator::ReorderDataBlobs(
964    DeltaArchiveManifest* manifest,
965    const string& data_blobs_path,
966    const string& new_data_blobs_path) {
967  int in_fd = open(data_blobs_path.c_str(), O_RDONLY, 0);
968  TEST_AND_RETURN_FALSE_ERRNO(in_fd >= 0);
969  ScopedFdCloser in_fd_closer(&in_fd);
970
971  DirectFileWriter writer;
972  TEST_AND_RETURN_FALSE(
973      writer.Open(new_data_blobs_path.c_str(),
974                  O_WRONLY | O_TRUNC | O_CREAT,
975                  0644) == 0);
976  ScopedFileWriterCloser writer_closer(&writer);
977  uint64_t out_file_size = 0;
978
979  for (int i = 0; i < (manifest->install_operations_size() +
980                       manifest->kernel_install_operations_size()); i++) {
981    DeltaArchiveManifest_InstallOperation* op = nullptr;
982    if (i < manifest->install_operations_size()) {
983      op = manifest->mutable_install_operations(i);
984    } else {
985      op = manifest->mutable_kernel_install_operations(
986          i - manifest->install_operations_size());
987    }
988    if (!op->has_data_offset())
989      continue;
990    CHECK(op->has_data_length());
991    chromeos::Blob buf(op->data_length());
992    ssize_t rc = pread(in_fd, buf.data(), buf.size(), op->data_offset());
993    TEST_AND_RETURN_FALSE(rc == static_cast<ssize_t>(buf.size()));
994
995    // Add the hash of the data blobs for this operation
996    TEST_AND_RETURN_FALSE(AddOperationHash(op, buf));
997
998    op->set_data_offset(out_file_size);
999    TEST_AND_RETURN_FALSE(writer.Write(buf.data(), buf.size()));
1000    out_file_size += buf.size();
1001  }
1002  return true;
1003}
1004
1005bool DeltaDiffGenerator::AddOperationHash(
1006    DeltaArchiveManifest_InstallOperation* op,
1007    const chromeos::Blob& buf) {
1008  OmahaHashCalculator hasher;
1009  TEST_AND_RETURN_FALSE(hasher.Update(buf.data(), buf.size()));
1010  TEST_AND_RETURN_FALSE(hasher.Finalize());
1011  const chromeos::Blob& hash = hasher.raw_hash();
1012  op->set_data_sha256_hash(hash.data(), hash.size());
1013  return true;
1014}
1015
1016bool DeltaDiffGenerator::GenerateOperations(
1017    const PayloadGenerationConfig& config,
1018    int data_file_fd,
1019    off_t* data_file_size,
1020    vector<AnnotatedOperation>* rootfs_ops,
1021    vector<AnnotatedOperation>* kernel_ops) {
1022  // List of blocks in the target partition, with the operation that needs to
1023  // write it and the operation that needs to read it. This is used here to
1024  // keep track of the blocks that no operation is writing it.
1025  vector<Block> blocks(config.target.rootfs_size / config.block_size);
1026
1027  // TODO(deymo): DeltaReadFiles() should not use a graph to generate the
1028  // operations, either in the in-place or source uprate. Split out the
1029  // graph dependency generation.
1030  Graph graph;
1031  TEST_AND_RETURN_FALSE(DeltaReadFiles(&graph,
1032                                       &blocks,
1033                                       config.source.rootfs_part,
1034                                       config.target.rootfs_part,
1035                                       config.source.rootfs_mountpt,
1036                                       config.target.rootfs_mountpt,
1037                                       config.chunk_size,
1038                                       data_file_fd,
1039                                       data_file_size,
1040                                       true));  // src_ops_allowed
1041  rootfs_ops->clear();
1042  for (const Vertex& v : graph) {
1043    rootfs_ops->emplace_back();
1044    AnnotatedOperation& aop = rootfs_ops->back();
1045    aop.op = v.op;
1046    aop.SetNameFromFileAndChunk(v.file_name, v.chunk_offset, v.chunk_size);
1047  }
1048
1049  LOG(INFO) << "done reading normal files";
1050
1051  // Read kernel partition
1052  TEST_AND_RETURN_FALSE(
1053      DeltaCompressKernelPartition(config.source.kernel_part,
1054                                   config.target.kernel_part,
1055                                   kernel_ops,
1056                                   data_file_fd,
1057                                   data_file_size,
1058                                   true));  // src_ops_allowed
1059  LOG(INFO) << "done reading kernel";
1060
1061  Vertex unwritten_vertex;
1062  TEST_AND_RETURN_FALSE(ReadUnwrittenBlocks(blocks,
1063                                            data_file_fd,
1064                                            data_file_size,
1065                                            config.source.rootfs_part,
1066                                            config.source.rootfs_size,
1067                                            config.target.rootfs_part,
1068                                            &unwritten_vertex,
1069                                            config.minor_version));
1070  if (unwritten_vertex.op.data_length() == 0) {
1071    LOG(INFO) << "No unwritten blocks to write, omitting operation";
1072  } else {
1073    rootfs_ops->emplace_back();
1074    rootfs_ops->back().op = unwritten_vertex.op;
1075    rootfs_ops->back().name = unwritten_vertex.file_name;
1076  }
1077
1078  TEST_AND_RETURN_FALSE(FragmentOperations(rootfs_ops,
1079                                           config.target.rootfs_part,
1080                                           data_file_fd,
1081                                           data_file_size));
1082  TEST_AND_RETURN_FALSE(FragmentOperations(kernel_ops,
1083                                           config.target.kernel_part,
1084                                           data_file_fd,
1085                                           data_file_size));
1086  SortOperationsByDestination(rootfs_ops);
1087  SortOperationsByDestination(kernel_ops);
1088  // TODO(alliewood): Change merge operations to use config.chunk_size once
1089  // specifying chunk_size on the command line works. crbug/485397.
1090  TEST_AND_RETURN_FALSE(MergeOperations(rootfs_ops,
1091                                        kDefaultChunkSize,
1092                                        config.target.rootfs_part,
1093                                        data_file_fd,
1094                                        data_file_size));
1095  TEST_AND_RETURN_FALSE(MergeOperations(kernel_ops,
1096                                        kDefaultChunkSize,
1097                                        config.target.kernel_part,
1098                                        data_file_fd,
1099                                        data_file_size));
1100  return true;
1101}
1102
1103bool GenerateUpdatePayloadFile(
1104    const PayloadGenerationConfig& config,
1105    const string& output_path,
1106    const string& private_key_path,
1107    uint64_t* metadata_size) {
1108  if (config.is_delta) {
1109    LOG_IF(WARNING, config.source.rootfs_size != config.target.rootfs_size)
1110        << "Old and new images have different block counts.";
1111    // TODO(deymo): Our tools only support growing the filesystem size during
1112    // an update. Remove this check when that's fixed. crbug.com/192136
1113    LOG_IF(FATAL, config.source.rootfs_size > config.target.rootfs_size)
1114        << "Shirking the rootfs size is not supported at the moment.";
1115  }
1116
1117  // Sanity checks for the partition size.
1118  LOG(INFO) << "Rootfs partition size: " << config.rootfs_partition_size;
1119  LOG(INFO) << "Actual filesystem size: " << config.target.rootfs_size;
1120
1121  LOG(INFO) << "Invalid block index: " << Vertex::kInvalidIndex;
1122  LOG(INFO) << "Block count: "
1123            << config.target.rootfs_size / config.block_size;
1124
1125  const string kTempFileTemplate("CrAU_temp_data.XXXXXX");
1126  string temp_file_path;
1127  unique_ptr<ScopedPathUnlinker> temp_file_unlinker;
1128  off_t data_file_size = 0;
1129
1130  LOG(INFO) << "Reading files...";
1131
1132  // Create empty protobuf Manifest object
1133  DeltaArchiveManifest manifest;
1134  manifest.set_minor_version(config.minor_version);
1135
1136  vector<AnnotatedOperation> rootfs_ops;
1137  vector<AnnotatedOperation> kernel_ops;
1138
1139  // Select payload generation strategy based on the config.
1140  unique_ptr<OperationsGenerator> strategy;
1141  if (config.is_delta) {
1142    // We don't efficiently support deltas on squashfs. For now, we will
1143    // produce full operations in that case.
1144    if (utils::IsSquashfsFilesystem(config.target.rootfs_part)) {
1145      LOG(INFO) << "Using generator FullUpdateGenerator::Run for squashfs "
1146                   "deltas";
1147      strategy.reset(new FullUpdateGenerator());
1148    } else if (utils::IsExtFilesystem(config.target.rootfs_part)) {
1149      // Delta update (with possibly a full kernel update).
1150      if (config.minor_version == kInPlaceMinorPayloadVersion) {
1151        LOG(INFO) << "Using generator InplaceGenerator::GenerateInplaceDelta";
1152        strategy.reset(new InplaceGenerator());
1153      } else if (config.minor_version == kSourceMinorPayloadVersion) {
1154        LOG(INFO) << "Using generator DeltaDiffGenerator::GenerateSourceDelta";
1155        strategy.reset(new DeltaDiffGenerator());
1156      } else {
1157        LOG(ERROR) << "Unsupported minor version given for delta payload: "
1158                   << config.minor_version;
1159        return false;
1160      }
1161    } else {
1162      LOG(ERROR) << "Unsupported filesystem for delta payload in "
1163                 << config.target.rootfs_part;
1164      return false;
1165    }
1166  } else {
1167    // Full update.
1168    LOG(INFO) << "Using generator FullUpdateGenerator::Run";
1169    strategy.reset(new FullUpdateGenerator());
1170  }
1171
1172  {
1173    int data_file_fd;
1174    TEST_AND_RETURN_FALSE(
1175        utils::MakeTempFile(kTempFileTemplate, &temp_file_path, &data_file_fd));
1176    temp_file_unlinker.reset(new ScopedPathUnlinker(temp_file_path));
1177    TEST_AND_RETURN_FALSE(data_file_fd >= 0);
1178    ScopedFdCloser data_file_fd_closer(&data_file_fd);
1179
1180    // Generate the operations using the strategy we selected above.
1181    TEST_AND_RETURN_FALSE(strategy->GenerateOperations(config,
1182                                                       data_file_fd,
1183                                                       &data_file_size,
1184                                                       &rootfs_ops,
1185                                                       &kernel_ops));
1186  }
1187
1188  if (!config.source.ImageInfoIsEmpty())
1189    *(manifest.mutable_old_image_info()) = config.source.image_info;
1190
1191  if (!config.target.ImageInfoIsEmpty())
1192    *(manifest.mutable_new_image_info()) = config.target.image_info;
1193
1194  // Filter the no-operations. OperationsGenerators should not output this kind
1195  // of operations normally, but this is an extra step to fix that if
1196  // happened.
1197  DeltaDiffGenerator::FilterNoopOperations(&rootfs_ops);
1198  DeltaDiffGenerator::FilterNoopOperations(&kernel_ops);
1199
1200  OperationNameMap op_name_map;
1201  InstallOperationsToManifest(rootfs_ops, kernel_ops, &manifest, &op_name_map);
1202  manifest.set_block_size(config.block_size);
1203
1204  // Reorder the data blobs with the newly ordered manifest.
1205  string ordered_blobs_path;
1206  TEST_AND_RETURN_FALSE(utils::MakeTempFile(
1207      "CrAU_temp_data.ordered.XXXXXX",
1208      &ordered_blobs_path,
1209      nullptr));
1210  ScopedPathUnlinker ordered_blobs_unlinker(ordered_blobs_path);
1211  TEST_AND_RETURN_FALSE(
1212      DeltaDiffGenerator::ReorderDataBlobs(&manifest,
1213                                           temp_file_path,
1214                                           ordered_blobs_path));
1215  temp_file_unlinker.reset();
1216
1217  // Check that install op blobs are in order.
1218  uint64_t next_blob_offset = 0;
1219  {
1220    for (int i = 0; i < (manifest.install_operations_size() +
1221                         manifest.kernel_install_operations_size()); i++) {
1222      DeltaArchiveManifest_InstallOperation* op =
1223          i < manifest.install_operations_size() ?
1224          manifest.mutable_install_operations(i) :
1225          manifest.mutable_kernel_install_operations(
1226              i - manifest.install_operations_size());
1227      if (op->has_data_offset()) {
1228        if (op->data_offset() != next_blob_offset) {
1229          LOG(FATAL) << "bad blob offset! " << op->data_offset() << " != "
1230                     << next_blob_offset;
1231        }
1232        next_blob_offset += op->data_length();
1233      }
1234    }
1235  }
1236
1237  // Signatures appear at the end of the blobs. Note the offset in the
1238  // manifest
1239  if (!private_key_path.empty()) {
1240    uint64_t signature_blob_length = 0;
1241    TEST_AND_RETURN_FALSE(
1242        PayloadSigner::SignatureBlobLength(vector<string>(1, private_key_path),
1243                                           &signature_blob_length));
1244    DeltaDiffGenerator::AddSignatureOp(
1245        next_blob_offset, signature_blob_length, &manifest);
1246  }
1247
1248  TEST_AND_RETURN_FALSE(InitializePartitionInfos(config, &manifest));
1249
1250  // Serialize protobuf
1251  string serialized_manifest;
1252
1253  TEST_AND_RETURN_FALSE(manifest.AppendToString(&serialized_manifest));
1254
1255  LOG(INFO) << "Writing final delta file header...";
1256  DirectFileWriter writer;
1257  TEST_AND_RETURN_FALSE_ERRNO(writer.Open(output_path.c_str(),
1258                                          O_WRONLY | O_CREAT | O_TRUNC,
1259                                          0644) == 0);
1260  ScopedFileWriterCloser writer_closer(&writer);
1261
1262  // Write header
1263  TEST_AND_RETURN_FALSE(writer.Write(kDeltaMagic, strlen(kDeltaMagic)));
1264
1265  // Write major version number
1266  TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer, kMajorVersionNumber));
1267
1268  // Write protobuf length
1269  TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer,
1270                                               serialized_manifest.size()));
1271
1272  // Write protobuf
1273  LOG(INFO) << "Writing final delta file protobuf... "
1274            << serialized_manifest.size();
1275  TEST_AND_RETURN_FALSE(writer.Write(serialized_manifest.data(),
1276                                     serialized_manifest.size()));
1277
1278  // Append the data blobs
1279  LOG(INFO) << "Writing final delta file data blobs...";
1280  int blobs_fd = open(ordered_blobs_path.c_str(), O_RDONLY, 0);
1281  ScopedFdCloser blobs_fd_closer(&blobs_fd);
1282  TEST_AND_RETURN_FALSE(blobs_fd >= 0);
1283  for (;;) {
1284    vector<char> buf(config.block_size);
1285    ssize_t rc = read(blobs_fd, buf.data(), buf.size());
1286    if (0 == rc) {
1287      // EOF
1288      break;
1289    }
1290    TEST_AND_RETURN_FALSE_ERRNO(rc > 0);
1291    TEST_AND_RETURN_FALSE(writer.Write(buf.data(), rc));
1292  }
1293
1294  // Write signature blob.
1295  if (!private_key_path.empty()) {
1296    LOG(INFO) << "Signing the update...";
1297    chromeos::Blob signature_blob;
1298    TEST_AND_RETURN_FALSE(PayloadSigner::SignPayload(
1299        output_path,
1300        vector<string>(1, private_key_path),
1301        &signature_blob));
1302    TEST_AND_RETURN_FALSE(writer.Write(signature_blob.data(),
1303                                       signature_blob.size()));
1304  }
1305
1306  *metadata_size =
1307      strlen(kDeltaMagic) + 2 * sizeof(uint64_t) + serialized_manifest.size();
1308  ReportPayloadUsage(manifest, *metadata_size, op_name_map);
1309
1310  LOG(INFO) << "All done. Successfully created delta file with "
1311            << "metadata size = " << *metadata_size;
1312  return true;
1313}
1314
1315// Runs the bsdiff tool on two files and returns the resulting delta in
1316// 'out'. Returns true on success.
1317bool DeltaDiffGenerator::BsdiffFiles(const string& old_file,
1318                                     const string& new_file,
1319                                     chromeos::Blob* out) {
1320  const string kPatchFile = "delta.patchXXXXXX";
1321  string patch_file_path;
1322
1323  TEST_AND_RETURN_FALSE(
1324      utils::MakeTempFile(kPatchFile, &patch_file_path, nullptr));
1325
1326  vector<string> cmd;
1327  cmd.push_back(kBsdiffPath);
1328  cmd.push_back(old_file);
1329  cmd.push_back(new_file);
1330  cmd.push_back(patch_file_path);
1331
1332  int rc = 1;
1333  chromeos::Blob patch_file;
1334  TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc, nullptr));
1335  TEST_AND_RETURN_FALSE(rc == 0);
1336  TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
1337  unlink(patch_file_path.c_str());
1338  return true;
1339}
1340
1341void DeltaDiffGenerator::AddSignatureOp(uint64_t signature_blob_offset,
1342                                        uint64_t signature_blob_length,
1343                                        DeltaArchiveManifest* manifest) {
1344  LOG(INFO) << "Making room for signature in file";
1345  manifest->set_signatures_offset(signature_blob_offset);
1346  LOG(INFO) << "set? " << manifest->has_signatures_offset();
1347  // Add a dummy op at the end to appease older clients
1348  DeltaArchiveManifest_InstallOperation* dummy_op =
1349      manifest->add_kernel_install_operations();
1350  dummy_op->set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
1351  dummy_op->set_data_offset(signature_blob_offset);
1352  manifest->set_signatures_offset(signature_blob_offset);
1353  dummy_op->set_data_length(signature_blob_length);
1354  manifest->set_signatures_size(signature_blob_length);
1355  Extent* dummy_extent = dummy_op->add_dst_extents();
1356  // Tell the dummy op to write this data to a big sparse hole
1357  dummy_extent->set_start_block(kSparseHole);
1358  dummy_extent->set_num_blocks((signature_blob_length + kBlockSize - 1) /
1359                               kBlockSize);
1360}
1361
1362void DeltaDiffGenerator::ClearSparseHoles(vector<Extent>* extents) {
1363  extents->erase(std::remove_if(extents->begin(), extents->end(), IsSparseHole),
1364                 extents->end());
1365}
1366
1367void DeltaDiffGenerator::NormalizeExtents(vector<Extent>* extents) {
1368  vector<Extent> new_extents;
1369  for (const Extent& curr_ext : *extents) {
1370    if (new_extents.empty()) {
1371      new_extents.push_back(curr_ext);
1372      continue;
1373    }
1374    Extent& last_ext = new_extents.back();
1375    if (last_ext.start_block() + last_ext.num_blocks() ==
1376        curr_ext.start_block()) {
1377      // If the extents are touching, we want to combine them.
1378      last_ext.set_num_blocks(last_ext.num_blocks() + curr_ext.num_blocks());
1379    } else {
1380      // Otherwise just include the extent as is.
1381      new_extents.push_back(curr_ext);
1382    }
1383  }
1384  *extents = new_extents;
1385}
1386
1387bool DeltaDiffGenerator::FragmentOperations(
1388    vector<AnnotatedOperation>* aops,
1389    const string& target_part_path,
1390    int data_fd,
1391    off_t* data_file_size) {
1392  vector<AnnotatedOperation> fragmented_aops;
1393  for (const AnnotatedOperation& aop : *aops) {
1394    if (aop.op.type() ==
1395        DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY) {
1396      TEST_AND_RETURN_FALSE(SplitSourceCopy(aop, &fragmented_aops));
1397    } else if ((aop.op.type() ==
1398                DeltaArchiveManifest_InstallOperation_Type_REPLACE) ||
1399               (aop.op.type() ==
1400                DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ)) {
1401      TEST_AND_RETURN_FALSE(SplitReplaceOrReplaceBz(aop, &fragmented_aops,
1402                                                    target_part_path, data_fd,
1403                                                    data_file_size));
1404    } else {
1405      fragmented_aops.push_back(aop);
1406    }
1407  }
1408  *aops = fragmented_aops;
1409  return true;
1410}
1411
1412void DeltaDiffGenerator::SortOperationsByDestination(
1413    vector<AnnotatedOperation>* aops) {
1414  sort(aops->begin(), aops->end(), CompareAopsByDestination);
1415}
1416
1417bool DeltaDiffGenerator::SplitSourceCopy(
1418    const AnnotatedOperation& original_aop,
1419    vector<AnnotatedOperation>* result_aops) {
1420  DeltaArchiveManifest_InstallOperation original_op = original_aop.op;
1421  TEST_AND_RETURN_FALSE(original_op.type() ==
1422                        DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY);
1423  // Keeps track of the index of curr_src_ext.
1424  int curr_src_ext_index = 0;
1425  Extent curr_src_ext = original_op.src_extents(curr_src_ext_index);
1426  for (int i = 0; i < original_op.dst_extents_size(); i++) {
1427    Extent dst_ext = original_op.dst_extents(i);
1428    // The new operation which will have only one dst extent.
1429    DeltaArchiveManifest_InstallOperation new_op;
1430    uint64_t blocks_left = dst_ext.num_blocks();
1431    while (blocks_left > 0) {
1432      if (curr_src_ext.num_blocks() <= blocks_left) {
1433        // If the curr_src_ext is smaller than dst_ext, add it.
1434        blocks_left -= curr_src_ext.num_blocks();
1435        *(new_op.add_src_extents()) = curr_src_ext;
1436        if (curr_src_ext_index + 1 < original_op.src_extents().size()) {
1437          curr_src_ext = original_op.src_extents(++curr_src_ext_index);
1438        } else {
1439          break;
1440        }
1441      } else {
1442        // Split src_exts that are bigger than the dst_ext we're dealing with.
1443        Extent first_ext;
1444        first_ext.set_num_blocks(blocks_left);
1445        first_ext.set_start_block(curr_src_ext.start_block());
1446        *(new_op.add_src_extents()) = first_ext;
1447        // Keep the second half of the split op.
1448        curr_src_ext.set_num_blocks(curr_src_ext.num_blocks() - blocks_left);
1449        curr_src_ext.set_start_block(curr_src_ext.start_block() + blocks_left);
1450        blocks_left -= first_ext.num_blocks();
1451      }
1452    }
1453    // Fix up our new operation and add it to the results.
1454    new_op.set_type(DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY);
1455    *(new_op.add_dst_extents()) = dst_ext;
1456    new_op.set_src_length(dst_ext.num_blocks() * kBlockSize);
1457    new_op.set_dst_length(dst_ext.num_blocks() * kBlockSize);
1458
1459    AnnotatedOperation new_aop;
1460    new_aop.op = new_op;
1461    new_aop.name = base::StringPrintf("%s:%d", original_aop.name.c_str(), i);
1462    result_aops->push_back(new_aop);
1463  }
1464  if (curr_src_ext_index != original_op.src_extents().size() - 1) {
1465    LOG(FATAL) << "Incorrectly split SOURCE_COPY operation. Did not use all "
1466               << "source extents.";
1467  }
1468  return true;
1469}
1470
1471bool DeltaDiffGenerator::SplitReplaceOrReplaceBz(
1472    const AnnotatedOperation& original_aop,
1473    vector<AnnotatedOperation>* result_aops,
1474    const string& target_part_path,
1475    int data_fd,
1476    off_t* data_file_size) {
1477  DeltaArchiveManifest_InstallOperation original_op = original_aop.op;
1478  const bool is_replace =
1479      original_op.type() == DeltaArchiveManifest_InstallOperation_Type_REPLACE;
1480  TEST_AND_RETURN_FALSE(
1481      is_replace ||
1482      (original_op.type() ==
1483       DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ));
1484
1485  uint32_t data_offset = original_op.data_offset();
1486  for (int i = 0; i < original_op.dst_extents_size(); i++) {
1487    Extent dst_ext = original_op.dst_extents(i);
1488    // Make a new operation with only one dst extent.
1489    DeltaArchiveManifest_InstallOperation new_op;
1490    *(new_op.add_dst_extents()) = dst_ext;
1491    uint32_t data_size = dst_ext.num_blocks() * kBlockSize;
1492    new_op.set_dst_length(data_size);
1493    // If this is a REPLACE, attempt to reuse portions of the existing blob.
1494    if (is_replace) {
1495      new_op.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
1496      new_op.set_data_length(data_size);
1497      new_op.set_data_offset(data_offset);
1498      data_offset += data_size;
1499    }
1500
1501    AnnotatedOperation new_aop;
1502    new_aop.op = new_op;
1503    new_aop.name = base::StringPrintf("%s:%d", original_aop.name.c_str(), i);
1504    TEST_AND_RETURN_FALSE(AddDataAndSetType(&new_aop, target_part_path, data_fd,
1505                                            data_file_size));
1506
1507    result_aops->push_back(new_aop);
1508  }
1509  return true;
1510}
1511
1512bool DeltaDiffGenerator::MergeOperations(vector<AnnotatedOperation>* aops,
1513                                         off_t chunk_size,
1514                                         const string& target_part_path,
1515                                         int data_fd,
1516                                         off_t* data_file_size) {
1517  vector<AnnotatedOperation> new_aops;
1518  for (const AnnotatedOperation& curr_aop : *aops) {
1519    if (new_aops.empty()) {
1520      new_aops.push_back(curr_aop);
1521      continue;
1522    }
1523    AnnotatedOperation& last_aop = new_aops.back();
1524
1525    if (last_aop.op.dst_extents_size() <= 0 ||
1526        curr_aop.op.dst_extents_size() <= 0) {
1527      new_aops.push_back(curr_aop);
1528      continue;
1529    }
1530    uint32_t last_dst_idx = last_aop.op.dst_extents_size() - 1;
1531    uint32_t last_end_block =
1532        last_aop.op.dst_extents(last_dst_idx).start_block() +
1533        last_aop.op.dst_extents(last_dst_idx).num_blocks();
1534    uint32_t curr_start_block = curr_aop.op.dst_extents(0).start_block();
1535    uint32_t combined_block_count =
1536        last_aop.op.dst_extents(last_dst_idx).num_blocks() +
1537        curr_aop.op.dst_extents(0).num_blocks();
1538    bool good_op_type =
1539        curr_aop.op.type() ==
1540            DeltaArchiveManifest_InstallOperation_Type_SOURCE_COPY ||
1541        curr_aop.op.type() ==
1542            DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
1543        curr_aop.op.type() ==
1544            DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ;
1545    if (good_op_type &&
1546        last_aop.op.type() == curr_aop.op.type() &&
1547        last_end_block == curr_start_block &&
1548        static_cast<off_t>(combined_block_count * kBlockSize) <= chunk_size) {
1549      // If the operations have the same type (which is a type that we can
1550      // merge), are contiguous, are fragmented to have one destination extent,
1551      // and their combined block count would be less than chunk size, merge
1552      // them.
1553      last_aop.name = base::StringPrintf("%s,%s",
1554                                         last_aop.name.c_str(),
1555                                         curr_aop.name.c_str());
1556
1557      ExtendExtents(last_aop.op.mutable_src_extents(),
1558                    curr_aop.op.src_extents());
1559      if (curr_aop.op.src_length() > 0)
1560        last_aop.op.set_src_length(last_aop.op.src_length() +
1561                                   curr_aop.op.src_length());
1562      ExtendExtents(last_aop.op.mutable_dst_extents(),
1563                    curr_aop.op.dst_extents());
1564      if (curr_aop.op.dst_length() > 0)
1565        last_aop.op.set_dst_length(last_aop.op.dst_length() +
1566                                   curr_aop.op.dst_length());
1567      // Set the data length to zero so we know to add the blob later.
1568      if (curr_aop.op.type() ==
1569          DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
1570          curr_aop.op.type() ==
1571          DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ) {
1572        last_aop.op.set_data_length(0);
1573      }
1574    } else {
1575      // Otherwise just include the extent as is.
1576      new_aops.push_back(curr_aop);
1577    }
1578  }
1579
1580  // Set the blobs for REPLACE/REPLACE_BZ operations that have been merged.
1581  for (AnnotatedOperation& curr_aop : new_aops) {
1582    if (curr_aop.op.data_length() == 0 &&
1583        (curr_aop.op.type() ==
1584            DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
1585         curr_aop.op.type() ==
1586            DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ)) {
1587      TEST_AND_RETURN_FALSE(AddDataAndSetType(&curr_aop, target_part_path,
1588                                              data_fd, data_file_size));
1589    }
1590  }
1591
1592  *aops = new_aops;
1593  return true;
1594}
1595
1596void DeltaDiffGenerator::ExtendExtents(
1597    google::protobuf::RepeatedPtrField<Extent>* extents,
1598    const google::protobuf::RepeatedPtrField<Extent>& extents_to_add) {
1599  vector<Extent> extents_vector;
1600  vector<Extent> extents_to_add_vector;
1601  ExtentsToVector(*extents, &extents_vector);
1602  ExtentsToVector(extents_to_add, &extents_to_add_vector);
1603  extents_vector.insert(extents_vector.end(),
1604                        extents_to_add_vector.begin(),
1605                        extents_to_add_vector.end());
1606  NormalizeExtents(&extents_vector);
1607  extents->Clear();
1608  StoreExtents(extents_vector, extents);
1609}
1610
1611bool DeltaDiffGenerator::AddDataAndSetType(AnnotatedOperation* aop,
1612                                           const string& target_part_path,
1613                                           int data_fd,
1614                                           off_t* data_file_size) {
1615  TEST_AND_RETURN_FALSE(
1616      aop->op.type() == DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
1617      aop->op.type() == DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
1618
1619  chromeos::Blob data(aop->op.dst_length());
1620  vector<Extent> dst_extents;
1621  ExtentsToVector(aop->op.dst_extents(), &dst_extents);
1622  TEST_AND_RETURN_FALSE(utils::ReadExtents(target_part_path,
1623                                           dst_extents,
1624                                           &data,
1625                                           data.size(),
1626                                           kBlockSize));
1627
1628  chromeos::Blob data_bz;
1629  TEST_AND_RETURN_FALSE(BzipCompress(data, &data_bz));
1630  CHECK(!data_bz.empty());
1631
1632  chromeos::Blob* data_p = nullptr;
1633  DeltaArchiveManifest_InstallOperation_Type new_op_type;
1634  if (data_bz.size() < data.size()) {
1635    new_op_type = DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ;
1636    data_p = &data_bz;
1637  } else {
1638    new_op_type = DeltaArchiveManifest_InstallOperation_Type_REPLACE;
1639    data_p = &data;
1640  }
1641
1642  // If the operation already points to a data blob, check whether it's
1643  // identical to the new one, in which case don't add it.
1644  if (aop->op.type() == new_op_type &&
1645      aop->op.data_length() == data_p->size()) {
1646    chromeos::Blob current_data(data_p->size());
1647    ssize_t bytes_read;
1648    TEST_AND_RETURN_FALSE(utils::PReadAll(data_fd,
1649                                          current_data.data(),
1650                                          aop->op.data_length(),
1651                                          aop->op.data_offset(),
1652                                          &bytes_read));
1653    TEST_AND_RETURN_FALSE(bytes_read ==
1654                          static_cast<ssize_t>(aop->op.data_length()));
1655    if (current_data == *data_p)
1656      data_p = nullptr;
1657  }
1658
1659  if (data_p) {
1660    aop->op.set_type(new_op_type);
1661    aop->SetOperationBlob(data_p, data_fd, data_file_size);
1662  }
1663
1664  return true;
1665}
1666
1667};  // namespace chromeos_update_engine
1668