1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package com.google.archivepatcher.applier.bsdiff;
16
17import com.google.archivepatcher.applier.PatchFormatException;
18
19import java.io.BufferedInputStream;
20import java.io.BufferedOutputStream;
21import java.io.IOException;
22import java.io.InputStream;
23import java.io.OutputStream;
24import java.io.RandomAccessFile;
25
26/**
27 * A Java implementation of the "bspatch" algorithm based on the BSD-2 licensed source code
28 * available here: https://github.com/mendsley/bsdiff. This implementation supports a maximum file
29 * size of 2GB for all binaries involved (old, new and patch binaries).
30 */
31public class BsPatch {
32  /**
33   * Standard header found at the start of every patch.
34   */
35  private static final String SIGNATURE = "ENDSLEY/BSDIFF43";
36
37  /**
38   * Default buffer size is 50 kibibytes, a reasonable tradeoff between size and speed.
39   */
40  private static final int PATCH_BUFFER_SIZE = 1024 * 50;
41
42  /**
43   * Masks the upper bit of a long, used to determine if a long is positive or negative.
44   */
45  private static final long NEGATIVE_LONG_SIGN_MASK = 1L << 63;
46
47  /**
48   * The patch is typically compressed and the input stream is decompressing on-the-fly. A small
49   * buffer greatly improves efficiency on complicated patches with lots of short directives.
50   */
51  private static final int PATCH_STREAM_BUFFER_SIZE = 4 * 1024;
52
53  /**
54   * Complicated patches with lots of short directives result in many calls to write small amounts
55   * of data. A buffer greatly improves efficiency for these patches.
56   */
57  private static final int OUTPUT_STREAM_BUFFER_SIZE = 16 * 1024;
58
59  /**
60   * Applies a patch from |patchData| to the data in |oldData|, writing the result to |newData|.
61   *
62   * @param oldData data to which the patch should be applied
63   * @param newData stream to write the new artifact to
64   * @param patchData stream to read patch instructions from
65   * @throws PatchFormatException if the patch stream is invalid
66   * @throws IOException if unable to read or write any of the data
67   */
68  public static void applyPatch(
69      RandomAccessFile oldData, OutputStream newData, InputStream patchData)
70      throws PatchFormatException, IOException {
71    patchData = new BufferedInputStream(patchData, PATCH_STREAM_BUFFER_SIZE);
72    newData = new BufferedOutputStream(newData, OUTPUT_STREAM_BUFFER_SIZE);
73    try {
74      applyPatchInternal(oldData, newData, patchData);
75    } finally {
76      newData.flush();
77    }
78  }
79
80  /**
81   * Does the work of the public applyPatch method.
82   */
83  private static void applyPatchInternal(
84      final RandomAccessFile oldData,
85      final OutputStream newData,
86      final InputStream patchData)
87      throws PatchFormatException, IOException {
88    final byte[] signatureBuffer = new byte[SIGNATURE.length()];
89    try {
90      readFully(patchData, signatureBuffer, 0, signatureBuffer.length);
91    } catch (IOException e) {
92      throw new PatchFormatException("truncated signature");
93    }
94
95    String signature = new String(signatureBuffer, 0, signatureBuffer.length, "US-ASCII");
96    if (!SIGNATURE.equals(signature)) {
97      throw new PatchFormatException("bad signature");
98    }
99
100    // Sanity-check: ensure a-priori knowledge matches patch expectations
101    final long oldSize = oldData.length();
102    if (oldSize > Integer.MAX_VALUE) {
103      throw new PatchFormatException("bad oldSize");
104    }
105    final long newSize = readBsdiffLong(patchData);
106    if (newSize < 0 || newSize > Integer.MAX_VALUE) {
107      throw new PatchFormatException("bad newSize");
108    }
109
110    // These buffers are used for performing transformations and copies. They are not stateful.
111    final byte[] buffer1 = new byte[PATCH_BUFFER_SIZE];
112    final byte[] buffer2 = new byte[PATCH_BUFFER_SIZE];
113
114    // Offsets into |oldData| and |newData|.
115    long oldDataOffset = 0; // strobes |oldData| in order specified by the patch file
116    long newDataBytesWritten = 0; // monotonically increases from 0 .. |expectedNewSize|
117
118    while (newDataBytesWritten < newSize) {
119      // Read "control data" for the operation. There are three values here:
120      // 1. |diffSegmentLength| defines a number of "similar" bytes that can be transformed
121      //    from |oldData| to |newData| by applying byte-by-byte addends. The addend bytes are
122      //    read from |patchData|. If zero, no "similar" bytes are transformed in this
123      //    operation.
124      final long diffSegmentLength = readBsdiffLong(patchData);
125
126      // 2. |copySegmentLength| defines a number of identical bytes that can be copied from
127      //    |oldData| to |newData|. If zero, no identical bytes are copied in this operation.
128      final long copySegmentLength = readBsdiffLong(patchData);
129
130      // 3. |offsetToNextInput| defines a relative offset to the next position in |oldData| to
131      //    jump do after the current operation completes. Strangely, this compensates for
132      //    |diffSegmentLength| but not for |copySegmentLength|, so |diffSegmentLength| must
133      //    be accumulated into |oldDataOffset| while |copySegmentLength| must NOT be.
134      final long offsetToNextInput = readBsdiffLong(patchData);
135
136      // Sanity-checks
137      if (diffSegmentLength < 0 || diffSegmentLength > Integer.MAX_VALUE) {
138        throw new PatchFormatException("bad diffSegmentLength");
139      }
140      if (copySegmentLength < 0 || copySegmentLength > Integer.MAX_VALUE) {
141        throw new PatchFormatException("bad copySegmentLength");
142      }
143      if (offsetToNextInput < Integer.MIN_VALUE || offsetToNextInput > Integer.MAX_VALUE) {
144        throw new PatchFormatException("bad offsetToNextInput");
145      }
146
147      final long expectedFinalNewDataBytesWritten =
148          newDataBytesWritten + diffSegmentLength + copySegmentLength;
149      if (expectedFinalNewDataBytesWritten > newSize) {
150        throw new PatchFormatException("expectedFinalNewDataBytesWritten too large");
151      }
152
153      final long expectedFinalOldDataOffset = oldDataOffset + diffSegmentLength + offsetToNextInput;
154      if (expectedFinalOldDataOffset > oldSize) {
155        throw new PatchFormatException("expectedFinalOldDataOffset too large");
156      }
157      if (expectedFinalOldDataOffset < 0) {
158        throw new PatchFormatException("expectedFinalOldDataOffset is negative");
159      }
160
161      // At this point everything is known to be sane, and the operations should all succeed.
162      oldData.seek(oldDataOffset);
163      if (diffSegmentLength > 0) {
164        transformBytes((int) diffSegmentLength, patchData, oldData, newData, buffer1, buffer2);
165      }
166      if (copySegmentLength > 0) {
167        pipe(patchData, newData, buffer1, (int) copySegmentLength);
168      }
169      newDataBytesWritten = expectedFinalNewDataBytesWritten;
170      oldDataOffset = expectedFinalOldDataOffset;
171    }
172  }
173
174  /**
175   * Transforms bytes from |oldData| into |newData| by applying byte-for-byte addends from
176   * |patchData|. The number of bytes consumed from |oldData| and |patchData|, as well as the
177   * number of bytes written to |newData|, is |diffLength|. The contents of the buffers are
178   * ignored and overwritten, and no guarantee is made as to their contents when this method
179   * returns. This is the core of the bsdiff patching algorithm. |buffer1.length| must equal
180   * |buffer2.length|, and |buffer1| and |buffer2| must be distinct objects.
181   *
182   * @param diffLength the length of the BsDiff entry (how many bytes to read and apply).
183   * @param patchData the input stream from the BsDiff patch containing diff bytes. This stream
184   *                  must be positioned so that the first byte read is the first addend to be
185   *                  applied to the first byte of data to be read from |oldData|.
186   * @param oldData the old file, for the diff bytes to be applied to. This input source must be
187   *                positioned so that the first byte read is the first byte of data to which the
188   *                first byte of addends from |patchData| should be applied.
189   * @param newData the stream to write the resulting data to.
190   * @param buffer1 temporary buffer to use for data transformation; contents are ignored, may be
191   *                overwritten, and are undefined when this method returns.
192   * @param buffer2 temporary buffer to use for data transformation; contents are ignored, may be
193   *                overwritten, and are undefined when this method returns.
194   */
195  // Visible for testing only
196  static void transformBytes(
197      final int diffLength,
198      final InputStream patchData,
199      final RandomAccessFile oldData,
200      final OutputStream newData,
201      final byte[] buffer1,
202      final byte[] buffer2)
203      throws IOException {
204    int numBytesLeft = diffLength;
205    while (numBytesLeft > 0) {
206      final int numBytesThisRound = Math.min(numBytesLeft, buffer1.length);
207      oldData.readFully(buffer1, 0, numBytesThisRound);
208      readFully(patchData, buffer2, 0, numBytesThisRound);
209      for (int i = 0; i < numBytesThisRound; i++) {
210        buffer1[i] += buffer2[i];
211      }
212      newData.write(buffer1, 0, numBytesThisRound);
213      numBytesLeft -= numBytesThisRound;
214    }
215  }
216
217  /**
218   * Reads a long value in little-endian, signed-magnitude format (the format used by the C++
219   * bsdiff implementation).
220   *
221   * @param in the stream to read from
222   * @return the long value
223   * @throws PatchFormatException if the value is negative zero (unsupported)
224   * @throws IOException if unable to read all 8 bytes from the stream
225   */
226  // Visible for testing only
227  static final long readBsdiffLong(InputStream in) throws PatchFormatException, IOException {
228    long result = 0;
229    for (int bitshift = 0; bitshift < 64; bitshift += 8) {
230      result |= ((long) in.read()) << bitshift;
231    }
232
233    if (result == NEGATIVE_LONG_SIGN_MASK) {
234      // "Negative zero", which is valid in signed-magnitude format.
235      // NB: No sane patch generator should ever produce such a value.
236      throw new PatchFormatException("read negative zero");
237    }
238
239    if ((result & NEGATIVE_LONG_SIGN_MASK) != 0) {
240      result = -(result & ~NEGATIVE_LONG_SIGN_MASK);
241    }
242
243    return result;
244  }
245
246  /**
247   * Read exactly the specified number of bytes into the specified buffer.
248   *
249   * @param in the input stream to read from
250   * @param destination where to write the bytes to
251   * @param startAt the offset at which to start writing bytes in the destination buffer
252   * @param numBytes the number of bytes to read
253   * @throws IOException if reading from the stream fails
254   */
255  // Visible for testing only
256  static void readFully(
257      final InputStream in, final byte[] destination, final int startAt, final int numBytes)
258      throws IOException {
259    int numRead = 0;
260    while (numRead < numBytes) {
261      int readNow = in.read(destination, startAt + numRead, numBytes - numRead);
262      if (readNow == -1) {
263        throw new IOException("truncated input stream");
264      }
265      numRead += readNow;
266    }
267  }
268
269  /**
270   * Use an intermediate buffer to pipe bytes from an InputStream directly to an OutputStream. The
271   * buffer's contents may be destroyed by this operation.
272   *
273   * @param in the stream to read bytes from.
274   * @param out the stream to write bytes to.
275   * @param buffer the buffer to use for copying bytes; must have length > 0
276   * @param copyLength the number of bytes to copy from the input stream to the output stream
277   */
278  // Visible for testing only
279  static void pipe(
280      final InputStream in, final OutputStream out, final byte[] buffer, int copyLength)
281      throws IOException {
282    while (copyLength > 0) {
283      int maxCopy = Math.min(buffer.length, copyLength);
284      readFully(in, buffer, 0, maxCopy);
285      out.write(buffer, 0, maxCopy);
286      copyLength -= maxCopy;
287    }
288  }
289}
290