1/*
2 * Copyright (C) 2014 Square, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package okio;
17
18import java.io.EOFException;
19import java.io.IOException;
20import java.util.zip.CRC32;
21import java.util.zip.Inflater;
22
23/**
24 * A source that uses <a href="http://www.ietf.org/rfc/rfc1952.txt">GZIP</a> to
25 * decompress data read from another source.
26 */
27public final class GzipSource implements Source {
28  private static final byte FHCRC = 1;
29  private static final byte FEXTRA = 2;
30  private static final byte FNAME = 3;
31  private static final byte FCOMMENT = 4;
32
33  private static final byte SECTION_HEADER = 0;
34  private static final byte SECTION_BODY = 1;
35  private static final byte SECTION_TRAILER = 2;
36  private static final byte SECTION_DONE = 3;
37
38  /** The current section. Always progresses forward. */
39  private int section = SECTION_HEADER;
40
41  /**
42   * Our source should yield a GZIP header (which we consume directly), followed
43   * by deflated bytes (which we consume via an InflaterSource), followed by a
44   * GZIP trailer (which we also consume directly).
45   */
46  private final BufferedSource source;
47
48  /** The inflater used to decompress the deflated body. */
49  private final Inflater inflater;
50
51  /**
52   * The inflater source takes care of moving data between compressed source and
53   * decompressed sink buffers.
54   */
55  private final InflaterSource inflaterSource;
56
57  /** Checksum used to check both the GZIP header and decompressed body. */
58  private final CRC32 crc = new CRC32();
59
60  public GzipSource(Source source) {
61    if (source == null) throw new IllegalArgumentException("source == null");
62    this.inflater = new Inflater(true);
63    this.source = Okio.buffer(source);
64    this.inflaterSource = new InflaterSource(this.source, inflater);
65  }
66
67  @Override public long read(Buffer sink, long byteCount) throws IOException {
68    if (byteCount < 0) throw new IllegalArgumentException("byteCount < 0: " + byteCount);
69    if (byteCount == 0) return 0;
70
71    // If we haven't consumed the header, we must consume it before anything else.
72    if (section == SECTION_HEADER) {
73      consumeHeader();
74      section = SECTION_BODY;
75    }
76
77    // Attempt to read at least a byte of the body. If we do, we're done.
78    if (section == SECTION_BODY) {
79      long offset = sink.size;
80      long result = inflaterSource.read(sink, byteCount);
81      if (result != -1) {
82        updateCrc(sink, offset, result);
83        return result;
84      }
85      section = SECTION_TRAILER;
86    }
87
88    // The body is exhausted; time to read the trailer. We always consume the
89    // trailer before returning a -1 exhausted result; that way if you read to
90    // the end of a GzipSource you guarantee that the CRC has been checked.
91    if (section == SECTION_TRAILER) {
92      consumeTrailer();
93      section = SECTION_DONE;
94
95      // Gzip streams self-terminate: they return -1 before their underlying
96      // source returns -1. Here we attempt to force the underlying stream to
97      // return -1 which may trigger it to release its resources. If it doesn't
98      // return -1, then our Gzip data finished prematurely!
99      if (!source.exhausted()) {
100        throw new IOException("gzip finished without exhausting source");
101      }
102    }
103
104    return -1;
105  }
106
107  private void consumeHeader() throws IOException {
108    // Read the 10-byte header. We peek at the flags byte first so we know if we
109    // need to CRC the entire header. Then we read the magic ID1ID2 sequence.
110    // We can skip everything else in the first 10 bytes.
111    // +---+---+---+---+---+---+---+---+---+---+
112    // |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
113    // +---+---+---+---+---+---+---+---+---+---+
114    source.require(10);
115    byte flags = source.buffer().getByte(3);
116    boolean fhcrc = ((flags >> FHCRC) & 1) == 1;
117    if (fhcrc) updateCrc(source.buffer(), 0, 10);
118
119    short id1id2 = source.readShort();
120    checkEqual("ID1ID2", (short) 0x1f8b, id1id2);
121    source.skip(8);
122
123    // Skip optional extra fields.
124    // +---+---+=================================+
125    // | XLEN  |...XLEN bytes of "extra field"...| (more-->)
126    // +---+---+=================================+
127    if (((flags >> FEXTRA) & 1) == 1) {
128      source.require(2);
129      if (fhcrc) updateCrc(source.buffer(), 0, 2);
130      int xlen = source.buffer().readShortLe();
131      source.require(xlen);
132      if (fhcrc) updateCrc(source.buffer(), 0, xlen);
133      source.skip(xlen);
134    }
135
136    // Skip an optional 0-terminated name.
137    // +=========================================+
138    // |...original file name, zero-terminated...| (more-->)
139    // +=========================================+
140    if (((flags >> FNAME) & 1) == 1) {
141      long index = source.indexOf((byte) 0);
142      if (index == -1) throw new EOFException();
143      if (fhcrc) updateCrc(source.buffer(), 0, index + 1);
144      source.skip(index + 1);
145    }
146
147    // Skip an optional 0-terminated comment.
148    // +===================================+
149    // |...file comment, zero-terminated...| (more-->)
150    // +===================================+
151    if (((flags >> FCOMMENT) & 1) == 1) {
152      long index = source.indexOf((byte) 0);
153      if (index == -1) throw new EOFException();
154      if (fhcrc) updateCrc(source.buffer(), 0, index + 1);
155      source.skip(index + 1);
156    }
157
158    // Confirm the optional header CRC.
159    // +---+---+
160    // | CRC16 |
161    // +---+---+
162    if (fhcrc) {
163      checkEqual("FHCRC", source.readShortLe(), (short) crc.getValue());
164      crc.reset();
165    }
166  }
167
168  private void consumeTrailer() throws IOException {
169    // Read the eight-byte trailer. Confirm the body's CRC and size.
170    // +---+---+---+---+---+---+---+---+
171    // |     CRC32     |     ISIZE     |
172    // +---+---+---+---+---+---+---+---+
173    checkEqual("CRC", source.readIntLe(), (int) crc.getValue());
174    checkEqual("ISIZE", source.readIntLe(), inflater.getTotalOut());
175  }
176
177  @Override public Timeout timeout() {
178    return source.timeout();
179  }
180
181  @Override public void close() throws IOException {
182    inflaterSource.close();
183  }
184
185  /** Updates the CRC with the given bytes. */
186  private void updateCrc(Buffer buffer, long offset, long byteCount) {
187    // Skip segments that we aren't checksumming.
188    Segment s = buffer.head;
189    for (; offset >= (s.limit - s.pos); s = s.next) {
190      offset -= (s.limit - s.pos);
191    }
192
193    // Checksum one segment at a time.
194    for (; byteCount > 0; s = s.next) {
195      int pos = (int) (s.pos + offset);
196      int toUpdate = (int) Math.min(s.limit - pos, byteCount);
197      crc.update(s.data, pos, toUpdate);
198      byteCount -= toUpdate;
199      offset = 0;
200    }
201  }
202
203  private void checkEqual(String name, int expected, int actual) throws IOException {
204    if (actual != expected) {
205      throw new IOException(String.format(
206          "%s: actual 0x%08x != expected 0x%08x", name, actual, expected));
207    }
208  }
209}
210