1/*
2 * Copyright (C) 2014 Square, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package okio;
17
18import java.io.EOFException;
19import java.io.IOException;
20import java.util.zip.CRC32;
21import java.util.zip.Inflater;
22
23public final class GzipSource implements Source {
24  private static final byte FHCRC = 1;
25  private static final byte FEXTRA = 2;
26  private static final byte FNAME = 3;
27  private static final byte FCOMMENT = 4;
28
29  private static final byte SECTION_HEADER = 0;
30  private static final byte SECTION_BODY = 1;
31  private static final byte SECTION_TRAILER = 2;
32  private static final byte SECTION_DONE = 3;
33
34  /** The current section. Always progresses forward. */
35  private int section = SECTION_HEADER;
36
37  /**
38   * Our source should yield a GZIP header (which we consume directly), followed
39   * by deflated bytes (which we consume via an InflaterSource), followed by a
40   * GZIP trailer (which we also consume directly).
41   */
42  private final BufferedSource source;
43
44  /** The inflater used to decompress the deflated body. */
45  private final Inflater inflater;
46
47  /**
48   * The inflater source takes care of moving data between compressed source and
49   * decompressed sink buffers.
50   */
51  private final InflaterSource inflaterSource;
52
53  /** Checksum used to check both the GZIP header and decompressed body. */
54  private final CRC32 crc = new CRC32();
55
56  public GzipSource(Source source) throws IOException {
57    this.inflater = new Inflater(true);
58    this.source = Okio.buffer(source);
59    this.inflaterSource = new InflaterSource(this.source, inflater);
60  }
61
62  @Override public long read(OkBuffer sink, long byteCount) throws IOException {
63    if (byteCount < 0) throw new IllegalArgumentException("byteCount < 0: " + byteCount);
64    if (byteCount == 0) return 0;
65
66    // If we haven't consumed the header, we must consume it before anything else.
67    if (section == SECTION_HEADER) {
68      consumeHeader();
69      section = SECTION_BODY;
70    }
71
72    // Attempt to read at least a byte of the body. If we do, we're done.
73    if (section == SECTION_BODY) {
74      long offset = sink.size;
75      long result = inflaterSource.read(sink, byteCount);
76      if (result != -1) {
77        updateCrc(sink, offset, result);
78        return result;
79      }
80      section = SECTION_TRAILER;
81    }
82
83    // The body is exhausted; time to read the trailer. We always consume the
84    // trailer before returning a -1 exhausted result; that way if you read to
85    // the end of a GzipSource you guarantee that the CRC has been checked.
86    if (section == SECTION_TRAILER) {
87      consumeTrailer();
88      section = SECTION_DONE;
89
90      // Gzip streams self-terminate: they return -1 before their underlying
91      // source returns -1. Here we attempt to force the underlying stream to
92      // return -1 which may trigger it to release its resources. If it doesn't
93      // return -1, then our Gzip data finished prematurely!
94      if (!source.exhausted()) {
95        throw new IOException("gzip finished without exhausting source");
96      }
97    }
98
99    return -1;
100  }
101
102  private void consumeHeader() throws IOException {
103    // Read the 10-byte header. We peek at the flags byte first so we know if we
104    // need to CRC the entire header. Then we read the magic ID1ID2 sequence.
105    // We can skip everything else in the first 10 bytes.
106    // +---+---+---+---+---+---+---+---+---+---+
107    // |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
108    // +---+---+---+---+---+---+---+---+---+---+
109    source.require(10);
110    byte flags = source.buffer().getByte(3);
111    boolean fhcrc = ((flags >> FHCRC) & 1) == 1;
112    if (fhcrc) updateCrc(source.buffer(), 0, 10);
113
114    short id1id2 = source.readShort();
115    checkEqual("ID1ID2", (short) 0x1f8b, id1id2);
116    source.skip(8);
117
118    // Skip optional extra fields.
119    // +---+---+=================================+
120    // | XLEN  |...XLEN bytes of "extra field"...| (more-->)
121    // +---+---+=================================+
122    if (((flags >> FEXTRA) & 1) == 1) {
123      source.require(2);
124      if (fhcrc) updateCrc(source.buffer(), 0, 2);
125      int xlen = source.buffer().readShortLe();
126      source.require(xlen);
127      if (fhcrc) updateCrc(source.buffer(), 0, xlen);
128      source.skip(xlen);
129    }
130
131    // Skip an optional 0-terminated name.
132    // +=========================================+
133    // |...original file name, zero-terminated...| (more-->)
134    // +=========================================+
135    if (((flags >> FNAME) & 1) == 1) {
136      long index = source.indexOf((byte) 0);
137      if (index == -1) throw new EOFException();
138      if (fhcrc) updateCrc(source.buffer(), 0, index + 1);
139      source.skip(index + 1);
140    }
141
142    // Skip an optional 0-terminated comment.
143    // +===================================+
144    // |...file comment, zero-terminated...| (more-->)
145    // +===================================+
146    if (((flags >> FCOMMENT) & 1) == 1) {
147      long index = source.indexOf((byte) 0);
148      if (index == -1) throw new EOFException();
149      if (fhcrc) updateCrc(source.buffer(), 0, index + 1);
150      source.skip(index + 1);
151    }
152
153    // Confirm the optional header CRC.
154    // +---+---+
155    // | CRC16 |
156    // +---+---+
157    if (fhcrc) {
158      checkEqual("FHCRC", source.readShortLe(), (short) crc.getValue());
159      crc.reset();
160    }
161  }
162
163  private void consumeTrailer() throws IOException {
164    // Read the eight-byte trailer. Confirm the body's CRC and size.
165    // +---+---+---+---+---+---+---+---+
166    // |     CRC32     |     ISIZE     |
167    // +---+---+---+---+---+---+---+---+
168    checkEqual("CRC", source.readIntLe(), (int) crc.getValue());
169    checkEqual("ISIZE", source.readIntLe(), inflater.getTotalOut());
170  }
171
172  @Override public Source deadline(Deadline deadline) {
173    source.deadline(deadline);
174    return this;
175  }
176
177  @Override public void close() throws IOException {
178    inflaterSource.close();
179  }
180
181  /** Updates the CRC with the given bytes. */
182  private void updateCrc(OkBuffer buffer, long offset, long byteCount) {
183    for (Segment s = buffer.head; byteCount > 0; s = s.next) {
184      int segmentByteCount = s.limit - s.pos;
185      if (offset < segmentByteCount) {
186        int toUpdate = (int) Math.min(byteCount, segmentByteCount - offset);
187        crc.update(s.data, (int) (s.pos + offset), toUpdate);
188        byteCount -= toUpdate;
189      }
190      offset -= segmentByteCount; // Track the offset of the current segment.
191    }
192  }
193
194  private void checkEqual(String name, int expected, int actual) throws IOException {
195    if (actual != expected) {
196      throw new IOException(String.format(
197          "%s: actual 0x%08x != expected 0x%08x", name, actual, expected));
198    }
199  }
200}
201