// zip_reader.h revision eb525c5499e34cc9c4b825d6d9e75bb07cc06ace
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
6#define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
7
8#include <string>
9
10#include "base/basictypes.h"
11#include "base/file_util.h"
12#include "base/files/file_path.h"
13#include "base/memory/scoped_ptr.h"
14#include "base/platform_file.h"
15#include "base/time/time.h"
16
17#if defined(USE_SYSTEM_MINIZIP)
18#include <minizip/unzip.h>
19#else
20#include "third_party/zlib/contrib/minizip/unzip.h"
21#endif
22
23namespace zip {
24
// This class is used for reading zip files. A typical use case of this
// class is to scan entries in a zip file and extract them. The code will
// look like:
//
//   ZipReader reader;
//   reader.Open(zip_file_path);
//   while (reader.HasMore()) {
//     reader.OpenCurrentEntryInZip();
//     reader.ExtractCurrentEntryIntoDirectory(output_directory_path);
//     reader.AdvanceToNextEntry();
//   }
//
// For simplicity, error checking is omitted in the example code above.
// Production code should check the return values of all of these functions.
//
// This class can also be used for random access to the contents of a zip
// file using LocateAndOpenEntry().
//
43class ZipReader {
44 public:
45  // This class represents information of an entry (file or directory) in
46  // a zip file.
47  class EntryInfo {
48   public:
49    EntryInfo(const std::string& filename_in_zip,
50              const unz_file_info& raw_file_info);
51
52    // Returns the file path. The path is usually relative like
53    // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
54    const base::FilePath& file_path() const { return file_path_; }
55
56    // Returns the size of the original file (i.e. after uncompressed).
57    // Returns 0 if the entry is a directory.
58    int64 original_size() const { return original_size_; }
59
60    // Returns the last modified time.
61    base::Time last_modified() const { return last_modified_; }
62
63    // Returns true if the entry is a directory.
64    bool is_directory() const { return is_directory_; }
65
66    // Returns true if the entry is unsafe, like having ".." or invalid
67    // UTF-8 characters in its file name, or the file path is absolute.
68    bool is_unsafe() const { return is_unsafe_; }
69
70   private:
71    const base::FilePath file_path_;
72    int64 original_size_;
73    base::Time last_modified_;
74    bool is_directory_;
75    bool is_unsafe_;
76    DISALLOW_COPY_AND_ASSIGN(EntryInfo);
77  };
78
79  ZipReader();
80  ~ZipReader();
81
82  // Opens the zip file specified by |zip_file_path|. Returns true on
83  // success.
84  bool Open(const base::FilePath& zip_file_path);
85
86  // Opens the zip file referred to by the platform file |zip_fd|.
87  // Returns true on success.
88  bool OpenFromPlatformFile(base::PlatformFile zip_fd);
89
90  // Opens the zip data stored in |data|. This class uses a weak reference to
91  // the given sring while extracting files, i.e. the caller should keep the
92  // string until it finishes extracting files.
93  bool OpenFromString(const std::string& data);
94
95  // Closes the currently opened zip file. This function is called in the
96  // destructor of the class, so you usually don't need to call this.
97  void Close();
98
99  // Returns true if there is at least one entry to read. This function is
100  // used to scan entries with AdvanceToNextEntry(), like:
101  //
102  // while (reader.HasMore()) {
103  //   // Do something with the current file here.
104  //   reader.AdvanceToNextEntry();
105  // }
106  bool HasMore();
107
108  // Advances the next entry. Returns true on success.
109  bool AdvanceToNextEntry();
110
111  // Opens the current entry in the zip file. On success, returns true and
112  // updates the the current entry state (i.e. current_entry_info() is
113  // updated). This function should be called before operations over the
114  // current entry like ExtractCurrentEntryToFile().
115  //
116  // Note that there is no CloseCurrentEntryInZip(). The the current entry
117  // state is reset automatically as needed.
118  bool OpenCurrentEntryInZip();
119
120  // Locates an entry in the zip file and opens it. Returns true on
121  // success. This function internally calls OpenCurrentEntryInZip() on
122  // success. On failure, current_entry_info() becomes NULL.
123  bool LocateAndOpenEntry(const base::FilePath& path_in_zip);
124
125  // Extracts the current entry to the given output file path. If the
126  // current file is a directory, just creates a directory
127  // instead. Returns true on success. OpenCurrentEntryInZip() must be
128  // called beforehand.
129  //
130  // This function does not preserve the timestamp of the original entry.
131  bool ExtractCurrentEntryToFilePath(const base::FilePath& output_file_path);
132
133  // Extracts the current entry to the given output directory path using
134  // ExtractCurrentEntryToFilePath(). Sub directories are created as needed
135  // based on the file path of the current entry. For example, if the file
136  // path in zip is "foo/bar.txt", and the output directory is "output",
137  // "output/foo/bar.txt" will be created.
138  //
139  // Returns true on success. OpenCurrentEntryInZip() must be called
140  // beforehand.
141  bool ExtractCurrentEntryIntoDirectory(
142      const base::FilePath& output_directory_path);
143
144#if defined(OS_POSIX)
145  // Extracts the current entry by writing directly to a file descriptor.
146  // Does not close the file descriptor. Returns true on success.
147  bool ExtractCurrentEntryToFd(int fd);
148#endif
149
150  // Returns the current entry info. Returns NULL if the current entry is
151  // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
152  EntryInfo* current_entry_info() const {
153    return current_entry_info_.get();
154  }
155
156  // Returns the number of entries in the zip file.
157  // Open() must be called beforehand.
158  int num_entries() const { return num_entries_; }
159
160 private:
161  // Common code used both in Open and OpenFromFd.
162  bool OpenInternal();
163
164  // Resets the internal state.
165  void Reset();
166
167  unzFile zip_file_;
168  int num_entries_;
169  bool reached_end_;
170  scoped_ptr<EntryInfo> current_entry_info_;
171
172  DISALLOW_COPY_AND_ASSIGN(ZipReader);
173};
174
175}  // namespace zip
176
177#endif  // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
178