zip_reader.h revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4#ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
5#define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
6
7#include <string>
8
9#include "base/basictypes.h"
10#include "base/callback.h"
11#include "base/file_util.h"
12#include "base/files/file.h"
13#include "base/files/file_path.h"
14#include "base/memory/scoped_ptr.h"
15#include "base/memory/weak_ptr.h"
16#include "base/time/time.h"
17
18#if defined(USE_SYSTEM_MINIZIP)
19#include <minizip/unzip.h>
20#else
21#include "third_party/zlib/contrib/minizip/unzip.h"
22#endif
23
24namespace zip {
25
// This class is used for reading zip files. A typical use case of this
// class is to scan entries in a zip file and extract them. The code will
// look like:
//
//   ZipReader reader;
//   reader.Open(zip_file_path);
//   while (reader.HasMore()) {
//     reader.OpenCurrentEntryInZip();
//     reader.ExtractCurrentEntryIntoDirectory(output_directory_path);
//     reader.AdvanceToNextEntry();
//   }
//
// For simplicity, error checking is omitted in the example code above. The
// production code should check return values from all of these functions.
//
// This class can also be used for random access of contents in a zip file
// using LocateAndOpenEntry().
//
44class ZipReader {
45 public:
46  // A callback that is called when the operation is successful.
47  typedef base::Closure SuccessCallback;
48  // A callback that is called when the operation fails.
49  typedef base::Closure FailureCallback;
50  // A callback that is called periodically during the operation with the number
51  // of bytes that have been processed so far.
52  typedef base::Callback<void(int64)> ProgressCallback;
53
54  // This class represents information of an entry (file or directory) in
55  // a zip file.
56  class EntryInfo {
57   public:
58    EntryInfo(const std::string& filename_in_zip,
59              const unz_file_info& raw_file_info);
60
61    // Returns the file path. The path is usually relative like
62    // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
63    const base::FilePath& file_path() const { return file_path_; }
64
65    // Returns the size of the original file (i.e. after uncompressed).
66    // Returns 0 if the entry is a directory.
67    int64 original_size() const { return original_size_; }
68
69    // Returns the last modified time. If the time stored in the zip file was
70    // not valid, the unix epoch will be returned.
71    //
72    // The time stored in the zip archive uses the MS-DOS date and time format.
73    // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
74    // As such the following limitations apply:
75    // * only years from 1980 to 2107 can be represented.
76    // * the time stamp has a 2 second resolution.
77    // * there's no timezone information, so the time is interpreted as local.
78    base::Time last_modified() const { return last_modified_; }
79
80    // Returns true if the entry is a directory.
81    bool is_directory() const { return is_directory_; }
82
83    // Returns true if the entry is unsafe, like having ".." or invalid
84    // UTF-8 characters in its file name, or the file path is absolute.
85    bool is_unsafe() const { return is_unsafe_; }
86
87   private:
88    const base::FilePath file_path_;
89    int64 original_size_;
90    base::Time last_modified_;
91    bool is_directory_;
92    bool is_unsafe_;
93    DISALLOW_COPY_AND_ASSIGN(EntryInfo);
94  };
95
96  ZipReader();
97  ~ZipReader();
98
99  // Opens the zip file specified by |zip_file_path|. Returns true on
100  // success.
101  bool Open(const base::FilePath& zip_file_path);
102
103  // Opens the zip file referred to by the platform file |zip_fd|.
104  // Returns true on success.
105  bool OpenFromPlatformFile(base::PlatformFile zip_fd);
106
107  // Opens the zip data stored in |data|. This class uses a weak reference to
108  // the given sring while extracting files, i.e. the caller should keep the
109  // string until it finishes extracting files.
110  bool OpenFromString(const std::string& data);
111
112  // Closes the currently opened zip file. This function is called in the
113  // destructor of the class, so you usually don't need to call this.
114  void Close();
115
116  // Returns true if there is at least one entry to read. This function is
117  // used to scan entries with AdvanceToNextEntry(), like:
118  //
119  // while (reader.HasMore()) {
120  //   // Do something with the current file here.
121  //   reader.AdvanceToNextEntry();
122  // }
123  bool HasMore();
124
125  // Advances the next entry. Returns true on success.
126  bool AdvanceToNextEntry();
127
128  // Opens the current entry in the zip file. On success, returns true and
129  // updates the the current entry state (i.e. current_entry_info() is
130  // updated). This function should be called before operations over the
131  // current entry like ExtractCurrentEntryToFile().
132  //
133  // Note that there is no CloseCurrentEntryInZip(). The the current entry
134  // state is reset automatically as needed.
135  bool OpenCurrentEntryInZip();
136
137  // Locates an entry in the zip file and opens it. Returns true on
138  // success. This function internally calls OpenCurrentEntryInZip() on
139  // success. On failure, current_entry_info() becomes NULL.
140  bool LocateAndOpenEntry(const base::FilePath& path_in_zip);
141
142  // Extracts the current entry to the given output file path. If the
143  // current file is a directory, just creates a directory
144  // instead. Returns true on success. OpenCurrentEntryInZip() must be
145  // called beforehand.
146  //
147  // This function preserves the timestamp of the original entry. If that
148  // timestamp is not valid, the timestamp will be set to the current time.
149  bool ExtractCurrentEntryToFilePath(const base::FilePath& output_file_path);
150
151  // Asynchronously extracts the current entry to the given output file path.
152  // If the current entry is a directory it just creates the directory
153  // synchronously instead.  OpenCurrentEntryInZip() must be called beforehand.
154  // success_callback will be called on success and failure_callback will be
155  // called on failure.  progress_callback will be called at least once.
156  // Callbacks will be posted to the current MessageLoop in-order.
157  void ExtractCurrentEntryToFilePathAsync(
158      const base::FilePath& output_file_path,
159      const SuccessCallback& success_callback,
160      const FailureCallback& failure_callback,
161      const ProgressCallback& progress_callback);
162
163  // Extracts the current entry to the given output directory path using
164  // ExtractCurrentEntryToFilePath(). Sub directories are created as needed
165  // based on the file path of the current entry. For example, if the file
166  // path in zip is "foo/bar.txt", and the output directory is "output",
167  // "output/foo/bar.txt" will be created.
168  //
169  // Returns true on success. OpenCurrentEntryInZip() must be called
170  // beforehand.
171  //
172  // This function preserves the timestamp of the original entry. If that
173  // timestamp is not valid, the timestamp will be set to the current time.
174  bool ExtractCurrentEntryIntoDirectory(
175      const base::FilePath& output_directory_path);
176
177#if defined(OS_POSIX)
178  // Extracts the current entry by writing directly to a file descriptor.
179  // Does not close the file descriptor. Returns true on success.
180  bool ExtractCurrentEntryToFd(int fd);
181#endif
182
183  // Returns the current entry info. Returns NULL if the current entry is
184  // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
185  EntryInfo* current_entry_info() const {
186    return current_entry_info_.get();
187  }
188
189  // Returns the number of entries in the zip file.
190  // Open() must be called beforehand.
191  int num_entries() const { return num_entries_; }
192
193 private:
194  // Common code used both in Open and OpenFromFd.
195  bool OpenInternal();
196
197  // Resets the internal state.
198  void Reset();
199
200  // Extracts a chunk of the file to the target.  Will post a task for the next
201  // chunk and success/failure/progress callbacks as necessary.
202  void ExtractChunk(base::File target_file,
203                    const SuccessCallback& success_callback,
204                    const FailureCallback& failure_callback,
205                    const ProgressCallback& progress_callback,
206                    const int64 offset);
207
208  unzFile zip_file_;
209  int num_entries_;
210  bool reached_end_;
211  scoped_ptr<EntryInfo> current_entry_info_;
212
213  base::WeakPtrFactory<ZipReader> weak_ptr_factory_;
214
215  DISALLOW_COPY_AND_ASSIGN(ZipReader);
216};
217
218}  // namespace zip
219
220#endif  // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
221