1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4#ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 5#define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 6 7#include <string> 8 9#include "base/basictypes.h" 10#include "base/callback.h" 11#include "base/files/file.h" 12#include "base/files/file_path.h" 13#include "base/files/file_util.h" 14#include "base/memory/scoped_ptr.h" 15#include "base/memory/weak_ptr.h" 16#include "base/time/time.h" 17 18#if defined(USE_SYSTEM_MINIZIP) 19#include <minizip/unzip.h> 20#else 21#include "third_party/zlib/contrib/minizip/unzip.h" 22#endif 23 24namespace zip { 25 26// This class is used for reading zip files. A typical use case of this 27// class is to scan entries in a zip file and extract them. The code will 28// look like: 29// 30// ZipReader reader; 31// reader.Open(zip_file_path); 32// while (reader.HasMore()) { 33// reader.OpenCurrentEntryInZip(); 34// reader.ExtractCurrentEntryToDirectory(output_directory_path); 35// reader.AdvanceToNextEntry(); 36// } 37// 38// For simplicity, error checking is omitted in the example code above. The 39// production code should check return values from all of these functions. 40// 41// This calls can also be used for random access of contents in a zip file 42// using LocateAndOpenEntry(). 43// 44class ZipReader { 45 public: 46 // A callback that is called when the operation is successful. 47 typedef base::Closure SuccessCallback; 48 // A callback that is called when the operation fails. 49 typedef base::Closure FailureCallback; 50 // A callback that is called periodically during the operation with the number 51 // of bytes that have been processed so far. 52 typedef base::Callback<void(int64)> ProgressCallback; 53 54 // This class represents information of an entry (file or directory) in 55 // a zip file. 56 class EntryInfo { 57 public: 58 EntryInfo(const std::string& filename_in_zip, 59 const unz_file_info& raw_file_info); 60 61 // Returns the file path. The path is usually relative like 62 // "foo/bar.txt", but if it's absolute, is_unsafe() returns true. 63 const base::FilePath& file_path() const { return file_path_; } 64 65 // Returns the size of the original file (i.e. after uncompressed). 66 // Returns 0 if the entry is a directory. 67 // Note: this value should not be trusted, because it is stored as metadata 68 // in the zip archive and can be different from the real uncompressed size. 69 int64 original_size() const { return original_size_; } 70 71 // Returns the last modified time. If the time stored in the zip file was 72 // not valid, the unix epoch will be returned. 73 // 74 // The time stored in the zip archive uses the MS-DOS date and time format. 75 // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx 76 // As such the following limitations apply: 77 // * only years from 1980 to 2107 can be represented. 78 // * the time stamp has a 2 second resolution. 79 // * there's no timezone information, so the time is interpreted as local. 80 base::Time last_modified() const { return last_modified_; } 81 82 // Returns true if the entry is a directory. 83 bool is_directory() const { return is_directory_; } 84 85 // Returns true if the entry is unsafe, like having ".." or invalid 86 // UTF-8 characters in its file name, or the file path is absolute. 87 bool is_unsafe() const { return is_unsafe_; } 88 89 private: 90 const base::FilePath file_path_; 91 int64 original_size_; 92 base::Time last_modified_; 93 bool is_directory_; 94 bool is_unsafe_; 95 DISALLOW_COPY_AND_ASSIGN(EntryInfo); 96 }; 97 98 ZipReader(); 99 ~ZipReader(); 100 101 // Opens the zip file specified by |zip_file_path|. Returns true on 102 // success. 103 bool Open(const base::FilePath& zip_file_path); 104 105 // Opens the zip file referred to by the platform file |zip_fd|. 106 // Returns true on success. 107 bool OpenFromPlatformFile(base::PlatformFile zip_fd); 108 109 // Opens the zip data stored in |data|. This class uses a weak reference to 110 // the given sring while extracting files, i.e. the caller should keep the 111 // string until it finishes extracting files. 112 bool OpenFromString(const std::string& data); 113 114 // Closes the currently opened zip file. This function is called in the 115 // destructor of the class, so you usually don't need to call this. 116 void Close(); 117 118 // Returns true if there is at least one entry to read. This function is 119 // used to scan entries with AdvanceToNextEntry(), like: 120 // 121 // while (reader.HasMore()) { 122 // // Do something with the current file here. 123 // reader.AdvanceToNextEntry(); 124 // } 125 bool HasMore(); 126 127 // Advances the next entry. Returns true on success. 128 bool AdvanceToNextEntry(); 129 130 // Opens the current entry in the zip file. On success, returns true and 131 // updates the the current entry state (i.e. current_entry_info() is 132 // updated). This function should be called before operations over the 133 // current entry like ExtractCurrentEntryToFile(). 134 // 135 // Note that there is no CloseCurrentEntryInZip(). The the current entry 136 // state is reset automatically as needed. 137 bool OpenCurrentEntryInZip(); 138 139 // Locates an entry in the zip file and opens it. Returns true on 140 // success. This function internally calls OpenCurrentEntryInZip() on 141 // success. On failure, current_entry_info() becomes NULL. 142 bool LocateAndOpenEntry(const base::FilePath& path_in_zip); 143 144 // Extracts the current entry to the given output file path. If the 145 // current file is a directory, just creates a directory 146 // instead. Returns true on success. OpenCurrentEntryInZip() must be 147 // called beforehand. 148 // 149 // This function preserves the timestamp of the original entry. If that 150 // timestamp is not valid, the timestamp will be set to the current time. 151 bool ExtractCurrentEntryToFilePath(const base::FilePath& output_file_path); 152 153 // Asynchronously extracts the current entry to the given output file path. 154 // If the current entry is a directory it just creates the directory 155 // synchronously instead. OpenCurrentEntryInZip() must be called beforehand. 156 // success_callback will be called on success and failure_callback will be 157 // called on failure. progress_callback will be called at least once. 158 // Callbacks will be posted to the current MessageLoop in-order. 159 void ExtractCurrentEntryToFilePathAsync( 160 const base::FilePath& output_file_path, 161 const SuccessCallback& success_callback, 162 const FailureCallback& failure_callback, 163 const ProgressCallback& progress_callback); 164 165 // Extracts the current entry to the given output directory path using 166 // ExtractCurrentEntryToFilePath(). Sub directories are created as needed 167 // based on the file path of the current entry. For example, if the file 168 // path in zip is "foo/bar.txt", and the output directory is "output", 169 // "output/foo/bar.txt" will be created. 170 // 171 // Returns true on success. OpenCurrentEntryInZip() must be called 172 // beforehand. 173 // 174 // This function preserves the timestamp of the original entry. If that 175 // timestamp is not valid, the timestamp will be set to the current time. 176 bool ExtractCurrentEntryIntoDirectory( 177 const base::FilePath& output_directory_path); 178 179#if defined(OS_POSIX) 180 // Extracts the current entry by writing directly to a file descriptor. 181 // Does not close the file descriptor. Returns true on success. 182 bool ExtractCurrentEntryToFd(int fd); 183#endif 184 185 // Extracts the current entry into memory. If the current entry is a directory 186 // the |output| parameter is set to the empty string. If the current entry is 187 // a file, the |output| parameter is filled with its contents. Returns true on 188 // success. OpenCurrentEntryInZip() must be called beforehand. 189 // Note: the |output| parameter can be filled with a big amount of data, avoid 190 // passing it around by value, but by reference or pointer. 191 // Note: the value returned by EntryInfo::original_size() cannot be 192 // trusted, so the real size of the uncompressed contents can be different. 193 // Use max_read_bytes to limit the ammount of memory used to carry the entry. 194 // If the real size of the uncompressed data is bigger than max_read_bytes 195 // then false is returned. |max_read_bytes| must be non-zero. 196 bool ExtractCurrentEntryToString( 197 size_t max_read_bytes, 198 std::string* output) const; 199 200 // Returns the current entry info. Returns NULL if the current entry is 201 // not yet opened. OpenCurrentEntryInZip() must be called beforehand. 202 EntryInfo* current_entry_info() const { 203 return current_entry_info_.get(); 204 } 205 206 // Returns the number of entries in the zip file. 207 // Open() must be called beforehand. 208 int num_entries() const { return num_entries_; } 209 210 private: 211 // Common code used both in Open and OpenFromFd. 212 bool OpenInternal(); 213 214 // Resets the internal state. 215 void Reset(); 216 217 // Extracts a chunk of the file to the target. Will post a task for the next 218 // chunk and success/failure/progress callbacks as necessary. 219 void ExtractChunk(base::File target_file, 220 const SuccessCallback& success_callback, 221 const FailureCallback& failure_callback, 222 const ProgressCallback& progress_callback, 223 const int64 offset); 224 225 unzFile zip_file_; 226 int num_entries_; 227 bool reached_end_; 228 scoped_ptr<EntryInfo> current_entry_info_; 229 230 base::WeakPtrFactory<ZipReader> weak_ptr_factory_; 231 232 DISALLOW_COPY_AND_ASSIGN(ZipReader); 233}; 234 235} // namespace zip 236 237#endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 238