// data_pack.cc revision adf47cdafabb7eed9d3d806ce331c2b810a7acd1
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "ui/base/resource/data_pack.h"
6
7#include <errno.h>
8
9#include "base/file_util.h"
10#include "base/files/memory_mapped_file.h"
11#include "base/logging.h"
12#include "base/memory/ref_counted_memory.h"
13#include "base/metrics/histogram.h"
14#include "base/strings/string_piece.h"
15
16// For details of the file layout, see
17// http://dev.chromium.org/developers/design-documents/linuxresourcesandlocalizedstrings
18
19namespace {
20
21static const uint32 kFileFormatVersion = 4;
22// Length of file header: version, entry count and text encoding type.
23static const size_t kHeaderLength = 2 * sizeof(uint32) + sizeof(uint8);
24
25#pragma pack(push,2)
26struct DataPackEntry {
27  uint16 resource_id;
28  uint32 file_offset;
29
30  static int CompareById(const void* void_key, const void* void_entry) {
31    uint16 key = *reinterpret_cast<const uint16*>(void_key);
32    const DataPackEntry* entry =
33        reinterpret_cast<const DataPackEntry*>(void_entry);
34    if (key < entry->resource_id) {
35      return -1;
36    } else if (key > entry->resource_id) {
37      return 1;
38    } else {
39      return 0;
40    }
41  }
42};
43#pragma pack(pop)
44
45COMPILE_ASSERT(sizeof(DataPackEntry) == 6, size_of_entry_must_be_six);
46
47// We're crashing when trying to load a pak file on Windows.  Add some error
48// codes for logging.
49// http://crbug.com/58056
50enum LoadErrors {
51  INIT_FAILED = 1,
52  BAD_VERSION,
53  INDEX_TRUNCATED,
54  ENTRY_NOT_FOUND,
55  HEADER_TRUNCATED,
56  WRONG_ENCODING,
57  INIT_FAILED_FROM_FILE,
58
59  LOAD_ERRORS_COUNT,
60};
61
62}  // namespace
63
64namespace ui {
65
66DataPack::DataPack(ui::ScaleFactor scale_factor)
67    : resource_count_(0),
68      text_encoding_type_(BINARY),
69      scale_factor_(scale_factor) {
70}
71
72DataPack::~DataPack() {
73}
74
75bool DataPack::LoadFromPath(const base::FilePath& path) {
76  mmap_.reset(new base::MemoryMappedFile);
77  if (!mmap_->Initialize(path)) {
78    DLOG(ERROR) << "Failed to mmap datapack";
79    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", INIT_FAILED,
80                              LOAD_ERRORS_COUNT);
81    mmap_.reset();
82    return false;
83  }
84  return LoadImpl();
85}
86
87bool DataPack::LoadFromFile(base::File file) {
88  return LoadFromFileRegion(file.Pass(),
89                            base::MemoryMappedFile::Region::kWholeFile);
90}
91
92bool DataPack::LoadFromFileRegion(
93    base::File file,
94    const base::MemoryMappedFile::Region& region) {
95  mmap_.reset(new base::MemoryMappedFile);
96  if (!mmap_->Initialize(file.Pass(), region)) {
97    DLOG(ERROR) << "Failed to mmap datapack";
98    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", INIT_FAILED_FROM_FILE,
99                              LOAD_ERRORS_COUNT);
100    mmap_.reset();
101    return false;
102  }
103  return LoadImpl();
104}
105
106bool DataPack::LoadImpl() {
107  // Sanity check the header of the file.
108  if (kHeaderLength > mmap_->length()) {
109    DLOG(ERROR) << "Data pack file corruption: incomplete file header.";
110    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", HEADER_TRUNCATED,
111                              LOAD_ERRORS_COUNT);
112    mmap_.reset();
113    return false;
114  }
115
116  // Parse the header of the file.
117  // First uint32: version; second: resource count;
118  const uint32* ptr = reinterpret_cast<const uint32*>(mmap_->data());
119  uint32 version = ptr[0];
120  if (version != kFileFormatVersion) {
121    LOG(ERROR) << "Bad data pack version: got " << version << ", expected "
122               << kFileFormatVersion;
123    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", BAD_VERSION,
124                              LOAD_ERRORS_COUNT);
125    mmap_.reset();
126    return false;
127  }
128  resource_count_ = ptr[1];
129
130  // third: text encoding.
131  const uint8* ptr_encoding = reinterpret_cast<const uint8*>(ptr + 2);
132  text_encoding_type_ = static_cast<TextEncodingType>(*ptr_encoding);
133  if (text_encoding_type_ != UTF8 && text_encoding_type_ != UTF16 &&
134      text_encoding_type_ != BINARY) {
135    LOG(ERROR) << "Bad data pack text encoding: got " << text_encoding_type_
136               << ", expected between " << BINARY << " and " << UTF16;
137    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", WRONG_ENCODING,
138                              LOAD_ERRORS_COUNT);
139    mmap_.reset();
140    return false;
141  }
142
143  // Sanity check the file.
144  // 1) Check we have enough entries. There's an extra entry after the last item
145  // which gives the length of the last item.
146  if (kHeaderLength + (resource_count_ + 1) * sizeof(DataPackEntry) >
147      mmap_->length()) {
148    LOG(ERROR) << "Data pack file corruption: too short for number of "
149                  "entries specified.";
150    UMA_HISTOGRAM_ENUMERATION("DataPack.Load", INDEX_TRUNCATED,
151                              LOAD_ERRORS_COUNT);
152    mmap_.reset();
153    return false;
154  }
155  // 2) Verify the entries are within the appropriate bounds. There's an extra
156  // entry after the last item which gives us the length of the last item.
157  for (size_t i = 0; i < resource_count_ + 1; ++i) {
158    const DataPackEntry* entry = reinterpret_cast<const DataPackEntry*>(
159        mmap_->data() + kHeaderLength + (i * sizeof(DataPackEntry)));
160    if (entry->file_offset > mmap_->length()) {
161      LOG(ERROR) << "Entry #" << i << " in data pack points off end of file. "
162                 << "Was the file corrupted?";
163      UMA_HISTOGRAM_ENUMERATION("DataPack.Load", ENTRY_NOT_FOUND,
164                                LOAD_ERRORS_COUNT);
165      mmap_.reset();
166      return false;
167    }
168  }
169
170  return true;
171}
172
173bool DataPack::HasResource(uint16 resource_id) const {
174  return !!bsearch(&resource_id, mmap_->data() + kHeaderLength, resource_count_,
175                   sizeof(DataPackEntry), DataPackEntry::CompareById);
176}
177
178bool DataPack::GetStringPiece(uint16 resource_id,
179                              base::StringPiece* data) const {
180  // It won't be hard to make this endian-agnostic, but it's not worth
181  // bothering to do right now.
182#if defined(__BYTE_ORDER)
183  // Linux check
184  COMPILE_ASSERT(__BYTE_ORDER == __LITTLE_ENDIAN,
185                 datapack_assumes_little_endian);
186#elif defined(__BIG_ENDIAN__)
187  // Mac check
188  #error DataPack assumes little endian
189#endif
190
191  const DataPackEntry* target = reinterpret_cast<const DataPackEntry*>(
192      bsearch(&resource_id, mmap_->data() + kHeaderLength, resource_count_,
193              sizeof(DataPackEntry), DataPackEntry::CompareById));
194  if (!target) {
195    return false;
196  }
197
198  const DataPackEntry* next_entry = target + 1;
199  // If the next entry points beyond the end of the file this data pack's entry
200  // table is corrupt. Log an error and return false. See
201  // http://crbug.com/371301.
202  if (next_entry->file_offset > mmap_->length()) {
203    size_t entry_index = target -
204        reinterpret_cast<const DataPackEntry*>(mmap_->data() + kHeaderLength);
205    LOG(ERROR) << "Entry #" << entry_index << " in data pack points off end "
206               << "of file. This should have been caught when loading. Was the "
207               << "file modified?";
208    return false;
209  }
210
211  size_t length = next_entry->file_offset - target->file_offset;
212  data->set(reinterpret_cast<const char*>(mmap_->data() + target->file_offset),
213            length);
214  return true;
215}
216
217base::RefCountedStaticMemory* DataPack::GetStaticMemory(
218    uint16 resource_id) const {
219  base::StringPiece piece;
220  if (!GetStringPiece(resource_id, &piece))
221    return NULL;
222
223  return new base::RefCountedStaticMemory(piece.data(), piece.length());
224}
225
226ResourceHandle::TextEncodingType DataPack::GetTextEncodingType() const {
227  return text_encoding_type_;
228}
229
230ui::ScaleFactor DataPack::GetScaleFactor() const {
231  return scale_factor_;
232}
233
234// static
235bool DataPack::WritePack(const base::FilePath& path,
236                         const std::map<uint16, base::StringPiece>& resources,
237                         TextEncodingType textEncodingType) {
238  FILE* file = base::OpenFile(path, "wb");
239  if (!file)
240    return false;
241
242  if (fwrite(&kFileFormatVersion, sizeof(kFileFormatVersion), 1, file) != 1) {
243    LOG(ERROR) << "Failed to write file version";
244    base::CloseFile(file);
245    return false;
246  }
247
248  // Note: the python version of this function explicitly sorted keys, but
249  // std::map is a sorted associative container, we shouldn't have to do that.
250  uint32 entry_count = resources.size();
251  if (fwrite(&entry_count, sizeof(entry_count), 1, file) != 1) {
252    LOG(ERROR) << "Failed to write entry count";
253    base::CloseFile(file);
254    return false;
255  }
256
257  if (textEncodingType != UTF8 && textEncodingType != UTF16 &&
258      textEncodingType != BINARY) {
259    LOG(ERROR) << "Invalid text encoding type, got " << textEncodingType
260               << ", expected between " << BINARY << " and " << UTF16;
261    base::CloseFile(file);
262    return false;
263  }
264
265  uint8 write_buffer = textEncodingType;
266  if (fwrite(&write_buffer, sizeof(uint8), 1, file) != 1) {
267    LOG(ERROR) << "Failed to write file text resources encoding";
268    base::CloseFile(file);
269    return false;
270  }
271
272  // Each entry is a uint16 + a uint32. We have an extra entry after the last
273  // item so we can compute the size of the list item.
274  uint32 index_length = (entry_count + 1) * sizeof(DataPackEntry);
275  uint32 data_offset = kHeaderLength + index_length;
276  for (std::map<uint16, base::StringPiece>::const_iterator it =
277           resources.begin();
278       it != resources.end(); ++it) {
279    uint16 resource_id = it->first;
280    if (fwrite(&resource_id, sizeof(resource_id), 1, file) != 1) {
281      LOG(ERROR) << "Failed to write id for " << resource_id;
282      base::CloseFile(file);
283      return false;
284    }
285
286    if (fwrite(&data_offset, sizeof(data_offset), 1, file) != 1) {
287      LOG(ERROR) << "Failed to write offset for " << resource_id;
288      base::CloseFile(file);
289      return false;
290    }
291
292    data_offset += it->second.length();
293  }
294
295  // We place an extra entry after the last item that allows us to read the
296  // size of the last item.
297  uint16 resource_id = 0;
298  if (fwrite(&resource_id, sizeof(resource_id), 1, file) != 1) {
299    LOG(ERROR) << "Failed to write extra resource id.";
300    base::CloseFile(file);
301    return false;
302  }
303
304  if (fwrite(&data_offset, sizeof(data_offset), 1, file) != 1) {
305    LOG(ERROR) << "Failed to write extra offset.";
306    base::CloseFile(file);
307    return false;
308  }
309
310  for (std::map<uint16, base::StringPiece>::const_iterator it =
311           resources.begin();
312       it != resources.end(); ++it) {
313    if (fwrite(it->second.data(), it->second.length(), 1, file) != 1) {
314      LOG(ERROR) << "Failed to write data for " << it->first;
315      base::CloseFile(file);
316      return false;
317    }
318  }
319
320  base::CloseFile(file);
321
322  return true;
323}
324
325}  // namespace ui
326