15f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// found in the LICENSE file.
45f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
55f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// The cache is stored on disk as a collection of block-files, plus an index
65f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// plus a collection of external files.
75f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//
85f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// Any data blob bigger than kMaxBlockSize (disk_cache/addr.h) will be stored in
95f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// a separate file named f_xxx where x is a hexadecimal number. Shorter data
105f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// will be stored as a series of blocks on a block-file. In any case, CacheAddr
115f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// represents the address of the data inside the cache.
125f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//
135f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// The index is actually a collection of four files that store a hash table with
145f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// allocation bitmaps and backup data. Hash collisions are handled directly by
155f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// the table, which from some point of view behaves like a 4-way associative
165f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// cache with overflow buckets (so not really open addressing).
175f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//
// Basically the hash table is a collection of buckets. The first part of the
// table has a fixed number of buckets and it is directly addressed by the hash,
// while the second part of the table (stored on a second file) has a variable
// number of buckets. Each bucket stores up to four cells (each cell represents
// a possible entry). The index bitmap tracks the state of individual cells.
235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//
245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// The last element of the cache is the block-file. A block file is a file
255f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// designed to store blocks of data of a given size. For more details see
265f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// disk_cache/disk_format_base.h
275f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//
285f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// A new cache is initialized with a set of block files (named data_0 through
295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// data_6), each one dedicated to store blocks of a given size or function. The
305f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// number at the end of the file name is the block file number (in decimal).
315f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//
325f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// There are three "special" types of blocks: normal entries, evicted entries
335f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// and control data for external files.
345f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//
355f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// The files that store internal information for the cache (blocks and index)
365f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// are memory mapped. They have a location that is signaled every time the
375f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// internal structures are modified, so it is possible to detect (most of the
385f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// time) when the process dies in the middle of an update. There are dedicated
395f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// backup files for cache bitmaps, used to detect entries out of date.
405f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//
// Although cache files are to be consumed on the same machine that creates
// them, if files are to be moved across machines, little endian storage is
// assumed.
445f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
455f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#ifndef NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_
465f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#define NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_
475f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
485f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#include "base/basictypes.h"
495f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#include "net/disk_cache/blockfile/disk_format_base.h"
505f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
515f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)namespace disk_cache {
525f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
535f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)const int kBaseTableLen = 0x400;
545f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)const uint32 kIndexMagicV3 = 0xC103CAC3;
555f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)const uint32 kVersion3 = 0x30000;  // Version 3.0.
565f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
// Flags for a given cache. Each flag occupies its own bit, so flags can be
// combined with bitwise OR.
// NOTE(review): presumably stored in IndexHeaderV3.flags -- confirm.
enum CacheFlags {
  SMALL_CACHE = 1 << 0,       // See IndexCell.
  CACHE_EVICTION_2 = 1 << 1,  // Keep multiple lists for eviction.
  CACHE_EVICTED = 1 << 2      // Already evicted at least one entry.
};
635f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
645f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// Header for the master index file.
655f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)struct IndexHeaderV3 {
665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  uint32      magic;
675f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  uint32      version;
685f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       num_entries;   // Number of entries currently stored.
695f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       num_bytes;     // Total size of the stored data.
705f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       last_file;     // Last external file created.
715f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       reserved1;
725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  CacheAddr   stats;         // Storage for usage data.
735f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       table_len;     // Actual size of the table.
745f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       crash;         // Signals a previous crash.
755f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       experiment;    // Id of an ongoing test.
765f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       max_bytes;     // Total maximum size of the stored data.
775f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  uint32      flags;
785f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       used_cells;
795f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       max_bucket;
805f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  uint64      create_time;   // Creation time for this set of files.
815f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  uint64      base_time;     // Current base for timestamps.
825f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  uint64      old_time;      // Previous time used for timestamps.
835f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       max_block_file;
845f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       num_no_use_entries;
855f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       num_low_use_entries;
865f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       num_high_use_entries;
875f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       reserved;
885f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       num_evicted_entries;
895f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  int32       pad[6];
905f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)};
915f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
925f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)const int kBaseBitmapBytes = 3968;
935f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// The IndexBitmap is directly saved to a file named index. The file grows in
945f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// page increments (4096 bytes), but all bits don't have to be in use at any
955f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// given time. The required file size can be computed from header.table_len.
965f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)struct IndexBitmap {
975f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  IndexHeaderV3   header;
985f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  uint32          bitmap[kBaseBitmapBytes / 4];  // First page of the bitmap.
995f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)};
1005f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)COMPILE_ASSERT(sizeof(IndexBitmap) == 4096, bad_IndexHeader);
1015f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1025f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// Possible states for a given entry.
1035f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)enum EntryState {
1045f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_FREE = 0,   // Available slot.
1055f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_NEW,        // The entry is being created.
1065f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_OPEN,       // The entry is being accessed.
1075f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_MODIFIED,   // The entry is being modified.
1085f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_DELETED,    // The entry is being deleted.
1095f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_FIXING,     // Inconsistent state. The entry is being verified.
1105f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_USED        // The slot is in use (entry is present).
1115f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)};
1125f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)COMPILE_ASSERT(ENTRY_USED <= 7, state_uses_3_bits);
1135f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1145f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)enum EntryGroup {
1155f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_NO_USE = 0,   // The entry has not been reused.
1165f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_LOW_USE,      // The entry has low reuse.
1175f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_HIGH_USE,     // The entry has high reuse.
1185f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_RESERVED,     // Reserved for future use.
1195f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ENTRY_EVICTED       // The entry was deleted.
1205f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)};
1215f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)COMPILE_ASSERT(ENTRY_USED <= 7, group_uses_3_bits);
1225f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#pragma pack(push, 1)
1245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)struct IndexCell {
1255f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  void Clear() { memset(this, 0, sizeof(*this)); }
1265f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1275f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  // A cell is a 9 byte bit-field that stores 7 values:
1285f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //   location : 22 bits
1295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //   id : 18 bits
1305f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //   timestamp : 20 bits
1315f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //   reuse : 4 bits
1325f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //   state : 3 bits
1335f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //   group : 3 bits
1345f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //   sum : 2 bits
1355f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  // The id is derived from the full hash of the entry.
1365f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //
1375f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  // The actual layout is as follows:
1385f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //
1395f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  // first_part (low order 32 bits):
1405f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //   0000 0000 0011 1111 1111 1111 1111 1111 : location
141  //   1111 1111 1100 0000 0000 0000 0000 0000 : id
142  //
143  // first_part (high order 32 bits):
144  //   0000 0000 0000 0000 0000 0000 1111 1111 : id
145  //   0000 1111 1111 1111 1111 1111 0000 0000 : timestamp
146  //   1111 0000 0000 0000 0000 0000 0000 0000 : reuse
147  //
148  // last_part:
149  //   0000 0111 : state
150  //   0011 1000 : group
151  //   1100 0000 : sum
152  //
153  // The small-cache version of the format moves some bits from the location to
154  // the id fileds, like so:
155  //   location : 16 bits
156  //   id : 24 bits
157  //
158  // first_part (low order 32 bits):
159  //   0000 0000 0000 0000 1111 1111 1111 1111 : location
160  //   1111 1111 1111 1111 0000 0000 0000 0000 : id
161  //
162  // The actual bit distribution between location and id is determined by the
163  // table size (IndexHeaderV3.table_len). Tables smaller than 65536 entries
164  // use the small-cache version; after that size, caches should have the
165  // SMALL_CACHE flag cleared.
166  //
167  // To locate a given entry after recovering the location from the cell, the
168  // file type and file number are appended (see disk_cache/addr.h). For a large
169  // table only the file type is implied; for a small table, the file number
170  // is also implied, and it should be the first file for that type of entry,
171  // as determined by the EntryGroup (two files in total, one for active entries
172  // and another one for evicted entries).
173  //
174  // For example, a small table may store something like 0x1234 as the location
175  // field. That means it stores the entry number 0x1234. If that record belongs
176  // to a deleted entry, the regular cache address may look something like
177  //     BLOCK_EVICTED + 1 block + file number 6 + entry number 0x1234
178  //     so Addr = 0xf0061234
179  //
180  // If that same Addr is stored on a large table, the location field would be
181  // 0x61234
182
183  uint64      first_part;
184  uint8       last_part;
185};
186COMPILE_ASSERT(sizeof(IndexCell) == 9, bad_IndexCell);
187
188const int kCellsPerBucket = 4;
189struct IndexBucket {
190  IndexCell   cells[kCellsPerBucket];
191  int32       next;
192  uint32      hash;  // The high order byte is reserved (should be zero).
193};
194COMPILE_ASSERT(sizeof(IndexBucket) == 44, bad_IndexBucket);
195const int kBytesPerCell = 44 / kCellsPerBucket;
196
197// The main cache index. Backed by a file named index_tb1.
198// The extra table (index_tb2) has a similar format, but different size.
199struct Index {
200  // Default size. Actual size controlled by header.table_len.
201  IndexBucket table[kBaseTableLen / kCellsPerBucket];
202};
203#pragma pack(pop)
204
// Flags that can be applied to an entry. Each flag occupies its own bit, so
// they can be combined (see EntryRecord.flags).
enum EntryFlags {
  PARENT_ENTRY = 1,         // This entry has children (sparse) entries.
  CHILD_ENTRY = 1 << 1      // Child entry that stores sparse data.
};
210
211struct EntryRecord {
212  uint32      hash;
213  uint32      pad1;
214  uint8       reuse_count;
215  uint8       refetch_count;
216  int8        state;              // Current EntryState.
217  uint8       flags;              // Any combination of EntryFlags.
218  int32       key_len;
219  int32       data_size[4];       // We can store up to 4 data streams for each
220  CacheAddr   data_addr[4];       // entry.
221  uint32      data_hash[4];
222  uint64      creation_time;
223  uint64      last_modified_time;
224  uint64      last_access_time;
225  int32       pad[3];
226  uint32      self_hash;
227};
228COMPILE_ASSERT(sizeof(EntryRecord) == 104, bad_EntryRecord);
229
230struct ShortEntryRecord {
231  uint32      hash;
232  uint32      pad1;
233  uint8       reuse_count;
234  uint8       refetch_count;
235  int8        state;              // Current EntryState.
236  uint8       flags;
237  int32       key_len;
238  uint64      last_access_time;
239  uint32      long_hash[5];
240  uint32      self_hash;
241};
242COMPILE_ASSERT(sizeof(ShortEntryRecord) == 48, bad_ShortEntryRecord);
243
244}  // namespace disk_cache
245
246#endif  // NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_
247