1/*
2** 2008 February 16
3**
4** The author disclaims copyright to this source code.  In place of
5** a legal notice, here is a blessing:
6**
7**    May you do good and not evil.
8**    May you find forgiveness for yourself and forgive others.
9**    May you share freely, never taking more than you give.
10**
11*************************************************************************
12** This file implements an object that represents a fixed-length
13** bitmap.  Bits are numbered starting with 1.
14**
15** A bitmap is used to record which pages of a database file have been
16** journalled during a transaction, or which pages have the "dont-write"
17** property.  Usually only a few pages are meet either condition.
18** So the bitmap is usually sparse and has low cardinality.
19** But sometimes (for example when during a DROP of a large table) most
20** or all of the pages in a database can get journalled.  In those cases,
21** the bitmap becomes dense with high cardinality.  The algorithm needs
22** to handle both cases well.
23**
24** The size of the bitmap is fixed when the object is created.
25**
26** All bits are clear when the bitmap is created.  Individual bits
27** may be set or cleared one at a time.
28**
29** Test operations are about 100 times more common that set operations.
30** Clear operations are exceedingly rare.  There are usually between
31** 5 and 500 set operations per Bitvec object, though the number of sets can
32** sometimes grow into tens of thousands or larger.  The size of the
33** Bitvec object is the number of pages in the database file at the
34** start of a transaction, and is thus usually less than a few thousand,
35** but can be as large as 2 billion for a really big database.
36*/
37#include "sqliteInt.h"
38
39/* Size of the Bitvec structure in bytes. */
40#define BITVEC_SZ        512
41
42/* Round the union size down to the nearest pointer boundary, since that's how
43** it will be aligned within the Bitvec struct. */
44#define BITVEC_USIZE     (((BITVEC_SZ-(3*sizeof(u32)))/sizeof(Bitvec*))*sizeof(Bitvec*))
45
46/* Type of the array "element" for the bitmap representation.
47** Should be a power of 2, and ideally, evenly divide into BITVEC_USIZE.
48** Setting this to the "natural word" size of your CPU may improve
49** performance. */
50#define BITVEC_TELEM     u8
51/* Size, in bits, of the bitmap element. */
52#define BITVEC_SZELEM    8
53/* Number of elements in a bitmap array. */
54#define BITVEC_NELEM     (BITVEC_USIZE/sizeof(BITVEC_TELEM))
55/* Number of bits in the bitmap array. */
56#define BITVEC_NBIT      (BITVEC_NELEM*BITVEC_SZELEM)
57
58/* Number of u32 values in hash table. */
59#define BITVEC_NINT      (BITVEC_USIZE/sizeof(u32))
60/* Maximum number of entries in hash table before
61** sub-dividing and re-hashing. */
62#define BITVEC_MXHASH    (BITVEC_NINT/2)
63/* Hashing function for the aHash representation.
64** Empirical testing showed that the *37 multiplier
65** (an arbitrary prime)in the hash function provided
66** no fewer collisions than the no-op *1. */
67#define BITVEC_HASH(X)   (((X)*1)%BITVEC_NINT)
68
69#define BITVEC_NPTR      (BITVEC_USIZE/sizeof(Bitvec *))
70
71
72/*
73** A bitmap is an instance of the following structure.
74**
75** This bitmap records the existance of zero or more bits
76** with values between 1 and iSize, inclusive.
77**
78** There are three possible representations of the bitmap.
79** If iSize<=BITVEC_NBIT, then Bitvec.u.aBitmap[] is a straight
80** bitmap.  The least significant bit is bit 1.
81**
82** If iSize>BITVEC_NBIT and iDivisor==0 then Bitvec.u.aHash[] is
83** a hash table that will hold up to BITVEC_MXHASH distinct values.
84**
85** Otherwise, the value i is redirected into one of BITVEC_NPTR
86** sub-bitmaps pointed to by Bitvec.u.apSub[].  Each subbitmap
87** handles up to iDivisor separate values of i.  apSub[0] holds
88** values between 1 and iDivisor.  apSub[1] holds values between
89** iDivisor+1 and 2*iDivisor.  apSub[N] holds values between
90** N*iDivisor+1 and (N+1)*iDivisor.  Each subbitmap is normalized
91** to hold deal with values between 1 and iDivisor.
92*/
93struct Bitvec {
94  u32 iSize;      /* Maximum bit index.  Max iSize is 4,294,967,296. */
95  u32 nSet;       /* Number of bits that are set - only valid for aHash
96                  ** element.  Max is BITVEC_NINT.  For BITVEC_SZ of 512,
97                  ** this would be 125. */
98  u32 iDivisor;   /* Number of bits handled by each apSub[] entry. */
99                  /* Should >=0 for apSub element. */
100                  /* Max iDivisor is max(u32) / BITVEC_NPTR + 1.  */
101                  /* For a BITVEC_SZ of 512, this would be 34,359,739. */
102  union {
103    BITVEC_TELEM aBitmap[BITVEC_NELEM];    /* Bitmap representation */
104    u32 aHash[BITVEC_NINT];      /* Hash table representation */
105    Bitvec *apSub[BITVEC_NPTR];  /* Recursive representation */
106  } u;
107};
108
109/*
110** Create a new bitmap object able to handle bits between 0 and iSize,
111** inclusive.  Return a pointer to the new object.  Return NULL if
112** malloc fails.
113*/
114Bitvec *sqlite3BitvecCreate(u32 iSize){
115  Bitvec *p;
116  assert( sizeof(*p)==BITVEC_SZ );
117  p = sqlite3MallocZero( sizeof(*p) );
118  if( p ){
119    p->iSize = iSize;
120  }
121  return p;
122}
123
124/*
125** Check to see if the i-th bit is set.  Return true or false.
126** If p is NULL (if the bitmap has not been created) or if
127** i is out of range, then return false.
128*/
129int sqlite3BitvecTest(Bitvec *p, u32 i){
130  if( p==0 ) return 0;
131  if( i>p->iSize || i==0 ) return 0;
132  i--;
133  while( p->iDivisor ){
134    u32 bin = i/p->iDivisor;
135    i = i%p->iDivisor;
136    p = p->u.apSub[bin];
137    if (!p) {
138      return 0;
139    }
140  }
141  if( p->iSize<=BITVEC_NBIT ){
142    return (p->u.aBitmap[i/BITVEC_SZELEM] & (1<<(i&(BITVEC_SZELEM-1))))!=0;
143  } else{
144    u32 h = BITVEC_HASH(i++);
145    while( p->u.aHash[h] ){
146      if( p->u.aHash[h]==i ) return 1;
147      h = (h+1) % BITVEC_NINT;
148    }
149    return 0;
150  }
151}
152
153/*
154** Set the i-th bit.  Return 0 on success and an error code if
155** anything goes wrong.
156**
157** This routine might cause sub-bitmaps to be allocated.  Failing
158** to get the memory needed to hold the sub-bitmap is the only
159** that can go wrong with an insert, assuming p and i are valid.
160**
161** The calling function must ensure that p is a valid Bitvec object
162** and that the value for "i" is within range of the Bitvec object.
163** Otherwise the behavior is undefined.
164*/
165int sqlite3BitvecSet(Bitvec *p, u32 i){
166  u32 h;
167  if( p==0 ) return SQLITE_OK;
168  assert( i>0 );
169  assert( i<=p->iSize );
170  i--;
171  while((p->iSize > BITVEC_NBIT) && p->iDivisor) {
172    u32 bin = i/p->iDivisor;
173    i = i%p->iDivisor;
174    if( p->u.apSub[bin]==0 ){
175      p->u.apSub[bin] = sqlite3BitvecCreate( p->iDivisor );
176      if( p->u.apSub[bin]==0 ) return SQLITE_NOMEM;
177    }
178    p = p->u.apSub[bin];
179  }
180  if( p->iSize<=BITVEC_NBIT ){
181    p->u.aBitmap[i/BITVEC_SZELEM] |= 1 << (i&(BITVEC_SZELEM-1));
182    return SQLITE_OK;
183  }
184  h = BITVEC_HASH(i++);
185  /* if there wasn't a hash collision, and this doesn't */
186  /* completely fill the hash, then just add it without */
187  /* worring about sub-dividing and re-hashing. */
188  if( !p->u.aHash[h] ){
189    if (p->nSet<(BITVEC_NINT-1)) {
190      goto bitvec_set_end;
191    } else {
192      goto bitvec_set_rehash;
193    }
194  }
195  /* there was a collision, check to see if it's already */
196  /* in hash, if not, try to find a spot for it */
197  do {
198    if( p->u.aHash[h]==i ) return SQLITE_OK;
199    h++;
200    if( h>=BITVEC_NINT ) h = 0;
201  } while( p->u.aHash[h] );
202  /* we didn't find it in the hash.  h points to the first */
203  /* available free spot. check to see if this is going to */
204  /* make our hash too "full".  */
205bitvec_set_rehash:
206  if( p->nSet>=BITVEC_MXHASH ){
207    unsigned int j;
208    int rc;
209    u32 *aiValues = sqlite3StackAllocRaw(0, sizeof(p->u.aHash));
210    if( aiValues==0 ){
211      return SQLITE_NOMEM;
212    }else{
213      memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash));
214      memset(p->u.apSub, 0, sizeof(p->u.apSub));
215      p->iDivisor = (p->iSize + BITVEC_NPTR - 1)/BITVEC_NPTR;
216      rc = sqlite3BitvecSet(p, i);
217      for(j=0; j<BITVEC_NINT; j++){
218        if( aiValues[j] ) rc |= sqlite3BitvecSet(p, aiValues[j]);
219      }
220      sqlite3StackFree(0, aiValues);
221      return rc;
222    }
223  }
224bitvec_set_end:
225  p->nSet++;
226  p->u.aHash[h] = i;
227  return SQLITE_OK;
228}
229
230/*
231** Clear the i-th bit.
232**
233** pBuf must be a pointer to at least BITVEC_SZ bytes of temporary storage
234** that BitvecClear can use to rebuilt its hash table.
235*/
236void sqlite3BitvecClear(Bitvec *p, u32 i, void *pBuf){
237  if( p==0 ) return;
238  assert( i>0 );
239  i--;
240  while( p->iDivisor ){
241    u32 bin = i/p->iDivisor;
242    i = i%p->iDivisor;
243    p = p->u.apSub[bin];
244    if (!p) {
245      return;
246    }
247  }
248  if( p->iSize<=BITVEC_NBIT ){
249    p->u.aBitmap[i/BITVEC_SZELEM] &= ~(1 << (i&(BITVEC_SZELEM-1)));
250  }else{
251    unsigned int j;
252    u32 *aiValues = pBuf;
253    memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash));
254    memset(p->u.aHash, 0, sizeof(p->u.aHash));
255    p->nSet = 0;
256    for(j=0; j<BITVEC_NINT; j++){
257      if( aiValues[j] && aiValues[j]!=(i+1) ){
258        u32 h = BITVEC_HASH(aiValues[j]-1);
259        p->nSet++;
260        while( p->u.aHash[h] ){
261          h++;
262          if( h>=BITVEC_NINT ) h = 0;
263        }
264        p->u.aHash[h] = aiValues[j];
265      }
266    }
267  }
268}
269
270/*
271** Destroy a bitmap object.  Reclaim all memory used.
272*/
273void sqlite3BitvecDestroy(Bitvec *p){
274  if( p==0 ) return;
275  if( p->iDivisor ){
276    unsigned int i;
277    for(i=0; i<BITVEC_NPTR; i++){
278      sqlite3BitvecDestroy(p->u.apSub[i]);
279    }
280  }
281  sqlite3_free(p);
282}
283
284/*
285** Return the value of the iSize parameter specified when Bitvec *p
286** was created.
287*/
288u32 sqlite3BitvecSize(Bitvec *p){
289  return p->iSize;
290}
291
292#ifndef SQLITE_OMIT_BUILTIN_TEST
293/*
294** Let V[] be an array of unsigned characters sufficient to hold
295** up to N bits.  Let I be an integer between 0 and N.  0<=I<N.
296** Then the following macros can be used to set, clear, or test
297** individual bits within V.
298*/
299#define SETBIT(V,I)      V[I>>3] |= (1<<(I&7))
300#define CLEARBIT(V,I)    V[I>>3] &= ~(1<<(I&7))
301#define TESTBIT(V,I)     (V[I>>3]&(1<<(I&7)))!=0
302
303/*
304** This routine runs an extensive test of the Bitvec code.
305**
306** The input is an array of integers that acts as a program
307** to test the Bitvec.  The integers are opcodes followed
308** by 0, 1, or 3 operands, depending on the opcode.  Another
309** opcode follows immediately after the last operand.
310**
311** There are 6 opcodes numbered from 0 through 5.  0 is the
312** "halt" opcode and causes the test to end.
313**
314**    0          Halt and return the number of errors
315**    1 N S X    Set N bits beginning with S and incrementing by X
316**    2 N S X    Clear N bits beginning with S and incrementing by X
317**    3 N        Set N randomly chosen bits
318**    4 N        Clear N randomly chosen bits
319**    5 N S X    Set N bits from S increment X in array only, not in bitvec
320**
321** The opcodes 1 through 4 perform set and clear operations are performed
322** on both a Bitvec object and on a linear array of bits obtained from malloc.
323** Opcode 5 works on the linear array only, not on the Bitvec.
324** Opcode 5 is used to deliberately induce a fault in order to
325** confirm that error detection works.
326**
327** At the conclusion of the test the linear array is compared
328** against the Bitvec object.  If there are any differences,
329** an error is returned.  If they are the same, zero is returned.
330**
331** If a memory allocation error occurs, return -1.
332*/
333int sqlite3BitvecBuiltinTest(int sz, int *aOp){
334  Bitvec *pBitvec = 0;
335  unsigned char *pV = 0;
336  int rc = -1;
337  int i, nx, pc, op;
338  void *pTmpSpace;
339
340  /* Allocate the Bitvec to be tested and a linear array of
341  ** bits to act as the reference */
342  pBitvec = sqlite3BitvecCreate( sz );
343  pV = sqlite3_malloc( (sz+7)/8 + 1 );
344  pTmpSpace = sqlite3_malloc(BITVEC_SZ);
345  if( pBitvec==0 || pV==0 || pTmpSpace==0  ) goto bitvec_end;
346  memset(pV, 0, (sz+7)/8 + 1);
347
348  /* NULL pBitvec tests */
349  sqlite3BitvecSet(0, 1);
350  sqlite3BitvecClear(0, 1, pTmpSpace);
351
352  /* Run the program */
353  pc = 0;
354  while( (op = aOp[pc])!=0 ){
355    switch( op ){
356      case 1:
357      case 2:
358      case 5: {
359        nx = 4;
360        i = aOp[pc+2] - 1;
361        aOp[pc+2] += aOp[pc+3];
362        break;
363      }
364      case 3:
365      case 4:
366      default: {
367        nx = 2;
368        sqlite3_randomness(sizeof(i), &i);
369        break;
370      }
371    }
372    if( (--aOp[pc+1]) > 0 ) nx = 0;
373    pc += nx;
374    i = (i & 0x7fffffff)%sz;
375    if( (op & 1)!=0 ){
376      SETBIT(pV, (i+1));
377      if( op!=5 ){
378        if( sqlite3BitvecSet(pBitvec, i+1) ) goto bitvec_end;
379      }
380    }else{
381      CLEARBIT(pV, (i+1));
382      sqlite3BitvecClear(pBitvec, i+1, pTmpSpace);
383    }
384  }
385
386  /* Test to make sure the linear array exactly matches the
387  ** Bitvec object.  Start with the assumption that they do
388  ** match (rc==0).  Change rc to non-zero if a discrepancy
389  ** is found.
390  */
391  rc = sqlite3BitvecTest(0,0) + sqlite3BitvecTest(pBitvec, sz+1)
392          + sqlite3BitvecTest(pBitvec, 0)
393          + (sqlite3BitvecSize(pBitvec) - sz);
394  for(i=1; i<=sz; i++){
395    if(  (TESTBIT(pV,i))!=sqlite3BitvecTest(pBitvec,i) ){
396      rc = i;
397      break;
398    }
399  }
400
401  /* Free allocated structure */
402bitvec_end:
403  sqlite3_free(pTmpSpace);
404  sqlite3_free(pV);
405  sqlite3BitvecDestroy(pBitvec);
406  return rc;
407}
408#endif /* SQLITE_OMIT_BUILTIN_TEST */
409