1/*
2** 2010 July 12
3**
4** The author disclaims copyright to this source code.  In place of
5** a legal notice, here is a blessing:
6**
7**    May you do good and not evil.
8**    May you find forgiveness for yourself and forgive others.
9**    May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains an implementation of the "dbstat" virtual table.
14**
15** The dbstat virtual table is used to extract low-level formatting
16** information from an SQLite database in order to implement the
17** "sqlite3_analyzer" utility.  See the ../tool/spaceanal.tcl script
18** for an example implementation.
19*/
20
21#include "sqliteInt.h"
22
23#ifndef SQLITE_OMIT_VIRTUALTABLE
24
25/*
26** Page paths:
27**
28**   The value of the 'path' column describes the path taken from the
29**   root-node of the b-tree structure to each page. The value of the
30**   root-node path is '/'.
31**
32**   The value of the path for the left-most child page of the root of
33**   a b-tree is '/000/'. (Btrees store content ordered from left to right
34**   so the pages to the left have smaller keys than the pages to the right.)
35**   The next to left-most child of the root page is
36**   '/001', and so on, each sibling page identified by a 3-digit hex
37**   value. The children of the 451st left-most sibling have paths such
38**   as '/1c2/000/, '/1c2/001/' etc.
39**
40**   Overflow pages are specified by appending a '+' character and a
41**   six-digit hexadecimal value to the path to the cell they are linked
42**   from. For example, the three overflow pages in a chain linked from
43**   the left-most cell of the 450th child of the root page are identified
44**   by the paths:
45**
46**      '/1c2/000+000000'         // First page in overflow chain
47**      '/1c2/000+000001'         // Second page in overflow chain
48**      '/1c2/000+000002'         // Third page in overflow chain
49**
50**   If the paths are sorted using the BINARY collation sequence, then
51**   the overflow pages associated with a cell will appear earlier in the
52**   sort-order than its child page:
53**
54**      '/1c2/000/'               // Left-most child of 451st child of root
55*/
56#define VTAB_SCHEMA                                                         \
57  "CREATE TABLE xx( "                                                       \
58  "  name       STRING,           /* Name of table or index */"             \
59  "  path       INTEGER,          /* Path to page from root */"             \
60  "  pageno     INTEGER,          /* Page number */"                        \
61  "  pagetype   STRING,           /* 'internal', 'leaf' or 'overflow' */"   \
62  "  ncell      INTEGER,          /* Cells on page (0 for overflow) */"     \
63  "  payload    INTEGER,          /* Bytes of payload on this page */"      \
64  "  unused     INTEGER,          /* Bytes of unused space on this page */" \
65  "  mx_payload INTEGER           /* Largest payload size of all cells */"  \
66  ");"
67
68#if 0
69#define VTAB_SCHEMA2                                                        \
70  "CREATE TABLE yy( "                                                       \
71  "  pageno   INTEGER,            /* B-tree page number */"                 \
72  "  cellno   INTEGER,            /* Cell number within page */"            \
73  "  local    INTEGER,            /* Bytes of content stored locally */"    \
74  "  payload  INTEGER,            /* Total cell payload size */"            \
75  "  novfl    INTEGER             /* Number of overflow pages */"           \
76  ");"
77#endif
78
79
80typedef struct StatTable StatTable;
81typedef struct StatCursor StatCursor;
82typedef struct StatPage StatPage;
83typedef struct StatCell StatCell;
84
85struct StatCell {
86  int nLocal;                     /* Bytes of local payload */
87  u32 iChildPg;                   /* Child node (or 0 if this is a leaf) */
88  int nOvfl;                      /* Entries in aOvfl[] */
89  u32 *aOvfl;                     /* Array of overflow page numbers */
90  int nLastOvfl;                  /* Bytes of payload on final overflow page */
91  int iOvfl;                      /* Iterates through aOvfl[] */
92};
93
94struct StatPage {
95  u32 iPgno;
96  DbPage *pPg;
97  int iCell;
98
99  char *zPath;                    /* Path to this page */
100
101  /* Variables populated by statDecodePage(): */
102  u8 flags;                       /* Copy of flags byte */
103  int nCell;                      /* Number of cells on page */
104  int nUnused;                    /* Number of unused bytes on page */
105  StatCell *aCell;                /* Array of parsed cells */
106  u32 iRightChildPg;              /* Right-child page number (or 0) */
107  int nMxPayload;                 /* Largest payload of any cell on this page */
108};
109
110struct StatCursor {
111  sqlite3_vtab_cursor base;
112  sqlite3_stmt *pStmt;            /* Iterates through set of root pages */
113  int isEof;                      /* After pStmt has returned SQLITE_DONE */
114
115  StatPage aPage[32];
116  int iPage;                      /* Current entry in aPage[] */
117
118  /* Values to return. */
119  char *zName;                    /* Value of 'name' column */
120  char *zPath;                    /* Value of 'path' column */
121  u32 iPageno;                    /* Value of 'pageno' column */
122  char *zPagetype;                /* Value of 'pagetype' column */
123  int nCell;                      /* Value of 'ncell' column */
124  int nPayload;                   /* Value of 'payload' column */
125  int nUnused;                    /* Value of 'unused' column */
126  int nMxPayload;                 /* Value of 'mx_payload' column */
127};
128
129struct StatTable {
130  sqlite3_vtab base;
131  sqlite3 *db;
132};
133
134#ifndef get2byte
135# define get2byte(x)   ((x)[0]<<8 | (x)[1])
136#endif
137
138/*
139** Connect to or create a statvfs virtual table.
140*/
141static int statConnect(
142  sqlite3 *db,
143  void *pAux,
144  int argc, const char *const*argv,
145  sqlite3_vtab **ppVtab,
146  char **pzErr
147){
148  StatTable *pTab;
149
150  pTab = (StatTable *)sqlite3_malloc(sizeof(StatTable));
151  memset(pTab, 0, sizeof(StatTable));
152  pTab->db = db;
153
154  sqlite3_declare_vtab(db, VTAB_SCHEMA);
155  *ppVtab = &pTab->base;
156  return SQLITE_OK;
157}
158
159/*
160** Disconnect from or destroy a statvfs virtual table.
161*/
162static int statDisconnect(sqlite3_vtab *pVtab){
163  sqlite3_free(pVtab);
164  return SQLITE_OK;
165}
166
167/*
168** There is no "best-index". This virtual table always does a linear
169** scan of the binary VFS log file.
170*/
171static int statBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
172
173  /* Records are always returned in ascending order of (name, path).
174  ** If this will satisfy the client, set the orderByConsumed flag so that
175  ** SQLite does not do an external sort.
176  */
177  if( ( pIdxInfo->nOrderBy==1
178     && pIdxInfo->aOrderBy[0].iColumn==0
179     && pIdxInfo->aOrderBy[0].desc==0
180     ) ||
181      ( pIdxInfo->nOrderBy==2
182     && pIdxInfo->aOrderBy[0].iColumn==0
183     && pIdxInfo->aOrderBy[0].desc==0
184     && pIdxInfo->aOrderBy[1].iColumn==1
185     && pIdxInfo->aOrderBy[1].desc==0
186     )
187  ){
188    pIdxInfo->orderByConsumed = 1;
189  }
190
191  pIdxInfo->estimatedCost = 10.0;
192  return SQLITE_OK;
193}
194
195/*
196** Open a new statvfs cursor.
197*/
198static int statOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
199  StatTable *pTab = (StatTable *)pVTab;
200  StatCursor *pCsr;
201  int rc;
202
203  pCsr = (StatCursor *)sqlite3_malloc(sizeof(StatCursor));
204  memset(pCsr, 0, sizeof(StatCursor));
205  pCsr->base.pVtab = pVTab;
206
207  rc = sqlite3_prepare_v2(pTab->db,
208      "SELECT 'sqlite_master' AS name, 1 AS rootpage, 'table' AS type"
209      "  UNION ALL  "
210      "SELECT name, rootpage, type FROM sqlite_master WHERE rootpage!=0"
211      "  ORDER BY name", -1,
212      &pCsr->pStmt, 0
213  );
214  if( rc!=SQLITE_OK ){
215    sqlite3_free(pCsr);
216    return rc;
217  }
218
219  *ppCursor = (sqlite3_vtab_cursor *)pCsr;
220  return SQLITE_OK;
221}
222
223static void statClearPage(StatPage *p){
224  int i;
225  for(i=0; i<p->nCell; i++){
226    sqlite3_free(p->aCell[i].aOvfl);
227  }
228  sqlite3PagerUnref(p->pPg);
229  sqlite3_free(p->aCell);
230  sqlite3_free(p->zPath);
231  memset(p, 0, sizeof(StatPage));
232}
233
234static void statResetCsr(StatCursor *pCsr){
235  int i;
236  sqlite3_reset(pCsr->pStmt);
237  for(i=0; i<ArraySize(pCsr->aPage); i++){
238    statClearPage(&pCsr->aPage[i]);
239  }
240  pCsr->iPage = 0;
241  sqlite3_free(pCsr->zPath);
242  pCsr->zPath = 0;
243}
244
245/*
246** Close a statvfs cursor.
247*/
248static int statClose(sqlite3_vtab_cursor *pCursor){
249  StatCursor *pCsr = (StatCursor *)pCursor;
250  statResetCsr(pCsr);
251  sqlite3_finalize(pCsr->pStmt);
252  sqlite3_free(pCsr);
253  return SQLITE_OK;
254}
255
256static void getLocalPayload(
257  int nUsable,                    /* Usable bytes per page */
258  u8 flags,                       /* Page flags */
259  int nTotal,                     /* Total record (payload) size */
260  int *pnLocal                    /* OUT: Bytes stored locally */
261){
262  int nLocal;
263  int nMinLocal;
264  int nMaxLocal;
265
266  if( flags==0x0D ){              /* Table leaf node */
267    nMinLocal = (nUsable - 12) * 32 / 255 - 23;
268    nMaxLocal = nUsable - 35;
269  }else{                          /* Index interior and leaf nodes */
270    nMinLocal = (nUsable - 12) * 32 / 255 - 23;
271    nMaxLocal = (nUsable - 12) * 64 / 255 - 23;
272  }
273
274  nLocal = nMinLocal + (nTotal - nMinLocal) % (nUsable - 4);
275  if( nLocal>nMaxLocal ) nLocal = nMinLocal;
276  *pnLocal = nLocal;
277}
278
279static int statDecodePage(Btree *pBt, StatPage *p){
280  int nUnused;
281  int iOff;
282  int nHdr;
283  int isLeaf;
284
285  u8 *aData = sqlite3PagerGetData(p->pPg);
286  u8 *aHdr = &aData[p->iPgno==1 ? 100 : 0];
287
288  p->flags = aHdr[0];
289  p->nCell = get2byte(&aHdr[3]);
290  p->nMxPayload = 0;
291
292  isLeaf = (p->flags==0x0A || p->flags==0x0D);
293  nHdr = 12 - isLeaf*4 + (p->iPgno==1)*100;
294
295  nUnused = get2byte(&aHdr[5]) - nHdr - 2*p->nCell;
296  nUnused += (int)aHdr[7];
297  iOff = get2byte(&aHdr[1]);
298  while( iOff ){
299    nUnused += get2byte(&aData[iOff+2]);
300    iOff = get2byte(&aData[iOff]);
301  }
302  p->nUnused = nUnused;
303  p->iRightChildPg = isLeaf ? 0 : sqlite3Get4byte(&aHdr[8]);
304
305  if( p->nCell ){
306    int i;                        /* Used to iterate through cells */
307    int nUsable = sqlite3BtreeGetPageSize(pBt) - sqlite3BtreeGetReserve(pBt);
308
309    p->aCell = sqlite3_malloc((p->nCell+1) * sizeof(StatCell));
310    memset(p->aCell, 0, (p->nCell+1) * sizeof(StatCell));
311
312    for(i=0; i<p->nCell; i++){
313      StatCell *pCell = &p->aCell[i];
314
315      iOff = get2byte(&aData[nHdr+i*2]);
316      if( !isLeaf ){
317        pCell->iChildPg = sqlite3Get4byte(&aData[iOff]);
318        iOff += 4;
319      }
320      if( p->flags==0x05 ){
321        /* A table interior node. nPayload==0. */
322      }else{
323        u32 nPayload;             /* Bytes of payload total (local+overflow) */
324        int nLocal;               /* Bytes of payload stored locally */
325        iOff += getVarint32(&aData[iOff], nPayload);
326        if( p->flags==0x0D ){
327          u64 dummy;
328          iOff += sqlite3GetVarint(&aData[iOff], &dummy);
329        }
330        if( nPayload>p->nMxPayload ) p->nMxPayload = nPayload;
331        getLocalPayload(nUsable, p->flags, nPayload, &nLocal);
332        pCell->nLocal = nLocal;
333        assert( nPayload>=nLocal );
334        assert( nLocal<=(nUsable-35) );
335        if( nPayload>nLocal ){
336          int j;
337          int nOvfl = ((nPayload - nLocal) + nUsable-4 - 1) / (nUsable - 4);
338          pCell->nLastOvfl = (nPayload-nLocal) - (nOvfl-1) * (nUsable-4);
339          pCell->nOvfl = nOvfl;
340          pCell->aOvfl = sqlite3_malloc(sizeof(u32)*nOvfl);
341          pCell->aOvfl[0] = sqlite3Get4byte(&aData[iOff+nLocal]);
342          for(j=1; j<nOvfl; j++){
343            int rc;
344            u32 iPrev = pCell->aOvfl[j-1];
345            DbPage *pPg = 0;
346            rc = sqlite3PagerGet(sqlite3BtreePager(pBt), iPrev, &pPg);
347            if( rc!=SQLITE_OK ){
348              assert( pPg==0 );
349              return rc;
350            }
351            pCell->aOvfl[j] = sqlite3Get4byte(sqlite3PagerGetData(pPg));
352            sqlite3PagerUnref(pPg);
353          }
354        }
355      }
356    }
357  }
358
359  return SQLITE_OK;
360}
361
362/*
363** Move a statvfs cursor to the next entry in the file.
364*/
365static int statNext(sqlite3_vtab_cursor *pCursor){
366  int rc;
367  int nPayload;
368  StatCursor *pCsr = (StatCursor *)pCursor;
369  StatTable *pTab = (StatTable *)pCursor->pVtab;
370  Btree *pBt = pTab->db->aDb[0].pBt;
371  Pager *pPager = sqlite3BtreePager(pBt);
372
373  sqlite3_free(pCsr->zPath);
374  pCsr->zPath = 0;
375
376  if( pCsr->aPage[0].pPg==0 ){
377    rc = sqlite3_step(pCsr->pStmt);
378    if( rc==SQLITE_ROW ){
379      int nPage;
380      u32 iRoot = sqlite3_column_int64(pCsr->pStmt, 1);
381      sqlite3PagerPagecount(pPager, &nPage);
382      if( nPage==0 ){
383        pCsr->isEof = 1;
384        return sqlite3_reset(pCsr->pStmt);
385      }
386      rc = sqlite3PagerGet(pPager, iRoot, &pCsr->aPage[0].pPg);
387      pCsr->aPage[0].iPgno = iRoot;
388      pCsr->aPage[0].iCell = 0;
389      pCsr->aPage[0].zPath = sqlite3_mprintf("/");
390      pCsr->iPage = 0;
391    }else{
392      pCsr->isEof = 1;
393      return sqlite3_reset(pCsr->pStmt);
394    }
395  }else{
396
397    /* Page p itself has already been visited. */
398    StatPage *p = &pCsr->aPage[pCsr->iPage];
399
400    while( p->iCell<p->nCell ){
401      StatCell *pCell = &p->aCell[p->iCell];
402      if( pCell->iOvfl<pCell->nOvfl ){
403        int nUsable = sqlite3BtreeGetPageSize(pBt)-sqlite3BtreeGetReserve(pBt);
404        pCsr->zName = (char *)sqlite3_column_text(pCsr->pStmt, 0);
405        pCsr->iPageno = pCell->aOvfl[pCell->iOvfl];
406        pCsr->zPagetype = "overflow";
407        pCsr->nCell = 0;
408        pCsr->nMxPayload = 0;
409        pCsr->zPath = sqlite3_mprintf(
410            "%s%.3x+%.6x", p->zPath, p->iCell, pCell->iOvfl
411        );
412        if( pCell->iOvfl<pCell->nOvfl-1 ){
413          pCsr->nUnused = 0;
414          pCsr->nPayload = nUsable - 4;
415        }else{
416          pCsr->nPayload = pCell->nLastOvfl;
417          pCsr->nUnused = nUsable - 4 - pCsr->nPayload;
418        }
419        pCell->iOvfl++;
420        return SQLITE_OK;
421      }
422      if( p->iRightChildPg ) break;
423      p->iCell++;
424    }
425
426    while( !p->iRightChildPg || p->iCell>p->nCell ){
427      statClearPage(p);
428      if( pCsr->iPage==0 ) return statNext(pCursor);
429      pCsr->iPage--;
430      p = &pCsr->aPage[pCsr->iPage];
431    }
432    pCsr->iPage++;
433    assert( p==&pCsr->aPage[pCsr->iPage-1] );
434
435    if( p->iCell==p->nCell ){
436      p[1].iPgno = p->iRightChildPg;
437    }else{
438      p[1].iPgno = p->aCell[p->iCell].iChildPg;
439    }
440    rc = sqlite3PagerGet(pPager, p[1].iPgno, &p[1].pPg);
441    p[1].iCell = 0;
442    p[1].zPath = sqlite3_mprintf("%s%.3x/", p->zPath, p->iCell);
443    p->iCell++;
444  }
445
446
447  /* Populate the StatCursor fields with the values to be returned
448  ** by the xColumn() and xRowid() methods.
449  */
450  if( rc==SQLITE_OK ){
451    int i;
452    StatPage *p = &pCsr->aPage[pCsr->iPage];
453    pCsr->zName = (char *)sqlite3_column_text(pCsr->pStmt, 0);
454    pCsr->iPageno = p->iPgno;
455
456    statDecodePage(pBt, p);
457
458    switch( p->flags ){
459      case 0x05:             /* table internal */
460      case 0x02:             /* index internal */
461        pCsr->zPagetype = "internal";
462        break;
463      case 0x0D:             /* table leaf */
464      case 0x0A:             /* index leaf */
465        pCsr->zPagetype = "leaf";
466        break;
467      default:
468        pCsr->zPagetype = "corrupted";
469        break;
470    }
471    pCsr->nCell = p->nCell;
472    pCsr->nUnused = p->nUnused;
473    pCsr->nMxPayload = p->nMxPayload;
474    pCsr->zPath = sqlite3_mprintf("%s", p->zPath);
475    nPayload = 0;
476    for(i=0; i<p->nCell; i++){
477      nPayload += p->aCell[i].nLocal;
478    }
479    pCsr->nPayload = nPayload;
480  }
481
482  return rc;
483}
484
485static int statEof(sqlite3_vtab_cursor *pCursor){
486  StatCursor *pCsr = (StatCursor *)pCursor;
487  return pCsr->isEof;
488}
489
490static int statFilter(
491  sqlite3_vtab_cursor *pCursor,
492  int idxNum, const char *idxStr,
493  int argc, sqlite3_value **argv
494){
495  StatCursor *pCsr = (StatCursor *)pCursor;
496
497  statResetCsr(pCsr);
498  return statNext(pCursor);
499}
500
501static int statColumn(
502  sqlite3_vtab_cursor *pCursor,
503  sqlite3_context *ctx,
504  int i
505){
506  StatCursor *pCsr = (StatCursor *)pCursor;
507  switch( i ){
508    case 0:            /* name */
509      sqlite3_result_text(ctx, pCsr->zName, -1, SQLITE_STATIC);
510      break;
511    case 1:            /* path */
512      sqlite3_result_text(ctx, pCsr->zPath, -1, SQLITE_TRANSIENT);
513      break;
514    case 2:            /* pageno */
515      sqlite3_result_int64(ctx, pCsr->iPageno);
516      break;
517    case 3:            /* pagetype */
518      sqlite3_result_text(ctx, pCsr->zPagetype, -1, SQLITE_STATIC);
519      break;
520    case 4:            /* ncell */
521      sqlite3_result_int(ctx, pCsr->nCell);
522      break;
523    case 5:            /* payload */
524      sqlite3_result_int(ctx, pCsr->nPayload);
525      break;
526    case 6:            /* unused */
527      sqlite3_result_int(ctx, pCsr->nUnused);
528      break;
529    case 7:            /* mx_payload */
530      sqlite3_result_int(ctx, pCsr->nMxPayload);
531      break;
532  }
533  return SQLITE_OK;
534}
535
536static int statRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
537  StatCursor *pCsr = (StatCursor *)pCursor;
538  *pRowid = pCsr->iPageno;
539  return SQLITE_OK;
540}
541
542int sqlite3_dbstat_register(sqlite3 *db){
543  static sqlite3_module dbstat_module = {
544    0,                            /* iVersion */
545    statConnect,                  /* xCreate */
546    statConnect,                  /* xConnect */
547    statBestIndex,                /* xBestIndex */
548    statDisconnect,               /* xDisconnect */
549    statDisconnect,               /* xDestroy */
550    statOpen,                     /* xOpen - open a cursor */
551    statClose,                    /* xClose - close a cursor */
552    statFilter,                   /* xFilter - configure scan constraints */
553    statNext,                     /* xNext - advance a cursor */
554    statEof,                      /* xEof - check for end of scan */
555    statColumn,                   /* xColumn - read data */
556    statRowid,                    /* xRowid - read data */
557    0,                            /* xUpdate */
558    0,                            /* xBegin */
559    0,                            /* xSync */
560    0,                            /* xCommit */
561    0,                            /* xRollback */
562    0,                            /* xFindMethod */
563    0,                            /* xRename */
564  };
565  sqlite3_create_module(db, "dbstat", &dbstat_module, 0);
566  return SQLITE_OK;
567}
568
569#endif
570
571#ifdef SQLITE_TEST
572#include <tcl.h>
573
574static int test_dbstat(
575  void *clientData,
576  Tcl_Interp *interp,
577  int objc,
578  Tcl_Obj *CONST objv[]
579){
580#ifdef SQLITE_OMIT_VIRTUALTABLE
581  Tcl_AppendResult(interp, "dbstat not available because of "
582                           "SQLITE_OMIT_VIRTUALTABLE", (void*)0);
583  return TCL_ERROR;
584#else
585  struct SqliteDb { sqlite3 *db; };
586  char *zDb;
587  Tcl_CmdInfo cmdInfo;
588
589  if( objc!=2 ){
590    Tcl_WrongNumArgs(interp, 1, objv, "DB");
591    return TCL_ERROR;
592  }
593
594  zDb = Tcl_GetString(objv[1]);
595  if( Tcl_GetCommandInfo(interp, zDb, &cmdInfo) ){
596    sqlite3* db = ((struct SqliteDb*)cmdInfo.objClientData)->db;
597    sqlite3_dbstat_register(db);
598  }
599  return TCL_OK;
600#endif
601}
602
603int SqlitetestStat_Init(Tcl_Interp *interp){
604  Tcl_CreateObjCommand(interp, "register_dbstat_vtab", test_dbstat, 0, 0);
605  return TCL_OK;
606}
607#endif
608