1/* 2** 2009 Nov 12 3** 4** The author disclaims copyright to this source code. In place of 5** a legal notice, here is a blessing: 6** 7** May you do good and not evil. 8** May you find forgiveness for yourself and forgive others. 9** May you share freely, never taking more than you give. 10** 11****************************************************************************** 12** 13*/ 14 15#ifndef _FTSINT_H 16#define _FTSINT_H 17 18#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 19# define NDEBUG 1 20#endif 21 22#include "sqlite3.h" 23#include "fts3_tokenizer.h" 24#include "fts3_hash.h" 25 26/* 27** This constant controls how often segments are merged. Once there are 28** FTS3_MERGE_COUNT segments of level N, they are merged into a single 29** segment of level N+1. 30*/ 31#define FTS3_MERGE_COUNT 16 32 33/* 34** This is the maximum amount of data (in bytes) to store in the 35** Fts3Table.pendingTerms hash table. Normally, the hash table is 36** populated as documents are inserted/updated/deleted in a transaction 37** and used to create a new segment when the transaction is committed. 38** However if this limit is reached midway through a transaction, a new 39** segment is created and the hash table cleared immediately. 40*/ 41#define FTS3_MAX_PENDING_DATA (1*1024*1024) 42 43/* 44** Macro to return the number of elements in an array. SQLite has a 45** similar macro called ArraySize(). Use a different name to avoid 46** a collision when building an amalgamation with built-in FTS3. 47*/ 48#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) 49 50/* 51** Maximum length of a varint encoded integer. The varint format is different 52** from that used by SQLite, so the maximum length is 10, not 9. 53*/ 54#define FTS3_VARINT_MAX 10 55 56/* 57** The testcase() macro is only used by the amalgamation. If undefined, 58** make it a no-op. 59*/ 60#ifndef testcase 61# define testcase(X) 62#endif 63 64/* 65** Terminator values for position-lists and column-lists. 66*/ 67#define POS_COLUMN (1) /* Column-list terminator */ 68#define POS_END (0) /* Position-list terminator */ 69 70/* 71** This section provides definitions to allow the 72** FTS3 extension to be compiled outside of the 73** amalgamation. 74*/ 75#ifndef SQLITE_AMALGAMATION 76/* 77** Macros indicating that conditional expressions are always true or 78** false. 79*/ 80#ifdef SQLITE_COVERAGE_TEST 81# define ALWAYS(x) (1) 82# define NEVER(X) (0) 83#else 84# define ALWAYS(x) (x) 85# define NEVER(X) (x) 86#endif 87 88/* 89** Internal types used by SQLite. 90*/ 91typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */ 92typedef short int i16; /* 2-byte (or larger) signed integer */ 93typedef unsigned int u32; /* 4-byte unsigned integer */ 94typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */ 95/* 96** Macro used to suppress compiler warnings for unused parameters. 97*/ 98#define UNUSED_PARAMETER(x) (void)(x) 99#endif 100 101typedef struct Fts3Table Fts3Table; 102typedef struct Fts3Cursor Fts3Cursor; 103typedef struct Fts3Expr Fts3Expr; 104typedef struct Fts3Phrase Fts3Phrase; 105typedef struct Fts3PhraseToken Fts3PhraseToken; 106 107typedef struct Fts3SegFilter Fts3SegFilter; 108typedef struct Fts3DeferredToken Fts3DeferredToken; 109typedef struct Fts3SegReader Fts3SegReader; 110typedef struct Fts3SegReaderCursor Fts3SegReaderCursor; 111 112/* 113** A connection to a fulltext index is an instance of the following 114** structure. The xCreate and xConnect methods create an instance 115** of this structure and xDestroy and xDisconnect free that instance. 116** All other methods receive a pointer to the structure as one of their 117** arguments. 118*/ 119struct Fts3Table { 120 sqlite3_vtab base; /* Base class used by SQLite core */ 121 sqlite3 *db; /* The database connection */ 122 const char *zDb; /* logical database name */ 123 const char *zName; /* virtual table name */ 124 int nColumn; /* number of named columns in virtual table */ 125 char **azColumn; /* column names. malloced */ 126 sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ 127 128 /* Precompiled statements used by the implementation. Each of these 129 ** statements is run and reset within a single virtual table API call. 130 */ 131 sqlite3_stmt *aStmt[24]; 132 133 char *zReadExprlist; 134 char *zWriteExprlist; 135 136 int nNodeSize; /* Soft limit for node size */ 137 u8 bHasStat; /* True if %_stat table exists */ 138 u8 bHasDocsize; /* True if %_docsize table exists */ 139 int nPgsz; /* Page size for host database */ 140 char *zSegmentsTbl; /* Name of %_segments table */ 141 sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ 142 143 /* The following hash table is used to buffer pending index updates during 144 ** transactions. Variable nPendingData estimates the memory size of the 145 ** pending data, including hash table overhead, but not malloc overhead. 146 ** When nPendingData exceeds nMaxPendingData, the buffer is flushed 147 ** automatically. Variable iPrevDocid is the docid of the most recently 148 ** inserted record. 149 */ 150 int nMaxPendingData; 151 int nPendingData; 152 sqlite_int64 iPrevDocid; 153 Fts3Hash pendingTerms; 154}; 155 156/* 157** When the core wants to read from the virtual table, it creates a 158** virtual table cursor (an instance of the following structure) using 159** the xOpen method. Cursors are destroyed using the xClose method. 160*/ 161struct Fts3Cursor { 162 sqlite3_vtab_cursor base; /* Base class used by SQLite core */ 163 i16 eSearch; /* Search strategy (see below) */ 164 u8 isEof; /* True if at End Of Results */ 165 u8 isRequireSeek; /* True if must seek pStmt to %_content row */ 166 sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ 167 Fts3Expr *pExpr; /* Parsed MATCH query string */ 168 int nPhrase; /* Number of matchable phrases in query */ 169 Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ 170 sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ 171 char *pNextId; /* Pointer into the body of aDoclist */ 172 char *aDoclist; /* List of docids for full-text queries */ 173 int nDoclist; /* Size of buffer at aDoclist */ 174 int eEvalmode; /* An FTS3_EVAL_XX constant */ 175 int nRowAvg; /* Average size of database rows, in pages */ 176 177 int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ 178 u32 *aMatchinfo; /* Information about most recent match */ 179 int nMatchinfo; /* Number of elements in aMatchinfo[] */ 180 char *zMatchinfo; /* Matchinfo specification */ 181}; 182 183#define FTS3_EVAL_FILTER 0 184#define FTS3_EVAL_NEXT 1 185#define FTS3_EVAL_MATCHINFO 2 186 187/* 188** The Fts3Cursor.eSearch member is always set to one of the following. 189** Actualy, Fts3Cursor.eSearch can be greater than or equal to 190** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index 191** of the column to be searched. For example, in 192** 193** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d); 194** SELECT docid FROM ex1 WHERE b MATCH 'one two three'; 195** 196** Because the LHS of the MATCH operator is 2nd column "b", 197** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a, 198** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1" 199** indicating that all columns should be searched, 200** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. 201*/ 202#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ 203#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ 204#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ 205 206/* 207** A "phrase" is a sequence of one or more tokens that must match in 208** sequence. A single token is the base case and the most common case. 209** For a sequence of tokens contained in double-quotes (i.e. "one two three") 210** nToken will be the number of tokens in the string. 211** 212** The nDocMatch and nMatch variables contain data that may be used by the 213** matchinfo() function. They are populated when the full-text index is 214** queried for hits on the phrase. If one or more tokens in the phrase 215** are deferred, the nDocMatch and nMatch variables are populated based 216** on the assumption that the 217*/ 218struct Fts3PhraseToken { 219 char *z; /* Text of the token */ 220 int n; /* Number of bytes in buffer z */ 221 int isPrefix; /* True if token ends with a "*" character */ 222 int bFulltext; /* True if full-text index was used */ 223 Fts3SegReaderCursor *pSegcsr; /* Segment-reader for this token */ 224 Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ 225}; 226 227struct Fts3Phrase { 228 /* Variables populated by fts3_expr.c when parsing a MATCH expression */ 229 int nToken; /* Number of tokens in the phrase */ 230 int iColumn; /* Index of column this phrase must match */ 231 int isNot; /* Phrase prefixed by unary not (-) operator */ 232 Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ 233}; 234 235/* 236** A tree of these objects forms the RHS of a MATCH operator. 237** 238** If Fts3Expr.eType is either FTSQUERY_NEAR or FTSQUERY_PHRASE and isLoaded 239** is true, then aDoclist points to a malloced buffer, size nDoclist bytes, 240** containing the results of the NEAR or phrase query in FTS3 doclist 241** format. As usual, the initial "Length" field found in doclists stored 242** on disk is omitted from this buffer. 243** 244** Variable pCurrent always points to the start of a docid field within 245** aDoclist. Since the doclist is usually scanned in docid order, this can 246** be used to accelerate seeking to the required docid within the doclist. 247*/ 248struct Fts3Expr { 249 int eType; /* One of the FTSQUERY_XXX values defined below */ 250 int nNear; /* Valid if eType==FTSQUERY_NEAR */ 251 Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ 252 Fts3Expr *pLeft; /* Left operand */ 253 Fts3Expr *pRight; /* Right operand */ 254 Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ 255 256 int isLoaded; /* True if aDoclist/nDoclist are initialized. */ 257 char *aDoclist; /* Buffer containing doclist */ 258 int nDoclist; /* Size of aDoclist in bytes */ 259 260 sqlite3_int64 iCurrent; 261 char *pCurrent; 262}; 263 264/* 265** Candidate values for Fts3Query.eType. Note that the order of the first 266** four values is in order of precedence when parsing expressions. For 267** example, the following: 268** 269** "a OR b AND c NOT d NEAR e" 270** 271** is equivalent to: 272** 273** "a OR (b AND (c NOT (d NEAR e)))" 274*/ 275#define FTSQUERY_NEAR 1 276#define FTSQUERY_NOT 2 277#define FTSQUERY_AND 3 278#define FTSQUERY_OR 4 279#define FTSQUERY_PHRASE 5 280 281 282/* fts3_write.c */ 283int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); 284int sqlite3Fts3PendingTermsFlush(Fts3Table *); 285void sqlite3Fts3PendingTermsClear(Fts3Table *); 286int sqlite3Fts3Optimize(Fts3Table *); 287int sqlite3Fts3SegReaderNew(int, sqlite3_int64, 288 sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); 289int sqlite3Fts3SegReaderPending(Fts3Table*,const char*,int,int,Fts3SegReader**); 290void sqlite3Fts3SegReaderFree(Fts3SegReader *); 291int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *); 292int sqlite3Fts3AllSegdirs(Fts3Table*, int, sqlite3_stmt **); 293int sqlite3Fts3ReadLock(Fts3Table *); 294int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*); 295 296int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); 297int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); 298 299void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); 300int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); 301int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); 302void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); 303char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); 304void sqlite3Fts3SegmentsClose(Fts3Table *); 305 306#define FTS3_SEGCURSOR_PENDING -1 307#define FTS3_SEGCURSOR_ALL -2 308 309int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3SegReaderCursor*, Fts3SegFilter*); 310int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3SegReaderCursor *); 311void sqlite3Fts3SegReaderFinish(Fts3SegReaderCursor *); 312int sqlite3Fts3SegReaderCursor( 313 Fts3Table *, int, const char *, int, int, int, Fts3SegReaderCursor *); 314 315/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ 316#define FTS3_SEGMENT_REQUIRE_POS 0x00000001 317#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 318#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 319#define FTS3_SEGMENT_PREFIX 0x00000008 320#define FTS3_SEGMENT_SCAN 0x00000010 321 322/* Type passed as 4th argument to SegmentReaderIterate() */ 323struct Fts3SegFilter { 324 const char *zTerm; 325 int nTerm; 326 int iCol; 327 int flags; 328}; 329 330struct Fts3SegReaderCursor { 331 /* Used internally by sqlite3Fts3SegReaderXXX() calls */ 332 Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ 333 int nSegment; /* Size of apSegment array */ 334 int nAdvance; /* How many seg-readers to advance */ 335 Fts3SegFilter *pFilter; /* Pointer to filter object */ 336 char *aBuffer; /* Buffer to merge doclists in */ 337 int nBuffer; /* Allocated size of aBuffer[] in bytes */ 338 339 /* Cost of running this iterator. Used by fts3.c only. */ 340 int nCost; 341 342 /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ 343 char *zTerm; /* Pointer to term buffer */ 344 int nTerm; /* Size of zTerm in bytes */ 345 char *aDoclist; /* Pointer to doclist buffer */ 346 int nDoclist; /* Size of aDoclist[] in bytes */ 347}; 348 349/* fts3.c */ 350int sqlite3Fts3PutVarint(char *, sqlite3_int64); 351int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); 352int sqlite3Fts3GetVarint32(const char *, int *); 353int sqlite3Fts3VarintLen(sqlite3_uint64); 354void sqlite3Fts3Dequote(char *); 355 356char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int); 357int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *); 358int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *); 359int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int); 360 361/* fts3_tokenizer.c */ 362const char *sqlite3Fts3NextToken(const char *, int *); 363int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); 364int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, 365 sqlite3_tokenizer **, char ** 366); 367int sqlite3Fts3IsIdChar(char); 368 369/* fts3_snippet.c */ 370void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); 371void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *, 372 const char *, const char *, int, int 373); 374void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); 375 376/* fts3_expr.c */ 377int sqlite3Fts3ExprParse(sqlite3_tokenizer *, 378 char **, int, int, const char *, int, Fts3Expr ** 379); 380void sqlite3Fts3ExprFree(Fts3Expr *); 381#ifdef SQLITE_TEST 382int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); 383#endif 384 385/* fts3_aux.c */ 386int sqlite3Fts3InitAux(sqlite3 *db); 387 388#endif /* _FTSINT_H */ 389