1/* 2** 2009 Oct 23 3** 4** The author disclaims copyright to this source code. In place of 5** a legal notice, here is a blessing: 6** 7** May you do good and not evil. 8** May you find forgiveness for yourself and forgive others. 9** May you share freely, never taking more than you give. 10** 11****************************************************************************** 12*/ 13 14#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) 15 16#include "fts3Int.h" 17#include <string.h> 18#include <assert.h> 19 20/* 21** Characters that may appear in the second argument to matchinfo(). 22*/ 23#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ 24#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ 25#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ 26#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ 27#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ 28#define FTS3_MATCHINFO_LCS 's' /* nCol values */ 29#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ 30 31/* 32** The default value for the second argument to matchinfo(). 33*/ 34#define FTS3_MATCHINFO_DEFAULT "pcx" 35 36 37/* 38** Used as an fts3ExprIterate() context when loading phrase doclists to 39** Fts3Expr.aDoclist[]/nDoclist. 40*/ 41typedef struct LoadDoclistCtx LoadDoclistCtx; 42struct LoadDoclistCtx { 43 Fts3Cursor *pCsr; /* FTS3 Cursor */ 44 int nPhrase; /* Number of phrases seen so far */ 45 int nToken; /* Number of tokens seen so far */ 46}; 47 48/* 49** The following types are used as part of the implementation of the 50** fts3BestSnippet() routine. 51*/ 52typedef struct SnippetIter SnippetIter; 53typedef struct SnippetPhrase SnippetPhrase; 54typedef struct SnippetFragment SnippetFragment; 55 56struct SnippetIter { 57 Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ 58 int iCol; /* Extract snippet from this column */ 59 int nSnippet; /* Requested snippet length (in tokens) */ 60 int nPhrase; /* Number of phrases in query */ 61 SnippetPhrase *aPhrase; /* Array of size nPhrase */ 62 int iCurrent; /* First token of current snippet */ 63}; 64 65struct SnippetPhrase { 66 int nToken; /* Number of tokens in phrase */ 67 char *pList; /* Pointer to start of phrase position list */ 68 int iHead; /* Next value in position list */ 69 char *pHead; /* Position list data following iHead */ 70 int iTail; /* Next value in trailing position list */ 71 char *pTail; /* Position list data following iTail */ 72}; 73 74struct SnippetFragment { 75 int iCol; /* Column snippet is extracted from */ 76 int iPos; /* Index of first token in snippet */ 77 u64 covered; /* Mask of query phrases covered */ 78 u64 hlmask; /* Mask of snippet terms to highlight */ 79}; 80 81/* 82** This type is used as an fts3ExprIterate() context object while 83** accumulating the data returned by the matchinfo() function. 84*/ 85typedef struct MatchInfo MatchInfo; 86struct MatchInfo { 87 Fts3Cursor *pCursor; /* FTS3 Cursor */ 88 int nCol; /* Number of columns in table */ 89 int nPhrase; /* Number of matchable phrases in query */ 90 sqlite3_int64 nDoc; /* Number of docs in database */ 91 u32 *aMatchinfo; /* Pre-allocated buffer */ 92}; 93 94 95 96/* 97** The snippet() and offsets() functions both return text values. An instance 98** of the following structure is used to accumulate those values while the 99** functions are running. See fts3StringAppend() for details. 100*/ 101typedef struct StrBuffer StrBuffer; 102struct StrBuffer { 103 char *z; /* Pointer to buffer containing string */ 104 int n; /* Length of z in bytes (excl. nul-term) */ 105 int nAlloc; /* Allocated size of buffer z in bytes */ 106}; 107 108 109/* 110** This function is used to help iterate through a position-list. A position 111** list is a list of unique integers, sorted from smallest to largest. Each 112** element of the list is represented by an FTS3 varint that takes the value 113** of the difference between the current element and the previous one plus 114** two. For example, to store the position-list: 115** 116** 4 9 113 117** 118** the three varints: 119** 120** 6 7 106 121** 122** are encoded. 123** 124** When this function is called, *pp points to the start of an element of 125** the list. *piPos contains the value of the previous entry in the list. 126** After it returns, *piPos contains the value of the next element of the 127** list and *pp is advanced to the following varint. 128*/ 129static void fts3GetDeltaPosition(char **pp, int *piPos){ 130 int iVal; 131 *pp += sqlite3Fts3GetVarint32(*pp, &iVal); 132 *piPos += (iVal-2); 133} 134 135/* 136** Helper function for fts3ExprIterate() (see below). 137*/ 138static int fts3ExprIterate2( 139 Fts3Expr *pExpr, /* Expression to iterate phrases of */ 140 int *piPhrase, /* Pointer to phrase counter */ 141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ 142 void *pCtx /* Second argument to pass to callback */ 143){ 144 int rc; /* Return code */ 145 int eType = pExpr->eType; /* Type of expression node pExpr */ 146 147 if( eType!=FTSQUERY_PHRASE ){ 148 assert( pExpr->pLeft && pExpr->pRight ); 149 rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); 150 if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ 151 rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); 152 } 153 }else{ 154 rc = x(pExpr, *piPhrase, pCtx); 155 (*piPhrase)++; 156 } 157 return rc; 158} 159 160/* 161** Iterate through all phrase nodes in an FTS3 query, except those that 162** are part of a sub-tree that is the right-hand-side of a NOT operator. 163** For each phrase node found, the supplied callback function is invoked. 164** 165** If the callback function returns anything other than SQLITE_OK, 166** the iteration is abandoned and the error code returned immediately. 167** Otherwise, SQLITE_OK is returned after a callback has been made for 168** all eligible phrase nodes. 169*/ 170static int fts3ExprIterate( 171 Fts3Expr *pExpr, /* Expression to iterate phrases of */ 172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ 173 void *pCtx /* Second argument to pass to callback */ 174){ 175 int iPhrase = 0; /* Variable used as the phrase counter */ 176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); 177} 178 179/* 180** The argument to this function is always a phrase node. Its doclist 181** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes 182** to the left of this one in the query tree have already been loaded. 183** 184** If this phrase node is part of a series of phrase nodes joined by 185** NEAR operators (and is not the left-most of said series), then elements are 186** removed from the phrases doclist consistent with the NEAR restriction. If 187** required, elements may be removed from the doclists of phrases to the 188** left of this one that are part of the same series of NEAR operator 189** connected phrases. 190** 191** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. 192*/ 193static int fts3ExprNearTrim(Fts3Expr *pExpr){ 194 int rc = SQLITE_OK; 195 Fts3Expr *pParent = pExpr->pParent; 196 197 assert( pExpr->eType==FTSQUERY_PHRASE ); 198 while( rc==SQLITE_OK 199 && pParent 200 && pParent->eType==FTSQUERY_NEAR 201 && pParent->pRight==pExpr 202 ){ 203 /* This expression (pExpr) is the right-hand-side of a NEAR operator. 204 ** Find the expression to the left of the same operator. 205 */ 206 int nNear = pParent->nNear; 207 Fts3Expr *pLeft = pParent->pLeft; 208 209 if( pLeft->eType!=FTSQUERY_PHRASE ){ 210 assert( pLeft->eType==FTSQUERY_NEAR ); 211 assert( pLeft->pRight->eType==FTSQUERY_PHRASE ); 212 pLeft = pLeft->pRight; 213 } 214 215 rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear); 216 217 pExpr = pLeft; 218 pParent = pExpr->pParent; 219 } 220 221 return rc; 222} 223 224/* 225** This is an fts3ExprIterate() callback used while loading the doclists 226** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also 227** fts3ExprLoadDoclists(). 228*/ 229static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ 230 int rc = SQLITE_OK; 231 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; 232 233 UNUSED_PARAMETER(iPhrase); 234 235 p->nPhrase++; 236 p->nToken += pExpr->pPhrase->nToken; 237 238 if( pExpr->isLoaded==0 ){ 239 rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr); 240 pExpr->isLoaded = 1; 241 if( rc==SQLITE_OK ){ 242 rc = fts3ExprNearTrim(pExpr); 243 } 244 } 245 246 return rc; 247} 248 249/* 250** Load the doclists for each phrase in the query associated with FTS3 cursor 251** pCsr. 252** 253** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable 254** phrases in the expression (all phrases except those directly or 255** indirectly descended from the right-hand-side of a NOT operator). If 256** pnToken is not NULL, then it is set to the number of tokens in all 257** matchable phrases of the expression. 258*/ 259static int fts3ExprLoadDoclists( 260 Fts3Cursor *pCsr, /* Fts3 cursor for current query */ 261 int *pnPhrase, /* OUT: Number of phrases in query */ 262 int *pnToken /* OUT: Number of tokens in query */ 263){ 264 int rc; /* Return Code */ 265 LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ 266 sCtx.pCsr = pCsr; 267 rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); 268 if( pnPhrase ) *pnPhrase = sCtx.nPhrase; 269 if( pnToken ) *pnToken = sCtx.nToken; 270 return rc; 271} 272 273static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ 274 (*(int *)ctx)++; 275 UNUSED_PARAMETER(pExpr); 276 UNUSED_PARAMETER(iPhrase); 277 return SQLITE_OK; 278} 279static int fts3ExprPhraseCount(Fts3Expr *pExpr){ 280 int nPhrase = 0; 281 (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); 282 return nPhrase; 283} 284 285/* 286** Advance the position list iterator specified by the first two 287** arguments so that it points to the first element with a value greater 288** than or equal to parameter iNext. 289*/ 290static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){ 291 char *pIter = *ppIter; 292 if( pIter ){ 293 int iIter = *piIter; 294 295 while( iIter<iNext ){ 296 if( 0==(*pIter & 0xFE) ){ 297 iIter = -1; 298 pIter = 0; 299 break; 300 } 301 fts3GetDeltaPosition(&pIter, &iIter); 302 } 303 304 *piIter = iIter; 305 *ppIter = pIter; 306 } 307} 308 309/* 310** Advance the snippet iterator to the next candidate snippet. 311*/ 312static int fts3SnippetNextCandidate(SnippetIter *pIter){ 313 int i; /* Loop counter */ 314 315 if( pIter->iCurrent<0 ){ 316 /* The SnippetIter object has just been initialized. The first snippet 317 ** candidate always starts at offset 0 (even if this candidate has a 318 ** score of 0.0). 319 */ 320 pIter->iCurrent = 0; 321 322 /* Advance the 'head' iterator of each phrase to the first offset that 323 ** is greater than or equal to (iNext+nSnippet). 324 */ 325 for(i=0; i<pIter->nPhrase; i++){ 326 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; 327 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); 328 } 329 }else{ 330 int iStart; 331 int iEnd = 0x7FFFFFFF; 332 333 for(i=0; i<pIter->nPhrase; i++){ 334 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; 335 if( pPhrase->pHead && pPhrase->iHead<iEnd ){ 336 iEnd = pPhrase->iHead; 337 } 338 } 339 if( iEnd==0x7FFFFFFF ){ 340 return 1; 341 } 342 343 pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; 344 for(i=0; i<pIter->nPhrase; i++){ 345 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; 346 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); 347 fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); 348 } 349 } 350 351 return 0; 352} 353 354/* 355** Retrieve information about the current candidate snippet of snippet 356** iterator pIter. 357*/ 358static void fts3SnippetDetails( 359 SnippetIter *pIter, /* Snippet iterator */ 360 u64 mCovered, /* Bitmask of phrases already covered */ 361 int *piToken, /* OUT: First token of proposed snippet */ 362 int *piScore, /* OUT: "Score" for this snippet */ 363 u64 *pmCover, /* OUT: Bitmask of phrases covered */ 364 u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ 365){ 366 int iStart = pIter->iCurrent; /* First token of snippet */ 367 int iScore = 0; /* Score of this snippet */ 368 int i; /* Loop counter */ 369 u64 mCover = 0; /* Mask of phrases covered by this snippet */ 370 u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ 371 372 for(i=0; i<pIter->nPhrase; i++){ 373 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; 374 if( pPhrase->pTail ){ 375 char *pCsr = pPhrase->pTail; 376 int iCsr = pPhrase->iTail; 377 378 while( iCsr<(iStart+pIter->nSnippet) ){ 379 int j; 380 u64 mPhrase = (u64)1 << i; 381 u64 mPos = (u64)1 << (iCsr - iStart); 382 assert( iCsr>=iStart ); 383 if( (mCover|mCovered)&mPhrase ){ 384 iScore++; 385 }else{ 386 iScore += 1000; 387 } 388 mCover |= mPhrase; 389 390 for(j=0; j<pPhrase->nToken; j++){ 391 mHighlight |= (mPos>>j); 392 } 393 394 if( 0==(*pCsr & 0x0FE) ) break; 395 fts3GetDeltaPosition(&pCsr, &iCsr); 396 } 397 } 398 } 399 400 /* Set the output variables before returning. */ 401 *piToken = iStart; 402 *piScore = iScore; 403 *pmCover = mCover; 404 *pmHighlight = mHighlight; 405} 406 407/* 408** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). 409** Each invocation populates an element of the SnippetIter.aPhrase[] array. 410*/ 411static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ 412 SnippetIter *p = (SnippetIter *)ctx; 413 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; 414 char *pCsr; 415 416 pPhrase->nToken = pExpr->pPhrase->nToken; 417 418 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol); 419 if( pCsr ){ 420 int iFirst = 0; 421 pPhrase->pList = pCsr; 422 fts3GetDeltaPosition(&pCsr, &iFirst); 423 pPhrase->pHead = pCsr; 424 pPhrase->pTail = pCsr; 425 pPhrase->iHead = iFirst; 426 pPhrase->iTail = iFirst; 427 }else{ 428 assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 ); 429 } 430 431 return SQLITE_OK; 432} 433 434/* 435** Select the fragment of text consisting of nFragment contiguous tokens 436** from column iCol that represent the "best" snippet. The best snippet 437** is the snippet with the highest score, where scores are calculated 438** by adding: 439** 440** (a) +1 point for each occurence of a matchable phrase in the snippet. 441** 442** (b) +1000 points for the first occurence of each matchable phrase in 443** the snippet for which the corresponding mCovered bit is not set. 444** 445** The selected snippet parameters are stored in structure *pFragment before 446** returning. The score of the selected snippet is stored in *piScore 447** before returning. 448*/ 449static int fts3BestSnippet( 450 int nSnippet, /* Desired snippet length */ 451 Fts3Cursor *pCsr, /* Cursor to create snippet for */ 452 int iCol, /* Index of column to create snippet from */ 453 u64 mCovered, /* Mask of phrases already covered */ 454 u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ 455 SnippetFragment *pFragment, /* OUT: Best snippet found */ 456 int *piScore /* OUT: Score of snippet pFragment */ 457){ 458 int rc; /* Return Code */ 459 int nList; /* Number of phrases in expression */ 460 SnippetIter sIter; /* Iterates through snippet candidates */ 461 int nByte; /* Number of bytes of space to allocate */ 462 int iBestScore = -1; /* Best snippet score found so far */ 463 int i; /* Loop counter */ 464 465 memset(&sIter, 0, sizeof(sIter)); 466 467 /* Iterate through the phrases in the expression to count them. The same 468 ** callback makes sure the doclists are loaded for each phrase. 469 */ 470 rc = fts3ExprLoadDoclists(pCsr, &nList, 0); 471 if( rc!=SQLITE_OK ){ 472 return rc; 473 } 474 475 /* Now that it is known how many phrases there are, allocate and zero 476 ** the required space using malloc(). 477 */ 478 nByte = sizeof(SnippetPhrase) * nList; 479 sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); 480 if( !sIter.aPhrase ){ 481 return SQLITE_NOMEM; 482 } 483 memset(sIter.aPhrase, 0, nByte); 484 485 /* Initialize the contents of the SnippetIter object. Then iterate through 486 ** the set of phrases in the expression to populate the aPhrase[] array. 487 */ 488 sIter.pCsr = pCsr; 489 sIter.iCol = iCol; 490 sIter.nSnippet = nSnippet; 491 sIter.nPhrase = nList; 492 sIter.iCurrent = -1; 493 (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter); 494 495 /* Set the *pmSeen output variable. */ 496 for(i=0; i<nList; i++){ 497 if( sIter.aPhrase[i].pHead ){ 498 *pmSeen |= (u64)1 << i; 499 } 500 } 501 502 /* Loop through all candidate snippets. Store the best snippet in 503 ** *pFragment. Store its associated 'score' in iBestScore. 504 */ 505 pFragment->iCol = iCol; 506 while( !fts3SnippetNextCandidate(&sIter) ){ 507 int iPos; 508 int iScore; 509 u64 mCover; 510 u64 mHighlight; 511 fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight); 512 assert( iScore>=0 ); 513 if( iScore>iBestScore ){ 514 pFragment->iPos = iPos; 515 pFragment->hlmask = mHighlight; 516 pFragment->covered = mCover; 517 iBestScore = iScore; 518 } 519 } 520 521 sqlite3_free(sIter.aPhrase); 522 *piScore = iBestScore; 523 return SQLITE_OK; 524} 525 526 527/* 528** Append a string to the string-buffer passed as the first argument. 529** 530** If nAppend is negative, then the length of the string zAppend is 531** determined using strlen(). 532*/ 533static int fts3StringAppend( 534 StrBuffer *pStr, /* Buffer to append to */ 535 const char *zAppend, /* Pointer to data to append to buffer */ 536 int nAppend /* Size of zAppend in bytes (or -1) */ 537){ 538 if( nAppend<0 ){ 539 nAppend = (int)strlen(zAppend); 540 } 541 542 /* If there is insufficient space allocated at StrBuffer.z, use realloc() 543 ** to grow the buffer until so that it is big enough to accomadate the 544 ** appended data. 545 */ 546 if( pStr->n+nAppend+1>=pStr->nAlloc ){ 547 int nAlloc = pStr->nAlloc+nAppend+100; 548 char *zNew = sqlite3_realloc(pStr->z, nAlloc); 549 if( !zNew ){ 550 return SQLITE_NOMEM; 551 } 552 pStr->z = zNew; 553 pStr->nAlloc = nAlloc; 554 } 555 556 /* Append the data to the string buffer. */ 557 memcpy(&pStr->z[pStr->n], zAppend, nAppend); 558 pStr->n += nAppend; 559 pStr->z[pStr->n] = '\0'; 560 561 return SQLITE_OK; 562} 563 564/* 565** The fts3BestSnippet() function often selects snippets that end with a 566** query term. That is, the final term of the snippet is always a term 567** that requires highlighting. For example, if 'X' is a highlighted term 568** and '.' is a non-highlighted term, BestSnippet() may select: 569** 570** ........X.....X 571** 572** This function "shifts" the beginning of the snippet forward in the 573** document so that there are approximately the same number of 574** non-highlighted terms to the right of the final highlighted term as there 575** are to the left of the first highlighted term. For example, to this: 576** 577** ....X.....X.... 578** 579** This is done as part of extracting the snippet text, not when selecting 580** the snippet. Snippet selection is done based on doclists only, so there 581** is no way for fts3BestSnippet() to know whether or not the document 582** actually contains terms that follow the final highlighted term. 583*/ 584static int fts3SnippetShift( 585 Fts3Table *pTab, /* FTS3 table snippet comes from */ 586 int nSnippet, /* Number of tokens desired for snippet */ 587 const char *zDoc, /* Document text to extract snippet from */ 588 int nDoc, /* Size of buffer zDoc in bytes */ 589 int *piPos, /* IN/OUT: First token of snippet */ 590 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ 591){ 592 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ 593 594 if( hlmask ){ 595 int nLeft; /* Tokens to the left of first highlight */ 596 int nRight; /* Tokens to the right of last highlight */ 597 int nDesired; /* Ideal number of tokens to shift forward */ 598 599 for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); 600 for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); 601 nDesired = (nLeft-nRight)/2; 602 603 /* Ideally, the start of the snippet should be pushed forward in the 604 ** document nDesired tokens. This block checks if there are actually 605 ** nDesired tokens to the right of the snippet. If so, *piPos and 606 ** *pHlMask are updated to shift the snippet nDesired tokens to the 607 ** right. Otherwise, the snippet is shifted by the number of tokens 608 ** available. 609 */ 610 if( nDesired>0 ){ 611 int nShift; /* Number of tokens to shift snippet by */ 612 int iCurrent = 0; /* Token counter */ 613 int rc; /* Return Code */ 614 sqlite3_tokenizer_module *pMod; 615 sqlite3_tokenizer_cursor *pC; 616 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; 617 618 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) 619 ** or more tokens in zDoc/nDoc. 620 */ 621 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); 622 if( rc!=SQLITE_OK ){ 623 return rc; 624 } 625 pC->pTokenizer = pTab->pTokenizer; 626 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ 627 const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; 628 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); 629 } 630 pMod->xClose(pC); 631 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } 632 633 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; 634 assert( nShift<=nDesired ); 635 if( nShift>0 ){ 636 *piPos += nShift; 637 *pHlmask = hlmask >> nShift; 638 } 639 } 640 } 641 return SQLITE_OK; 642} 643 644/* 645** Extract the snippet text for fragment pFragment from cursor pCsr and 646** append it to string buffer pOut. 647*/ 648static int fts3SnippetText( 649 Fts3Cursor *pCsr, /* FTS3 Cursor */ 650 SnippetFragment *pFragment, /* Snippet to extract */ 651 int iFragment, /* Fragment number */ 652 int isLast, /* True for final fragment in snippet */ 653 int nSnippet, /* Number of tokens in extracted snippet */ 654 const char *zOpen, /* String inserted before highlighted term */ 655 const char *zClose, /* String inserted after highlighted term */ 656 const char *zEllipsis, /* String inserted between snippets */ 657 StrBuffer *pOut /* Write output here */ 658){ 659 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; 660 int rc; /* Return code */ 661 const char *zDoc; /* Document text to extract snippet from */ 662 int nDoc; /* Size of zDoc in bytes */ 663 int iCurrent = 0; /* Current token number of document */ 664 int iEnd = 0; /* Byte offset of end of current token */ 665 int isShiftDone = 0; /* True after snippet is shifted */ 666 int iPos = pFragment->iPos; /* First token of snippet */ 667 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ 668 int iCol = pFragment->iCol+1; /* Query column to extract text from */ 669 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ 670 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ 671 const char *ZDUMMY; /* Dummy argument used with tokenizer */ 672 int DUMMY1; /* Dummy argument used with tokenizer */ 673 674 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); 675 if( zDoc==0 ){ 676 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ 677 return SQLITE_NOMEM; 678 } 679 return SQLITE_OK; 680 } 681 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); 682 683 /* Open a token cursor on the document. */ 684 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; 685 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); 686 if( rc!=SQLITE_OK ){ 687 return rc; 688 } 689 pC->pTokenizer = pTab->pTokenizer; 690 691 while( rc==SQLITE_OK ){ 692 int iBegin; /* Offset in zDoc of start of token */ 693 int iFin; /* Offset in zDoc of end of token */ 694 int isHighlight; /* True for highlighted terms */ 695 696 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); 697 if( rc!=SQLITE_OK ){ 698 if( rc==SQLITE_DONE ){ 699 /* Special case - the last token of the snippet is also the last token 700 ** of the column. Append any punctuation that occurred between the end 701 ** of the previous token and the end of the document to the output. 702 ** Then break out of the loop. */ 703 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); 704 } 705 break; 706 } 707 if( iCurrent<iPos ){ continue; } 708 709 if( !isShiftDone ){ 710 int n = nDoc - iBegin; 711 rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask); 712 isShiftDone = 1; 713 714 /* Now that the shift has been done, check if the initial "..." are 715 ** required. They are required if (a) this is not the first fragment, 716 ** or (b) this fragment does not begin at position 0 of its column. 717 */ 718 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ 719 rc = fts3StringAppend(pOut, zEllipsis, -1); 720 } 721 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; 722 } 723 724 if( iCurrent>=(iPos+nSnippet) ){ 725 if( isLast ){ 726 rc = fts3StringAppend(pOut, zEllipsis, -1); 727 } 728 break; 729 } 730 731 /* Set isHighlight to true if this term should be highlighted. */ 732 isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; 733 734 if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); 735 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); 736 if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); 737 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); 738 739 iEnd = iFin; 740 } 741 742 pMod->xClose(pC); 743 return rc; 744} 745 746 747/* 748** This function is used to count the entries in a column-list (a 749** delta-encoded list of term offsets within a single column of a single 750** row). When this function is called, *ppCollist should point to the 751** beginning of the first varint in the column-list (the varint that 752** contains the position of the first matching term in the column data). 753** Before returning, *ppCollist is set to point to the first byte after 754** the last varint in the column-list (either the 0x00 signifying the end 755** of the position-list, or the 0x01 that precedes the column number of 756** the next column in the position-list). 757** 758** The number of elements in the column-list is returned. 759*/ 760static int fts3ColumnlistCount(char **ppCollist){ 761 char *pEnd = *ppCollist; 762 char c = 0; 763 int nEntry = 0; 764 765 /* A column-list is terminated by either a 0x01 or 0x00. */ 766 while( 0xFE & (*pEnd | c) ){ 767 c = *pEnd++ & 0x80; 768 if( !c ) nEntry++; 769 } 770 771 *ppCollist = pEnd; 772 return nEntry; 773} 774 775static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){ 776 char *pCsr = *pp; 777 while( *pCsr ){ 778 int nHit; 779 sqlite3_int64 iCol = 0; 780 if( *pCsr==0x01 ){ 781 pCsr++; 782 pCsr += sqlite3Fts3GetVarint(pCsr, &iCol); 783 } 784 nHit = fts3ColumnlistCount(&pCsr); 785 assert( nHit>0 ); 786 if( isGlobal ){ 787 aOut[iCol*3+1]++; 788 } 789 aOut[iCol*3] += nHit; 790 } 791 pCsr++; 792 *pp = pCsr; 793} 794 795/* 796** fts3ExprIterate() callback used to collect the "global" matchinfo stats 797** for a single query. 798** 799** fts3ExprIterate() callback to load the 'global' elements of a 800** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements 801** of the matchinfo array that are constant for all rows returned by the 802** current query. 803** 804** Argument pCtx is actually a pointer to a struct of type MatchInfo. This 805** function populates Matchinfo.aMatchinfo[] as follows: 806** 807** for(iCol=0; iCol<nCol; iCol++){ 808** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; 809** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; 810** } 811** 812** where X is the number of matches for phrase iPhrase is column iCol of all 813** rows of the table. Y is the number of rows for which column iCol contains 814** at least one instance of phrase iPhrase. 815** 816** If the phrase pExpr consists entirely of deferred tokens, then all X and 817** Y values are set to nDoc, where nDoc is the number of documents in the 818** file system. This is done because the full-text index doclist is required 819** to calculate these values properly, and the full-text index doclist is 820** not available for deferred tokens. 821*/ 822static int fts3ExprGlobalHitsCb( 823 Fts3Expr *pExpr, /* Phrase expression node */ 824 int iPhrase, /* Phrase number (numbered from zero) */ 825 void *pCtx /* Pointer to MatchInfo structure */ 826){ 827 MatchInfo *p = (MatchInfo *)pCtx; 828 Fts3Cursor *pCsr = p->pCursor; 829 char *pIter; 830 char *pEnd; 831 char *pFree = 0; 832 u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol]; 833 834 assert( pExpr->isLoaded ); 835 assert( pExpr->eType==FTSQUERY_PHRASE ); 836 837 if( pCsr->pDeferred ){ 838 Fts3Phrase *pPhrase = pExpr->pPhrase; 839 int ii; 840 for(ii=0; ii<pPhrase->nToken; ii++){ 841 if( pPhrase->aToken[ii].bFulltext ) break; 842 } 843 if( ii<pPhrase->nToken ){ 844 int nFree = 0; 845 int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree); 846 if( rc!=SQLITE_OK ) return rc; 847 pIter = pFree; 848 pEnd = &pFree[nFree]; 849 }else{ 850 int iCol; /* Column index */ 851 for(iCol=0; iCol<p->nCol; iCol++){ 852 aOut[iCol*3 + 1] = (u32)p->nDoc; 853 aOut[iCol*3 + 2] = (u32)p->nDoc; 854 } 855 return SQLITE_OK; 856 } 857 }else{ 858 pIter = pExpr->aDoclist; 859 pEnd = &pExpr->aDoclist[pExpr->nDoclist]; 860 } 861 862 /* Fill in the global hit count matrix row for this phrase. */ 863 while( pIter<pEnd ){ 864 while( *pIter++ & 0x80 ); /* Skip past docid. */ 865 fts3LoadColumnlistCounts(&pIter, &aOut[1], 1); 866 } 867 868 sqlite3_free(pFree); 869 return SQLITE_OK; 870} 871 872/* 873** fts3ExprIterate() callback used to collect the "local" part of the 874** FTS3_MATCHINFO_HITS array. The local stats are those elements of the 875** array that are different for each row returned by the query. 876*/ 877static int fts3ExprLocalHitsCb( 878 Fts3Expr *pExpr, /* Phrase expression node */ 879 int iPhrase, /* Phrase number */ 880 void *pCtx /* Pointer to MatchInfo structure */ 881){ 882 MatchInfo *p = (MatchInfo *)pCtx; 883 int iStart = iPhrase * p->nCol * 3; 884 int i; 885 886 for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0; 887 888 if( pExpr->aDoclist ){ 889 char *pCsr; 890 891 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1); 892 if( pCsr ){ 893 fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0); 894 } 895 } 896 897 return SQLITE_OK; 898} 899 900static int fts3MatchinfoCheck( 901 Fts3Table *pTab, 902 char cArg, 903 char **pzErr 904){ 905 if( (cArg==FTS3_MATCHINFO_NPHRASE) 906 || (cArg==FTS3_MATCHINFO_NCOL) 907 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat) 908 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat) 909 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) 910 || (cArg==FTS3_MATCHINFO_LCS) 911 || (cArg==FTS3_MATCHINFO_HITS) 912 ){ 913 return SQLITE_OK; 914 } 915 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); 916 return SQLITE_ERROR; 917} 918 919static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ 920 int nVal; /* Number of integers output by cArg */ 921 922 switch( cArg ){ 923 case FTS3_MATCHINFO_NDOC: 924 case FTS3_MATCHINFO_NPHRASE: 925 case FTS3_MATCHINFO_NCOL: 926 nVal = 1; 927 break; 928 929 case FTS3_MATCHINFO_AVGLENGTH: 930 case FTS3_MATCHINFO_LENGTH: 931 case FTS3_MATCHINFO_LCS: 932 nVal = pInfo->nCol; 933 break; 934 935 default: 936 assert( cArg==FTS3_MATCHINFO_HITS ); 937 nVal = pInfo->nCol * pInfo->nPhrase * 3; 938 break; 939 } 940 941 return nVal; 942} 943 944static int fts3MatchinfoSelectDoctotal( 945 Fts3Table *pTab, 946 sqlite3_stmt **ppStmt, 947 sqlite3_int64 *pnDoc, 948 const char **paLen 949){ 950 sqlite3_stmt *pStmt; 951 const char *a; 952 sqlite3_int64 nDoc; 953 954 if( !*ppStmt ){ 955 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); 956 if( rc!=SQLITE_OK ) return rc; 957 } 958 pStmt = *ppStmt; 959 assert( sqlite3_data_count(pStmt)==1 ); 960 961 a = sqlite3_column_blob(pStmt, 0); 962 a += sqlite3Fts3GetVarint(a, &nDoc); 963 if( nDoc==0 ) return SQLITE_CORRUPT; 964 *pnDoc = (u32)nDoc; 965 966 if( paLen ) *paLen = a; 967 return SQLITE_OK; 968} 969 970/* 971** An instance of the following structure is used to store state while 972** iterating through a multi-column position-list corresponding to the 973** hits for a single phrase on a single row in order to calculate the 974** values for a matchinfo() FTS3_MATCHINFO_LCS request. 975*/ 976typedef struct LcsIterator LcsIterator; 977struct LcsIterator { 978 Fts3Expr *pExpr; /* Pointer to phrase expression */ 979 char *pRead; /* Cursor used to iterate through aDoclist */ 980 int iPosOffset; /* Tokens count up to end of this phrase */ 981 int iCol; /* Current column number */ 982 int iPos; /* Current position */ 983}; 984 985/* 986** If LcsIterator.iCol is set to the following value, the iterator has 987** finished iterating through all offsets for all columns. 988*/ 989#define LCS_ITERATOR_FINISHED 0x7FFFFFFF; 990 991static int fts3MatchinfoLcsCb( 992 Fts3Expr *pExpr, /* Phrase expression node */ 993 int iPhrase, /* Phrase number (numbered from zero) */ 994 void *pCtx /* Pointer to MatchInfo structure */ 995){ 996 LcsIterator *aIter = (LcsIterator *)pCtx; 997 aIter[iPhrase].pExpr = pExpr; 998 return SQLITE_OK; 999} 1000 1001/* 1002** Advance the iterator passed as an argument to the next position. Return 1003** 1 if the iterator is at EOF or if it now points to the start of the 1004** position list for the next column. 1005*/ 1006static int fts3LcsIteratorAdvance(LcsIterator *pIter){ 1007 char *pRead = pIter->pRead; 1008 sqlite3_int64 iRead; 1009 int rc = 0; 1010 1011 pRead += sqlite3Fts3GetVarint(pRead, &iRead); 1012 if( iRead==0 ){ 1013 pIter->iCol = LCS_ITERATOR_FINISHED; 1014 rc = 1; 1015 }else{ 1016 if( iRead==1 ){ 1017 pRead += sqlite3Fts3GetVarint(pRead, &iRead); 1018 pIter->iCol = (int)iRead; 1019 pIter->iPos = pIter->iPosOffset; 1020 pRead += sqlite3Fts3GetVarint(pRead, &iRead); 1021 rc = 1; 1022 } 1023 pIter->iPos += (int)(iRead-2); 1024 } 1025 1026 pIter->pRead = pRead; 1027 return rc; 1028} 1029 1030/* 1031** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. 1032** 1033** If the call is successful, the longest-common-substring lengths for each 1034** column are written into the first nCol elements of the pInfo->aMatchinfo[] 1035** array before returning. SQLITE_OK is returned in this case. 1036** 1037** Otherwise, if an error occurs, an SQLite error code is returned and the 1038** data written to the first nCol elements of pInfo->aMatchinfo[] is 1039** undefined. 1040*/ 1041static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ 1042 LcsIterator *aIter; 1043 int i; 1044 int iCol; 1045 int nToken = 0; 1046 1047 /* Allocate and populate the array of LcsIterator objects. The array 1048 ** contains one element for each matchable phrase in the query. 1049 **/ 1050 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); 1051 if( !aIter ) return SQLITE_NOMEM; 1052 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); 1053 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); 1054 for(i=0; i<pInfo->nPhrase; i++){ 1055 LcsIterator *pIter = &aIter[i]; 1056 nToken -= pIter->pExpr->pPhrase->nToken; 1057 pIter->iPosOffset = nToken; 1058 pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1); 1059 if( pIter->pRead ){ 1060 pIter->iPos = pIter->iPosOffset; 1061 fts3LcsIteratorAdvance(&aIter[i]); 1062 }else{ 1063 pIter->iCol = LCS_ITERATOR_FINISHED; 1064 } 1065 } 1066 1067 for(iCol=0; iCol<pInfo->nCol; iCol++){ 1068 int nLcs = 0; /* LCS value for this column */ 1069 int nLive = 0; /* Number of iterators in aIter not at EOF */ 1070 1071 /* Loop through the iterators in aIter[]. Set nLive to the number of 1072 ** iterators that point to a position-list corresponding to column iCol. 1073 */ 1074 for(i=0; i<pInfo->nPhrase; i++){ 1075 assert( aIter[i].iCol>=iCol ); 1076 if( aIter[i].iCol==iCol ) nLive++; 1077 } 1078 1079 /* The following loop runs until all iterators in aIter[] have finished 1080 ** iterating through positions in column iCol. Exactly one of the 1081 ** iterators is advanced each time the body of the loop is run. 1082 */ 1083 while( nLive>0 ){ 1084 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ 1085 int nThisLcs = 0; /* LCS for the current iterator positions */ 1086 1087 for(i=0; i<pInfo->nPhrase; i++){ 1088 LcsIterator *pIter = &aIter[i]; 1089 if( iCol!=pIter->iCol ){ 1090 /* This iterator is already at EOF for this column. */ 1091 nThisLcs = 0; 1092 }else{ 1093 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ 1094 pAdv = pIter; 1095 } 1096 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ 1097 nThisLcs++; 1098 }else{ 1099 nThisLcs = 1; 1100 } 1101 if( nThisLcs>nLcs ) nLcs = nThisLcs; 1102 } 1103 } 1104 if( fts3LcsIteratorAdvance(pAdv) ) nLive--; 1105 } 1106 1107 pInfo->aMatchinfo[iCol] = nLcs; 1108 } 1109 1110 sqlite3_free(aIter); 1111 return SQLITE_OK; 1112} 1113 1114/* 1115** Populate the buffer pInfo->aMatchinfo[] with an array of integers to 1116** be returned by the matchinfo() function. Argument zArg contains the 1117** format string passed as the second argument to matchinfo (or the 1118** default value "pcx" if no second argument was specified). The format 1119** string has already been validated and the pInfo->aMatchinfo[] array 1120** is guaranteed to be large enough for the output. 1121** 1122** If bGlobal is true, then populate all fields of the matchinfo() output. 1123** If it is false, then assume that those fields that do not change between 1124** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) 1125** have already been populated. 1126** 1127** Return SQLITE_OK if successful, or an SQLite error code if an error 1128** occurs. If a value other than SQLITE_OK is returned, the state the 1129** pInfo->aMatchinfo[] buffer is left in is undefined. 1130*/ 1131static int fts3MatchinfoValues( 1132 Fts3Cursor *pCsr, /* FTS3 cursor object */ 1133 int bGlobal, /* True to grab the global stats */ 1134 MatchInfo *pInfo, /* Matchinfo context object */ 1135 const char *zArg /* Matchinfo format string */ 1136){ 1137 int rc = SQLITE_OK; 1138 int i; 1139 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; 1140 sqlite3_stmt *pSelect = 0; 1141 1142 for(i=0; rc==SQLITE_OK && zArg[i]; i++){ 1143 1144 switch( zArg[i] ){ 1145 case FTS3_MATCHINFO_NPHRASE: 1146 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; 1147 break; 1148 1149 case FTS3_MATCHINFO_NCOL: 1150 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; 1151 break; 1152 1153 case FTS3_MATCHINFO_NDOC: 1154 if( bGlobal ){ 1155 sqlite3_int64 nDoc; 1156 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); 1157 pInfo->aMatchinfo[0] = (u32)nDoc; 1158 } 1159 break; 1160 1161 case FTS3_MATCHINFO_AVGLENGTH: 1162 if( bGlobal ){ 1163 sqlite3_int64 nDoc; /* Number of rows in table */ 1164 const char *a; /* Aggregate column length array */ 1165 1166 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); 1167 if( rc==SQLITE_OK ){ 1168 int iCol; 1169 for(iCol=0; iCol<pInfo->nCol; iCol++){ 1170 u32 iVal; 1171 sqlite3_int64 nToken; 1172 a += sqlite3Fts3GetVarint(a, &nToken); 1173 iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); 1174 pInfo->aMatchinfo[iCol] = iVal; 1175 } 1176 } 1177 } 1178 break; 1179 1180 case FTS3_MATCHINFO_LENGTH: { 1181 sqlite3_stmt *pSelectDocsize = 0; 1182 rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); 1183 if( rc==SQLITE_OK ){ 1184 int iCol; 1185 const char *a = sqlite3_column_blob(pSelectDocsize, 0); 1186 for(iCol=0; iCol<pInfo->nCol; iCol++){ 1187 sqlite3_int64 nToken; 1188 a += sqlite3Fts3GetVarint(a, &nToken); 1189 pInfo->aMatchinfo[iCol] = (u32)nToken; 1190 } 1191 } 1192 sqlite3_reset(pSelectDocsize); 1193 break; 1194 } 1195 1196 case FTS3_MATCHINFO_LCS: 1197 rc = fts3ExprLoadDoclists(pCsr, 0, 0); 1198 if( rc==SQLITE_OK ){ 1199 rc = fts3MatchinfoLcs(pCsr, pInfo); 1200 } 1201 break; 1202 1203 default: { 1204 Fts3Expr *pExpr; 1205 assert( zArg[i]==FTS3_MATCHINFO_HITS ); 1206 pExpr = pCsr->pExpr; 1207 rc = fts3ExprLoadDoclists(pCsr, 0, 0); 1208 if( rc!=SQLITE_OK ) break; 1209 if( bGlobal ){ 1210 if( pCsr->pDeferred ){ 1211 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); 1212 if( rc!=SQLITE_OK ) break; 1213 } 1214 rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); 1215 if( rc!=SQLITE_OK ) break; 1216 } 1217 (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); 1218 break; 1219 } 1220 } 1221 1222 pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); 1223 } 1224 1225 sqlite3_reset(pSelect); 1226 return rc; 1227} 1228 1229 1230/* 1231** Populate pCsr->aMatchinfo[] with data for the current row. The 1232** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). 1233*/ 1234static int fts3GetMatchinfo( 1235 Fts3Cursor *pCsr, /* FTS3 Cursor object */ 1236 const char *zArg /* Second argument to matchinfo() function */ 1237){ 1238 MatchInfo sInfo; 1239 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; 1240 int rc = SQLITE_OK; 1241 int bGlobal = 0; /* Collect 'global' stats as well as local */ 1242 1243 memset(&sInfo, 0, sizeof(MatchInfo)); 1244 sInfo.pCursor = pCsr; 1245 sInfo.nCol = pTab->nColumn; 1246 1247 /* If there is cached matchinfo() data, but the format string for the 1248 ** cache does not match the format string for this request, discard 1249 ** the cached data. */ 1250 if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){ 1251 assert( pCsr->aMatchinfo ); 1252 sqlite3_free(pCsr->aMatchinfo); 1253 pCsr->zMatchinfo = 0; 1254 pCsr->aMatchinfo = 0; 1255 } 1256 1257 /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the 1258 ** matchinfo function has been called for this query. In this case 1259 ** allocate the array used to accumulate the matchinfo data and 1260 ** initialize those elements that are constant for every row. 1261 */ 1262 if( pCsr->aMatchinfo==0 ){ 1263 int nMatchinfo = 0; /* Number of u32 elements in match-info */ 1264 int nArg; /* Bytes in zArg */ 1265 int i; /* Used to iterate through zArg */ 1266 1267 /* Determine the number of phrases in the query */ 1268 pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); 1269 sInfo.nPhrase = pCsr->nPhrase; 1270 1271 /* Determine the number of integers in the buffer returned by this call. */ 1272 for(i=0; zArg[i]; i++){ 1273 nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); 1274 } 1275 1276 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ 1277 nArg = (int)strlen(zArg); 1278 pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1); 1279 if( !pCsr->aMatchinfo ) return SQLITE_NOMEM; 1280 1281 pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo]; 1282 pCsr->nMatchinfo = nMatchinfo; 1283 memcpy(pCsr->zMatchinfo, zArg, nArg+1); 1284 memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo); 1285 pCsr->isMatchinfoNeeded = 1; 1286 bGlobal = 1; 1287 } 1288 1289 sInfo.aMatchinfo = pCsr->aMatchinfo; 1290 sInfo.nPhrase = pCsr->nPhrase; 1291 if( pCsr->isMatchinfoNeeded ){ 1292 rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); 1293 pCsr->isMatchinfoNeeded = 0; 1294 } 1295 1296 return rc; 1297} 1298 1299/* 1300** Implementation of snippet() function. 1301*/ 1302void sqlite3Fts3Snippet( 1303 sqlite3_context *pCtx, /* SQLite function call context */ 1304 Fts3Cursor *pCsr, /* Cursor object */ 1305 const char *zStart, /* Snippet start text - "<b>" */ 1306 const char *zEnd, /* Snippet end text - "</b>" */ 1307 const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ 1308 int iCol, /* Extract snippet from this column */ 1309 int nToken /* Approximate number of tokens in snippet */ 1310){ 1311 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; 1312 int rc = SQLITE_OK; 1313 int i; 1314 StrBuffer res = {0, 0, 0}; 1315 1316 /* The returned text includes up to four fragments of text extracted from 1317 ** the data in the current row. The first iteration of the for(...) loop 1318 ** below attempts to locate a single fragment of text nToken tokens in 1319 ** size that contains at least one instance of all phrases in the query 1320 ** expression that appear in the current row. If such a fragment of text 1321 ** cannot be found, the second iteration of the loop attempts to locate 1322 ** a pair of fragments, and so on. 1323 */ 1324 int nSnippet = 0; /* Number of fragments in this snippet */ 1325 SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ 1326 int nFToken = -1; /* Number of tokens in each fragment */ 1327 1328 if( !pCsr->pExpr ){ 1329 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); 1330 return; 1331 } 1332 1333 for(nSnippet=1; 1; nSnippet++){ 1334 1335 int iSnip; /* Loop counter 0..nSnippet-1 */ 1336 u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ 1337 u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ 1338 1339 if( nToken>=0 ){ 1340 nFToken = (nToken+nSnippet-1) / nSnippet; 1341 }else{ 1342 nFToken = -1 * nToken; 1343 } 1344 1345 for(iSnip=0; iSnip<nSnippet; iSnip++){ 1346 int iBestScore = -1; /* Best score of columns checked so far */ 1347 int iRead; /* Used to iterate through columns */ 1348 SnippetFragment *pFragment = &aSnippet[iSnip]; 1349 1350 memset(pFragment, 0, sizeof(*pFragment)); 1351 1352 /* Loop through all columns of the table being considered for snippets. 1353 ** If the iCol argument to this function was negative, this means all 1354 ** columns of the FTS3 table. Otherwise, only column iCol is considered. 1355 */ 1356 for(iRead=0; iRead<pTab->nColumn; iRead++){ 1357 SnippetFragment sF = {0, 0, 0, 0}; 1358 int iS; 1359 if( iCol>=0 && iRead!=iCol ) continue; 1360 1361 /* Find the best snippet of nFToken tokens in column iRead. */ 1362 rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); 1363 if( rc!=SQLITE_OK ){ 1364 goto snippet_out; 1365 } 1366 if( iS>iBestScore ){ 1367 *pFragment = sF; 1368 iBestScore = iS; 1369 } 1370 } 1371 1372 mCovered |= pFragment->covered; 1373 } 1374 1375 /* If all query phrases seen by fts3BestSnippet() are present in at least 1376 ** one of the nSnippet snippet fragments, break out of the loop. 1377 */ 1378 assert( (mCovered&mSeen)==mCovered ); 1379 if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; 1380 } 1381 1382 assert( nFToken>0 ); 1383 1384 for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ 1385 rc = fts3SnippetText(pCsr, &aSnippet[i], 1386 i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res 1387 ); 1388 } 1389 1390 snippet_out: 1391 sqlite3Fts3SegmentsClose(pTab); 1392 if( rc!=SQLITE_OK ){ 1393 sqlite3_result_error_code(pCtx, rc); 1394 sqlite3_free(res.z); 1395 }else{ 1396 sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); 1397 } 1398} 1399 1400 1401typedef struct TermOffset TermOffset; 1402typedef struct TermOffsetCtx TermOffsetCtx; 1403 1404struct TermOffset { 1405 char *pList; /* Position-list */ 1406 int iPos; /* Position just read from pList */ 1407 int iOff; /* Offset of this term from read positions */ 1408}; 1409 1410struct TermOffsetCtx { 1411 int iCol; /* Column of table to populate aTerm for */ 1412 int iTerm; 1413 sqlite3_int64 iDocid; 1414 TermOffset *aTerm; 1415}; 1416 1417/* 1418** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). 1419*/ 1420static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ 1421 TermOffsetCtx *p = (TermOffsetCtx *)ctx; 1422 int nTerm; /* Number of tokens in phrase */ 1423 int iTerm; /* For looping through nTerm phrase terms */ 1424 char *pList; /* Pointer to position list for phrase */ 1425 int iPos = 0; /* First position in position-list */ 1426 1427 UNUSED_PARAMETER(iPhrase); 1428 pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol); 1429 nTerm = pExpr->pPhrase->nToken; 1430 if( pList ){ 1431 fts3GetDeltaPosition(&pList, &iPos); 1432 assert( iPos>=0 ); 1433 } 1434 1435 for(iTerm=0; iTerm<nTerm; iTerm++){ 1436 TermOffset *pT = &p->aTerm[p->iTerm++]; 1437 pT->iOff = nTerm-iTerm-1; 1438 pT->pList = pList; 1439 pT->iPos = iPos; 1440 } 1441 1442 return SQLITE_OK; 1443} 1444 1445/* 1446** Implementation of offsets() function. 1447*/ 1448void sqlite3Fts3Offsets( 1449 sqlite3_context *pCtx, /* SQLite function call context */ 1450 Fts3Cursor *pCsr /* Cursor object */ 1451){ 1452 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; 1453 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; 1454 const char *ZDUMMY; /* Dummy argument used with xNext() */ 1455 int NDUMMY; /* Dummy argument used with xNext() */ 1456 int rc; /* Return Code */ 1457 int nToken; /* Number of tokens in query */ 1458 int iCol; /* Column currently being processed */ 1459 StrBuffer res = {0, 0, 0}; /* Result string */ 1460 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ 1461 1462 if( !pCsr->pExpr ){ 1463 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); 1464 return; 1465 } 1466 1467 memset(&sCtx, 0, sizeof(sCtx)); 1468 assert( pCsr->isRequireSeek==0 ); 1469 1470 /* Count the number of terms in the query */ 1471 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); 1472 if( rc!=SQLITE_OK ) goto offsets_out; 1473 1474 /* Allocate the array of TermOffset iterators. */ 1475 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); 1476 if( 0==sCtx.aTerm ){ 1477 rc = SQLITE_NOMEM; 1478 goto offsets_out; 1479 } 1480 sCtx.iDocid = pCsr->iPrevId; 1481 1482 /* Loop through the table columns, appending offset information to 1483 ** string-buffer res for each column. 1484 */ 1485 for(iCol=0; iCol<pTab->nColumn; iCol++){ 1486 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ 1487 int iStart; 1488 int iEnd; 1489 int iCurrent; 1490 const char *zDoc; 1491 int nDoc; 1492 1493 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is 1494 ** no way that this operation can fail, so the return code from 1495 ** fts3ExprIterate() can be discarded. 1496 */ 1497 sCtx.iCol = iCol; 1498 sCtx.iTerm = 0; 1499 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); 1500 1501 /* Retreive the text stored in column iCol. If an SQL NULL is stored 1502 ** in column iCol, jump immediately to the next iteration of the loop. 1503 ** If an OOM occurs while retrieving the data (this can happen if SQLite 1504 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM 1505 ** to the caller. 1506 */ 1507 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); 1508 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); 1509 if( zDoc==0 ){ 1510 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ 1511 continue; 1512 } 1513 rc = SQLITE_NOMEM; 1514 goto offsets_out; 1515 } 1516 1517 /* Initialize a tokenizer iterator to iterate through column iCol. */ 1518 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); 1519 if( rc!=SQLITE_OK ) goto offsets_out; 1520 pC->pTokenizer = pTab->pTokenizer; 1521 1522 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); 1523 while( rc==SQLITE_OK ){ 1524 int i; /* Used to loop through terms */ 1525 int iMinPos = 0x7FFFFFFF; /* Position of next token */ 1526 TermOffset *pTerm = 0; /* TermOffset associated with next token */ 1527 1528 for(i=0; i<nToken; i++){ 1529 TermOffset *pT = &sCtx.aTerm[i]; 1530 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ 1531 iMinPos = pT->iPos-pT->iOff; 1532 pTerm = pT; 1533 } 1534 } 1535 1536 if( !pTerm ){ 1537 /* All offsets for this column have been gathered. */ 1538 break; 1539 }else{ 1540 assert( iCurrent<=iMinPos ); 1541 if( 0==(0xFE&*pTerm->pList) ){ 1542 pTerm->pList = 0; 1543 }else{ 1544 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); 1545 } 1546 while( rc==SQLITE_OK && iCurrent<iMinPos ){ 1547 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); 1548 } 1549 if( rc==SQLITE_OK ){ 1550 char aBuffer[64]; 1551 sqlite3_snprintf(sizeof(aBuffer), aBuffer, 1552 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart 1553 ); 1554 rc = fts3StringAppend(&res, aBuffer, -1); 1555 }else if( rc==SQLITE_DONE ){ 1556 rc = SQLITE_CORRUPT; 1557 } 1558 } 1559 } 1560 if( rc==SQLITE_DONE ){ 1561 rc = SQLITE_OK; 1562 } 1563 1564 pMod->xClose(pC); 1565 if( rc!=SQLITE_OK ) goto offsets_out; 1566 } 1567 1568 offsets_out: 1569 sqlite3_free(sCtx.aTerm); 1570 assert( rc!=SQLITE_DONE ); 1571 sqlite3Fts3SegmentsClose(pTab); 1572 if( rc!=SQLITE_OK ){ 1573 sqlite3_result_error_code(pCtx, rc); 1574 sqlite3_free(res.z); 1575 }else{ 1576 sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); 1577 } 1578 return; 1579} 1580 1581/* 1582** Implementation of matchinfo() function. 1583*/ 1584void sqlite3Fts3Matchinfo( 1585 sqlite3_context *pContext, /* Function call context */ 1586 Fts3Cursor *pCsr, /* FTS3 table cursor */ 1587 const char *zArg /* Second arg to matchinfo() function */ 1588){ 1589 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; 1590 int rc; 1591 int i; 1592 const char *zFormat; 1593 1594 if( zArg ){ 1595 for(i=0; zArg[i]; i++){ 1596 char *zErr = 0; 1597 if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ 1598 sqlite3_result_error(pContext, zErr, -1); 1599 sqlite3_free(zErr); 1600 return; 1601 } 1602 } 1603 zFormat = zArg; 1604 }else{ 1605 zFormat = FTS3_MATCHINFO_DEFAULT; 1606 } 1607 1608 if( !pCsr->pExpr ){ 1609 sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); 1610 return; 1611 } 1612 1613 /* Retrieve matchinfo() data. */ 1614 rc = fts3GetMatchinfo(pCsr, zFormat); 1615 sqlite3Fts3SegmentsClose(pTab); 1616 1617 if( rc!=SQLITE_OK ){ 1618 sqlite3_result_error_code(pContext, rc); 1619 }else{ 1620 int n = pCsr->nMatchinfo * sizeof(u32); 1621 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); 1622 } 1623} 1624 1625#endif 1626