1/*
2** 2009 Oct 23
3**
4** The author disclaims copyright to this source code.  In place of
5** a legal notice, here is a blessing:
6**
7**    May you do good and not evil.
8**    May you find forgiveness for yourself and forgive others.
9**    May you share freely, never taking more than you give.
10**
11******************************************************************************
12*/
13
14#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
15
16#include "fts3Int.h"
17#include <string.h>
18#include <assert.h>
19
/*
** Characters that may appear in the second argument to matchinfo().
** The trailing comment on each definition gives the number of values
** associated with that character, expressed in terms of the column
** count (nCol) and the phrase count (nPhrase).
*/
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */

/*
** The default value for the second argument to matchinfo(). In terms of
** the characters defined above this is NPHRASE, NCOL, HITS, in that order.
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"
35
36
37/*
38** Used as an fts3ExprIterate() context when loading phrase doclists to
39** Fts3Expr.aDoclist[]/nDoclist.
40*/
41typedef struct LoadDoclistCtx LoadDoclistCtx;
42struct LoadDoclistCtx {
43  Fts3Cursor *pCsr;               /* FTS3 Cursor */
44  int nPhrase;                    /* Number of phrases seen so far */
45  int nToken;                     /* Number of tokens seen so far */
46};
47
48/*
49** The following types are used as part of the implementation of the
50** fts3BestSnippet() routine.
51*/
52typedef struct SnippetIter SnippetIter;
53typedef struct SnippetPhrase SnippetPhrase;
54typedef struct SnippetFragment SnippetFragment;
55
56struct SnippetIter {
57  Fts3Cursor *pCsr;               /* Cursor snippet is being generated from */
58  int iCol;                       /* Extract snippet from this column */
59  int nSnippet;                   /* Requested snippet length (in tokens) */
60  int nPhrase;                    /* Number of phrases in query */
61  SnippetPhrase *aPhrase;         /* Array of size nPhrase */
62  int iCurrent;                   /* First token of current snippet */
63};
64
65struct SnippetPhrase {
66  int nToken;                     /* Number of tokens in phrase */
67  char *pList;                    /* Pointer to start of phrase position list */
68  int iHead;                      /* Next value in position list */
69  char *pHead;                    /* Position list data following iHead */
70  int iTail;                      /* Next value in trailing position list */
71  char *pTail;                    /* Position list data following iTail */
72};
73
74struct SnippetFragment {
75  int iCol;                       /* Column snippet is extracted from */
76  int iPos;                       /* Index of first token in snippet */
77  u64 covered;                    /* Mask of query phrases covered */
78  u64 hlmask;                     /* Mask of snippet terms to highlight */
79};
80
81/*
82** This type is used as an fts3ExprIterate() context object while
83** accumulating the data returned by the matchinfo() function.
84*/
85typedef struct MatchInfo MatchInfo;
86struct MatchInfo {
87  Fts3Cursor *pCursor;            /* FTS3 Cursor */
88  int nCol;                       /* Number of columns in table */
89  int nPhrase;                    /* Number of matchable phrases in query */
90  sqlite3_int64 nDoc;             /* Number of docs in database */
91  u32 *aMatchinfo;                /* Pre-allocated buffer */
92};
93
94
95
/*
** Growable text buffer. snippet() and offsets() both return text; they
** build the result incrementally in one of these structures by calling
** fts3StringAppend().
*/
typedef struct StrBuffer {
  char *z;                        /* Buffer holding the string so far */
  int n;                          /* Bytes used in z (excl. nul-term) */
  int nAlloc;                     /* Bytes allocated at z */
} StrBuffer;
107
108
/*
** Step a position-list iterator forward by one element.
**
** A position list is a sorted list of unique integers. Each element is
** stored as an FTS3 varint holding the difference between the element and
** its predecessor, plus two. The list:
**
**     4 9 113
**
** is therefore stored as the three varints:
**
**     6 7 106
**
** On entry *pp points at the varint for an element and *piPos holds the
** value of the previous element. On return *piPos holds the value of the
** element just read and *pp points at the varint that follows it.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  int iDelta;                     /* Decoded varint (difference plus 2) */
  int nByte;                      /* Size of the varint in bytes */

  nByte = sqlite3Fts3GetVarint32(*pp, &iDelta);
  *pp += nByte;
  *piPos += (iDelta-2);
}
134
135/*
136** Helper function for fts3ExprIterate() (see below).
137*/
138static int fts3ExprIterate2(
139  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
140  int *piPhrase,                  /* Pointer to phrase counter */
141  int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
142  void *pCtx                      /* Second argument to pass to callback */
143){
144  int rc;                         /* Return code */
145  int eType = pExpr->eType;       /* Type of expression node pExpr */
146
147  if( eType!=FTSQUERY_PHRASE ){
148    assert( pExpr->pLeft && pExpr->pRight );
149    rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
150    if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
151      rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
152    }
153  }else{
154    rc = x(pExpr, *piPhrase, pCtx);
155    (*piPhrase)++;
156  }
157  return rc;
158}
159
160/*
161** Iterate through all phrase nodes in an FTS3 query, except those that
162** are part of a sub-tree that is the right-hand-side of a NOT operator.
163** For each phrase node found, the supplied callback function is invoked.
164**
165** If the callback function returns anything other than SQLITE_OK,
166** the iteration is abandoned and the error code returned immediately.
167** Otherwise, SQLITE_OK is returned after a callback has been made for
168** all eligible phrase nodes.
169*/
170static int fts3ExprIterate(
171  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
172  int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
173  void *pCtx                      /* Second argument to pass to callback */
174){
175  int iPhrase = 0;                /* Variable used as the phrase counter */
176  return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
177}
178
179/*
180** The argument to this function is always a phrase node. Its doclist
181** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes
182** to the left of this one in the query tree have already been loaded.
183**
184** If this phrase node is part of a series of phrase nodes joined by
185** NEAR operators (and is not the left-most of said series), then elements are
186** removed from the phrases doclist consistent with the NEAR restriction. If
187** required, elements may be removed from the doclists of phrases to the
188** left of this one that are part of the same series of NEAR operator
189** connected phrases.
190**
191** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
192*/
193static int fts3ExprNearTrim(Fts3Expr *pExpr){
194  int rc = SQLITE_OK;
195  Fts3Expr *pParent = pExpr->pParent;
196
197  assert( pExpr->eType==FTSQUERY_PHRASE );
198  while( rc==SQLITE_OK
199   && pParent
200   && pParent->eType==FTSQUERY_NEAR
201   && pParent->pRight==pExpr
202  ){
203    /* This expression (pExpr) is the right-hand-side of a NEAR operator.
204    ** Find the expression to the left of the same operator.
205    */
206    int nNear = pParent->nNear;
207    Fts3Expr *pLeft = pParent->pLeft;
208
209    if( pLeft->eType!=FTSQUERY_PHRASE ){
210      assert( pLeft->eType==FTSQUERY_NEAR );
211      assert( pLeft->pRight->eType==FTSQUERY_PHRASE );
212      pLeft = pLeft->pRight;
213    }
214
215    rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear);
216
217    pExpr = pLeft;
218    pParent = pExpr->pParent;
219  }
220
221  return rc;
222}
223
224/*
225** This is an fts3ExprIterate() callback used while loading the doclists
226** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
227** fts3ExprLoadDoclists().
228*/
229static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
230  int rc = SQLITE_OK;
231  LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
232
233  UNUSED_PARAMETER(iPhrase);
234
235  p->nPhrase++;
236  p->nToken += pExpr->pPhrase->nToken;
237
238  if( pExpr->isLoaded==0 ){
239    rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr);
240    pExpr->isLoaded = 1;
241    if( rc==SQLITE_OK ){
242      rc = fts3ExprNearTrim(pExpr);
243    }
244  }
245
246  return rc;
247}
248
249/*
250** Load the doclists for each phrase in the query associated with FTS3 cursor
251** pCsr.
252**
253** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
254** phrases in the expression (all phrases except those directly or
255** indirectly descended from the right-hand-side of a NOT operator). If
256** pnToken is not NULL, then it is set to the number of tokens in all
257** matchable phrases of the expression.
258*/
259static int fts3ExprLoadDoclists(
260  Fts3Cursor *pCsr,               /* Fts3 cursor for current query */
261  int *pnPhrase,                  /* OUT: Number of phrases in query */
262  int *pnToken                    /* OUT: Number of tokens in query */
263){
264  int rc;                         /* Return Code */
265  LoadDoclistCtx sCtx = {0,0,0};  /* Context for fts3ExprIterate() */
266  sCtx.pCsr = pCsr;
267  rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
268  if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
269  if( pnToken ) *pnToken = sCtx.nToken;
270  return rc;
271}
272
273static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
274  (*(int *)ctx)++;
275  UNUSED_PARAMETER(pExpr);
276  UNUSED_PARAMETER(iPhrase);
277  return SQLITE_OK;
278}
279static int fts3ExprPhraseCount(Fts3Expr *pExpr){
280  int nPhrase = 0;
281  (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
282  return nPhrase;
283}
284
/*
** Advance the position-list iterator (*ppIter, *piIter) until its current
** value is greater than or equal to iNext.
**
** If *ppIter is NULL the iterator is already exhausted and nothing is
** done. If the end of the list is reached before a suitable value is
** found, *ppIter is set to NULL and *piIter to -1.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *p = *ppIter;              /* Read pointer into the position list */
  int iPos;                       /* Current iterator value */

  if( p==0 ){
    return;
  }

  iPos = *piIter;
  while( iPos<iNext ){
    if( (*p & 0xFE)==0 ){
      /* A 0x00 or 0x01 byte terminates the list: iterator is exhausted. */
      *piIter = -1;
      *ppIter = 0;
      return;
    }
    fts3GetDeltaPosition(&p, &iPos);
  }

  *piIter = iPos;
  *ppIter = p;
}
308
309/*
310** Advance the snippet iterator to the next candidate snippet.
311*/
312static int fts3SnippetNextCandidate(SnippetIter *pIter){
313  int i;                          /* Loop counter */
314
315  if( pIter->iCurrent<0 ){
316    /* The SnippetIter object has just been initialized. The first snippet
317    ** candidate always starts at offset 0 (even if this candidate has a
318    ** score of 0.0).
319    */
320    pIter->iCurrent = 0;
321
322    /* Advance the 'head' iterator of each phrase to the first offset that
323    ** is greater than or equal to (iNext+nSnippet).
324    */
325    for(i=0; i<pIter->nPhrase; i++){
326      SnippetPhrase *pPhrase = &pIter->aPhrase[i];
327      fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
328    }
329  }else{
330    int iStart;
331    int iEnd = 0x7FFFFFFF;
332
333    for(i=0; i<pIter->nPhrase; i++){
334      SnippetPhrase *pPhrase = &pIter->aPhrase[i];
335      if( pPhrase->pHead && pPhrase->iHead<iEnd ){
336        iEnd = pPhrase->iHead;
337      }
338    }
339    if( iEnd==0x7FFFFFFF ){
340      return 1;
341    }
342
343    pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
344    for(i=0; i<pIter->nPhrase; i++){
345      SnippetPhrase *pPhrase = &pIter->aPhrase[i];
346      fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
347      fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
348    }
349  }
350
351  return 0;
352}
353
354/*
355** Retrieve information about the current candidate snippet of snippet
356** iterator pIter.
357*/
358static void fts3SnippetDetails(
359  SnippetIter *pIter,             /* Snippet iterator */
360  u64 mCovered,                   /* Bitmask of phrases already covered */
361  int *piToken,                   /* OUT: First token of proposed snippet */
362  int *piScore,                   /* OUT: "Score" for this snippet */
363  u64 *pmCover,                   /* OUT: Bitmask of phrases covered */
364  u64 *pmHighlight                /* OUT: Bitmask of terms to highlight */
365){
366  int iStart = pIter->iCurrent;   /* First token of snippet */
367  int iScore = 0;                 /* Score of this snippet */
368  int i;                          /* Loop counter */
369  u64 mCover = 0;                 /* Mask of phrases covered by this snippet */
370  u64 mHighlight = 0;             /* Mask of tokens to highlight in snippet */
371
372  for(i=0; i<pIter->nPhrase; i++){
373    SnippetPhrase *pPhrase = &pIter->aPhrase[i];
374    if( pPhrase->pTail ){
375      char *pCsr = pPhrase->pTail;
376      int iCsr = pPhrase->iTail;
377
378      while( iCsr<(iStart+pIter->nSnippet) ){
379        int j;
380        u64 mPhrase = (u64)1 << i;
381        u64 mPos = (u64)1 << (iCsr - iStart);
382        assert( iCsr>=iStart );
383        if( (mCover|mCovered)&mPhrase ){
384          iScore++;
385        }else{
386          iScore += 1000;
387        }
388        mCover |= mPhrase;
389
390        for(j=0; j<pPhrase->nToken; j++){
391          mHighlight |= (mPos>>j);
392        }
393
394        if( 0==(*pCsr & 0x0FE) ) break;
395        fts3GetDeltaPosition(&pCsr, &iCsr);
396      }
397    }
398  }
399
400  /* Set the output variables before returning. */
401  *piToken = iStart;
402  *piScore = iScore;
403  *pmCover = mCover;
404  *pmHighlight = mHighlight;
405}
406
407/*
408** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
409** Each invocation populates an element of the SnippetIter.aPhrase[] array.
410*/
411static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
412  SnippetIter *p = (SnippetIter *)ctx;
413  SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
414  char *pCsr;
415
416  pPhrase->nToken = pExpr->pPhrase->nToken;
417
418  pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol);
419  if( pCsr ){
420    int iFirst = 0;
421    pPhrase->pList = pCsr;
422    fts3GetDeltaPosition(&pCsr, &iFirst);
423    pPhrase->pHead = pCsr;
424    pPhrase->pTail = pCsr;
425    pPhrase->iHead = iFirst;
426    pPhrase->iTail = iFirst;
427  }else{
428    assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 );
429  }
430
431  return SQLITE_OK;
432}
433
434/*
435** Select the fragment of text consisting of nFragment contiguous tokens
436** from column iCol that represent the "best" snippet. The best snippet
437** is the snippet with the highest score, where scores are calculated
438** by adding:
439**
440**   (a) +1 point for each occurence of a matchable phrase in the snippet.
441**
442**   (b) +1000 points for the first occurence of each matchable phrase in
443**       the snippet for which the corresponding mCovered bit is not set.
444**
445** The selected snippet parameters are stored in structure *pFragment before
446** returning. The score of the selected snippet is stored in *piScore
447** before returning.
448*/
449static int fts3BestSnippet(
450  int nSnippet,                   /* Desired snippet length */
451  Fts3Cursor *pCsr,               /* Cursor to create snippet for */
452  int iCol,                       /* Index of column to create snippet from */
453  u64 mCovered,                   /* Mask of phrases already covered */
454  u64 *pmSeen,                    /* IN/OUT: Mask of phrases seen */
455  SnippetFragment *pFragment,     /* OUT: Best snippet found */
456  int *piScore                    /* OUT: Score of snippet pFragment */
457){
458  int rc;                         /* Return Code */
459  int nList;                      /* Number of phrases in expression */
460  SnippetIter sIter;              /* Iterates through snippet candidates */
461  int nByte;                      /* Number of bytes of space to allocate */
462  int iBestScore = -1;            /* Best snippet score found so far */
463  int i;                          /* Loop counter */
464
465  memset(&sIter, 0, sizeof(sIter));
466
467  /* Iterate through the phrases in the expression to count them. The same
468  ** callback makes sure the doclists are loaded for each phrase.
469  */
470  rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
471  if( rc!=SQLITE_OK ){
472    return rc;
473  }
474
475  /* Now that it is known how many phrases there are, allocate and zero
476  ** the required space using malloc().
477  */
478  nByte = sizeof(SnippetPhrase) * nList;
479  sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
480  if( !sIter.aPhrase ){
481    return SQLITE_NOMEM;
482  }
483  memset(sIter.aPhrase, 0, nByte);
484
485  /* Initialize the contents of the SnippetIter object. Then iterate through
486  ** the set of phrases in the expression to populate the aPhrase[] array.
487  */
488  sIter.pCsr = pCsr;
489  sIter.iCol = iCol;
490  sIter.nSnippet = nSnippet;
491  sIter.nPhrase = nList;
492  sIter.iCurrent = -1;
493  (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
494
495  /* Set the *pmSeen output variable. */
496  for(i=0; i<nList; i++){
497    if( sIter.aPhrase[i].pHead ){
498      *pmSeen |= (u64)1 << i;
499    }
500  }
501
502  /* Loop through all candidate snippets. Store the best snippet in
503  ** *pFragment. Store its associated 'score' in iBestScore.
504  */
505  pFragment->iCol = iCol;
506  while( !fts3SnippetNextCandidate(&sIter) ){
507    int iPos;
508    int iScore;
509    u64 mCover;
510    u64 mHighlight;
511    fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
512    assert( iScore>=0 );
513    if( iScore>iBestScore ){
514      pFragment->iPos = iPos;
515      pFragment->hlmask = mHighlight;
516      pFragment->covered = mCover;
517      iBestScore = iScore;
518    }
519  }
520
521  sqlite3_free(sIter.aPhrase);
522  *piScore = iBestScore;
523  return SQLITE_OK;
524}
525
526
527/*
528** Append a string to the string-buffer passed as the first argument.
529**
530** If nAppend is negative, then the length of the string zAppend is
531** determined using strlen().
532*/
533static int fts3StringAppend(
534  StrBuffer *pStr,                /* Buffer to append to */
535  const char *zAppend,            /* Pointer to data to append to buffer */
536  int nAppend                     /* Size of zAppend in bytes (or -1) */
537){
538  if( nAppend<0 ){
539    nAppend = (int)strlen(zAppend);
540  }
541
542  /* If there is insufficient space allocated at StrBuffer.z, use realloc()
543  ** to grow the buffer until so that it is big enough to accomadate the
544  ** appended data.
545  */
546  if( pStr->n+nAppend+1>=pStr->nAlloc ){
547    int nAlloc = pStr->nAlloc+nAppend+100;
548    char *zNew = sqlite3_realloc(pStr->z, nAlloc);
549    if( !zNew ){
550      return SQLITE_NOMEM;
551    }
552    pStr->z = zNew;
553    pStr->nAlloc = nAlloc;
554  }
555
556  /* Append the data to the string buffer. */
557  memcpy(&pStr->z[pStr->n], zAppend, nAppend);
558  pStr->n += nAppend;
559  pStr->z[pStr->n] = '\0';
560
561  return SQLITE_OK;
562}
563
564/*
565** The fts3BestSnippet() function often selects snippets that end with a
566** query term. That is, the final term of the snippet is always a term
567** that requires highlighting. For example, if 'X' is a highlighted term
568** and '.' is a non-highlighted term, BestSnippet() may select:
569**
570**     ........X.....X
571**
572** This function "shifts" the beginning of the snippet forward in the
573** document so that there are approximately the same number of
574** non-highlighted terms to the right of the final highlighted term as there
575** are to the left of the first highlighted term. For example, to this:
576**
577**     ....X.....X....
578**
579** This is done as part of extracting the snippet text, not when selecting
580** the snippet. Snippet selection is done based on doclists only, so there
581** is no way for fts3BestSnippet() to know whether or not the document
582** actually contains terms that follow the final highlighted term.
583*/
584static int fts3SnippetShift(
585  Fts3Table *pTab,                /* FTS3 table snippet comes from */
586  int nSnippet,                   /* Number of tokens desired for snippet */
587  const char *zDoc,               /* Document text to extract snippet from */
588  int nDoc,                       /* Size of buffer zDoc in bytes */
589  int *piPos,                     /* IN/OUT: First token of snippet */
590  u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
591){
592  u64 hlmask = *pHlmask;          /* Local copy of initial highlight-mask */
593
594  if( hlmask ){
595    int nLeft;                    /* Tokens to the left of first highlight */
596    int nRight;                   /* Tokens to the right of last highlight */
597    int nDesired;                 /* Ideal number of tokens to shift forward */
598
599    for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
600    for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
601    nDesired = (nLeft-nRight)/2;
602
603    /* Ideally, the start of the snippet should be pushed forward in the
604    ** document nDesired tokens. This block checks if there are actually
605    ** nDesired tokens to the right of the snippet. If so, *piPos and
606    ** *pHlMask are updated to shift the snippet nDesired tokens to the
607    ** right. Otherwise, the snippet is shifted by the number of tokens
608    ** available.
609    */
610    if( nDesired>0 ){
611      int nShift;                 /* Number of tokens to shift snippet by */
612      int iCurrent = 0;           /* Token counter */
613      int rc;                     /* Return Code */
614      sqlite3_tokenizer_module *pMod;
615      sqlite3_tokenizer_cursor *pC;
616      pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
617
618      /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
619      ** or more tokens in zDoc/nDoc.
620      */
621      rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
622      if( rc!=SQLITE_OK ){
623        return rc;
624      }
625      pC->pTokenizer = pTab->pTokenizer;
626      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
627        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
628        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
629      }
630      pMod->xClose(pC);
631      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
632
633      nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
634      assert( nShift<=nDesired );
635      if( nShift>0 ){
636        *piPos += nShift;
637        *pHlmask = hlmask >> nShift;
638      }
639    }
640  }
641  return SQLITE_OK;
642}
643
644/*
645** Extract the snippet text for fragment pFragment from cursor pCsr and
646** append it to string buffer pOut.
647*/
648static int fts3SnippetText(
649  Fts3Cursor *pCsr,               /* FTS3 Cursor */
650  SnippetFragment *pFragment,     /* Snippet to extract */
651  int iFragment,                  /* Fragment number */
652  int isLast,                     /* True for final fragment in snippet */
653  int nSnippet,                   /* Number of tokens in extracted snippet */
654  const char *zOpen,              /* String inserted before highlighted term */
655  const char *zClose,             /* String inserted after highlighted term */
656  const char *zEllipsis,          /* String inserted between snippets */
657  StrBuffer *pOut                 /* Write output here */
658){
659  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
660  int rc;                         /* Return code */
661  const char *zDoc;               /* Document text to extract snippet from */
662  int nDoc;                       /* Size of zDoc in bytes */
663  int iCurrent = 0;               /* Current token number of document */
664  int iEnd = 0;                   /* Byte offset of end of current token */
665  int isShiftDone = 0;            /* True after snippet is shifted */
666  int iPos = pFragment->iPos;     /* First token of snippet */
667  u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
668  int iCol = pFragment->iCol+1;   /* Query column to extract text from */
669  sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
670  sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */
671  const char *ZDUMMY;             /* Dummy argument used with tokenizer */
672  int DUMMY1;                     /* Dummy argument used with tokenizer */
673
674  zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
675  if( zDoc==0 ){
676    if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
677      return SQLITE_NOMEM;
678    }
679    return SQLITE_OK;
680  }
681  nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
682
683  /* Open a token cursor on the document. */
684  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
685  rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
686  if( rc!=SQLITE_OK ){
687    return rc;
688  }
689  pC->pTokenizer = pTab->pTokenizer;
690
691  while( rc==SQLITE_OK ){
692    int iBegin;                   /* Offset in zDoc of start of token */
693    int iFin;                     /* Offset in zDoc of end of token */
694    int isHighlight;              /* True for highlighted terms */
695
696    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
697    if( rc!=SQLITE_OK ){
698      if( rc==SQLITE_DONE ){
699        /* Special case - the last token of the snippet is also the last token
700        ** of the column. Append any punctuation that occurred between the end
701        ** of the previous token and the end of the document to the output.
702        ** Then break out of the loop. */
703        rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
704      }
705      break;
706    }
707    if( iCurrent<iPos ){ continue; }
708
709    if( !isShiftDone ){
710      int n = nDoc - iBegin;
711      rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
712      isShiftDone = 1;
713
714      /* Now that the shift has been done, check if the initial "..." are
715      ** required. They are required if (a) this is not the first fragment,
716      ** or (b) this fragment does not begin at position 0 of its column.
717      */
718      if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
719        rc = fts3StringAppend(pOut, zEllipsis, -1);
720      }
721      if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
722    }
723
724    if( iCurrent>=(iPos+nSnippet) ){
725      if( isLast ){
726        rc = fts3StringAppend(pOut, zEllipsis, -1);
727      }
728      break;
729    }
730
731    /* Set isHighlight to true if this term should be highlighted. */
732    isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
733
734    if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
735    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
736    if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
737    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
738
739    iEnd = iFin;
740  }
741
742  pMod->xClose(pC);
743  return rc;
744}
745
746
/*
** Count the entries in a column-list (a delta-encoded list of term
** offsets within a single column of a single row).
**
** On entry *ppCollist must point to the first byte of the first varint
** in the column-list. On return it points to the byte that terminates
** the column-list: either the 0x00 that ends the whole position-list or
** the 0x01 that introduces the column number of the next column.
**
** The number of varints in the column-list is returned.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *z = *ppCollist;           /* Read pointer */
  int nEntry = 0;                 /* Number of complete varints seen */
  int bInVarint = 0;              /* True if previous byte had 0x80 set */

  for(;;){
    char c = *z;

    /* A 0x00 or 0x01 byte terminates the list, but only when it is the
    ** first byte of a varint, not a continuation of the previous one. */
    if( !bInVarint && (c & 0xFE)==0 ) break;

    z++;
    bInVarint = (c & 0x80)!=0;
    if( !bInVarint ){
      /* Byte without the high bit set: the final byte of a varint. */
      nEntry++;
    }
  }

  *ppCollist = z;
  return nEntry;
}
774
775static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){
776  char *pCsr = *pp;
777  while( *pCsr ){
778    int nHit;
779    sqlite3_int64 iCol = 0;
780    if( *pCsr==0x01 ){
781      pCsr++;
782      pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
783    }
784    nHit = fts3ColumnlistCount(&pCsr);
785    assert( nHit>0 );
786    if( isGlobal ){
787      aOut[iCol*3+1]++;
788    }
789    aOut[iCol*3] += nHit;
790  }
791  pCsr++;
792  *pp = pCsr;
793}
794
795/*
796** fts3ExprIterate() callback used to collect the "global" matchinfo stats
797** for a single query.
798**
799** fts3ExprIterate() callback to load the 'global' elements of a
800** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
801** of the matchinfo array that are constant for all rows returned by the
802** current query.
803**
804** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
805** function populates Matchinfo.aMatchinfo[] as follows:
806**
807**   for(iCol=0; iCol<nCol; iCol++){
808**     aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
809**     aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
810**   }
811**
812** where X is the number of matches for phrase iPhrase is column iCol of all
813** rows of the table. Y is the number of rows for which column iCol contains
814** at least one instance of phrase iPhrase.
815**
816** If the phrase pExpr consists entirely of deferred tokens, then all X and
817** Y values are set to nDoc, where nDoc is the number of documents in the
818** file system. This is done because the full-text index doclist is required
819** to calculate these values properly, and the full-text index doclist is
820** not available for deferred tokens.
821*/
822static int fts3ExprGlobalHitsCb(
823  Fts3Expr *pExpr,                /* Phrase expression node */
824  int iPhrase,                    /* Phrase number (numbered from zero) */
825  void *pCtx                      /* Pointer to MatchInfo structure */
826){
827  MatchInfo *p = (MatchInfo *)pCtx;
828  Fts3Cursor *pCsr = p->pCursor;
829  char *pIter;
830  char *pEnd;
831  char *pFree = 0;
832  u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol];
833
834  assert( pExpr->isLoaded );
835  assert( pExpr->eType==FTSQUERY_PHRASE );
836
837  if( pCsr->pDeferred ){
838    Fts3Phrase *pPhrase = pExpr->pPhrase;
839    int ii;
840    for(ii=0; ii<pPhrase->nToken; ii++){
841      if( pPhrase->aToken[ii].bFulltext ) break;
842    }
843    if( ii<pPhrase->nToken ){
844      int nFree = 0;
845      int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree);
846      if( rc!=SQLITE_OK ) return rc;
847      pIter = pFree;
848      pEnd = &pFree[nFree];
849    }else{
850      int iCol;                   /* Column index */
851      for(iCol=0; iCol<p->nCol; iCol++){
852        aOut[iCol*3 + 1] = (u32)p->nDoc;
853        aOut[iCol*3 + 2] = (u32)p->nDoc;
854      }
855      return SQLITE_OK;
856    }
857  }else{
858    pIter = pExpr->aDoclist;
859    pEnd = &pExpr->aDoclist[pExpr->nDoclist];
860  }
861
862  /* Fill in the global hit count matrix row for this phrase. */
863  while( pIter<pEnd ){
864    while( *pIter++ & 0x80 );      /* Skip past docid. */
865    fts3LoadColumnlistCounts(&pIter, &aOut[1], 1);
866  }
867
868  sqlite3_free(pFree);
869  return SQLITE_OK;
870}
871
872/*
873** fts3ExprIterate() callback used to collect the "local" part of the
874** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
875** array that are different for each row returned by the query.
876*/
877static int fts3ExprLocalHitsCb(
878  Fts3Expr *pExpr,                /* Phrase expression node */
879  int iPhrase,                    /* Phrase number */
880  void *pCtx                      /* Pointer to MatchInfo structure */
881){
882  MatchInfo *p = (MatchInfo *)pCtx;
883  int iStart = iPhrase * p->nCol * 3;
884  int i;
885
886  for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0;
887
888  if( pExpr->aDoclist ){
889    char *pCsr;
890
891    pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1);
892    if( pCsr ){
893      fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0);
894    }
895  }
896
897  return SQLITE_OK;
898}
899
900static int fts3MatchinfoCheck(
901  Fts3Table *pTab,
902  char cArg,
903  char **pzErr
904){
905  if( (cArg==FTS3_MATCHINFO_NPHRASE)
906   || (cArg==FTS3_MATCHINFO_NCOL)
907   || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat)
908   || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat)
909   || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
910   || (cArg==FTS3_MATCHINFO_LCS)
911   || (cArg==FTS3_MATCHINFO_HITS)
912  ){
913    return SQLITE_OK;
914  }
915  *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
916  return SQLITE_ERROR;
917}
918
919static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
920  int nVal;                       /* Number of integers output by cArg */
921
922  switch( cArg ){
923    case FTS3_MATCHINFO_NDOC:
924    case FTS3_MATCHINFO_NPHRASE:
925    case FTS3_MATCHINFO_NCOL:
926      nVal = 1;
927      break;
928
929    case FTS3_MATCHINFO_AVGLENGTH:
930    case FTS3_MATCHINFO_LENGTH:
931    case FTS3_MATCHINFO_LCS:
932      nVal = pInfo->nCol;
933      break;
934
935    default:
936      assert( cArg==FTS3_MATCHINFO_HITS );
937      nVal = pInfo->nCol * pInfo->nPhrase * 3;
938      break;
939  }
940
941  return nVal;
942}
943
944static int fts3MatchinfoSelectDoctotal(
945  Fts3Table *pTab,
946  sqlite3_stmt **ppStmt,
947  sqlite3_int64 *pnDoc,
948  const char **paLen
949){
950  sqlite3_stmt *pStmt;
951  const char *a;
952  sqlite3_int64 nDoc;
953
954  if( !*ppStmt ){
955    int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
956    if( rc!=SQLITE_OK ) return rc;
957  }
958  pStmt = *ppStmt;
959  assert( sqlite3_data_count(pStmt)==1 );
960
961  a = sqlite3_column_blob(pStmt, 0);
962  a += sqlite3Fts3GetVarint(a, &nDoc);
963  if( nDoc==0 ) return SQLITE_CORRUPT;
964  *pnDoc = (u32)nDoc;
965
966  if( paLen ) *paLen = a;
967  return SQLITE_OK;
968}
969
970/*
971** An instance of the following structure is used to store state while
972** iterating through a multi-column position-list corresponding to the
973** hits for a single phrase on a single row in order to calculate the
974** values for a matchinfo() FTS3_MATCHINFO_LCS request.
975*/
976typedef struct LcsIterator LcsIterator;
977struct LcsIterator {
978  Fts3Expr *pExpr;                /* Pointer to phrase expression */
979  char *pRead;                    /* Cursor used to iterate through aDoclist */
980  int iPosOffset;                 /* Tokens count up to end of this phrase */
981  int iCol;                       /* Current column number */
982  int iPos;                       /* Current position */
983};
984
/*
** If LcsIterator.iCol is set to the following value, the iterator has
** finished iterating through all offsets for all columns.
**
** The expansion deliberately carries no trailing semicolon so that the
** macro can be used inside expressions as well as in plain assignments.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
990
991static int fts3MatchinfoLcsCb(
992  Fts3Expr *pExpr,                /* Phrase expression node */
993  int iPhrase,                    /* Phrase number (numbered from zero) */
994  void *pCtx                      /* Pointer to MatchInfo structure */
995){
996  LcsIterator *aIter = (LcsIterator *)pCtx;
997  aIter[iPhrase].pExpr = pExpr;
998  return SQLITE_OK;
999}
1000
1001/*
1002** Advance the iterator passed as an argument to the next position. Return
1003** 1 if the iterator is at EOF or if it now points to the start of the
1004** position list for the next column.
1005*/
1006static int fts3LcsIteratorAdvance(LcsIterator *pIter){
1007  char *pRead = pIter->pRead;
1008  sqlite3_int64 iRead;
1009  int rc = 0;
1010
1011  pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1012  if( iRead==0 ){
1013    pIter->iCol = LCS_ITERATOR_FINISHED;
1014    rc = 1;
1015  }else{
1016    if( iRead==1 ){
1017      pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1018      pIter->iCol = (int)iRead;
1019      pIter->iPos = pIter->iPosOffset;
1020      pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1021      rc = 1;
1022    }
1023    pIter->iPos += (int)(iRead-2);
1024  }
1025
1026  pIter->pRead = pRead;
1027  return rc;
1028}
1029
1030/*
1031** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
1032**
1033** If the call is successful, the longest-common-substring lengths for each
1034** column are written into the first nCol elements of the pInfo->aMatchinfo[]
1035** array before returning. SQLITE_OK is returned in this case.
1036**
1037** Otherwise, if an error occurs, an SQLite error code is returned and the
1038** data written to the first nCol elements of pInfo->aMatchinfo[] is
1039** undefined.
1040*/
1041static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
1042  LcsIterator *aIter;
1043  int i;
1044  int iCol;
1045  int nToken = 0;
1046
1047  /* Allocate and populate the array of LcsIterator objects. The array
1048  ** contains one element for each matchable phrase in the query.
1049  **/
1050  aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
1051  if( !aIter ) return SQLITE_NOMEM;
1052  memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
1053  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
1054  for(i=0; i<pInfo->nPhrase; i++){
1055    LcsIterator *pIter = &aIter[i];
1056    nToken -= pIter->pExpr->pPhrase->nToken;
1057    pIter->iPosOffset = nToken;
1058    pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1);
1059    if( pIter->pRead ){
1060      pIter->iPos = pIter->iPosOffset;
1061      fts3LcsIteratorAdvance(&aIter[i]);
1062    }else{
1063      pIter->iCol = LCS_ITERATOR_FINISHED;
1064    }
1065  }
1066
1067  for(iCol=0; iCol<pInfo->nCol; iCol++){
1068    int nLcs = 0;                 /* LCS value for this column */
1069    int nLive = 0;                /* Number of iterators in aIter not at EOF */
1070
1071    /* Loop through the iterators in aIter[]. Set nLive to the number of
1072    ** iterators that point to a position-list corresponding to column iCol.
1073    */
1074    for(i=0; i<pInfo->nPhrase; i++){
1075      assert( aIter[i].iCol>=iCol );
1076      if( aIter[i].iCol==iCol ) nLive++;
1077    }
1078
1079    /* The following loop runs until all iterators in aIter[] have finished
1080    ** iterating through positions in column iCol. Exactly one of the
1081    ** iterators is advanced each time the body of the loop is run.
1082    */
1083    while( nLive>0 ){
1084      LcsIterator *pAdv = 0;      /* The iterator to advance by one position */
1085      int nThisLcs = 0;           /* LCS for the current iterator positions */
1086
1087      for(i=0; i<pInfo->nPhrase; i++){
1088        LcsIterator *pIter = &aIter[i];
1089        if( iCol!=pIter->iCol ){
1090          /* This iterator is already at EOF for this column. */
1091          nThisLcs = 0;
1092        }else{
1093          if( pAdv==0 || pIter->iPos<pAdv->iPos ){
1094            pAdv = pIter;
1095          }
1096          if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
1097            nThisLcs++;
1098          }else{
1099            nThisLcs = 1;
1100          }
1101          if( nThisLcs>nLcs ) nLcs = nThisLcs;
1102        }
1103      }
1104      if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
1105    }
1106
1107    pInfo->aMatchinfo[iCol] = nLcs;
1108  }
1109
1110  sqlite3_free(aIter);
1111  return SQLITE_OK;
1112}
1113
1114/*
1115** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
1116** be returned by the matchinfo() function. Argument zArg contains the
1117** format string passed as the second argument to matchinfo (or the
1118** default value "pcx" if no second argument was specified). The format
1119** string has already been validated and the pInfo->aMatchinfo[] array
1120** is guaranteed to be large enough for the output.
1121**
1122** If bGlobal is true, then populate all fields of the matchinfo() output.
1123** If it is false, then assume that those fields that do not change between
1124** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
1125** have already been populated.
1126**
1127** Return SQLITE_OK if successful, or an SQLite error code if an error
1128** occurs. If a value other than SQLITE_OK is returned, the state the
1129** pInfo->aMatchinfo[] buffer is left in is undefined.
1130*/
1131static int fts3MatchinfoValues(
1132  Fts3Cursor *pCsr,               /* FTS3 cursor object */
1133  int bGlobal,                    /* True to grab the global stats */
1134  MatchInfo *pInfo,               /* Matchinfo context object */
1135  const char *zArg                /* Matchinfo format string */
1136){
1137  int rc = SQLITE_OK;
1138  int i;
1139  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1140  sqlite3_stmt *pSelect = 0;
1141
1142  for(i=0; rc==SQLITE_OK && zArg[i]; i++){
1143
1144    switch( zArg[i] ){
1145      case FTS3_MATCHINFO_NPHRASE:
1146        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
1147        break;
1148
1149      case FTS3_MATCHINFO_NCOL:
1150        if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
1151        break;
1152
1153      case FTS3_MATCHINFO_NDOC:
1154        if( bGlobal ){
1155          sqlite3_int64 nDoc;
1156          rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
1157          pInfo->aMatchinfo[0] = (u32)nDoc;
1158        }
1159        break;
1160
1161      case FTS3_MATCHINFO_AVGLENGTH:
1162        if( bGlobal ){
1163          sqlite3_int64 nDoc;     /* Number of rows in table */
1164          const char *a;          /* Aggregate column length array */
1165
1166          rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a);
1167          if( rc==SQLITE_OK ){
1168            int iCol;
1169            for(iCol=0; iCol<pInfo->nCol; iCol++){
1170              u32 iVal;
1171              sqlite3_int64 nToken;
1172              a += sqlite3Fts3GetVarint(a, &nToken);
1173              iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
1174              pInfo->aMatchinfo[iCol] = iVal;
1175            }
1176          }
1177        }
1178        break;
1179
1180      case FTS3_MATCHINFO_LENGTH: {
1181        sqlite3_stmt *pSelectDocsize = 0;
1182        rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
1183        if( rc==SQLITE_OK ){
1184          int iCol;
1185          const char *a = sqlite3_column_blob(pSelectDocsize, 0);
1186          for(iCol=0; iCol<pInfo->nCol; iCol++){
1187            sqlite3_int64 nToken;
1188            a += sqlite3Fts3GetVarint(a, &nToken);
1189            pInfo->aMatchinfo[iCol] = (u32)nToken;
1190          }
1191        }
1192        sqlite3_reset(pSelectDocsize);
1193        break;
1194      }
1195
1196      case FTS3_MATCHINFO_LCS:
1197        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1198        if( rc==SQLITE_OK ){
1199          rc = fts3MatchinfoLcs(pCsr, pInfo);
1200        }
1201        break;
1202
1203      default: {
1204        Fts3Expr *pExpr;
1205        assert( zArg[i]==FTS3_MATCHINFO_HITS );
1206        pExpr = pCsr->pExpr;
1207        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1208        if( rc!=SQLITE_OK ) break;
1209        if( bGlobal ){
1210          if( pCsr->pDeferred ){
1211            rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0);
1212            if( rc!=SQLITE_OK ) break;
1213          }
1214          rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
1215          if( rc!=SQLITE_OK ) break;
1216        }
1217        (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
1218        break;
1219      }
1220    }
1221
1222    pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
1223  }
1224
1225  sqlite3_reset(pSelect);
1226  return rc;
1227}
1228
1229
1230/*
1231** Populate pCsr->aMatchinfo[] with data for the current row. The
1232** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
1233*/
1234static int fts3GetMatchinfo(
1235  Fts3Cursor *pCsr,               /* FTS3 Cursor object */
1236  const char *zArg                /* Second argument to matchinfo() function */
1237){
1238  MatchInfo sInfo;
1239  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1240  int rc = SQLITE_OK;
1241  int bGlobal = 0;                /* Collect 'global' stats as well as local */
1242
1243  memset(&sInfo, 0, sizeof(MatchInfo));
1244  sInfo.pCursor = pCsr;
1245  sInfo.nCol = pTab->nColumn;
1246
1247  /* If there is cached matchinfo() data, but the format string for the
1248  ** cache does not match the format string for this request, discard
1249  ** the cached data. */
1250  if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){
1251    assert( pCsr->aMatchinfo );
1252    sqlite3_free(pCsr->aMatchinfo);
1253    pCsr->zMatchinfo = 0;
1254    pCsr->aMatchinfo = 0;
1255  }
1256
1257  /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
1258  ** matchinfo function has been called for this query. In this case
1259  ** allocate the array used to accumulate the matchinfo data and
1260  ** initialize those elements that are constant for every row.
1261  */
1262  if( pCsr->aMatchinfo==0 ){
1263    int nMatchinfo = 0;           /* Number of u32 elements in match-info */
1264    int nArg;                     /* Bytes in zArg */
1265    int i;                        /* Used to iterate through zArg */
1266
1267    /* Determine the number of phrases in the query */
1268    pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
1269    sInfo.nPhrase = pCsr->nPhrase;
1270
1271    /* Determine the number of integers in the buffer returned by this call. */
1272    for(i=0; zArg[i]; i++){
1273      nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
1274    }
1275
1276    /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
1277    nArg = (int)strlen(zArg);
1278    pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);
1279    if( !pCsr->aMatchinfo ) return SQLITE_NOMEM;
1280
1281    pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo];
1282    pCsr->nMatchinfo = nMatchinfo;
1283    memcpy(pCsr->zMatchinfo, zArg, nArg+1);
1284    memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo);
1285    pCsr->isMatchinfoNeeded = 1;
1286    bGlobal = 1;
1287  }
1288
1289  sInfo.aMatchinfo = pCsr->aMatchinfo;
1290  sInfo.nPhrase = pCsr->nPhrase;
1291  if( pCsr->isMatchinfoNeeded ){
1292    rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
1293    pCsr->isMatchinfoNeeded = 0;
1294  }
1295
1296  return rc;
1297}
1298
1299/*
1300** Implementation of snippet() function.
1301*/
1302void sqlite3Fts3Snippet(
1303  sqlite3_context *pCtx,          /* SQLite function call context */
1304  Fts3Cursor *pCsr,               /* Cursor object */
1305  const char *zStart,             /* Snippet start text - "<b>" */
1306  const char *zEnd,               /* Snippet end text - "</b>" */
1307  const char *zEllipsis,          /* Snippet ellipsis text - "<b>...</b>" */
1308  int iCol,                       /* Extract snippet from this column */
1309  int nToken                      /* Approximate number of tokens in snippet */
1310){
1311  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1312  int rc = SQLITE_OK;
1313  int i;
1314  StrBuffer res = {0, 0, 0};
1315
1316  /* The returned text includes up to four fragments of text extracted from
1317  ** the data in the current row. The first iteration of the for(...) loop
1318  ** below attempts to locate a single fragment of text nToken tokens in
1319  ** size that contains at least one instance of all phrases in the query
1320  ** expression that appear in the current row. If such a fragment of text
1321  ** cannot be found, the second iteration of the loop attempts to locate
1322  ** a pair of fragments, and so on.
1323  */
1324  int nSnippet = 0;               /* Number of fragments in this snippet */
1325  SnippetFragment aSnippet[4];    /* Maximum of 4 fragments per snippet */
1326  int nFToken = -1;               /* Number of tokens in each fragment */
1327
1328  if( !pCsr->pExpr ){
1329    sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1330    return;
1331  }
1332
1333  for(nSnippet=1; 1; nSnippet++){
1334
1335    int iSnip;                    /* Loop counter 0..nSnippet-1 */
1336    u64 mCovered = 0;             /* Bitmask of phrases covered by snippet */
1337    u64 mSeen = 0;                /* Bitmask of phrases seen by BestSnippet() */
1338
1339    if( nToken>=0 ){
1340      nFToken = (nToken+nSnippet-1) / nSnippet;
1341    }else{
1342      nFToken = -1 * nToken;
1343    }
1344
1345    for(iSnip=0; iSnip<nSnippet; iSnip++){
1346      int iBestScore = -1;        /* Best score of columns checked so far */
1347      int iRead;                  /* Used to iterate through columns */
1348      SnippetFragment *pFragment = &aSnippet[iSnip];
1349
1350      memset(pFragment, 0, sizeof(*pFragment));
1351
1352      /* Loop through all columns of the table being considered for snippets.
1353      ** If the iCol argument to this function was negative, this means all
1354      ** columns of the FTS3 table. Otherwise, only column iCol is considered.
1355      */
1356      for(iRead=0; iRead<pTab->nColumn; iRead++){
1357        SnippetFragment sF = {0, 0, 0, 0};
1358        int iS;
1359        if( iCol>=0 && iRead!=iCol ) continue;
1360
1361        /* Find the best snippet of nFToken tokens in column iRead. */
1362        rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
1363        if( rc!=SQLITE_OK ){
1364          goto snippet_out;
1365        }
1366        if( iS>iBestScore ){
1367          *pFragment = sF;
1368          iBestScore = iS;
1369        }
1370      }
1371
1372      mCovered |= pFragment->covered;
1373    }
1374
1375    /* If all query phrases seen by fts3BestSnippet() are present in at least
1376    ** one of the nSnippet snippet fragments, break out of the loop.
1377    */
1378    assert( (mCovered&mSeen)==mCovered );
1379    if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
1380  }
1381
1382  assert( nFToken>0 );
1383
1384  for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
1385    rc = fts3SnippetText(pCsr, &aSnippet[i],
1386        i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
1387    );
1388  }
1389
1390 snippet_out:
1391  sqlite3Fts3SegmentsClose(pTab);
1392  if( rc!=SQLITE_OK ){
1393    sqlite3_result_error_code(pCtx, rc);
1394    sqlite3_free(res.z);
1395  }else{
1396    sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
1397  }
1398}
1399
1400
typedef struct TermOffset TermOffset;
typedef struct TermOffsetCtx TermOffsetCtx;

/*
** Iterator over the position-list of one query term, as used by the
** offsets() implementation.
*/
struct TermOffset {
  /* Remaining position-list, or NULL if there is none (left) */
  char *pList;

  /* Position most recently read from pList */
  int iPos;

  /* Number of tokens following this term within its phrase. Subtracted
  ** from iPos to obtain the position of the term itself. */
  int iOff;
};
1409
1410struct TermOffsetCtx {
1411  int iCol;                       /* Column of table to populate aTerm for */
1412  int iTerm;
1413  sqlite3_int64 iDocid;
1414  TermOffset *aTerm;
1415};
1416
1417/*
1418** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1419*/
1420static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
1421  TermOffsetCtx *p = (TermOffsetCtx *)ctx;
1422  int nTerm;                      /* Number of tokens in phrase */
1423  int iTerm;                      /* For looping through nTerm phrase terms */
1424  char *pList;                    /* Pointer to position list for phrase */
1425  int iPos = 0;                   /* First position in position-list */
1426
1427  UNUSED_PARAMETER(iPhrase);
1428  pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol);
1429  nTerm = pExpr->pPhrase->nToken;
1430  if( pList ){
1431    fts3GetDeltaPosition(&pList, &iPos);
1432    assert( iPos>=0 );
1433  }
1434
1435  for(iTerm=0; iTerm<nTerm; iTerm++){
1436    TermOffset *pT = &p->aTerm[p->iTerm++];
1437    pT->iOff = nTerm-iTerm-1;
1438    pT->pList = pList;
1439    pT->iPos = iPos;
1440  }
1441
1442  return SQLITE_OK;
1443}
1444
1445/*
1446** Implementation of offsets() function.
1447*/
1448void sqlite3Fts3Offsets(
1449  sqlite3_context *pCtx,          /* SQLite function call context */
1450  Fts3Cursor *pCsr                /* Cursor object */
1451){
1452  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1453  sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
1454  const char *ZDUMMY;             /* Dummy argument used with xNext() */
1455  int NDUMMY;                     /* Dummy argument used with xNext() */
1456  int rc;                         /* Return Code */
1457  int nToken;                     /* Number of tokens in query */
1458  int iCol;                       /* Column currently being processed */
1459  StrBuffer res = {0, 0, 0};      /* Result string */
1460  TermOffsetCtx sCtx;             /* Context for fts3ExprTermOffsetInit() */
1461
1462  if( !pCsr->pExpr ){
1463    sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1464    return;
1465  }
1466
1467  memset(&sCtx, 0, sizeof(sCtx));
1468  assert( pCsr->isRequireSeek==0 );
1469
1470  /* Count the number of terms in the query */
1471  rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
1472  if( rc!=SQLITE_OK ) goto offsets_out;
1473
1474  /* Allocate the array of TermOffset iterators. */
1475  sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
1476  if( 0==sCtx.aTerm ){
1477    rc = SQLITE_NOMEM;
1478    goto offsets_out;
1479  }
1480  sCtx.iDocid = pCsr->iPrevId;
1481
1482  /* Loop through the table columns, appending offset information to
1483  ** string-buffer res for each column.
1484  */
1485  for(iCol=0; iCol<pTab->nColumn; iCol++){
1486    sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
1487    int iStart;
1488    int iEnd;
1489    int iCurrent;
1490    const char *zDoc;
1491    int nDoc;
1492
1493    /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
1494    ** no way that this operation can fail, so the return code from
1495    ** fts3ExprIterate() can be discarded.
1496    */
1497    sCtx.iCol = iCol;
1498    sCtx.iTerm = 0;
1499    (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
1500
1501    /* Retreive the text stored in column iCol. If an SQL NULL is stored
1502    ** in column iCol, jump immediately to the next iteration of the loop.
1503    ** If an OOM occurs while retrieving the data (this can happen if SQLite
1504    ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
1505    ** to the caller.
1506    */
1507    zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
1508    nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
1509    if( zDoc==0 ){
1510      if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
1511        continue;
1512      }
1513      rc = SQLITE_NOMEM;
1514      goto offsets_out;
1515    }
1516
1517    /* Initialize a tokenizer iterator to iterate through column iCol. */
1518    rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
1519    if( rc!=SQLITE_OK ) goto offsets_out;
1520    pC->pTokenizer = pTab->pTokenizer;
1521
1522    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1523    while( rc==SQLITE_OK ){
1524      int i;                      /* Used to loop through terms */
1525      int iMinPos = 0x7FFFFFFF;   /* Position of next token */
1526      TermOffset *pTerm = 0;      /* TermOffset associated with next token */
1527
1528      for(i=0; i<nToken; i++){
1529        TermOffset *pT = &sCtx.aTerm[i];
1530        if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
1531          iMinPos = pT->iPos-pT->iOff;
1532          pTerm = pT;
1533        }
1534      }
1535
1536      if( !pTerm ){
1537        /* All offsets for this column have been gathered. */
1538        break;
1539      }else{
1540        assert( iCurrent<=iMinPos );
1541        if( 0==(0xFE&*pTerm->pList) ){
1542          pTerm->pList = 0;
1543        }else{
1544          fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
1545        }
1546        while( rc==SQLITE_OK && iCurrent<iMinPos ){
1547          rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1548        }
1549        if( rc==SQLITE_OK ){
1550          char aBuffer[64];
1551          sqlite3_snprintf(sizeof(aBuffer), aBuffer,
1552              "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
1553          );
1554          rc = fts3StringAppend(&res, aBuffer, -1);
1555        }else if( rc==SQLITE_DONE ){
1556          rc = SQLITE_CORRUPT;
1557        }
1558      }
1559    }
1560    if( rc==SQLITE_DONE ){
1561      rc = SQLITE_OK;
1562    }
1563
1564    pMod->xClose(pC);
1565    if( rc!=SQLITE_OK ) goto offsets_out;
1566  }
1567
1568 offsets_out:
1569  sqlite3_free(sCtx.aTerm);
1570  assert( rc!=SQLITE_DONE );
1571  sqlite3Fts3SegmentsClose(pTab);
1572  if( rc!=SQLITE_OK ){
1573    sqlite3_result_error_code(pCtx,  rc);
1574    sqlite3_free(res.z);
1575  }else{
1576    sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
1577  }
1578  return;
1579}
1580
1581/*
1582** Implementation of matchinfo() function.
1583*/
1584void sqlite3Fts3Matchinfo(
1585  sqlite3_context *pContext,      /* Function call context */
1586  Fts3Cursor *pCsr,               /* FTS3 table cursor */
1587  const char *zArg                /* Second arg to matchinfo() function */
1588){
1589  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1590  int rc;
1591  int i;
1592  const char *zFormat;
1593
1594  if( zArg ){
1595    for(i=0; zArg[i]; i++){
1596      char *zErr = 0;
1597      if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
1598        sqlite3_result_error(pContext, zErr, -1);
1599        sqlite3_free(zErr);
1600        return;
1601      }
1602    }
1603    zFormat = zArg;
1604  }else{
1605    zFormat = FTS3_MATCHINFO_DEFAULT;
1606  }
1607
1608  if( !pCsr->pExpr ){
1609    sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
1610    return;
1611  }
1612
1613  /* Retrieve matchinfo() data. */
1614  rc = fts3GetMatchinfo(pCsr, zFormat);
1615  sqlite3Fts3SegmentsClose(pTab);
1616
1617  if( rc!=SQLITE_OK ){
1618    sqlite3_result_error_code(pContext, rc);
1619  }else{
1620    int n = pCsr->nMatchinfo * sizeof(u32);
1621    sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
1622  }
1623}
1624
1625#endif
1626