1/*
2** 2008 Nov 28
3**
4** The author disclaims copyright to this source code.  In place of
5** a legal notice, here is a blessing:
6**
7**    May you do good and not evil.
8**    May you find forgiveness for yourself and forgive others.
9**    May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This module contains code that implements a parser for fts3 query strings
14** (the right-hand argument to the MATCH operator). Because the supported
15** syntax is relatively simple, the whole tokenizer/parser system is
16** hand-coded.
17*/
18#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
19
20/*
21** By default, this module parses the legacy syntax that has been
22** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
23** is defined, then it uses the new syntax. The differences between
24** the new and the old syntaxes are:
25**
26**  a) The new syntax supports parenthesis. The old does not.
27**
28**  b) The new syntax supports the AND and NOT operators. The old does not.
29**
30**  c) The old syntax supports the "-" token qualifier. This is not
31**     supported by the new syntax (it is replaced by the NOT operator).
32**
33**  d) When using the old syntax, the OR operator has a greater precedence
34**     than an implicit AND. When using the new, both implicity and explicit
35**     AND operators have a higher precedence than OR.
36**
37** If compiled with SQLITE_TEST defined, then this module exports the
38** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
39** to zero causes the module to use the old syntax. If it is set to
40** non-zero the new syntax is activated. This is so both syntaxes can
41** be tested using a single build of testfixture.
42**
43** The following describes the syntax supported by the fts3 MATCH
44** operator in a similar format to that used by the lemon parser
45** generator. This module does not use actually lemon, it uses a
46** custom parser.
47**
48**   query ::= andexpr (OR andexpr)*.
49**
50**   andexpr ::= notexpr (AND? notexpr)*.
51**
52**   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
53**   notexpr ::= LP query RP.
54**
55**   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
56**
57**   distance_opt ::= .
58**   distance_opt ::= / INTEGER.
59**
60**   phrase ::= TOKEN.
61**   phrase ::= COLUMN:TOKEN.
62**   phrase ::= "TOKEN TOKEN TOKEN...".
63*/
64
65#ifdef SQLITE_TEST
66int sqlite3_fts3_enable_parentheses = 0;
67#else
68# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
69#  define sqlite3_fts3_enable_parentheses 1
70# else
71#  define sqlite3_fts3_enable_parentheses 0
72# endif
73#endif
74
75/*
76** Default span for NEAR operators.
77*/
78#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
79
80#include "fts3Int.h"
81#include <string.h>
82#include <assert.h>
83
84typedef struct ParseContext ParseContext;
85struct ParseContext {
86  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
87  const char **azCol;                 /* Array of column names for fts3 table */
88  int nCol;                           /* Number of entries in azCol[] */
89  int iDefaultCol;                    /* Default column to query */
90  sqlite3_context *pCtx;              /* Write error message here */
91  int nNest;                          /* Number of nested brackets */
92};
93
94/*
95** This function is equivalent to the standard isspace() function.
96**
97** The standard isspace() can be awkward to use safely, because although it
98** is defined to accept an argument of type int, its behaviour when passed
99** an integer that falls outside of the range of the unsigned char type
100** is undefined (and sometimes, "undefined" means segfault). This wrapper
101** is defined to accept an argument of type char, and always returns 0 for
102** any values that fall outside of the range of the unsigned char type (i.e.
103** negative values).
104*/
105static int fts3isspace(char c){
106  return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
107}
108
109/*
110** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
111** zero the memory before returning a pointer to it. If unsuccessful,
112** return NULL.
113*/
114static void *fts3MallocZero(int nByte){
115  void *pRet = sqlite3_malloc(nByte);
116  if( pRet ) memset(pRet, 0, nByte);
117  return pRet;
118}
119
120
121/*
122** Extract the next token from buffer z (length n) using the tokenizer
123** and other information (column names etc.) in pParse. Create an Fts3Expr
124** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
125** single token and set *ppExpr to point to it. If the end of the buffer is
126** reached before a token is found, set *ppExpr to zero. It is the
127** responsibility of the caller to eventually deallocate the allocated
128** Fts3Expr structure (if any) by passing it to sqlite3_free().
129**
130** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
131** fails.
132*/
133static int getNextToken(
134  ParseContext *pParse,                   /* fts3 query parse context */
135  int iCol,                               /* Value for Fts3Phrase.iColumn */
136  const char *z, int n,                   /* Input string */
137  Fts3Expr **ppExpr,                      /* OUT: expression */
138  int *pnConsumed                         /* OUT: Number of bytes consumed */
139){
140  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
141  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
142  int rc;
143  sqlite3_tokenizer_cursor *pCursor;
144  Fts3Expr *pRet = 0;
145  int nConsumed = 0;
146
147  rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
148  if( rc==SQLITE_OK ){
149    const char *zToken;
150    int nToken, iStart, iEnd, iPosition;
151    int nByte;                               /* total space to allocate */
152
153    pCursor->pTokenizer = pTokenizer;
154    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
155
156    if( rc==SQLITE_OK ){
157      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
158      pRet = (Fts3Expr *)fts3MallocZero(nByte);
159      if( !pRet ){
160        rc = SQLITE_NOMEM;
161      }else{
162        pRet->eType = FTSQUERY_PHRASE;
163        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
164        pRet->pPhrase->nToken = 1;
165        pRet->pPhrase->iColumn = iCol;
166        pRet->pPhrase->aToken[0].n = nToken;
167        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
168        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
169
170        if( iEnd<n && z[iEnd]=='*' ){
171          pRet->pPhrase->aToken[0].isPrefix = 1;
172          iEnd++;
173        }
174        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
175          pRet->pPhrase->isNot = 1;
176        }
177      }
178      nConsumed = iEnd;
179    }
180
181    pModule->xClose(pCursor);
182  }
183
184  *pnConsumed = nConsumed;
185  *ppExpr = pRet;
186  return rc;
187}
188
189
190/*
191** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
192** then free the old allocation.
193*/
194static void *fts3ReallocOrFree(void *pOrig, int nNew){
195  void *pRet = sqlite3_realloc(pOrig, nNew);
196  if( !pRet ){
197    sqlite3_free(pOrig);
198  }
199  return pRet;
200}
201
202/*
203** Buffer zInput, length nInput, contains the contents of a quoted string
204** that appeared as part of an fts3 query expression. Neither quote character
205** is included in the buffer. This function attempts to tokenize the entire
206** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
207** containing the results.
208**
209** If successful, SQLITE_OK is returned and *ppExpr set to point at the
210** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
211** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
212** to 0.
213*/
214static int getNextString(
215  ParseContext *pParse,                   /* fts3 query parse context */
216  const char *zInput, int nInput,         /* Input string */
217  Fts3Expr **ppExpr                       /* OUT: expression */
218){
219  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
220  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
221  int rc;
222  Fts3Expr *p = 0;
223  sqlite3_tokenizer_cursor *pCursor = 0;
224  char *zTemp = 0;
225  int nTemp = 0;
226
227  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
228  if( rc==SQLITE_OK ){
229    int ii;
230    pCursor->pTokenizer = pTokenizer;
231    for(ii=0; rc==SQLITE_OK; ii++){
232      const char *zToken;
233      int nToken, iBegin, iEnd, iPos;
234      rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
235      if( rc==SQLITE_OK ){
236        int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
237        p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken));
238        zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);
239        if( !p || !zTemp ){
240          goto no_mem;
241        }
242        if( ii==0 ){
243          memset(p, 0, nByte);
244          p->pPhrase = (Fts3Phrase *)&p[1];
245        }
246        p->pPhrase = (Fts3Phrase *)&p[1];
247        memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken));
248        p->pPhrase->nToken = ii+1;
249        p->pPhrase->aToken[ii].n = nToken;
250        memcpy(&zTemp[nTemp], zToken, nToken);
251        nTemp += nToken;
252        if( iEnd<nInput && zInput[iEnd]=='*' ){
253          p->pPhrase->aToken[ii].isPrefix = 1;
254        }else{
255          p->pPhrase->aToken[ii].isPrefix = 0;
256        }
257      }
258    }
259
260    pModule->xClose(pCursor);
261    pCursor = 0;
262  }
263
264  if( rc==SQLITE_DONE ){
265    int jj;
266    char *zNew = NULL;
267    int nNew = 0;
268    int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
269    nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken);
270    p = fts3ReallocOrFree(p, nByte + nTemp);
271    if( !p ){
272      goto no_mem;
273    }
274    if( zTemp ){
275      zNew = &(((char *)p)[nByte]);
276      memcpy(zNew, zTemp, nTemp);
277    }else{
278      memset(p, 0, nByte+nTemp);
279    }
280    p->pPhrase = (Fts3Phrase *)&p[1];
281    for(jj=0; jj<p->pPhrase->nToken; jj++){
282      p->pPhrase->aToken[jj].z = &zNew[nNew];
283      nNew += p->pPhrase->aToken[jj].n;
284    }
285    sqlite3_free(zTemp);
286    p->eType = FTSQUERY_PHRASE;
287    p->pPhrase->iColumn = pParse->iDefaultCol;
288    rc = SQLITE_OK;
289  }
290
291  *ppExpr = p;
292  return rc;
293no_mem:
294
295  if( pCursor ){
296    pModule->xClose(pCursor);
297  }
298  sqlite3_free(zTemp);
299  sqlite3_free(p);
300  *ppExpr = 0;
301  return SQLITE_NOMEM;
302}
303
304/*
305** Function getNextNode(), which is called by fts3ExprParse(), may itself
306** call fts3ExprParse(). So this forward declaration is required.
307*/
308static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
309
310/*
311** The output variable *ppExpr is populated with an allocated Fts3Expr
312** structure, or set to 0 if the end of the input buffer is reached.
313**
314** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
315** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
316** If SQLITE_ERROR is returned, pContext is populated with an error message.
317*/
318static int getNextNode(
319  ParseContext *pParse,                   /* fts3 query parse context */
320  const char *z, int n,                   /* Input string */
321  Fts3Expr **ppExpr,                      /* OUT: expression */
322  int *pnConsumed                         /* OUT: Number of bytes consumed */
323){
324  static const struct Fts3Keyword {
325    char *z;                              /* Keyword text */
326    unsigned char n;                      /* Length of the keyword */
327    unsigned char parenOnly;              /* Only valid in paren mode */
328    unsigned char eType;                  /* Keyword code */
329  } aKeyword[] = {
330    { "OR" ,  2, 0, FTSQUERY_OR   },
331    { "AND",  3, 1, FTSQUERY_AND  },
332    { "NOT",  3, 1, FTSQUERY_NOT  },
333    { "NEAR", 4, 0, FTSQUERY_NEAR }
334  };
335  int ii;
336  int iCol;
337  int iColLen;
338  int rc;
339  Fts3Expr *pRet = 0;
340
341  const char *zInput = z;
342  int nInput = n;
343
344  /* Skip over any whitespace before checking for a keyword, an open or
345  ** close bracket, or a quoted string.
346  */
347  while( nInput>0 && fts3isspace(*zInput) ){
348    nInput--;
349    zInput++;
350  }
351  if( nInput==0 ){
352    return SQLITE_DONE;
353  }
354
355  /* See if we are dealing with a keyword. */
356  for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
357    const struct Fts3Keyword *pKey = &aKeyword[ii];
358
359    if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
360      continue;
361    }
362
363    if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
364      int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
365      int nKey = pKey->n;
366      char cNext;
367
368      /* If this is a "NEAR" keyword, check for an explicit nearness. */
369      if( pKey->eType==FTSQUERY_NEAR ){
370        assert( nKey==4 );
371        if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
372          nNear = 0;
373          for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
374            nNear = nNear * 10 + (zInput[nKey] - '0');
375          }
376        }
377      }
378
379      /* At this point this is probably a keyword. But for that to be true,
380      ** the next byte must contain either whitespace, an open or close
381      ** parenthesis, a quote character, or EOF.
382      */
383      cNext = zInput[nKey];
384      if( fts3isspace(cNext)
385       || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
386      ){
387        pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
388        if( !pRet ){
389          return SQLITE_NOMEM;
390        }
391        pRet->eType = pKey->eType;
392        pRet->nNear = nNear;
393        *ppExpr = pRet;
394        *pnConsumed = (int)((zInput - z) + nKey);
395        return SQLITE_OK;
396      }
397
398      /* Turns out that wasn't a keyword after all. This happens if the
399      ** user has supplied a token such as "ORacle". Continue.
400      */
401    }
402  }
403
404  /* Check for an open bracket. */
405  if( sqlite3_fts3_enable_parentheses ){
406    if( *zInput=='(' ){
407      int nConsumed;
408      pParse->nNest++;
409      rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
410      if( rc==SQLITE_OK && !*ppExpr ){
411        rc = SQLITE_DONE;
412      }
413      *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
414      return rc;
415    }
416
417    /* Check for a close bracket. */
418    if( *zInput==')' ){
419      pParse->nNest--;
420      *pnConsumed = (int)((zInput - z) + 1);
421      return SQLITE_DONE;
422    }
423  }
424
425  /* See if we are dealing with a quoted phrase. If this is the case, then
426  ** search for the closing quote and pass the whole string to getNextString()
427  ** for processing. This is easy to do, as fts3 has no syntax for escaping
428  ** a quote character embedded in a string.
429  */
430  if( *zInput=='"' ){
431    for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
432    *pnConsumed = (int)((zInput - z) + ii + 1);
433    if( ii==nInput ){
434      return SQLITE_ERROR;
435    }
436    return getNextString(pParse, &zInput[1], ii-1, ppExpr);
437  }
438
439
440  /* If control flows to this point, this must be a regular token, or
441  ** the end of the input. Read a regular token using the sqlite3_tokenizer
442  ** interface. Before doing so, figure out if there is an explicit
443  ** column specifier for the token.
444  **
445  ** TODO: Strangely, it is not possible to associate a column specifier
446  ** with a quoted phrase, only with a single token. Not sure if this was
447  ** an implementation artifact or an intentional decision when fts3 was
448  ** first implemented. Whichever it was, this module duplicates the
449  ** limitation.
450  */
451  iCol = pParse->iDefaultCol;
452  iColLen = 0;
453  for(ii=0; ii<pParse->nCol; ii++){
454    const char *zStr = pParse->azCol[ii];
455    int nStr = (int)strlen(zStr);
456    if( nInput>nStr && zInput[nStr]==':'
457     && sqlite3_strnicmp(zStr, zInput, nStr)==0
458    ){
459      iCol = ii;
460      iColLen = (int)((zInput - z) + nStr + 1);
461      break;
462    }
463  }
464  rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
465  *pnConsumed += iColLen;
466  return rc;
467}
468
469/*
470** The argument is an Fts3Expr structure for a binary operator (any type
471** except an FTSQUERY_PHRASE). Return an integer value representing the
472** precedence of the operator. Lower values have a higher precedence (i.e.
473** group more tightly). For example, in the C language, the == operator
474** groups more tightly than ||, and would therefore have a higher precedence.
475**
476** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
477** is defined), the order of the operators in precedence from highest to
478** lowest is:
479**
480**   NEAR
481**   NOT
482**   AND (including implicit ANDs)
483**   OR
484**
485** Note that when using the old query syntax, the OR operator has a higher
486** precedence than the AND operator.
487*/
488static int opPrecedence(Fts3Expr *p){
489  assert( p->eType!=FTSQUERY_PHRASE );
490  if( sqlite3_fts3_enable_parentheses ){
491    return p->eType;
492  }else if( p->eType==FTSQUERY_NEAR ){
493    return 1;
494  }else if( p->eType==FTSQUERY_OR ){
495    return 2;
496  }
497  assert( p->eType==FTSQUERY_AND );
498  return 3;
499}
500
501/*
502** Argument ppHead contains a pointer to the current head of a query
503** expression tree being parsed. pPrev is the expression node most recently
504** inserted into the tree. This function adds pNew, which is always a binary
505** operator node, into the expression tree based on the relative precedence
506** of pNew and the existing nodes of the tree. This may result in the head
507** of the tree changing, in which case *ppHead is set to the new root node.
508*/
509static void insertBinaryOperator(
510  Fts3Expr **ppHead,       /* Pointer to the root node of a tree */
511  Fts3Expr *pPrev,         /* Node most recently inserted into the tree */
512  Fts3Expr *pNew           /* New binary node to insert into expression tree */
513){
514  Fts3Expr *pSplit = pPrev;
515  while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
516    pSplit = pSplit->pParent;
517  }
518
519  if( pSplit->pParent ){
520    assert( pSplit->pParent->pRight==pSplit );
521    pSplit->pParent->pRight = pNew;
522    pNew->pParent = pSplit->pParent;
523  }else{
524    *ppHead = pNew;
525  }
526  pNew->pLeft = pSplit;
527  pSplit->pParent = pNew;
528}
529
530/*
531** Parse the fts3 query expression found in buffer z, length n. This function
532** returns either when the end of the buffer is reached or an unmatched
533** closing bracket - ')' - is encountered.
534**
535** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
536** parsed form of the expression and *pnConsumed is set to the number of
537** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
538** (out of memory error) or SQLITE_ERROR (parse error) is returned.
539*/
540static int fts3ExprParse(
541  ParseContext *pParse,                   /* fts3 query parse context */
542  const char *z, int n,                   /* Text of MATCH query */
543  Fts3Expr **ppExpr,                      /* OUT: Parsed query structure */
544  int *pnConsumed                         /* OUT: Number of bytes consumed */
545){
546  Fts3Expr *pRet = 0;
547  Fts3Expr *pPrev = 0;
548  Fts3Expr *pNotBranch = 0;               /* Only used in legacy parse mode */
549  int nIn = n;
550  const char *zIn = z;
551  int rc = SQLITE_OK;
552  int isRequirePhrase = 1;
553
554  while( rc==SQLITE_OK ){
555    Fts3Expr *p = 0;
556    int nByte = 0;
557    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
558    if( rc==SQLITE_OK ){
559      int isPhrase;
560
561      if( !sqlite3_fts3_enable_parentheses
562       && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot
563      ){
564        /* Create an implicit NOT operator. */
565        Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
566        if( !pNot ){
567          sqlite3Fts3ExprFree(p);
568          rc = SQLITE_NOMEM;
569          goto exprparse_out;
570        }
571        pNot->eType = FTSQUERY_NOT;
572        pNot->pRight = p;
573        if( pNotBranch ){
574          pNot->pLeft = pNotBranch;
575        }
576        pNotBranch = pNot;
577        p = pPrev;
578      }else{
579        int eType = p->eType;
580        assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot );
581        isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
582
583        /* The isRequirePhrase variable is set to true if a phrase or
584        ** an expression contained in parenthesis is required. If a
585        ** binary operator (AND, OR, NOT or NEAR) is encounted when
586        ** isRequirePhrase is set, this is a syntax error.
587        */
588        if( !isPhrase && isRequirePhrase ){
589          sqlite3Fts3ExprFree(p);
590          rc = SQLITE_ERROR;
591          goto exprparse_out;
592        }
593
594        if( isPhrase && !isRequirePhrase ){
595          /* Insert an implicit AND operator. */
596          Fts3Expr *pAnd;
597          assert( pRet && pPrev );
598          pAnd = fts3MallocZero(sizeof(Fts3Expr));
599          if( !pAnd ){
600            sqlite3Fts3ExprFree(p);
601            rc = SQLITE_NOMEM;
602            goto exprparse_out;
603          }
604          pAnd->eType = FTSQUERY_AND;
605          insertBinaryOperator(&pRet, pPrev, pAnd);
606          pPrev = pAnd;
607        }
608
609        /* This test catches attempts to make either operand of a NEAR
610        ** operator something other than a phrase. For example, either of
611        ** the following:
612        **
613        **    (bracketed expression) NEAR phrase
614        **    phrase NEAR (bracketed expression)
615        **
616        ** Return an error in either case.
617        */
618        if( pPrev && (
619            (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
620         || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
621        )){
622          sqlite3Fts3ExprFree(p);
623          rc = SQLITE_ERROR;
624          goto exprparse_out;
625        }
626
627        if( isPhrase ){
628          if( pRet ){
629            assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
630            pPrev->pRight = p;
631            p->pParent = pPrev;
632          }else{
633            pRet = p;
634          }
635        }else{
636          insertBinaryOperator(&pRet, pPrev, p);
637        }
638        isRequirePhrase = !isPhrase;
639      }
640      assert( nByte>0 );
641    }
642    assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
643    nIn -= nByte;
644    zIn += nByte;
645    pPrev = p;
646  }
647
648  if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
649    rc = SQLITE_ERROR;
650  }
651
652  if( rc==SQLITE_DONE ){
653    rc = SQLITE_OK;
654    if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
655      if( !pRet ){
656        rc = SQLITE_ERROR;
657      }else{
658        Fts3Expr *pIter = pNotBranch;
659        while( pIter->pLeft ){
660          pIter = pIter->pLeft;
661        }
662        pIter->pLeft = pRet;
663        pRet = pNotBranch;
664      }
665    }
666  }
667  *pnConsumed = n - nIn;
668
669exprparse_out:
670  if( rc!=SQLITE_OK ){
671    sqlite3Fts3ExprFree(pRet);
672    sqlite3Fts3ExprFree(pNotBranch);
673    pRet = 0;
674  }
675  *ppExpr = pRet;
676  return rc;
677}
678
679/*
680** Parameters z and n contain a pointer to and length of a buffer containing
681** an fts3 query expression, respectively. This function attempts to parse the
682** query expression and create a tree of Fts3Expr structures representing the
683** parsed expression. If successful, *ppExpr is set to point to the head
684** of the parsed expression tree and SQLITE_OK is returned. If an error
685** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
686** error) is returned and *ppExpr is set to 0.
687**
688** If parameter n is a negative number, then z is assumed to point to a
689** nul-terminated string and the length is determined using strlen().
690**
691** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
692** use to normalize query tokens while parsing the expression. The azCol[]
693** array, which is assumed to contain nCol entries, should contain the names
694** of each column in the target fts3 table, in order from left to right.
695** Column names must be nul-terminated strings.
696**
697** The iDefaultCol parameter should be passed the index of the table column
698** that appears on the left-hand-side of the MATCH operator (the default
699** column to match against for tokens for which a column name is not explicitly
700** specified as part of the query string), or -1 if tokens may by default
701** match any table column.
702*/
703int sqlite3Fts3ExprParse(
704  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
705  char **azCol,                       /* Array of column names for fts3 table */
706  int nCol,                           /* Number of entries in azCol[] */
707  int iDefaultCol,                    /* Default column to query */
708  const char *z, int n,               /* Text of MATCH query */
709  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
710){
711  int nParsed;
712  int rc;
713  ParseContext sParse;
714  sParse.pTokenizer = pTokenizer;
715  sParse.azCol = (const char **)azCol;
716  sParse.nCol = nCol;
717  sParse.iDefaultCol = iDefaultCol;
718  sParse.nNest = 0;
719  if( z==0 ){
720    *ppExpr = 0;
721    return SQLITE_OK;
722  }
723  if( n<0 ){
724    n = (int)strlen(z);
725  }
726  rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
727
728  /* Check for mismatched parenthesis */
729  if( rc==SQLITE_OK && sParse.nNest ){
730    rc = SQLITE_ERROR;
731    sqlite3Fts3ExprFree(*ppExpr);
732    *ppExpr = 0;
733  }
734
735  return rc;
736}
737
738/*
739** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
740*/
741void sqlite3Fts3ExprFree(Fts3Expr *p){
742  if( p ){
743    sqlite3Fts3ExprFree(p->pLeft);
744    sqlite3Fts3ExprFree(p->pRight);
745    sqlite3_free(p->aDoclist);
746    sqlite3_free(p);
747  }
748}
749
750/****************************************************************************
751*****************************************************************************
752** Everything after this point is just test code.
753*/
754
755#ifdef SQLITE_TEST
756
757#include <stdio.h>
758
759/*
760** Function to query the hash-table of tokenizers (see README.tokenizers).
761*/
762static int queryTestTokenizer(
763  sqlite3 *db,
764  const char *zName,
765  const sqlite3_tokenizer_module **pp
766){
767  int rc;
768  sqlite3_stmt *pStmt;
769  const char zSql[] = "SELECT fts3_tokenizer(?)";
770
771  *pp = 0;
772  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
773  if( rc!=SQLITE_OK ){
774    return rc;
775  }
776
777  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
778  if( SQLITE_ROW==sqlite3_step(pStmt) ){
779    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
780      memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
781    }
782  }
783
784  return sqlite3_finalize(pStmt);
785}
786
787/*
788** Return a pointer to a buffer containing a text representation of the
789** expression passed as the first argument. The buffer is obtained from
790** sqlite3_malloc(). It is the responsibility of the caller to use
791** sqlite3_free() to release the memory. If an OOM condition is encountered,
792** NULL is returned.
793**
794** If the second argument is not NULL, then its contents are prepended to
795** the returned expression text and then freed using sqlite3_free().
796*/
797static char *exprToString(Fts3Expr *pExpr, char *zBuf){
798  switch( pExpr->eType ){
799    case FTSQUERY_PHRASE: {
800      Fts3Phrase *pPhrase = pExpr->pPhrase;
801      int i;
802      zBuf = sqlite3_mprintf(
803          "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot);
804      for(i=0; zBuf && i<pPhrase->nToken; i++){
805        zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
806            pPhrase->aToken[i].n, pPhrase->aToken[i].z,
807            (pPhrase->aToken[i].isPrefix?"+":"")
808        );
809      }
810      return zBuf;
811    }
812
813    case FTSQUERY_NEAR:
814      zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
815      break;
816    case FTSQUERY_NOT:
817      zBuf = sqlite3_mprintf("%zNOT ", zBuf);
818      break;
819    case FTSQUERY_AND:
820      zBuf = sqlite3_mprintf("%zAND ", zBuf);
821      break;
822    case FTSQUERY_OR:
823      zBuf = sqlite3_mprintf("%zOR ", zBuf);
824      break;
825  }
826
827  if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
828  if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
829  if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
830
831  if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
832  if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
833
834  return zBuf;
835}
836
837/*
838** This is the implementation of a scalar SQL function used to test the
839** expression parser. It should be called as follows:
840**
841**   fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
842**
843** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
844** to parse the query expression (see README.tokenizers). The second argument
845** is the query expression to parse. Each subsequent argument is the name
846** of a column of the fts3 table that the query expression may refer to.
847** For example:
848**
849**   SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
850*/
851static void fts3ExprTest(
852  sqlite3_context *context,
853  int argc,
854  sqlite3_value **argv
855){
856  sqlite3_tokenizer_module const *pModule = 0;
857  sqlite3_tokenizer *pTokenizer = 0;
858  int rc;
859  char **azCol = 0;
860  const char *zExpr;
861  int nExpr;
862  int nCol;
863  int ii;
864  Fts3Expr *pExpr;
865  char *zBuf = 0;
866  sqlite3 *db = sqlite3_context_db_handle(context);
867
868  if( argc<3 ){
869    sqlite3_result_error(context,
870        "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
871    );
872    return;
873  }
874
875  rc = queryTestTokenizer(db,
876                          (const char *)sqlite3_value_text(argv[0]), &pModule);
877  if( rc==SQLITE_NOMEM ){
878    sqlite3_result_error_nomem(context);
879    goto exprtest_out;
880  }else if( !pModule ){
881    sqlite3_result_error(context, "No such tokenizer module", -1);
882    goto exprtest_out;
883  }
884
885  rc = pModule->xCreate(0, 0, &pTokenizer);
886  assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
887  if( rc==SQLITE_NOMEM ){
888    sqlite3_result_error_nomem(context);
889    goto exprtest_out;
890  }
891  pTokenizer->pModule = pModule;
892
893  zExpr = (const char *)sqlite3_value_text(argv[1]);
894  nExpr = sqlite3_value_bytes(argv[1]);
895  nCol = argc-2;
896  azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
897  if( !azCol ){
898    sqlite3_result_error_nomem(context);
899    goto exprtest_out;
900  }
901  for(ii=0; ii<nCol; ii++){
902    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
903  }
904
905  rc = sqlite3Fts3ExprParse(
906      pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr
907  );
908  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
909    sqlite3_result_error(context, "Error parsing expression", -1);
910  }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
911    sqlite3_result_error_nomem(context);
912  }else{
913    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
914    sqlite3_free(zBuf);
915  }
916
917  sqlite3Fts3ExprFree(pExpr);
918
919exprtest_out:
920  if( pModule && pTokenizer ){
921    rc = pModule->xDestroy(pTokenizer);
922  }
923  sqlite3_free(azCol);
924}
925
926/*
927** Register the query expression parser test function fts3_exprtest()
928** with database connection db.
929*/
930int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
931  return sqlite3_create_function(
932      db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
933  );
934}
935
936#endif
937#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
938