15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 2007 June 22
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The author disclaims copyright to this source code.  In place of
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** a legal notice, here is a blessing:
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**    May you do good and not evil.
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**    May you find forgiveness for yourself and forgive others.
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**    May you share freely, never taking more than you give.
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)******************************************************************************
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** This is part of an SQLite module implementing full-text search.
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** This particular file implements the generic tokenizer interface.
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The code in this file is only compiled if:
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**     * The FTS2 module is being built as an extension
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**       (in which case SQLITE_CORE is not defined), or
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**     * The FTS2 module is being built into the core of
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**       SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "sqlite3.h"
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "sqlite3ext.h"
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef SQLITE_CORE
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SQLITE_EXTENSION_INIT1
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "fts2_hash.h"
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "fts2_tokenizer.h"
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <assert.h>
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stddef.h>
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Implementation of the SQL scalar function for accessing the underlying
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** hash table. This function may be called as follows:
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**   SELECT <function-name>(<key-name>);
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**   SELECT <function-name>(<key-name>, <pointer>);
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** where <function-name> is the name passed as the second argument
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** If the <pointer> argument is specified, it must be a blob value
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** containing a pointer to be stored as the hash data corresponding
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** to the string <key-name>. If <pointer> is not specified, then
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** the string <key-name> must already exist in the has table. Otherwise,
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** an error is returned.
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Whether or not the <pointer> argument is specified, the value returned
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** is a blob containing the pointer stored as the hash data corresponding
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** to string <key-name> (after the hash-table is updated, if applicable).
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void scalarFunc(
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_context *context,
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int argc,
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_value **argv
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  fts2Hash *pHash;
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void *pPtr = 0;
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const unsigned char *zName;
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nName;
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( argc==1 || argc==2 );
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pHash = (fts2Hash *)sqlite3_user_data(context);
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  zName = sqlite3_value_text(argv[0]);
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  nName = sqlite3_value_bytes(argv[0])+1;
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( argc==2 ){
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    void *pOld;
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int n = sqlite3_value_bytes(argv[1]);
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if( n!=sizeof(pPtr) ){
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      sqlite3_result_error(context, "argument type mismatch", -1);
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pPtr = *(void **)sqlite3_value_blob(argv[1]);
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if( pOld==pPtr ){
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      sqlite3_result_error(context, "out of memory", -1);
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }else{
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if( !pPtr ){
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      sqlite3_result_error(context, zErr, -1);
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      sqlite3_free(zErr);
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef SQLITE_TEST
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <tcl.h>
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string.h>
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Implementation of a special SQL scalar function for testing tokenizers
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** designed to be used in concert with the Tcl testing framework. This
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** function must be called with two arguments:
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**   SELECT <function-name>(<key-name>, <input-string>);
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**   SELECT <function-name>(<key-name>, <pointer>);
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** where <function-name> is the name passed as the second argument
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The return value is a string that may be interpreted as a Tcl
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** list. For each token in the <input-string>, three elements are
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** added to the returned list. The first is the token position, the
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** second is the token text (folded, stemmed, etc.) and the third is the
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** substring of <input-string> associated with the token. For example,
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** using the built-in "simple" tokenizer:
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**   SELECT fts_tokenizer_test('simple', 'I don't see how');
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** will return the string:
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**   "{0 i I 1 dont don't 2 see see 3 how how}"
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void testFunc(
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_context *context,
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int argc,
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_value **argv
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  fts2Hash *pHash;
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_tokenizer_module *p;
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_tokenizer *pTokenizer = 0;
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_tokenizer_cursor *pCsr = 0;
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *zErr = 0;
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *zName;
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nName;
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *zInput;
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nInput;
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *zArg = 0;
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *zToken;
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nToken;
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int iStart;
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int iEnd;
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int iPos;
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tcl_Obj *pRet;
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( argc==2 || argc==3 );
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  nName = sqlite3_value_bytes(argv[0]);
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  zName = (const char *)sqlite3_value_text(argv[0]);
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  nInput = sqlite3_value_bytes(argv[argc-1]);
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  zInput = (const char *)sqlite3_value_text(argv[argc-1]);
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( argc==3 ){
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    zArg = (const char *)sqlite3_value_text(argv[1]);
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pHash = (fts2Hash *)sqlite3_user_data(context);
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( !p ){
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    sqlite3_result_error(context, zErr, -1);
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    sqlite3_free(zErr);
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pRet = Tcl_NewObj();
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tcl_IncrRefCount(pRet);
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    zErr = "error in xCreate()";
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    goto finish;
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pTokenizer->pModule = p;
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    zErr = "error in xOpen()";
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    goto finish;
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  pCsr->pTokenizer = pTokenizer;
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    zToken = &zInput[iStart];
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    nToken = iEnd-iStart;
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( SQLITE_OK!=p->xClose(pCsr) ){
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    zErr = "error in xClose()";
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    goto finish;
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    zErr = "error in xDestroy()";
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    goto finish;
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)finish:
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( zErr ){
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    sqlite3_result_error(context, zErr, -1);
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }else{
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Tcl_DecrRefCount(pRet);
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int registerTokenizer(
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3 *db,
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char *zName,
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const sqlite3_tokenizer_module *p
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int rc;
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_stmt *pStmt;
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( rc!=SQLITE_OK ){
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return rc;
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_step(pStmt);
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return sqlite3_finalize(pStmt);
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int queryFts2Tokenizer(
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3 *db,
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char *zName,
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const sqlite3_tokenizer_module **pp
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int rc;
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_stmt *pStmt;
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char zSql[] = "SELECT fts2_tokenizer(?)";
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *pp = 0;
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( rc!=SQLITE_OK ){
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return rc;
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( SQLITE_ROW==sqlite3_step(pStmt) ){
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return sqlite3_finalize(pStmt);
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Implementation of the scalar function fts2_tokenizer_internal_test().
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** This function is used for testing only, it is not included in the
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** build unless SQLITE_TEST is defined.
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The purpose of this is to test that the fts2_tokenizer() function
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** can be used as designed by the C-code in the queryFts2Tokenizer and
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** registerTokenizer() functions above. These two functions are repeated
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** in the README.tokenizer file as an example, so it is important to
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** test them.
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** function with no arguments. An assert() will fail if a problem is
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** detected. i.e.:
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**     SELECT fts2_tokenizer_internal_test();
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void intTestFunc(
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_context *context,
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int argc,
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_value **argv
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int rc;
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const sqlite3_tokenizer_module *p1;
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const sqlite3_tokenizer_module *p2;
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Test the query function */
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3Fts2SimpleTokenizerModule(&p1);
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  rc = queryFts2Tokenizer(db, "simple", &p2);
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( rc==SQLITE_OK );
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( p1==p2 );
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( rc==SQLITE_ERROR );
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( p2==0 );
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Test the storage function */
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  rc = registerTokenizer(db, "nosuchtokenizer", p1);
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( rc==SQLITE_OK );
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( rc==SQLITE_OK );
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( p2==p1 );
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Set up SQL objects in database db used to access the contents of
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** the hash table pointed to by argument pHash. The hash table must
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** been initialised to use string keys, and to take a private copy
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** of the key when a value is inserted. i.e. by a call similar to:
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**    sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** This function adds a scalar function (see header comment above
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** scalarFunc() in this file for details) and, if ENABLE_TABLE is
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** defined at compilation time, a temporary virtual table (see header
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** comment above struct HashTableVtab) to the database schema. Both
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** provide read/write access to the contents of *pHash.
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The third argument to this function, zName, is used as the name
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** of both the scalar and, if created, the virtual table.
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int sqlite3Fts2InitHashTable(
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3 *db,
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  fts2Hash *pHash,
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *zName
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int rc = SQLITE_OK;
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void *p = (void *)pHash;
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int any = SQLITE_ANY;
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char *zTest = 0;
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char *zTest2 = 0;
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef SQLITE_TEST
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void *pdb = (void *)db;
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  zTest = sqlite3_mprintf("%s_test", zName);
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  zTest2 = sqlite3_mprintf("%s_internal_test", zName);
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( !zTest || !zTest2 ){
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rc = SQLITE_NOMEM;
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( rc!=SQLITE_OK
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef SQLITE_TEST
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  );
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_free(zTest);
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_free(zTest2);
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return rc;
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
375