15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 2007 June 22 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The author disclaims copyright to this source code. In place of 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** a legal notice, here is a blessing: 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** May you do good and not evil. 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** May you find forgiveness for yourself and forgive others. 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** May you share freely, never taking more than you give. 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)****************************************************************************** 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** This is part of an SQLite module implementing full-text search. 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** This particular file implements the generic tokenizer interface. 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/ 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The code in this file is only compiled if: 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** * The FTS2 module is being built as an extension 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** (in which case SQLITE_CORE is not defined), or 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** * The FTS2 module is being built into the core of 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/ 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "sqlite3.h" 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "sqlite3ext.h" 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef SQLITE_CORE 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SQLITE_EXTENSION_INIT1 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "fts2_hash.h" 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "fts2_tokenizer.h" 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <assert.h> 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stddef.h> 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Implementation of the SQL scalar function for accessing the underlying 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** hash table. This function may be called as follows: 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** SELECT <function-name>(<key-name>); 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** SELECT <function-name>(<key-name>, <pointer>); 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** where <function-name> is the name passed as the second argument 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer'). 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** If the <pointer> argument is specified, it must be a blob value 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** containing a pointer to be stored as the hash data corresponding 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** to the string <key-name>. If <pointer> is not specified, then 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** the string <key-name> must already exist in the has table. Otherwise, 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** an error is returned. 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Whether or not the <pointer> argument is specified, the value returned 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** is a blob containing the pointer stored as the hash data corresponding 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** to string <key-name> (after the hash-table is updated, if applicable). 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/ 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void scalarFunc( 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_context *context, 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int argc, 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_value **argv 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){ 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fts2Hash *pHash; 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *pPtr = 0; 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const unsigned char *zName; 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nName; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( argc==1 || argc==2 ); 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pHash = (fts2Hash *)sqlite3_user_data(context); 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zName = sqlite3_value_text(argv[0]); 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nName = sqlite3_value_bytes(argv[0])+1; 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( argc==2 ){ 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *pOld; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n = sqlite3_value_bytes(argv[1]); 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( n!=sizeof(pPtr) ){ 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_result_error(context, "argument type mismatch", -1); 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pPtr = *(void **)sqlite3_value_blob(argv[1]); 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr); 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( pOld==pPtr ){ 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_result_error(context, "out of memory", -1); 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }else{ 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pPtr = sqlite3Fts2HashFind(pHash, zName, nName); 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( !pPtr ){ 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_result_error(context, zErr, -1); 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_free(zErr); 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef SQLITE_TEST 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <tcl.h> 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string.h> 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Implementation of a special SQL scalar function for testing tokenizers 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** designed to be used in concert with the Tcl testing framework. This 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** function must be called with two arguments: 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** SELECT <function-name>(<key-name>, <input-string>); 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** SELECT <function-name>(<key-name>, <pointer>); 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** where <function-name> is the name passed as the second argument 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer') 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test'). 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The return value is a string that may be interpreted as a Tcl 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** list. For each token in the <input-string>, three elements are 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** added to the returned list. The first is the token position, the 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** second is the token text (folded, stemmed, etc.) and the third is the 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** substring of <input-string> associated with the token. For example, 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** using the built-in "simple" tokenizer: 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** SELECT fts_tokenizer_test('simple', 'I don't see how'); 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** will return the string: 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** "{0 i I 1 dont don't 2 see see 3 how how}" 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/ 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void testFunc( 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_context *context, 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int argc, 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_value **argv 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){ 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fts2Hash *pHash; 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_tokenizer_module *p; 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_tokenizer *pTokenizer = 0; 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_tokenizer_cursor *pCsr = 0; 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *zErr = 0; 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *zName; 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nName; 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *zInput; 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nInput; 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *zArg = 0; 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *zToken; 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nToken; 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int iStart; 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int iEnd; 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int iPos; 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Tcl_Obj *pRet; 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( argc==2 || argc==3 ); 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nName = sqlite3_value_bytes(argv[0]); 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zName = (const char *)sqlite3_value_text(argv[0]); 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nInput = sqlite3_value_bytes(argv[argc-1]); 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zInput = (const char *)sqlite3_value_text(argv[argc-1]); 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( argc==3 ){ 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zArg = (const char *)sqlite3_value_text(argv[1]); 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pHash = (fts2Hash *)sqlite3_user_data(context); 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1); 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( !p ){ 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_result_error(context, zErr, -1); 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_free(zErr); 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pRet = Tcl_NewObj(); 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Tcl_IncrRefCount(pRet); 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){ 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zErr = "error in xCreate()"; 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto finish; 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pTokenizer->pModule = p; 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){ 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zErr = "error in xOpen()"; 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto finish; 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) pCsr->pTokenizer = pTokenizer; 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zToken = &zInput[iStart]; 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nToken = iEnd-iStart; 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( SQLITE_OK!=p->xClose(pCsr) ){ 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zErr = "error in xClose()"; 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto finish; 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zErr = "error in xDestroy()"; 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto finish; 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)finish: 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( zErr ){ 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_result_error(context, zErr, -1); 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }else{ 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Tcl_DecrRefCount(pRet); 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int registerTokenizer( 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3 *db, 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char *zName, 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const sqlite3_tokenizer_module *p 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){ 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int rc; 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_stmt *pStmt; 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char zSql[] = "SELECT fts2_tokenizer(?, ?)"; 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( rc!=SQLITE_OK ){ 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return rc; 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_step(pStmt); 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return sqlite3_finalize(pStmt); 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int queryFts2Tokenizer( 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3 *db, 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char *zName, 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const sqlite3_tokenizer_module **pp 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){ 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int rc; 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_stmt *pStmt; 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char zSql[] = "SELECT fts2_tokenizer(?)"; 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *pp = 0; 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( rc!=SQLITE_OK ){ 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return rc; 2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( SQLITE_ROW==sqlite3_step(pStmt) ){ 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return sqlite3_finalize(pStmt); 2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Implementation of the scalar function fts2_tokenizer_internal_test(). 2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** This function is used for testing only, it is not included in the 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** build unless SQLITE_TEST is defined. 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The purpose of this is to test that the fts2_tokenizer() function 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** can be used as designed by the C-code in the queryFts2Tokenizer and 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** registerTokenizer() functions above. These two functions are repeated 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** in the README.tokenizer file as an example, so it is important to 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** test them. 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** function with no arguments. An assert() will fail if a problem is 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** detected. i.e.: 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** SELECT fts2_tokenizer_internal_test(); 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/ 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void intTestFunc( 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_context *context, 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int argc, 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_value **argv 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){ 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int rc; 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const sqlite3_tokenizer_module *p1; 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const sqlite3_tokenizer_module *p2; 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Test the query function */ 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3Fts2SimpleTokenizerModule(&p1); 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rc = queryFts2Tokenizer(db, "simple", &p2); 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( rc==SQLITE_OK ); 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( p1==p2 ); 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2); 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( rc==SQLITE_ERROR ); 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( p2==0 ); 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) /* Test the storage function */ 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rc = registerTokenizer(db, "nosuchtokenizer", p1); 3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( rc==SQLITE_OK ); 3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2); 3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( rc==SQLITE_OK ); 3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert( p2==p1 ); 3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); 3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Set up SQL objects in database db used to access the contents of 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** the hash table pointed to by argument pHash. The hash table must 3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** been initialised to use string keys, and to take a private copy 3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** of the key when a value is inserted. i.e. by a call similar to: 3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); 3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** This function adds a scalar function (see header comment above 3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** scalarFunc() in this file for details) and, if ENABLE_TABLE is 3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** defined at compilation time, a temporary virtual table (see header 3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** comment above struct HashTableVtab) to the database schema. Both 3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** provide read/write access to the contents of *pHash. 3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The third argument to this function, zName, is used as the name 3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** of both the scalar and, if created, the virtual table. 3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/ 3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int sqlite3Fts2InitHashTable( 3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3 *db, 3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fts2Hash *pHash, 3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char *zName 3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){ 3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int rc = SQLITE_OK; 3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *p = (void *)pHash; 3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int any = SQLITE_ANY; 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char *zTest = 0; 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char *zTest2 = 0; 3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef SQLITE_TEST 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void *pdb = (void *)db; 3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zTest = sqlite3_mprintf("%s_test", zName); 3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) zTest2 = sqlite3_mprintf("%s_internal_test", zName); 3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( !zTest || !zTest2 ){ 3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) rc = SQLITE_NOMEM; 3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if( rc!=SQLITE_OK 3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0)) 3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0)) 3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef SQLITE_TEST 3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0)) 3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0)) 3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0)) 3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ); 3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_free(zTest); 3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) sqlite3_free(zTest2); 3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return rc; 3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ 375