1/* 2** 2007 June 22 3** 4** The author disclaims copyright to this source code. In place of 5** a legal notice, here is a blessing: 6** 7** May you do good and not evil. 8** May you find forgiveness for yourself and forgive others. 9** May you share freely, never taking more than you give. 10** 11****************************************************************************** 12** 13** This is part of an SQLite module implementing full-text search. 14** This particular file implements the generic tokenizer interface. 15*/ 16 17/* 18** The code in this file is only compiled if: 19** 20** * The FTS2 module is being built as an extension 21** (in which case SQLITE_CORE is not defined), or 22** 23** * The FTS2 module is being built into the core of 24** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). 25*/ 26#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) 27 28 29#include "sqlite3.h" 30#include "sqlite3ext.h" 31#ifndef SQLITE_CORE 32 SQLITE_EXTENSION_INIT1 33#endif 34 35#include "fts2_hash.h" 36#include "fts2_tokenizer.h" 37#include <assert.h> 38#include <stddef.h> 39 40/* 41** Implementation of the SQL scalar function for accessing the underlying 42** hash table. This function may be called as follows: 43** 44** SELECT <function-name>(<key-name>); 45** SELECT <function-name>(<key-name>, <pointer>); 46** 47** where <function-name> is the name passed as the second argument 48** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer'). 49** 50** If the <pointer> argument is specified, it must be a blob value 51** containing a pointer to be stored as the hash data corresponding 52** to the string <key-name>. If <pointer> is not specified, then 53** the string <key-name> must already exist in the has table. Otherwise, 54** an error is returned. 55** 56** Whether or not the <pointer> argument is specified, the value returned 57** is a blob containing the pointer stored as the hash data corresponding 58** to string <key-name> (after the hash-table is updated, if applicable). 59*/ 60static void scalarFunc( 61 sqlite3_context *context, 62 int argc, 63 sqlite3_value **argv 64){ 65 fts2Hash *pHash; 66 void *pPtr = 0; 67 const unsigned char *zName; 68 int nName; 69 70 assert( argc==1 || argc==2 ); 71 72 pHash = (fts2Hash *)sqlite3_user_data(context); 73 74 zName = sqlite3_value_text(argv[0]); 75 nName = sqlite3_value_bytes(argv[0])+1; 76 77 if( argc==2 ){ 78 void *pOld; 79 int n = sqlite3_value_bytes(argv[1]); 80 if( n!=sizeof(pPtr) ){ 81 sqlite3_result_error(context, "argument type mismatch", -1); 82 return; 83 } 84 pPtr = *(void **)sqlite3_value_blob(argv[1]); 85 pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr); 86 if( pOld==pPtr ){ 87 sqlite3_result_error(context, "out of memory", -1); 88 return; 89 } 90 }else{ 91 pPtr = sqlite3Fts2HashFind(pHash, zName, nName); 92 if( !pPtr ){ 93 char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); 94 sqlite3_result_error(context, zErr, -1); 95 sqlite3_free(zErr); 96 return; 97 } 98 } 99 100 sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); 101} 102 103#ifdef SQLITE_TEST 104 105#include <tcl.h> 106#include <string.h> 107 108/* 109** Implementation of a special SQL scalar function for testing tokenizers 110** designed to be used in concert with the Tcl testing framework. This 111** function must be called with two arguments: 112** 113** SELECT <function-name>(<key-name>, <input-string>); 114** SELECT <function-name>(<key-name>, <pointer>); 115** 116** where <function-name> is the name passed as the second argument 117** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer') 118** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test'). 119** 120** The return value is a string that may be interpreted as a Tcl 121** list. For each token in the <input-string>, three elements are 122** added to the returned list. The first is the token position, the 123** second is the token text (folded, stemmed, etc.) and the third is the 124** substring of <input-string> associated with the token. For example, 125** using the built-in "simple" tokenizer: 126** 127** SELECT fts_tokenizer_test('simple', 'I don't see how'); 128** 129** will return the string: 130** 131** "{0 i I 1 dont don't 2 see see 3 how how}" 132** 133*/ 134static void testFunc( 135 sqlite3_context *context, 136 int argc, 137 sqlite3_value **argv 138){ 139 fts2Hash *pHash; 140 sqlite3_tokenizer_module *p; 141 sqlite3_tokenizer *pTokenizer = 0; 142 sqlite3_tokenizer_cursor *pCsr = 0; 143 144 const char *zErr = 0; 145 146 const char *zName; 147 int nName; 148 const char *zInput; 149 int nInput; 150 151 const char *zArg = 0; 152 153 const char *zToken; 154 int nToken; 155 int iStart; 156 int iEnd; 157 int iPos; 158 159 Tcl_Obj *pRet; 160 161 assert( argc==2 || argc==3 ); 162 163 nName = sqlite3_value_bytes(argv[0]); 164 zName = (const char *)sqlite3_value_text(argv[0]); 165 nInput = sqlite3_value_bytes(argv[argc-1]); 166 zInput = (const char *)sqlite3_value_text(argv[argc-1]); 167 168 if( argc==3 ){ 169 zArg = (const char *)sqlite3_value_text(argv[1]); 170 } 171 172 pHash = (fts2Hash *)sqlite3_user_data(context); 173 p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1); 174 175 if( !p ){ 176 char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); 177 sqlite3_result_error(context, zErr, -1); 178 sqlite3_free(zErr); 179 return; 180 } 181 182 pRet = Tcl_NewObj(); 183 Tcl_IncrRefCount(pRet); 184 185 if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){ 186 zErr = "error in xCreate()"; 187 goto finish; 188 } 189 pTokenizer->pModule = p; 190 if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){ 191 zErr = "error in xOpen()"; 192 goto finish; 193 } 194 pCsr->pTokenizer = pTokenizer; 195 196 while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ 197 Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); 198 Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); 199 zToken = &zInput[iStart]; 200 nToken = iEnd-iStart; 201 Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); 202 } 203 204 if( SQLITE_OK!=p->xClose(pCsr) ){ 205 zErr = "error in xClose()"; 206 goto finish; 207 } 208 if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ 209 zErr = "error in xDestroy()"; 210 goto finish; 211 } 212 213finish: 214 if( zErr ){ 215 sqlite3_result_error(context, zErr, -1); 216 }else{ 217 sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); 218 } 219 Tcl_DecrRefCount(pRet); 220} 221 222static 223int registerTokenizer( 224 sqlite3 *db, 225 char *zName, 226 const sqlite3_tokenizer_module *p 227){ 228 int rc; 229 sqlite3_stmt *pStmt; 230 const char zSql[] = "SELECT fts2_tokenizer(?, ?)"; 231 232 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 233 if( rc!=SQLITE_OK ){ 234 return rc; 235 } 236 237 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); 238 sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); 239 sqlite3_step(pStmt); 240 241 return sqlite3_finalize(pStmt); 242} 243 244static 245int queryFts2Tokenizer( 246 sqlite3 *db, 247 char *zName, 248 const sqlite3_tokenizer_module **pp 249){ 250 int rc; 251 sqlite3_stmt *pStmt; 252 const char zSql[] = "SELECT fts2_tokenizer(?)"; 253 254 *pp = 0; 255 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 256 if( rc!=SQLITE_OK ){ 257 return rc; 258 } 259 260 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); 261 if( SQLITE_ROW==sqlite3_step(pStmt) ){ 262 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ 263 memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); 264 } 265 } 266 267 return sqlite3_finalize(pStmt); 268} 269 270void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); 271 272/* 273** Implementation of the scalar function fts2_tokenizer_internal_test(). 274** This function is used for testing only, it is not included in the 275** build unless SQLITE_TEST is defined. 276** 277** The purpose of this is to test that the fts2_tokenizer() function 278** can be used as designed by the C-code in the queryFts2Tokenizer and 279** registerTokenizer() functions above. These two functions are repeated 280** in the README.tokenizer file as an example, so it is important to 281** test them. 282** 283** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar 284** function with no arguments. An assert() will fail if a problem is 285** detected. i.e.: 286** 287** SELECT fts2_tokenizer_internal_test(); 288** 289*/ 290static void intTestFunc( 291 sqlite3_context *context, 292 int argc, 293 sqlite3_value **argv 294){ 295 int rc; 296 const sqlite3_tokenizer_module *p1; 297 const sqlite3_tokenizer_module *p2; 298 sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); 299 300 /* Test the query function */ 301 sqlite3Fts2SimpleTokenizerModule(&p1); 302 rc = queryFts2Tokenizer(db, "simple", &p2); 303 assert( rc==SQLITE_OK ); 304 assert( p1==p2 ); 305 rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2); 306 assert( rc==SQLITE_ERROR ); 307 assert( p2==0 ); 308 assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); 309 310 /* Test the storage function */ 311 rc = registerTokenizer(db, "nosuchtokenizer", p1); 312 assert( rc==SQLITE_OK ); 313 rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2); 314 assert( rc==SQLITE_OK ); 315 assert( p2==p1 ); 316 317 sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); 318} 319 320#endif 321 322/* 323** Set up SQL objects in database db used to access the contents of 324** the hash table pointed to by argument pHash. The hash table must 325** been initialised to use string keys, and to take a private copy 326** of the key when a value is inserted. i.e. by a call similar to: 327** 328** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); 329** 330** This function adds a scalar function (see header comment above 331** scalarFunc() in this file for details) and, if ENABLE_TABLE is 332** defined at compilation time, a temporary virtual table (see header 333** comment above struct HashTableVtab) to the database schema. Both 334** provide read/write access to the contents of *pHash. 335** 336** The third argument to this function, zName, is used as the name 337** of both the scalar and, if created, the virtual table. 338*/ 339int sqlite3Fts2InitHashTable( 340 sqlite3 *db, 341 fts2Hash *pHash, 342 const char *zName 343){ 344 int rc = SQLITE_OK; 345 void *p = (void *)pHash; 346 const int any = SQLITE_ANY; 347 char *zTest = 0; 348 char *zTest2 = 0; 349 350#ifdef SQLITE_TEST 351 void *pdb = (void *)db; 352 zTest = sqlite3_mprintf("%s_test", zName); 353 zTest2 = sqlite3_mprintf("%s_internal_test", zName); 354 if( !zTest || !zTest2 ){ 355 rc = SQLITE_NOMEM; 356 } 357#endif 358 359 if( rc!=SQLITE_OK 360 || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0)) 361 || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0)) 362#ifdef SQLITE_TEST 363 || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0)) 364 || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0)) 365 || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0)) 366#endif 367 ); 368 369 sqlite3_free(zTest); 370 sqlite3_free(zTest2); 371 return rc; 372} 373 374#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ 375