/*
** 2007 June 22
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts2 based on the ICU library.
**
** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $
*/

#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#ifdef SQLITE_ENABLE_ICU

#include <assert.h>
#include <string.h>
#include "fts2_tokenizer.h"

#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>

typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;

/*
** An ICU tokenizer instance.  When a locale was supplied to icuCreate(),
** zLocale points at a nul-terminated copy stored in the same allocation,
** immediately after this structure.  Otherwise zLocale is NULL.
*/
struct IcuTokenizer {
  sqlite3_tokenizer base;
  char *zLocale;
};

/*
** A cursor used to iterate through the tokens of a single input string.
** The aOffset[] and aChar[] arrays live in the same allocation as the
** cursor itself (see icuOpen()).  zBuffer is a separate allocation that
** is grown on demand by icuNext() and released by icuClose().
*/
struct IcuCursor {
  sqlite3_tokenizer_cursor base;

  UBreakIterator *pIter;      /* ICU break-iterator object */
  int nChar;                  /* Number of UChar elements in aChar[] */
  UChar *aChar;               /* Copy of input using utf-16 encoding */
  int *aOffset;               /* Offsets of each character in utf-8 input */

  int nBuffer;                /* Size of zBuffer in bytes */
  char *zBuffer;              /* Buffer holding the utf-8 text of a token */

  int iToken;                 /* Position to report for the next token */
};

/*
** Create a new tokenizer instance.
**
** If argc>0, argv[0] is copied and later passed to ubrk_open() as the
** locale.  Any further arguments are ignored.  Returns SQLITE_OK, or
** SQLITE_NOMEM if the allocation fails.
*/
static int icuCreate(
  int argc,                            /* Number of entries in argv[] */
  const char * const *argv,            /* Tokenizer creation arguments */
  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
){
  IcuTokenizer *p;
  int n = 0;

  if( argc>0 ){
    n = strlen(argv[0])+1;
  }
  p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
  if( !p ){
    return SQLITE_NOMEM;
  }
  memset(p, 0, sizeof(IcuTokenizer));

  if( n ){
    /* The locale string is stored directly after the structure. */
    p->zLocale = (char *)&p[1];
    memcpy(p->zLocale, argv[0], n);
  }

  *ppTokenizer = (sqlite3_tokenizer *)p;

  return SQLITE_OK;
}

/*
** Destroy a tokenizer.  The locale string shares the tokenizer's
** allocation, so a single free releases everything.
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  sqlite3_free(p);
  return SQLITE_OK;
}

/*
** Prepare to begin tokenizing a particular string.  The input
** string to be tokenized is zInput[0..nInput-1].  If nInput is
** negative, zInput is assumed to be nul-terminated.  A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
**
** The input is case-folded and converted to utf-16 up front.  Decoding
** stops at the first embedded nul or malformed utf-8 sequence; any
** input following that point is not tokenized.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if memory cannot be
** obtained (or the input is too large to index with an int), or
** SQLITE_ERROR if ICU reports a failure.  *ppCursor is set to NULL
** on any failure.
*/
static int icuOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *zInput,                    /* Input string */
  int nInput,                            /* Length of zInput in bytes */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  IcuCursor *pCsr;

  const int32_t opt = U_FOLD_CASE_DEFAULT;
  UErrorCode status = U_ZERO_ERROR;
  int nChar;
  sqlite3_int64 nAlloc;

  UChar32 c;
  int iInput = 0;
  int iOut = 0;

  *ppCursor = 0;

  if( zInput==0 ){
    /* Treat a NULL input as the empty string instead of dereferencing it. */
    zInput = "";
    nInput = 0;
  }else if( nInput<0 ){
    size_t n = strlen(zInput);
    if( n>0x7fffffff ){
      return SQLITE_NOMEM;
    }
    nInput = (int)n;
  }

  /* Compute the allocation size using 64-bit arithmetic so that a very
  ** large input cannot wrap around when passed to sqlite3_malloc(), which
  ** accepts an int. */
  nAlloc = (sqlite3_int64)sizeof(IcuCursor)                         /* IcuCursor */
         + ((sqlite3_int64)nInput+2) * (sqlite3_int64)sizeof(int)   /* aOffset[] */
         + ((sqlite3_int64)nInput+1) * (sqlite3_int64)sizeof(UChar);/* aChar[] */
  if( nAlloc>0x7fffffff ){
    return SQLITE_NOMEM;
  }
  nChar = nInput+1;

  pCsr = (IcuCursor *)sqlite3_malloc((int)nAlloc);
  if( !pCsr ){
    return SQLITE_NOMEM;
  }
  memset(pCsr, 0, sizeof(IcuCursor));
  pCsr->aOffset = (int *)&pCsr[1];
  pCsr->aChar = (UChar *)&pCsr->aOffset[nChar+1];

  /* aOffset[i] records the byte offset in zInput at which the character
  ** starting at aChar[i] begins.  Only entries that correspond to the
  ** start of a character (or the end of the text) are written. */
  pCsr->aOffset[0] = 0;
  while( iInput<nInput ){
    int isError = 0;

    /* Only read from zInput while bytes remain, so that a zero-length
    ** buffer that is not nul-terminated is never dereferenced. */
    U8_NEXT(zInput, iInput, nInput, c);
    if( c<=0 ){
      break;                  /* Embedded nul or malformed utf-8 */
    }

    c = u_foldCase(c, opt);
    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
    if( isError ){
      sqlite3_free(pCsr);
      return SQLITE_ERROR;
    }
    pCsr->aOffset[iOut] = iInput;
  }

  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
  if( !U_SUCCESS(status) ){
    sqlite3_free(pCsr);
    return SQLITE_ERROR;
  }
  pCsr->nChar = iOut;

  ubrk_first(pCsr->pIter);
  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return SQLITE_OK;
}

/*
** Close a tokenization cursor previously opened by a call to icuOpen().
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
  IcuCursor *pCsr = (IcuCursor *)pCursor;
  ubrk_close(pCsr->pIter);
  sqlite3_free(pCsr->zBuffer);
  sqlite3_free(pCsr);
  return SQLITE_OK;
}

/*
** Extract the next token from a tokenization cursor.
**
** Returns SQLITE_OK and fills in the output parameters when a token is
** available, SQLITE_DONE when the input is exhausted, SQLITE_NOMEM if
** the token buffer cannot be grown, or SQLITE_ERROR if ICU fails to
** convert the token to utf-8.  *ppToken points into a buffer owned by
** the cursor; it is overwritten by the next call and is not guaranteed
** to be nul-terminated.
*/
static int icuNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by icuOpen */
  const char **ppToken,               /* OUT: *ppToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  IcuCursor *pCsr = (IcuCursor *)pCursor;

  int iStart = 0;
  int iEnd = 0;
  int nByte = 0;

  /* Advance through break-iterator segments until one is found that
  ** contains something other than whitespace. */
  while( iStart==iEnd ){
    UChar32 c;

    iStart = ubrk_current(pCsr->pIter);
    iEnd = ubrk_next(pCsr->pIter);
    if( iEnd==UBRK_DONE ){
      return SQLITE_DONE;
    }

    /* Skip any leading whitespace within the segment. */
    while( iStart<iEnd ){
      int iWhite = iStart;
      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
      if( u_isspace(c) ){
        iStart = iWhite;
      }else{
        break;
      }
    }
    assert(iStart<=iEnd);
  }

  /* Convert the token to utf-8.  The first pass through the loop may find
  ** the buffer too small, in which case u_strToUTF8() reports the required
  ** size in nByte and the buffer is enlarged before trying again. */
  do {
    UErrorCode status = U_ZERO_ERROR;
    if( nByte ){
      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
      if( !zNew ){
        return SQLITE_NOMEM;
      }
      pCsr->zBuffer = zNew;
      pCsr->nBuffer = nByte;
    }

    u_strToUTF8(
        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
        &status                                  /* Output success/failure */
    );

    /* A buffer overflow is handled by the retry below.  Any other failure
    ** means the buffer contents cannot be trusted. */
    if( U_FAILURE(status) && status!=U_BUFFER_OVERFLOW_ERROR ){
      return SQLITE_ERROR;
    }
  } while( nByte>pCsr->nBuffer );

  *ppToken = pCsr->zBuffer;
  *pnBytes = nByte;
  *piStartOffset = pCsr->aOffset[iStart];
  *piEndOffset = pCsr->aOffset[iEnd];
  *piPosition = pCsr->iToken++;

  return SQLITE_OK;
}

/*
** The set of routines that implement the ICU tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
  0,                           /* iVersion */
  icuCreate,                   /* xCreate  */
  icuDestroy,                  /* xDestroy */
  icuOpen,                     /* xOpen    */
  icuClose,                    /* xClose   */
  icuNext,                     /* xNext    */
};

/*
** Set *ppModule to point at the implementation of the ICU tokenizer.
*/
void sqlite3Fts2IcuTokenizerModule(
  sqlite3_tokenizer_module const**ppModule
){
  *ppModule = &icuTokenizerModule;
}

#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */