15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 2006 September 30
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The author disclaims copyright to this source code.  In place of
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** a legal notice, here is a blessing:
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**    May you do good and not evil.
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**    May you find forgiveness for yourself and forgive others.
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**    May you share freely, never taking more than you give.
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*************************************************************************
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Implementation of the full-text-search tokenizer that implements
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** a Porter stemmer.
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The code in this file is only compiled if:
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**     * The FTS2 module is being built as an extension
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**       (in which case SQLITE_CORE is not defined), or
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**     * The FTS2 module is being built into the core of
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**       SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <assert.h>
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdlib.h>
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdio.h>
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string.h>
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "fts2_tokenizer.h"
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** Class derived from sqlite3_tokenizer.
**
** The porter tokenizer keeps no state of its own.  The base class must
** remain the first member: porterCreate() hands out &t->base as the
** tokenizer handle and porterDestroy() frees the object through that
** same pointer.
*/
typedef struct porter_tokenizer {
  sqlite3_tokenizer base;      /* Base class */
} porter_tokenizer;
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** Class derived from sqlite3_tokenizer_cursor.
**
** One of these is allocated by porterOpen() for each string being
** tokenized and released by porterClose().  The base class must remain
** the first member because porterClose() casts the base-class pointer
** back to this type.
*/
typedef struct porter_tokenizer_cursor {
  sqlite3_tokenizer_cursor base;
  const char *zInput;          /* input we are tokenizing (pointer is stored, not copied; never freed by porterClose) */
  int nInput;                  /* size of the input, in bytes */
  int iOffset;                 /* current position in zInput */
  int iToken;                  /* index of next token to be returned */
  char *zToken;                /* storage for current token (freed by porterClose) */
  int nAllocated;              /* space allocated to zToken buffer */
} porter_tokenizer_cursor;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/* Forward declaration of the tokenizer module object, so that it can be
** referenced before the point at which it is defined. */
static const sqlite3_tokenizer_module porterTokenizerModule;
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Create a new tokenizer instance.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int porterCreate(
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int argc, const char * const *argv,
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_tokenizer **ppTokenizer
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  porter_tokenizer *t;
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( t==NULL ) return SQLITE_NOMEM;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memset(t, 0, sizeof(*t));
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *ppTokenizer = &t->base;
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return SQLITE_OK;
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Destroy a tokenizer
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int porterDestroy(sqlite3_tokenizer *pTokenizer){
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_free(pTokenizer);
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return SQLITE_OK;
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Prepare to begin tokenizing a particular string.  The input
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** string to be tokenized is zInput[0..nInput-1].  A cursor
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** used to incrementally tokenize this string is returned in
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** *ppCursor.
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int porterOpen(
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *zInput, int nInput,        /* String to be tokenized */
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  porter_tokenizer_cursor *c;
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( c==NULL ) return SQLITE_NOMEM;
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  c->zInput = zInput;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( zInput==0 ){
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    c->nInput = 0;
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }else if( nInput<0 ){
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    c->nInput = (int)strlen(zInput);
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }else{
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    c->nInput = nInput;
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  c->iOffset = 0;                 /* start tokenizing at the beginning */
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  c->iToken = 0;
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  c->zToken = NULL;               /* no space allocated, yet. */
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  c->nAllocated = 0;
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *ppCursor = &c->base;
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return SQLITE_OK;
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Close a tokenization cursor previously opened by a call to
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** porterOpen() above.
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int porterClose(sqlite3_tokenizer_cursor *pCursor){
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_free(c->zToken);
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_free(c);
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return SQLITE_OK;
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
/*
** Vowel or consonant.
**
** Classification of the lower-case letters, indexed by (letter - 'a'):
**
**     0   the letter is a vowel ('a', 'e', 'i', 'o', 'u')
**     1   the letter is a consonant
**     2   the letter is 'y', whose class depends on its neighbour
*/
static const char cType[] = {
  /* a  b  c  d  e */  0, 1, 1, 1, 0,
  /* f  g  h  i  j */  1, 1, 1, 0, 1,
  /* k  l  m  n  o */  1, 1, 1, 1, 0,
  /* p  q  r  s  t */  1, 1, 1, 1, 1,
  /* u  v  w  x  y */  0, 1, 1, 1, 2,
  /* z             */  1
};
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** isConsonant() and isVowel() determine if their first character in
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** the string they point to is a consonant or a vowel, according
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** to Porter ruls.
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 'Y' is a consonant unless it follows another consonant,
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** in which case it is a vowel.
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** In these routine, the letters are in reverse order.  So the 'y' rule
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** is that 'y' is a consonant unless it is followed by another
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** consonent.
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int isVowel(const char*);
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int isConsonant(const char *z){
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int j;
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char x = *z;
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( x==0 ) return 0;
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( x>='a' && x<='z' );
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  j = cType[x-'a'];
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( j<2 ) return j;
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return z[1]==0 || isVowel(z + 1);
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int isVowel(const char *z){
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int j;
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char x = *z;
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( x==0 ) return 0;
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  assert( x>='a' && x<='z' );
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  j = cType[x-'a'];
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( j<2 ) return 1-j;
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return isConsonant(z + 1);
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** Let any sequence of one or more vowels be represented by V and let
** C be a sequence of one or more consonants.  Then every word can be
** represented as:
**
**           [C] (VC){m} [V]
**
** In prose:  A word is an optional consonant run followed by zero or
** more vowel-consonant pairs followed by an optional vowel run.  "m"
** is the number of vowel-consonant pairs.
**
** Return true if the m-value for z is 1 or more.  In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order.  So we are really looking
** for an instance of a consonant followed by a vowel: skip the leading
** vowels (the optional [V] of the word), then a run of consonants, and
** report whether anything, necessarily a vowel, is left over.
*/
static int m_gt_0(const char *z){
  const char *p = z;
  for(; isVowel(p); p++){}
  if( p[0]==0 ) return 0;
  for(; isConsonant(p); p++){}
  return p[0]!=0;
}
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** Return true if the m-value of z (the number of vowel-consonant
** pairs in the word) is exactly 1.
**
** z[] is in reverse order.  The scan skips, in turn: the word's
** trailing vowels, the consonants of the last pair, and the vowels of
** that pair.  If the string ends before that pair is complete, m is 0.
** If it ends right after the pair, or after one more consonant run
** (the word's leading consonants), m is 1.  Anything left over after
** that means m is greater than 1.
*/
static int m_eq_1(const char *z){
  const char *p = z;
  while( isVowel(p) ) p++;
  if( p[0]==0 ) return 0;
  while( isConsonant(p) ) p++;
  if( p[0]==0 ) return 0;
  while( isVowel(p) ) p++;
  if( p[0]==0 ) return 1;
  while( isConsonant(p) ) p++;
  return p[0]==0;
}
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** Return true if the m-value of z (the number of vowel-consonant
** pairs in the word) is greater than 1.
**
** z[] is in reverse order.  The scan skips the word's trailing
** vowels, then one full consonant-run/vowel-run pair, then one more
** consonant run.  If the string ends at any earlier point, m is less
** than 2.  A character remaining after the final consonant run must be
** a vowel, which completes a second pair.
*/
static int m_gt_1(const char *z){
  const char *p;
  for(p=z; isVowel(p); p++){}
  if( *p==0 ) return 0;
  for(; isConsonant(p); p++){}
  if( *p==0 ) return 0;
  for(; isVowel(p); p++){}
  if( *p==0 ) return 0;
  for(; isConsonant(p); p++){}
  return *p!=0;
}
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** Return TRUE if there is a vowel anywhere within the
** zero-terminated string z.
**
** Consonants are skipped from the front.  If the scan stops before the
** terminator, the character it stopped on is a vowel.
*/
static int hasVowel(const char *z){
  const char *p;
  for(p=z; isConsonant(p); p++){}
  return p[0]!=0;
}
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** Return TRUE if the word ends in a double consonant, that is, the
** same consonant letter twice in a row.
**
** The text is reversed here. So we are really looking at
** the first two characters of z[].
*/
static int doubleConsonant(const char *z){
  if( !isConsonant(z) ) return 0;
  if( z[0]!=z[1] ) return 0;
  return isConsonant(z+1)!=0;
}
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonant and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here.  So we are really checking the
** first three letters and the first one cannot be in [wxy].
** Each letter is checked against the terminator before it is
** classified, so a word shorter than three letters yields FALSE.
*/
static int star_oh(const char *z){
  if( z[0]==0 || !isConsonant(z) ) return 0;
  if( z[0]=='w' || z[0]=='x' || z[0]=='y' ) return 0;
  if( z[1]==0 || !isVowel(z+1) ) return 0;
  if( z[2]==0 ) return 0;
  return isConsonant(z+2)!=0;
}
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that precedes the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order.  zTo
** is in normal order.  The replacement is written backwards into the
** bytes immediately in front of the stem, and *pz is moved to the new
** start of the reversed word.  If zTo is longer than zFrom this writes
** before the original *pz, so the caller must provide room there.
**
** xCond may be NULL, in which case the substitution is unconditional.
**
** Return TRUE if zFrom matches.  Return FALSE if zFrom does not
** match.  Note that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
static int stem(
  char **pz,             /* The word being stemmed (Reversed) */
  const char *zFrom,     /* If the ending matches this... (Reversed) */
  const char *zTo,       /* ... change the ending to this (not reversed) */
  int (*xCond)(const char*)   /* Condition that must be true */
){
  char *zStem = *pz;
  const char *zRepl;

  /* Walk past the ending; bail out on the first mismatch. */
  for(; *zFrom!=0; zFrom++, zStem++){
    if( *zStem!=*zFrom ) return 0;
  }

  /* The ending matched.  zStem now points at the (reversed) stem. */
  if( xCond!=0 && xCond(zStem)==0 ) return 1;

  /* Lay the new ending down, last letter first, in front of the stem. */
  for(zRepl=zTo; *zRepl!=0; zRepl++){
    zStem--;
    *zStem = *zRepl;
  }
  *pz = zStem;
  return 1;
}
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate.  The input word zIn[0..nIn-1] is copied into zOut
** with US-ASCII case folding ([A-Z] becomes [a-z]; every other byte
** is copied unchanged).  If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then the word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
**
** The output is zero-terminated and its length, not counting the
** terminator, is written to *pnOut.  The whole folded word is written
** to zOut before any truncation, so zOut must have room for nIn bytes
** plus the terminator.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
  int nOut = 0;          /* Number of bytes currently in zOut */
  int sawDigit = 0;      /* True once a digit has been copied */
  int nKeep;             /* Bytes retained from each end of a long word */
  int k;

  /* Pass 1: fold case and note whether the word contains a digit. */
  while( nOut<nIn ){
    int ch = zIn[nOut];
    if( ch>='A' && ch<='Z' ){
      ch = ch - 'A' + 'a';
    }else if( ch>='0' && ch<='9' ){
      sawDigit = 1;
    }
    zOut[nOut++] = ch;
  }

  /* Pass 2: for an over-long word, slide the tail down behind the head. */
  nKeep = sawDigit ? 3 : 10;
  if( nIn>2*nKeep ){
    int iTail = nIn - nKeep;
    for(k=0; k<nKeep; k++){
      zOut[nKeep+k] = zOut[iTail+k];
    }
    nOut = 2*nKeep;
  }

  zOut[nOut] = 0;
  *pnOut = nOut;
}
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Stem the input word zIn[0..nIn-1].  Store the output in zOut.
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** zOut is at least big enough to hold nIn bytes.  Write the actual
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** size of the output word (exclusive of the '\0' terminator) into *pnOut.
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Any upper-case characters in the US-ASCII character set ([A-Z])
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** are converted to lower case.  Upper-case UTF characters are
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** unchanged.
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Words that are longer than about 20 bytes are stemmed by retaining
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** a few bytes from the beginning and the end of the word.  If the
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** word contains digits, 3 bytes are taken from the beginning and
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** 3 bytes from the end.  For long words without digits, 10 bytes
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** are taken from each end.  US-ASCII case folding still applies.
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
** If the input word contains no digits but does contain characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the output with US-ASCII
** case folding.
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)**
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Stemming never increases the length of the word.  So there is
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** no chance of overflowing the zOut buffer.
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int i, j, c;
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char zReverse[28];
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char *z, *z2;
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* The word is too big or too small for the porter stemmer.
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ** Fallback to the copy stemmer */
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    copy_stemmer(zIn, nIn, zOut, pnOut);
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    c = zIn[i];
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if( c>='A' && c<='Z' ){
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      zReverse[j] = c + 'a' - 'A';
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }else if( c>='a' && c<='z' ){
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      zReverse[j] = c;
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }else{
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      /* The use of a character not in [a-zA-Z] means that we fallback
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ** to the copy stemmer */
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      copy_stemmer(zIn, nIn, zOut, pnOut);
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memset(&zReverse[sizeof(zReverse)-5], 0, 5);
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  z = &zReverse[j+1];
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Step 1a */
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( z[0]=='s' ){
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if(
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     !stem(&z, "sess", "ss", 0) &&
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     !stem(&z, "sei", "i", 0)  &&
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     !stem(&z, "ss", "ss", 0)
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ){
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      z++;
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Step 1b */
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  z2 = z;
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( stem(&z, "dee", "ee", m_gt_0) ){
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* Do nothing.  The work was all in the test */
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }else if(
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      && z!=z2
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ){
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( stem(&z, "ta", "ate", 0) ||
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         stem(&z, "lb", "ble", 0) ||
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         stem(&z, "zi", "ize", 0) ){
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       /* Do nothing.  The work was all in the test */
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       z++;
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }else if( m_eq_1(z) && star_oh(z) ){
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       *(--z) = 'e';
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Step 1c */
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( z[0]=='y' && hasVowel(z+1) ){
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    z[0] = 'i';
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Step 2 */
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch( z[1] ){
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'a':
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "lanoita", "ate", m_gt_0) ||
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "lanoit", "tion", m_gt_0);
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'c':
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "icne", "ence", m_gt_0) ||
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "icna", "ance", m_gt_0);
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'e':
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "rezi", "ize", m_gt_0);
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'g':
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "igol", "log", m_gt_0);
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'l':
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "ilb", "ble", m_gt_0) ||
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "illa", "al", m_gt_0) ||
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "iltne", "ent", m_gt_0) ||
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "ile", "e", m_gt_0) ||
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "ilsuo", "ous", m_gt_0);
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'o':
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "noitazi", "ize", m_gt_0) ||
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "noita", "ate", m_gt_0) ||
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "rota", "ate", m_gt_0);
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 's':
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "msila", "al", m_gt_0) ||
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "ssenevi", "ive", m_gt_0) ||
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "ssenluf", "ful", m_gt_0) ||
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "ssensuo", "ous", m_gt_0);
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 't':
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "itila", "al", m_gt_0) ||
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "itivi", "ive", m_gt_0) ||
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "itilib", "ble", m_gt_0);
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Step 3 */
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch( z[0] ){
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'e':
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "etaci", "ic", m_gt_0) ||
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "evita", "", m_gt_0)   ||
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "ezila", "al", m_gt_0);
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'i':
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "itici", "ic", m_gt_0);
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'l':
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "laci", "ic", m_gt_0) ||
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "luf", "", m_gt_0);
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 's':
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "ssen", "", m_gt_0);
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Step 4 */
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch( z[1] ){
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'a':
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='l' && m_gt_1(z+2) ){
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       z += 2;
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'c':
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e')  && m_gt_1(z+4)  ){
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       z += 4;
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'e':
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='r' && m_gt_1(z+2) ){
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       z += 2;
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'i':
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='c' && m_gt_1(z+2) ){
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       z += 2;
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'l':
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       z += 4;
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'n':
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='t' ){
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       if( z[2]=='a' ){
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         if( m_gt_1(z+3) ){
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           z += 3;
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         }
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       }else if( z[2]=='e' ){
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         stem(&z, "tneme", "", m_gt_1) ||
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         stem(&z, "tnem", "", m_gt_1) ||
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         stem(&z, "tne", "", m_gt_1);
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       }
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'o':
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='u' ){
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       if( m_gt_1(z+2) ){
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         z += 2;
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       }
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }else if( z[3]=='s' || z[3]=='t' ){
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       stem(&z, "noi", "", m_gt_1);
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 's':
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       z += 3;
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 't':
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "eta", "", m_gt_1) ||
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     stem(&z, "iti", "", m_gt_1);
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'u':
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       z += 3;
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'v':
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   case 'z':
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       z += 3;
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     }
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)     break;
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Step 5a */
5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( z[0]=='e' ){
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if( m_gt_1(z+1) ){
5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      z++;
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }else if( m_eq_1(z+1) && !star_oh(z+1) ){
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      z++;
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* Step 5b */
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    z++;
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  /* z[] is now the stemmed word in reverse order.  Flip it back
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ** around into forward order and return.
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  */
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *pnOut = i = strlen(z);
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  zOut[i] = 0;
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while( *z ){
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    zOut[--i] = *(z++);
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/*
** Classification of the characters 0x30 through 0x7f.  Entry [c-0x30] is
** 1 when character c may be part of a token and 0 when it is a delimiter.
**
** isDelim(C) is true only for 7-bit characters that are either below 0x30
** or flagged 0 in the table.  Any byte with the high bit set (0x80 and
** above, e.g. a byte of a multi-byte UTF-8 sequence) is always treated as
** part of a token, so every delimiter has a value of 0x7f or lower.
**
** The macro stores its argument in a variable named "ch", which must be
** declared in the scope where the macro is used.
*/
static const char porterIdChar[] = {
  /* 0x30-0x3f: digits 0-9 are token characters; : ; < = > ? are not */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 0, 0, 0, 0, 0, 0,
  /* 0x40-0x4f: @ is a delimiter; A-O are token characters */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50-0x5f: P-Z and _ are token characters; [ \ ] ^ are not */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 1,
  /* 0x60-0x6f: ` is a delimiter; a-o are token characters */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70-0x7f: p-z are token characters; { | } ~ and 0x7f are not */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0
};
#define isDelim(C) \
  ( ((ch = (C)) & 0x80)==0 && (ch<0x30 || porterIdChar[ch-0x30]==0) )
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Extract the next token from a tokenization cursor.  The cursor must
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** have been opened by a prior call to porterOpen().
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int porterNext(
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by porterOpen */
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char **pzToken,               /* OUT: *pzToken is the token text */
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int *pnBytes,                       /* OUT: Number of bytes in token */
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int *piStartOffset,                 /* OUT: Starting offset of token */
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int *piEndOffset,                   /* OUT: Ending offset of token */
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int *piPosition                     /* OUT: Position integer of token */
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char *z = c->zInput;
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while( c->iOffset<c->nInput ){
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int iStartOffset, ch;
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* Scan past delimiter characters */
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      c->iOffset++;
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    /* Count non-delimiter characters. */
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    iStartOffset = c->iOffset;
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      c->iOffset++;
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if( c->iOffset>iStartOffset ){
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int n = c->iOffset-iStartOffset;
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if( n>c->nAllocated ){
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        c->nAllocated = n+20;
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        c->zToken = sqlite3_realloc(c->zToken, c->nAllocated);
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if( c->zToken==NULL ) return SQLITE_NOMEM;
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *pzToken = c->zToken;
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *piStartOffset = iStartOffset;
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *piEndOffset = c->iOffset;
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *piPosition = c->iToken++;
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return SQLITE_OK;
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return SQLITE_DONE;
6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** The set of routines that implement the porter-stemmer tokenizer
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const sqlite3_tokenizer_module porterTokenizerModule = {
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  0,
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  porterCreate,
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  porterDestroy,
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  porterOpen,
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  porterClose,
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  porterNext,
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** Allocate a new porter tokenizer.  Return a pointer to the new
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)** tokenizer in *ppModule
6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void sqlite3Fts2PorterTokenizerModule(
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  sqlite3_tokenizer_module const**ppModule
6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)){
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *ppModule = &porterTokenizerModule;
6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
642