1# 2010 November 02
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11# This file implements regression tests for the FTS3 module. The focus
12# of this file is tables created with the "matchinfo=fts3" option.
13#
14
15set testdir [file dirname $argv0]
16source $testdir/tester.tcl
17
18# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
19ifcapable !fts3 { finish_test ; return }
20
21set testprefix fts3matchinfo
22
23proc mit {blob} {
24  set scan(littleEndian) i*
25  set scan(bigEndian) I*
26  binary scan $blob $scan($::tcl_platform(byteOrder)) r
27  return $r
28}
29db func mit mit
30
31do_execsql_test 1.0 {
32  CREATE VIRTUAL TABLE t1 USING fts4(matchinfo=fts3);
33  SELECT name FROM sqlite_master WHERE type = 'table';
34} {t1 t1_content t1_segments t1_segdir t1_stat}
35
36do_execsql_test 1.1 {
37  INSERT INTO t1(content) VALUES('I wandered lonely as a cloud');
38  INSERT INTO t1(content) VALUES('That floats on high o''er vales and hills,');
39  INSERT INTO t1(content) VALUES('When all at once I saw a crowd,');
40  INSERT INTO t1(content) VALUES('A host, of golden daffodils,');
41  SELECT mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'I';
42} {{1 1 1 2 2} {1 1 1 2 2}}
43
44# Now create an FTS4 table that does not specify matchinfo=fts3.
45#
46do_execsql_test 1.2 {
47  CREATE VIRTUAL TABLE t2 USING fts4;
48  INSERT INTO t2 SELECT * FROM t1;
49  SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'I';
50} {{1 1 1 2 2} {1 1 1 2 2}}
51
52# Test some syntax-error handling.
53#
54do_catchsql_test 2.0 {
55  CREATE VIRTUAL TABLE x1 USING fts4(matchinfo=fs3);
56} {1 {unrecognized matchinfo: fs3}}
57do_catchsql_test 2.1 {
58  CREATE VIRTUAL TABLE x2 USING fts4(mtchinfo=fts3);
59} {1 {unrecognized parameter: mtchinfo=fts3}}
60
61# Check that with fts3, the "=" character is permitted in column definitions.
62#
63do_execsql_test 3.1 {
64  CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3);
65  INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees');
66  SELECT mtchinfo FROM t3;
67} {{Beside the lake, beneath the trees}}
68
69do_execsql_test 3.2 {
70  CREATE VIRTUAL TABLE xx USING FTS4;
71  SELECT * FROM xx WHERE xx MATCH 'abc';
72  SELECT * FROM xx WHERE xx MATCH 'a b c';
73}
74
75
76#--------------------------------------------------------------------------
77# Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function.
78#
79# The first argument - $tn - is a test identifier. This may be either a
80# full identifier (i.e. "fts3matchinfo-1.1") or, if global var $testprefix
81# is set, just the numeric component (i.e. "1.1").
82#
83# The second argument is the name of an FTSX table. The third is the 
84# full text of a WHERE/MATCH expression to query the table for 
85# (i.e. "t1 MATCH 'abc'"). The final argument - $results - should be a
86# key-value list (serialized array) with matchinfo() format specifiers
87# as keys, and the results of executing the statement:
88#
89#   SELECT matchinfo($tbl, '$key') FROM $tbl WHERE $expr
90#
91# For example:
92#
93#   CREATE VIRTUAL TABLE t1 USING fts4;
94#   INSERT INTO t1 VALUES('abc');
95#   INSERT INTO t1 VALUES('def');
96#   INSERT INTO t1 VALUES('abc abc');
97#
98#   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
99#     n {3 3}
100#     p {1 1}
101#     c {1 1}
102#     x {{1 3 2} {2 3 2}}
103#   }
104#
105# If the $results list contains keys mapped to "-" instead of a matchinfo()
106# result, then this command computes the expected results based on other
107# mappings to test the matchinfo() function. For example, the command above
108# could be changed to:
109#
110#   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
111#     n {3 3} p {1 1} c {1 1} x {{1 3 2} {2 3 2}}
112#     pcx -
113#   }
114#
115# And this command would compute the expected results for matchinfo(t1, 'pcx')
116# based on the results of matchinfo(t1, 'p'), matchinfo(t1, 'c') and 
117# matchinfo(t1, 'x') in order to test 'pcx'.
118#
119proc do_matchinfo_test {tn tbl expr results} {
120
121  foreach {fmt res} $results {
122    if {$res == "-"} continue
123    set resarray($fmt) $res
124  }
125
126  set nRow 0
127  foreach {fmt res} [array get resarray] {
128    if {[llength $res]>$nRow} { set nRow [llength $res] }
129  }
130
131  # Construct expected results for any formats for which the caller 
132  # supplied result is "-".
133  #
134  foreach {fmt res} $results {
135    if {$res == "-"} {
136      set res [list]
137      for {set iRow 0} {$iRow<$nRow} {incr iRow} {
138        set rowres [list]
139        foreach c [split $fmt ""] {
140          set rowres [concat $rowres [lindex $resarray($c) $iRow]]
141        }
142        lappend res $rowres
143      }
144      set resarray($fmt) $res
145    }
146  }
147
148  # Test each matchinfo() request individually.
149  #
150  foreach {fmt res} [array get resarray] {
151    set sql "SELECT mit(matchinfo($tbl, '$fmt')) FROM $tbl WHERE $expr"
152    do_execsql_test $tn.$fmt $sql [normalize2 $res]
153  }
154
155  # Test them all executed together (multiple invocations of matchinfo()).
156  #
157  set exprlist [list]
158  foreach {format res} [array get resarray] {
159    lappend exprlist "mit(matchinfo($tbl, '$format'))"
160  }
161  set allres [list]
162  for {set iRow 0} {$iRow<$nRow} {incr iRow} {
163    foreach {format res} [array get resarray] {
164      lappend allres [lindex $res $iRow]
165    }
166  }
167  set sql "SELECT [join $exprlist ,] FROM $tbl WHERE $expr"
168  do_execsql_test $tn.multi $sql [normalize2 $allres]
169}
170proc normalize2 {list_of_lists} {
171  set res [list]
172  foreach elem $list_of_lists {
173    lappend res [list {*}$elem]
174  }
175  return $res
176}
177
178
179do_execsql_test 4.1.0 {
180  CREATE VIRTUAL TABLE t4 USING fts4(x, y);
181  INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
182  INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
183}
184
185do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
186  p {3 3}
187  c {2 2}
188  x {
189    {1 1 1   0 1 1   1 1 1   0 1 1   1 1 1   0 1 1}
190    {0 1 1   1 1 1   0 1 1   1 1 1   0 1 1   1 1 1}
191  }
192  n {2 2}
193  l {{5 5} {5 5}}
194  a {{5 5} {5 5}}
195
196  s {{3 0} {0 3}}
197
198  xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
199  xpxsscplax -
200}
201
202do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
203  p {1 1}
204  c {2 2}
205  x {
206    {0 1 1   1 1 1}
207    {1 1 1   0 1 1}
208  }
209  n {2 2}
210  l {{5 5} {5 5}}
211  a {{5 5} {5 5}}
212
213  s {{0 1} {1 0}}
214
215  xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
216  sxsxs -
217}
218
219do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'}     { s {{2 0} {0 2}} }
220do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} }
221do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} }
222do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'}     { s {{1 0} {0 1}} }
223
224do_execsql_test 4.2.0 {
225  CREATE VIRTUAL TABLE t5 USING fts4;
226  INSERT INTO t5 VALUES('a a a a a');
227  INSERT INTO t5 VALUES('a b a b a');
228  INSERT INTO t5 VALUES('c b c b c');
229  INSERT INTO t5 VALUES('x x x x x');
230}
231do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'}         { 
232  x {{5 8 2   5 8 2} {3 8 2   3 8 2}}
233  s {2 1} 
234}
235do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'}         { s {2} }
236do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'}       { s {3} }
237do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
238do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
239do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1} }
240
241do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')";
242
243do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { 
244  x {{5 8 2   5 5 5} {3 8 2   3 5 5}}
245  s {2 1} 
246}
247
248do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'}         { s {2} }
249do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'}       { s {3} }
250do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
251do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
252do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'}      { s {1 2 1 1} }
253
254do_execsql_test 4.4.0 {
255  INSERT INTO t5(t5) VALUES('optimize');
256  UPDATE t5_segments 
257  SET block = zeroblob(length(block)) 
258  WHERE length(block)>10000;
259}
260
261do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'}         { s {2} }
262do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'}         { s {2 1} }
263do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'}         { s {2} }
264do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'}       { s {3} }
265do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'}       { s {3 1} }
266do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
267
268do_execsql_test 4.5.0 {
269  CREATE VIRTUAL TABLE t6 USING fts4(a, b, c);
270  INSERT INTO t6 VALUES('a', 'b', 'c');
271}
272do_matchinfo_test 4.5.1 t6 {t6 MATCH 'a b c'}       { s {{1 1 1}} }
273
274
275#-------------------------------------------------------------------------
276# Check the following restrictions:
277#
278#   + Matchinfo flags 'a', 'l' and 'n' can only be used with fts4, not fts3.
279#   + Matchinfo flag 'l' cannot be used with matchinfo=fts3.
280#
281do_execsql_test 5.1 {
282  CREATE VIRTUAL TABLE t7 USING fts3(a, b);
283  INSERT INTO t7 VALUES('u v w', 'x y z');
284
285  CREATE VIRTUAL TABLE t8 USING fts4(a, b, matchinfo=fts3);
286  INSERT INTO t8 VALUES('u v w', 'x y z');
287}
288
289do_catchsql_test 5.2.1 { 
290  SELECT matchinfo(t7, 'a') FROM t7 WHERE t7 MATCH 'x y'
291} {1 {unrecognized matchinfo request: a}}
292do_catchsql_test 5.2.2 { 
293  SELECT matchinfo(t7, 'l') FROM t7 WHERE t7 MATCH 'x y'
294} {1 {unrecognized matchinfo request: l}}
295do_catchsql_test 5.2.3 { 
296  SELECT matchinfo(t7, 'n') FROM t7 WHERE t7 MATCH 'x y'
297} {1 {unrecognized matchinfo request: n}}
298
299do_catchsql_test 5.3.1 { 
300  SELECT matchinfo(t8, 'l') FROM t8 WHERE t8 MATCH 'x y'
301} {1 {unrecognized matchinfo request: l}}
302
303#-------------------------------------------------------------------------
304# Test that the offsets() function handles corruption in the %_content
305# table correctly.
306#
307do_execsql_test 6.1 {
308  CREATE VIRTUAL TABLE t9 USING fts4;
309  INSERT INTO t9 VALUES(
310    'this record is used to try to dectect corruption'
311  );
312  SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to';
313} {{0 0 20 2 0 0 27 2}}
314
315do_catchsql_test 6.2 {
316  UPDATE t9_content SET c0content = 'this record is used to'; 
317  SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to';
318} {1 {database disk image is malformed}}
319
320#-------------------------------------------------------------------------
321# Test the outcome of matchinfo() when used within a query that does not
322# use the full-text index (i.e. lookup by rowid or full-table scan).
323#
324do_execsql_test 7.1 {
325  CREATE VIRTUAL TABLE t10 USING fts4;
326  INSERT INTO t10 VALUES('first record');
327  INSERT INTO t10 VALUES('second record');
328}
329do_execsql_test 7.2 {
330  SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10;
331} {blob 0 blob 0}
332do_execsql_test 7.3 {
333  SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10 WHERE docid=1;
334} {blob 0}
335do_execsql_test 7.4 {
336  SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) 
337  FROM t10 WHERE t10 MATCH 'record'
338} {blob 20 blob 20}
339
340#-------------------------------------------------------------------------
341# Test a special case - matchinfo('nxa') with many zero length documents. 
342# Special because "x" internally uses a statement used by both "n" and "a". 
343# This was causing a problem at one point in the obscure case where the
344# total number of bytes of data stored in an fts3 table was greater than
345# the number of rows. i.e. when the following query returns true:
346#
347#   SELECT sum(length(content)) < count(*) FROM fts4table;
348#
349do_execsql_test 8.1 {
350  CREATE VIRTUAL TABLE t11 USING fts4;
351  INSERT INTO t11(t11) VALUES('nodesize=24');
352  INSERT INTO t11 VALUES('quitealongstringoftext');
353  INSERT INTO t11 VALUES('anotherquitealongstringoftext');
354  INSERT INTO t11 VALUES('athirdlongstringoftext');
355  INSERT INTO t11 VALUES('andonemoreforgoodluck');
356}
357do_test 8.2 {
358  for {set i 0} {$i < 200} {incr i} {
359    execsql { INSERT INTO t11 VALUES('') }
360  }
361  execsql { INSERT INTO t11(t11) VALUES('optimize') }
362} {}
363do_execsql_test 8.3 {
364  SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
365} {{204 1 3 3 0} {204 1 3 3 0} {204 1 3 3 0}}
366
367# Corruption related tests.
368do_execsql_test  8.4.1.1 { UPDATE t11_stat SET value = X'0000'; }
369do_catchsql_test 8.5.1.2 {
370  SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
371} {1 {database disk image is malformed}}
372
373do_execsql_test  8.4.2.1 { UPDATE t11_stat SET value = X'00'; }
374do_catchsql_test 8.5.2.2 {
375  SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
376} {1 {database disk image is malformed}}
377
378do_execsql_test  8.4.3.1 { UPDATE t11_stat SET value = NULL; }
379do_catchsql_test 8.5.3.2 {
380  SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
381} {1 {database disk image is malformed}}
382
383finish_test
384
385