1<?xml version="1.0" encoding="UTF-8"?>
2
3<!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved -->
4
5<!-- Test data file for string search  -->
6<!DOCTYPE stringsearch-tests [
7<!ELEMENT stringsearch-tests (test-case+)>
8<!ATTLIST stringsearch-tests debug IDREF #IMPLIED >
9<!ELEMENT test-case (pattern, pre?, m?, post?)>
10<!ATTLIST test-case 
11          id ID #REQUIRED
12          locale CDATA "en" 
13          strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY" 
14          norm (ON | OFF) "OFF"
15          alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE"
16          >
17
18<!ELEMENT pattern (#PCDATA)>
19<!ELEMENT pre  (#PCDATA)>
20<!ELEMENT m    (#PCDATA)>
21<!ELEMENT post (#PCDATA)>
22]>
23
24<stringsearch-tests>
25  <!-- debug="test11"     (for copying into the above element)  -->
26    
27    <!-- Very simple match  -->
28    <test-case id="test01" >
29       <pattern>abc</pattern>
30       <pre>xxx</pre><m>abc</m><post>yyy</post>
31    </test-case>
32    
33    <!-- Very simple no-match  -->
34    <test-case id="test02" >
35       <pattern>abc</pattern>
36       <pre>xxx</pre><post>yyy</post>
37    </test-case>
38
39    <!-- Match after several near-misses. -->
40    <test-case id="test03" >
41       <pattern>string</pattern>
42       <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post>
43    </test-case>
44    
45    <test-case id="test04" strength="PRIMARY" >
46       <pattern>FUSS</pattern>
47       <pre>abc</pre><m>fuss</m><post>sss</post>
48    </test-case>
49    
50    <test-case id="test05" strength="PRIMARY" >
51       <pattern>FUSS</pattern>
52       <pre>abc</pre><m>fuß</m><post>sss</post>
53    </test-case>
54
55  <test-case id="test05.5" strength="PRIMARY" >
56    <pattern>fuss</pattern>
57    <pre>a </pre>
58    <m>fuß</m>
59    <post>ball table</post>
60  </test-case>
61
62  <test-case id="test06" strength="PRIMARY" >
63      <pattern>fuß</pattern>
64       <pre>abc</pre><m>fuss</m><post>xyz</post>
65    </test-case>
66    
67    <test-case id="test07" strength="SECONDARY" >
68      <pattern>fuß</pattern>
69      <pre>abcfussxyz</pre>
70    </test-case>
71    
72    <test-case id="test08" strength="PRIMARY" >
73      <pattern>fus</pattern>
74      <pre>abcfuß</pre><post>xyz</post>
75    </test-case>
76    
77    <!-- A good match following an initial match that failed because
78         of not ending on a character boundary -->
79    <test-case id="test09" strength="PRIMARY">
80      <pattern>fus</pattern>
81      <pre>fuß  </pre><m>fus</m><post>sss</post>
82    </test-case>
83
84
85    <!-- Test cases from usrchdat.c  BREAKITERATOREXACT -->
86
87    <test-case id="test10" strength="TERTIARY">
88      <pattern>fox</pattern>
89      <m>fox</m><post>y fox</post>
90    </test-case>
91
92    <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook">
93      <pattern>toe</pattern>
94      <pre>This is a </pre><m>Tö</m><post>ne</post>
95    </test-case>
96    
97    <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook">
98      <pattern>toe</pattern>
99      <pre>This is a </pre><post>Töne</post>
100    </test-case>
101    
102    <test-case id="test12" strength="TERTIARY">
103      <pattern>e</pattern>
104      <pre>tésting that é doés not match </pre><m>e</m><post></post>
105    </test-case>
106    
107    <test-case id="test13" strength="PRIMARY" locale="fr">
108      <pattern>e</pattern>
109      <pre></pre><m>É</m><post>É</post>
110    </test-case>
111    
112    <test-case id="test14" strength="PRIMARY" locale="fr">
113      <pattern>O</pattern>
114      <pre>C</pre><m>O\u0302</m><post>TÉ</post>
115    </test-case>
116
117
118    <!-- Test cases from usrchdat.c  STRENGTH -->
119
120
121    <test-case id="test15" strength="PRIMARY" locale="en">
122      <pattern>fox</pattern>
123      <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post>
124    </test-case>
125    
126    <test-case id="test16" strength="PRIMARY" locale="fr">
127      <pattern>peche</pattern>
128      <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post>
129    </test-case>
130    
131    <test-case id="test17" strength="PRIMARY" locale="fr">
132      <pattern>peche</pattern>
133      <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post>
134    </test-case>
135    
136    <test-case id="test18" strength="PRIMARY" locale="fr">
137      <pattern>peche</pattern>
138      <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post>
139    </test-case>
140    
141    <test-case id="test19" strength="PRIMARY" locale="fr">
142      <pattern>peche</pattern>
143      <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post>
144    </test-case>
145    
146    <test-case id="test20" strength="PRIMARY" locale="es">
147      <pattern>channel</pattern>
148      <pre>A </pre><m>channel</m><post>, </post>
149    </test-case>
150    
151    <test-case id="test21" strength="PRIMARY" locale="es">
152      <pattern>channel</pattern>
153      <pre>A </pre><m>CHANNEL</m><post>, </post>
154    </test-case>
155    
156    <test-case id="test22" strength="PRIMARY" locale="es">
157      <pattern>channel</pattern>
158      <pre>A </pre><m>Channel</m><post>s, </post>
159    </test-case>
160    
161    <test-case id="test23" strength="PRIMARY" locale="es">
162      <pattern>channel</pattern>
163      <pre>A </pre><m>channel</m><post>... </post>
164    </test-case>
165    
166    <test-case id="test24" strength="TERTIARY" locale="en">
167      <pattern>A\u0300</pattern>
168      <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post>
169    </test-case>
170    
171    <!-- TODO:  In the original test data, this test matched at IDENTICAL strength.
172                Doesn't seem right.  The characters are different.
173                -->
174    <test-case id="test24a" strength="IDENTICAL" locale="en">
175      <pattern>A\u0300</pattern>
176      <pre>At IDENTICAL, shoud this match?  </pre><m>\u00c0</m><post></post>
177    </test-case>
178
179  <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
180    <pattern>A\u0300</pattern>
181    <pre>At IDENTICAL, shoud this match?  </pre>
182    <m>\u00c0</m>
183    <post></post>
184  </test-case>
185
186  <test-case id="test25" strength="SECONDARY" locale="en">
187      <pattern>Ű</pattern>
188      <pre>12</pre><m>ű</m><post> Ű</post>
189    </test-case>
190    
191    <test-case id="test26" strength="SECONDARY" locale="en">
192      <pattern>A</pattern>
193      <pre>12</pre><m>a</m><post>...</post>
194    </test-case>
195
196
197    <!--  Test Cases from usrchdat.c,  VARIABLE -->
198    <test-case id="test27" strength="TERTIARY" locale="en">
199      <pattern>blackbird</pattern>
200      <pre>black-bird </pre><m>blackbird</m><post>...</post>
201    </test-case>
202
203    <test-case id="test28" strength="TERTIARY" locale="en">
204      <pattern>go</pattern>
205      <pre> on</pre>
206    </test-case>
207
208    <!-- TODO:  this gives a U_ILLEGAL_ARGUMENT error when opening
209                the UStringSearch.  How did the original test run? -->
210    <!--
211    <test-case id="test29" strength="PRIMARY" locale="en">
212      <pattern>  </pattern>
213      <pre></pre><m></m><post>abc</post>
214    </test-case>
215    -->
216
217    <test-case id="test30" strength="SECONDARY" locale="en">
218      <pattern>abc</pattern>
219      <pre>  a bc   ab c    a  bc     ab  c"</pre>
220    </test-case>
221
222    <test-case id="test31" strength="SECONDARY" locale="en">
223      <pattern>abc</pattern>
224      <pre>           ---------------</pre>
225    </test-case>
226
227
228    <!--  Normalization test cases from usrchdat.c  -->
229    <test-case id="test32" strength="TERTIARY"  norm="ON">
230      <pattern>a\u0325\u0300</pattern>
231      <pre></pre><m>a\u0300\u0325</m>
232    </test-case>
233
234
235    <test-case id="test32a" strength="TERTIARY"  norm="OFF">
236      <pattern>a\u0325\u0300</pattern>
237      <pre>a\u0300\u0325</pre>
238    </test-case>
239
240
241    <!-- COMPOSITEBOUNDARIES from usrchdat.c
242         Boundaries are not identical to original test data because
243         of matching only full combining sequences
244    -->
245    <test-case id="test40" strength="TERTIARY">
246      <pattern>A</pattern>
247      <pre>À</pre>   <!-- \u00C0 -->
248    </test-case>
249    
250    <test-case id="test41" strength="TERTIARY">
251      <pattern>A</pattern>
252      <pre>À</pre><m>A</m><post>C</post>
253    </test-case>
254    
255    <test-case id="test42" strength="TERTIARY">
256      <pattern>A\u030A</pattern>
257      <pre>À\u01FA</pre>
258    </test-case>
259
260
261
262    <!-- SUPPLEMENTARYCANONICAL from usrchdat.c  -->
263    <test-case id="test50" strength="TERTIARY">
264      <pattern>\uD800\uDC00</pattern>
265      <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m>
266      <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post>
267    </test-case>
268    
269    <test-case id="test51" strength="TERTIARY">
270      <pattern>\\uD834\\uDDB9</pattern>
271      <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post>
272    </test-case>
273
274    <test-case id="test52" strength="TERTIARY">
275      <pattern> \\uD834\\uDDB9 </pattern>
276      <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post>
277    </test-case>
278    
279    <test-case id="test53" strength="TERTIARY">
280      <pattern>-\\uD834\\uDDB9-</pattern>
281      <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post>
282    </test-case>
283    
284    <test-case id="test54" strength="TERTIARY">
285      <pattern>,\\uD834\\uDDB9,</pattern>
286      <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post>
287    </test-case>
288    
289    <test-case id="test55" strength="TERTIARY">
290      <pattern>?\\uD834\\uDDB9?</pattern>
291      <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post>
292    </test-case>
293    
294
295    <!-- Long combining sequences  -->
296    <!-- Backwards search fails because the pattern ends with ignorables
297    <test-case id="test60" strength="PRIMARY">
298      <pattern>A\u0301\u0301\u0301\u0301</pattern>
299      <m>A\u0301\u0301\u0301\u0301\u0301</m>
300    </test-case>
301    -->
302
303    <test-case id="test61" strength="TERTIARY">
304      <pattern>A\u0301\u0301\u0301\u0301</pattern>
305          <pre>A\u0301\u0301\u0301\u0301\u0301</pre>
306    </test-case>
307    
308    <test-case id="test62" strength="TERTIARY">
309      <pattern>A\u0301\u0301\u0301\u0301</pattern>
310            <m>A\u0301\u0301\u0301\u0301</m>
311    </test-case>
312
313    <!-- stand-alone combining marks don't match attached marks  -->
314    <test-case id="test63" strength="TERTIARY">
315      <pattern>\u0301</pattern>
316      <pre>A\u0301\u0301\u0301\u0301</pre>
317    </test-case>
318    
319    <test-case id="test64" strength="TERTIARY">
320      <pattern>\u0301</pattern>
321      <post>\u0301\u0301\u0301\u0301</post>
322    </test-case>
323
324  <!-- stand-alone combining mark does match an un-attached combining mark -->
325    <test-case id="test65" strength="TERTIARY">
326       <pattern>\u0301</pattern>
327       <m>\u0301</m><post>A\u0301\u0301</post>
328    </test-case>
329
330    <test-case id="test66" strength="TERTIARY">
331       <pattern>\u0301</pattern>
332       <m>\u0301</m>
333    </test-case>
334          
335    <!-- stand-alone combining marks at end of the target text -->
336    <test-case id="test67" strength="TERTIARY">
337       <pattern>\u0301</pattern>
338       <pre>abcd\r</pre><m>\u0301</m>
339    </test-case>
340
341      <!-- attached combining marks at end of the target text, no match -->
342    <test-case id="test68" strength="TERTIARY">
343       <pattern>\u0301</pattern>
344       <pre>abcd\u0301</pre>
345    </test-case>
346
347
348
349   <!-- no match within expansions at the start -->
350    <test-case id="test70" strength="PRIMARY">
351      <pattern>Eligature</pattern>
352      <pre>Æligature</pre>
353    </test-case>
354
355    <test-case id="test71" strength="PRIMARY">
356      <pattern>AEligature</pattern>
357      <m>Æligature</m>
358    </test-case>
359
360    <test-case id="test72" strength="PRIMARY">
361        <pattern>AEligature</pattern>
362        <m>Æligature</m>
363    </test-case>
364    
365    <!-- unattached combining Tilde will not match a Tilde that is
366         part of a composed Ñ  (\u00D1)  -->
367    <test-case id="test73" strength="SECONDARY">
368        <pattern>\u0303</pattern>  <!-- combining tilde -->
369        <pre>Ñ&#x0d;</pre><m>\u0303</m>
370    </test-case>
371    
372    <test-case id="test74" strength="SECONDARY">
373        <pattern>\u0303</pattern>  <!-- combining tilde -->
374        <pre>Ñ &#x0d;</pre><m>\u0303</m><post>a</post>
375    </test-case>
376
377  <test-case id="test75" strength="TERTIARY" locale="fr">
378    <pattern>\u00EA</pattern>
379    <pre>p</pre><m>\u00EA</m><post>che</post>
380  </test-case>
381
382  <test-case id="test76" strength="TERTIARY" locale="fr">
383    <pattern>\u00EA</pattern>
384    <pre>p</pre><m>e\u0302</m><post>che</post>
385  </test-case>
386
387  <test-case id="test77" strength="TERTIARY" locale="fr">
388    <pattern>e\u0302</pattern>
389    <pre>p</pre><m>\u00EA</m><post>che</post>
390  </test-case>
391
392  <!-- Test cases from ticket:5382 -->
393  <test-case id="test78" strength="SECONDARY" locale="hu_HU">
394    <pattern>\u0170</pattern>
395    <m>\u0171</m>
396    <post>12</post>
397  </test-case>
398
399  <test-case id="test79" strength="SECONDARY" locale="hu_HU">
400    <pattern>\u0170</pattern>
401    <pre>1</pre>
402    <m>\u0171</m>
403    <post>2</post>
404  </test-case>
405
406  <test-case id="test80" strength="SECONDARY" locale="hu_HU">
407    <pattern>\u0170</pattern>
408    <pre>12</pre>
409    <m>\u0171</m>
410  </test-case>
411  
412  <!-- Test cases from ticket:5959 -->
413  <test-case id="test81" strength="SECONDARY">
414    <pattern>\u2166</pattern>
415    <m>VII</m>
416  </test-case>
417
418  <test-case id="test82" strength="SECONDARY">
419    <pattern>VII</pattern>
420    <m>\u2166</m>
421  </test-case>
422
423  <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
424    <pattern>Universal Declaration of Human Rights</pattern>
425    <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post>
426  </test-case>
427
428  <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en">
429    <pattern>Universal Declaration of Human Rights</pattern>
430    <pre>Proclaims this </pre>
431    <m>Universal-Declaration-of-Human-Rights</m>
432    <post> as a common standard of achievement for all peoples and all nations</post>
433  </test-case>
434
435  <test-case id="test84" strength="TERTIARY" locale="en">
436    <pattern>\u05E9\u0591\u05E9</pattern>
437    <m>\u05E9\u0592\u05E9</m>
438  </test-case>
439
440  <test-case id="test84b" strength="IDENTICAL" locale="en">
441    <pattern>\u05E9\u0591\u05E9</pattern>
442    <pre>\u05E9\u0592\u05E9</pre>
443  </test-case>
444</stringsearch-tests>
445  
446