1// Copyright (c) 2001-2010 International Business Machines
2// Corporation and others. All Rights Reserved.
3DataDrivenCollationTest:table(nofallback) {
4    Info {
5        Headers { "sequence" }
6        Description { "These are the data driven tests" }
7        LongDescription {     "The following entries are separate tests containing test data for various locales."
8                      "Each entry has the following fields: "
9                      "Info/Description - short description of the test"
10                      "Settings - settings for the test."
11                      "Settings/TestLocale - locale for the collator OR"
12                      "Settings/Rules - rules for the collator (can't have both)"
13                      "Settings/Arguments - arguments to be passed to the collator before testing. Use rule syntax."
14                      "Cases - set of test cases, which are sequences of strings that will be parsed"
15                      "Sequences must not change the sign of relation, i.e. we can only have < and = or"
16                      "> and = in single sequence. Cannot mix < and > in the same sequence. Whitespace is"
17                      " ignored unless quoted."
18                     }
19    }
20    TestData {
21		TestMorePinyin { // Locale zh at strength 1: one sequence of pinyin syllables with macron vowels.
22			Info {
23				Description { "Testing the primary strength." }
24			}
25			Settings {
26				{
27					TestLocale { "zh" }
28					Arguments { "[strength 1]" }
29				}
30			}
31			Cases { "lā = lĀ = Lā = LĀ < lān = lĀn < lē = lĒ = Lē = LĒ < lēn = lĒn" } // Case variants are expected equal; only the syllables themselves differ.
32			
33		}
34        TestLithuanian { // Locale lt, no extra arguments: a single strictly ascending sequence.
35            Info {
36                Description { "Lithuanian sort order." }
37            }
38            Settings {
39                {
40                    TestLocale { "lt" }
41                }
42            }
43            Cases { "cz<č<d<iz<j<sz<š<t<zz<ž" } 
44        } // Expects č, š, ž to sort after cz, sz, zz respectively, and j after iz.
45        TestLatvian { // Locale lv, no extra arguments: a single strictly ascending sequence.
46            Info {
47                Description { "Latvian sort order." }
48            }
49            Settings {
50                {
51                    TestLocale { "lv" }
52                }
53            }
54            Cases { "cz<č<d<gz<ģ<h<iz<j<kz<ķ<l<lz<ļ<m<nz<ņ<o<rz<ŗ<s<sz<š<t<zz<ž" }
55        } // Expects each of č, ģ, ķ, ļ, ņ, ŗ, š, ž to sort after its base letter followed by z.
56        TestEstonian { // Locale et, no extra arguments: a single strictly ascending sequence.
57            Info {
58                Description { "Estonian sort order." }
59            }
60            Settings {
61                {
62                    TestLocale { "et" }
63                }
64            }
65            Cases { "sy<š<šy<z<zy<ž<v<w<va<õ<õy<ä<äy<ö<öy<ü<üy<x" }
66        } // Expected order puts z and ž before v, w between v and va, and õ, ä, ö, ü (in that order) before x.
67        TestAlbanian { // Locale sq, no extra arguments: a single strictly ascending sequence.
68            Info {
69                Description { "Albanian sort order." }
70            }
71            Settings {
72                {
73                    TestLocale { "sq" }
74                }
75            }
76            Cases { "cz<ç<d<dz<dh<e<ez<ë<f<gz<gj<h<lz<ll<m<nz<nj<o<rz<rr<s<sz<sh<t<tz<th<u<xz<xh<y<zz<zh" }
77         } // Expects ç, ë and the digraphs dh, gj, ll, nj, rr, sh, th, xh, zh to sort after the base letter followed by z.
78         
79         TestSimplifiedChineseOrder { // Uses the root locale (not a Chinese locale) with normalization switched on.
80            Info {
81                Description { "Sorted file has different order." }
82            }
83            Settings {
84                {
85                    TestLocale { "root" }
86		    Arguments { "[normalization on]" }
87                }
88            }
89
90            Cases { "\u5F20<\u5F20\u4E00\u8E3F"  } // U+5F20 alone must sort before U+5F20 followed by two more ideographs.
91        }
92        
93        TestTibetanNormalizedIterativeCrash { // Root locale, no extra arguments; the Description says this input was crash-prone.
94            Info {
95                Description { "This pretty much crashes." }
96            }
97            Settings {
98                {
99                    TestLocale { "root" }
100                }
101            }
102
103            Cases { "\u0f71\u0f72\u0f80\u0f71\u0f72" // No comma after this literal, so it joins the next one into a single sequence.
104                    "<\u0f80" // The five-code-point string above must sort before U+0F80 alone.
105            }
106        }
107        TestThaiPartialSortKeyProblems { // Locale th_TH: five independent two-string sequences, separated by commas.
108            Info {
109                Description { "These are examples of strings that caused trouble in partial sort key testing." }
110            }
111            Settings {
112                {
113                    TestLocale { "th_TH" }
114                }
115            }
116            // TODO: the tests that are commented out should be enabled when j2720 is fixed. NOTE(review): no commented-out cases are visible in this block - confirm whether this TODO is stale.
117            Cases { "\u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C" // Each pair below is two adjacent literals forming one a<b sequence.
118                    "<\u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18",
119                    "\u0E01\u0E07\u0E01\u0E32\u0E23"
120                    "<\u0E01\u0E07\u0E42\u0E01\u0E49",
121                    "\u0E01\u0E23\u0E19\u0E17\u0E32"
122                    "<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",
123                    "\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27"
124                    "<\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27",
125                    "\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"
126                    "<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"
127          }
128        }
129        TestJavaStyleRule { // Builds the collator from custom Rules instead of a TestLocale.
130            Info {
131                Description { "java.text allows rules to start as '<<<x<<<y...' "
132                              "we emulate this by assuming a &[first tertiary ignorable] "
133                              "in this case."
134                }
135            }
136            Settings {
137                {
138                    Rules { "=equal<<<z<<x<<<w<y &[first tertiary ignorable]=a &[first primary ignorable]=b" } // The rule string starts with a relation and no explicit reset.
139                }
140            }
141            Cases { "a = equal < z < x < w < b < y" } // Single sequence stating the order expected from the rules above.
142        }
143        TestShiftedIgnorable { // Root locale with alternate shifted at strength 4.
144            Info {
145                Description { "New UCA states that primary ignorables should be completely "
146                              "ignorable when following a shifted code point."
147                            }
148            }
149            Settings {
150                {
151                    TestLocale { "root" }
152                    Arguments { "[alternate shifted][strength 4]" }
153                }
154            }
155            Cases { // All literals below are adjacent (no commas), so they form one long sequence.
156                "a' 'b=" // The space is single-quoted so it is kept as data instead of being skipped as whitespace.
157                "a' '\u0300b=" // A combining accent right after the space or underscore is expected to compare equal to its absence.
158                "a' '\u0301b<"
159                "a_b="
160                "a_\u0300b="
161                "a_\u0301b<"
162                "A' 'b="
163                "A' '\u0300b="
164                "A' '\u0301b<"
165                "A_b="
166                "A_\u0300b="
167                "A_\u0301b<"
168                "a\u0301b<" // Without a preceding space or underscore, the accents are expected to make a difference.
169                "A\u0301b<"
170                "a\u0300b<"
171                "A\u0300b"
172
173            }
174        }
175
176        TestNShiftedIgnorable { // Root locale with alternate non-ignorable at strength 3.
177            Info {
178                Description { "New UCA states that primary ignorables should be completely "
179                              "ignorable when following a shifted code point."
180                            }
181            }
182            Settings {
183                {
184                    TestLocale { "root" }
185                    Arguments { "[alternate non-ignorable][strength 3]" } // NOTE(review): the Description is identical to TestShiftedIgnorable's although this case is non-ignorable - confirm the wording.
186                }
187            }
188            Cases { // All literals below are adjacent (no commas), so they form one long, strictly ascending sequence.
189                "a' 'b<" // The space is single-quoted so it is kept as data instead of being skipped as whitespace.
190                "A' 'b<"
191                "a' '\u0301b<"
192                "A' '\u0301b<"
193                "a' '\u0300b<"
194                "A' '\u0300b<"
195                "a_b<"
196                "A_b<"
197                "a_\u0301b<"
198                "A_\u0301b<"
199                "a_\u0300b<"
200                "A_\u0300b<"
201                "a\u0301b<"
202                "A\u0301b<"
203                "a\u0300b<"
204                "A\u0300b<" // NOTE(review): the sequence ends with a dangling relation and no right-hand string, unlike TestShiftedIgnorable - confirm the parser tolerates this.
205            }
206        }
207
208        TestSafeSurrogates { // Builds the collator from custom Rules whose only tailored string contains a surrogate pair.
209            Info {
210                Description { "It turned out that surrogates were not skipped properly "
211                              "when iterating backwards if they were in the middle of a "
212                              "contraction. This test assures that this is fixed."
213                            }
214            }
215            Settings {
216                {
217                    Rules {
218                                "&a < x\ud800\udc00b" // x, the surrogate pair D800 DC00, then b - tailored after a.
219                    }
220                }
221            }
222            Cases {
223                "a<x\ud800\udc00b" // Same string as in the rule; must sort after a.
224            }
225        }
226/*
227	UCA 4.1 removes skipping of ignorable code points in contractions!
228        TestCIgnorableContraction {
229            Info {
230                Description { "Checks whether completely ignorable code points are "
231                              "skipped in contractions."
232                              }
233            }
234            Settings {
235                {
236                    TestLocale { "sh" }
237                }
238                {
239                    Rules {
240                                "& L < lj, Lj <<< LJ"
241                                "& N < nj, Nj <<< NJ "
242                    }
243                }
244            }
245            Cases {
246                "njiva=n\ud834\udd65jiva=n\uD834\uDD79jiva=n\u0000\u0000\u0000jiva=n\u0000jiva=n\ud800jiva=n\ufffejiva",
247                "ljubav=l\u0000jubav=l\uD834\uDD79jubav=l\u0000\u0000\u0000jubav=l\ud800jubav=l\ufffejubav",
248                "Ljubav=L\u0000jubav=L\uD834\uDD79jubav=L\u0000\u0000\u0000jubav=L\ud800jubav=L\ufffejubav",
249            }
250        }
251                
252*/
253/*
254	UCA 4.1 removes skipping of ignorable code points in contractions!
255        TestCIgnorablePrefix {
256            Info {
257                Description { "Checks whether completely ignorable code points are "
258                              "skipped in prefix processing."
259                              }
260            }
261            Settings {
262                {
263                    TestLocale { "ja" }
264                }
265            }
266            Cases {
267               "\u30A1\u30FC"
268               "= \u30A1\uDB40\uDC30\u30FC"
269               "= \u30A1\uD800\u30FC"
270               "= \u30A1\uFFFE\u30FC"
271               "= \u30A1\uD834\uDD79\u30FC"
272               "= \u30A1\u0000\u0000\u0000\u30FC"
273               "= \u30A1\u0000\u30FC"
274               "= \u30A1\u30FC"
275               "= \u30A1\u0000\u059a\u30FC"
276               "= \u30A1\u30FC"
277            }
278        }
279*/
280        da_TestPrimary { // Locale da at strength 1: three independent sequences, separated by commas.
281            Info {
282                Description { "This test goes through primary strength cases" }
283            }
284            Settings {
285                {
286                    TestLocale { "da" }
287                    Arguments { "[strength 1]" }
288                }
289            }
290            Cases {
291                "Lvi<Lwi",
292                "L\u00e4vi<L\u00f6wi",
293                "L\u00fcbeck=Lybeck", // U+00FC and y are expected equal at this strength.
294            }
295        }
296        da_TestTertiary { // Locale da at strength 3: five short sequences followed by two long chained sequences.
297            Info {
298                Description { "This test goes through tertiary strength cases" }
299            }
300            Settings {
301                {
302                    TestLocale { "da" }
303                    Arguments { "[strength 3]" }
304                }
305            }
306            Cases {
307                "Luc<luck",
308                "luck<L\u00fcbeck",
309                "L\u00fcbeck>lybeck", // The only sequence in this block that uses the greater-than relation.
310                "L\u00e4vi<L\u00f6we",
311                "L\u00f6ww<mast",
312                // const UChar CollationDanishTest::testBugs[][CollationDanishTest::MAX_TOKEN_LEN] - the literals below are adjacent, forming one sequence that ends at the comma.
313                "A/S<"
314                "ANDRE<"
315                "ANDR\u00c9<"
316                "ANDREAS<"
317                "AS<"
318                "CA<"
319                "\u00c7A<"
320                "CB<"
321                "\u00c7C<"
322                "D.S.B.<"
323                "DA<"
324                "\u00d0A<"
325                "DB<"
326                "\u00d0C<"
327                "DSB<"
328                "DSC<"
329                "EKSTRA_ARBEJDE<"
330                "EKSTRABUD0<"
331                "H\u00d8ST<"
332                "HAAG<"
333                "H\u00c5NDBOG<"
334                "HAANDV\u00c6RKSBANKEN<"
335                "Karl<"
336                "karl<"
337                "'NIELS J\u00d8RGEN'<"
338                "NIELS-J\u00d8RGEN<"
339                "NIELSEN<"
340                "'R\u00c9E, A'<"
341                "'REE, B'<"
342                "'R\u00c9E, L'<"
343                "'REE, V'<"
344                "'SCHYTT, B'<"
345                "'SCHYTT, H'<"
346                "'SCH\u00dcTT, H'<"
347                "'SCHYTT, L'<"
348                "'SCH\u00dcTT, M'<"
349                "SS<"
350                "\u00df<"
351                "SSA<"
352                "'STORE VILDMOSE'<"
353                "STOREK\u00c6R0<"
354                "'STORM PETERSEN'<"
355                "STORMLY<"
356                "THORVALD<"
357                "THORVARDUR<"
358                "\u00feORVAR\u00d0UR<"
359                "THYGESEN<"
360                "'VESTERG\u00c5RD, A'<"
361                "'VESTERGAARD, A'<"
362                "'VESTERG\u00c5RD, B'<"
363                "\u00c6BLE<"
364                "\u00c4BLE<"
365                "\u00d8BERG<"
366                "\u00d6BERG",
367
368                // const UChar CollationDanishTest::testNTList[][CollationDanishTest::MAX_TOKEN_LEN] - the literals below are adjacent, forming the final sequence.
369                "andere<"
370                "chaque<"
371                "chemin<"
372                "cote<"
373                "cot\u00e9<"
374                "c\u00f4te<"
375                "c\u00f4t\u00e9<"
376                "\u010du\u010d\u0113t<"
377                "Czech<"
378                "hi\u0161a<"
379                "irdisch<"
380                "lie<"
381                "lire<"
382                "llama<"
383                "l\u00f5ug<"
384                "l\u00f2za<"
385                "lu\u010d<"
386                "luck<"
387                "L\u00fcbeck<"
388                "lye<"
389                "l\u00e4vi<"
390                "L\u00f6wen<"
391                "m\u00e0\u0161ta<"
392                "m\u00eer<"
393                "myndig<"
394                "M\u00e4nner<"
395                "m\u00f6chten<"
396                "pi\u00f1a<"
397                "pint<"
398                "pylon<"
399                "\u0161\u00e0ran<"
400                "savoir<"
401                "\u0160erb\u016bra<"
402                "Sietla<"
403                "\u015blub<"
404                "subtle<"
405                "symbol<"
406                "s\u00e4mtlich<"
407                "verkehrt<"
408                "vox<"
409                "v\u00e4ga<"
410                "waffle<"
411                "wood<"
412                "yen<"
413                "yuan<"
414                "yucca<"
415                "\u017eal<"
416                "\u017eena<"
417                "\u017den\u0113va<"
418                "zoo0<"
419                "Zviedrija<"
420                "Z\u00fcrich<"
421                "zysk0<"
422                "\u00e4ndere"
423            }
424        }
425        hi_TestNewRules { // Locale hi, no extra arguments: only the first literal in Cases is active; everything after it is commented out.
426            Info {
427                Description { "This test goes through new rules and tests against old rules" }
428            }
429            Settings {
430                {
431                    TestLocale { "hi" }
432                }
433            }
434            Cases {
435						"कॐ<कं<कँ<कः"
436// This data is left over from the rules that were in place prior to CLDR 1.9M1
437//						"०<१<२<३"
438//                 "<४<५<६<७<८<९<अ<आ"
439//                 "<इ<ई<उ<ऊ<ऋ<ॠ<ऌ<ॡ"
440//                 "<ऍ<ऎ<ए<ऐ<ऑ<ऒ<ओ<औ"
441//                 "<क<क़=क़<कँ<कं<कः<क॑<क॒"
442//                 "<क॓<क॔<कऽ<क्<का<कि<की<कु"
443//                 "<कू<कृ<कॄ<कॢ<कॣ<कॅ<कॆ<के"
444//                 "<कै<कॉ<कॊ<को<कौ<ख<ख़ =ख़<खँ<खं<खः"
445//                 "<ख॑<ख॒<ख॓<ख॔<खऽ<ख्<खा<खि"
446//                 "<खी<खु<खू<खृ<खॄ<खॢ<खॣ<खॅ"
447//                 "<खॆ<खे<खै<खॉ<खॊ<खो<खौ<ग"
448//                 "<ग़=ग़<गँ<गं<गः<ग॑<ग॒<ग॓<ग॔"
449//                 "<गऽ<ग्<गा<गि<गी<गु<गू<गृ"
450//                 "<गॄ<गॢ<गॣ<गॅ<गॆ<गे<गै<गॉ"
451//                 "<गॊ<गो<गौ<घ<ङ<च<छ<ज<ज़ =ज़<जँ<जं<जः"
452//                 "<ज॑<ज॒<ज॓<ज॔<जऽ<ज्<जा<जि"
453//                 "<जी<जु<जू<जृ<जॄ<जॢ<जॣ<जॅ"
454//                 "<जॆ<जे<जै<जॉ<जॊ<जो<जौ<झ"
455//                 "<ञ<ट<ठ<ड<ड़=ड़<डँ<डं<डः<ड॑<ड॒<ड॓<ड॔"
456//                 "<डऽ<ड्<डा<डि<डी<डु<डू<डृ"
457//                 "<डॄ<डॢ<डॣ<डॅ<डॆ<डे<डै<डॉ"
458//                 "<डॊ<डो<डौ<ढ<ढ़=ढ़<ढँ<ढं<ढः"
459//                 "<ढ॑<ढ॒<ढ॓<ढ॔<ढऽ<ढ्<ढा<ढि"
460//                 "<ढी<ढु<ढू<ढृ<ढॄ<ढॢ<ढॣ<ढॅ"
461//                 "<ढॆ<ढे<ढै<ढॉ<ढॊ<ढो<ढौ<ण"
462//                 "<त<थ<द<ध<न<ऩ =ऩ< नँ<नं< नः"
463//                 "<न॑<न॒<न॓<न॔<नऽ<न्<ना<नि"
464//                 "<नी<नु<नू<नृ<नॄ<नॢ<नॣ<नॅ"
465//                 "<नॆ<ने<नै<नॉ<नॊ<नो<नौ"
466//                 "<प<फ<फ़=फ़<फँ<फं<फः<फ॑<फ॒"
467//                 "<फ॓<फ॔<फऽ<फ्<फा<फि<फी<फु<फू<फृ"
468//                 "<फॄ<फॢ<फॣ<फॅ<फॆ<फे<फै<फॉ"
469//                 "<फॊ<फो<फौ<ब<भ<म<य<य़=य़ "
470//                 "<यँ<यं<यः<य॑<य॒<य॓<य॔"
471//                 "<यऽ<य्<या<यि<यी<यु<यू<यृ"
472//                 "<यॄ<यॢ<यॣ<यॅ<यॆ<ये<यै<यॉ"
473//                 "<यॊ<यो<यौ<र<ऱ=ऱ<रँ<रं<रः"
474//                 "<र॑<र॒<र॓<र॔<रऽ<र्<रा<रि"
475//                 "<री<रु<रू<रृ<रॄ<रॢ<रॣ<रॅ"
476//                 "<रॆ<रे<रै<रॉ<रॊ<रो<रौ"
477//                 "<ल<ळ<ऴ=ऴ<ळँ<ळं<ळः<ळ॑<ळ॒"
478//                 "<ळ॓<ळ॔<ळऽ<ळ्<ळा<ळि<ळी<ळु"
479//                 "<ळू<ळृ<ळॄ<ळॢ<ळॣ<ळॅ<ळॆ<ळे"
480//                 "<ळै<ळॉ<ळॊ<ळो<ळौ<व<श<ष<स<ह"
481//                 "<़<ँ<ं<ः<॑<॒<॓<॔<ऽ<्<ा<ि<ी"
482//                 "<ु<ू<ृ<ॄ<ॢ<ॣ<ॅ<ॆ"
483//                 "<े<ै<ॉ<ॊ<ो<ौ"
484           }
485        }
486//        fi_TestNewRules {
487//           Info {
488//              Description { "This test goes through new rules and tests against old rules" }
489//         }
490//        Settings {
491//           {
492//              TestLocale { "fi" }
493//         }
494//    }
495//   Cases { 
496//      "xa<xA<Xa<XA<xá<Xá<xax<xAx<xáx<xd<Xd<xð<xÐ<Xð<XÐ<xđ<xĐ<Xđ<XĐ<"
497//     "xdx<xðx<xÐx<xđx<xĐx<xe<Xe<xex<xn<Xn<xŋ<xŊ<Xŋ<XŊ<xnx<xŋx<xŊx<"
498//    "xo<Xo<xó<Xó<xox<xóx<xs<Xs<xß<Xß<xßx<xsx<xt<Xt<xþ<xÞ<Xþ<XÞ<xþx<"
499//   "xÞx<xtx<xu<Xu<xú<Xú<xux<xúx<xv<Xv<xw<Xw<xvx<xwx<xy<Xy<xü<Xü<"
500//  "xű<Xű<xyx<xüx<xűx<xz<Xz<xzx<xå<Xå<xåx<xä<Xä<xæ<xÆ<Xæ<XÆ<xäx<"
501//              "xæx<xÆx<xö<Xö<xø<Xø<xő<Xő<xõ<Xõ<xœ<xŒ<Xœ<XŒ<xöx<xøx<xőx<xõx<xœx<xŒx"
502//}
503//}
504        ro_TestNewRules { // Locale ro, no extra arguments: one sequence split over three adjacent literals.
505            Info {
506                Description { "This test goes through new rules and tests against old rules" }
507            }
508            Settings {
509                {
510                    TestLocale { "ro" }
511                }
512            }
513            Cases { 
514                "xAx<xă<xĂ<Xă<XĂ<xăx<xĂx<xâ<xÂ<Xâ<XÂ<xâx<xÂx<xb<xIx<xî<xÎ<Xî<XÎ<xîx<xÎx<"
515                "xj<xSx<xș=xş<xȘ=xŞ<Xș=Xş<XȘ=XŞ<xșx=xşx<xȘx=xŞx<xT<xTx<xț=xţ<xȚ=xŢ<Xț=Xţ<XȚ" // Each s and t is listed in two accented spellings that are expected to compare equal.
516                "=XŢ<xțx=xţx<xȚx=xŢx<xU" // Starts with a relation because it continues the literal above.
517            }
518        }
519        
520        testOffsets { // Locale en at strength 3: three independent two-string sequences, separated by commas.
521            Info {
522                Description { "This tests cases where forwards and backwards iteration get different offsets" }
523            }
524            
525            Settings {
526                {
527                    TestLocale { "en" }
528                    Arguments  { "[strength 3]" }
529                }
530            }
531                
532            Cases {
533                "a\uD800\uDC00\uDC00<b\uD800\uDC00\uDC00", // A surrogate pair followed by an extra unpaired DC00.
534                "\u0301A\u0301\u0301<\u0301B\u0301\u0301", // U+0301 before the letter and twice after it.
535                "abcd\r\u0301<abce\r\u0301" // A carriage return followed by U+0301.
536            }
537        }    
538    }
539}
540