// Copyright (c) 2001-2010 International Business Machines
// Corporation and others. All Rights Reserved.
//
// Data-driven collation tests, in ICU resource bundle text format.
// Each table under TestData is one test: Settings selects the collator
// (TestLocale or Rules, optionally with Arguments) and Cases lists the
// expected orderings. Adjacent quoted strings are concatenated into a single
// string; a comma starts a new string.
DataDrivenCollationTest:table(nofallback) {
    Info {
        Headers { "sequence" }
        Description { "These are the data driven tests" }
        // The fragments below are concatenated as-is, so each one ends with a
        // space to keep the resulting text readable.
        LongDescription {   "The following entries are separate tests containing test data for various locales. "
                            "Each entry has the following fields: "
                            "Info/Description - short description of the test "
                            "Settings - settings for the test. "
                            "Settings/TestLocale - locale for the collator OR "
                            "Settings/Rules - rules for the collator (can't have both) "
                            "Settings/Arguments - arguments to be passed to the collator before testing. Use rule syntax. "
                            "Cases - set of test cases, which are sequences of strings that will be parsed "
                            "Sequences must not change the sign of relation, i.e. we can only have < and = or "
                            "> and = in single sequence. Cannot mix < and > in the same sequence. Whitespace "
                            "is ignored unless quoted."
        }
    }
    TestData {
        // Locale "zh" at strength 1: case variants of a syllable are expected
        // to compare equal.
        TestMorePinyin {
            Info {
                Description { "Testing the primary strength." }
            }
            Settings {
                {
                    TestLocale { "zh" }
                    Arguments { "[strength 1]" }
                }
            }
            Cases { "lā = lĀ = Lā = LĀ < lān = lĀn < lē = lĒ = Lē = LĒ < lēn = lĒn" }

        }
        TestLithuanian {
            Info {
                Description { "Lithuanian sort order." }
            }
            Settings {
                {
                    TestLocale { "lt" }
                }
            }
            Cases { "cz<č<d<iz<j<sz<š<t<zz<ž" }
        }
        TestLatvian {
            Info {
                Description { "Latvian sort order." }
            }
            Settings {
                {
                    TestLocale { "lv" }
                }
            }
            Cases { "cz<č<d<gz<ģ<h<iz<j<kz<ķ<l<lz<ļ<m<nz<ņ<o<rz<ŗ<s<sz<š<t<zz<ž" }
        }
        TestEstonian {
            Info {
                Description { "Estonian sort order." }
            }
            Settings {
                {
                    TestLocale { "et" }
                }
            }
            Cases { "sy<š<šy<z<zy<ž<v<w<va<õ<õy<ä<äy<ö<öy<ü<üy<x" }
        }
        TestAlbanian {
            Info {
                Description { "Albanian sort order." }
            }
            Settings {
                {
                    TestLocale { "sq" }
                }
            }
            Cases { "cz<ç<d<dz<dh<e<ez<ë<f<gz<gj<h<lz<ll<m<nz<nj<o<rz<rr<s<sz<sh<t<tz<th<u<xz<xh<y<zz<zh" }
        }

        TestSimplifiedChineseOrder {
            Info {
                Description { "Sorted file has different order." }
            }
            Settings {
                {
                    TestLocale { "root" }
                    Arguments { "[normalization on]" }
                }
            }

            Cases { "\u5F20<\u5F20\u4E00\u8E3F" }
        }

        TestTibetanNormalizedIterativeCrash {
            Info {
                Description { "This pretty much crashes." }
            }
            Settings {
                {
                    TestLocale { "root" }
                }
            }

            // One sequence, split across two adjacent strings.
            Cases { "\u0f71\u0f72\u0f80\u0f71\u0f72"
                    "<\u0f80"
            }
        }
        TestThaiPartialSortKeyProblems {
            Info {
                Description { "These are examples of strings that caused trouble in partial sort key testing." }
            }
            Settings {
                {
                    TestLocale { "th_TH" }
                }
            }
            // TODO: the tests that are commented out should be enabled when j2720 is fixed
            // NOTE(review): no cases are commented out in this table as it stands;
            // confirm whether the TODO above is stale.
            // Five independent pairs, each written as two adjacent strings.
            Cases { "\u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C"
                    "<\u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18",
                    "\u0E01\u0E07\u0E01\u0E32\u0E23"
                    "<\u0E01\u0E07\u0E42\u0E01\u0E49",
                    "\u0E01\u0E23\u0E19\u0E17\u0E32"
                    "<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",
                    "\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27"
                    "<\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27",
                    "\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"
                    "<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"
            }
        }
        // Uses custom Rules instead of a TestLocale; the rule string starts
        // with a relation ("=equal...") rather than a reset.
        TestJavaStyleRule {
            Info {
                Description { "java.text allows rules to start as '<<<x<<<y...' "
                              "we emulate this by assuming a &[first tertiary ignorable] "
                              "in this case."
                }
            }
            Settings {
                {
                    Rules { "=equal<<<z<<x<<<w<y &[first tertiary ignorable]=a &[first primary ignorable]=b" }
                }
            }
            Cases { "a = equal < z < x < w < b < y" }
        }
        // Root collator with [alternate shifted] at strength 4. The whole Cases
        // entry is one sequence; quoted ' ' keeps the space significant.
        TestShiftedIgnorable {
            Info {
                Description { "New UCA states that primary ignorables should be completely "
                              "ignorable when following a shifted code point."
                }
            }
            Settings {
                {
                    TestLocale { "root" }
                    Arguments { "[alternate shifted][strength 4]" }
                }
            }
            Cases {
                "a' 'b="
                "a' '\u0300b="
                "a' '\u0301b<"
                "a_b="
                "a_\u0300b="
                "a_\u0301b<"
                "A' 'b="
                "A' '\u0300b="
                "A' '\u0301b<"
                "A_b="
                "A_\u0300b="
                "A_\u0301b<"
                "a\u0301b<"
                "A\u0301b<"
                "a\u0300b<"
                "A\u0300b"

            }
        }

        // Root collator with [alternate non-ignorable] at strength 3.
        // NOTE(review): the Description is identical to TestShiftedIgnorable's
        // although the Arguments differ - presumably copy-pasted; confirm
        // before rewording.
        TestNShiftedIgnorable {
            Info {
                Description { "New UCA states that primary ignorables should be completely "
                              "ignorable when following a shifted code point."
                }
            }
            Settings {
                {
                    TestLocale { "root" }
                    Arguments { "[alternate non-ignorable][strength 3]" }
                }
            }
            Cases {
                "a' 'b<"
                "A' 'b<"
                "a' '\u0301b<"
                "A' '\u0301b<"
                "a' '\u0300b<"
                "A' '\u0300b<"
                "a_b<"
                "A_b<"
                "a_\u0301b<"
                "A_\u0301b<"
                "a_\u0300b<"
                "A_\u0300b<"
                "a\u0301b<"
                "A\u0301b<"
                "a\u0300b<"
                "A\u0300b<"
            }
        }

        TestSafeSurrogates {
            Info {
                Description { "It turned out that surrogates were not skipped properly "
                              "when iterating backwards if they were in the middle of a "
                              "contraction. This test assures that this is fixed."
                }
            }
            Settings {
                {
                    Rules {
                        "&a < x\ud800\udc00b"
                    }
                }
            }
            Cases {
                "a<x\ud800\udc00b"
            }
        }
/*
 UCA 4.1 removes skipping of ignorable code points in contractions!
        TestCIgnorableContraction {
            Info {
                Description { "Checks whether completely ignorable code points are "
                              "skipped in contractions."
                }
            }
            Settings {
                {
                    TestLocale { "sh" }
                }
                {
                    Rules {
                        "& L < lj, Lj <<< LJ"
                        "& N < nj, Nj <<< NJ "
                    }
                }
            }
            Cases {
                "njiva=n\ud834\udd65jiva=n\uD834\uDD79jiva=n\u0000\u0000\u0000jiva=n\u0000jiva=n\ud800jiva=n\ufffejiva",
                "ljubav=l\u0000jubav=l\uD834\uDD79jubav=l\u0000\u0000\u0000jubav=l\ud800jubav=l\ufffejubav",
                "Ljubav=L\u0000jubav=L\uD834\uDD79jubav=L\u0000\u0000\u0000jubav=L\ud800jubav=L\ufffejubav",
            }
        }

*/
/*
 UCA 4.1 removes skipping of ignorable code points in contractions!
        TestCIgnorablePrefix {
            Info {
                Description { "Checks whether completely ignorable code points are "
                              "skipped in prefix processing."
                }
            }
            Settings {
                {
                    TestLocale { "ja" }
                }
            }
            Cases {
                "\u30A1\u30FC"
                "= \u30A1\uDB40\uDC30\u30FC"
                "= \u30A1\uD800\u30FC"
                "= \u30A1\uFFFE\u30FC"
                "= \u30A1\uD834\uDD79\u30FC"
                "= \u30A1\u0000\u0000\u0000\u30FC"
                "= \u30A1\u0000\u30FC"
                "= \u30A1\u30FC"
                "= \u30A1\u0000\u059a\u30FC"
                "= \u30A1\u30FC"
            }
        }
*/
        da_TestPrimary {
            Info {
                Description { "This test goes through primary strength cases" }
            }
            Settings {
                {
                    TestLocale { "da" }
                    Arguments { "[strength 1]" }
                }
            }
            Cases {
                "Lvi<Lwi",
                "L\u00e4vi<L\u00f6wi",
                "L\u00fcbeck=Lybeck",
            }
        }
        da_TestTertiary {
            Info {
                Description { "This test goes through tertiary strength cases" }
            }
            Settings {
                {
                    TestLocale { "da" }
                    Arguments { "[strength 3]" }
                }
            }
            // Five independent pairs, followed by two long sequences that are
            // each assembled from adjacent strings (the comma after
            // "\u00d6BERG" separates the two).
            Cases {
                "Luc<luck",
                "luck<L\u00fcbeck",
                "L\u00fcbeck>lybeck",
                "L\u00e4vi<L\u00f6we",
                "L\u00f6ww<mast",
                // constUCharCollationDanishTest::testBugs[][CollationDanishTest::MAX_TOKEN_LEN]="
                "A/S<"
                "ANDRE<"
                "ANDR\u00c9<"
                "ANDREAS<"
                "AS<"
                "CA<"
                "\u00c7A<"
                "CB<"
                "\u00c7C<"
                "D.S.B.<"
                "DA<"
                "\u00d0A<"
                "DB<"
                "\u00d0C<"
                "DSB<"
                "DSC<"
                "EKSTRA_ARBEJDE<"
                "EKSTRABUD0<"
                "H\u00d8ST<"
                "HAAG<"
                "H\u00c5NDBOG<"
                "HAANDV\u00c6RKSBANKEN<"
                "Karl<"
                "karl<"
                "'NIELS J\u00d8RGEN'<"
                "NIELS-J\u00d8RGEN<"
                "NIELSEN<"
                "'R\u00c9E, A'<"
                "'REE, B'<"
                "'R\u00c9E, L'<"
                "'REE, V'<"
                "'SCHYTT, B'<"
                "'SCHYTT, H'<"
                "'SCH\u00dcTT, H'<"
                "'SCHYTT, L'<"
                "'SCH\u00dcTT, M'<"
                "SS<"
                "\u00df<"
                "SSA<"
                "'STORE VILDMOSE'<"
                "STOREK\u00c6R0<"
                "'STORM PETERSEN'<"
                "STORMLY<"
                "THORVALD<"
                "THORVARDUR<"
                "\u00feORVAR\u00d0UR<"
                "THYGESEN<"
                "'VESTERG\u00c5RD, A'<"
                "'VESTERGAARD, A'<"
                "'VESTERG\u00c5RD, B'<"
                "\u00c6BLE<"
                "\u00c4BLE<"
                "\u00d8BERG<"
                "\u00d6BERG",

                // constUCharCollationDanishTest::testNTList[][CollationDanishTest::MAX_TOKEN_LEN]="
                "andere<"
                "chaque<"
                "chemin<"
                "cote<"
                "cot\u00e9<"
                "c\u00f4te<"
                "c\u00f4t\u00e9<"
                "\u010du\u010d\u0113t<"
                "Czech<"
                "hi\u0161a<"
                "irdisch<"
                "lie<"
                "lire<"
                "llama<"
                "l\u00f5ug<"
                "l\u00f2za<"
                "lu\u010d<"
                "luck<"
                "L\u00fcbeck<"
                "lye<"
                "l\u00e4vi<"
                "L\u00f6wen<"
                "m\u00e0\u0161ta<"
                "m\u00eer<"
                "myndig<"
                "M\u00e4nner<"
                "m\u00f6chten<"
                "pi\u00f1a<"
                "pint<"
                "pylon<"
                "\u0161\u00e0ran<"
                "savoir<"
                "\u0160erb\u016bra<"
                "Sietla<"
                "\u015blub<"
                "subtle<"
                "symbol<"
                "s\u00e4mtlich<"
                "verkehrt<"
                "vox<"
                "v\u00e4ga<"
                "waffle<"
                "wood<"
                "yen<"
                "yuan<"
                "yucca<"
                "\u017eal<"
                "\u017eena<"
                "\u017den\u0113va<"
                "zoo0<"
                "Zviedrija<"
                "Z\u00fcrich<"
                "zysk0<"
                "\u00e4ndere"
            }
        }
        hi_TestNewRules {
            Info {
                Description { "This test goes through new rules and tests against old rules" }
            }
            Settings {
                {
                    TestLocale { "hi" }
                }
            }
            // Only the first sequence is active; the rest is kept commented out.
            Cases {
                "कॐ<कं<कँ<कः"
// This data is left over from the rules that were in place prior to CLDR 1.9M1
//                "०<१<२<३"
//                "<४<५<६<७<८<९<अ<आ"
//                "<इ<ई<उ<ऊ<ऋ<ॠ<ऌ<ॡ"
//                "<ऍ<ऎ<ए<ऐ<ऑ<ऒ<ओ<औ"
//                "<क<क़=क़<कँ<कं<कः<क॑<क॒"
//                "<क॓<क॔<कऽ<क्<का<कि<की<कु"
//                "<कू<कृ<कॄ<कॢ<कॣ<कॅ<कॆ<के"
//                "<कै<कॉ<कॊ<को<कौ<ख<ख़ =ख़<खँ<खं<खः"
//                "<ख॑<ख॒<ख॓<ख॔<खऽ<ख्<खा<खि"
//                "<खी<खु<खू<खृ<खॄ<खॢ<खॣ<खॅ"
//                "<खॆ<खे<खै<खॉ<खॊ<खो<खौ<ग"
//                "<ग़=ग़<गँ<गं<गः<ग॑<ग॒<ग॓<ग॔"
//                "<गऽ<ग्<गा<गि<गी<गु<गू<गृ"
//                "<गॄ<गॢ<गॣ<गॅ<गॆ<गे<गै<गॉ"
//                "<गॊ<गो<गौ<घ<ङ<च<छ<ज<ज़ =ज़<जँ<जं<जः"
//                "<ज॑<ज॒<ज॓<ज॔<जऽ<ज्<जा<जि"
//                "<जी<जु<जू<जृ<जॄ<जॢ<जॣ<जॅ"
//                "<जॆ<जे<जै<जॉ<जॊ<जो<जौ<झ"
//                "<ञ<ट<ठ<ड<ड़=ड़<डँ<डं<डः<ड॑<ड॒<ड॓<ड॔"
//                "<डऽ<ड्<डा<डि<डी<डु<डू<डृ"
//                "<डॄ<डॢ<डॣ<डॅ<डॆ<डे<डै<डॉ"
//                "<डॊ<डो<डौ<ढ<ढ़=ढ़<ढँ<ढं<ढः"
//                "<ढ॑<ढ॒<ढ॓<ढ॔<ढऽ<ढ्<ढा<ढि"
//                "<ढी<ढु<ढू<ढृ<ढॄ<ढॢ<ढॣ<ढॅ"
//                "<ढॆ<ढे<ढै<ढॉ<ढॊ<ढो<ढौ<ण"
//                "<त<थ<द<ध<न<ऩ =ऩ< नँ<नं< नः"
//                "<न॑<न॒<न॓<न॔<नऽ<न्<ना<नि"
//                "<नी<नु<नू<नृ<नॄ<नॢ<नॣ<नॅ"
//                "<नॆ<ने<नै<नॉ<नॊ<नो<नौ"
//                "<प<फ<फ़=फ़<फँ<फं<फः<फ॑<फ॒"
//                "<फ॓<फ॔<फऽ<फ्<फा<फि<फी<फु<फू<फृ"
//                "<फॄ<फॢ<फॣ<फॅ<फॆ<फे<फै<फॉ"
//                "<फॊ<फो<फौ<ब<भ<म<य<य़=य़ "
//                "<यँ<यं<यः<य॑<य॒<य॓<य॔"
//                "<यऽ<य्<या<यि<यी<यु<यू<यृ"
//                "<यॄ<यॢ<यॣ<यॅ<यॆ<ये<यै<यॉ"
//                "<यॊ<यो<यौ<र<ऱ=ऱ<रँ<रं<रः"
//                "<र॑<र॒<र॓<र॔<रऽ<र्<रा<रि"
//                "<री<रु<रू<रृ<रॄ<रॢ<रॣ<रॅ"
//                "<रॆ<रे<रै<रॉ<रॊ<रो<रौ"
//                "<ल<ळ<ऴ=ऴ<ळँ<ळं<ळः<ळ॑<ळ॒"
//                "<ळ॓<ळ॔<ळऽ<ळ्<ळा<ळि<ळी<ळु"
//                "<ळू<ळृ<ळॄ<ळॢ<ळॣ<ळॅ<ळॆ<ळे"
//                "<ळै<ळॉ<ळॊ<ळो<ळौ<व<श<ष<स<ह"
//                "<़<ँ<ं<ः<॑<॒<॓<॔<ऽ<्<ा<ि<ी"
//                "<ु<ू<ृ<ॄ<ॢ<ॣ<ॅ<ॆ"
//                "<े<ै<ॉ<ॊ<ो<ौ"
            }
        }
//        fi_TestNewRules {
//            Info {
//                Description { "This test goes through new rules and tests against old rules" }
//            }
//            Settings {
//                {
//                    TestLocale { "fi" }
//                }
//            }
//            Cases {
//                "xa<xA<Xa<XA<xá<Xá<xax<xAx<xáx<xd<Xd<xð<xÐ<Xð<XÐ<xđ<xĐ<Xđ<XĐ<"
//                "xdx<xðx<xÐx<xđx<xĐx<xe<Xe<xex<xn<Xn<xŋ<xŊ<Xŋ<XŊ<xnx<xŋx<xŊx<"
//                "xo<Xo<xó<Xó<xox<xóx<xs<Xs<xß<Xß<xßx<xsx<xt<Xt<xþ<xÞ<Xþ<XÞ<xþx<"
//                "xÞx<xtx<xu<Xu<xú<Xú<xux<xúx<xv<Xv<xw<Xw<xvx<xwx<xy<Xy<xü<Xü<"
//                "xű<Xű<xyx<xüx<xűx<xz<Xz<xzx<xå<Xå<xåx<xä<Xä<xæ<xÆ<Xæ<XÆ<xäx<"
//                "xæx<xÆx<xö<Xö<xø<Xø<xő<Xő<xõ<Xõ<xœ<xŒ<Xœ<XŒ<xöx<xøx<xőx<xõx<xœx<xŒx"
//}
//}
        ro_TestNewRules {
            Info {
                Description { "This test goes through new rules and tests against old rules" }
            }
            Settings {
                {
                    TestLocale { "ro" }
                }
            }
            // One sequence, split across three adjacent strings.
            Cases {
                "xAx<xă<xĂ<Xă<XĂ<xăx<xĂx<xâ<xÂ<Xâ<XÂ<xâx<xÂx<xb<xIx<xî<xÎ<Xî<XÎ<xîx<xÎx<"
                "xj<xSx<xș=xş<xȘ=xŞ<Xș=Xş<XȘ=XŞ<xșx=xşx<xȘx=xŞx<xT<xTx<xț=xţ<xȚ=xŢ<Xț=Xţ<XȚ"
                "=XŢ<xțx=xţx<xȚx=xŢx<xU"
            }
        }

        testOffsets {
            Info {
                Description { "This tests cases where forwards and backwards iteration get different offsets" }
            }

            Settings {
                {
                    TestLocale { "en" }
                    Arguments { "[strength 3]" }
                }
            }

            Cases {
                "a\uD800\uDC00\uDC00<b\uD800\uDC00\uDC00",
                "\u0301A\u0301\u0301<\u0301B\u0301\u0301",
                "abcd\r\u0301<abce\r\u0301"
            }
        }
    }
}