1c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru/* 2c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ********************************************************************** 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Copyright (C) 2005-2011, International Business Machines 4c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Corporation and others. All Rights Reserved. 5c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ********************************************************************** 6c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 7c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 8c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 9c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/utypes.h" 10c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 11c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION 12c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 13c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/unistr.h" 14c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/putil.h" 15c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/usearch.h" 16c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 17c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "cmemory.h" 18c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/coll.h" 19c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/tblcoll.h" 20c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/coleitr.h" 21c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/ucoleitr.h" 22c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 23c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/regex.h" // TODO: make conditional on regexp being built. 
24c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 25c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/uniset.h" 26c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/uset.h" 27c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "unicode/ustring.h" 28c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "hash.h" 29c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "uhash.h" 30c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "ucol_imp.h" 31c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 32c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "intltest.h" 33c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "ssearch.h" 34c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 35b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/colldata.h" 36b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/bmsearch.h" 37b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/bms.h" 38b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 39c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "xmlparser.h" 40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "ucbuf.h" 41c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 42c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <stdlib.h> 43c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <string.h> 44c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include <stdio.h> 45c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 46c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruchar testId[100]; 47c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 48c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define TEST_ASSERT(x) {if (!(x)) { \ 
49c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("Failure in file %s, line %d, test ID = \"%s\"", __FILE__, __LINE__, testId);}} 50c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 51c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define TEST_ASSERT_M(x, m) {if (!(x)) { \ 52c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("Failure in file %s, line %d. \"%s\"", __FILE__, __LINE__, m);return;}} 53c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 54c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define TEST_ASSERT_SUCCESS(errcode) {if (U_FAILURE(errcode)) { \ 556d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("Failure in file %s, line %d, test ID \"%s\", status = \"%s\"", \ 56c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru __FILE__, __LINE__, testId, u_errorName(errcode));}} 57c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 58c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) 59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) 60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#define DELETE_ARRAY(array) uprv_free((void *) (array)) 61c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 62c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------- 63c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 64c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Test class boilerplate 65c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 66c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------- 67c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruSSearchTest::SSearchTest() 
68c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 69c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 70c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 71c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruSSearchTest::~SSearchTest() 72c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 73c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 74c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 75c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid SSearchTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *params ) 76c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 77c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (exec) logln("TestSuite SSearchTest: "); 78c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru switch (index) { 79c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 80c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 0: name = "searchTest"; 81c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (exec) searchTest(); 82c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 83c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 84c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 1: name = "offsetTest"; 85c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (exec) offsetTest(); 86c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 87c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 88c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case 2: name = "monkeyTest"; 89c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (exec) monkeyTest(params); 90c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 92b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case 3: name = "bmMonkeyTest"; 
93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) bmMonkeyTest(params); 94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case 4: name = "boyerMooreTest"; 97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) boyerMooreTest(); 98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 99b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case 5: name = "goodSuffixTest"; 101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) goodSuffixTest(); 102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 104b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case 6: name = "searchTime"; 105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) searchTime(); 106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 108b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case 7: name = "bmsTest"; 109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) bmsTest(); 110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case 8: name = "bmSearchTest"; 113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) bmSearchTest(); 114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case 9: name = "udhrTest"; 117b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) udhrTest(); 
118b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 10: name = "stringListTest"; 12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) stringListTest(); 12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: name = ""; 124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; //needed to end loop 125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define PATH_BUFFER_SIZE 2048 132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruconst char *SSearchTest::getPath(char buffer[2048], const char *filename) { 133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const char *testDataDirectory = IntlTest::getSourceTestData(status); 135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status) || strlen(testDataDirectory) + strlen(filename) + 1 >= PATH_BUFFER_SIZE) { 137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("ERROR: getPath() failed - %s", u_errorName(status)); 138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strcpy(buffer, testDataDirectory); 142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strcat(buffer, filename); 143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return buffer; 144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid SSearchTest::searchTest() 148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO 150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char path[PATH_BUFFER_SIZE]; 152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const char *testFilePath = getPath(path, "ssearch.xml"); 153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (testFilePath == NULL) { 155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; /* Couldn't get path: error message already output. 
*/ 156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalPointer<UXMLParser> parser(UXMLParser::createParser(status)); 159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalPointer<UXMLElement> root(parser->parseFile(testFilePath, status)); 161c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString *debugTestCase = root->getAttribute("debug"); 167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (debugTestCase != NULL) { 168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// setenv("USEARCH_DEBUG", "1", 1); 169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UXMLElement *testCase; 173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t tc = 0; 174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while((testCase = root->nextChildElement(tc)) != NULL) { 176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (testCase->getTagName().compare("test-case") != 0) { 178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("ssearch, unrecognized XML Element in 
test file"); 179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString *id = testCase->getAttribute("id"); 182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *testId = 0; 183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (id != NULL) { 184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru id->extract(0, id->length(), testId, sizeof(testId), US_INV); 185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If debugging test case has been specified and this is not it, skip to next. 188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) { 189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Get the requested collation strength. 193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Default is tertiary if the XML attribute is missing from the test case. 
194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString *strength = testCase->getAttribute("strength"); 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UColAttributeValue collatorStrength = UCOL_PRIMARY; 197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strength==NULL) { collatorStrength = UCOL_TERTIARY;} 198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;} 199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;} 200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;} 201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;} 202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;} 203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Bogus value supplied for strength. Shouldn't happen, even from 205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // typos, if the XML source has been validated. 206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This assert is a little deceiving in that strength can be 207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // any of the allowed values, not just TERTIARY, but it will 208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // do the job of getting the error output. 
209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT(*strength=="TERTIARY") 210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Get the collator normalization flag. Default is UCOL_OFF. 214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UColAttributeValue normalize = UCOL_OFF; 216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString *norm = testCase->getAttribute("norm"); 217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF"); 218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (norm!=NULL && *norm=="ON") { 219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru normalize = UCOL_ON; 220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 222b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 223b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE. 
224b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE; 226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString *alt = testCase->getAttribute("alternate_handling"); 227b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE"); 228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (alt != NULL && *alt == "SHIFTED") { 229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru alternateHandling = UCOL_SHIFTED; 230b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString defLocale("en"); 233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char clocale[100]; 234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString *locale = testCase->getAttribute("locale"); 235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (locale == NULL || locale->length()==0) { 236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru locale = &defLocale; 237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL); 239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString text; 242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString target; 243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString pattern; 244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t expectedMatchStart = -1; 
245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t expectedMatchLimit = -1; 246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UXMLElement *n; 247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t nodeCount = 0; 248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru n = testCase->getChildElement("pattern"); 250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT(n != NULL); 251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (n==NULL) { 252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru text = n->getText(FALSE); 255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru text = text.unescape(); 256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pattern.append(text); 257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru nodeCount++; 258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru n = testCase->getChildElement("pre"); 260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (n!=NULL) { 261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru text = n->getText(FALSE); 262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru text = text.unescape(); 263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru target.append(text); 264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru nodeCount++; 265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 266b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru n = testCase->getChildElement("m"); 268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (n!=NULL) { 
269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectedMatchStart = target.length(); 270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru text = n->getText(FALSE); 271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru text = text.unescape(); 272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru target.append(text); 273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectedMatchLimit = target.length(); 274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru nodeCount++; 275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru n = testCase->getChildElement("post"); 278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (n!=NULL) { 279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru text = n->getText(FALSE); 280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru text = text.unescape(); 281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru target.append(text); 282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru nodeCount++; 283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check that there weren't extra things in the XML 286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT(nodeCount == testCase->countChildren()); 287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 288b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Open a collator and StringSearch based on the parameters 289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // obtained from the XML. 
290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_ZERO_ERROR; 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUCollatorPointer collator(ucol_open(clocale, &status)); 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucol_setStrength(collator.getAlias(), collatorStrength); 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status); 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucol_setAttribute(collator.getAlias(), UCOL_ALTERNATE_HANDLING, alternateHandling, &status); 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho target.getBuffer(), target.length(), 29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collator.getAlias(), 29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho NULL, // the break iterator 30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &status)); 301b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t foundStart = 0; 308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t foundLimit = 0; 309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool foundMatch; 310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
// Do the search, check the match result against the expected results. 313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foundMatch= usearch_search(uss.getAlias(), 0, &foundStart, &foundLimit, &status); 315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((foundMatch && expectedMatchStart<0) || 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (foundStart != expectedMatchStart) || 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (foundLimit != expectedMatchLimit)) { 319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT(FALSE); // ouput generic error position 320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru infoln("Found, expected match start = %d, %d \n" 321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "Found, expected match limit = %d, %d", 322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); 323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // In case there are other matches... 326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // (should we only do this if the test case passed?) 
327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (foundMatch) { 328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectedMatchStart = foundStart; 329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectedMatchLimit = foundLimit; 330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foundMatch = usearch_search(uss.getAlias(), foundLimit, &foundStart, &foundLimit, &status); 332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uss.adoptInstead(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), 335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru target.getBuffer(), target.length(), 33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collator.getAlias(), 337c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru NULL, 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &status)); 339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Do the backwards search, check the match result against the expected results. 
342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foundMatch= usearch_searchBackwards(uss.getAlias(), target.length(), &foundStart, &foundLimit, &status); 344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((foundMatch && expectedMatchStart<0) || 34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (foundStart != expectedMatchStart) || 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (foundLimit != expectedMatchLimit)) { 348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT(FALSE); // ouput generic error position 349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru infoln("Found, expected backwards match start = %d, %d \n" 350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "Found, expected backwards match limit = %d, %d", 351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); 352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 357b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustruct UdhrTestCase 358b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *locale; 36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *file; 361b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}; 362b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 363b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SSearchTest::udhrTest() 364b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 
365b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 366b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char path[PATH_BUFFER_SIZE]; 367b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *udhrPath = getPath(path, "udhr"); 368b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 369b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (udhrPath == NULL) { 370b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // couldn't get path: error message already output... 371b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 372b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 373b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 374b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UdhrTestCase testCases[] = { 375b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru {"en", "udhr_eng.txt"}, 376b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru {"de", "udhr_deu_1996.txt"}, 377b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru {"fr", "udhr_fra.txt"}, 378b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru {"ru", "udhr_rus.txt"}, 379b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru {"th", "udhr_tha.txt"}, 380b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru {"ja", "udhr_jpn.txt"}, 381b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru {"ko", "udhr_kor.txt"}, 382b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru {"zh", "udhr_cmn_hans.txt"}, 383b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru {"zh_Hant", "udhr_cmn_hant.txt"} 384b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 385b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 386b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t testCount = ARRAY_SIZE(testCases); 387b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 
388b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (int32_t t = 0; t < testCount; t += 1) { 389b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t len = 0; 390b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *resolvedFileName = NULL; 391b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *encoding = NULL; 392b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCHARBUF *ucharBuf = NULL; 393b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 394b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucbuf_resolveFileName(udhrPath, testCases[t].file, NULL, &len, &status); 395b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru resolvedFileName = NEW_ARRAY(char, len); 396b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 397b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(resolvedFileName == NULL){ 398b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; 399b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 400b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 401b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(status == U_BUFFER_OVERFLOW_ERROR){ 402b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_ZERO_ERROR; 403b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 404b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 405b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucbuf_resolveFileName(udhrPath, testCases[t].file, resolvedFileName, &len, &status); 406b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucharBuf = ucbuf_open(resolvedFileName, &encoding, TRUE, FALSE, &status); 407b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 408b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru DELETE_ARRAY(resolvedFileName); 409b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 
410b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U_FAILURE(status)){ 411b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru infoln("Could not open the input file %s. Test skipped\n", testCases[t].file); 412b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; 413b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 414b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 415b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t targetLen = 0; 416b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *target = ucbuf_getBuffer(ucharBuf, &targetLen, &status); 417b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 418b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* The first line of the file contains the pattern */ 419b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t start = 0, end = 0, plen = 0; 420b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 421b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(end = start; ; end += 1) { 422b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar ch = target[end]; 423b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 424b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (ch == 0x000A || ch == 0x000D || ch == 0x2028) { 425b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 426b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 427b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 428b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 429b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru plen = end - start; 430b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 431b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *pattern = NEW_ARRAY(UChar, plen); 432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (int32_t i = 0; i < plen; i += 1) { 
433b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pattern[i] = target[start++]; 434b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 435b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 436b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t offset = 0; 437b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollator *coll = ucol_open(testCases[t].locale, &status); 438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCD *ucd = NULL; 439b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BMS *bms = NULL; 440b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 441b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 442b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Could not open collator for %s", testCases[t].locale); 443b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto delete_collator; 444b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 445b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 446b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucd = ucd_open(coll, &status); 447b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 448b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 449b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Could not open CollData object for %s", testCases[t].locale); 450b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto delete_ucd; 451b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 452b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 453b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms = bms_open(ucd, pattern, plen, target, targetLen, &status); 454b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 455b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste 
Queru errln("Could not open search object for %s", testCases[t].locale); 457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto delete_bms; 458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 460b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru start = end = -1; 461b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while (bms_search(bms, offset, &start, &end)) { 462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru offset = end; 463b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 464b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (offset == 0) { 466b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Could not find pattern - locale: %s, file: %s ", testCases[t].locale, testCases[t].file); 467b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querudelete_bms: 470b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms_close(bms); 471b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 472b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querudelete_ucd: 473b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucd_close(ucd); 474b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 475b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querudelete_collator: 476b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_close(coll); 477b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 478b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru DELETE_ARRAY(pattern); 479b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucbuf_close(ucharBuf); 480b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 481b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste 
Queru 482b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucd_flushCache(); 483b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 484b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 485b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SSearchTest::bmSearchTest() 486b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 487b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 488b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 489b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char path[PATH_BUFFER_SIZE]; 490b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *testFilePath = getPath(path, "ssearch.xml"); 491b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 492b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (testFilePath == NULL) { 493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; /* Couldn't get path: error message already output. 
*/ 494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 495b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 496b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UXMLParser *parser = UXMLParser::createParser(status); 497b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 498b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UXMLElement *root = parser->parseFile(testFilePath, status); 499b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 500b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 501b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 502b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 503b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString *debugTestCase = root->getAttribute("debug"); 505b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (debugTestCase != NULL) { 506b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// setenv("USEARCH_DEBUG", "1", 1); 507b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 508b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 509b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 510b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UXMLElement *testCase; 511b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t tc = 0; 512b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 513b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while((testCase = root->nextChildElement(tc)) != NULL) { 514b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (testCase->getTagName().compare("test-case") != 0) { 516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("ssearch, unrecognized XML Element in 
test file"); 517b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; 518b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString *id = testCase->getAttribute("id"); 520b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *testId = 0; 521b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (id != NULL) { 522b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru id->extract(0, id->length(), testId, sizeof(testId), US_INV); 523b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 524b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If debugging test case has been specified and this is not it, skip to next. 526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) { 527b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; 528b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 529b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 530b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Get the requested collation strength. 531b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Default is tertiary if the XML attribute is missing from the test case. 
532b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 533b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString *strength = testCase->getAttribute("strength"); 53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UColAttributeValue collatorStrength = UCOL_PRIMARY; 535b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (strength==NULL) { collatorStrength = UCOL_TERTIARY;} 536b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;} 537b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;} 538b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;} 539b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;} 540b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;} 541b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru else { 542b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Bogus value supplied for strength. Shouldn't happen, even from 543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // typos, if the XML source has been validated. 544b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // This assert is a little deceiving in that strength can be 545b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // any of the allowed values, not just TERTIARY, but it will 546b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // do the job of getting the error output. 
547b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT(*strength=="TERTIARY") 548b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 549b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 550b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 551b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Get the collator normalization flag. Default is UCOL_OFF. 552b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UColAttributeValue normalize = UCOL_OFF; 554b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString *norm = testCase->getAttribute("norm"); 555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF"); 556b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (norm!=NULL && *norm=="ON") { 557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru normalize = UCOL_ON; 558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 559b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 560b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 561b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE. 
562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 563b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE; 564b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString *alt = testCase->getAttribute("alternate_handling"); 565b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE"); 566b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (alt != NULL && *alt == "SHIFTED") { 567b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru alternateHandling = UCOL_SHIFTED; 568b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 569b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 570b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString defLocale("en"); 571b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char clocale[100]; 572b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString *locale = testCase->getAttribute("locale"); 573b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (locale == NULL || locale->length()==0) { 574b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru locale = &defLocale; 575b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 576b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL); 577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 579b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString text; 580b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString target; 581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString pattern; 582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t expectedMatchStart = -1; 
583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t expectedMatchLimit = -1; 584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UXMLElement *n; 585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t nodeCount = 0; 586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru n = testCase->getChildElement("pattern"); 588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT(n != NULL); 589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (n==NULL) { 590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; 591b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 592b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru text = n->getText(FALSE); 593b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru text = text.unescape(); 594b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pattern.append(text); 595b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nodeCount++; 596b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 597b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru n = testCase->getChildElement("pre"); 598b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (n!=NULL) { 599b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru text = n->getText(FALSE); 600b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru text = text.unescape(); 601b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru target.append(text); 602b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nodeCount++; 603b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 604b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 605b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru n = testCase->getChildElement("m"); 606b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (n!=NULL) { 
607b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru expectedMatchStart = target.length(); 608b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru text = n->getText(FALSE); 609b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru text = text.unescape(); 610b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru target.append(text); 611b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru expectedMatchLimit = target.length(); 612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nodeCount++; 613b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 614b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 615b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru n = testCase->getChildElement("post"); 616b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (n!=NULL) { 617b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru text = n->getText(FALSE); 618b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru text = text.unescape(); 619b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru target.append(text); 620b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nodeCount++; 621b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 622b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 623b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Check that there weren't extra things in the XML 624b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT(nodeCount == testCase->countChildren()); 625b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 626b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Open a collator and StringSearch based on the parameters 627b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // obtained from the XML. 
628b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 629b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_ZERO_ERROR; 630b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollator *collator = ucol_open(clocale, &status); 631b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_setStrength(collator, collatorStrength); 632b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status); 633b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, alternateHandling, &status); 634b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCD *ucd = ucd_open(collator, &status); 635b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BMS *bms = bms_open(ucd, pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), &status); 636b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 637b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 638b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 639b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms_close(bms); 640b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucd_close(ucd); 641b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_close(collator); 642b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; 643b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 644b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 645b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t foundStart = 0; 646b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t foundLimit = 0; 647b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBool foundMatch; 648b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 649b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste 
Queru // 650b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Do the search, check the match result against the expected results. 651b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 652b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru foundMatch = bms_search(bms, 0, &foundStart, &foundLimit); 653b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //TEST_ASSERT_SUCCESS(status); 65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((foundMatch && expectedMatchStart < 0) || 65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (foundStart != expectedMatchStart) || 65650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (foundLimit != expectedMatchLimit)) { 657b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT(FALSE); // ouput generic error position 658b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru infoln("Found, expected match start = %d, %d \n" 659b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru "Found, expected match limit = %d, %d", 660b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); 661b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 662b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 663b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms_close(bms); 664b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucd_close(ucd); 665b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_close(collator); 666b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 667b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 668b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucd_flushCache(); 669b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete root; 670b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete parser; 671b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 
672b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 673b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustruct Order 675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t order; 677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t lowOffset; 678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t highOffset; 679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruclass OrderList 682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querupublic: 684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru OrderList(); 685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset = 0); 686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ~OrderList(); 687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t size(void) const; 689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void add(int32_t order, int32_t low, int32_t high); 690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const Order *get(int32_t index) const; 691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t getLowOffset(int32_t index) const; 692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t getHighOffset(int32_t index) const; 693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t getOrder(int32_t index) const; 694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void reverse(void); 695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool compare(const 
OrderList &other) const; 696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool matchesAt(int32_t offset, const OrderList &other) const; 697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruprivate: 699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Order *list; 700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t listMax; 701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t listSize; 702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruOrderList::OrderList() 70550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho : list(NULL), listMax(16), listSize(0) 706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list = new Order[listMax]; 708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruOrderList::OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset) 711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru : list(NULL), listMax(16), listSize(0) 712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status); 715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t strengthMask = 0; 716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t order, low, high; 717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 718b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru switch 
(ucol_getStrength(coll)) 719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru default: 721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strengthMask |= UCOL_TERTIARYORDERMASK; 722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* fall through */ 723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_SECONDARY: 725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strengthMask |= UCOL_SECONDARYORDERMASK; 726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* fall through */ 727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case UCOL_PRIMARY: 729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strengthMask |= UCOL_PRIMARYORDERMASK; 730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list = new Order[listMax]; 733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setOffset(elems, stringOffset, &status); 735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru low = ucol_getOffset(elems); 738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = ucol_next(elems, &status); 739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru high = ucol_getOffset(elems); 740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (order != UCOL_NULLORDER) { 742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order &= strengthMask; 
743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (order != UCOL_IGNORABLE) { 746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru add(order, low, high); 747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while (order != UCOL_NULLORDER); 749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_closeElements(elems); 751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruOrderList::~OrderList() 754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 755c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete[] list; 756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid OrderList::add(int32_t order, int32_t low, int32_t high) 759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 760c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (listSize >= listMax) { 761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru listMax *= 2; 762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Order *newList = new Order[listMax]; 764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memcpy(newList, list, listSize * sizeof(Order)); 766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete[] list; 767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list = newList; 
768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list[listSize].order = order; 771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list[listSize].lowOffset = low; 772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list[listSize].highOffset = high; 773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru listSize += 1; 775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruconst Order *OrderList::get(int32_t index) const 778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (index >= listSize) { 780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return &list[index]; 784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint32_t OrderList::getLowOffset(int32_t index) const 787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const Order *order = get(index); 789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (order != NULL) { 791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return order->lowOffset; 792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint32_t OrderList::getHighOffset(int32_t index) const 798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const Order *order = get(index); 800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (order != NULL) { 802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return order->highOffset; 803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint32_t OrderList::getOrder(int32_t index) const 809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const Order *order = get(index); 811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (order != NULL) { 813c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return order->order; 814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return UCOL_NULLORDER; 817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint32_t OrderList::size() const 
820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return listSize; 822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid OrderList::reverse() 825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(int32_t f = 0, b = listSize - 1; f < b; f += 1, b -= 1) { 827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Order swap = list[b]; 828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list[b] = list[f]; 830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list[f] = swap; 831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool OrderList::compare(const OrderList &other) const 835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (listSize != other.listSize) { 837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 839c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 840c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(int32_t i = 0; i < listSize; i += 1) { 841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (list[i].order != other.list[i].order || 842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list[i].lowOffset != other.list[i].lowOffset || 843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list[i].highOffset != other.list[i].highOffset) { 
844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool OrderList::matchesAt(int32_t offset, const OrderList &other) const 852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // NOTE: sizes include the NULLORDER, which we don't want to compare. 854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t otherSize = other.size() - 1; 855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (listSize - 1 - offset < otherSize) { 857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) { 861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (getOrder(i) != other.getOrder(j)) { 862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic char *printOffsets(char *buffer, OrderList &list) 870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t size = list.size(); 872c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char *s = buffer; 873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(int32_t i = 0; i < size; i += 1) { 875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const Order *order = list.get(i); 876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (i != 0) { 878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s += sprintf(s, ", "); 879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s += sprintf(s, "(%d, %d)", order->lowOffset, order->highOffset); 882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return buffer; 885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic char *printOrders(char *buffer, OrderList &list) 888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t size = list.size(); 890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char *s = buffer; 891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(int32_t i = 0; i < size; i += 1) { 
893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const Order *order = list.get(i); 894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (i != 0) { 896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s += sprintf(s, ", "); 897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s += sprintf(s, "%8.8X", order->order); 900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return buffer; 903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid SSearchTest::offsetTest() 906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 907b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static const UVersionInfo icu49 = { 4, 9, 0, 0 }; 908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const char *test[] = { 909b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The sequence \u0FB3\u0F71\u0F71\u0F80 contains a discontiguous 910b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // contraction (\u0FB3\u0F71\u0F80) logically followed by \u0F71. 
911b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru "\\u1E33\\u0FB3\\u0F71\\u0F71\\u0F80\\uD835\\uDF6C\\u01B0", 912b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\ua191\\u16ef\\u2036\\u017a", 914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if 0 916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This results in a complex interaction between contraction, 917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // expansion and normalization that confuses the backwards offset fixups. 918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", 919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85", 922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3", 923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u02FE\\u02FF" 925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F" 926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F" 927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F" 928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F" 
92927f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E", // currently not working, see #8081 930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 93127f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318", // currently not working, see #8081 93227f654740f2a26ad62a5c155af9199af9e69b889claireho "a\\u02FF\\u0301\\u0316", // currently not working, see #8081 93327f654740f2a26ad62a5c155af9199af9e69b889claireho "a\\u02FF\\u0316\\u0301", 93427f654740f2a26ad62a5c155af9199af9e69b889claireho "a\\u0430\\u0301\\u0316", 93527f654740f2a26ad62a5c155af9199af9e69b889claireho "a\\u0430\\u0316\\u0301", 936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "abc\\u0E41\\u0301\\u0316", 93727f654740f2a26ad62a5c155af9199af9e69b889claireho "abc\\u0E41\\u0316\\u0301", 93827f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u0E41\\u0301\\u0316", 93927f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u0E41\\u0316\\u0301", 94027f654740f2a26ad62a5c155af9199af9e69b889claireho "a\\u0301\\u0316", 94127f654740f2a26ad62a5c155af9199af9e69b889claireho "a\\u0316\\u0301", 94227f654740f2a26ad62a5c155af9199af9e69b889claireho "\\uAC52\\uAC53", 94327f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u34CA\\u34CB", 94427f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u11ED\\u11EE", 94527f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u30C3\\u30D0", 94627f654740f2a26ad62a5c155af9199af9e69b889claireho "p\\u00E9ch\\u00E9", 947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "a\\u0301\\u0325", 948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "a\\u0300\\u0325", 949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "a\\u0325\\u0300", 950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "A\\u0323\\u0300B", 951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "A\\u0300\\u0323B", 
952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "A\\u0301\\u0323B", 953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "A\\u0302\\u0301\\u0323B", 954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "abc", 955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "ab\\u0300c", 956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "ab\\u0300\\u0323c", 957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru " \\uD800\\uDC00\\uDC00", 958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "a\\uD800\\uDC00\\uDC00", 959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "A\\u0301\\u0301", 960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "A\\u0301\\u0323", 961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "A\\u0301\\u0323B", 962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "B\\u0301\\u0323C", 963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "A\\u0300\\u0323B", 964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u0301A\\u0301\\u0301", 965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "abcd\\r\\u0301", 966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "p\\u00EAche", 967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "pe\\u0302che", 968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t testCount = ARRAY_SIZE(test); 971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status); 973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 9746d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Failed to 
create collator in offsetTest! - %s", u_errorName(status)); 975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char buffer[4096]; // A bit of a hack... just happens to be long enough for all the test cases... 978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We could allocate one that's the right size by (CE_count * 10) + 2 979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]" 980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(int32_t i = 0; i < testCount; i += 1) { 984b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (!isICUVersionAtLeast(icu49) && i>=4 && i<=6) { 98527f654740f2a26ad62a5c155af9199af9e69b889claireho continue; // timebomb until ticket #8080 is resolved 98627f654740f2a26ad62a5c155af9199af9e69b889claireho } 987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString ts = CharsToUnicodeString(test[i]); 988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru CollationElementIterator *iter = col->createCollationElementIterator(ts); 989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru OrderList forwardList; 990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru OrderList backwardList; 991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t order, low, high; 992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru low = iter->getOffset(); 
995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = iter->next(status); 996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru high = iter->getOffset(); 997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru forwardList.add(order, low, high); 999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while (order != CollationElementIterator::NULLORDER); 1000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iter->reset(); 1002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru iter->setOffset(ts.length(), status); 1003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset()); 1005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 1007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru high = iter->getOffset(); 1008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru order = iter->previous(status); 1009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru low = iter->getOffset(); 1010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (order == CollationElementIterator::NULLORDER) { 1012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1014c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backwardList.add(order, low, high); 1016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while (TRUE); 1017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
1018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru backwardList.reverse(); 1019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (forwardList.compare(backwardList)) { 1021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru logln("Works with \"%s\"", test[i]); 1022c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru logln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); 1023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// logln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); 1024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru logln("Forward CEs: [%s]", printOrders(buffer, forwardList)); 1026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// logln("Backward CEs: [%s]", printOrders(buffer, backwardList)); 1027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru logln(); 1029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 1030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("Fails with \"%s\"", test[i]); 1031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru infoln("Forward offsets: [%s]", printOffsets(buffer, forwardList)); 1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList)); 1033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru infoln("Forward CEs: [%s]", printOrders(buffer, forwardList)); 1035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru infoln("Backward CEs: [%s]", printOrders(buffer, backwardList)); 1036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru infoln(); 
1038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete iter; 1040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete col; 1042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 104450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if 0 1045b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic UnicodeString &escape(const UnicodeString &string, UnicodeString &buffer) 1046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1047b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(int32_t i = 0; i < string.length(); i += 1) { 1048b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 ch = string.char32At(i); 1049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1050b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (ch >= 0x0020 && ch <= 0x007F) { 1051b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (ch == 0x005C) { 1052b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru buffer.append("\\\\"); 1053b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1054b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru buffer.append(ch); 1055b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1056b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1057b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char cbuffer[12]; 1058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1059b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (ch <= 0xFFFFL) { 1060b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sprintf(cbuffer, "\\u%4.4X", ch); 1061b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1062b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 
sprintf(cbuffer, "\\U%8.8X", ch); 1063b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1065b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru buffer.append(cbuffer); 1066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1068b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (ch >= 0x10000L) { 1069b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru i += 1; 1070b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1073b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return buffer; 1074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 107550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 1076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1077b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#if 1 1078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1079b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustruct PCE 1080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1081b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint64_t ce; 1082b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t lowOffset; 1083b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t highOffset; 1084b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}; 1085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1086b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruclass PCEList 1087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1088b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querupublic: 1089b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru PCEList(UCollator *coll, const UnicodeString &string); 
1090b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ~PCEList(); 1091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1092b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t size() const; 1093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1094b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const PCE *get(int32_t index) const; 1095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1096b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t getLowOffset(int32_t index) const; 1097b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t getHighOffset(int32_t index) const; 1098b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint64_t getOrder(int32_t index) const; 1099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBool matchesAt(int32_t offset, const PCEList &other) const; 1101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint64_t operator[](int32_t index) const; 1103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruprivate: 1105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void add(uint64_t ce, int32_t low, int32_t high); 1106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru PCE *list; 1108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t listMax; 1109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t listSize; 1110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 1111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruPCEList::PCEList(UCollator *coll, const UnicodeString &string) 1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru{ 1114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status); 1116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint64_t order; 1117b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t low, high; 1118b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru list = new PCE[listMax]; 1120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1121b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_setOffset(elems, 0, &status); 1122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1123b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru do { 1124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru order = ucol_nextProcessed(elems, &low, &high, &status); 1125b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru add(order, low, high); 1126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } while (order != UCOL_PROCESSED_NULLORDER); 1127b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_closeElements(elems); 1129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1131b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruPCEList::~PCEList() 1132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1133b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete[] list; 1134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1136b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid PCEList::add(uint64_t order, int32_t low, int32_t high) 
1137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (listSize >= listMax) { 1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru listMax *= 2; 1140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru PCE *newList = new PCE[listMax]; 1142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memcpy(newList, list, listSize * sizeof(Order)); 1144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete[] list; 1145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru list = newList; 1146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru list[listSize].ce = order; 1149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru list[listSize].lowOffset = low; 1150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru list[listSize].highOffset = high; 1151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru listSize += 1; 1153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruconst PCE *PCEList::get(int32_t index) const 1156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (index >= listSize) { 1158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return &list[index]; 
1162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruint32_t PCEList::getLowOffset(int32_t index) const 1165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const PCE *pce = get(index); 1167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1168b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (pce != NULL) { 1169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return pce->lowOffset; 1170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1172b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return -1; 1173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruint32_t PCEList::getHighOffset(int32_t index) const 1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const PCE *pce = get(index); 1178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (pce != NULL) { 1180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return pce->highOffset; 1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return -1; 1184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuint64_t PCEList::getOrder(int32_t index) const 1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 
1188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const PCE *pce = get(index); 1189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (pce != NULL) { 1191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return pce->ce; 1192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return UCOL_PROCESSED_NULLORDER; 1195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruint32_t PCEList::size() const 1198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1199b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return listSize; 1200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruUBool PCEList::matchesAt(int32_t offset, const PCEList &other) const 1203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // NOTE: sizes include the NULLORDER, which we don't want to compare. 
1205b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t otherSize = other.size() - 1; 1206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (listSize - 1 - offset < otherSize) { 1208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return FALSE; 1209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) { 1212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (getOrder(i) != other.getOrder(j)) { 1213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return FALSE; 1214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1215b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return TRUE; 1218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuint64_t PCEList::operator[](int32_t index) const 1221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1222b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return getOrder(index); 1223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SSearchTest::boyerMooreTest() 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollator *coll = NULL; 1229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 
CollData *data = NULL; 123050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const CEList* ce = NULL; 123150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const CEList* ce1 = NULL; 1232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString lp = "fuss"; 1233b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString sp = "fu\\u00DF"; 1234b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BoyerMooreSearch *longPattern = NULL; 1235b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BoyerMooreSearch *shortPattern = NULL; 1236b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball", 1237b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF", 1238b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"}; 1239b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t start = -1, end = -1; 1240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 124127f654740f2a26ad62a5c155af9199af9e69b889claireho coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status); 1242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 12436d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Could not open collator. 
- %s", u_errorName(status)); 1244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 1245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru data = CollData::open(coll, status); 1248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 1249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Could not open CollData object."); 1250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto close_data; 1251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 125350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->getDynamicClassID(); 125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 125550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get dynamic class ID of CollData."); 125650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 125750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 125850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 125950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho data->getStaticClassID(); 126050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get static class ID of CollData."); 126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1265b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru longPattern = new BoyerMooreSearch(data, lp.unescape(), NULL, status); 1266b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru shortPattern = new BoyerMooreSearch(data, sp.unescape(), NULL, status); 1267b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) 
{ 1268b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Could not create pattern objects."); 1269b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto close_patterns; 1270b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho longPattern->getBadCharacterTable(); 127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho shortPattern->getBadCharacterTable(); 127450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get bad character table."); 127650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho longPattern->getGoodSuffixTable(); 128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho shortPattern->getGoodSuffixTable(); 128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get good suffix table."); 128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho longPattern->getDynamicClassID(); 128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho shortPattern->getDynamicClassID(); 128850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 128950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get dynamic class ID of BoyerMooreSearch."); 129050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 129150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
longPattern->getStaticClassID(); 129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho shortPattern->getStaticClassID(); 129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get static class ID of BoyerMooreSearch."); 129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 129950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho longPattern->getData(); 130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho shortPattern->getData(); 130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get collate data."); 130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 130650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ce = longPattern->getPatternCEs(); 130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ce1 = shortPattern->getPatternCEs(); 130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get pattern CEs."); 131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ce->getDynamicClassID(); 131550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ce1->getDynamicClassID(); 131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get dynamic class ID of CEList."); 131850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ce->getStaticClassID(); 132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ce1->getStaticClassID(); 132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Could not get static class ID of CEList."); 132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(data->minLengthInChars(ce,0) != 3){ 132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Minimal Length in Characters for 'data' with 'ce' was suppose to give 3."); 133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(data->minLengthInChars(ce1,0) != 3){ 133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Minimal Length in Characters for 'data' with 'ce1' was suppose to give 3."); 133550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto close_patterns; 133650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 133750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) { 1339b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString target = targets[t].unescape(); 1340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1341b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru longPattern->setTargetString(&target, status); 1342b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (longPattern->search(0, start, end)) { 1343b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru logln("Test %d: found 
long pattern at [%d, %d].", t, start, end); 1344b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1345b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Test %d: did not find long pattern.", t); 1346b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1347b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1348b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru shortPattern->setTargetString(&target, status); 1349b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (shortPattern->search(0, start, end)) { 1350b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru logln("Test %d: found short pattern at [%d, %d].", t, start, end); 1351b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1352b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Test %d: did not find short pattern.", t); 1353b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(longPattern->empty()){ 135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Test %d: Long pattern should not have been empty."); 135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(shortPattern->empty()){ 136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("Test %d: Short pattern should not have been empty."); 136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1362b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1363b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1364b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruclose_patterns: 1365b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete shortPattern; 1366b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete longPattern; 
1367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1368b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruclose_data: 1369b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData::close(data); 1370b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_close(coll); 1371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1373b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SSearchTest::bmsTest() 1374c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1376b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollator *coll = NULL; 1377b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCD *data = NULL; 1378b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString lp = "fuss"; 1379b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString lpu = lp.unescape(); 1380b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString sp = "fu\\u00DF"; 1381b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString spu = sp.unescape(); 1382b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BMS *longPattern = NULL; 1383b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BMS *shortPattern = NULL; 1384b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball", 1385b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF", 1386b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"}; 
1387b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t start = -1, end = -1; 1388b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 138927f654740f2a26ad62a5c155af9199af9e69b889claireho coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status); 1390b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 13916d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Could not open collator. - %s", u_errorName(status)); 1392b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 1393b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1394b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1395b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru data = ucd_open(coll, &status); 1396b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 1397b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Could not open CollData object."); 1398b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto close_data; 1399b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1401b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru longPattern = bms_open(data, lpu.getBuffer(), lpu.length(), NULL, 0, &status); 1402b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru shortPattern = bms_open(data, spu.getBuffer(), spu.length(), NULL, 0, &status); 1403b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 1404b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Couldn't open pattern objects."); 1405b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto close_patterns; 1406b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (uint32_t t = 0; t < 
(sizeof(targets)/sizeof(targets[0])); t += 1) { 1409b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString target = targets[t].unescape(); 1410c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1411b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms_setTargetString(longPattern, target.getBuffer(), target.length(), &status); 1412b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (bms_search(longPattern, 0, &start, &end)) { 1413b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru logln("Test %d: found long pattern at [%d, %d].", t, start, end); 1414b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1415b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Test %d: did not find long pattern.", t); 1416b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1418b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms_setTargetString(shortPattern, target.getBuffer(), target.length(), &status); 1419b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (bms_search(shortPattern, 0, &start, &end)) { 1420b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru logln("Test %d: found short pattern at [%d, %d].", t, start, end); 1421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 1422b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Test %d: did not find short pattern.", t); 1423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1425b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 142650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Add better coverage for bms code. 
*/ 142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(bms_empty(longPattern)) { 142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("FAIL: longgPattern is empty."); 142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!bms_getData(longPattern)) { 143250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("FAIL: bms_getData returned NULL."); 143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 143450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!ucd_getCollator(data)) { 143650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("FAIL: ucd_getCollator returned NULL."); 143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1439b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruclose_patterns: 1440b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms_close(shortPattern); 1441b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms_close(longPattern); 1442b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1443b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruclose_data: 1444b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucd_close(data); 144550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ucd_freeCache(); 1446b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_close(coll); 1447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1449b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SSearchTest::goodSuffixTest() 1450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1451b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1452b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollator *coll = NULL; 
1453b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData *data = NULL; 1454b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString pat = /*"gcagagag"*/ "fxeld"; 1455b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString target = /*"gcatcgcagagagtatacagtacg"*/ "cloveldfxeld"; 1456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BoyerMooreSearch *pattern = NULL; 1457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t start = -1, end = -1; 1458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru coll = ucol_open(NULL, &status); 1460b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 14616d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Couldn't open collator. - %s", u_errorName(status)); 1462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 1463b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru data = CollData::open(coll, status); 1466b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 1467b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Couldn't open CollData object."); 1468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto close_data; 1469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1470c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1471b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pattern = new BoyerMooreSearch(data, pat, &target, status); 1472b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 1473b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Couldn't open pattern object."); 1474b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 
goto close_pattern; 1475b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1477b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (pattern->search(0, start, end)) { 1478b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru logln("Found pattern at [%d, %d].", start, end); 1479b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 1480b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Did not find pattern."); 1481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1482c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1483b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruclose_pattern: 1484b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete pattern; 1485b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1486b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruclose_data: 1487b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData::close(data); 1488b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_close(coll); 1489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1491b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 1492b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// searchTime() A quick and dirty performance test for string search. 1493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Probably doesn't really belong as part of intltest, but it 1494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// does check that the search succeeds, and gets the right result, 1495b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// so it serves as a functionality test also. 
1496b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 1497b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// To run as a perf test, up the loop count, select by commenting 1498b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// and uncommenting in the code the operation to be measured, 1499b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// rebuild, and measure the running time of this test alone. 1500b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 1501b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// time LD_LIBRARY_PATH=whatever ./intltest collate/SSearchTest/searchTime 1502b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 1503b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SSearchTest::searchTime() { 1504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru static const char *longishText = 1505b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Whylom, as olde stories tellen us,\n" 1506b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Ther was a duk that highte Theseus:\n" 1507b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Of Athenes he was lord and governour,\n" 1508b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And in his tyme swich a conquerour,\n" 1509b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"That gretter was ther noon under the sonne.\n" 1510b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Ful many a riche contree hadde he wonne;\n" 1511b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"What with his wisdom and his chivalrye,\n" 1512b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"He conquered al the regne of Femenye,\n" 1513b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"That whylom was y-cleped Scithia;\n" 1514b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And weddede the quene Ipolita,\n" 
1515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And broghte hir hoom with him in his contree\n" 1516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"With muchel glorie and greet solempnitee,\n" 1517b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And eek hir yonge suster Emelye.\n" 1518b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And thus with victorie and with melodye\n" 1519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Lete I this noble duk to Athenes ryde,\n" 1520b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And al his hoost, in armes, him bisyde.\n" 1521b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And certes, if it nere to long to here,\n" 1522b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"I wolde han told yow fully the manere,\n" 1523b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"How wonnen was the regne of Femenye\n" 1524b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"By Theseus, and by his chivalrye;\n" 1525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And of the grete bataille for the nones\n" 1526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Bitwixen Athen's and Amazones;\n" 1527b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And how asseged was Ipolita,\n" 1528b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"The faire hardy quene of Scithia;\n" 1529b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And of the feste that was at hir weddinge,\n" 1530b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And of the tempest at hir hoom-cominge;\n" 1531b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"But al that thing I moot as now forbere.\n" 1532b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"I have, God woot, a large feeld to ere,\n" 1533b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And wayke been the oxen in my plough.\n" 
1534b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"The remenant of the tale is long y-nough.\n" 1535b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"I wol nat letten eek noon of this route;\n" 1536b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Lat every felawe telle his tale aboute,\n" 1537b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And lat see now who shal the soper winne;\n" 1538b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And ther I lefte, I wol ageyn biginne.\n" 1539b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"This duk, of whom I make mencioun,\n" 1540b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"When he was come almost unto the toun,\n" 1541b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"In al his wele and in his moste pryde,\n" 1542b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"He was war, as he caste his eye asyde,\n" 1543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Wher that ther kneled in the hye weye\n" 1544b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"A companye of ladies, tweye and tweye,\n" 1545b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Ech after other, clad in clothes blake; \n" 1546b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"But swich a cry and swich a wo they make,\n" 1547b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"That in this world nis creature livinge,\n" 1548b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"That herde swich another weymentinge;\n" 1549b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And of this cry they nolde never stenten,\n" 1550b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Til they the reynes of his brydel henten.\n" 1551b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"'What folk ben ye, that at myn hoomcominge\n" 1552b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste 
Queru"Perturben so my feste with cryinge'?\n" 1553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Quod Theseus, 'have ye so greet envye\n" 1554b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Of myn honour, that thus compleyne and crye? \n" 1555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Or who hath yow misboden, or offended?\n" 1556b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And telleth me if it may been amended;\n" 1557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And why that ye ben clothed thus in blak'?\n" 1558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"The eldest lady of hem alle spak,\n" 1559b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"When she hadde swowned with a deedly chere,\n" 1560b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"That it was routhe for to seen and here,\n" 1561b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And seyde: 'Lord, to whom Fortune hath yiven\n" 1562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Victorie, and as a conquerour to liven,\n" 1563b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Noght greveth us your glorie and your honour;\n" 1564b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"But we biseken mercy and socour.\n" 1565b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Have mercy on our wo and our distresse.\n" 1566b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Som drope of pitee, thurgh thy gentilesse,\n" 1567b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Up-on us wrecched wommen lat thou falle.\n" 1568b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"For certes, lord, ther nis noon of us alle,\n" 1569b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"That she nath been a duchesse or a quene;\n" 1570b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Now be we caitifs, as it is wel sene:\n" 
1571b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Thanked be Fortune, and hir false wheel,\n" 1572b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"That noon estat assureth to be weel.\n" 1573b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And certes, lord, t'abyden your presence,\n" 1574b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Here in the temple of the goddesse Clemence\n" 1575b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"We han ben waytinge al this fourtenight;\n" 1576b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Now help us, lord, sith it is in thy might.\n" 1577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"I wrecche, which that wepe and waille thus,\n" 1578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Was whylom wyf to king Capaneus,\n" 1579b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"That starf at Thebes, cursed be that day!\n" 1580b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And alle we, that been in this array,\n" 1581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And maken al this lamentacioun,\n" 1582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"We losten alle our housbondes at that toun,\n" 1583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Whyl that the sege ther-aboute lay.\n" 1584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And yet now th'olde Creon, weylaway!\n" 1585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"The lord is now of Thebes the citee, \n" 1586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Fulfild of ire and of iniquitee,\n" 1587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"He, for despyt, and for his tirannye,\n" 1588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"To do the dede bodyes vileinye,\n" 1589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Of alle our lordes, whiche that 
ben slawe,\n" 1590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Hath alle the bodyes on an heep y-drawe,\n" 1591b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"And wol nat suffren hem, by noon assent,\n" 1592b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"Neither to been y-buried nor y-brent,\n" 1593b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru"But maketh houndes ete hem in despyt. zet'\n"; 1594b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1595b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#define TEST_BOYER_MOORE 1 1596b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruconst char *cPattern = "maketh houndes ete hem"; 1597b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//const char *cPattern = "Whylom"; 1598b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//const char *cPattern = "zet"; 1599b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *testId = "searchTime()"; // for error macros. 1600b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString target = longishText; 1601b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUCollatorPointer collator(ucol_open("en", &status)); 160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CollData *data = CollData::open(collator.getAlias(), status); 160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status) || collator.isNull() || data == NULL) { 16076d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Unable to open UCollator or CollData. 
- %s", u_errorName(status)); 16086d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru return; 16096d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru } 161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho //ucol_setStrength(collator.getAlias(), collatorStrength); 161150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho //ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status); 1612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString uPattern = cPattern; 1613b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#ifndef TEST_BOYER_MOORE 161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUStringSearchPointer uss(usearch_openFromCollator(uPattern.getBuffer(), uPattern.length(), 161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho target.getBuffer(), target.length(), 161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho collator.getAlias(), 161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho NULL, // the break iterator 161850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &status)); 1619b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1620b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#else 1621b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BoyerMooreSearch bms(data, uPattern, &target, status); 1622b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1623b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 1624c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1625b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// int32_t foundStart; 1626b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// int32_t foundEnd; 1627b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBool found; 1628b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1629b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Find the match position usgin strstr 
1630b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *pm = strstr(longishText, cPattern); 1631b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT_M(pm!=NULL, "No pattern match with strstr"); 1632b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t refMatchPos = (int32_t)(pm - longishText); 1633b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t icuMatchPos; 1634b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t icuMatchEnd; 1635b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#ifndef TEST_BOYER_MOORE 163650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status); 1637b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1638b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#else 1639b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru found = bms.search(0, icuMatchPos, icuMatchEnd); 1640b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 1641b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT_M(refMatchPos == icuMatchPos, "strstr and icu give different match positions."); 1642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1643b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t i; 1644b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // int32_t j=0; 1645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1646b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Try loopcounts around 100000 to some millions, depending on the operation, 1647b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // to get runtimes of at least several seconds. 
1648b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i=0; i<10000; i++) { 1649b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#ifndef TEST_BOYER_MOORE 165050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho found = usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status); 1651b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#else 1652b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru found = bms.search(0, icuMatchPos, icuMatchEnd); 1653b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 1654b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //TEST_ASSERT_SUCCESS(status); 1655b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //TEST_ASSERT(found); 1656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 165750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // usearch_setOffset(uss.getAlias(), 0, &status); 165850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // icuMatchPos = usearch_next(uss.getAlias(), &status); 1659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1660b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The i+j stuff is to confuse the optimizer and get it to actually leave the 1661b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // call to strstr in place. 
1662b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //pm = strstr(longishText+j, cPattern); 1663b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //j = (j + i)%5; 1664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1665b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1666b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //printf("%ld, %d\n", pm-longishText, j); 166750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifdef TEST_BOYER_MOORE 1668b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData::close(data); 1669b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 1670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1671b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 1672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Random Numbers. Similar to standard lib rand() and srand() 1676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Not using library to 1677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1. Get same results on all platforms. 1678c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 2. Get access to current seed, to more easily reproduce failures. 
1679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1680c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 1681c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic uint32_t m_seed = 1; 1682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic uint32_t m_rand() 1684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m_seed = m_seed * 1103515245 + 12345; 1686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return (uint32_t)(m_seed/65536) % 32768; 1687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruclass Monkey 1690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querupublic: 1692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual void append(UnicodeString &test, UnicodeString &alternate) = 0; 1693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruprotected: 1695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Monkey(); 1696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual ~Monkey(); 1697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 1698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruMonkey::Monkey() 1700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // ook? 
1702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruMonkey::~Monkey() 1705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // ook? 1707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruclass SetMonkey : public Monkey 1710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querupublic: 1712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru SetMonkey(const USet *theSet); 1713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ~SetMonkey(); 1714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru virtual void append(UnicodeString &test, UnicodeString &alternate); 1716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruprivate: 1718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const USet *set; 1719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 1720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruSetMonkey::SetMonkey(const USet *theSet) 1722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru : Monkey(), set(theSet) 1723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // ook? 
1725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruSetMonkey::~SetMonkey() 1728c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //ook... 1730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid SetMonkey::append(UnicodeString &test, UnicodeString &alternate) 1733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t size = uset_size(set); 1735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t index = m_rand() % size; 1736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 ch = uset_charAt(set, index); 1737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString str(ch); 1738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru test.append(str); 1740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru alternate.append(str); // flip case, or some junk? 
1741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruclass StringSetMonkey : public Monkey 1744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querupublic: 1746b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData); 1747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ~StringSetMonkey(); 1748c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1749c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru void append(UnicodeString &testCase, UnicodeString &alternate); 1750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruprivate: 1752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString &generateAlternative(const UnicodeString &testCase, UnicodeString &alternate); 1753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const USet *set; 1755b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollator *coll; 1756b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData *collData; 1757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 1758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1759b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruStringSetMonkey::StringSetMonkey(const USet *theSet, UCollator *theCollator, CollData *theCollData) 1760b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru: Monkey(), set(theSet), coll(theCollator), collData(theCollData) 1761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // ook. 
1763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruStringSetMonkey::~StringSetMonkey() 1766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // ook? 1768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid StringSetMonkey::append(UnicodeString &testCase, UnicodeString &alternate) 1771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t itemCount = uset_getItemCount(set), len = 0; 1773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t index = m_rand() % itemCount; 1774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 rangeStart = 0, rangeEnd = 0; 1775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar buffer[16]; 1776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode err = U_ZERO_ERROR; 1777c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1778c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru len = uset_getItem(set, index, &rangeStart, &rangeEnd, buffer, 16, &err); 1779c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (len == 0) { 1781c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t offset = m_rand() % (rangeEnd - rangeStart + 1); 1782c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 ch = rangeStart + offset; 1783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString str(ch); 1784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
testCase.append(str); 1786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru generateAlternative(str, alternate); 1787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if (len > 0) { 1788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // should check that len < 16... 1789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString str(buffer, len); 1790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru testCase.append(str); 1792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru generateAlternative(str, alternate); 1793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 1794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // shouldn't happen... 1795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCase, UnicodeString &alternate) 1799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // find out shortest string for the longest sequence of ces. 
1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // needs to be refined to use dynamic programming, but will be roughly right 1802b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1803b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CEList ceList(coll, testCase, status); 1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString alt; 1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t offset = 0; 1806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ceList.size() == 0) { 1808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return alternate.append(testCase); 1809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (offset < ceList.size()) { 1812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t ce = ceList.get(offset); 1813b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const StringList *strings = collData->getStringList(ce); 1814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strings == NULL) { 1816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return alternate.append(testCase); 1817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t stringCount = strings->size(); 1820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t tries = 0; 1821b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // find random string that generates the same CEList 
1823b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const CEList *ceList2 = NULL; 1824b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString *string = NULL; 1825b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBool matches = FALSE; 1826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 1828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t s = m_rand() % stringCount; 1829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (tries++ > stringCount) { 1831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru alternate.append(testCase); 1832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return alternate; 1833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru string = strings->get(s); 1836b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ceList2 = collData->getCEList(string); 1837b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru matches = ceList.matchesAt(offset, ceList2); 1838b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1839b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (! matches) { 1840b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collData->freeCEList((CEList *) ceList2); 1841b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1842b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } while (! 
matches); 1843c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru alt.append(*string); 1845c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru offset += ceList2->size(); 1846b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru collData->freeCEList(ceList2); 1847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1849b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const CEList altCEs(coll, alt, status); 1850c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1851c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ceList.matchesAt(0, &altCEs)) { 1852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return alternate.append(alt); 1853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return alternate.append(testCase); 1856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic void generateTestCase(UCollator *coll, Monkey *monkeys[], int32_t monkeyCount, UnicodeString &testCase, UnicodeString &alternate) 1859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t pieces = (m_rand() % 4) + 1; 1861b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool matches; 1863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru do { 1865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru testCase.remove(); 
//
// Find the next acceptable boundary following the specified starting index
// in the target text being searched.
//
// The code point at startIndex is always consumed. If it is a control, CR or
// LF, the boundary is right after it; otherwise any run of Extend /
// SpacingMark code points that follows is skipped as well.
//
// TODO: refine what is an acceptable boundary. For the moment,
//       choose the next position not within a combining sequence.
//
// (Currently compiled out.)
//
#if 0
static int32_t nextBoundaryAfter(const UnicodeString &string, int32_t startIndex) {
    const UChar *text    = string.getBuffer();
    const int32_t textLen = string.length();

    // At or past the end there is nothing to advance over.
    if (startIndex >= textLen) {
        return startIndex;
    }

    int32_t pos = startIndex;
    UChar32 c;

    U16_NEXT(text, pos, textLen, c);

    int32_t gcb = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);

    // Control characters do not combine, so stop immediately after one.
    switch (gcb) {
    case U_GCB_CONTROL:
    case U_GCB_LF:
    case U_GCB_CR:
        return pos;

    default:
        break;
    }

    // The first character can carry trailing combining characters.
    // `boundary` trails `pos`: it only moves past a code point once that
    // code point is known to extend the cluster.
    int32_t boundary = pos;

    while (pos < textLen) {
        U16_NEXT(text, pos, textLen, c);
        gcb = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);

        if (gcb != U_GCB_EXTEND && gcb != U_GCB_SPACING_MARK) {
            break;
        }

        boundary = pos;
    }

    return boundary;
}
#endif
1943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 c; 1944c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_GET(text, 0, index, textLen, c); 1945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); 1946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) { 1947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 1948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1949b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // We are at a combining mark. If the preceding character is anything 1951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // except a CONTROL, CR or LF, we are in a combining sequence. 1952b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_PREV(text, 0, index, c); 1953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); 1954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return !(gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR); 1956b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 195750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 1958b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 1959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset, const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd) 1960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 1961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
OrderList targetOrders(coll, target, offset); 1963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru OrderList patternOrders(coll, pattern); 1964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t targetSize = targetOrders.size() - 1; 1965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t patternSize = patternOrders.size() - 1; 1966b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBreakIterator *charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status), 1967b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru target.getBuffer(), target.length(), &status); 1968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (patternSize == 0) { 1970b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Searching for an empty pattern always fails 1971b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru matchStart = matchEnd = -1; 1972b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ubrk_close(charBreakIterator); 1973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 1974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matchStart = matchEnd = -1; 1977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(int32_t i = 0; i < targetSize; i += 1) { 1979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (targetOrders.matchesAt(i, patternOrders)) { 1980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t start = targetOrders.getLowOffset(i); 1981c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t maxLimit = targetOrders.getLowOffset(i + patternSize); 1982c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru int32_t minLimit = targetOrders.getLowOffset(i + patternSize - 1); 1983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // if the low and high offsets of the first CE in 1985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the match are the same, it means that the match 1986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // starts in the middle of an expansion - all but 1987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the first CE of the expansion will have the offset 1988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // of the following character. 1989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (start == targetOrders.getHighOffset(i)) { 1990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Make sure match starts on a grapheme boundary 1994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (! ubrk_isBoundary(charBreakIterator, start)) { 1995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // If the low and high offsets of the CE after the match 1999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // are the same, it means that the match ends in the middle 2000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // of an expansion sequence. 
2001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (maxLimit == targetOrders.getHighOffset(i + patternSize) && 2002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru targetOrders.getOrder(i + patternSize) != UCOL_NULLORDER) { 2003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 2004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t mend = maxLimit; 2007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Find the first grapheme break after the character index 2009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // of the last CE in the match. If it's after character index 2010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that's after the last CE in the match, use that index 2011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // as the end of the match. 2012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (minLimit < maxLimit) { 2013b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // When the last CE's low index is same with its high index, the CE is likely 2014b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // a part of expansion. In this case, the index is located just after the 2015b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // character corresponding to the CEs compared above. If the index is right 2016b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // at the break boundary, move the position to the next boundary will result 2017b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // incorrect match length when there are ignorable characters exist between 2018b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the position and the next character produces CE(s). See ticket#8482. 
2019b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (minLimit == targetOrders.getHighOffset(i + patternSize - 1) && ubrk_isBoundary(charBreakIterator, minLimit)) { 2020b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho mend = minLimit; 2021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 2022b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t nba = ubrk_following(charBreakIterator, minLimit); 2023b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 2024b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (nba >= targetOrders.getHighOffset(i + patternSize - 1)) { 2025b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho mend = nba; 2026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 2027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (mend > maxLimit) { 2031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 2032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2034c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (! 
ubrk_isBoundary(charBreakIterator, mend)) { 2035c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 2036c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matchStart = start; 2039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru matchEnd = mend; 2040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ubrk_close(charBreakIterator); 2042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 2043c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2044c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2045c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ubrk_close(charBreakIterator); 2047c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 2048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 2049c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2050c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 2051c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic int32_t getIntParam(UnicodeString name, UnicodeString ¶ms, int32_t defaultVal) { 2052c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t val = defaultVal; 2053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2054c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru name.append(" *= *(-?\\d+)"); 2055c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m(name, params, 0, status); 2058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
2059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (m.find()) { 2060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The param exists. Convert the string to an int. 2061c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char valString[100]; 2062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t paramLength = m.end(1, status) - m.start(1, status); 2063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (paramLength >= (int32_t)(sizeof(valString)-1)) { 2065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru paramLength = (int32_t)(sizeof(valString)-2); 2066c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru params.extract(m.start(1, status), paramLength, valString, sizeof(valString)); 2069c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru val = strtol(valString, NULL, 10); 2070c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2071c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Delete this parameter from the params string. 2072c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m.reset(); 2073c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru params = m.replaceFirst("", status); 2074c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //U_ASSERT(U_SUCCESS(status)); 2077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (! 
U_SUCCESS(status)) { 2078c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru val = defaultVal; 2079c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2080c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2081c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return val; 2082c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 2083c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 2084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2085c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION 2086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern, 2087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const char *name, const char *strength, uint32_t seed) 2088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 2089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2090c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t actualStart = -1, actualEnd = -1; 2091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length(); 2092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t expectedStart = -1, expectedEnd = -1; 2093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t notFoundCount = 0; 209450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), 209550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testCase.getBuffer(), testCase.length(), 209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho coll, 209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho NULL, // the break iterator 209850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
&status)); 2099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // **** TODO: find *all* matches, not just first one **** 2101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd); 2102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 210350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status); 2104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { 2106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n" 2107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru " strength=%s seed=%d", 2108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed); 2109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (expectedStart == -1 && actualStart == -1) { 2112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notFoundCount += 1; 2113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // **** TODO: find *all* matches, not just first one **** 2116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd); 2117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho usearch_setPattern(uss.getAlias(), altPattern.getBuffer(), 
altPattern.length(), &status); 2119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 212050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho usearch_search(uss.getAlias(), 0, &actualStart, &actualEnd, &status); 2121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { 2123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n" 2124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru " strength=%s seed=%d", 2125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed); 2126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (expectedStart == -1 && actualStart == -1) { 2129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notFoundCount += 1; 2130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return notFoundCount; 2133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 2134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 213527f654740f2a26ad62a5c155af9199af9e69b889clairehostatic void hexForUnicodeString(const UnicodeString &ustr, char * cbuf, int32_t cbuflen) 213627f654740f2a26ad62a5c155af9199af9e69b889claireho{ 213727f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t ustri, ustrlen = ustr.length(); 213827f654740f2a26ad62a5c155af9199af9e69b889claireho 213927f654740f2a26ad62a5c155af9199af9e69b889claireho for (ustri = 0; ustri < ustrlen; ++ustri) { 214027f654740f2a26ad62a5c155af9199af9e69b889claireho if (cbuflen >= 9 /* format 
width for single code unit(5) + terminating ellipsis(3) + null(1) */) { 214127f654740f2a26ad62a5c155af9199af9e69b889claireho int len = sprintf(cbuf, " %04X", ustr.charAt(ustri)); 214227f654740f2a26ad62a5c155af9199af9e69b889claireho cbuflen -= len; 214327f654740f2a26ad62a5c155af9199af9e69b889claireho cbuf += len; 214427f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 214527f654740f2a26ad62a5c155af9199af9e69b889claireho if (cbuflen >= 4 /* terminating ellipsis(3) + null(1) */) { 214627f654740f2a26ad62a5c155af9199af9e69b889claireho sprintf(cbuf, "..."); 214727f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (cbuflen >= 1) { 214827f654740f2a26ad62a5c155af9199af9e69b889claireho cbuf = 0; 214927f654740f2a26ad62a5c155af9199af9e69b889claireho } 215027f654740f2a26ad62a5c155af9199af9e69b889claireho break; 215127f654740f2a26ad62a5c155af9199af9e69b889claireho } 215227f654740f2a26ad62a5c155af9199af9e69b889claireho } 215327f654740f2a26ad62a5c155af9199af9e69b889claireho} 215427f654740f2a26ad62a5c155af9199af9e69b889claireho 2155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruint32_t SSearchTest::bmMonkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern, 2156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BoyerMooreSearch *bms, BoyerMooreSearch *abms, 2157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *name, const char *strength, uint32_t seed) 2158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 2159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t actualStart = -1, actualEnd = -1; 2161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length(); 2162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t expectedStart = 
-1, expectedEnd = -1; 2163b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t notFoundCount = 0; 216427f654740f2a26ad62a5c155af9199af9e69b889claireho char hexbuf[128]; 2165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // **** TODO: find *all* matches, not just first one **** 2167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd); 2168b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms->setTargetString(&testCase, status); 2170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bms->search(0, actualStart, actualEnd); 2171b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2172b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { 217327f654740f2a26ad62a5c155af9199af9e69b889claireho hexForUnicodeString(pattern, hexbuf, sizeof(hexbuf)); 2174b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Boyer-Moore Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n" 217527f654740f2a26ad62a5c155af9199af9e69b889claireho " strength=%s seed=%d <pattern>: %s", 217627f654740f2a26ad62a5c155af9199af9e69b889claireho name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed, hexbuf); 2177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (expectedStart == -1 && actualStart == -1) { 2180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru notFoundCount += 1; 2181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste 
Queru // **** TODO: find *all* matches, not just first one **** 2184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd); 2185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru abms->setTargetString(&testCase, status); 2187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru abms->search(0, actualStart, actualEnd); 2188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (expectedStart >= 0 && (actualStart != expectedStart || actualEnd != expectedEnd)) { 219027f654740f2a26ad62a5c155af9199af9e69b889claireho hexForUnicodeString(altPattern, hexbuf, sizeof(hexbuf)); 2191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Boyer-Moore Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n" 219227f654740f2a26ad62a5c155af9199af9e69b889claireho " strength=%s seed=%d <pattern>: %s", 219327f654740f2a26ad62a5c155af9199af9e69b889claireho name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed, hexbuf); 2194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (expectedStart == -1 && actualStart == -1) { 2197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru notFoundCount += 1; 2198b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2199b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return notFoundCount; 2202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 2203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 2204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
2205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid SSearchTest::monkeyTest(char *params) 2206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru{ 2207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // ook! 2208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //UCollator *coll = ucol_open(NULL, &status); 2210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollator *coll = ucol_openFromShortString("S1", FALSE, NULL, &status); 2211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 22136d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status)); 2214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 2215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData *monkeyData = CollData::open(coll, status); 2218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru USet *expansions = uset_openEmpty(); 2220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru USet *contractions = uset_openEmpty(); 2221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status); 2223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); 2225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_STRING_INIT(letter_pattern, 
"[[:letter:]-[:ideographic:]-[:hangul:]]", 39); 2226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru USet *letters = uset_openPattern(letter_pattern, 39, &status); 2227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru SetMonkey letterMonkey(letters); 2228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru StringSetMonkey contractionMonkey(contractions, coll, monkeyData); 2229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru StringSetMonkey expansionMonkey(expansions, coll, monkeyData); 2230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString testCase; 2231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString alternate; 2232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString pattern, altPattern; 2233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString prefix, altPrefix; 2234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString suffix, altSuffix; 2235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru Monkey *monkeys[] = { 2237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &letterMonkey, 2238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &contractionMonkey, 2239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &expansionMonkey, 2240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &contractionMonkey, 2241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &expansionMonkey, 2242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &contractionMonkey, 2243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &expansionMonkey, 2244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &contractionMonkey, 2245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru &expansionMonkey}; 2246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t monkeyCount = sizeof(monkeys) / 
sizeof(monkeys[0]); 224750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // int32_t nonMatchCount = 0; 2248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY}; 2250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const char *strengthNames[] = {"primary", "secondary", "tertiary"}; 2251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]); 2252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t loopCount = quick? 1000 : 10000; 2253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t firstStrength = 0; 2254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t lastStrength = strengthCount - 1; //*/ 0; 2255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (params != NULL) { 2257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 2258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString p(params); 2259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru loopCount = getIntParam("loop", p, loopCount); 2261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m_seed = getIntParam("seed", p, m_seed); 2262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status); 2264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (m.find()) { 2265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString breakType = m.group(1, status); 2266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
2267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (int32_t s = 0; s < strengthCount; s += 1) { 2268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (breakType == strengthNames[s]) { 2269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru firstStrength = lastStrength = s; 2270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru m.reset(); 2275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru p = m.replaceFirst("", status); 2276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (RegexMatcher("\\S", p, 0, status).find()) { 2279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Each option is stripped out of the option string as it is processed. 2280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // All options have been checked. The option string should have been completely emptied.. 
2281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru char buf[100]; 2282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru p.extract(buf, sizeof(buf), NULL, status); 2283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru buf[sizeof(buf)-1] = 0; 2284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("Unrecognized or extra parameter: %s\n", buf); 2285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 2286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#else 2288c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters."); 2289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 2290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(int32_t s = firstStrength; s <= lastStrength; s += 1) { 2293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t notFoundCount = 0; 2294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2295b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru logln("Setting strength to %s.", strengthNames[s]); 2296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_setStrength(coll, strengths[s]); 2297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: try alternate prefix and suffix too? 2299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: alterntaes are only equal at primary strength. Is this OK? 
2300b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(int32_t t = 0; t < loopCount; t += 1) { 2301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t seed = m_seed; 230250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // int32_t nmc = 0; 2303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern); 2305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru generateTestCase(coll, monkeys, monkeyCount, prefix, altPrefix); 2306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru generateTestCase(coll, monkeys, monkeyCount, suffix, altSuffix); 2307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // pattern 2309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notFoundCount += monkeyTestCase(coll, pattern, pattern, altPattern, "pattern", strengthNames[s], seed); 2310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru testCase.remove(); 2312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru testCase.append(prefix); 2313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru testCase.append(/*alt*/pattern); 2314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // prefix + pattern 2316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern", strengthNames[s], seed); 2317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru testCase.append(suffix); 2319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // prefix + pattern + suffix 
2321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern + suffix", strengthNames[s], seed); 2322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru testCase.remove(); 2324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru testCase.append(pattern); 2325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru testCase.append(suffix); 2326b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // pattern + suffix 2328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed); 2329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2331b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount); 2332b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2333b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2334b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uset_close(contractions); 2335b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uset_close(expansions); 2336b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uset_close(letters); 2337b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2338b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData::close(monkeyData); 2339b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2340b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_close(coll); 2341b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 2342b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 
2343b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SSearchTest::bmMonkeyTest(char *params) 2344b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 2345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static const UVersionInfo icu49 = { 4, 9, 0, 0 }; // for timebomb 234627f654740f2a26ad62a5c155af9199af9e69b889claireho static const UChar skipChars[] = { 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0xAAB5, 0xAAB6, 0xAAB9, 0xAABB, 0xAABC, 0 }; // for timebomb 2347b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // ook! 2348b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 234927f654740f2a26ad62a5c155af9199af9e69b889claireho UCollator *coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status); 2350b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2351b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 23526d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Failed to create collator in MonkeyTest! 
- %s", u_errorName(status)); 2353b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 2354b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2355b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2356b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData *monkeyData = CollData::open(coll, status); 2357b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2358b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru USet *expansions = uset_openEmpty(); 2359b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru USet *contractions = uset_openEmpty(); 2360b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2361b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status); 2362b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2363b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); 2364b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); 2365b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru USet *letters = uset_openPattern(letter_pattern, 39, &status); 2366b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SetMonkey letterMonkey(letters); 2367b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru StringSetMonkey contractionMonkey(contractions, coll, monkeyData); 2368b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru StringSetMonkey expansionMonkey(expansions, coll, monkeyData); 2369b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString testCase; 2370b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString alternate; 2371b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString pattern, altPattern; 2372b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 
UnicodeString prefix, altPrefix; 2373b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString suffix, altSuffix; 2374b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2375b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru Monkey *monkeys[] = { 2376b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &letterMonkey, 2377b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &contractionMonkey, 2378b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &expansionMonkey, 2379b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &contractionMonkey, 2380b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &expansionMonkey, 2381b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &contractionMonkey, 2382b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &expansionMonkey, 2383b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &contractionMonkey, 2384b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru &expansionMonkey}; 2385b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]); 238650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // int32_t nonMatchCount = 0; 2387b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2388b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY}; 2389b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *strengthNames[] = {"primary", "secondary", "tertiary"}; 2390b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]); 2391b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t loopCount = quick? 
1000 : 10000; 2392b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t firstStrength = 0; 2393b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t lastStrength = strengthCount - 1; //*/ 0; 2394b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2395b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (params != NULL) { 2396b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 2397b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString p(params); 2398b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2399b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru loopCount = getIntParam("loop", p, loopCount); 2400b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru m_seed = getIntParam("seed", p, m_seed); 2401b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2402b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status); 2403b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (m.find()) { 2404b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString breakType = m.group(1, status); 2405b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2406b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (int32_t s = 0; s < strengthCount; s += 1) { 2407b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (breakType == strengthNames[s]) { 2408b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru firstStrength = lastStrength = s; 2409b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 2410b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2411b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2412b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2413b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru m.reset(); 
2414b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p = m.replaceFirst("", status); 2415b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2416b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2417b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (RegexMatcher("\\S", p, 0, status).find()) { 2418b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Each option is stripped out of the option string as it is processed. 2419b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // All options have been checked. The option string should have been completely emptied.. 2420b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char buf[100]; 2421b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p.extract(buf, sizeof(buf), NULL, status); 2422b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru buf[sizeof(buf)-1] = 0; 2423b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru errln("Unrecognized or extra parameter: %s\n", buf); 2424b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 2425b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2426b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#else 2427b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters."); 2428b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 2429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2431b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(int32_t s = firstStrength; s <= lastStrength; s += 1) { 2432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t notFoundCount = 0; 2433b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2434b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru logln("Setting strength to %s.", strengthNames[s]); 
2435b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucol_setStrength(coll, strengths[s]); 2436b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2437b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData *data = CollData::open(coll, status); 2438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 243927f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString skipString(skipChars); // for timebomb 244027f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeSet* skipSet = UnicodeSet::createFromAll(skipString); // for timebomb 2441b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // TODO: try alternate prefix and suffix too? 2442b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // TODO: alterntaes are only equal at primary strength. Is this OK? 2443b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for(int32_t t = 0; t < loopCount; t += 1) { 2444b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t seed = m_seed; 244550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // int32_t nmc = 0; 2446b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2447b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern); 2448b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru generateTestCase(coll, monkeys, monkeyCount, prefix, altPrefix); 2449b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru generateTestCase(coll, monkeys, monkeyCount, suffix, altSuffix); 245027f654740f2a26ad62a5c155af9199af9e69b889claireho 2451b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (!isICUVersionAtLeast(icu49) && skipSet->containsSome(pattern)) { 245227f654740f2a26ad62a5c155af9199af9e69b889claireho continue; // timebomb until ticket #8080 is resolved 245327f654740f2a26ad62a5c155af9199af9e69b889claireho } 2454b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2455b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste 
Queru BoyerMooreSearch pat(data, pattern, NULL, status); 2456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BoyerMooreSearch alt(data, altPattern, NULL, status); 2457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // **** need a better way to deal with this **** 2459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#if 0 2460b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (pat.empty() || 2461b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru alt.empty()) { 2462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; 2463b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2464b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 2465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2466b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // pattern 2467b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru notFoundCount += bmMonkeyTestCase(coll, pattern, pattern, altPattern, &pat, &alt, "pattern", strengthNames[s], seed); 2468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru testCase.remove(); 2470b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru testCase.append(prefix); 2471b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru testCase.append(/*alt*/pattern); 2472b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2473b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // prefix + pattern 2474b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern", strengthNames[s], seed); 2475b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2476b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru testCase.append(suffix); 2477b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste 
Queru 2478b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // prefix + pattern + suffix 2479b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "prefix + pattern + suffix", strengthNames[s], seed); 2480b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2481b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru testCase.remove(); 2482b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru testCase.append(pattern); 2483b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru testCase.append(suffix); 2484b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2485b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // pattern + suffix 2486b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru notFoundCount += bmMonkeyTestCase(coll, testCase, pattern, altPattern, &pat, &alt, "pattern + suffix", strengthNames[s], seed); 2487b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 248827f654740f2a26ad62a5c155af9199af9e69b889claireho delete skipSet; // for timebomb 2489b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2490b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru CollData::close(data); 2491b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2492b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount); 2493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 2494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uset_close(contractions); 2496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uset_close(expansions); 2497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uset_close(letters); 2498b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2499b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste 
Queru CollData::close(monkeyData); 2500b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucol_close(coll); 2502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 2503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 250450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid SSearchTest::stringListTest(){ 250550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 250650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho StringList *sl = new StringList(status); 250750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(status)){ 250850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("ERROR: stringListTest: Could not start StringList"); 250950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 251050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 251150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar chars[] = { 251250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 0x0000 251350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 251450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sl->add(chars, (int32_t) 0, status); 251550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(status)){ 251650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("ERROR: stringListTest: StringList::add"); 251750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 251850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(sl->getDynamicClassID() != StringList::getStaticClassID()){ 252050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("ERROR: stringListTest: getDynamicClassID and getStaticClassID does not match"); 252150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 252250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete sl; 252350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 252450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 
2526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 2527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 2528