1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/******************************************************************** 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * COPYRIGHT: 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved. 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ********************************************************************/ 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This program demos string collation 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst char gHelpString[] = 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "usage: strsrch [options*] -source source_string -pattern pattern_string\n" 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-help Display this message.\n" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-locale name ICU locale to use. Default is en_US\n" 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-rules rule Collation rules file (overrides locale)\n" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-french French accent ordering\n" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-norm Normalizing mode on\n" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-shifted Shifted mode\n" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-lower Lower case first\n" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-upper Upper case first\n" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-case Enable separate case level\n" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-source string Source string\n" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-pattern string Pattern string to look for in source\n" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-overlap Enable searching to be done on overlapping patterns\n" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-canonical Enable searching to be done matching canonical equivalent patterns" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "The format \\uXXXX is supported for the rules and comparison strings\n" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ; 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <string.h> 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h> 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/utypes.h> 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ucol.h> 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/usearch.h> 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ustring.h> 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Command line option variables 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * These global variables are set according to the options specified 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * on the command line by the user. 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_locale = "en_US"; 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_rules = 0; 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_help = FALSE; 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_norm = FALSE; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_french = FALSE; 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_shifted = FALSE; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_lower = FALSE; 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_upper = FALSE; 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_case = FALSE; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_overlap = FALSE; 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool opt_canonical = FALSE; 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint opt_level = 0; 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_source = "International Components for Unicode"; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruchar * opt_pattern = "Unicode"; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCollator * collator = 0; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUStringSearch * search = 0; 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar rules[100]; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar source[100]; 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar pattern[100]; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Definitions for the command line options 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct OptSpec { 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *name; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enum {FLAG, NUM, STRING} type; 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void *pVar; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruOptSpec opts[] = { 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-locale", OptSpec::STRING, &opt_locale}, 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-rules", OptSpec::STRING, &opt_rules}, 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-source", OptSpec::STRING, &opt_source}, 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-pattern", OptSpec::STRING, &opt_pattern}, 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-norm", OptSpec::FLAG, &opt_norm}, 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-french", OptSpec::FLAG, &opt_french}, 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-shifted", OptSpec::FLAG, &opt_shifted}, 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-lower", OptSpec::FLAG, &opt_lower}, 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-upper", OptSpec::FLAG, &opt_upper}, 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-case", OptSpec::FLAG, &opt_case}, 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-level", OptSpec::NUM, &opt_level}, 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-overlap", OptSpec::FLAG, &opt_overlap}, 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-canonical", OptSpec::FLAG, &opt_canonical}, 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-help", OptSpec::FLAG, &opt_help}, 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {"-?", OptSpec::FLAG, &opt_help}, 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru {0, OptSpec::FLAG, 0} 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * processOptions() Function to read the command line options. 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool processOptions(int argc, const char **argv, OptSpec opts[]) 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int argNum = 1; argNum < argc; argNum ++) { 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *pArgName = argv[argNum]; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru OptSpec *pOpt; 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (pOpt = opts; pOpt->name != 0; pOpt ++) { 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (strcmp(pOpt->name, pArgName) == 0) { 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (pOpt->type) { 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case OptSpec::FLAG: 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(UBool *)(pOpt->pVar) = TRUE; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case OptSpec::STRING: 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru argNum ++; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (argNum >= argc) { 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "value expected for \"%s\" option.\n", 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pOpt->name); 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(const char **)(pOpt->pVar) = argv[argNum]; 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case OptSpec::NUM: 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru argNum ++; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (argNum >= argc) { 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "value expected for \"%s\" option.\n", 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pOpt->name); 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *endp; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i = strtol(argv[argNum], &endp, 0); 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (endp == argv[argNum]) { 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "integer value expected for \"%s\" option.\n", 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pOpt->name); 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(int *)(pOpt->pVar) = i; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (pOpt->name == 0) 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Creates a collator 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool processCollator() 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Set up an ICU collator 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_rules != 0) { 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_unescape(opt_rules, rules, 100); 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, &status); 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru collator = ucol_open(opt_locale, &status); 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Collator creation failed.: %d\n", status); 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (status == U_USING_DEFAULT_WARNING) { 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru opt_locale); 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (status == U_USING_FALLBACK_WARNING) { 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru opt_locale); 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_norm) { 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_french) { 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_lower) { 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_upper) { 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_case) { 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status); 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_shifted) { 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_level != 0) { 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (opt_level) { 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status); 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY, 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status); 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY, 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 5: 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL, 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "-level param must be between 1 and 5\n"); 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Collator attribute setting failed.: %d\n", status); 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Creates a string search 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool processStringSearch() 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_unescape(opt_source, source, 100); 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_unescape(opt_pattern, pattern, 100); 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL, 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_overlap == TRUE) { 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status); 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt_canonical == TRUE) { 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON, 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &status); 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Error setting search attributes\n"); 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool findPattern() 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offset = usearch_next(search, &status); 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset == USEARCH_DONE) { 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stdout, "Pattern not found in source\n"); 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (offset != USEARCH_DONE) { 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stdout, "Pattern found at offset %d size %d\n", offset, 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usearch_getMatchedLength(search)); 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offset = usearch_next(search, &status); 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Error in searching for pattern %d\n", status); 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stdout, "End of search\n"); 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Main -- process command line, read in and pre-process the test file, 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * call other functions to do the actual tests. 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint main(int argc, const char** argv) 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (processOptions(argc, argv, opts) != TRUE || opt_help) { 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf(gHelpString); 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (processCollator() != TRUE) { 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Error creating collator\n"); 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (processStringSearch() != TRUE) { 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Error creating string search\n"); 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern, 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru opt_source); 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru findPattern(); 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucol_close(collator); 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usearch_close(search); 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 302