1/* 2********************************************************************** 3* Copyright (C) 2004-2010, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* file name: uregex.h 7* encoding: US-ASCII 8* indentation:4 9* 10* created on: 2004mar09 11* created by: Andy Heninger 12* 13* ICU Regular Expressions, API for C 14*/ 15 16/** 17 * \file 18 * \brief C API: Regular Expressions 19 * 20 * <p>This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.</p> 21 */ 22 23#ifndef UREGEX_H 24#define UREGEX_H 25 26#include "unicode/utext.h" 27#include "unicode/utypes.h" 28 29#if !UCONFIG_NO_REGULAR_EXPRESSIONS 30 31#include "unicode/localpointer.h" 32#include "unicode/parseerr.h" 33 34struct URegularExpression; 35/** 36 * Structure representing a compiled regular rexpression, plus the results 37 * of a match operation. 38 * @stable ICU 3.0 39 */ 40typedef struct URegularExpression URegularExpression; 41 42 43/** 44 * Constants for Regular Expression Match Modes. 45 * @stable ICU 2.4 46 */ 47typedef enum URegexpFlag{ 48 49#ifndef U_HIDE_DRAFT_API 50 /** Forces normalization of pattern and strings. 51 Not implemented yet, just a placeholder, hence draft. 52 @draft ICU 2.4 */ 53 UREGEX_CANON_EQ = 128, 54#endif 55 /** Enable case insensitive matching. @stable ICU 2.4 */ 56 UREGEX_CASE_INSENSITIVE = 2, 57 58 /** Allow white space and comments within patterns @stable ICU 2.4 */ 59 UREGEX_COMMENTS = 4, 60 61 /** If set, '.' matches line terminators, otherwise '.' matching stops at line end. 62 * @stable ICU 2.4 */ 63 UREGEX_DOTALL = 32, 64 65 /** If set, treat the entire pattern as a literal string. 66 * Metacharacters or escape sequences in the input sequence will be given 67 * no special meaning. Not implemented yet as of ICU 4.4. 68 * 69 * The flags CASE_INSENSITIVE and UNICODE_CASE retain their impact 70 * on matching when used in conjunction with this flag. 
71 * The other flags become superfluous. 72 * TODO: say which escapes are still handled; anything Java does 73 * early (\\u) we should still do. 74 * @stable ICU 4.0 75 */ 76 UREGEX_LITERAL = 16, 77 78 /** Control behavior of "$" and "^" 79 * If set, recognize line terminators within string, 80 * otherwise, match only at start and end of input string. 81 * @stable ICU 2.4 */ 82 UREGEX_MULTILINE = 8, 83 84 /** Unix-only line endings. 85 * When this mode is enabled, only \\u000a is recognized as a line ending 86 * in the behavior of ., ^, and $. 87 * @stable ICU 4.0 88 */ 89 UREGEX_UNIX_LINES = 1, 90 91 /** Unicode word boundaries. 92 * If set, \b uses the Unicode TR 29 definition of word boundaries. 93 * Warning: Unicode word boundaries are quite different from 94 * traditional regular expression word boundaries. See 95 * http://unicode.org/reports/tr29/#Word_Boundaries 96 * @stable ICU 2.8 97 */ 98 UREGEX_UWORD = 256, 99 100 /** Error on Unrecognized backslash escapes. 101 * If set, fail with an error on patterns that contain 102 * backslash-escaped ASCII letters without a known specail 103 * meaning. If this flag is not set, these 104 * escaped letters represent themselves. 105 * @stable ICU 4.0 106 */ 107 UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512 108 109} URegexpFlag; 110 111/** 112 * Open (compile) an ICU regular expression. Compiles the regular expression in 113 * string form into an internal representation using the specified match mode flags. 114 * The resulting regular expression handle can then be used to perform various 115 * matching operations. 116 * 117 * 118 * @param pattern The Regular Expression pattern to be compiled. 119 * @param patternLength The length of the pattern, or -1 if the pattern is 120 * NUL termintated. 121 * @param flags Flags that alter the default matching behavior for 122 * the regular expression, UREGEX_CASE_INSENSITIVE, for 123 * example. For default behavior, set this parameter to zero. 124 * See <code>enum URegexpFlag</code>. 
All desired flags 125 * are bitwise-ORed together. 126 * @param pe Receives the position (line and column nubers) of any syntax 127 * error within the source regular expression string. If this 128 * information is not wanted, pass NULL for this parameter. 129 * @param status Receives error detected by this function. 130 * @stable ICU 3.0 131 * 132 */ 133U_STABLE URegularExpression * U_EXPORT2 134uregex_open( const UChar *pattern, 135 int32_t patternLength, 136 uint32_t flags, 137 UParseError *pe, 138 UErrorCode *status); 139 140/** 141 * Open (compile) an ICU regular expression. Compiles the regular expression in 142 * string form into an internal representation using the specified match mode flags. 143 * The resulting regular expression handle can then be used to perform various 144 * matching operations. 145 * <p> 146 * The contents of the pattern UText will be extracted and saved. Ownership of the 147 * UText struct itself remains with the caller. This is to match the behavior of 148 * uregex_open(). 149 * 150 * @param pattern The Regular Expression pattern to be compiled. 151 * @param flags Flags that alter the default matching behavior for 152 * the regular expression, UREGEX_CASE_INSENSITIVE, for 153 * example. For default behavior, set this parameter to zero. 154 * See <code>enum URegexpFlag</code>. All desired flags 155 * are bitwise-ORed together. 156 * @param pe Receives the position (line and column nubers) of any syntax 157 * error within the source regular expression string. If this 158 * information is not wanted, pass NULL for this parameter. 159 * @param status Receives error detected by this function. 160 * 161 * @draft ICU 4.6 162 */ 163U_DRAFT URegularExpression * U_EXPORT2 164uregex_openUText(UText *pattern, 165 uint32_t flags, 166 UParseError *pe, 167 UErrorCode *status); 168 169/** 170 * Open (compile) an ICU regular expression. The resulting regular expression 171 * handle can then be used to perform various matching operations. 
172 * <p> 173 * This function is the same as uregex_open, except that the pattern 174 * is supplied as an 8 bit char * string in the default code page. 175 * 176 * @param pattern The Regular Expression pattern to be compiled, 177 * NUL termintated. 178 * @param flags Flags that alter the default matching behavior for 179 * the regular expression, UREGEX_CASE_INSENSITIVE, for 180 * example. For default behavior, set this parameter to zero. 181 * See <code>enum URegexpFlag</code>. All desired flags 182 * are bitwise-ORed together. 183 * @param pe Receives the position (line and column nubers) of any syntax 184 * error within the source regular expression string. If this 185 * information is not wanted, pass NULL for this parameter. 186 * @param status Receives errors detected by this function. 187 * @return The URegularExpression object representing the compiled 188 * pattern. 189 * 190 * @stable ICU 3.0 191 */ 192#if !UCONFIG_NO_CONVERSION 193U_STABLE URegularExpression * U_EXPORT2 194uregex_openC( const char *pattern, 195 uint32_t flags, 196 UParseError *pe, 197 UErrorCode *status); 198#endif 199 200 201 202/** 203 * Close the regular expression, recovering all resources (memory) it 204 * was holding. 205 * 206 * @param regexp The regular expression to be closed. 207 * @stable ICU 3.0 208 */ 209U_STABLE void U_EXPORT2 210uregex_close(URegularExpression *regexp); 211 212#if U_SHOW_CPLUSPLUS_API 213 214U_NAMESPACE_BEGIN 215 216/** 217 * \class LocalURegularExpressionPointer 218 * "Smart pointer" class, closes a URegularExpression via uregex_close(). 219 * For most methods see the LocalPointerBase base class. 220 * 221 * @see LocalPointerBase 222 * @see LocalPointer 223 * @stable ICU 4.4 224 */ 225U_DEFINE_LOCAL_OPEN_POINTER(LocalURegularExpressionPointer, URegularExpression, uregex_close); 226 227U_NAMESPACE_END 228 229#endif 230 231/** 232 * Make a copy of a compiled regular expression. 
Cloning a regular 233 * expression is faster than opening a second instance from the source 234 * form of the expression, and requires less memory. 235 * <p> 236 * Note that the current input string and the position of any matched text 237 * within it are not cloned; only the pattern itself and and the 238 * match mode flags are copied. 239 * <p> 240 * Cloning can be particularly useful to threaded applications that perform 241 * multiple match operations in parallel. Each concurrent RE 242 * operation requires its own instance of a URegularExpression. 243 * 244 * @param regexp The compiled regular expression to be cloned. 245 * @param status Receives indication of any errors encountered 246 * @return the cloned copy of the compiled regular expression. 247 * @stable ICU 3.0 248 */ 249U_STABLE URegularExpression * U_EXPORT2 250uregex_clone(const URegularExpression *regexp, UErrorCode *status); 251 252/** 253 * Returns a pointer to the source form of the pattern for this regular expression. 254 * This function will work even if the pattern was originally specified as a UText. 255 * 256 * @param regexp The compiled regular expression. 257 * @param patLength This output parameter will be set to the length of the 258 * pattern string. A NULL pointer may be used here if the 259 * pattern length is not needed, as would be the case if 260 * the pattern is known in advance to be a NUL terminated 261 * string. 262 * @param status Receives errors detected by this function. 263 * @return a pointer to the pattern string. The storage for the string is 264 * owned by the regular expression object, and must not be 265 * altered or deleted by the application. The returned string 266 * will remain valid until the regular expression is closed. 
267 * @stable ICU 3.0 268 */ 269U_STABLE const UChar * U_EXPORT2 270uregex_pattern(const URegularExpression *regexp, 271 int32_t *patLength, 272 UErrorCode *status); 273 274/** 275 * Returns the source text of the pattern for this regular expression. 276 * This function will work even if the pattern was originally specified as a UChar string. 277 * 278 * @param regexp The compiled regular expression. 279 * @param status Receives errors detected by this function. 280 * @return the pattern text. The storage for the text is owned by the regular expression 281 * object, and must not be altered or deleted. 282 * 283 * @draft ICU 4.6 284 */ 285U_DRAFT UText * U_EXPORT2 286uregex_patternUText(const URegularExpression *regexp, 287 UErrorCode *status); 288 289 290/** 291 * Get the match mode flags that were specified when compiling this regular expression. 292 * @param status Receives errors detected by this function. 293 * @param regexp The compiled regular expression. 294 * @return The match mode flags 295 * @see URegexpFlag 296 * @stable ICU 3.0 297 */ 298U_STABLE int32_t U_EXPORT2 299uregex_flags(const URegularExpression *regexp, 300 UErrorCode *status); 301 302 303/** 304 * Set the subject text string upon which the regular expression will look for matches. 305 * This function may be called any number of times, allowing the regular 306 * expression pattern to be applied to different strings. 307 * <p> 308 * Regular expression matching operations work directly on the application's 309 * string data. No copy is made. The subject string data must not be 310 * altered after calling this function until after all regular expression 311 * operations involving this string data are completed. 312 * <p> 313 * Zero length strings are permitted. In this case, no subsequent match 314 * operation will dereference the text string pointer. 315 * 316 * @param regexp The compiled regular expression. 317 * @param text The subject text string. 
318 * @param textLength The length of the subject text, or -1 if the string 319 * is NUL terminated. 320 * @param status Receives errors detected by this function. 321 * @stable ICU 3.0 322 */ 323U_STABLE void U_EXPORT2 324uregex_setText(URegularExpression *regexp, 325 const UChar *text, 326 int32_t textLength, 327 UErrorCode *status); 328 329 330/** 331 * Set the subject text string upon which the regular expression will look for matches. 332 * This function may be called any number of times, allowing the regular 333 * expression pattern to be applied to different strings. 334 * <p> 335 * Regular expression matching operations work directly on the application's 336 * string data; only a shallow clone is made. The subject string data must not be 337 * altered after calling this function until after all regular expression 338 * operations involving this string data are completed. 339 * 340 * @param regexp The compiled regular expression. 341 * @param text The subject text string. 342 * @param status Receives errors detected by this function. 343 * 344 * @draft ICU 4.6 345 */ 346U_DRAFT void U_EXPORT2 347uregex_setUText(URegularExpression *regexp, 348 UText *text, 349 UErrorCode *status); 350 351/** 352 * Get the subject text that is currently associated with this 353 * regular expression object. If the input was supplied using uregex_setText(), 354 * that pointer will be returned. Otherwise, the characters in the input will 355 * be extracted to a buffer and returned. In either case, ownership remains 356 * with the regular expression object. 357 * 358 * This function will work even if the input was originally specified as a UText. 359 * 360 * @param regexp The compiled regular expression. 361 * @param textLength The length of the string is returned in this output parameter. 362 * A NULL pointer may be used here if the 363 * text length is not needed, as would be the case if 364 * the text is known in advance to be a NUL terminated 365 * string. 
366 * @param status Receives errors detected by this function. 367 * @return Pointer to the subject text string currently associated with 368 * this regular expression. 369 * @stable ICU 3.0 370 */ 371U_STABLE const UChar * U_EXPORT2 372uregex_getText(URegularExpression *regexp, 373 int32_t *textLength, 374 UErrorCode *status); 375 376 377/** 378 * Get the subject text that is currently associated with this 379 * regular expression object. 380 * 381 * This function will work even if the input was originally specified as a UChar string. 382 * 383 * @param regexp The compiled regular expression. 384 * @param dest A mutable UText in which to store the current input. 385 * If NULL, a new UText will be created as an immutable shallow clone 386 * of the actual input string. 387 * @param status Receives errors detected by this function. 388 * @return The subject text currently associated with this regular expression. 389 * If a pre-allocated UText was provided, it will always be used and returned. 390 * 391 * @draft ICU 4.6 392 */ 393U_DRAFT UText * U_EXPORT2 394uregex_getUText(URegularExpression *regexp, 395 UText *dest, 396 UErrorCode *status); 397 398/** 399 * Attempts to match the input string against the pattern. 400 * To succeed, the match must extend to the end of the string, 401 * or cover the complete match region. 402 * 403 * If startIndex >= zero the match operation starts at the specified 404 * index and must extend to the end of the input string. Any region 405 * that has been specified is reset. 406 * 407 * If startIndex == -1 the match must cover the input region, or the entire 408 * input string if no region has been set. This directly corresponds to 409 * Matcher.matches() in Java 410 * 411 * @param regexp The compiled regular expression. 412 * @param startIndex The input string (native) index at which to begin matching, or -1 413 * to match the input Region. 414 * @param status Receives errors detected by this function. 
415 * @return TRUE if there is a match 416 * @stable ICU 3.0 417 */ 418U_STABLE UBool U_EXPORT2 419uregex_matches(URegularExpression *regexp, 420 int32_t startIndex, 421 UErrorCode *status); 422 423/** 424 * 64bit version of uregex_matches. 425 * @draft ICU 4.6 426 */ 427U_DRAFT UBool U_EXPORT2 428uregex_matches64(URegularExpression *regexp, 429 int64_t startIndex, 430 UErrorCode *status); 431 432/** 433 * Attempts to match the input string, starting from the specified index, against the pattern. 434 * The match may be of any length, and is not required to extend to the end 435 * of the input string. Contrast with uregex_matches(). 436 * 437 * <p>If startIndex is >= 0 any input region that was set for this 438 * URegularExpression is reset before the operation begins. 439 * 440 * <p>If the specified starting index == -1 the match begins at the start of the input 441 * region, or at the start of the full string if no region has been specified. 442 * This corresponds directly with Matcher.lookingAt() in Java. 443 * 444 * <p>If the match succeeds then more information can be obtained via the 445 * <code>uregexp_start()</code>, <code>uregexp_end()</code>, 446 * and <code>uregexp_group()</code> functions.</p> 447 * 448 * @param regexp The compiled regular expression. 449 * @param startIndex The input string (native) index at which to begin matching, or 450 * -1 to match the Input Region 451 * @param status A reference to a UErrorCode to receive any errors. 452 * @return TRUE if there is a match. 453 * @stable ICU 3.0 454 */ 455U_STABLE UBool U_EXPORT2 456uregex_lookingAt(URegularExpression *regexp, 457 int32_t startIndex, 458 UErrorCode *status); 459 460/** 461 * 64bit version of uregex_lookingAt. 462 * @draft ICU 4.6 463 */ 464U_DRAFT UBool U_EXPORT2 465uregex_lookingAt64(URegularExpression *regexp, 466 int64_t startIndex, 467 UErrorCode *status); 468 469/** 470 * Find the first matching substring of the input string that matches the pattern. 
471 * If startIndex is >= zero the search for a match begins at the specified index, 472 * and any match region is reset. This corresponds directly with 473 * Matcher.find(startIndex) in Java. 474 * 475 * If startIndex == -1 the search begins at the start of the input region, 476 * or at the start of the full string if no region has been specified. 477 * 478 * If a match is found, <code>uregex_start(), uregex_end()</code>, and 479 * <code>uregex_group()</code> will provide more information regarding the match. 480 * 481 * @param regexp The compiled regular expression. 482 * @param startIndex The position (native) in the input string to begin the search, or 483 * -1 to search within the Input Region. 484 * @param status A reference to a UErrorCode to receive any errors. 485 * @return TRUE if a match is found. 486 * @stable ICU 3.0 487 */ 488U_STABLE UBool U_EXPORT2 489uregex_find(URegularExpression *regexp, 490 int32_t startIndex, 491 UErrorCode *status); 492 493/** 494 * 64bit version of uregex_find. 495 * @draft ICU 4.6 496 */ 497U_DRAFT UBool U_EXPORT2 498uregex_find64(URegularExpression *regexp, 499 int64_t startIndex, 500 UErrorCode *status); 501 502/** 503 * Find the next pattern match in the input string. Begin searching 504 * the input at the location following the end of he previous match, 505 * or at the start of the string (or region) if there is no 506 * previous match. If a match is found, <code>uregex_start(), uregex_end()</code>, and 507 * <code>uregex_group()</code> will provide more information regarding the match. 508 * 509 * @param regexp The compiled regular expression. 510 * @param status A reference to a UErrorCode to receive any errors. 511 * @return TRUE if a match is found. 512 * @see uregex_reset 513 * @stable ICU 3.0 514 */ 515U_STABLE UBool U_EXPORT2 516uregex_findNext(URegularExpression *regexp, 517 UErrorCode *status); 518 519/** 520 * Get the number of capturing groups in this regular expression's pattern. 
521 * @param regexp The compiled regular expression. 522 * @param status A reference to a UErrorCode to receive any errors. 523 * @return the number of capture groups 524 * @stable ICU 3.0 525 */ 526U_STABLE int32_t U_EXPORT2 527uregex_groupCount(URegularExpression *regexp, 528 UErrorCode *status); 529 530/** Extract the string for the specified matching expression or subexpression. 531 * Group #0 is the complete string of matched text. 532 * Group #1 is the text matched by the first set of capturing parentheses. 533 * 534 * @param regexp The compiled regular expression. 535 * @param groupNum The capture group to extract. Group 0 is the complete 536 * match. The value of this parameter must be 537 * less than or equal to the number of capture groups in 538 * the pattern. 539 * @param dest Buffer to receive the matching string data 540 * @param destCapacity Capacity of the dest buffer. 541 * @param status A reference to a UErrorCode to receive any errors. 542 * @return Length of matching data, 543 * or -1 if no applicable match. 544 * @stable ICU 3.0 545 */ 546U_STABLE int32_t U_EXPORT2 547uregex_group(URegularExpression *regexp, 548 int32_t groupNum, 549 UChar *dest, 550 int32_t destCapacity, 551 UErrorCode *status); 552 553 554/** Returns a shallow immutable clone of the entire input string. The returned UText current native index 555 * is set to the beginning of the requested capture group. The capture group length is also 556 * returned via groupLength. 557 * Group #0 is the complete string of matched text. 558 * Group #1 is the text matched by the first set of capturing parentheses. 559 * 560 * @param regexp The compiled regular expression. 561 * @param groupNum The capture group to extract. Group 0 is the complete 562 * match. The value of this parameter must be 563 * less than or equal to the number of capture groups in 564 * the pattern. 565 * @param dest A mutable UText in which to store the current input. 
566 * If NULL, a new UText will be created as an immutable shallow clone 567 * of the entire input string. 568 * @param groupLength The group length of the desired capture group. 569 * @param status A reference to a UErrorCode to receive any errors. 570 * @return The subject text currently associated with this regular expression. 571 * If a pre-allocated UText was provided, it will always be used and returned. 572 573 * 574 * @draft ICU 4.6 575 */ 576U_DRAFT UText * U_EXPORT2 577uregex_groupUText(URegularExpression *regexp, 578 int32_t groupNum, 579 UText *dest, 580 int64_t *groupLength, 581 UErrorCode *status); 582 583 584/** Extract the string for the specified matching expression or subexpression. 585 * Group #0 is the complete string of matched text. 586 * Group #1 is the text matched by the first set of capturing parentheses. 587 * 588 * @param regexp The compiled regular expression. 589 * @param groupNum The capture group to extract. Group 0 is the complete 590 * match. The value of this parameter must be 591 * less than or equal to the number of capture groups in 592 * the pattern. 593 * @param dest Mutable UText to receive the matching string data. 594 * If NULL, a new UText will be created (which may not be mutable). 595 * @param status A reference to a UErrorCode to receive any errors. 596 * @return The matching string data. If a pre-allocated UText was provided, 597 * it will always be used and returned. 598 * 599 * @internal ICU 4.4 technology preview 600 */ 601U_INTERNAL UText * U_EXPORT2 602uregex_groupUTextDeep(URegularExpression *regexp, 603 int32_t groupNum, 604 UText *dest, 605 UErrorCode *status); 606 607/** 608 * Returns the index in the input string of the start of the text matched by the 609 * specified capture group during the previous match operation. Return -1 if 610 * the capture group was not part of the last match. 611 * Group #0 refers to the complete range of matched text. 
612 * Group #1 refers to the text matched by the first set of capturing parentheses. 613 * 614 * @param regexp The compiled regular expression. 615 * @param groupNum The capture group number 616 * @param status A reference to a UErrorCode to receive any errors. 617 * @return the starting (native) position in the input of the text matched 618 * by the specified group. 619 * @stable ICU 3.0 620 */ 621U_STABLE int32_t U_EXPORT2 622uregex_start(URegularExpression *regexp, 623 int32_t groupNum, 624 UErrorCode *status); 625 626/** 627 * 64bit version of uregex_start. 628 * @draft ICU 4.6 629 */ 630U_DRAFT int64_t U_EXPORT2 631uregex_start64(URegularExpression *regexp, 632 int32_t groupNum, 633 UErrorCode *status); 634 635/** 636 * Returns the index in the input string of the position following the end 637 * of the text matched by the specified capture group. 638 * Return -1 if the capture group was not part of the last match. 639 * Group #0 refers to the complete range of matched text. 640 * Group #1 refers to the text matched by the first set of capturing parentheses. 641 * 642 * @param regexp The compiled regular expression. 643 * @param groupNum The capture group number 644 * @param status A reference to a UErrorCode to receive any errors. 645 * @return the (native) index of the position following the last matched character. 646 * @stable ICU 3.0 647 */ 648U_STABLE int32_t U_EXPORT2 649uregex_end(URegularExpression *regexp, 650 int32_t groupNum, 651 UErrorCode *status); 652 653/** 654 * 64bit version of uregex_end. 655 * @draft ICU 4.6 656 */ 657U_DRAFT int64_t U_EXPORT2 658uregex_end64(URegularExpression *regexp, 659 int32_t groupNum, 660 UErrorCode *status); 661 662/** 663 * Reset any saved state from the previous match. Has the effect of 664 * causing uregex_findNext to begin at the specified index, and causing 665 * uregex_start(), uregex_end() and uregex_group() to return an error 666 * indicating that there is no match information available. 
Clears any 667 * match region that may have been set. 668 * 669 * @param regexp The compiled regular expression. 670 * @param index The position (native) in the text at which a 671 * uregex_findNext() should begin searching. 672 * @param status A reference to a UErrorCode to receive any errors. 673 * @stable ICU 3.0 674 */ 675U_STABLE void U_EXPORT2 676uregex_reset(URegularExpression *regexp, 677 int32_t index, 678 UErrorCode *status); 679 680/** 681 * 64bit version of uregex_reset. 682 * @draft ICU 4.6 683 */ 684U_DRAFT void U_EXPORT2 685uregex_reset64(URegularExpression *regexp, 686 int64_t index, 687 UErrorCode *status); 688 689/** Sets the limits of the matching region for this URegularExpression. 690 * The region is the part of the input string that will be considered when matching. 691 * Invoking this method resets any saved state from the previous match, 692 * then sets the region to start at the index specified by the start parameter 693 * and end at the index specified by the end parameter. 694 * 695 * Depending on the transparency and anchoring being used (see useTransparentBounds 696 * and useAnchoringBounds), certain constructs such as anchors may behave differently 697 * at or around the boundaries of the region 698 * 699 * The function will fail if start is greater than limit, or if either index 700 * is less than zero or greater than the length of the string being matched. 701 * 702 * @param regexp The compiled regular expression. 703 * @param regionStart The (native) index to begin searches at. 704 * @param regionLimit The (native) index to end searches at (exclusive). 705 * @param status A pointer to a UErrorCode to receive any errors. 706 * @stable ICU 4.0 707 */ 708U_STABLE void U_EXPORT2 709uregex_setRegion(URegularExpression *regexp, 710 int32_t regionStart, 711 int32_t regionLimit, 712 UErrorCode *status); 713 714/** 715 * 64bit version of uregex_setRegion. 
716 * @draft ICU 4.6 717 */ 718U_DRAFT void U_EXPORT2 719uregex_setRegion64(URegularExpression *regexp, 720 int64_t regionStart, 721 int64_t regionLimit, 722 UErrorCode *status); 723 724/** 725 * Variation on uregex_setRegion to set the region without resetting the start index 726 * without resetting the position for subsequent matches. 727 * @draft ICU 4.6 728 */ 729U_DRAFT void U_EXPORT2 730uregex_setRegionAndStart(URegularExpression *regexp, 731 int64_t regionStart, 732 int64_t regionLimit, 733 int64_t startIndex, 734 UErrorCode *status); 735 736/** 737 * Reports the start index of the matching region. Any matches found are limited to 738 * to the region bounded by regionStart (inclusive) and regionEnd (exclusive). 739 * 740 * @param regexp The compiled regular expression. 741 * @param status A pointer to a UErrorCode to receive any errors. 742 * @return The starting (native) index of this matcher's region. 743 * @stable ICU 4.0 744 */ 745U_STABLE int32_t U_EXPORT2 746uregex_regionStart(const URegularExpression *regexp, 747 UErrorCode *status); 748 749/** 750 * 64bit version of uregex_regionStart. 751 * @draft ICU 4.6 752 */ 753U_DRAFT int64_t U_EXPORT2 754uregex_regionStart64(const URegularExpression *regexp, 755 UErrorCode *status); 756 757/** 758 * Reports the end index (exclusive) of the matching region for this URegularExpression. 759 * Any matches found are limited to to the region bounded by regionStart (inclusive) 760 * and regionEnd (exclusive). 761 * 762 * @param regexp The compiled regular expression. 763 * @param status A pointer to a UErrorCode to receive any errors. 764 * @return The ending point (native) of this matcher's region. 765 * @stable ICU 4.0 766 */ 767U_STABLE int32_t U_EXPORT2 768uregex_regionEnd(const URegularExpression *regexp, 769 UErrorCode *status); 770 771/** 772 * 64bit version of uregex_regionEnd. 
773 * @draft ICU 4.6 774 */ 775U_DRAFT int64_t U_EXPORT2 776uregex_regionEnd64(const URegularExpression *regexp, 777 UErrorCode *status); 778 779/** 780 * Queries the transparency of region bounds for this URegularExpression. 781 * See useTransparentBounds for a description of transparent and opaque bounds. 782 * By default, matching boundaries are opaque. 783 * 784 * @param regexp The compiled regular expression. 785 * @param status A pointer to a UErrorCode to receive any errors. 786 * @return TRUE if this matcher is using opaque bounds, false if it is not. 787 * @stable ICU 4.0 788 */ 789U_STABLE UBool U_EXPORT2 790uregex_hasTransparentBounds(const URegularExpression *regexp, 791 UErrorCode *status); 792 793 794/** 795 * Sets the transparency of region bounds for this URegularExpression. 796 * Invoking this function with an argument of TRUE will set matches to use transparent bounds. 797 * If the boolean argument is FALSE, then opaque bounds will be used. 798 * 799 * Using transparent bounds, the boundaries of the matching region are transparent 800 * to lookahead, lookbehind, and boundary matching constructs. Those constructs can 801 * see text beyond the boundaries of the region while checking for a match. 802 * 803 * With opaque bounds, no text outside of the matching region is visible to lookahead, 804 * lookbehind, and boundary matching constructs. 805 * 806 * By default, opaque bounds are used. 807 * 808 * @param regexp The compiled regular expression. 809 * @param b TRUE for transparent bounds; FALSE for opaque bounds 810 * @param status A pointer to a UErrorCode to receive any errors. 811 * @stable ICU 4.0 812 **/ 813U_STABLE void U_EXPORT2 814uregex_useTransparentBounds(URegularExpression *regexp, 815 UBool b, 816 UErrorCode *status); 817 818 819/** 820 * Return true if this URegularExpression is using anchoring bounds. 821 * By default, anchoring region bounds are used. 822 * 823 * @param regexp The compiled regular expression. 
824 * @param status A pointer to a UErrorCode to receive any errors. 825 * @return TRUE if this matcher is using anchoring bounds. 826 * @stable ICU 4.0 827 */ 828U_STABLE UBool U_EXPORT2 829uregex_hasAnchoringBounds(const URegularExpression *regexp, 830 UErrorCode *status); 831 832 833/** 834 * Set whether this URegularExpression is using Anchoring Bounds for its region. 835 * With anchoring bounds, pattern anchors such as ^ and $ will match at the start 836 * and end of the region. Without Anchoring Bounds, anchors will only match at 837 * the positions they would in the complete text. 838 * 839 * Anchoring Bounds are the default for regions. 840 * 841 * @param regexp The compiled regular expression. 842 * @param b TRUE to enable anchoring bounds; FALSE to disable them. 843 * @param status A pointer to a UErrorCode to receive any errors. 844 * @stable ICU 4.0 845 */ 846U_STABLE void U_EXPORT2 847uregex_useAnchoringBounds(URegularExpression *regexp, 848 UBool b, 849 UErrorCode *status); 850 851/** 852 * Return TRUE if the most recent matching operation touched the 853 * end of the text being processed. In this case, additional input text could 854 * change the results of that match. 855 * 856 * @param regexp The compiled regular expression. 857 * @param status A pointer to a UErrorCode to receive any errors. 858 * @return TRUE if the most recent match hit the end of input. 859 * @stable ICU 4.0 860 */ 861U_STABLE UBool U_EXPORT2 862uregex_hitEnd(const URegularExpression *regexp, 863 UErrorCode *status); 864 865/** 866 * Return TRUE if the most recent match succeeded and additional input could cause 867 * it to fail. If this function returns FALSE and a match was found, then more input 868 * might change the match but the match won't be lost. If a match was not found, 869 * then requireEnd has no meaning. 870 * 871 * @param regexp The compiled regular expression. 872 * @param status A pointer to a UErrorCode to receive any errors.
873 * @return TRUE if more input could cause the most recent match to no longer match. 874 * @stable ICU 4.0 875 */ 876U_STABLE UBool U_EXPORT2 877uregex_requireEnd(const URegularExpression *regexp, 878 UErrorCode *status); 879 880 881 882 883 884/** 885 * Replaces every substring of the input that matches the pattern 886 * with the given replacement string. This is a convenience function that 887 * provides a complete find-and-replace-all operation. 888 * 889 * This method scans the input string looking for matches of the pattern. 890 * Input that is not part of any match is copied unchanged to the 891 * destination buffer. Matched regions are replaced in the output 892 * buffer by the replacement string. The replacement string may contain 893 * references to capture groups; these take the form of $1, $2, etc. 894 * 895 * @param regexp The compiled regular expression. 896 * @param replacementText A string containing the replacement text. 897 * @param replacementLength The length of the replacement string, or 898 * -1 if it is NUL terminated. 899 * @param destBuf A (UChar *) buffer that will receive the result. 900 * @param destCapacity The capacity of the destination buffer. 901 * @param status A reference to a UErrorCode to receive any errors. 902 * @return The length of the string resulting from the find 903 * and replace operation. In the event that the 904 * destination capacity is inadequate, the return value 905 * is still the full length of the untruncated string. 906 * @stable ICU 3.0 907 */ 908U_STABLE int32_t U_EXPORT2 909uregex_replaceAll(URegularExpression *regexp, 910 const UChar *replacementText, 911 int32_t replacementLength, 912 UChar *destBuf, 913 int32_t destCapacity, 914 UErrorCode *status); 915 916/** 917 * Replaces every substring of the input that matches the pattern 918 * with the given replacement string. This is a convenience function that 919 * provides a complete find-and-replace-all operation.
920 * 921 * This method scans the input string looking for matches of the pattern. 922 * Input that is not part of any match is copied unchanged to the 923 * destination buffer. Matched regions are replaced in the output 924 * buffer by the replacement string. The replacement string may contain 925 * references to capture groups; these take the form of $1, $2, etc. 926 * 927 * @param regexp The compiled regular expression. 928 * @param replacement A string containing the replacement text. 929 * @param dest A mutable UText that will receive the result. 930 * If NULL, a new UText will be created (which may not be mutable). 931 * @param status A reference to a UErrorCode to receive any errors. 932 * @return A UText containing the results of the find and replace. 933 * If a pre-allocated UText was provided, it will always be used and returned. 934 * 935 * @draft ICU 4.6 936 */ 937U_DRAFT UText * U_EXPORT2 938uregex_replaceAllUText(URegularExpression *regexp, 939 UText *replacement, 940 UText *dest, 941 UErrorCode *status); 942 943/** 944 * Replaces the first substring of the input that matches the pattern 945 * with the given replacement string. This is a convenience function that 946 * provides a complete find-and-replace operation. 947 * 948 * This method scans the input string looking for a match of the pattern. 949 * All input that is not part of the match is copied unchanged to the 950 * destination buffer. The matched region is replaced in the output 951 * buffer by the replacement string. The replacement string may contain 952 * references to capture groups; these take the form of $1, $2, etc. 953 * 954 * @param regexp The compiled regular expression. 955 * @param replacementText A string containing the replacement text. 956 * @param replacementLength The length of the replacement string, or 957 * -1 if it is NUL terminated. 958 * @param destBuf A (UChar *) buffer that will receive the result. 959 * @param destCapacity The capacity of the destination buffer.
960 * @param status A reference to a UErrorCode to receive any errors. 961 * @return The length of the string resulting from the find 962 * and replace operation. In the event that the 963 * destination capacity is inadequate, the return value 964 * is still the full length of the untruncated string. 965 * @stable ICU 3.0 966 */ 967U_STABLE int32_t U_EXPORT2 968uregex_replaceFirst(URegularExpression *regexp, 969 const UChar *replacementText, 970 int32_t replacementLength, 971 UChar *destBuf, 972 int32_t destCapacity, 973 UErrorCode *status); 974 975/** 976 * Replaces the first substring of the input that matches the pattern 977 * with the given replacement string. This is a convenience function that 978 * provides a complete find-and-replace operation. 979 * 980 * This method scans the input string looking for a match of the pattern. 981 * All input that is not part of the match is copied unchanged to the 982 * destination buffer. The matched region is replaced in the output 983 * buffer by the replacement string. The replacement string may contain 984 * references to capture groups; these take the form of $1, $2, etc. 985 * 986 * @param regexp The compiled regular expression. 987 * @param replacement A string containing the replacement text. 988 * @param dest A mutable UText that will receive the result. 989 * If NULL, a new UText will be created (which may not be mutable). 990 * @param status A reference to a UErrorCode to receive any errors. 991 * @return A UText containing the results of the find and replace. 992 * If a pre-allocated UText was provided, it will always be used and returned. 993 * 994 * @draft ICU 4.6 995 */ 996U_DRAFT UText * U_EXPORT2 997uregex_replaceFirstUText(URegularExpression *regexp, 998 UText *replacement, 999 UText *dest, 1000 UErrorCode *status); 1001 1002 1003/** 1004 * Implements a replace operation intended to be used as part of an 1005 * incremental find-and-replace.
1006 * 1007 * <p>The input string, starting from the end of the previous match and ending at 1008 * the start of the current match, is appended to the destination string. Then the 1009 * replacement string is appended to the output string, 1010 * including handling any substitutions of captured text.</p> 1011 * 1012 * <p>A note on preflight computation of buffer size and error handling: 1013 * Calls to uregex_appendReplacement() and uregex_appendTail() are 1014 * designed to be chained, one after another, with the destination 1015 * buffer pointer and buffer capacity updated after each in preparation 1016 * for the next. If the destination buffer is exhausted partway through such a 1017 * sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned. Normal 1018 * ICU conventions are for a function to perform no action if it is 1019 * called with an error status, but for this one case, uregex_appendReplacement() 1020 * will operate normally so that buffer size computations will complete 1021 * correctly.</p> 1022 * 1023 * <p>For simple, prepackaged, non-incremental find-and-replace 1024 * operations, see uregex_replaceFirst() or uregex_replaceAll().</p> 1025 * 1026 * @param regexp The regular expression object. 1027 * @param replacementText The string that will replace the matched portion of the 1028 * input string as it is copied to the destination buffer. 1029 * The replacement text may contain references ($1, for 1030 * example) to capture groups from the match. 1031 * @param replacementLength The length of the replacement text string, 1032 * or -1 if the string is NUL terminated. 1033 * @param destBuf The buffer into which the results of the 1034 * find-and-replace are placed. On return, this pointer 1035 * will be updated to refer to the beginning of the 1036 * unused portion of buffer, leaving it in position for 1037 * a subsequent call to this function.
1038 * @param destCapacity The size of the output buffer. On return, this 1039 * parameter will be updated to reflect the space remaining 1040 * unused in the output buffer. 1041 * @param status A reference to a UErrorCode to receive any errors. 1042 * @return The length of the result string. In the event that 1043 * destCapacity is inadequate, the full length of the 1044 * untruncated output string is returned. 1045 * 1046 * @stable ICU 3.0 1047 * 1048 */ 1049U_STABLE int32_t U_EXPORT2 1050uregex_appendReplacement(URegularExpression *regexp, 1051 const UChar *replacementText, 1052 int32_t replacementLength, 1053 UChar **destBuf, 1054 int32_t *destCapacity, 1055 UErrorCode *status); 1056 1057 1058/** 1059 * Implements a replace operation intended to be used as part of an 1060 * incremental find-and-replace. 1061 * 1062 * <p>The input string, starting from the end of the previous match and ending at 1063 * the start of the current match, is appended to the destination string. Then the 1064 * replacement string is appended to the output string, 1065 * including handling any substitutions of captured text.</p> 1066 * 1067 * <p>For simple, prepackaged, non-incremental find-and-replace 1068 * operations, see replaceFirst() or replaceAll().</p> 1069 * 1070 * @param regexp The regular expression object. 1071 * @param replacementText The string that will replace the matched portion of the 1072 * input string as it is copied to the destination buffer. 1073 * The replacement text may contain references ($1, for 1074 * example) to capture groups from the match. 1075 * @param dest A mutable UText that will receive the result. Must not be NULL. 1076 * @param status A reference to a UErrorCode to receive any errors.
1077 * 1078 * @draft ICU 4.6 1079 */ 1080U_DRAFT void U_EXPORT2 1081uregex_appendReplacementUText(URegularExpression *regexp, 1082 UText *replacementText, 1083 UText *dest, 1084 UErrorCode *status); 1085 1086 1087/** 1088 * As the final step in a find-and-replace operation, append the remainder 1089 * of the input string, starting at the position following the last match, 1090 * to the destination string. <code>uregex_appendTail()</code> is intended 1091 * to be invoked after one or more invocations of the 1092 * <code>uregex_appendReplacement()</code> function. 1093 * 1094 * @param regexp The regular expression object. This is needed to 1095 * obtain the input string and the position 1096 * of the last match within it. 1097 * @param destBuf The buffer in which the results of the 1098 * find-and-replace are placed. On return, the pointer 1099 * will be updated to refer to the beginning of the 1100 * unused portion of buffer. 1101 * @param destCapacity The size of the output buffer. On return, this 1102 * value will be updated to reflect the space remaining 1103 * unused in the output buffer. 1104 * @param status A reference to a UErrorCode to receive any errors. 1105 * @return The length of the result string. In the event that 1106 * destCapacity is inadequate, the full length of the 1107 * untruncated output string is returned. 1108 * 1109 * @stable ICU 3.0 1110 */ 1111U_STABLE int32_t U_EXPORT2 1112uregex_appendTail(URegularExpression *regexp, 1113 UChar **destBuf, 1114 int32_t *destCapacity, 1115 UErrorCode *status); 1116 1117 1118/** 1119 * As the final step in a find-and-replace operation, append the remainder 1120 * of the input string, starting at the position following the last match, 1121 * to the destination string. <code>uregex_appendTailUText()</code> is intended 1122 * to be invoked after one or more invocations of the 1123 * <code>uregex_appendReplacementUText()</code> function. 1124 * 1125 * @param regexp The regular expression object.
This is needed to 1126 * obtain the input string and the position 1127 * of the last match within it. 1128 * @param dest A mutable UText that will receive the result. Must not be NULL. @param status A reference to a UErrorCode to receive any errors. 1129 * @return The destination UText. 1130 * 1131 * @draft ICU 4.6 1132 */ 1133U_DRAFT UText * U_EXPORT2 1134uregex_appendTailUText(URegularExpression *regexp, 1135 UText *dest, 1136 UErrorCode *status); 1137 1138 1139 1140 /** 1141 * Split a string into fields. Somewhat like split() from Perl. 1142 * The pattern matches identify delimiters that separate the input 1143 * into fields. The input data between the matches becomes the 1144 * fields themselves. 1145 * <p> 1146 * Each of the fields is copied from the input string to the destination 1147 * buffer, and NUL terminated. The position of each field within 1148 * the destination buffer is returned in the destFields array. 1149 * 1150 * Note: another choice for the design of this function would be to not 1151 * copy the resulting fields at all, but to return indexes and 1152 * lengths within the source text. 1153 * Advantages would be 1154 * o Faster. No Copying. 1155 * o Nothing extra needed when field data may contain embedded NUL chars. 1156 * o Less memory needed if working on large data. 1157 * Disadvantages 1158 * o Less consistent with C++ split, which copies into an 1159 * array of UnicodeStrings. 1160 * o No NUL termination, extracted fields would be less convenient 1161 * to use in most cases. 1162 * o Possible problems in the future, when support for Unicode Normalization 1163 * could cause the fields to not correspond exactly to 1164 * a range of the source text. 1165 * 1166 * @param regexp The compiled regular expression. 1167 * @param destBuf A (UChar *) buffer to receive the fields that 1168 * are extracted from the input string. These 1169 * field pointers will refer to positions within the 1170 * destination buffer supplied by the caller.
Any 1171 * extra positions within the destFields array will be 1172 * set to NULL. 1173 * @param destCapacity The capacity of the destBuf. 1174 * @param requiredCapacity The actual capacity required of the destBuf. 1175 * If destCapacity is too small, requiredCapacity will return 1176 * the total capacity required to hold all of the output, and 1177 * a U_BUFFER_OVERFLOW_ERROR will be returned. 1178 * @param destFields An array to be filled with the position of each 1179 * of the extracted fields within destBuf. 1180 * @param destFieldsCapacity The number of elements in the destFields array. 1181 * If the number of fields found is less than destFieldsCapacity, 1182 * the extra destFields elements are set to zero. 1183 * If destFieldsCapacity is too small, the trailing part of the 1184 * input, including any field delimiters, is treated as if it 1185 * were the last field - it is copied to the destBuf, and 1186 * its position in the destBuf is stored in the last element 1187 * of destFields. This behavior mimics that of Perl. It is not 1188 * an error condition, and no error status is returned when all destField 1189 * positions are used. 1190 * @param status A reference to a UErrorCode to receive any errors. 1191 * @return The number of fields into which the input string was split. 1192 * @stable ICU 3.0 1193 */ 1194U_STABLE int32_t U_EXPORT2 1195uregex_split( URegularExpression *regexp, 1196 UChar *destBuf, 1197 int32_t destCapacity, 1198 int32_t *requiredCapacity, 1199 UChar *destFields[], 1200 int32_t destFieldsCapacity, 1201 UErrorCode *status); 1202 1203 1204 /** 1205 * Split a string into fields. Somewhat like split() from Perl. 1206 * The pattern matches identify delimiters that separate the input 1207 * into fields. The input data between the matches becomes the 1208 * fields themselves.
1209 * <p> 1210 * The behavior of this function is not very closely aligned with uregex_split(); 1211 * instead, it is based on (and implemented directly on top of) the C++ split method. 1212 * 1213 * @param regexp The compiled regular expression. 1214 * @param destFields An array of mutable UText structs to receive the results of the split. 1215 * If a field is NULL, a new UText is allocated to contain the results for 1216 * that field. This new UText is not guaranteed to be mutable. 1217 * @param destFieldsCapacity The number of elements in the destination array. 1218 * If the number of fields found is less than destFieldsCapacity, the 1219 * extra strings in the destination array are not altered. 1220 * If the number of destination strings is less than the number 1221 * of fields, the trailing part of the input string, including any 1222 * field delimiters, is placed in the last destination string. 1223 * This behavior mimics that of Perl. It is not an error condition, and no 1224 * error status is returned when all destField positions are used. 1225 * @param status A reference to a UErrorCode to receive any errors. 1226 * @return The number of fields into which the input string was split. 1227 * 1228 * @draft ICU 4.6 1229 */ 1230U_DRAFT int32_t U_EXPORT2 1231uregex_splitUText(URegularExpression *regexp, 1232 UText *destFields[], 1233 int32_t destFieldsCapacity, 1234 UErrorCode *status); 1235 1236 1237 1238 1239/** 1240 * Set a processing time limit for match operations with this URegularExpression. 1241 * 1242 * Some patterns, when matching certain strings, can run in exponential time. 1243 * For practical purposes, the match operation may appear to be in an 1244 * infinite loop. 1245 * When a limit is set a match operation will fail with an error if the 1246 * limit is exceeded. 1247 * <p> 1248 * The units of the limit are steps of the match engine.
1249 * Correspondence with actual processor time will depend on the speed 1250 * of the processor and the details of the specific pattern, but will 1251 * typically be on the order of milliseconds. 1252 * <p> 1253 * By default, the matching time is not limited. 1254 * <p> 1255 * 1256 * @param regexp The compiled regular expression. 1257 * @param limit The limit value, or 0 for no limit. 1258 * @param status A reference to a UErrorCode to receive any errors. 1259 * @stable ICU 4.0 1260 */ 1261U_STABLE void U_EXPORT2 1262uregex_setTimeLimit(URegularExpression *regexp, 1263 int32_t limit, 1264 UErrorCode *status); 1265 1266/** 1267 * Get the time limit for matches with this URegularExpression. 1268 * A return value of zero indicates that there is no limit. 1269 * 1270 * @param regexp The compiled regular expression. 1271 * @param status A reference to a UErrorCode to receive any errors. 1272 * @return the maximum allowed time for a match, in units of processing steps. 1273 * @stable ICU 4.0 1274 */ 1275U_STABLE int32_t U_EXPORT2 1276uregex_getTimeLimit(const URegularExpression *regexp, 1277 UErrorCode *status); 1278 1279/** 1280 * Set the amount of heap storage available for use by the match backtracking stack. 1281 * <p> 1282 * ICU uses a backtracking regular expression engine, with the backtrack stack 1283 * maintained on the heap. This function sets the limit to the amount of memory 1284 * that can be used for this purpose. A backtracking stack overflow will 1285 * result in an error from the match operation that caused it. 1286 * <p> 1287 * A limit is desirable because a malicious or poorly designed pattern can use 1288 * excessive memory, potentially crashing the process. A limit is enabled 1289 * by default. 1290 * <p> 1291 * @param regexp The compiled regular expression. 1292 * @param limit The maximum size, in bytes, of the matching backtrack stack. 1293 * A value of -1 means no limit. 1294 * The limit must be greater than zero, or -1. NOTE(review): uregex_getStackLimit() documents zero, not -1, as the value meaning unlimited; confirm which sentinel applies.
1295 * @param status A reference to a UErrorCode to receive any errors. 1296 * 1297 * @stable ICU 4.0 1298 */ 1299U_STABLE void U_EXPORT2 1300uregex_setStackLimit(URegularExpression *regexp, 1301 int32_t limit, 1302 UErrorCode *status); 1303 1304/** 1305 * Get the size of the heap storage available for use by the backtracking stack. 1306 * @param regexp The compiled regular expression. @param status A reference to a UErrorCode to receive any errors. 1307 * @return the maximum backtracking stack size, in bytes, or zero if the 1308 * stack size is unlimited. 1309 * @stable ICU 4.0 1310 */ 1311U_STABLE int32_t U_EXPORT2 1312uregex_getStackLimit(const URegularExpression *regexp, 1313 UErrorCode *status); 1314 1315 1316/** 1317 * Function pointer for a regular expression matching callback function. 1318 * When set, a callback function will be called periodically during matching 1319 * operations. If the callback function returns FALSE, the matching 1320 * operation will be terminated early. 1321 * 1322 * Note: the callback function must not call other functions on this 1323 * URegularExpression. 1324 * 1325 * @param context context pointer. The callback function will be invoked 1326 * with the context specified at the time that 1327 * uregex_setMatchCallback() is called. 1328 * @param steps the accumulated processing time, in match steps, 1329 * for this matching operation. 1330 * @return TRUE to continue the matching operation. 1331 * FALSE to terminate the matching operation. 1332 * @stable ICU 4.0 1333 */ 1334U_CDECL_BEGIN 1335typedef UBool U_CALLCONV URegexMatchCallback ( 1336 const void *context, 1337 int32_t steps); 1338U_CDECL_END 1339 1340/** 1341 * Set a callback function for this URegularExpression. 1342 * During matching operations the function will be called periodically, 1343 * giving the application the opportunity to terminate a long-running 1344 * match. 1345 * 1346 * @param regexp The compiled regular expression. 1347 * @param callback A pointer to the user-supplied callback function. 1348 * @param context User context pointer.
The value supplied at the 1349 * time the callback function is set will be saved 1350 * and passed to the callback each time that it is called. 1351 * @param status A reference to a UErrorCode to receive any errors. 1352 * @stable ICU 4.0 1353 */ 1354U_STABLE void U_EXPORT2 1355uregex_setMatchCallback(URegularExpression *regexp, 1356 URegexMatchCallback *callback, 1357 const void *context, 1358 UErrorCode *status); 1359 1360 1361/** 1362 * Get the callback function for this URegularExpression. 1363 * 1364 * @param regexp The compiled regular expression. 1365 * @param callback Out parameter, receives a pointer to the user-supplied 1366 * callback function. 1367 * @param context Out parameter, receives the user context pointer that 1368 * was set when uregex_setMatchCallback() was called. 1369 * @param status A reference to a UErrorCode to receive any errors. 1370 * @stable ICU 4.0 1371 */ 1372U_STABLE void U_EXPORT2 1373uregex_getMatchCallback(const URegularExpression *regexp, 1374 URegexMatchCallback **callback, 1375 const void **context, 1376 UErrorCode *status); 1377 1378 1379/** 1380 * Function pointer for a regular expression find callback function. 1381 * 1382 * When set, a callback function will be called during a find operation 1383 * and for operations that depend on find, such as findNext, split and some replace 1384 * operations like replaceFirst. 1385 * The callback will usually be called after each attempt at a match, but this is not a 1386 * guarantee that the callback will be invoked at each character. For finds where the 1387 * match engine is invoked at each character, this may be close to true, but less likely 1388 * for more optimized loops where the pattern is known to only start, and the match 1389 * engine invoked, at certain characters. 1390 * When invoked, this callback will specify the index at which a match operation is about 1391 * to be attempted, giving the application the opportunity to terminate a long-running 1392 * find operation.
1393 * 1394 * If the callback function returns FALSE, the find operation will be terminated early. 1395 * 1396 * Note: the callback function must not call other functions on this 1397 * URegularExpression. 1398 * 1399 * @param context context pointer. The callback function will be invoked 1400 * with the context specified at the time that 1401 * uregex_setFindProgressCallback() is called. 1402 * @param matchIndex the next index at which a match will be attempted for this 1403 * find operation. If this callback interrupts the search, this is the 1404 * index at which a find/findNext operation may be re-initiated. 1405 * @return TRUE to continue the matching operation. 1406 * FALSE to terminate the matching operation. 1407 * @draft ICU 4.6 1408 */ 1409U_CDECL_BEGIN 1410typedef UBool U_CALLCONV URegexFindProgressCallback ( 1411 const void *context, 1412 int64_t matchIndex); 1413U_CDECL_END 1414 1415/** 1416 * Set the find progress callback function for this URegularExpression. 1417 * 1418 * @param regexp The compiled regular expression. 1419 * @param callback A pointer to the user-supplied callback function. 1420 * @param context User context pointer. The value supplied at the 1421 * time the callback function is set will be saved 1422 * and passed to the callback each time that it is called. 1423 * @param status A reference to a UErrorCode to receive any errors. 1424 * @draft ICU 4.6 1425 */ 1426U_DRAFT void U_EXPORT2 1427uregex_setFindProgressCallback(URegularExpression *regexp, 1428 URegexFindProgressCallback *callback, 1429 const void *context, 1430 UErrorCode *status); 1431 1432 1433/** 1434 * Get the find progress callback function for this URegularExpression. 1435 * 1436 * @param regexp The compiled regular expression. 1437 * @param callback Out parameter, receives a pointer to the user-supplied 1438 * callback function.
1439 * @param context Out parameter, receives the user context pointer that 1440 * was set when uregex_setFindProgressCallback() was called. 1441 * @param status A reference to a UErrorCode to receive any errors. 1442 * @draft ICU 4.6 1443 */ 1444U_DRAFT void U_EXPORT2 1445uregex_getFindProgressCallback(const URegularExpression *regexp, 1446 URegexFindProgressCallback **callback, 1447 const void **context, 1448 UErrorCode *status); 1449 1450#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 1451#endif /* UREGEX_H */ 1452