16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org **********************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *   Copyright (C) 1999-2011, International Business Machines
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *   Corporation and others.  All Rights Reserved.
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org **********************************************************************
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *   Date        Name        Description
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *   11/17/99    aliu        Creation.
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org **********************************************************************
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_TRANSLITERATION
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/rep.h"
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unifilt.h"
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt_rule.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt_data.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "strmatch.h"
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "strrepl.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "util.h"
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "putilimp.h"
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar FORWARD_OP[] = {32,62,32,0}; // " > "
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Construct a new rule with the given input, output text, and other
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * attributes.  A cursor position may be specified for the output text.
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param input input string, including key and optional ante and
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * post context
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param anteContextPos offset into input to end of ante context, or -1 if
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * none.  Must be <= input.length() if not -1.
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param postContextPos offset into input to start of post context, or -1
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if none.  Must be <= input.length() if not -1, and must be >=
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * anteContextPos.
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param output output string
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param cursorPosition offset into output at which cursor is located, or -1 if
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * none.  If less than zero, then the cursor is placed after the
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <code>output</code>; that is, -1 is equivalent to
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <code>output.length()</code>.  If greater than
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <code>output.length()</code> then an exception is thrown.
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param segs array of UnicodeFunctors corresponding to input pattern
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * segments, or null if there are none.  The array itself is adopted,
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * but the pointers within it are not.
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param segsCount number of elements in segs[]
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param anchorStart TRUE if the the rule is anchored on the left to
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the context start
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param anchorEnd TRUE if the rule is anchored on the right to the
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * context limit
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgTransliterationRule::TransliterationRule(const UnicodeString& input,
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         int32_t anteContextPos, int32_t postContextPos,
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         const UnicodeString& outputStr,
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         int32_t cursorPosition, int32_t cursorOffset,
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         UnicodeFunctor** segs,
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         int32_t segsCount,
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         UBool anchorStart, UBool anchorEnd,
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         const TransliterationRuleData* theData,
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         UErrorCode& status) :
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UMemory(),
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    segments(0),
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    data(theData) {
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Do range checks only when warranted to save time
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (anteContextPos < 0) {
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        anteContextLength = 0;
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (anteContextPos > input.length()) {
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // throw new IllegalArgumentException("Invalid ante context");
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ILLEGAL_ARGUMENT_ERROR;
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        anteContextLength = anteContextPos;
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (postContextPos < 0) {
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        keyLength = input.length() - anteContextLength;
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (postContextPos < anteContextLength ||
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            postContextPos > input.length()) {
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // throw new IllegalArgumentException("Invalid post context");
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ILLEGAL_ARGUMENT_ERROR;
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        keyLength = postContextPos - anteContextLength;
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (cursorPosition < 0) {
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cursorPosition = outputStr.length();
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (cursorPosition > outputStr.length()) {
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // throw new IllegalArgumentException("Invalid cursor position");
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ILLEGAL_ARGUMENT_ERROR;
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // We don't validate the segments array.  The caller must
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // guarantee that the segments are well-formed (that is, that
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // all $n references in the output refer to indices of this
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // array, and that no array elements are null).
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    this->segments = segs;
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    this->segmentsCount = segsCount;
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pattern = input;
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    flags = 0;
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (anchorStart) {
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        flags |= ANCHOR_START;
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (anchorEnd) {
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        flags |= ANCHOR_END;
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    anteContext = NULL;
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (anteContextLength > 0) {
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        anteContext = new StringMatcher(pattern, 0, anteContextLength,
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                        FALSE, *data);
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* test for NULL */
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (anteContext == 0) {
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_MEMORY_ALLOCATION_ERROR;
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    key = NULL;
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (keyLength > 0) {
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        key = new StringMatcher(pattern, anteContextLength, anteContextLength + keyLength,
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                FALSE, *data);
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* test for NULL */
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (key == 0) {
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_MEMORY_ALLOCATION_ERROR;
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t postContextLength = pattern.length() - keyLength - anteContextLength;
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    postContext = NULL;
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (postContextLength > 0) {
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        postContext = new StringMatcher(pattern, anteContextLength + keyLength, pattern.length(),
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                        FALSE, *data);
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* test for NULL */
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (postContext == 0) {
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_MEMORY_ALLOCATION_ERROR;
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    this->output = new StringReplacer(outputStr, cursorPosition + cursorOffset, data);
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* test for NULL */
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (this->output == 0) {
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_MEMORY_ALLOCATION_ERROR;
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copy constructor.
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgTransliterationRule::TransliterationRule(TransliterationRule& other) :
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UMemory(other),
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    anteContext(NULL),
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    key(NULL),
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    postContext(NULL),
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pattern(other.pattern),
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    anteContextLength(other.anteContextLength),
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    keyLength(other.keyLength),
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    flags(other.flags),
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    data(other.data) {
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    segments = NULL;
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    segmentsCount = 0;
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (other.segmentsCount > 0) {
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        segments = (UnicodeFunctor **)uprv_malloc(other.segmentsCount * sizeof(UnicodeFunctor *));
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uprv_memcpy(segments, other.segments, other.segmentsCount*sizeof(segments[0]));
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (other.anteContext != NULL) {
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        anteContext = (StringMatcher*) other.anteContext->clone();
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (other.key != NULL) {
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        key = (StringMatcher*) other.key->clone();
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (other.postContext != NULL) {
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        postContext = (StringMatcher*) other.postContext->clone();
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    output = other.output->clone();
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgTransliterationRule::~TransliterationRule() {
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_free(segments);
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete anteContext;
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete key;
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete postContext;
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete output;
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the preceding context length.  This method is needed to
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * support the <code>Transliterator</code> method
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <code>getMaximumContextLength()</code>.  Internally, this is
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * implemented as the anteContextLength, optionally plus one if
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * there is a start anchor.  The one character anchor gap is
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * needed to make repeated incremental transliteration with
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * anchors work.
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t TransliterationRule::getContextLength(void) const {
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return anteContextLength + ((flags & ANCHOR_START) ? 1 : 0);
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Internal method.  Returns 8-bit index value for this rule.
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is the low byte of the first character of the key,
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * unless the first character of the key is a set.  If it's a
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * set, or otherwise can match multiple keys, the index value is -1.
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint16_t TransliterationRule::getIndexValue() const {
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (anteContextLength == pattern.length()) {
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A pattern with just ante context {such as foo)>bar} can
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // match any key.
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -1;
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c = pattern.char32At(anteContextLength);
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int16_t)(data->lookupMatcher(c) == NULL ? (c & 0xFF) : -1);
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Internal method.  Returns true if this rule matches the given
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * index value.  The index value is an 8-bit integer, 0..255,
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * representing the low byte of the first character of the key.
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It matches this rule if it matches the first character of the
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * key, or if the first character of the key is a set, and the set
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * contains any character with a low byte equal to the index
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * value.  If the rule contains only ante context, as in foo)>bar,
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then it will match any key.
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool TransliterationRule::matchesIndexValue(uint8_t v) const {
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Delegate to the key, or if there is none, to the postContext.
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // If there is neither then we match any key; return true.
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeMatcher *m = (key != NULL) ? key : postContext;
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (m != NULL) ? m->matchesIndexValue(v) : TRUE;
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return true if this rule masks another rule.  If r1 masks r2 then
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * r1 matches any input string that r2 matches.  If r1 masks r2 and r2 masks
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * r1 then r1 == r2.  Examples: "a>x" masks "ab>y".  "a>x" masks "a[b]>y".
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * "[c]a>x" masks "[dc]a>y".
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool TransliterationRule::masks(const TransliterationRule& r2) const {
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Rule r1 masks rule r2 if the string formed of the
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * antecontext, key, and postcontext overlaps in the following
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * way:
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * r1:      aakkkpppp
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * r2:     aaakkkkkpppp
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *            ^
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The strings must be aligned at the first character of the
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * key.  The length of r1 to the left of the alignment point
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * must be <= the length of r2 to the left; ditto for the
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * right.  The characters of r1 must equal (or be a superset
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * of) the corresponding characters of r2.  The superset
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * operation should be performed to check for UnicodeSet
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * masking.
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Anchors:  Two patterns that differ only in anchors only
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * mask one another if they are exactly equal, and r2 has
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * all the anchors r1 has (optionally, plus some).  Here Y
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * means the row masks the column, N means it doesn't.
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *         ab   ^ab    ab$  ^ab$
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *   ab    Y     Y     Y     Y
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *  ^ab    N     Y     N     Y
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *   ab$   N     N     Y     Y
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *  ^ab$   N     N     N     Y
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Post context: {a}b masks ab, but not vice versa, since {a}b
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * matches everything ab matches, and {a}b matches {|a|}b but ab
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * does not.  Pre context is different (a{b} does not align with
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * ab).
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* LIMITATION of the current mask algorithm: Some rule
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * maskings are currently not detected.  For example,
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * "{Lu}]a>x" masks "A]a>y".  This can be added later. TODO
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t len = pattern.length();
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t left = anteContextLength;
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t left2 = r2.anteContextLength;
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t right = len - left;
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t right2 = r2.pattern.length() - left2;
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t cachedCompare = r2.pattern.compare(left2 - left, len, pattern);
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // TODO Clean this up -- some logic might be combinable with the
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // next statement.
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Test for anchor masking
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (left == left2 && right == right2 &&
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        keyLength <= r2.keyLength &&
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        0 == cachedCompare) {
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The following boolean logic implements the table above
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (flags == r2.flags) ||
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (!(flags & ANCHOR_START) && !(flags & ANCHOR_END)) ||
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ((r2.flags & ANCHOR_START) && (r2.flags & ANCHOR_END));
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return left <= left2 &&
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        (right < right2 ||
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         (right == right2 && keyLength <= r2.keyLength)) &&
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         (0 == cachedCompare);
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline int32_t posBefore(const Replaceable& str, int32_t pos) {
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (pos > 0) ?
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pos - U16_LENGTH(str.char32At(pos-1)) :
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pos - 1;
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline int32_t posAfter(const Replaceable& str, int32_t pos) {
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (pos >= 0 && pos < str.length()) ?
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pos + U16_LENGTH(str.char32At(pos)) :
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pos + 1;
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Attempt a match and replacement at the given position.  Return
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the degree of match between this rule and the given text.  The
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * degree of match may be mismatch, a partial match, or a full
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * match.  A mismatch means at least one character of the text
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * does not match the context or key.  A partial match means some
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * context and key characters match, but the text is not long
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * enough to match all of them.  A full match means all context
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and key characters match.
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If a full match is obtained, perform a replacement, update pos,
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and return U_MATCH.  Otherwise both text and pos are unchanged.
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param text the text
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param pos the position indices
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param incremental if TRUE, test for partial matches that may
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * be completed by additional text inserted at pos.limit.
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return one of <code>U_MISMATCH</code>,
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <code>U_PARTIAL_MATCH</code>, or <code>U_MATCH</code>.  If
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * incremental is FALSE then U_PARTIAL_MATCH will not be returned.
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                  UTransPosition& pos,
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                  UBool incremental) const {
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Matching and replacing are done in one method because the
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // replacement operation needs information obtained during the
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // match.  Another way to do this is to have the match method
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // create a match result struct with relevant offsets, and to pass
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // this into the replace method.
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // ============================ MATCH ===========================
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Reset segment match data
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (segments != NULL) {
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (int32_t i=0; i<segmentsCount; ++i) {
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ((StringMatcher*) segments[i])->resetMatch();
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    int32_t lenDelta, keyLimit;
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t keyLimit;
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // ------------------------ Ante Context ------------------------
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // A mismatch in the ante context, or with the start anchor,
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // is an outright U_MISMATCH regardless of whether we are
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // incremental or not.
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t oText; // offset into 'text'
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    int32_t newStart = 0;
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t minOText;
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Note (1): We process text in 16-bit code units, rather than
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // 32-bit code points.  This works because stand-ins are
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // always in the BMP and because we are doing a literal match
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // operation, which can be done 16-bits at a time.
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t anteLimit = posBefore(text, pos.contextStart);
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UMatchDegree match;
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Start reverse match at char before pos.start
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    oText = posBefore(text, pos.start);
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (anteContext != NULL) {
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        match = anteContext->matches(text, oText, anteLimit, FALSE);
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (match != U_MATCH) {
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return U_MISMATCH;
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    minOText = posAfter(text, oText);
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // ------------------------ Start Anchor ------------------------
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (((flags & ANCHOR_START) != 0) && oText != anteLimit) {
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return U_MISMATCH;
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // -------------------- Key and Post Context --------------------
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    oText = pos.start;
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (key != NULL) {
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        match = key->matches(text, oText, pos.limit, incremental);
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (match != U_MATCH) {
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return match;
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    keyLimit = oText;
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (postContext != NULL) {
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (incremental && keyLimit == pos.limit) {
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The key matches just before pos.limit, and there is
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // a postContext.  Since we are in incremental mode,
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // we must assume more characters may be inserted at
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // pos.limit -- this is a partial match.
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return U_PARTIAL_MATCH;
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        match = postContext->matches(text, oText, pos.contextLimit, incremental);
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (match != U_MATCH) {
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return match;
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // ------------------------- Stop Anchor ------------------------
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (((flags & ANCHOR_END)) != 0) {
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (oText != pos.contextLimit) {
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return U_MISMATCH;
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (incremental) {
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return U_PARTIAL_MATCH;
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // =========================== REPLACE ==========================
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // We have a full match.  The key is between pos.start and
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // keyLimit.
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t newStart;
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t newLength = output->toReplacer()->replace(text, pos.start, keyLimit, newStart);
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t lenDelta = newLength - (keyLimit - pos.start);
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    oText += lenDelta;
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pos.limit += lenDelta;
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pos.contextLimit += lenDelta;
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Restrict new value of start to [minOText, min(oText, pos.limit)].
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pos.start = uprv_max(minOText, uprv_min(uprv_min(oText, pos.limit), newStart));
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return U_MATCH;
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Create a source string that represents this rule.  Append it to the
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * given string.
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString& TransliterationRule::toRule(UnicodeString& rule,
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                           UBool escapeUnprintable) const {
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Accumulate special characters (and non-specials following them)
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // into quoteBuf.  Append quoteBuf, within single quotes, when
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // a non-quoted element must be inserted.
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString str, quoteBuf;
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Do not emit the braces '{' '}' around the pattern if there
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // is neither anteContext nor postContext.
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool emitBraces =
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        (anteContext != NULL) || (postContext != NULL);
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Emit start anchor
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ((flags & ANCHOR_START) != 0) {
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        rule.append((UChar)94/*^*/);
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Emit the input pattern
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ICU_Utility::appendToRule(rule, anteContext, escapeUnprintable, quoteBuf);
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (emitBraces) {
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ICU_Utility::appendToRule(rule, (UChar) 0x007B /*{*/, TRUE, escapeUnprintable, quoteBuf);
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ICU_Utility::appendToRule(rule, key, escapeUnprintable, quoteBuf);
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (emitBraces) {
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ICU_Utility::appendToRule(rule, (UChar) 0x007D /*}*/, TRUE, escapeUnprintable, quoteBuf);
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ICU_Utility::appendToRule(rule, postContext, escapeUnprintable, quoteBuf);
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Emit end anchor
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ((flags & ANCHOR_END) != 0) {
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        rule.append((UChar)36/*$*/);
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ICU_Utility::appendToRule(rule, UnicodeString(TRUE, FORWARD_OP, 3), TRUE, escapeUnprintable, quoteBuf);
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Emit the output pattern
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ICU_Utility::appendToRule(rule, output->toReplacer()->toReplacerPattern(str, escapeUnprintable),
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              TRUE, escapeUnprintable, quoteBuf);
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ICU_Utility::appendToRule(rule, (UChar) 0x003B /*;*/, TRUE, escapeUnprintable, quoteBuf);
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return rule;
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid TransliterationRule::setData(const TransliterationRuleData* d) {
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    data = d;
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (anteContext != NULL) anteContext->setData(d);
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (postContext != NULL) postContext->setData(d);
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (key != NULL) key->setData(d);
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // assert(output != NULL);
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    output->setData(d);
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Don't have to do segments since they are in the context or key
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Union the set of all characters that may be modified by this rule
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * into the given set.
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid TransliterationRule::addSourceSetTo(UnicodeSet& toUnionTo) const {
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t limit = anteContextLength + keyLength;
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (int32_t i=anteContextLength; i<limit; ) {
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 ch = pattern.char32At(i);
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        i += U16_LENGTH(ch);
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UnicodeMatcher* matcher = data->lookupMatcher(ch);
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (matcher == NULL) {
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            toUnionTo.add(ch);
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            matcher->addMatchSetTo(toUnionTo);
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Union the set of all characters that may be emitted by this rule
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * into the given set.
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid TransliterationRule::addTargetSetTo(UnicodeSet& toUnionTo) const {
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    output->toReplacer()->addReplacementSetTo(toUnionTo);
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_TRANSLITERATION */
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//eof
558