// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2005, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;
import java.text.ParsePosition;

127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * An interface that defines both lookup protocol and parsing of
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * symbolic names.
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>This interface is used by UnicodeSet to resolve $Variable style
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * references that appear in set patterns.  RBBI and Transliteration
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * both independently implement this interface.
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>A symbol table maintains two kinds of mappings.  The first is
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * between symbolic names and their values.  For example, if the
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * variable with the name "start" is set to the value "alpha"
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (perhaps, though not necessarily, through an expression such as
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * "$start=alpha"), then the call lookup("start") will return the
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * char[] array ['a', 'l', 'p', 'h', 'a'].
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>The second kind of mapping is between character values and
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UnicodeMatcher objects.  This is used by RuleBasedTransliterator,
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * which uses characters in the private use area to represent objects
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * such as UnicodeSets.  If U+E015 is mapped to the UnicodeSet [a-z],
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>Finally, a symbol table defines parsing behavior for symbolic
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * names.  All symbolic names start with the SYMBOL_REF character.
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * When a parser encounters this character, it calls parseReference()
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * with the position immediately following the SYMBOL_REF.  The symbol
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * table parses the name, if there is one, and returns it.
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.8
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic interface SymbolTable {
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The character preceding a symbol reference name.
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final char SYMBOL_REF = '$';
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Lookup the characters associated with this string and return it.
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return <tt>null</tt> if no such name exists.  The resultant
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * array may have length zero.
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param s the symbolic name to lookup
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return a char array containing the name's value, or null if
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * there is no mapping for s.
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    char[] lookup(String s);
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Lookup the UnicodeMatcher associated with the given character, and
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * return it.  Return <tt>null</tt> if not found.
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the UnicodeMatcher object represented by the given
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * character, or null if there is no mapping for ch.
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    UnicodeMatcher lookupMatcher(int ch);
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Parse a symbol reference name from the given string, starting
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * at the given position.  If no valid symbol reference name is
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * found, return null and leave pos unchanged.  That is, if the
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * character at pos cannot start a name, or if pos is at or after
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * text.length(), then return null.  This indicates an isolated
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * SYMBOL_REF character.
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param text the text to parse for the name
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param pos on entry, the index of the first character to parse.
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is the character following the SYMBOL_REF character.  On
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * exit, the index after the last parsed character.  If the parse
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * failed, pos is unchanged on exit.
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param limit the index after the last character to be parsed.
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the parsed name, or null if there is no valid symbolic
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * name at the given position.
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String parseReference(String text, ParsePosition pos, int limit);
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
89