1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (c) 2001-2007, International Business Machines 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Date Name Description 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 11/20/2001 aliu Creation. 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef ESCTRN_H 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ESCTRN_H 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_TRANSLITERATION 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/translit.h" 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * A transliterator that converts Unicode characters to an escape 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * form. Examples of escape forms are "U+4E01" and "". 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Escape forms have a prefix and suffix, either of which may be 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * empty, a radix, typically 16 or 10, a minimum digit count, 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * typically 1, 4, or 8, and a boolean that specifies whether 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * supplemental characters are handled as 32-bit code points or as two 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 16-bit code units. Most escape forms handle 32-bit code points, 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * but some, such as the Java form, intentionally break them into two 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * surrogate pairs, for backward compatibility. 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p>Some escape forms actually have two different patterns, one for 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * BMP characters (0..FFFF) and one for supplements (>FFFF). To 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * handle this, a second EscapeTransliterator may be defined that 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * specifies the pattern to be produced for supplementals. An example 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of a form that requires this is the C form, which uses "\\uFFFF" 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for BMP characters and "\\U0010FFFF" for supplementals. 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p>This class is package private. It registers several standard 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * variants with the system which are then accessed via their IDs. 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @author Alan Liu 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class EscapeTransliterator : public Transliterator { 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) private: 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The prefix of the escape form; may be empty, but usually isn't. 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString prefix; 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The prefix of the escape form; often empty. 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString suffix; 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The radix to display the number in. Typically 16 or 10. Must 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * be in the range 2 to 36. 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t radix; 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The minimum number of digits. Typically 1, 4, or 8. Values 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * less than 1 are equivalent to 1. 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t minDigits; 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If true, supplementals are handled as 32-bit code points. If 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * false, they are handled as two 16-bit code units. 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool grokSupplementals; 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The form to be used for supplementals. If this is null then 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the same form is used for BMP characters and supplementals. If 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * this is not null and if grokSupplementals is true then the 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * prefix, suffix, radix, and minDigits of this object are used 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for supplementals. This pointer is owned. 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) EscapeTransliterator* supplementalHandler; 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) public: 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Registers standard variants with the system. Called by 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Transliterator during initialization. 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static void registerIDs(); 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Constructs an escape transliterator with the given ID and 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * parameters. See the class member documentation for details. 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) EscapeTransliterator(const UnicodeString& ID, 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString& prefix, const UnicodeString& suffix, 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t radix, int32_t minDigits, 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool grokSupplementals, 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) EscapeTransliterator* adoptedSupplementalHandler); 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copy constructor. 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) EscapeTransliterator(const EscapeTransliterator&); 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Destructor. 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~EscapeTransliterator(); 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Transliterator API. 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual Transliterator* clone() const; 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * ICU "poor man's RTTI", returns a UClassID for the actual class. 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual UClassID getDynamicClassID() const; 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * ICU "poor man's RTTI", returns a UClassID for this class. 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_I18N_API static UClassID U_EXPORT2 getStaticClassID(); 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) protected: 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Implements {@link Transliterator#handleTransliterate}. 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool isIncremental) const; 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 143