17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 1996-2010, International Business Machines Corporation and 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter; 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A transliterator that performs character to name mapping. 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * It generates the Perl syntax \N{name}. 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Alan Liu 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass UnicodeNameTransliterator extends Transliterator { 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final String _ID = "Any-Name"; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final String OPEN_DELIM = "\\N{"; 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final char CLOSE_DELIM = '}'; 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final int OPEN_DELIM_LEN = 3; 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * System registration hook. 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static void register() { 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator.registerFactory(_ID, new Transliterator.Factory() { 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Transliterator getInstance(String ID) { 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new UnicodeNameTransliterator(null); 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }); 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a transliterator. 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public UnicodeNameTransliterator(UnicodeFilter filter) { 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert super(_ID, filter); 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Implements {@link Transliterator#handleTransliterate}. 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected void handleTransliterate(Replaceable text, 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position offsets, boolean isIncremental) { 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int cursor = offsets.start; 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit = offsets.limit; 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder str = new StringBuilder(); 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert str.append(OPEN_DELIM); 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int len; 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String name; 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (cursor < limit) { 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c = text.char32At(cursor); 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if ((name=UCharacter.getExtendedName(c)) != null) { 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert str.setLength(OPEN_DELIM_LEN); 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert str.append(name).append(CLOSE_DELIM); 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int clen = UTF16.getCharCount(c); 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(cursor, cursor+clen, str.toString()); 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert len = str.length(); 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cursor += len; // advance cursor by 1 and adjust for new text 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit += len-clen; // change in length 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++cursor; 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.contextLimit += limit - offsets.limit; 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.limit = limit; 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.start = cursor; 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* (non-Javadoc) 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet) 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (myFilter.size() > 0) { 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sourceSet.addAll(myFilter); 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert targetSet.addAll('0', '9') 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .addAll('A', 'Z') 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .add('-') 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .add(' ') 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .addAll(OPEN_DELIM) 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .add(CLOSE_DELIM) 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .addAll('a', 'z') // for controls 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .add('<').add('>') // for controls 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .add('(').add(')') // for controls 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ; 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 95