/*
 * Copyright (C) 1996-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 */
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * A transliterator that performs character to name mapping.
 * It generates the Perl syntax \N{name}.
 * @author Alan Liu
 */
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass UnicodeNameTransliterator extends Transliterator {
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final String _ID = "Any-Name";
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final String OPEN_DELIM = "\\N{";
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final char CLOSE_DELIM = '}';
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int OPEN_DELIM_LEN = 3;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * System registration hook.
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static void register() {
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Transliterator.registerFactory(_ID, new Transliterator.Factory() {
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public Transliterator getInstance(String ID) {
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return new UnicodeNameTransliterator(null);
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        });
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Constructs a transliterator.
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public UnicodeNameTransliterator(UnicodeFilter filter) {
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super(_ID, filter);
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Implements {@link Transliterator#handleTransliterate}.
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void handleTransliterate(Replaceable text,
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                       Position offsets, boolean isIncremental) {
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int cursor = offsets.start;
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limit = offsets.limit;
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder str = new StringBuilder();
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        str.append(OPEN_DELIM);
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int len;
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String name;
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (cursor < limit) {
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c = text.char32At(cursor);
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if ((name=UCharacter.getExtendedName(c)) != null) {
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.setLength(OPEN_DELIM_LEN);
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.append(name).append(CLOSE_DELIM);
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int clen = UTF16.getCharCount(c);
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                text.replace(cursor, cursor+clen, str.toString());
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                len = str.length();
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cursor += len; // advance cursor by 1 and adjust for new text
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit += len-clen; // change in length
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++cursor;
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        offsets.contextLimit += limit - offsets.limit;
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        offsets.limit = limit;
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        offsets.start = cursor;
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* (non-Javadoc)
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (myFilter.size() > 0) {
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            sourceSet.addAll(myFilter);
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            targetSet.addAll('0', '9')
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .addAll('A', 'Z')
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .add('-')
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .add(' ')
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .addAll(OPEN_DELIM)
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .add(CLOSE_DELIM)
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .addAll('a', 'z') // for controls
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .add('<').add('>') // for controls
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .add('(').add(')') // for controls
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ;
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
95