// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

import com.ibm.icu.impl.UCaseProps;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ULocale;

157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A transliterator that performs locale-sensitive toLower()
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * case mapping.
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass LowercaseTransliterator extends Transliterator{
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Package accessible ID.
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final String _ID = "Any-Lower";
252d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
262d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    // TODO: Add variants for tr/az, lt, default = default locale: ICU ticket #12720
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * System registration hook.
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static void register() {
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Transliterator.registerFactory(_ID, new Transliterator.Factory() {
332d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            @Override
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public Transliterator getInstance(String ID) {
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return new LowercaseTransliterator(ULocale.US);
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        });
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Transliterator.registerSpecialInverse("Lower", "Upper", true);
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    private final ULocale locale;
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    private final UCaseProps csp;
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private ReplaceableContextIterator iter;
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private StringBuilder result;
4763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer    private int caseLocale;
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Constructs a transliterator.
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public LowercaseTransliterator(ULocale loc) {
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        super(_ID, null);
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        locale = loc;
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        csp=UCaseProps.INSTANCE;
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter=new ReplaceableContextIterator();
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result = new StringBuilder();
5963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer        caseLocale = UCaseProps.getCaseLocale(locale);
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Implements {@link Transliterator#handleTransliterate}.
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
652d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert    @Override
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected synchronized void handleTransliterate(Replaceable text,
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                       Position offsets, boolean isIncremental) {
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(csp==null) {
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(offsets.start >= offsets.limit) {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
742d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert        }
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter.setText(text);
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.setLength(0);
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c, delta;
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Walk through original string
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If there is a case change, modify corresponding position in replaceable
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter.setIndex(offsets.start);
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter.setLimit(offsets.limit);
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while((c=iter.nextCaseMapCP())>=0) {
8763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer            c=csp.toFullLower(c, iter, result, caseLocale);
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(iter.didReachLimit() && isIncremental) {
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // the case mapping function tried to look beyond the context limit
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // wait for more input
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offsets.start=iter.getCaseMapCPStart();
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* decode the result */
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<0) {
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* c mapped to itself, no change */
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* replace by the mapping string */
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                delta=iter.replace(result.toString());
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result.setLength(0);
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /* replace by single-code point mapping */
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                delta=iter.replace(UTF16.valueOf(c));
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(delta!=0) {
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offsets.limit += delta;
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offsets.contextLimit += delta;
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        offsets.start = offsets.limit;
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1162d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // NOTE: normally this would be static, but because the results vary by locale....
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    SourceTargetUtility sourceTargetUtility = null;
1192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* (non-Javadoc)
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        synchronized (this) {
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (sourceTargetUtility == null) {
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() {
1282d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                    @Override
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    public String transform(String source) {
1302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                        return UCharacter.toLowerCase(locale, source);
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                });
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sourceTargetUtility.addSourceTargetSet(this, inputFilter, sourceSet, targetSet);
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
138