1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 * Copyright (C) 1996-2011, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 *
7 */
8package com.ibm.icu.text;
9
10import com.ibm.icu.impl.UCaseProps;
11import com.ibm.icu.lang.UCharacter;
12import com.ibm.icu.util.ULocale;
13
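/**
 * A transliterator that title-cases text based on case properties.
 * Each character is classified by <code>UCaseProps.getTypeOrIgnorable()</code>
 * as cased, uncased, or case-ignorable.  A character that is not
 * case-ignorable is mapped with <code>UCaseProps.toFullTitle()</code> when the
 * closest preceding non-case-ignorable character is uncased (or when there is
 * no such character in the context), and with
 * <code>UCaseProps.toFullLower()</code> when that preceding character is cased.
 * Case-ignorable characters are left as they are and do not change which
 * mapping is applied next.
 * @author Alan Liu
 */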
21class TitlecaseTransliterator extends Transliterator {
22
23    static final String _ID = "Any-Title";
24    // TODO: Add variants for tr/az, lt, default = default locale: ICU ticket #12720
25
26    /**
27     * System registration hook.
28     */
29    static void register() {
30        Transliterator.registerFactory(_ID, new Transliterator.Factory() {
31            @Override
32            public Transliterator getInstance(String ID) {
33                return new TitlecaseTransliterator(ULocale.US);
34            }
35        });
36
37        registerSpecialInverse("Title", "Lower", false);
38    }
39
40    private final ULocale locale;
41
42    private final UCaseProps csp;
43    private ReplaceableContextIterator iter;
44    private StringBuilder result;
45    private int caseLocale;
46
47   /**
48     * Constructs a transliterator.
49     */
50    public TitlecaseTransliterator(ULocale loc) {
51        super(_ID, null);
52        locale = loc;
53        // Need to look back 2 characters in the case of "can't"
54        setMaximumContextLength(2);
55        csp=UCaseProps.INSTANCE;
56        iter=new ReplaceableContextIterator();
57        result = new StringBuilder();
58        caseLocale = UCaseProps.getCaseLocale(locale);
59    }
60
61    /**
62     * Implements {@link Transliterator#handleTransliterate}.
63     */
64    @Override
65    protected synchronized void handleTransliterate(Replaceable text,
66                                       Position offsets, boolean isIncremental) {
67        // TODO reimplement, see ustrcase.c
68        // using a real word break iterator
69        //   instead of just looking for a transition between cased and uncased characters
70        // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
71        // needs to take isIncremental into account because case mappings are context-sensitive
72        //   also detect when lowercasing function did not finish because of context
73
74        if (offsets.start >= offsets.limit) {
75            return;
76        }
77
78        // case type: >0 cased (UCaseProps.LOWER etc.)  ==0 uncased  <0 case-ignorable
79        int type;
80
81        // Our mode; we are either converting letter toTitle or
82        // toLower.
83        boolean doTitle = true;
84
85        // Determine if there is a preceding context of cased case-ignorable*,
86        // in which case we want to start in toLower mode.  If the
87        // prior context is anything else (including empty) then start
88        // in toTitle mode.
89        int c, start;
90        for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF16.getCharCount(c)) {
91            c = text.char32At(start);
92            type=csp.getTypeOrIgnorable(c);
93            if(type>0) { // cased
94                doTitle=false;
95                break;
96            } else if(type==0) { // uncased but not ignorable
97                break;
98            }
99            // else (type<0) case-ignorable: continue
100        }
101
102        // Convert things after a cased character toLower; things
103        // after a uncased, non-case-ignorable character toTitle.  Case-ignorable
104        // characters are copied directly and do not change the mode.
105
106        iter.setText(text);
107        iter.setIndex(offsets.start);
108        iter.setLimit(offsets.limit);
109        iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
110
111        result.setLength(0);
112
113        // Walk through original string
114        // If there is a case change, modify corresponding position in replaceable
115        int delta;
116
117        while((c=iter.nextCaseMapCP())>=0) {
118            type=csp.getTypeOrIgnorable(c);
119            if(type>=0) { // not case-ignorable
120                if(doTitle) {
121                    c=csp.toFullTitle(c, iter, result, caseLocale);
122                } else {
123                    c=csp.toFullLower(c, iter, result, caseLocale);
124                }
125                doTitle = type==0; // doTitle=isUncased
126
127                if(iter.didReachLimit() && isIncremental) {
128                    // the case mapping function tried to look beyond the context limit
129                    // wait for more input
130                    offsets.start=iter.getCaseMapCPStart();
131                    return;
132                }
133
134                /* decode the result */
135                if(c<0) {
136                    /* c mapped to itself, no change */
137                    continue;
138                } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
139                    /* replace by the mapping string */
140                    delta=iter.replace(result.toString());
141                    result.setLength(0);
142                } else {
143                    /* replace by single-code point mapping */
144                    delta=iter.replace(UTF16.valueOf(c));
145                }
146
147                if(delta!=0) {
148                    offsets.limit += delta;
149                    offsets.contextLimit += delta;
150                }
151            }
152        }
153        offsets.start = offsets.limit;
154    }
155
156    // NOTE: normally this would be static, but because the results vary by locale....
157    SourceTargetUtility sourceTargetUtility = null;
158
159    /* (non-Javadoc)
160     * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
161     */
162    @Override
163    public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
164        synchronized (this) {
165            if (sourceTargetUtility == null) {
166                sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() {
167                    @Override
168                    public String transform(String source) {
169                        return UCharacter.toTitleCase(locale, source, null);
170                    }
171                });
172            }
173        }
174        sourceTargetUtility.addSourceTargetSet(this, inputFilter, sourceSet, targetSet);
175    }
176}
177