1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 2014-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9package com.ibm.icu.impl;
10
11/**
12 * Formats simple patterns like "{1} was born in {0}".
13 * Internal version of {@link com.ibm.icu.text.SimpleFormatter}
14 * with only static methods, to avoid wrapper objects.
15 *
16 * <p>This class "compiles" pattern strings into a binary format
17 * and implements formatting etc. based on that.
18 *
19 * <p>Format:
20 * Index 0: One more than the highest argument number.
21 * Followed by zero or more arguments or literal-text segments.
22 *
23 * <p>An argument is stored as its number, less than ARG_NUM_LIMIT.
24 * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
25 * followed by that many chars.
26 */
public final class SimpleFormatterImpl {
    /**
     * Argument numbers must be smaller than this limit.
     * Text segment lengths are offset by this much.
     * This is currently the only unused char value in compiled patterns,
     * except it is the maximum value of the first unit (max arg +1).
     */
    private static final int ARG_NUM_LIMIT = 0x100;
    /** Length unit for a literal-text segment of exactly one char. */
    private static final char LEN1_CHAR = (char)(ARG_NUM_LIMIT + 1);
    /** Length unit for a literal-text segment of exactly two chars. */
    private static final char LEN2_CHAR = (char)(ARG_NUM_LIMIT + 2);
    /** Length unit for a literal-text segment of exactly three chars. */
    private static final char LEN3_CHAR = (char)(ARG_NUM_LIMIT + 3);
    /**
     * Initial and maximum char/UChar value set for a text segment.
     * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
     * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
     */
    private static final char SEGMENT_LENGTH_ARGUMENT_CHAR = (char)0xffff;
    /**
     * Maximum length of a text segment. Longer segments are split into shorter ones.
     */
    private static final int MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_ARGUMENT_CHAR - ARG_NUM_LIMIT;

    /**
     * "Intern" some common patterns.
     * Each entry is { raw pattern, compiled pattern }; every compiled form has 2 as its first unit.
     * The en dash is written as a Unicode escape so that the class does not depend on
     * the source-file encoding used by the compiler.
     */
    private static final String[][] COMMON_PATTERNS = {
        { "{0} {1}", "\u0002\u0000" + LEN1_CHAR + " \u0001" },
        { "{0} ({1})", "\u0002\u0000" + LEN2_CHAR + " (\u0001" + LEN1_CHAR + ')' },
        { "{0}, {1}", "\u0002\u0000" + LEN2_CHAR + ", \u0001" },
        { "{0} \u2013 {1}", "\u0002\u0000" + LEN3_CHAR + " \u2013 \u0001" },  // en dash
    };

    /** Use only static methods. */
    private SimpleFormatterImpl() {}

    /**
     * Creates a compiled form of the pattern string, for use with appropriate static methods.
     * The number of arguments checked against the given limits is the
     * highest argument number plus one, not the number of occurrences of arguments.
     *
     * @param pattern The pattern string.
     * @param sb A StringBuilder used as scratch space for building the compiled pattern.
     *           It is not touched when one of the precompiled common patterns matches;
     *           otherwise its previous contents are overwritten.
     * @param min The pattern must have at least this many arguments.
     * @param max The pattern must have at most this many arguments.
     * @return The compiled-pattern string.
     * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments.
     */
    public static String compileToStringMinMaxArguments(
            CharSequence pattern, StringBuilder sb, int min, int max) {
        // Return some precompiled common two-argument patterns.
        if (min <= 2 && 2 <= max) {
            for (String[] pair : COMMON_PATTERNS) {
                if (pair[0].contentEquals(pattern)) {
                    assert pair[1].charAt(0) == 2;
                    return pair[1];
                }
            }
        }
        // Parse consistent with MessagePattern, but
        // - support only simple numbered arguments
        // - build a simple binary structure into the result string
        int patternLength = pattern.length();
        sb.ensureCapacity(patternLength);
        // Reserve the first char for the number of arguments.
        sb.setLength(1);
        // Number of chars appended so far to the currently open literal-text segment;
        // 0 means that no segment is open.
        int textLength = 0;
        int maxArg = -1;
        boolean inQuote = false;
        for (int i = 0; i < patternLength;) {
            char c = pattern.charAt(i++);
            if (c == '\'') {
                // Note: the condition below also loads the char after the apostrophe into c,
                // which the following branches rely on.
                if (i < patternLength && (c = pattern.charAt(i)) == '\'') {
                    // double apostrophe, skip the second one
                    ++i;
                } else if (inQuote) {
                    // skip the quote-ending apostrophe
                    inQuote = false;
                    continue;
                } else if (c == '{' || c == '}') {
                    // Skip the quote-starting apostrophe, find the end of the quoted literal text.
                    ++i;
                    inQuote = true;
                } else {
                    // The apostrophe is part of literal text.
                    c = '\'';
                }
            } else if (!inQuote && c == '{') {
                if (textLength > 0) {
                    // Close the open text segment: overwrite its preset length unit
                    // with the actual length.
                    sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
                    textLength = 0;
                }
                int argNumber;
                // Fast path: a single digit immediately followed by '}'.
                if ((i + 1) < patternLength &&
                        0 <= (argNumber = pattern.charAt(i) - '0') && argNumber <= 9 &&
                        pattern.charAt(i + 1) == '}') {
                    i += 2;
                } else {
                    // Multi-digit argument number (no leading zero) or syntax error.
                    // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
                    // around the number, but this class does not.
                    int argStart = i - 1;
                    argNumber = -1;
                    if (i < patternLength && '1' <= (c = pattern.charAt(i++)) && c <= '9') {
                        argNumber = c - '0';
                        while (i < patternLength && '0' <= (c = pattern.charAt(i++)) && c <= '9') {
                            argNumber = argNumber * 10 + (c - '0');
                            if (argNumber >= ARG_NUM_LIMIT) {
                                // Too large: c is still a digit here, so the check below throws.
                                break;
                            }
                        }
                    }
                    if (argNumber < 0 || c != '}') {
                        throw new IllegalArgumentException(
                                "Argument syntax error in pattern \"" + pattern +
                                "\" at index " + argStart +
                                ": " + pattern.subSequence(argStart, i));
                    }
                }
                if (argNumber > maxArg) {
                    maxArg = argNumber;
                }
                sb.append((char)argNumber);
                continue;
            }  // else: c is part of literal text
            // Append c and track the literal-text segment length.
            if (textLength == 0) {
                // Reserve a char for the length of a new text segment, preset the maximum length.
                sb.append(SEGMENT_LENGTH_ARGUMENT_CHAR);
            }
            sb.append(c);
            if (++textLength == MAX_SEGMENT_LENGTH) {
                // The segment is full and its preset length unit is already correct;
                // the next literal char starts a new segment.
                textLength = 0;
            }
        }
        if (textLength > 0) {
            sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
        }
        int argCount = maxArg + 1;
        if (argCount < min) {
            throw new IllegalArgumentException(
                    "Fewer than minimum " + min + " arguments in pattern \"" + pattern + "\"");
        }
        if (argCount > max) {
            throw new IllegalArgumentException(
                    "More than maximum " + max + " arguments in pattern \"" + pattern + "\"");
        }
        sb.setCharAt(0, (char)argCount);
        return sb.toString();
    }

    /**
     * @param compiledPattern Compiled form of a pattern string.
     * @return The max argument number + 1.
     */
    public static int getArgumentLimit(String compiledPattern) {
        return compiledPattern.charAt(0);
    }

    /**
     * Formats the given values.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param values The argument values.
     *               values.length must be at least getArgumentLimit().
     *               Can be null if getArgumentLimit()==0.
     * @return The formatted string.
     * @throws IllegalArgumentException if there are too few values.
     */
    public static String formatCompiledPattern(String compiledPattern, CharSequence... values) {
        return formatAndAppend(compiledPattern, new StringBuilder(), null, values).toString();
    }

    /**
     * Formats the not-compiled pattern with the given values.
     * Equivalent to compileToStringMinMaxArguments() followed by formatCompiledPattern().
     * The number of arguments checked against the given limits is the
     * highest argument number plus one, not the number of occurrences of arguments.
     *
     * @param pattern Not-compiled form of a pattern string.
     * @param min The pattern must have at least this many arguments.
     * @param max The pattern must have at most this many arguments.
     * @param values The argument values.
     *               values.length must be at least the number of arguments in the pattern.
     * @return The formatted string.
     * @throws IllegalArgumentException for bad argument syntax, too few or too many arguments,
     *         or too few values.
     */
    public static String formatRawPattern(String pattern, int min, int max, CharSequence... values) {
        StringBuilder sb = new StringBuilder();
        String compiledPattern = compileToStringMinMaxArguments(pattern, sb, min, max);
        // Reuse the scratch builder for the formatted output.
        sb.setLength(0);
        return formatAndAppend(compiledPattern, sb, null, values).toString();
    }

    /**
     * Formats the given values, appending to the appendTo builder.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param appendTo Gets the formatted pattern and values appended.
     * @param offsets offsets[i] receives the offset of where
     *                values[i] replaced pattern argument {i}.
     *                Can be null, or can be shorter or longer than values.
     *                If there is no {i} in the pattern, then offsets[i] is set to -1.
     * @param values The argument values.
     *               An argument value must not be the same object as appendTo.
     *               values.length must be at least getArgumentLimit().
     *               Can be null if getArgumentLimit()==0.
     * @return appendTo
     * @throws IllegalArgumentException if there are too few values,
     *         or if a value used by the pattern is the same object as appendTo.
     */
    public static StringBuilder formatAndAppend(
            String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values) {
        checkValuesLength(compiledPattern, values);
        return format(compiledPattern, values, appendTo, null, true, offsets);
    }

    /**
     * Formats the given values, replacing the contents of the result builder.
     * May optimize by actually appending to the result if it is the same object
     * as the value corresponding to the initial argument in the pattern.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param result Gets its contents replaced by the formatted pattern and values.
     * @param offsets offsets[i] receives the offset of where
     *                values[i] replaced pattern argument {i}.
     *                Can be null, or can be shorter or longer than values.
     *                If there is no {i} in the pattern, then offsets[i] is set to -1.
     * @param values The argument values.
     *               An argument value may be the same object as result.
     *               values.length must be at least getArgumentLimit().
     * @return result
     * @throws IllegalArgumentException if there are too few values.
     */
    public static StringBuilder formatAndReplace(
            String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values) {
        checkValuesLength(compiledPattern, values);

        // If the pattern starts with an argument whose value is the same object
        // as the result, then we keep the result contents and append to it.
        // Otherwise we replace its contents.
        int firstArg = -1;
        // If any non-initial argument value is the same object as the result,
        // then we first copy its contents and use that instead while formatting.
        String resultCopy = null;
        if (getArgumentLimit(compiledPattern) > 0) {
            for (int i = 1; i < compiledPattern.length();) {
                int n = compiledPattern.charAt(i++);
                if (n < ARG_NUM_LIMIT) {
                    // Deliberate identity comparison: we care about aliasing, not equal contents.
                    if (values[n] == result) {
                        if (i == 2) {
                            // The argument is the very first unit after the argument count.
                            firstArg = n;
                        } else if (resultCopy == null) {
                            resultCopy = result.toString();
                        }
                    }
                } else {
                    // Skip over the chars of a literal-text segment.
                    i += n - ARG_NUM_LIMIT;
                }
            }
        }
        if (firstArg < 0) {
            result.setLength(0);
        }
        return format(compiledPattern, values, result, resultCopy, false, offsets);
    }

    /**
     * Returns the pattern text with none of the arguments.
     * Like formatting with all-empty string values.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @return The concatenation of all literal-text segments of the pattern.
     */
    public static String getTextWithNoArguments(String compiledPattern) {
        // Capacity is only a size hint. It goes negative when the highest argument number
        // exceeds the compiled-pattern length (for example "{5}"), which would make the
        // StringBuilder constructor throw, so clamp it to zero.
        int capacity = Math.max(0,
                compiledPattern.length() - 1 - getArgumentLimit(compiledPattern));
        StringBuilder sb = new StringBuilder(capacity);
        for (int i = 1; i < compiledPattern.length();) {
            // Positive only for literal-text segments; argument units are below ARG_NUM_LIMIT.
            int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT;
            if (segmentLength > 0) {
                int limit = i + segmentLength;
                sb.append(compiledPattern, i, limit);
                i = limit;
            }
        }
        return sb.toString();
    }

    /**
     * Verifies that enough argument values were supplied for the compiled pattern.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param values The argument values; null counts as zero values.
     * @throws IllegalArgumentException if there are fewer values than getArgumentLimit().
     */
    private static void checkValuesLength(String compiledPattern, CharSequence[] values) {
        int valuesLength = values != null ? values.length : 0;
        if (valuesLength < getArgumentLimit(compiledPattern)) {
            throw new IllegalArgumentException("Too few values.");
        }
    }

    /**
     * Walks the compiled pattern and appends argument values and literal text to result.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param values The argument values, indexed by argument number.
     * @param result Receives the appended output.
     * @param resultCopy Text appended in place of a non-initial argument value
     *                   that is the same object as result.
     * @param forbidResultAsValue If true, then a value that is the same object as result
     *                            causes an IllegalArgumentException.
     * @param offsets If not null, all elements are first set to -1; then offsets[n] receives
     *                the position in result where argument n was inserted,
     *                for each n smaller than offsets.length.
     * @return result
     */
    private static StringBuilder format(
            String compiledPattern, CharSequence[] values,
            StringBuilder result, String resultCopy, boolean forbidResultAsValue,
            int[] offsets) {
        int offsetsLength;
        if (offsets == null) {
            offsetsLength = 0;
        } else {
            offsetsLength = offsets.length;
            for (int i = 0; i < offsetsLength; i++) {
                offsets[i] = -1;
            }
        }
        for (int i = 1; i < compiledPattern.length();) {
            int n = compiledPattern.charAt(i++);
            if (n < ARG_NUM_LIMIT) {
                CharSequence value = values[n];
                // Deliberate identity comparison: detects the result builder used as a value.
                if (value == result) {
                    if (forbidResultAsValue) {
                        throw new IllegalArgumentException("Value must not be same object as result");
                    }
                    if (i == 2) {
                        // We are appending to result which is also the first value object.
                        if (n < offsetsLength) {
                            offsets[n] = 0;
                        }
                    } else {
                        if (n < offsetsLength) {
                            offsets[n] = result.length();
                        }
                        result.append(resultCopy);
                    }
                } else {
                    if (n < offsetsLength) {
                        offsets[n] = result.length();
                    }
                    result.append(value);
                }
            } else {
                // Literal-text segment: copy its chars verbatim.
                int limit = i + (n - ARG_NUM_LIMIT);
                result.append(compiledPattern, i, limit);
                i = limit;
            }
        }
        return result;
    }
}
353