1/* Copyright 2015 Google Inc. All Rights Reserved.
2
3   Distributed under MIT license.
4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5*/
6
7package org.brotli.dec;
8
9import static org.brotli.dec.WordTransformType.IDENTITY;
10import static org.brotli.dec.WordTransformType.OMIT_FIRST_1;
11import static org.brotli.dec.WordTransformType.OMIT_FIRST_2;
12import static org.brotli.dec.WordTransformType.OMIT_FIRST_3;
13import static org.brotli.dec.WordTransformType.OMIT_FIRST_4;
14import static org.brotli.dec.WordTransformType.OMIT_FIRST_5;
15import static org.brotli.dec.WordTransformType.OMIT_FIRST_6;
16import static org.brotli.dec.WordTransformType.OMIT_FIRST_7;
17import static org.brotli.dec.WordTransformType.OMIT_FIRST_9;
18import static org.brotli.dec.WordTransformType.OMIT_LAST_1;
19import static org.brotli.dec.WordTransformType.OMIT_LAST_2;
20import static org.brotli.dec.WordTransformType.OMIT_LAST_3;
21import static org.brotli.dec.WordTransformType.OMIT_LAST_4;
22import static org.brotli.dec.WordTransformType.OMIT_LAST_5;
23import static org.brotli.dec.WordTransformType.OMIT_LAST_6;
24import static org.brotli.dec.WordTransformType.OMIT_LAST_7;
25import static org.brotli.dec.WordTransformType.OMIT_LAST_8;
26import static org.brotli.dec.WordTransformType.OMIT_LAST_9;
27import static org.brotli.dec.WordTransformType.UPPERCASE_ALL;
28import static org.brotli.dec.WordTransformType.UPPERCASE_FIRST;
29
30/**
31 * Transformations on dictionary words.
32 */
33final class Transform {
34
35  private final byte[] prefix;
36  private final int type;
37  private final byte[] suffix;
38
39  Transform(String prefix, int type, String suffix) {
40    this.prefix = readUniBytes(prefix);
41    this.type = type;
42    this.suffix = readUniBytes(suffix);
43  }
44
45  static byte[] readUniBytes(String uniBytes) {
46    byte[] result = new byte[uniBytes.length()];
47    for (int i = 0; i < result.length; ++i) {
48      result[i] = (byte) uniBytes.charAt(i);
49    }
50    return result;
51  }
52
53  static final Transform[] TRANSFORMS = {
54      new Transform("", IDENTITY, ""),
55      new Transform("", IDENTITY, " "),
56      new Transform(" ", IDENTITY, " "),
57      new Transform("", OMIT_FIRST_1, ""),
58      new Transform("", UPPERCASE_FIRST, " "),
59      new Transform("", IDENTITY, " the "),
60      new Transform(" ", IDENTITY, ""),
61      new Transform("s ", IDENTITY, " "),
62      new Transform("", IDENTITY, " of "),
63      new Transform("", UPPERCASE_FIRST, ""),
64      new Transform("", IDENTITY, " and "),
65      new Transform("", OMIT_FIRST_2, ""),
66      new Transform("", OMIT_LAST_1, ""),
67      new Transform(", ", IDENTITY, " "),
68      new Transform("", IDENTITY, ", "),
69      new Transform(" ", UPPERCASE_FIRST, " "),
70      new Transform("", IDENTITY, " in "),
71      new Transform("", IDENTITY, " to "),
72      new Transform("e ", IDENTITY, " "),
73      new Transform("", IDENTITY, "\""),
74      new Transform("", IDENTITY, "."),
75      new Transform("", IDENTITY, "\">"),
76      new Transform("", IDENTITY, "\n"),
77      new Transform("", OMIT_LAST_3, ""),
78      new Transform("", IDENTITY, "]"),
79      new Transform("", IDENTITY, " for "),
80      new Transform("", OMIT_FIRST_3, ""),
81      new Transform("", OMIT_LAST_2, ""),
82      new Transform("", IDENTITY, " a "),
83      new Transform("", IDENTITY, " that "),
84      new Transform(" ", UPPERCASE_FIRST, ""),
85      new Transform("", IDENTITY, ". "),
86      new Transform(".", IDENTITY, ""),
87      new Transform(" ", IDENTITY, ", "),
88      new Transform("", OMIT_FIRST_4, ""),
89      new Transform("", IDENTITY, " with "),
90      new Transform("", IDENTITY, "'"),
91      new Transform("", IDENTITY, " from "),
92      new Transform("", IDENTITY, " by "),
93      new Transform("", OMIT_FIRST_5, ""),
94      new Transform("", OMIT_FIRST_6, ""),
95      new Transform(" the ", IDENTITY, ""),
96      new Transform("", OMIT_LAST_4, ""),
97      new Transform("", IDENTITY, ". The "),
98      new Transform("", UPPERCASE_ALL, ""),
99      new Transform("", IDENTITY, " on "),
100      new Transform("", IDENTITY, " as "),
101      new Transform("", IDENTITY, " is "),
102      new Transform("", OMIT_LAST_7, ""),
103      new Transform("", OMIT_LAST_1, "ing "),
104      new Transform("", IDENTITY, "\n\t"),
105      new Transform("", IDENTITY, ":"),
106      new Transform(" ", IDENTITY, ". "),
107      new Transform("", IDENTITY, "ed "),
108      new Transform("", OMIT_FIRST_9, ""),
109      new Transform("", OMIT_FIRST_7, ""),
110      new Transform("", OMIT_LAST_6, ""),
111      new Transform("", IDENTITY, "("),
112      new Transform("", UPPERCASE_FIRST, ", "),
113      new Transform("", OMIT_LAST_8, ""),
114      new Transform("", IDENTITY, " at "),
115      new Transform("", IDENTITY, "ly "),
116      new Transform(" the ", IDENTITY, " of "),
117      new Transform("", OMIT_LAST_5, ""),
118      new Transform("", OMIT_LAST_9, ""),
119      new Transform(" ", UPPERCASE_FIRST, ", "),
120      new Transform("", UPPERCASE_FIRST, "\""),
121      new Transform(".", IDENTITY, "("),
122      new Transform("", UPPERCASE_ALL, " "),
123      new Transform("", UPPERCASE_FIRST, "\">"),
124      new Transform("", IDENTITY, "=\""),
125      new Transform(" ", IDENTITY, "."),
126      new Transform(".com/", IDENTITY, ""),
127      new Transform(" the ", IDENTITY, " of the "),
128      new Transform("", UPPERCASE_FIRST, "'"),
129      new Transform("", IDENTITY, ". This "),
130      new Transform("", IDENTITY, ","),
131      new Transform(".", IDENTITY, " "),
132      new Transform("", UPPERCASE_FIRST, "("),
133      new Transform("", UPPERCASE_FIRST, "."),
134      new Transform("", IDENTITY, " not "),
135      new Transform(" ", IDENTITY, "=\""),
136      new Transform("", IDENTITY, "er "),
137      new Transform(" ", UPPERCASE_ALL, " "),
138      new Transform("", IDENTITY, "al "),
139      new Transform(" ", UPPERCASE_ALL, ""),
140      new Transform("", IDENTITY, "='"),
141      new Transform("", UPPERCASE_ALL, "\""),
142      new Transform("", UPPERCASE_FIRST, ". "),
143      new Transform(" ", IDENTITY, "("),
144      new Transform("", IDENTITY, "ful "),
145      new Transform(" ", UPPERCASE_FIRST, ". "),
146      new Transform("", IDENTITY, "ive "),
147      new Transform("", IDENTITY, "less "),
148      new Transform("", UPPERCASE_ALL, "'"),
149      new Transform("", IDENTITY, "est "),
150      new Transform(" ", UPPERCASE_FIRST, "."),
151      new Transform("", UPPERCASE_ALL, "\">"),
152      new Transform(" ", IDENTITY, "='"),
153      new Transform("", UPPERCASE_FIRST, ","),
154      new Transform("", IDENTITY, "ize "),
155      new Transform("", UPPERCASE_ALL, "."),
156      new Transform("\u00c2\u00a0", IDENTITY, ""),
157      new Transform(" ", IDENTITY, ","),
158      new Transform("", UPPERCASE_FIRST, "=\""),
159      new Transform("", UPPERCASE_ALL, "=\""),
160      new Transform("", IDENTITY, "ous "),
161      new Transform("", UPPERCASE_ALL, ", "),
162      new Transform("", UPPERCASE_FIRST, "='"),
163      new Transform(" ", UPPERCASE_FIRST, ","),
164      new Transform(" ", UPPERCASE_ALL, "=\""),
165      new Transform(" ", UPPERCASE_ALL, ", "),
166      new Transform("", UPPERCASE_ALL, ","),
167      new Transform("", UPPERCASE_ALL, "("),
168      new Transform("", UPPERCASE_ALL, ". "),
169      new Transform(" ", UPPERCASE_ALL, "."),
170      new Transform("", UPPERCASE_ALL, "='"),
171      new Transform(" ", UPPERCASE_ALL, ". "),
172      new Transform(" ", UPPERCASE_FIRST, "=\""),
173      new Transform(" ", UPPERCASE_ALL, "='"),
174      new Transform(" ", UPPERCASE_FIRST, "='")
175  };
176
177  static int transformDictionaryWord(byte[] dst, int dstOffset, byte[] word, int wordOffset,
178      int len, Transform transform) {
179    int offset = dstOffset;
180
181    // Copy prefix.
182    byte[] string = transform.prefix;
183    int tmp = string.length;
184    int i = 0;
185    // In most cases tmp < 10 -> no benefits from System.arrayCopy
186    while (i < tmp) {
187      dst[offset++] = string[i++];
188    }
189
190    // Copy trimmed word.
191    int op = transform.type;
192    tmp = WordTransformType.getOmitFirst(op);
193    if (tmp > len) {
194      tmp = len;
195    }
196    wordOffset += tmp;
197    len -= tmp;
198    len -= WordTransformType.getOmitLast(op);
199    i = len;
200    while (i > 0) {
201      dst[offset++] = word[wordOffset++];
202      i--;
203    }
204
205    if (op == UPPERCASE_ALL || op == UPPERCASE_FIRST) {
206      int uppercaseOffset = offset - len;
207      if (op == UPPERCASE_FIRST) {
208        len = 1;
209      }
210      while (len > 0) {
211        tmp = dst[uppercaseOffset] & 0xFF;
212        if (tmp < 0xc0) {
213          if (tmp >= 'a' && tmp <= 'z') {
214            dst[uppercaseOffset] ^= (byte) 32;
215          }
216          uppercaseOffset += 1;
217          len -= 1;
218        } else if (tmp < 0xe0) {
219          dst[uppercaseOffset + 1] ^= (byte) 32;
220          uppercaseOffset += 2;
221          len -= 2;
222        } else {
223          dst[uppercaseOffset + 2] ^= (byte) 5;
224          uppercaseOffset += 3;
225          len -= 3;
226        }
227      }
228    }
229
230    // Copy suffix.
231    string = transform.suffix;
232    tmp = string.length;
233    i = 0;
234    while (i < tmp) {
235      dst[offset++] = string[i++];
236    }
237
238    return offset - dstOffset;
239  }
240}
241