Transform.java revision 03739d2b113afe60638069c4e1604dc2ac27380d
1/* Copyright 2015 Google Inc. All Rights Reserved.
2
3   Distributed under MIT license.
4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5*/
6
7package org.brotli.dec;
8
9import static org.brotli.dec.WordTransformType.IDENTITY;
10import static org.brotli.dec.WordTransformType.OMIT_FIRST_1;
11import static org.brotli.dec.WordTransformType.OMIT_FIRST_2;
12import static org.brotli.dec.WordTransformType.OMIT_FIRST_3;
13import static org.brotli.dec.WordTransformType.OMIT_FIRST_4;
14import static org.brotli.dec.WordTransformType.OMIT_FIRST_5;
15import static org.brotli.dec.WordTransformType.OMIT_FIRST_6;
16import static org.brotli.dec.WordTransformType.OMIT_FIRST_7;
17import static org.brotli.dec.WordTransformType.OMIT_FIRST_9;
18import static org.brotli.dec.WordTransformType.OMIT_LAST_1;
19import static org.brotli.dec.WordTransformType.OMIT_LAST_2;
20import static org.brotli.dec.WordTransformType.OMIT_LAST_3;
21import static org.brotli.dec.WordTransformType.OMIT_LAST_4;
22import static org.brotli.dec.WordTransformType.OMIT_LAST_5;
23import static org.brotli.dec.WordTransformType.OMIT_LAST_6;
24import static org.brotli.dec.WordTransformType.OMIT_LAST_7;
25import static org.brotli.dec.WordTransformType.OMIT_LAST_8;
26import static org.brotli.dec.WordTransformType.OMIT_LAST_9;
27import static org.brotli.dec.WordTransformType.UPPERCASE_ALL;
28import static org.brotli.dec.WordTransformType.UPPERCASE_FIRST;
29
30import java.nio.ByteBuffer;
31
32/**
33 * Transformations on dictionary words.
34 */
35final class Transform {
36
37  private final byte[] prefix;
38  private final int type;
39  private final byte[] suffix;
40
41  Transform(String prefix, int type, String suffix) {
42    this.prefix = readUniBytes(prefix);
43    this.type = type;
44    this.suffix = readUniBytes(suffix);
45  }
46
47  static byte[] readUniBytes(String uniBytes) {
48    byte[] result = new byte[uniBytes.length()];
49    for (int i = 0; i < result.length; ++i) {
50      result[i] = (byte) uniBytes.charAt(i);
51    }
52    return result;
53  }
54
55  static final Transform[] TRANSFORMS = {
56      new Transform("", IDENTITY, ""),
57      new Transform("", IDENTITY, " "),
58      new Transform(" ", IDENTITY, " "),
59      new Transform("", OMIT_FIRST_1, ""),
60      new Transform("", UPPERCASE_FIRST, " "),
61      new Transform("", IDENTITY, " the "),
62      new Transform(" ", IDENTITY, ""),
63      new Transform("s ", IDENTITY, " "),
64      new Transform("", IDENTITY, " of "),
65      new Transform("", UPPERCASE_FIRST, ""),
66      new Transform("", IDENTITY, " and "),
67      new Transform("", OMIT_FIRST_2, ""),
68      new Transform("", OMIT_LAST_1, ""),
69      new Transform(", ", IDENTITY, " "),
70      new Transform("", IDENTITY, ", "),
71      new Transform(" ", UPPERCASE_FIRST, " "),
72      new Transform("", IDENTITY, " in "),
73      new Transform("", IDENTITY, " to "),
74      new Transform("e ", IDENTITY, " "),
75      new Transform("", IDENTITY, "\""),
76      new Transform("", IDENTITY, "."),
77      new Transform("", IDENTITY, "\">"),
78      new Transform("", IDENTITY, "\n"),
79      new Transform("", OMIT_LAST_3, ""),
80      new Transform("", IDENTITY, "]"),
81      new Transform("", IDENTITY, " for "),
82      new Transform("", OMIT_FIRST_3, ""),
83      new Transform("", OMIT_LAST_2, ""),
84      new Transform("", IDENTITY, " a "),
85      new Transform("", IDENTITY, " that "),
86      new Transform(" ", UPPERCASE_FIRST, ""),
87      new Transform("", IDENTITY, ". "),
88      new Transform(".", IDENTITY, ""),
89      new Transform(" ", IDENTITY, ", "),
90      new Transform("", OMIT_FIRST_4, ""),
91      new Transform("", IDENTITY, " with "),
92      new Transform("", IDENTITY, "'"),
93      new Transform("", IDENTITY, " from "),
94      new Transform("", IDENTITY, " by "),
95      new Transform("", OMIT_FIRST_5, ""),
96      new Transform("", OMIT_FIRST_6, ""),
97      new Transform(" the ", IDENTITY, ""),
98      new Transform("", OMIT_LAST_4, ""),
99      new Transform("", IDENTITY, ". The "),
100      new Transform("", UPPERCASE_ALL, ""),
101      new Transform("", IDENTITY, " on "),
102      new Transform("", IDENTITY, " as "),
103      new Transform("", IDENTITY, " is "),
104      new Transform("", OMIT_LAST_7, ""),
105      new Transform("", OMIT_LAST_1, "ing "),
106      new Transform("", IDENTITY, "\n\t"),
107      new Transform("", IDENTITY, ":"),
108      new Transform(" ", IDENTITY, ". "),
109      new Transform("", IDENTITY, "ed "),
110      new Transform("", OMIT_FIRST_9, ""),
111      new Transform("", OMIT_FIRST_7, ""),
112      new Transform("", OMIT_LAST_6, ""),
113      new Transform("", IDENTITY, "("),
114      new Transform("", UPPERCASE_FIRST, ", "),
115      new Transform("", OMIT_LAST_8, ""),
116      new Transform("", IDENTITY, " at "),
117      new Transform("", IDENTITY, "ly "),
118      new Transform(" the ", IDENTITY, " of "),
119      new Transform("", OMIT_LAST_5, ""),
120      new Transform("", OMIT_LAST_9, ""),
121      new Transform(" ", UPPERCASE_FIRST, ", "),
122      new Transform("", UPPERCASE_FIRST, "\""),
123      new Transform(".", IDENTITY, "("),
124      new Transform("", UPPERCASE_ALL, " "),
125      new Transform("", UPPERCASE_FIRST, "\">"),
126      new Transform("", IDENTITY, "=\""),
127      new Transform(" ", IDENTITY, "."),
128      new Transform(".com/", IDENTITY, ""),
129      new Transform(" the ", IDENTITY, " of the "),
130      new Transform("", UPPERCASE_FIRST, "'"),
131      new Transform("", IDENTITY, ". This "),
132      new Transform("", IDENTITY, ","),
133      new Transform(".", IDENTITY, " "),
134      new Transform("", UPPERCASE_FIRST, "("),
135      new Transform("", UPPERCASE_FIRST, "."),
136      new Transform("", IDENTITY, " not "),
137      new Transform(" ", IDENTITY, "=\""),
138      new Transform("", IDENTITY, "er "),
139      new Transform(" ", UPPERCASE_ALL, " "),
140      new Transform("", IDENTITY, "al "),
141      new Transform(" ", UPPERCASE_ALL, ""),
142      new Transform("", IDENTITY, "='"),
143      new Transform("", UPPERCASE_ALL, "\""),
144      new Transform("", UPPERCASE_FIRST, ". "),
145      new Transform(" ", IDENTITY, "("),
146      new Transform("", IDENTITY, "ful "),
147      new Transform(" ", UPPERCASE_FIRST, ". "),
148      new Transform("", IDENTITY, "ive "),
149      new Transform("", IDENTITY, "less "),
150      new Transform("", UPPERCASE_ALL, "'"),
151      new Transform("", IDENTITY, "est "),
152      new Transform(" ", UPPERCASE_FIRST, "."),
153      new Transform("", UPPERCASE_ALL, "\">"),
154      new Transform(" ", IDENTITY, "='"),
155      new Transform("", UPPERCASE_FIRST, ","),
156      new Transform("", IDENTITY, "ize "),
157      new Transform("", UPPERCASE_ALL, "."),
158      new Transform("\u00c2\u00a0", IDENTITY, ""),
159      new Transform(" ", IDENTITY, ","),
160      new Transform("", UPPERCASE_FIRST, "=\""),
161      new Transform("", UPPERCASE_ALL, "=\""),
162      new Transform("", IDENTITY, "ous "),
163      new Transform("", UPPERCASE_ALL, ", "),
164      new Transform("", UPPERCASE_FIRST, "='"),
165      new Transform(" ", UPPERCASE_FIRST, ","),
166      new Transform(" ", UPPERCASE_ALL, "=\""),
167      new Transform(" ", UPPERCASE_ALL, ", "),
168      new Transform("", UPPERCASE_ALL, ","),
169      new Transform("", UPPERCASE_ALL, "("),
170      new Transform("", UPPERCASE_ALL, ". "),
171      new Transform(" ", UPPERCASE_ALL, "."),
172      new Transform("", UPPERCASE_ALL, "='"),
173      new Transform(" ", UPPERCASE_ALL, ". "),
174      new Transform(" ", UPPERCASE_FIRST, "=\""),
175      new Transform(" ", UPPERCASE_ALL, "='"),
176      new Transform(" ", UPPERCASE_FIRST, "='")
177  };
178
179  static int transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer data, int wordOffset,
180      int len, Transform transform) {
181    int offset = dstOffset;
182
183    // Copy prefix.
184    byte[] string = transform.prefix;
185    int tmp = string.length;
186    int i = 0;
187    // In most cases tmp < 10 -> no benefits from System.arrayCopy
188    while (i < tmp) {
189      dst[offset++] = string[i++];
190    }
191
192    // Copy trimmed word.
193    int op = transform.type;
194    tmp = WordTransformType.getOmitFirst(op);
195    if (tmp > len) {
196      tmp = len;
197    }
198    wordOffset += tmp;
199    len -= tmp;
200    len -= WordTransformType.getOmitLast(op);
201    i = len;
202    while (i > 0) {
203      dst[offset++] = data.get(wordOffset++);
204      i--;
205    }
206
207    if (op == UPPERCASE_ALL || op == UPPERCASE_FIRST) {
208      int uppercaseOffset = offset - len;
209      if (op == UPPERCASE_FIRST) {
210        len = 1;
211      }
212      while (len > 0) {
213        tmp = dst[uppercaseOffset] & 0xFF;
214        if (tmp < 0xc0) {
215          if (tmp >= 'a' && tmp <= 'z') {
216            dst[uppercaseOffset] ^= (byte) 32;
217          }
218          uppercaseOffset += 1;
219          len -= 1;
220        } else if (tmp < 0xe0) {
221          dst[uppercaseOffset + 1] ^= (byte) 32;
222          uppercaseOffset += 2;
223          len -= 2;
224        } else {
225          dst[uppercaseOffset + 2] ^= (byte) 5;
226          uppercaseOffset += 3;
227          len -= 3;
228        }
229      }
230    }
231
232    // Copy suffix.
233    string = transform.suffix;
234    tmp = string.length;
235    i = 0;
236    while (i < tmp) {
237      dst[offset++] = string[i++];
238    }
239
240    return offset - dstOffset;
241  }
242}
243