1/* Copyright 2015 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5*/ 6 7package org.brotli.dec; 8 9import static org.brotli.dec.WordTransformType.IDENTITY; 10import static org.brotli.dec.WordTransformType.OMIT_FIRST_1; 11import static org.brotli.dec.WordTransformType.OMIT_FIRST_2; 12import static org.brotli.dec.WordTransformType.OMIT_FIRST_3; 13import static org.brotli.dec.WordTransformType.OMIT_FIRST_4; 14import static org.brotli.dec.WordTransformType.OMIT_FIRST_5; 15import static org.brotli.dec.WordTransformType.OMIT_FIRST_6; 16import static org.brotli.dec.WordTransformType.OMIT_FIRST_7; 17import static org.brotli.dec.WordTransformType.OMIT_FIRST_9; 18import static org.brotli.dec.WordTransformType.OMIT_LAST_1; 19import static org.brotli.dec.WordTransformType.OMIT_LAST_2; 20import static org.brotli.dec.WordTransformType.OMIT_LAST_3; 21import static org.brotli.dec.WordTransformType.OMIT_LAST_4; 22import static org.brotli.dec.WordTransformType.OMIT_LAST_5; 23import static org.brotli.dec.WordTransformType.OMIT_LAST_6; 24import static org.brotli.dec.WordTransformType.OMIT_LAST_7; 25import static org.brotli.dec.WordTransformType.OMIT_LAST_8; 26import static org.brotli.dec.WordTransformType.OMIT_LAST_9; 27import static org.brotli.dec.WordTransformType.UPPERCASE_ALL; 28import static org.brotli.dec.WordTransformType.UPPERCASE_FIRST; 29 30/** 31 * Transformations on dictionary words. 32 */ 33final class Transform { 34 35 private final byte[] prefix; 36 private final int type; 37 private final byte[] suffix; 38 39 Transform(String prefix, int type, String suffix) { 40 this.prefix = readUniBytes(prefix); 41 this.type = type; 42 this.suffix = readUniBytes(suffix); 43 } 44 45 static byte[] readUniBytes(String uniBytes) { 46 byte[] result = new byte[uniBytes.length()]; 47 for (int i = 0; i < result.length; ++i) { 48 result[i] = (byte) uniBytes.charAt(i); 49 } 50 return result; 51 } 52 53 static final Transform[] TRANSFORMS = { 54 new Transform("", IDENTITY, ""), 55 new Transform("", IDENTITY, " "), 56 new Transform(" ", IDENTITY, " "), 57 new Transform("", OMIT_FIRST_1, ""), 58 new Transform("", UPPERCASE_FIRST, " "), 59 new Transform("", IDENTITY, " the "), 60 new Transform(" ", IDENTITY, ""), 61 new Transform("s ", IDENTITY, " "), 62 new Transform("", IDENTITY, " of "), 63 new Transform("", UPPERCASE_FIRST, ""), 64 new Transform("", IDENTITY, " and "), 65 new Transform("", OMIT_FIRST_2, ""), 66 new Transform("", OMIT_LAST_1, ""), 67 new Transform(", ", IDENTITY, " "), 68 new Transform("", IDENTITY, ", "), 69 new Transform(" ", UPPERCASE_FIRST, " "), 70 new Transform("", IDENTITY, " in "), 71 new Transform("", IDENTITY, " to "), 72 new Transform("e ", IDENTITY, " "), 73 new Transform("", IDENTITY, "\""), 74 new Transform("", IDENTITY, "."), 75 new Transform("", IDENTITY, "\">"), 76 new Transform("", IDENTITY, "\n"), 77 new Transform("", OMIT_LAST_3, ""), 78 new Transform("", IDENTITY, "]"), 79 new Transform("", IDENTITY, " for "), 80 new Transform("", OMIT_FIRST_3, ""), 81 new Transform("", OMIT_LAST_2, ""), 82 new Transform("", IDENTITY, " a "), 83 new Transform("", IDENTITY, " that "), 84 new Transform(" ", UPPERCASE_FIRST, ""), 85 new Transform("", IDENTITY, ". "), 86 new Transform(".", IDENTITY, ""), 87 new Transform(" ", IDENTITY, ", "), 88 new Transform("", OMIT_FIRST_4, ""), 89 new Transform("", IDENTITY, " with "), 90 new Transform("", IDENTITY, "'"), 91 new Transform("", IDENTITY, " from "), 92 new Transform("", IDENTITY, " by "), 93 new Transform("", OMIT_FIRST_5, ""), 94 new Transform("", OMIT_FIRST_6, ""), 95 new Transform(" the ", IDENTITY, ""), 96 new Transform("", OMIT_LAST_4, ""), 97 new Transform("", IDENTITY, ". The "), 98 new Transform("", UPPERCASE_ALL, ""), 99 new Transform("", IDENTITY, " on "), 100 new Transform("", IDENTITY, " as "), 101 new Transform("", IDENTITY, " is "), 102 new Transform("", OMIT_LAST_7, ""), 103 new Transform("", OMIT_LAST_1, "ing "), 104 new Transform("", IDENTITY, "\n\t"), 105 new Transform("", IDENTITY, ":"), 106 new Transform(" ", IDENTITY, ". "), 107 new Transform("", IDENTITY, "ed "), 108 new Transform("", OMIT_FIRST_9, ""), 109 new Transform("", OMIT_FIRST_7, ""), 110 new Transform("", OMIT_LAST_6, ""), 111 new Transform("", IDENTITY, "("), 112 new Transform("", UPPERCASE_FIRST, ", "), 113 new Transform("", OMIT_LAST_8, ""), 114 new Transform("", IDENTITY, " at "), 115 new Transform("", IDENTITY, "ly "), 116 new Transform(" the ", IDENTITY, " of "), 117 new Transform("", OMIT_LAST_5, ""), 118 new Transform("", OMIT_LAST_9, ""), 119 new Transform(" ", UPPERCASE_FIRST, ", "), 120 new Transform("", UPPERCASE_FIRST, "\""), 121 new Transform(".", IDENTITY, "("), 122 new Transform("", UPPERCASE_ALL, " "), 123 new Transform("", UPPERCASE_FIRST, "\">"), 124 new Transform("", IDENTITY, "=\""), 125 new Transform(" ", IDENTITY, "."), 126 new Transform(".com/", IDENTITY, ""), 127 new Transform(" the ", IDENTITY, " of the "), 128 new Transform("", UPPERCASE_FIRST, "'"), 129 new Transform("", IDENTITY, ". This "), 130 new Transform("", IDENTITY, ","), 131 new Transform(".", IDENTITY, " "), 132 new Transform("", UPPERCASE_FIRST, "("), 133 new Transform("", UPPERCASE_FIRST, "."), 134 new Transform("", IDENTITY, " not "), 135 new Transform(" ", IDENTITY, "=\""), 136 new Transform("", IDENTITY, "er "), 137 new Transform(" ", UPPERCASE_ALL, " "), 138 new Transform("", IDENTITY, "al "), 139 new Transform(" ", UPPERCASE_ALL, ""), 140 new Transform("", IDENTITY, "='"), 141 new Transform("", UPPERCASE_ALL, "\""), 142 new Transform("", UPPERCASE_FIRST, ". "), 143 new Transform(" ", IDENTITY, "("), 144 new Transform("", IDENTITY, "ful "), 145 new Transform(" ", UPPERCASE_FIRST, ". "), 146 new Transform("", IDENTITY, "ive "), 147 new Transform("", IDENTITY, "less "), 148 new Transform("", UPPERCASE_ALL, "'"), 149 new Transform("", IDENTITY, "est "), 150 new Transform(" ", UPPERCASE_FIRST, "."), 151 new Transform("", UPPERCASE_ALL, "\">"), 152 new Transform(" ", IDENTITY, "='"), 153 new Transform("", UPPERCASE_FIRST, ","), 154 new Transform("", IDENTITY, "ize "), 155 new Transform("", UPPERCASE_ALL, "."), 156 new Transform("\u00c2\u00a0", IDENTITY, ""), 157 new Transform(" ", IDENTITY, ","), 158 new Transform("", UPPERCASE_FIRST, "=\""), 159 new Transform("", UPPERCASE_ALL, "=\""), 160 new Transform("", IDENTITY, "ous "), 161 new Transform("", UPPERCASE_ALL, ", "), 162 new Transform("", UPPERCASE_FIRST, "='"), 163 new Transform(" ", UPPERCASE_FIRST, ","), 164 new Transform(" ", UPPERCASE_ALL, "=\""), 165 new Transform(" ", UPPERCASE_ALL, ", "), 166 new Transform("", UPPERCASE_ALL, ","), 167 new Transform("", UPPERCASE_ALL, "("), 168 new Transform("", UPPERCASE_ALL, ". "), 169 new Transform(" ", UPPERCASE_ALL, "."), 170 new Transform("", UPPERCASE_ALL, "='"), 171 new Transform(" ", UPPERCASE_ALL, ". "), 172 new Transform(" ", UPPERCASE_FIRST, "=\""), 173 new Transform(" ", UPPERCASE_ALL, "='"), 174 new Transform(" ", UPPERCASE_FIRST, "='") 175 }; 176 177 static int transformDictionaryWord(byte[] dst, int dstOffset, byte[] word, int wordOffset, 178 int len, Transform transform) { 179 int offset = dstOffset; 180 181 // Copy prefix. 182 byte[] string = transform.prefix; 183 int tmp = string.length; 184 int i = 0; 185 // In most cases tmp < 10 -> no benefits from System.arrayCopy 186 while (i < tmp) { 187 dst[offset++] = string[i++]; 188 } 189 190 // Copy trimmed word. 191 int op = transform.type; 192 tmp = WordTransformType.getOmitFirst(op); 193 if (tmp > len) { 194 tmp = len; 195 } 196 wordOffset += tmp; 197 len -= tmp; 198 len -= WordTransformType.getOmitLast(op); 199 i = len; 200 while (i > 0) { 201 dst[offset++] = word[wordOffset++]; 202 i--; 203 } 204 205 if (op == UPPERCASE_ALL || op == UPPERCASE_FIRST) { 206 int uppercaseOffset = offset - len; 207 if (op == UPPERCASE_FIRST) { 208 len = 1; 209 } 210 while (len > 0) { 211 tmp = dst[uppercaseOffset] & 0xFF; 212 if (tmp < 0xc0) { 213 if (tmp >= 'a' && tmp <= 'z') { 214 dst[uppercaseOffset] ^= (byte) 32; 215 } 216 uppercaseOffset += 1; 217 len -= 1; 218 } else if (tmp < 0xe0) { 219 dst[uppercaseOffset + 1] ^= (byte) 32; 220 uppercaseOffset += 2; 221 len -= 2; 222 } else { 223 dst[uppercaseOffset + 2] ^= (byte) 5; 224 uppercaseOffset += 3; 225 len -= 3; 226 } 227 } 228 } 229 230 // Copy suffix. 231 string = transform.suffix; 232 tmp = string.length; 233 i = 0; 234 while (i < tmp) { 235 dst[offset++] = string[i++]; 236 } 237 238 return offset - dstOffset; 239 } 240} 241