1/* Copyright 2013 Google Inc. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 15 Transformations on dictionary words. 16*/ 17 18#ifndef BROTLI_DEC_TRANSFORM_H_ 19#define BROTLI_DEC_TRANSFORM_H_ 20 21#include <stdio.h> 22#include <ctype.h> 23#include "./types.h" 24 25#if defined(__cplusplus) || defined(c_plusplus) 26extern "C" { 27#endif 28 29enum WordTransformType { 30 kIdentity = 0, 31 kOmitLast1 = 1, 32 kOmitLast2 = 2, 33 kOmitLast3 = 3, 34 kOmitLast4 = 4, 35 kOmitLast5 = 5, 36 kOmitLast6 = 6, 37 kOmitLast7 = 7, 38 kOmitLast8 = 8, 39 kOmitLast9 = 9, 40 kUppercaseFirst = 10, 41 kUppercaseAll = 11, 42 kOmitFirst1 = 12, 43 kOmitFirst2 = 13, 44 kOmitFirst3 = 14, 45 kOmitFirst4 = 15, 46 kOmitFirst5 = 16, 47 kOmitFirst6 = 17, 48 kOmitFirst7 = 18, 49 kOmitFirst8 = 19, 50 kOmitFirst9 = 20 51}; 52 53typedef struct { 54 const char* prefix; 55 enum WordTransformType transform; 56 const char* suffix; 57} Transform; 58 59static const Transform kTransforms[] = { 60 { "", kIdentity, "" }, 61 { "", kIdentity, " " }, 62 { " ", kIdentity, " " }, 63 { "", kOmitFirst1, "" }, 64 { "", kUppercaseFirst, " " }, 65 { "", kIdentity, " the " }, 66 { " ", kIdentity, "" }, 67 { "s ", kIdentity, " " }, 68 { "", kIdentity, " of " }, 69 { "", kUppercaseFirst, "" }, 70 { "", kIdentity, " and " }, 71 { "", kOmitFirst2, "" }, 72 { "", kOmitLast1, "" }, 73 { ", ", kIdentity, " " }, 74 { "", kIdentity, ", " }, 75 { " ", kUppercaseFirst, " " }, 76 { "", kIdentity, " in " }, 77 { "", kIdentity, " to " }, 78 { "e ", kIdentity, " " }, 79 { "", kIdentity, "\"" }, 80 { "", kIdentity, "." }, 81 { "", kIdentity, "\">" }, 82 { "", kIdentity, "\n" }, 83 { "", kOmitLast3, "" }, 84 { "", kIdentity, "]" }, 85 { "", kIdentity, " for " }, 86 { "", kOmitFirst3, "" }, 87 { "", kOmitLast2, "" }, 88 { "", kIdentity, " a " }, 89 { "", kIdentity, " that " }, 90 { " ", kUppercaseFirst, "" }, 91 { "", kIdentity, ". " }, 92 { ".", kIdentity, "" }, 93 { " ", kIdentity, ", " }, 94 { "", kOmitFirst4, "" }, 95 { "", kIdentity, " with " }, 96 { "", kIdentity, "'" }, 97 { "", kIdentity, " from " }, 98 { "", kIdentity, " by " }, 99 { "", kOmitFirst5, "" }, 100 { "", kOmitFirst6, "" }, 101 { " the ", kIdentity, "" }, 102 { "", kOmitLast4, "" }, 103 { "", kIdentity, ". The " }, 104 { "", kUppercaseAll, "" }, 105 { "", kIdentity, " on " }, 106 { "", kIdentity, " as " }, 107 { "", kIdentity, " is " }, 108 { "", kOmitLast7, "" }, 109 { "", kOmitLast1, "ing " }, 110 { "", kIdentity, "\n\t" }, 111 { "", kIdentity, ":" }, 112 { " ", kIdentity, ". " }, 113 { "", kIdentity, "ed " }, 114 { "", kOmitFirst9, "" }, 115 { "", kOmitFirst7, "" }, 116 { "", kOmitLast6, "" }, 117 { "", kIdentity, "(" }, 118 { "", kUppercaseFirst, ", " }, 119 { "", kOmitLast8, "" }, 120 { "", kIdentity, " at " }, 121 { "", kIdentity, "ly " }, 122 { " the ", kIdentity, " of " }, 123 { "", kOmitLast5, "" }, 124 { "", kOmitLast9, "" }, 125 { " ", kUppercaseFirst, ", " }, 126 { "", kUppercaseFirst, "\"" }, 127 { ".", kIdentity, "(" }, 128 { "", kUppercaseAll, " " }, 129 { "", kUppercaseFirst, "\">" }, 130 { "", kIdentity, "=\"" }, 131 { " ", kIdentity, "." }, 132 { ".com/", kIdentity, "" }, 133 { " the ", kIdentity, " of the " }, 134 { "", kUppercaseFirst, "'" }, 135 { "", kIdentity, ". This " }, 136 { "", kIdentity, "," }, 137 { ".", kIdentity, " " }, 138 { "", kUppercaseFirst, "(" }, 139 { "", kUppercaseFirst, "." }, 140 { "", kIdentity, " not " }, 141 { " ", kIdentity, "=\"" }, 142 { "", kIdentity, "er " }, 143 { " ", kUppercaseAll, " " }, 144 { "", kIdentity, "al " }, 145 { " ", kUppercaseAll, "" }, 146 { "", kIdentity, "='" }, 147 { "", kUppercaseAll, "\"" }, 148 { "", kUppercaseFirst, ". " }, 149 { " ", kIdentity, "(" }, 150 { "", kIdentity, "ful " }, 151 { " ", kUppercaseFirst, ". " }, 152 { "", kIdentity, "ive " }, 153 { "", kIdentity, "less " }, 154 { "", kUppercaseAll, "'" }, 155 { "", kIdentity, "est " }, 156 { " ", kUppercaseFirst, "." }, 157 { "", kUppercaseAll, "\">" }, 158 { " ", kIdentity, "='" }, 159 { "", kUppercaseFirst, "," }, 160 { "", kIdentity, "ize " }, 161 { "", kUppercaseAll, "." }, 162 { "\xc2\xa0", kIdentity, "" }, 163 { " ", kIdentity, "," }, 164 { "", kUppercaseFirst, "=\"" }, 165 { "", kUppercaseAll, "=\"" }, 166 { "", kIdentity, "ous " }, 167 { "", kUppercaseAll, ", " }, 168 { "", kUppercaseFirst, "='" }, 169 { " ", kUppercaseFirst, "," }, 170 { " ", kUppercaseAll, "=\"" }, 171 { " ", kUppercaseAll, ", " }, 172 { "", kUppercaseAll, "," }, 173 { "", kUppercaseAll, "(" }, 174 { "", kUppercaseAll, ". " }, 175 { " ", kUppercaseAll, "." }, 176 { "", kUppercaseAll, "='" }, 177 { " ", kUppercaseAll, ". " }, 178 { " ", kUppercaseFirst, "=\"" }, 179 { " ", kUppercaseAll, "='" }, 180 { " ", kUppercaseFirst, "='" }, 181}; 182 183static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]); 184 185static int ToUpperCase(uint8_t *p, int len) { 186 if (p[0] < 0xc0) { 187 if (p[0] >= 'a' && p[0] <= 'z') { 188 p[0] ^= 32; 189 } 190 return 1; 191 } 192 /* An overly simplified uppercasing model for utf-8. */ 193 if (p[0] < 0xe0) { 194 p[1] ^= 32; 195 return 2; 196 } 197 /* An arbitrary transform for three byte characters. */ 198 p[2] ^= 5; 199 return 3; 200} 201 202static BROTLI_INLINE int TransformDictionaryWord( 203 uint8_t* dst, const uint8_t* word, int len, int transform) { 204 const char* prefix = kTransforms[transform].prefix; 205 const char* suffix = kTransforms[transform].suffix; 206 const int t = kTransforms[transform].transform; 207 int skip = t < kOmitFirst1 ? 0 : t - (kOmitFirst1 - 1); 208 int idx = 0; 209 int i = 0; 210 uint8_t* uppercase; 211 if (skip > len) { 212 skip = len; 213 } 214 while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } 215 word += skip; 216 len -= skip; 217 if (t <= kOmitLast9) { 218 len -= t; 219 } 220 while (i < len) { dst[idx++] = word[i++]; } 221 uppercase = &dst[idx - len]; 222 if (t == kUppercaseFirst) { 223 ToUpperCase(uppercase, len); 224 } else if (t == kUppercaseAll) { 225 while (len > 0) { 226 int step = ToUpperCase(uppercase, len); 227 uppercase += step; 228 len -= step; 229 } 230 } 231 while (*suffix) { dst[idx++] = (uint8_t)*suffix++; } 232 return idx; 233} 234 235#if defined(__cplusplus) || defined(c_plusplus) 236} /* extern "C" */ 237#endif 238 239#endif /* BROTLI_DEC_TRANSFORM_H_ */ 240