BC_PDF417HighLevelEncoder.cpp revision 4d3acf4ec42bf6e838f9060103aff98fbf170794
1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6// Original code is licensed as follows: 7/* 8 * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part 9 * 10 * Licensed under the Apache License, Version 2.0 (the "License"); 11 * you may not use this file except in compliance with the License. 12 * You may obtain a copy of the License at 13 * 14 * http://www.apache.org/licenses/LICENSE-2.0 15 * 16 * Unless required by applicable law or agreed to in writing, software 17 * distributed under the License is distributed on an "AS IS" BASIS, 18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 * See the License for the specific language governing permissions and 20 * limitations under the License. 21 */ 22 23#include "xfa/fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h" 24 25#include "third_party/bigint/BigIntegerLibrary.hh" 26#include "xfa/fxbarcode/BC_UtilCodingConvert.h" 27#include "xfa/fxbarcode/pdf417/BC_PDF417Compaction.h" 28#include "xfa/fxbarcode/utils.h" 29 30#define SUBMODE_ALPHA 0 31#define SUBMODE_LOWER 1 32#define SUBMODE_MIXED 2 33 34int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0; 35int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1; 36int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2; 37int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3; 38int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900; 39int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901; 40int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902; 41int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913; 42int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924; 43uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = { 44 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58, 45 35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0, 32, 0, 0, 0}; 46uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = { 47 59, 60, 62, 64, 91, 92, 93, 95, 96, 126, 33, 13, 9, 44, 58, 48 10, 45, 46, 36, 47, 34, 124, 42, 40, 41, 63, 123, 125, 39, 0}; 49int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0}; 50int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0}; 51 52void CBC_PDF417HighLevelEncoder::Initialize() { 53 Inverse(); 54} 55 56void CBC_PDF417HighLevelEncoder::Finalize() {} 57 58CFX_WideString CBC_PDF417HighLevelEncoder::encodeHighLevel( 59 CFX_WideString wideMsg, 60 Compaction compaction, 61 int32_t& e) { 62 CFX_ByteString bytes; 63 CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes); 64 CFX_WideString msg; 65 int32_t len = bytes.GetLength(); 66 for (int32_t i = 0; i < len; i++) { 67 FX_WCHAR ch = (FX_WCHAR)(bytes.GetAt(i) & 0xff); 68 if (ch == '?' && bytes.GetAt(i) != '?') { 69 e = BCExceptionCharactersOutsideISO88591Encoding; 70 return CFX_WideString(); 71 } 72 msg += ch; 73 } 74 CFX_ByteArray byteArr; 75 for (int32_t k = 0; k < bytes.GetLength(); k++) { 76 byteArr.Add(bytes.GetAt(k)); 77 } 78 CFX_WideString sb; 79 len = msg.GetLength(); 80 int32_t p = 0; 81 int32_t textSubMode = SUBMODE_ALPHA; 82 if (compaction == TEXT) { 83 encodeText(msg, p, len, sb, textSubMode); 84 } else if (compaction == BYTES) { 85 encodeBinary(&byteArr, p, byteArr.GetSize(), BYTE_COMPACTION, sb); 86 } else if (compaction == NUMERIC) { 87 sb += (FX_WCHAR)LATCH_TO_NUMERIC; 88 encodeNumeric(msg, p, len, sb); 89 } else { 90 int32_t encodingMode = LATCH_TO_TEXT; 91 while (p < len) { 92 int32_t n = determineConsecutiveDigitCount(msg, p); 93 if (n >= 13) { 94 sb += (FX_WCHAR)LATCH_TO_NUMERIC; 95 encodingMode = NUMERIC_COMPACTION; 96 textSubMode = SUBMODE_ALPHA; 97 encodeNumeric(msg, p, n, sb); 98 p += n; 99 } else { 100 int32_t t = determineConsecutiveTextCount(msg, p); 101 if (t >= 5 || n == len) { 102 if (encodingMode != TEXT_COMPACTION) { 103 sb += (FX_WCHAR)LATCH_TO_TEXT; 104 encodingMode = TEXT_COMPACTION; 105 textSubMode = SUBMODE_ALPHA; 106 } 107 textSubMode = encodeText(msg, p, t, sb, textSubMode); 108 p += t; 109 } else { 110 int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e); 111 BC_EXCEPTION_CHECK_ReturnValue(e, (FX_WCHAR)' '); 112 if (b == 0) { 113 b = 1; 114 } 115 if (b == 1 && encodingMode == TEXT_COMPACTION) { 116 encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb); 117 } else { 118 encodeBinary(&byteArr, p, b, encodingMode, sb); 119 encodingMode = BYTE_COMPACTION; 120 textSubMode = SUBMODE_ALPHA; 121 } 122 p += b; 123 } 124 } 125 } 126 } 127 return sb; 128} 129 130void CBC_PDF417HighLevelEncoder::Inverse() { 131 for (size_t l = 0; l < FX_ArraySize(MIXED); ++l) 132 MIXED[l] = -1; 133 134 for (uint8_t i = 0; i < FX_ArraySize(TEXT_MIXED_RAW); ++i) { 135 uint8_t b = TEXT_MIXED_RAW[i]; 136 if (b != 0) 137 MIXED[b] = i; 138 } 139 140 for (size_t l = 0; l < FX_ArraySize(PUNCTUATION); ++l) 141 PUNCTUATION[l] = -1; 142 143 for (uint8_t i = 0; i < FX_ArraySize(TEXT_PUNCTUATION_RAW); ++i) { 144 uint8_t b = TEXT_PUNCTUATION_RAW[i]; 145 if (b != 0) 146 PUNCTUATION[b] = i; 147 } 148} 149 150int32_t CBC_PDF417HighLevelEncoder::encodeText(CFX_WideString msg, 151 int32_t startpos, 152 int32_t count, 153 CFX_WideString& sb, 154 int32_t initialSubmode) { 155 CFX_WideString tmp; 156 int32_t submode = initialSubmode; 157 int32_t idx = 0; 158 while (true) { 159 FX_WCHAR ch = msg.GetAt(startpos + idx); 160 switch (submode) { 161 case SUBMODE_ALPHA: 162 if (isAlphaUpper(ch)) { 163 if (ch == ' ') { 164 tmp += (FX_WCHAR)26; 165 } else { 166 tmp += (FX_WCHAR)(ch - 65); 167 } 168 } else { 169 if (isAlphaLower(ch)) { 170 submode = SUBMODE_LOWER; 171 tmp += (FX_WCHAR)27; 172 continue; 173 } else if (isMixed(ch)) { 174 submode = SUBMODE_MIXED; 175 tmp += (FX_WCHAR)28; 176 continue; 177 } else { 178 tmp += (FX_WCHAR)29; 179 tmp += PUNCTUATION[ch]; 180 break; 181 } 182 } 183 break; 184 case SUBMODE_LOWER: 185 if (isAlphaLower(ch)) { 186 if (ch == ' ') { 187 tmp += (FX_WCHAR)26; 188 } else { 189 tmp += (FX_WCHAR)(ch - 97); 190 } 191 } else { 192 if (isAlphaUpper(ch)) { 193 tmp += (FX_WCHAR)27; 194 tmp += (FX_WCHAR)(ch - 65); 195 break; 196 } else if (isMixed(ch)) { 197 submode = SUBMODE_MIXED; 198 tmp += (FX_WCHAR)28; 199 continue; 200 } else { 201 tmp += (FX_WCHAR)29; 202 tmp += PUNCTUATION[ch]; 203 break; 204 } 205 } 206 break; 207 case SUBMODE_MIXED: 208 if (isMixed(ch)) { 209 tmp += MIXED[ch]; 210 } else { 211 if (isAlphaUpper(ch)) { 212 submode = SUBMODE_ALPHA; 213 tmp += (FX_WCHAR)28; 214 continue; 215 } else if (isAlphaLower(ch)) { 216 submode = SUBMODE_LOWER; 217 tmp += (FX_WCHAR)27; 218 continue; 219 } else { 220 if (startpos + idx + 1 < count) { 221 FX_WCHAR next = msg.GetAt(startpos + idx + 1); 222 if (isPunctuation(next)) { 223 submode = SUBMODE_PUNCTUATION; 224 tmp += (FX_WCHAR)25; 225 continue; 226 } 227 } 228 tmp += (FX_WCHAR)29; 229 tmp += PUNCTUATION[ch]; 230 } 231 } 232 break; 233 default: 234 if (isPunctuation(ch)) { 235 tmp += PUNCTUATION[ch]; 236 } else { 237 submode = SUBMODE_ALPHA; 238 tmp += (FX_WCHAR)29; 239 continue; 240 } 241 } 242 idx++; 243 if (idx >= count) { 244 break; 245 } 246 } 247 FX_WCHAR h = 0; 248 int32_t len = tmp.GetLength(); 249 for (int32_t i = 0; i < len; i++) { 250 bool odd = (i % 2) != 0; 251 if (odd) { 252 h = (FX_WCHAR)((h * 30) + tmp.GetAt(i)); 253 sb += h; 254 } else { 255 h = tmp.GetAt(i); 256 } 257 } 258 if ((len % 2) != 0) { 259 sb += (FX_WCHAR)((h * 30) + 29); 260 } 261 return submode; 262} 263void CBC_PDF417HighLevelEncoder::encodeBinary(CFX_ByteArray* bytes, 264 int32_t startpos, 265 int32_t count, 266 int32_t startmode, 267 CFX_WideString& sb) { 268 if (count == 1 && startmode == TEXT_COMPACTION) { 269 sb += (FX_WCHAR)SHIFT_TO_BYTE; 270 } 271 int32_t idx = startpos; 272 int32_t i = 0; 273 if (count >= 6) { 274 sb += (FX_WCHAR)LATCH_TO_BYTE; 275 FX_WCHAR chars[5]; 276 while ((startpos + count - idx) >= 6) { 277 int64_t t = 0; 278 for (i = 0; i < 6; i++) { 279 t <<= 8; 280 t += bytes->GetAt(idx + i) & 0xff; 281 } 282 for (i = 0; i < 5; i++) { 283 chars[i] = (FX_WCHAR)(t % 900); 284 t /= 900; 285 } 286 for (i = 4; i >= 0; i--) { 287 sb += (chars[i]); 288 } 289 idx += 6; 290 } 291 } 292 if (idx < startpos + count) { 293 sb += (FX_WCHAR)LATCH_TO_BYTE_PADDED; 294 } 295 for (i = idx; i < startpos + count; i++) { 296 int32_t ch = bytes->GetAt(i) & 0xff; 297 sb += (FX_WCHAR)ch; 298 } 299} 300void CBC_PDF417HighLevelEncoder::encodeNumeric(CFX_WideString msg, 301 int32_t startpos, 302 int32_t count, 303 CFX_WideString& sb) { 304 int32_t idx = 0; 305 BigInteger num900 = 900; 306 while (idx < count) { 307 CFX_WideString tmp; 308 int32_t len = 44 < count - idx ? 44 : count - idx; 309 CFX_ByteString part = 310 ((FX_WCHAR)'1' + msg.Mid(startpos + idx, len)).UTF8Encode(); 311 BigInteger bigint = stringToBigInteger(part.c_str()); 312 do { 313 int32_t c = (bigint % num900).toInt(); 314 tmp += (FX_WCHAR)(c); 315 bigint = bigint / num900; 316 } while (!bigint.isZero()); 317 for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) { 318 sb += tmp.GetAt(i); 319 } 320 idx += len; 321 } 322} 323bool CBC_PDF417HighLevelEncoder::isDigit(FX_WCHAR ch) { 324 return ch >= '0' && ch <= '9'; 325} 326bool CBC_PDF417HighLevelEncoder::isAlphaUpper(FX_WCHAR ch) { 327 return ch == ' ' || (ch >= 'A' && ch <= 'Z'); 328} 329bool CBC_PDF417HighLevelEncoder::isAlphaLower(FX_WCHAR ch) { 330 return ch == ' ' || (ch >= 'a' && ch <= 'z'); 331} 332bool CBC_PDF417HighLevelEncoder::isMixed(FX_WCHAR ch) { 333 return MIXED[ch] != -1; 334} 335bool CBC_PDF417HighLevelEncoder::isPunctuation(FX_WCHAR ch) { 336 return PUNCTUATION[ch] != -1; 337} 338bool CBC_PDF417HighLevelEncoder::isText(FX_WCHAR ch) { 339 return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126); 340} 341int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount( 342 CFX_WideString msg, 343 int32_t startpos) { 344 int32_t count = 0; 345 int32_t len = msg.GetLength(); 346 int32_t idx = startpos; 347 if (idx < len) { 348 FX_WCHAR ch = msg.GetAt(idx); 349 while (isDigit(ch) && idx < len) { 350 count++; 351 idx++; 352 if (idx < len) { 353 ch = msg.GetAt(idx); 354 } 355 } 356 } 357 return count; 358} 359int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount( 360 CFX_WideString msg, 361 int32_t startpos) { 362 int32_t len = msg.GetLength(); 363 int32_t idx = startpos; 364 while (idx < len) { 365 FX_WCHAR ch = msg.GetAt(idx); 366 int32_t numericCount = 0; 367 while (numericCount < 13 && isDigit(ch) && idx < len) { 368 numericCount++; 369 idx++; 370 if (idx < len) { 371 ch = msg.GetAt(idx); 372 } 373 } 374 if (numericCount >= 13) { 375 return idx - startpos - numericCount; 376 } 377 if (numericCount > 0) { 378 continue; 379 } 380 ch = msg.GetAt(idx); 381 if (!isText(ch)) { 382 break; 383 } 384 idx++; 385 } 386 return idx - startpos; 387} 388int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount( 389 CFX_WideString msg, 390 CFX_ByteArray* bytes, 391 int32_t startpos, 392 int32_t& e) { 393 int32_t len = msg.GetLength(); 394 int32_t idx = startpos; 395 while (idx < len) { 396 FX_WCHAR ch = msg.GetAt(idx); 397 int32_t numericCount = 0; 398 while (numericCount < 13 && isDigit(ch)) { 399 numericCount++; 400 int32_t i = idx + numericCount; 401 if (i >= len) { 402 break; 403 } 404 ch = msg.GetAt(i); 405 } 406 if (numericCount >= 13) { 407 return idx - startpos; 408 } 409 int32_t textCount = 0; 410 while (textCount < 5 && isText(ch)) { 411 textCount++; 412 int32_t i = idx + textCount; 413 if (i >= len) { 414 break; 415 } 416 ch = msg.GetAt(i); 417 } 418 if (textCount >= 5) { 419 return idx - startpos; 420 } 421 ch = msg.GetAt(idx); 422 if (bytes->GetAt(idx) == 63 && ch != '?') { 423 e = BCExceptionNonEncodableCharacterDetected; 424 return -1; 425 } 426 idx++; 427 } 428 return idx - startpos; 429} 430