// BC_HighLevelEncoder.cpp revision 5ae9d0c6fd838a2967cca72aa5751b51dadc2769
1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6// Original code is licensed as follows: 7/* 8 * Copyright 2006-2007 Jeremias Maerki. 9 * 10 * Licensed under the Apache License, Version 2.0 (the "License"); 11 * you may not use this file except in compliance with the License. 12 * You may obtain a copy of the License at 13 * 14 * http://www.apache.org/licenses/LICENSE-2.0 15 * 16 * Unless required by applicable law or agreed to in writing, software 17 * distributed under the License is distributed on an "AS IS" BASIS, 18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 * See the License for the specific language governing permissions and 20 * limitations under the License. 21 */ 22 23#include <limits> 24#include <memory> 25#include <vector> 26 27#include "xfa/fxbarcode/BC_Dimension.h" 28#include "xfa/fxbarcode/BC_UtilCodingConvert.h" 29#include "xfa/fxbarcode/common/BC_CommonBitMatrix.h" 30#include "xfa/fxbarcode/datamatrix/BC_ASCIIEncoder.h" 31#include "xfa/fxbarcode/datamatrix/BC_Base256Encoder.h" 32#include "xfa/fxbarcode/datamatrix/BC_C40Encoder.h" 33#include "xfa/fxbarcode/datamatrix/BC_EdifactEncoder.h" 34#include "xfa/fxbarcode/datamatrix/BC_Encoder.h" 35#include "xfa/fxbarcode/datamatrix/BC_EncoderContext.h" 36#include "xfa/fxbarcode/datamatrix/BC_HighLevelEncoder.h" 37#include "xfa/fxbarcode/datamatrix/BC_SymbolInfo.h" 38#include "xfa/fxbarcode/datamatrix/BC_SymbolShapeHint.h" 39#include "xfa/fxbarcode/datamatrix/BC_TextEncoder.h" 40#include "xfa/fxbarcode/datamatrix/BC_X12Encoder.h" 41#include "xfa/fxbarcode/utils.h" 42 43FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_C40 = 230; 44FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_BASE256 = 231; 45FX_WCHAR CBC_HighLevelEncoder::UPPER_SHIFT = 235; 46FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_ANSIX12 = 
238; 47FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_TEXT = 239; 48FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_EDIFACT = 240; 49FX_WCHAR CBC_HighLevelEncoder::C40_UNLATCH = 254; 50FX_WCHAR CBC_HighLevelEncoder::X12_UNLATCH = 254; 51FX_WCHAR CBC_HighLevelEncoder::PAD = 129; 52FX_WCHAR CBC_HighLevelEncoder::MACRO_05 = 236; 53FX_WCHAR CBC_HighLevelEncoder::MACRO_06 = 237; 54const wchar_t* CBC_HighLevelEncoder::MACRO_05_HEADER = L"[)>05"; 55const wchar_t* CBC_HighLevelEncoder::MACRO_06_HEADER = L"[)>06"; 56const wchar_t CBC_HighLevelEncoder::MACRO_TRAILER = 0x0004; 57 58CBC_HighLevelEncoder::CBC_HighLevelEncoder() {} 59CBC_HighLevelEncoder::~CBC_HighLevelEncoder() {} 60 61CFX_ArrayTemplate<uint8_t>& CBC_HighLevelEncoder::getBytesForMessage( 62 CFX_WideString msg) { 63 CFX_ByteString bytestr; 64 CBC_UtilCodingConvert::UnicodeToUTF8(msg, bytestr); 65 for (int32_t i = 0; i < bytestr.GetLength(); i++) { 66 m_bytearray.Add(bytestr.GetAt(i)); 67 } 68 return m_bytearray; 69} 70CFX_WideString CBC_HighLevelEncoder::encodeHighLevel(CFX_WideString msg, 71 CFX_WideString ecLevel, 72 int32_t& e) { 73 return encodeHighLevel(msg, ecLevel, FORCE_NONE, nullptr, nullptr, e); 74} 75CFX_WideString CBC_HighLevelEncoder::encodeHighLevel(CFX_WideString msg, 76 CFX_WideString ecLevel, 77 SymbolShapeHint shape, 78 CBC_Dimension* minSize, 79 CBC_Dimension* maxSize, 80 int32_t& e) { 81 CBC_EncoderContext context(msg, ecLevel, e); 82 if (e != BCExceptionNO) 83 return CFX_WideString(); 84 context.setSymbolShape(shape); 85 context.setSizeConstraints(minSize, maxSize); 86 if ((msg.Mid(0, 6) == MACRO_05_HEADER) && 87 (msg.Mid(msg.GetLength() - 1, 1) == MACRO_TRAILER)) { 88 context.writeCodeword(MACRO_05); 89 context.setSkipAtEnd(2); 90 context.m_pos += 6; 91 } else if ((msg.Mid(0, 6) == MACRO_06_HEADER) && 92 (msg.Mid(msg.GetLength() - 1, 1) == MACRO_TRAILER)) { 93 context.writeCodeword(MACRO_06); 94 context.setSkipAtEnd(2); 95 context.m_pos += 6; 96 } 97 98 std::vector<std::unique_ptr<CBC_Encoder>> encoders; 
99 encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_ASCIIEncoder())); 100 encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_C40Encoder())); 101 encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_TextEncoder())); 102 encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_X12Encoder())); 103 encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_EdifactEncoder())); 104 encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_Base256Encoder())); 105 int32_t encodingMode = ASCII_ENCODATION; 106 while (context.hasMoreCharacters()) { 107 encoders[encodingMode]->Encode(context, e); 108 if (e != BCExceptionNO) 109 return L""; 110 111 if (context.m_newEncoding >= 0) { 112 encodingMode = context.m_newEncoding; 113 context.resetEncoderSignal(); 114 } 115 } 116 int32_t len = context.m_codewords.GetLength(); 117 context.updateSymbolInfo(e); 118 if (e != BCExceptionNO) 119 return L""; 120 121 int32_t capacity = context.m_symbolInfo->m_dataCapacity; 122 if (len < capacity) { 123 if (encodingMode != ASCII_ENCODATION && 124 encodingMode != BASE256_ENCODATION) { 125 context.writeCodeword(0x00fe); 126 } 127 } 128 CFX_WideString codewords = context.m_codewords; 129 if (codewords.GetLength() < capacity) { 130 codewords += PAD; 131 } 132 while (codewords.GetLength() < capacity) { 133 codewords += (randomize253State(PAD, codewords.GetLength() + 1)); 134 } 135 return codewords; 136} 137int32_t CBC_HighLevelEncoder::lookAheadTest(CFX_WideString msg, 138 int32_t startpos, 139 int32_t currentMode) { 140 if (startpos >= msg.GetLength()) { 141 return currentMode; 142 } 143 std::vector<FX_FLOAT> charCounts; 144 if (currentMode == ASCII_ENCODATION) { 145 charCounts.push_back(0); 146 charCounts.push_back(1); 147 charCounts.push_back(1); 148 charCounts.push_back(1); 149 charCounts.push_back(1); 150 charCounts.push_back(1.25f); 151 } else { 152 charCounts.push_back(1); 153 charCounts.push_back(2); 154 charCounts.push_back(2); 155 charCounts.push_back(2); 156 
charCounts.push_back(2); 157 charCounts.push_back(2.25f); 158 charCounts[currentMode] = 0; 159 } 160 int32_t charsProcessed = 0; 161 while (true) { 162 if ((startpos + charsProcessed) == msg.GetLength()) { 163 int32_t min = std::numeric_limits<int32_t>::max(); 164 CFX_ArrayTemplate<uint8_t> mins; 165 mins.SetSize(6); 166 CFX_ArrayTemplate<int32_t> intCharCounts; 167 intCharCounts.SetSize(6); 168 min = findMinimums(charCounts, intCharCounts, min, mins); 169 int32_t minCount = getMinimumCount(mins); 170 if (intCharCounts[ASCII_ENCODATION] == min) { 171 return ASCII_ENCODATION; 172 } 173 if (minCount == 1 && mins[BASE256_ENCODATION] > 0) { 174 return BASE256_ENCODATION; 175 } 176 if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) { 177 return EDIFACT_ENCODATION; 178 } 179 if (minCount == 1 && mins[TEXT_ENCODATION] > 0) { 180 return TEXT_ENCODATION; 181 } 182 if (minCount == 1 && mins[X12_ENCODATION] > 0) { 183 return X12_ENCODATION; 184 } 185 return C40_ENCODATION; 186 } 187 FX_WCHAR c = msg.GetAt(startpos + charsProcessed); 188 charsProcessed++; 189 if (isDigit(c)) { 190 charCounts[ASCII_ENCODATION] += 0.5; 191 } else if (isExtendedASCII(c)) { 192 charCounts[ASCII_ENCODATION] = 193 (FX_FLOAT)ceil(charCounts[ASCII_ENCODATION]); 194 charCounts[ASCII_ENCODATION] += 2; 195 } else { 196 charCounts[ASCII_ENCODATION] = 197 (FX_FLOAT)ceil(charCounts[ASCII_ENCODATION]); 198 charCounts[ASCII_ENCODATION]++; 199 } 200 if (isNativeC40(c)) { 201 charCounts[C40_ENCODATION] += 2.0f / 3.0f; 202 } else if (isExtendedASCII(c)) { 203 charCounts[C40_ENCODATION] += 8.0f / 3.0f; 204 } else { 205 charCounts[C40_ENCODATION] += 4.0f / 3.0f; 206 } 207 if (isNativeText(c)) { 208 charCounts[TEXT_ENCODATION] += 2.0f / 3.0f; 209 } else if (isExtendedASCII(c)) { 210 charCounts[TEXT_ENCODATION] += 8.0f / 3.0f; 211 } else { 212 charCounts[TEXT_ENCODATION] += 4.0f / 3.0f; 213 } 214 if (isNativeX12(c)) { 215 charCounts[X12_ENCODATION] += 2.0f / 3.0f; 216 } else if (isExtendedASCII(c)) { 217 
charCounts[X12_ENCODATION] += 13.0f / 3.0f; 218 } else { 219 charCounts[X12_ENCODATION] += 10.0f / 3.0f; 220 } 221 if (isNativeEDIFACT(c)) { 222 charCounts[EDIFACT_ENCODATION] += 3.0f / 4.0f; 223 } else if (isExtendedASCII(c)) { 224 charCounts[EDIFACT_ENCODATION] += 17.0f / 4.0f; 225 } else { 226 charCounts[EDIFACT_ENCODATION] += 13.0f / 4.0f; 227 } 228 if (isSpecialB256(c)) { 229 charCounts[BASE256_ENCODATION] += 4; 230 } else { 231 charCounts[BASE256_ENCODATION]++; 232 } 233 if (charsProcessed >= 4) { 234 CFX_ArrayTemplate<int32_t> intCharCounts; 235 intCharCounts.SetSize(6); 236 CFX_ArrayTemplate<uint8_t> mins; 237 mins.SetSize(6); 238 findMinimums(charCounts, intCharCounts, 239 std::numeric_limits<int32_t>::max(), mins); 240 int32_t minCount = getMinimumCount(mins); 241 if (intCharCounts[ASCII_ENCODATION] < intCharCounts[BASE256_ENCODATION] && 242 intCharCounts[ASCII_ENCODATION] < intCharCounts[C40_ENCODATION] && 243 intCharCounts[ASCII_ENCODATION] < intCharCounts[TEXT_ENCODATION] && 244 intCharCounts[ASCII_ENCODATION] < intCharCounts[X12_ENCODATION] && 245 intCharCounts[ASCII_ENCODATION] < intCharCounts[EDIFACT_ENCODATION]) { 246 return ASCII_ENCODATION; 247 } 248 if (intCharCounts[BASE256_ENCODATION] < intCharCounts[ASCII_ENCODATION] || 249 (mins[C40_ENCODATION] + mins[TEXT_ENCODATION] + mins[X12_ENCODATION] + 250 mins[EDIFACT_ENCODATION]) == 0) { 251 return BASE256_ENCODATION; 252 } 253 if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) { 254 return EDIFACT_ENCODATION; 255 } 256 if (minCount == 1 && mins[TEXT_ENCODATION] > 0) { 257 return TEXT_ENCODATION; 258 } 259 if (minCount == 1 && mins[X12_ENCODATION] > 0) { 260 return X12_ENCODATION; 261 } 262 if (intCharCounts[C40_ENCODATION] + 1 < intCharCounts[ASCII_ENCODATION] && 263 intCharCounts[C40_ENCODATION] + 1 < 264 intCharCounts[BASE256_ENCODATION] && 265 intCharCounts[C40_ENCODATION] + 1 < 266 intCharCounts[EDIFACT_ENCODATION] && 267 intCharCounts[C40_ENCODATION] + 1 < intCharCounts[TEXT_ENCODATION]) { 268 
if (intCharCounts[C40_ENCODATION] < intCharCounts[X12_ENCODATION]) { 269 return C40_ENCODATION; 270 } 271 if (intCharCounts[C40_ENCODATION] == intCharCounts[X12_ENCODATION]) { 272 int32_t p = startpos + charsProcessed + 1; 273 while (p < msg.GetLength()) { 274 FX_WCHAR tc = msg.GetAt(p); 275 if (isX12TermSep(tc)) { 276 return X12_ENCODATION; 277 } 278 if (!isNativeX12(tc)) { 279 break; 280 } 281 p++; 282 } 283 return C40_ENCODATION; 284 } 285 } 286 } 287 } 288} 289bool CBC_HighLevelEncoder::isDigit(FX_WCHAR ch) { 290 return ch >= '0' && ch <= '9'; 291} 292bool CBC_HighLevelEncoder::isExtendedASCII(FX_WCHAR ch) { 293 return ch >= 128 && ch <= 255; 294} 295int32_t CBC_HighLevelEncoder::determineConsecutiveDigitCount(CFX_WideString msg, 296 int32_t startpos) { 297 int32_t count = 0; 298 int32_t len = msg.GetLength(); 299 int32_t idx = startpos; 300 if (idx < len) { 301 FX_WCHAR ch = msg.GetAt(idx); 302 while (isDigit(ch) && idx < len) { 303 count++; 304 idx++; 305 if (idx < len) { 306 ch = msg.GetAt(idx); 307 } 308 } 309 } 310 return count; 311} 312void CBC_HighLevelEncoder::illegalCharacter(FX_WCHAR c, int32_t& e) { 313 e = BCExceptionIllegalArgument; 314} 315FX_WCHAR CBC_HighLevelEncoder::randomize253State(FX_WCHAR ch, 316 int32_t codewordPosition) { 317 int32_t pseudoRandom = ((149 * codewordPosition) % 253) + 1; 318 int32_t tempVariable = ch + pseudoRandom; 319 return tempVariable <= 254 ? 
(FX_WCHAR)tempVariable 320 : (FX_WCHAR)(tempVariable - 254); 321} 322int32_t CBC_HighLevelEncoder::findMinimums( 323 std::vector<FX_FLOAT>& charCounts, 324 CFX_ArrayTemplate<int32_t>& intCharCounts, 325 int32_t min, 326 CFX_ArrayTemplate<uint8_t>& mins) { 327 for (int32_t l = 0; l < mins.GetSize(); l++) { 328 mins[l] = (uint8_t)0; 329 } 330 for (int32_t i = 0; i < 6; i++) { 331 intCharCounts[i] = (int32_t)ceil(charCounts[i]); 332 int32_t current = intCharCounts[i]; 333 if (min > current) { 334 min = current; 335 for (int32_t j = 0; j < mins.GetSize(); j++) { 336 mins[j] = (uint8_t)0; 337 } 338 } 339 if (min == current) { 340 mins[i]++; 341 } 342 } 343 return min; 344} 345int32_t CBC_HighLevelEncoder::getMinimumCount( 346 CFX_ArrayTemplate<uint8_t>& mins) { 347 int32_t minCount = 0; 348 for (int32_t i = 0; i < 6; i++) { 349 minCount += mins[i]; 350 } 351 return minCount; 352} 353bool CBC_HighLevelEncoder::isNativeC40(FX_WCHAR ch) { 354 return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z'); 355} 356bool CBC_HighLevelEncoder::isNativeText(FX_WCHAR ch) { 357 return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z'); 358} 359bool CBC_HighLevelEncoder::isNativeX12(FX_WCHAR ch) { 360 return isX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') || 361 (ch >= 'A' && ch <= 'Z'); 362} 363bool CBC_HighLevelEncoder::isX12TermSep(FX_WCHAR ch) { 364 return (ch == '\r') || (ch == '*') || (ch == '>'); 365} 366bool CBC_HighLevelEncoder::isNativeEDIFACT(FX_WCHAR ch) { 367 return ch >= ' ' && ch <= '^'; 368} 369bool CBC_HighLevelEncoder::isSpecialB256(FX_WCHAR ch) { 370 return false; 371} 372