BC_HighLevelEncoder.cpp revision 5ae9d0c6fd838a2967cca72aa5751b51dadc2769
1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6// Original code is licensed as follows:
7/*
8 * Copyright 2006-2007 Jeremias Maerki.
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 *      http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
21 */
22
23#include <limits>
24#include <memory>
25#include <vector>
26
27#include "xfa/fxbarcode/BC_Dimension.h"
28#include "xfa/fxbarcode/BC_UtilCodingConvert.h"
29#include "xfa/fxbarcode/common/BC_CommonBitMatrix.h"
30#include "xfa/fxbarcode/datamatrix/BC_ASCIIEncoder.h"
31#include "xfa/fxbarcode/datamatrix/BC_Base256Encoder.h"
32#include "xfa/fxbarcode/datamatrix/BC_C40Encoder.h"
33#include "xfa/fxbarcode/datamatrix/BC_EdifactEncoder.h"
34#include "xfa/fxbarcode/datamatrix/BC_Encoder.h"
35#include "xfa/fxbarcode/datamatrix/BC_EncoderContext.h"
36#include "xfa/fxbarcode/datamatrix/BC_HighLevelEncoder.h"
37#include "xfa/fxbarcode/datamatrix/BC_SymbolInfo.h"
38#include "xfa/fxbarcode/datamatrix/BC_SymbolShapeHint.h"
39#include "xfa/fxbarcode/datamatrix/BC_TextEncoder.h"
40#include "xfa/fxbarcode/datamatrix/BC_X12Encoder.h"
41#include "xfa/fxbarcode/utils.h"
42
// Definitions of the static data members of CBC_HighLevelEncoder.
//
// The FX_WCHAR members are numeric codeword values; their roles (latch,
// unlatch, shift, pad, macro) are indicated by their identifiers. Within this
// file only MACRO_05, MACRO_06 and PAD are referenced directly (by
// encodeHighLevel()).
43FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_C40 = 230;
44FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_BASE256 = 231;
45FX_WCHAR CBC_HighLevelEncoder::UPPER_SHIFT = 235;
46FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_ANSIX12 = 238;
47FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_TEXT = 239;
48FX_WCHAR CBC_HighLevelEncoder::LATCH_TO_EDIFACT = 240;
// C40_UNLATCH and X12_UNLATCH share the value 254 (0xfe), which is also the
// literal encodeHighLevel() writes when it ends outside ASCII/Base 256 mode.
49FX_WCHAR CBC_HighLevelEncoder::C40_UNLATCH = 254;
50FX_WCHAR CBC_HighLevelEncoder::X12_UNLATCH = 254;
// First padding codeword appended by encodeHighLevel(); later padding
// positions are derived from it via randomize253State().
51FX_WCHAR CBC_HighLevelEncoder::PAD = 129;
// Codewords written by encodeHighLevel() when the matching macro header and
// the trailer are both detected in the message.
52FX_WCHAR CBC_HighLevelEncoder::MACRO_05 = 236;
53FX_WCHAR CBC_HighLevelEncoder::MACRO_06 = 237;
// NOTE(review): encodeHighLevel() compares these headers against the first 6
// characters of the message (msg.Mid(0, 6)) and then skips 6 characters, but
// the literals as displayed here contain only 5 characters. Confirm whether a
// non-printing character is expected inside the literal; as written, the
// comparison cannot succeed for messages of 6 or more characters.
54const wchar_t* CBC_HighLevelEncoder::MACRO_05_HEADER = L"[)>05";
55const wchar_t* CBC_HighLevelEncoder::MACRO_06_HEADER = L"[)>06";
// Character compared against the last character of the message when
// detecting a macro.
56const wchar_t CBC_HighLevelEncoder::MACRO_TRAILER = 0x0004;
57
58CBC_HighLevelEncoder::CBC_HighLevelEncoder() {}
59CBC_HighLevelEncoder::~CBC_HighLevelEncoder() {}
60
61CFX_ArrayTemplate<uint8_t>& CBC_HighLevelEncoder::getBytesForMessage(
62    CFX_WideString msg) {
63  CFX_ByteString bytestr;
64  CBC_UtilCodingConvert::UnicodeToUTF8(msg, bytestr);
65  for (int32_t i = 0; i < bytestr.GetLength(); i++) {
66    m_bytearray.Add(bytestr.GetAt(i));
67  }
68  return m_bytearray;
69}
70CFX_WideString CBC_HighLevelEncoder::encodeHighLevel(CFX_WideString msg,
71                                                     CFX_WideString ecLevel,
72                                                     int32_t& e) {
73  return encodeHighLevel(msg, ecLevel, FORCE_NONE, nullptr, nullptr, e);
74}
75CFX_WideString CBC_HighLevelEncoder::encodeHighLevel(CFX_WideString msg,
76                                                     CFX_WideString ecLevel,
77                                                     SymbolShapeHint shape,
78                                                     CBC_Dimension* minSize,
79                                                     CBC_Dimension* maxSize,
80                                                     int32_t& e) {
81  CBC_EncoderContext context(msg, ecLevel, e);
82  if (e != BCExceptionNO)
83    return CFX_WideString();
84  context.setSymbolShape(shape);
85  context.setSizeConstraints(minSize, maxSize);
86  if ((msg.Mid(0, 6) == MACRO_05_HEADER) &&
87      (msg.Mid(msg.GetLength() - 1, 1) == MACRO_TRAILER)) {
88    context.writeCodeword(MACRO_05);
89    context.setSkipAtEnd(2);
90    context.m_pos += 6;
91  } else if ((msg.Mid(0, 6) == MACRO_06_HEADER) &&
92             (msg.Mid(msg.GetLength() - 1, 1) == MACRO_TRAILER)) {
93    context.writeCodeword(MACRO_06);
94    context.setSkipAtEnd(2);
95    context.m_pos += 6;
96  }
97
98  std::vector<std::unique_ptr<CBC_Encoder>> encoders;
99  encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_ASCIIEncoder()));
100  encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_C40Encoder()));
101  encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_TextEncoder()));
102  encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_X12Encoder()));
103  encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_EdifactEncoder()));
104  encoders.push_back(std::unique_ptr<CBC_Encoder>(new CBC_Base256Encoder()));
105  int32_t encodingMode = ASCII_ENCODATION;
106  while (context.hasMoreCharacters()) {
107    encoders[encodingMode]->Encode(context, e);
108    if (e != BCExceptionNO)
109      return L"";
110
111    if (context.m_newEncoding >= 0) {
112      encodingMode = context.m_newEncoding;
113      context.resetEncoderSignal();
114    }
115  }
116  int32_t len = context.m_codewords.GetLength();
117  context.updateSymbolInfo(e);
118  if (e != BCExceptionNO)
119    return L"";
120
121  int32_t capacity = context.m_symbolInfo->m_dataCapacity;
122  if (len < capacity) {
123    if (encodingMode != ASCII_ENCODATION &&
124        encodingMode != BASE256_ENCODATION) {
125      context.writeCodeword(0x00fe);
126    }
127  }
128  CFX_WideString codewords = context.m_codewords;
129  if (codewords.GetLength() < capacity) {
130    codewords += PAD;
131  }
132  while (codewords.GetLength() < capacity) {
133    codewords += (randomize253State(PAD, codewords.GetLength() + 1));
134  }
135  return codewords;
136}
137int32_t CBC_HighLevelEncoder::lookAheadTest(CFX_WideString msg,
138                                            int32_t startpos,
139                                            int32_t currentMode) {
140  if (startpos >= msg.GetLength()) {
141    return currentMode;
142  }
143  std::vector<FX_FLOAT> charCounts;
144  if (currentMode == ASCII_ENCODATION) {
145    charCounts.push_back(0);
146    charCounts.push_back(1);
147    charCounts.push_back(1);
148    charCounts.push_back(1);
149    charCounts.push_back(1);
150    charCounts.push_back(1.25f);
151  } else {
152    charCounts.push_back(1);
153    charCounts.push_back(2);
154    charCounts.push_back(2);
155    charCounts.push_back(2);
156    charCounts.push_back(2);
157    charCounts.push_back(2.25f);
158    charCounts[currentMode] = 0;
159  }
160  int32_t charsProcessed = 0;
161  while (true) {
162    if ((startpos + charsProcessed) == msg.GetLength()) {
163      int32_t min = std::numeric_limits<int32_t>::max();
164      CFX_ArrayTemplate<uint8_t> mins;
165      mins.SetSize(6);
166      CFX_ArrayTemplate<int32_t> intCharCounts;
167      intCharCounts.SetSize(6);
168      min = findMinimums(charCounts, intCharCounts, min, mins);
169      int32_t minCount = getMinimumCount(mins);
170      if (intCharCounts[ASCII_ENCODATION] == min) {
171        return ASCII_ENCODATION;
172      }
173      if (minCount == 1 && mins[BASE256_ENCODATION] > 0) {
174        return BASE256_ENCODATION;
175      }
176      if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
177        return EDIFACT_ENCODATION;
178      }
179      if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
180        return TEXT_ENCODATION;
181      }
182      if (minCount == 1 && mins[X12_ENCODATION] > 0) {
183        return X12_ENCODATION;
184      }
185      return C40_ENCODATION;
186    }
187    FX_WCHAR c = msg.GetAt(startpos + charsProcessed);
188    charsProcessed++;
189    if (isDigit(c)) {
190      charCounts[ASCII_ENCODATION] += 0.5;
191    } else if (isExtendedASCII(c)) {
192      charCounts[ASCII_ENCODATION] =
193          (FX_FLOAT)ceil(charCounts[ASCII_ENCODATION]);
194      charCounts[ASCII_ENCODATION] += 2;
195    } else {
196      charCounts[ASCII_ENCODATION] =
197          (FX_FLOAT)ceil(charCounts[ASCII_ENCODATION]);
198      charCounts[ASCII_ENCODATION]++;
199    }
200    if (isNativeC40(c)) {
201      charCounts[C40_ENCODATION] += 2.0f / 3.0f;
202    } else if (isExtendedASCII(c)) {
203      charCounts[C40_ENCODATION] += 8.0f / 3.0f;
204    } else {
205      charCounts[C40_ENCODATION] += 4.0f / 3.0f;
206    }
207    if (isNativeText(c)) {
208      charCounts[TEXT_ENCODATION] += 2.0f / 3.0f;
209    } else if (isExtendedASCII(c)) {
210      charCounts[TEXT_ENCODATION] += 8.0f / 3.0f;
211    } else {
212      charCounts[TEXT_ENCODATION] += 4.0f / 3.0f;
213    }
214    if (isNativeX12(c)) {
215      charCounts[X12_ENCODATION] += 2.0f / 3.0f;
216    } else if (isExtendedASCII(c)) {
217      charCounts[X12_ENCODATION] += 13.0f / 3.0f;
218    } else {
219      charCounts[X12_ENCODATION] += 10.0f / 3.0f;
220    }
221    if (isNativeEDIFACT(c)) {
222      charCounts[EDIFACT_ENCODATION] += 3.0f / 4.0f;
223    } else if (isExtendedASCII(c)) {
224      charCounts[EDIFACT_ENCODATION] += 17.0f / 4.0f;
225    } else {
226      charCounts[EDIFACT_ENCODATION] += 13.0f / 4.0f;
227    }
228    if (isSpecialB256(c)) {
229      charCounts[BASE256_ENCODATION] += 4;
230    } else {
231      charCounts[BASE256_ENCODATION]++;
232    }
233    if (charsProcessed >= 4) {
234      CFX_ArrayTemplate<int32_t> intCharCounts;
235      intCharCounts.SetSize(6);
236      CFX_ArrayTemplate<uint8_t> mins;
237      mins.SetSize(6);
238      findMinimums(charCounts, intCharCounts,
239                   std::numeric_limits<int32_t>::max(), mins);
240      int32_t minCount = getMinimumCount(mins);
241      if (intCharCounts[ASCII_ENCODATION] < intCharCounts[BASE256_ENCODATION] &&
242          intCharCounts[ASCII_ENCODATION] < intCharCounts[C40_ENCODATION] &&
243          intCharCounts[ASCII_ENCODATION] < intCharCounts[TEXT_ENCODATION] &&
244          intCharCounts[ASCII_ENCODATION] < intCharCounts[X12_ENCODATION] &&
245          intCharCounts[ASCII_ENCODATION] < intCharCounts[EDIFACT_ENCODATION]) {
246        return ASCII_ENCODATION;
247      }
248      if (intCharCounts[BASE256_ENCODATION] < intCharCounts[ASCII_ENCODATION] ||
249          (mins[C40_ENCODATION] + mins[TEXT_ENCODATION] + mins[X12_ENCODATION] +
250           mins[EDIFACT_ENCODATION]) == 0) {
251        return BASE256_ENCODATION;
252      }
253      if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
254        return EDIFACT_ENCODATION;
255      }
256      if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
257        return TEXT_ENCODATION;
258      }
259      if (minCount == 1 && mins[X12_ENCODATION] > 0) {
260        return X12_ENCODATION;
261      }
262      if (intCharCounts[C40_ENCODATION] + 1 < intCharCounts[ASCII_ENCODATION] &&
263          intCharCounts[C40_ENCODATION] + 1 <
264              intCharCounts[BASE256_ENCODATION] &&
265          intCharCounts[C40_ENCODATION] + 1 <
266              intCharCounts[EDIFACT_ENCODATION] &&
267          intCharCounts[C40_ENCODATION] + 1 < intCharCounts[TEXT_ENCODATION]) {
268        if (intCharCounts[C40_ENCODATION] < intCharCounts[X12_ENCODATION]) {
269          return C40_ENCODATION;
270        }
271        if (intCharCounts[C40_ENCODATION] == intCharCounts[X12_ENCODATION]) {
272          int32_t p = startpos + charsProcessed + 1;
273          while (p < msg.GetLength()) {
274            FX_WCHAR tc = msg.GetAt(p);
275            if (isX12TermSep(tc)) {
276              return X12_ENCODATION;
277            }
278            if (!isNativeX12(tc)) {
279              break;
280            }
281            p++;
282          }
283          return C40_ENCODATION;
284        }
285      }
286    }
287  }
288}
289bool CBC_HighLevelEncoder::isDigit(FX_WCHAR ch) {
290  return ch >= '0' && ch <= '9';
291}
292bool CBC_HighLevelEncoder::isExtendedASCII(FX_WCHAR ch) {
293  return ch >= 128 && ch <= 255;
294}
295int32_t CBC_HighLevelEncoder::determineConsecutiveDigitCount(CFX_WideString msg,
296                                                             int32_t startpos) {
297  int32_t count = 0;
298  int32_t len = msg.GetLength();
299  int32_t idx = startpos;
300  if (idx < len) {
301    FX_WCHAR ch = msg.GetAt(idx);
302    while (isDigit(ch) && idx < len) {
303      count++;
304      idx++;
305      if (idx < len) {
306        ch = msg.GetAt(idx);
307      }
308    }
309  }
310  return count;
311}
312void CBC_HighLevelEncoder::illegalCharacter(FX_WCHAR c, int32_t& e) {
313  e = BCExceptionIllegalArgument;
314}
315FX_WCHAR CBC_HighLevelEncoder::randomize253State(FX_WCHAR ch,
316                                                 int32_t codewordPosition) {
317  int32_t pseudoRandom = ((149 * codewordPosition) % 253) + 1;
318  int32_t tempVariable = ch + pseudoRandom;
319  return tempVariable <= 254 ? (FX_WCHAR)tempVariable
320                             : (FX_WCHAR)(tempVariable - 254);
321}
322int32_t CBC_HighLevelEncoder::findMinimums(
323    std::vector<FX_FLOAT>& charCounts,
324    CFX_ArrayTemplate<int32_t>& intCharCounts,
325    int32_t min,
326    CFX_ArrayTemplate<uint8_t>& mins) {
327  for (int32_t l = 0; l < mins.GetSize(); l++) {
328    mins[l] = (uint8_t)0;
329  }
330  for (int32_t i = 0; i < 6; i++) {
331    intCharCounts[i] = (int32_t)ceil(charCounts[i]);
332    int32_t current = intCharCounts[i];
333    if (min > current) {
334      min = current;
335      for (int32_t j = 0; j < mins.GetSize(); j++) {
336        mins[j] = (uint8_t)0;
337      }
338    }
339    if (min == current) {
340      mins[i]++;
341    }
342  }
343  return min;
344}
345int32_t CBC_HighLevelEncoder::getMinimumCount(
346    CFX_ArrayTemplate<uint8_t>& mins) {
347  int32_t minCount = 0;
348  for (int32_t i = 0; i < 6; i++) {
349    minCount += mins[i];
350  }
351  return minCount;
352}
353bool CBC_HighLevelEncoder::isNativeC40(FX_WCHAR ch) {
354  return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z');
355}
356bool CBC_HighLevelEncoder::isNativeText(FX_WCHAR ch) {
357  return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z');
358}
359bool CBC_HighLevelEncoder::isNativeX12(FX_WCHAR ch) {
360  return isX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') ||
361         (ch >= 'A' && ch <= 'Z');
362}
363bool CBC_HighLevelEncoder::isX12TermSep(FX_WCHAR ch) {
364  return (ch == '\r') || (ch == '*') || (ch == '>');
365}
366bool CBC_HighLevelEncoder::isNativeEDIFACT(FX_WCHAR ch) {
367  return ch >= ' ' && ch <= '^';
368}
369bool CBC_HighLevelEncoder::isSpecialB256(FX_WCHAR ch) {
370  return false;
371}
372