// BC_PDF417HighLevelEncoder.cpp revision 4d3acf4ec42bf6e838f9060103aff98fbf170794
// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
// Original code is licensed as follows:
/*
 * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
22
23#include "xfa/fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
24
25#include "third_party/bigint/BigIntegerLibrary.hh"
26#include "xfa/fxbarcode/BC_UtilCodingConvert.h"
27#include "xfa/fxbarcode/pdf417/BC_PDF417Compaction.h"
28#include "xfa/fxbarcode/utils.h"
29
// Text Compaction sub-mode identifiers used as the state of encodeText().
// They are macros (not class members) so they can be used as `case` labels;
// the remaining sub-mode is the class member SUBMODE_PUNCTUATION.
#define SUBMODE_ALPHA 0  // Selected by isAlphaUpper(): 'A'..'Z' and space.
#define SUBMODE_LOWER 1  // Selected by isAlphaLower(): 'a'..'z' and space.
#define SUBMODE_MIXED 2  // Selected by isMixed(): characters in MIXED.
33
34int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0;
35int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1;
36int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2;
37int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3;
38int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900;
39int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901;
40int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902;
41int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913;
42int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924;
43uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = {
44    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
45    35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0,  32, 0, 0,  0};
46uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = {
47    59, 60, 62, 64, 91, 92, 93,  95, 96, 126, 33, 13,  9,   44, 58,
48    10, 45, 46, 36, 47, 34, 124, 42, 40, 41,  63, 123, 125, 39, 0};
49int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0};
50int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0};
51
52void CBC_PDF417HighLevelEncoder::Initialize() {
53  Inverse();
54}
55
56void CBC_PDF417HighLevelEncoder::Finalize() {}
57
58CFX_WideString CBC_PDF417HighLevelEncoder::encodeHighLevel(
59    CFX_WideString wideMsg,
60    Compaction compaction,
61    int32_t& e) {
62  CFX_ByteString bytes;
63  CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes);
64  CFX_WideString msg;
65  int32_t len = bytes.GetLength();
66  for (int32_t i = 0; i < len; i++) {
67    FX_WCHAR ch = (FX_WCHAR)(bytes.GetAt(i) & 0xff);
68    if (ch == '?' && bytes.GetAt(i) != '?') {
69      e = BCExceptionCharactersOutsideISO88591Encoding;
70      return CFX_WideString();
71    }
72    msg += ch;
73  }
74  CFX_ByteArray byteArr;
75  for (int32_t k = 0; k < bytes.GetLength(); k++) {
76    byteArr.Add(bytes.GetAt(k));
77  }
78  CFX_WideString sb;
79  len = msg.GetLength();
80  int32_t p = 0;
81  int32_t textSubMode = SUBMODE_ALPHA;
82  if (compaction == TEXT) {
83    encodeText(msg, p, len, sb, textSubMode);
84  } else if (compaction == BYTES) {
85    encodeBinary(&byteArr, p, byteArr.GetSize(), BYTE_COMPACTION, sb);
86  } else if (compaction == NUMERIC) {
87    sb += (FX_WCHAR)LATCH_TO_NUMERIC;
88    encodeNumeric(msg, p, len, sb);
89  } else {
90    int32_t encodingMode = LATCH_TO_TEXT;
91    while (p < len) {
92      int32_t n = determineConsecutiveDigitCount(msg, p);
93      if (n >= 13) {
94        sb += (FX_WCHAR)LATCH_TO_NUMERIC;
95        encodingMode = NUMERIC_COMPACTION;
96        textSubMode = SUBMODE_ALPHA;
97        encodeNumeric(msg, p, n, sb);
98        p += n;
99      } else {
100        int32_t t = determineConsecutiveTextCount(msg, p);
101        if (t >= 5 || n == len) {
102          if (encodingMode != TEXT_COMPACTION) {
103            sb += (FX_WCHAR)LATCH_TO_TEXT;
104            encodingMode = TEXT_COMPACTION;
105            textSubMode = SUBMODE_ALPHA;
106          }
107          textSubMode = encodeText(msg, p, t, sb, textSubMode);
108          p += t;
109        } else {
110          int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e);
111          BC_EXCEPTION_CHECK_ReturnValue(e, (FX_WCHAR)' ');
112          if (b == 0) {
113            b = 1;
114          }
115          if (b == 1 && encodingMode == TEXT_COMPACTION) {
116            encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb);
117          } else {
118            encodeBinary(&byteArr, p, b, encodingMode, sb);
119            encodingMode = BYTE_COMPACTION;
120            textSubMode = SUBMODE_ALPHA;
121          }
122          p += b;
123        }
124      }
125    }
126  }
127  return sb;
128}
129
130void CBC_PDF417HighLevelEncoder::Inverse() {
131  for (size_t l = 0; l < FX_ArraySize(MIXED); ++l)
132    MIXED[l] = -1;
133
134  for (uint8_t i = 0; i < FX_ArraySize(TEXT_MIXED_RAW); ++i) {
135    uint8_t b = TEXT_MIXED_RAW[i];
136    if (b != 0)
137      MIXED[b] = i;
138  }
139
140  for (size_t l = 0; l < FX_ArraySize(PUNCTUATION); ++l)
141    PUNCTUATION[l] = -1;
142
143  for (uint8_t i = 0; i < FX_ArraySize(TEXT_PUNCTUATION_RAW); ++i) {
144    uint8_t b = TEXT_PUNCTUATION_RAW[i];
145    if (b != 0)
146      PUNCTUATION[b] = i;
147  }
148}
149
150int32_t CBC_PDF417HighLevelEncoder::encodeText(CFX_WideString msg,
151                                               int32_t startpos,
152                                               int32_t count,
153                                               CFX_WideString& sb,
154                                               int32_t initialSubmode) {
155  CFX_WideString tmp;
156  int32_t submode = initialSubmode;
157  int32_t idx = 0;
158  while (true) {
159    FX_WCHAR ch = msg.GetAt(startpos + idx);
160    switch (submode) {
161      case SUBMODE_ALPHA:
162        if (isAlphaUpper(ch)) {
163          if (ch == ' ') {
164            tmp += (FX_WCHAR)26;
165          } else {
166            tmp += (FX_WCHAR)(ch - 65);
167          }
168        } else {
169          if (isAlphaLower(ch)) {
170            submode = SUBMODE_LOWER;
171            tmp += (FX_WCHAR)27;
172            continue;
173          } else if (isMixed(ch)) {
174            submode = SUBMODE_MIXED;
175            tmp += (FX_WCHAR)28;
176            continue;
177          } else {
178            tmp += (FX_WCHAR)29;
179            tmp += PUNCTUATION[ch];
180            break;
181          }
182        }
183        break;
184      case SUBMODE_LOWER:
185        if (isAlphaLower(ch)) {
186          if (ch == ' ') {
187            tmp += (FX_WCHAR)26;
188          } else {
189            tmp += (FX_WCHAR)(ch - 97);
190          }
191        } else {
192          if (isAlphaUpper(ch)) {
193            tmp += (FX_WCHAR)27;
194            tmp += (FX_WCHAR)(ch - 65);
195            break;
196          } else if (isMixed(ch)) {
197            submode = SUBMODE_MIXED;
198            tmp += (FX_WCHAR)28;
199            continue;
200          } else {
201            tmp += (FX_WCHAR)29;
202            tmp += PUNCTUATION[ch];
203            break;
204          }
205        }
206        break;
207      case SUBMODE_MIXED:
208        if (isMixed(ch)) {
209          tmp += MIXED[ch];
210        } else {
211          if (isAlphaUpper(ch)) {
212            submode = SUBMODE_ALPHA;
213            tmp += (FX_WCHAR)28;
214            continue;
215          } else if (isAlphaLower(ch)) {
216            submode = SUBMODE_LOWER;
217            tmp += (FX_WCHAR)27;
218            continue;
219          } else {
220            if (startpos + idx + 1 < count) {
221              FX_WCHAR next = msg.GetAt(startpos + idx + 1);
222              if (isPunctuation(next)) {
223                submode = SUBMODE_PUNCTUATION;
224                tmp += (FX_WCHAR)25;
225                continue;
226              }
227            }
228            tmp += (FX_WCHAR)29;
229            tmp += PUNCTUATION[ch];
230          }
231        }
232        break;
233      default:
234        if (isPunctuation(ch)) {
235          tmp += PUNCTUATION[ch];
236        } else {
237          submode = SUBMODE_ALPHA;
238          tmp += (FX_WCHAR)29;
239          continue;
240        }
241    }
242    idx++;
243    if (idx >= count) {
244      break;
245    }
246  }
247  FX_WCHAR h = 0;
248  int32_t len = tmp.GetLength();
249  for (int32_t i = 0; i < len; i++) {
250    bool odd = (i % 2) != 0;
251    if (odd) {
252      h = (FX_WCHAR)((h * 30) + tmp.GetAt(i));
253      sb += h;
254    } else {
255      h = tmp.GetAt(i);
256    }
257  }
258  if ((len % 2) != 0) {
259    sb += (FX_WCHAR)((h * 30) + 29);
260  }
261  return submode;
262}
263void CBC_PDF417HighLevelEncoder::encodeBinary(CFX_ByteArray* bytes,
264                                              int32_t startpos,
265                                              int32_t count,
266                                              int32_t startmode,
267                                              CFX_WideString& sb) {
268  if (count == 1 && startmode == TEXT_COMPACTION) {
269    sb += (FX_WCHAR)SHIFT_TO_BYTE;
270  }
271  int32_t idx = startpos;
272  int32_t i = 0;
273  if (count >= 6) {
274    sb += (FX_WCHAR)LATCH_TO_BYTE;
275    FX_WCHAR chars[5];
276    while ((startpos + count - idx) >= 6) {
277      int64_t t = 0;
278      for (i = 0; i < 6; i++) {
279        t <<= 8;
280        t += bytes->GetAt(idx + i) & 0xff;
281      }
282      for (i = 0; i < 5; i++) {
283        chars[i] = (FX_WCHAR)(t % 900);
284        t /= 900;
285      }
286      for (i = 4; i >= 0; i--) {
287        sb += (chars[i]);
288      }
289      idx += 6;
290    }
291  }
292  if (idx < startpos + count) {
293    sb += (FX_WCHAR)LATCH_TO_BYTE_PADDED;
294  }
295  for (i = idx; i < startpos + count; i++) {
296    int32_t ch = bytes->GetAt(i) & 0xff;
297    sb += (FX_WCHAR)ch;
298  }
299}
300void CBC_PDF417HighLevelEncoder::encodeNumeric(CFX_WideString msg,
301                                               int32_t startpos,
302                                               int32_t count,
303                                               CFX_WideString& sb) {
304  int32_t idx = 0;
305  BigInteger num900 = 900;
306  while (idx < count) {
307    CFX_WideString tmp;
308    int32_t len = 44 < count - idx ? 44 : count - idx;
309    CFX_ByteString part =
310        ((FX_WCHAR)'1' + msg.Mid(startpos + idx, len)).UTF8Encode();
311    BigInteger bigint = stringToBigInteger(part.c_str());
312    do {
313      int32_t c = (bigint % num900).toInt();
314      tmp += (FX_WCHAR)(c);
315      bigint = bigint / num900;
316    } while (!bigint.isZero());
317    for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) {
318      sb += tmp.GetAt(i);
319    }
320    idx += len;
321  }
322}
323bool CBC_PDF417HighLevelEncoder::isDigit(FX_WCHAR ch) {
324  return ch >= '0' && ch <= '9';
325}
326bool CBC_PDF417HighLevelEncoder::isAlphaUpper(FX_WCHAR ch) {
327  return ch == ' ' || (ch >= 'A' && ch <= 'Z');
328}
329bool CBC_PDF417HighLevelEncoder::isAlphaLower(FX_WCHAR ch) {
330  return ch == ' ' || (ch >= 'a' && ch <= 'z');
331}
332bool CBC_PDF417HighLevelEncoder::isMixed(FX_WCHAR ch) {
333  return MIXED[ch] != -1;
334}
335bool CBC_PDF417HighLevelEncoder::isPunctuation(FX_WCHAR ch) {
336  return PUNCTUATION[ch] != -1;
337}
338bool CBC_PDF417HighLevelEncoder::isText(FX_WCHAR ch) {
339  return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
340}
341int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount(
342    CFX_WideString msg,
343    int32_t startpos) {
344  int32_t count = 0;
345  int32_t len = msg.GetLength();
346  int32_t idx = startpos;
347  if (idx < len) {
348    FX_WCHAR ch = msg.GetAt(idx);
349    while (isDigit(ch) && idx < len) {
350      count++;
351      idx++;
352      if (idx < len) {
353        ch = msg.GetAt(idx);
354      }
355    }
356  }
357  return count;
358}
359int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount(
360    CFX_WideString msg,
361    int32_t startpos) {
362  int32_t len = msg.GetLength();
363  int32_t idx = startpos;
364  while (idx < len) {
365    FX_WCHAR ch = msg.GetAt(idx);
366    int32_t numericCount = 0;
367    while (numericCount < 13 && isDigit(ch) && idx < len) {
368      numericCount++;
369      idx++;
370      if (idx < len) {
371        ch = msg.GetAt(idx);
372      }
373    }
374    if (numericCount >= 13) {
375      return idx - startpos - numericCount;
376    }
377    if (numericCount > 0) {
378      continue;
379    }
380    ch = msg.GetAt(idx);
381    if (!isText(ch)) {
382      break;
383    }
384    idx++;
385  }
386  return idx - startpos;
387}
388int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount(
389    CFX_WideString msg,
390    CFX_ByteArray* bytes,
391    int32_t startpos,
392    int32_t& e) {
393  int32_t len = msg.GetLength();
394  int32_t idx = startpos;
395  while (idx < len) {
396    FX_WCHAR ch = msg.GetAt(idx);
397    int32_t numericCount = 0;
398    while (numericCount < 13 && isDigit(ch)) {
399      numericCount++;
400      int32_t i = idx + numericCount;
401      if (i >= len) {
402        break;
403      }
404      ch = msg.GetAt(i);
405    }
406    if (numericCount >= 13) {
407      return idx - startpos;
408    }
409    int32_t textCount = 0;
410    while (textCount < 5 && isText(ch)) {
411      textCount++;
412      int32_t i = idx + textCount;
413      if (i >= len) {
414        break;
415      }
416      ch = msg.GetAt(i);
417    }
418    if (textCount >= 5) {
419      return idx - startpos;
420    }
421    ch = msg.GetAt(idx);
422    if (bytes->GetAt(idx) == 63 && ch != '?') {
423      e = BCExceptionNonEncodableCharacterDetected;
424      return -1;
425    }
426    idx++;
427  }
428  return idx - startpos;
429}
430