// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov
7ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "core/include/fxcrt/fx_string.h"
8ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "unicodenormalizationdata.h"
9ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
10ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannconst FX_WCHAR* const g_UnicodeData_Normalization_Maps[5] = {
11ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    nullptr,
12e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    g_UnicodeData_Normalization_Map1,
13e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    g_UnicodeData_Normalization_Map2,
14e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    g_UnicodeData_Normalization_Map3,
15ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    g_UnicodeData_Normalization_Map4};
16ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
17ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. MoltmannFX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst) {
18ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  wch = wch & 0xFFFF;
19ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_WCHAR wFind = g_UnicodeData_Normalization[wch];
20ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  if (!wFind) {
21e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    if (pDst) {
22ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      *pDst = wch;
23e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
24ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    return 1;
25ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  }
26ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  if (wFind >= 0x8000) {
27ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    wch = wFind - 0x8000;
28ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    wFind = 1;
29ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  } else {
30ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    wch = wFind & 0x0FFF;
31ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    wFind >>= 12;
32ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  }
33ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  const FX_WCHAR* pMap = g_UnicodeData_Normalization_Maps[wFind];
34ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  if (pMap == g_UnicodeData_Normalization_Map4) {
35ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    pMap = g_UnicodeData_Normalization_Map4 + wch;
36ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    wFind = (FX_WCHAR)(*pMap++);
37ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  } else {
38ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    pMap += wch;
39ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  }
40ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  if (pDst) {
41ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    FX_WCHAR n = wFind;
42ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    while (n--) {
43ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      *pDst++ = *pMap++;
44e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
45ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  }
46ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  return (FX_STRSIZE)wFind;
47e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
48ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. MoltmannFX_STRSIZE FX_WideString_GetNormalization(const CFX_WideStringC& wsSrc,
49ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                                          FX_WCHAR* pDst) {
50ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_STRSIZE nCount = 0;
51ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  for (FX_STRSIZE len = 0; len < wsSrc.GetLength(); len++) {
52ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    FX_WCHAR wch = wsSrc.GetAt(len);
53ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    if (pDst) {
54ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      nCount += FX_Unicode_GetNormalization(wch, pDst + nCount);
55ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    } else {
56ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann      nCount += FX_Unicode_GetNormalization(wch, pDst);
57e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    }
58ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  }
59ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  return nCount;
60ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
61ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. MoltmannFX_STRSIZE FX_WideString_GetNormalization(const CFX_WideStringC& wsSrc,
62ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                                          CFX_WideString& wsDst) {
63ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_STRSIZE nLen = FX_WideString_GetNormalization(wsSrc, (FX_WCHAR*)NULL);
64ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  if (!nLen) {
65ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    return 0;
66ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  }
67ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_WCHAR* pBuf = wsDst.GetBuffer(nLen);
68ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  FX_WideString_GetNormalization(wsSrc, pBuf);
69ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  wsDst.ReleaseBuffer(nLen);
70ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  return nLen;
71e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov}
72