// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
7ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#include "../../include/fpdftext/fpdf_text.h"
8ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization[65536];
9ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization_Map1[5376];
10ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization_Map2[1734];
11ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization_Map3[1164];
12ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization_Map4[488];
13ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFX_LPCWSTR g_UnicodeData_Normalization_Maps[5] = {
14ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    NULL,
15ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    g_UnicodeData_Normalization_Map1,
16ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    g_UnicodeData_Normalization_Map2,
17ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    g_UnicodeData_Normalization_Map3,
18ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    g_UnicodeData_Normalization_Map4
19ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov};
20ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst)
21ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
22ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    wch = wch & 0xFFFF;
23ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_WCHAR wFind = g_UnicodeData_Normalization[wch];
24ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    if (!wFind) {
25ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if (pDst) {
26ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            *pDst = wch;
27ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
28ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return 1;
29ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
30ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    if(wFind >= 0x8000) {
31ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        wch = wFind - 0x8000;
32ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        wFind = 1;
33ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    } else {
34ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        wch = wFind & 0x0FFF;
35ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        wFind >>= 12;
36ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
37ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_LPCWSTR pMap = g_UnicodeData_Normalization_Maps[wFind];
38ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    if (pMap == g_UnicodeData_Normalization_Map4) {
39ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        pMap = g_UnicodeData_Normalization_Map4 + wch;
40ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        wFind = (FX_WCHAR)(*pMap ++);
41ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    } else {
42ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        pMap += wch;
43ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
44ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    if (pDst) {
45ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        FX_WCHAR n = wFind;
46ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        while (n --) {
47ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            *pDst ++ = *pMap ++;
48ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
49ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
50ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    return (FX_STRSIZE)wFind;
51ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov}
52ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFX_STRSIZE FX_WideString_GetNormalization(FX_WSTR wsSrc, FX_LPWSTR pDst)
53ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
54ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_STRSIZE nCount = 0;
55ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    for (FX_STRSIZE len = 0; len < wsSrc.GetLength(); len ++) {
56ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        FX_WCHAR wch = wsSrc.GetAt(len);
57ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if(pDst) {
58ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            nCount += FX_Unicode_GetNormalization(wch, pDst + nCount);
59ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        } else {
60ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            nCount += FX_Unicode_GetNormalization(wch, pDst);
61ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        }
62ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
63ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    return nCount;
64ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov}
65ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFX_STRSIZE FX_WideString_GetNormalization(FX_WSTR wsSrc, CFX_WideString &wsDst)
66ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{
67ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_STRSIZE nLen = FX_WideString_GetNormalization(wsSrc, (FX_LPWSTR)NULL);
68ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    if (!nLen) {
69ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return 0;
70ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    }
71ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_LPWSTR pBuf = wsDst.GetBuffer(nLen);
72ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    FX_WideString_GetNormalization(wsSrc, pBuf);
73ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    wsDst.ReleaseBuffer(nLen);
74ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    return nLen;
75ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov}
76