1ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov// Copyright 2014 PDFium Authors. All rights reserved. 2ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov// Use of this source code is governed by a BSD-style license that can be 3ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov// found in the LICENSE file. 4ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 5ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 7ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#include "../../include/fpdftext/fpdf_text.h" 8ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization[65536]; 9ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization_Map1[5376]; 10ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization_Map2[1734]; 11ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization_Map3[1164]; 12ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovextern const FX_WCHAR g_UnicodeData_Normalization_Map4[488]; 13ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFX_LPCWSTR g_UnicodeData_Normalization_Maps[5] = { 14ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov NULL, 15ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov g_UnicodeData_Normalization_Map1, 16ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov g_UnicodeData_Normalization_Map2, 17ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov g_UnicodeData_Normalization_Map3, 18ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov g_UnicodeData_Normalization_Map4 19ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov}; 20ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_LPWSTR pDst) 21ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{ 22ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov wch = wch & 0xFFFF; 23ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov FX_WCHAR wFind = g_UnicodeData_Normalization[wch]; 24ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if (!wFind) { 25ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if (pDst) { 26ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov *pDst = wch; 27ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } 28ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return 1; 29ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } 30ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if(wFind >= 0x8000) { 31ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov wch = wFind - 0x8000; 32ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov wFind = 1; 33ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } else { 34ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov wch = wFind & 0x0FFF; 35ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov wFind >>= 12; 36ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } 37ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov FX_LPCWSTR pMap = g_UnicodeData_Normalization_Maps[wFind]; 38ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if (pMap == g_UnicodeData_Normalization_Map4) { 39ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov pMap = g_UnicodeData_Normalization_Map4 + wch; 40ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov wFind = (FX_WCHAR)(*pMap ++); 41ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } else { 42ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov pMap += wch; 43ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } 44ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if (pDst) { 45ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov FX_WCHAR n = wFind; 46ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov while (n --) { 47ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov *pDst ++ = *pMap ++; 48ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } 49ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } 50ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return (FX_STRSIZE)wFind; 51ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov} 52ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFX_STRSIZE FX_WideString_GetNormalization(FX_WSTR wsSrc, FX_LPWSTR pDst) 53ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{ 54ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov FX_STRSIZE nCount = 0; 55ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov for (FX_STRSIZE len = 0; len < wsSrc.GetLength(); len ++) { 56ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov FX_WCHAR wch = wsSrc.GetAt(len); 57ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if(pDst) { 58ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov nCount += FX_Unicode_GetNormalization(wch, pDst + nCount); 59ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } else { 60ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov nCount += FX_Unicode_GetNormalization(wch, pDst); 61ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } 62ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } 63ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return nCount; 64ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov} 65ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFX_STRSIZE FX_WideString_GetNormalization(FX_WSTR wsSrc, CFX_WideString &wsDst) 66ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov{ 67ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov FX_STRSIZE nLen = FX_WideString_GetNormalization(wsSrc, (FX_LPWSTR)NULL); 68ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if (!nLen) { 69ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return 0; 70ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov } 71ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov FX_LPWSTR pBuf = wsDst.GetBuffer(nLen); 72ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov FX_WideString_GetNormalization(wsSrc, pBuf); 73ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov wsDst.ReleaseBuffer(nLen); 74ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return nLen; 75ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov} 76