1e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Copyright 2014 PDFium Authors. All rights reserved. 2e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Use of this source code is governed by a BSD-style license that can be 3e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// found in the LICENSE file. 4e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 5e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov 7ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "core/include/fxcrt/fx_string.h" 8ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "unicodenormalizationdata.h" 9ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 10ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannconst FX_WCHAR* const g_UnicodeData_Normalization_Maps[5] = { 11ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann nullptr, 12e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov g_UnicodeData_Normalization_Map1, 13e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov g_UnicodeData_Normalization_Map2, 14e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov g_UnicodeData_Normalization_Map3, 15ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann g_UnicodeData_Normalization_Map4}; 16ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann 17ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. MoltmannFX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst) { 18ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann wch = wch & 0xFFFF; 19ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_WCHAR wFind = g_UnicodeData_Normalization[wch]; 20ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (!wFind) { 21e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov if (pDst) { 22ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *pDst = wch; 23e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 24ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return 1; 25ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 26ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (wFind >= 0x8000) { 27ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann wch = wFind - 0x8000; 28ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann wFind = 1; 29ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } else { 30ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann wch = wFind & 0x0FFF; 31ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann wFind >>= 12; 32ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 33ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann const FX_WCHAR* pMap = g_UnicodeData_Normalization_Maps[wFind]; 34ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (pMap == g_UnicodeData_Normalization_Map4) { 35ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann pMap = g_UnicodeData_Normalization_Map4 + wch; 36ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann wFind = (FX_WCHAR)(*pMap++); 37ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } else { 38ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann pMap += wch; 39ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 40ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (pDst) { 41ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_WCHAR n = wFind; 42ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann while (n--) { 43ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *pDst++ = *pMap++; 44e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 45ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 46ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return (FX_STRSIZE)wFind; 47e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 48ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. MoltmannFX_STRSIZE FX_WideString_GetNormalization(const CFX_WideStringC& wsSrc, 49ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_WCHAR* pDst) { 50ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_STRSIZE nCount = 0; 51ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann for (FX_STRSIZE len = 0; len < wsSrc.GetLength(); len++) { 52ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_WCHAR wch = wsSrc.GetAt(len); 53ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (pDst) { 54ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann nCount += FX_Unicode_GetNormalization(wch, pDst + nCount); 55ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } else { 56ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann nCount += FX_Unicode_GetNormalization(wch, pDst); 57e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov } 58ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 59ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return nCount; 60ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann} 61ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. MoltmannFX_STRSIZE FX_WideString_GetNormalization(const CFX_WideStringC& wsSrc, 62ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann CFX_WideString& wsDst) { 63ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_STRSIZE nLen = FX_WideString_GetNormalization(wsSrc, (FX_WCHAR*)NULL); 64ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann if (!nLen) { 65ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return 0; 66ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann } 67ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_WCHAR* pBuf = wsDst.GetBuffer(nLen); 68ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann FX_WideString_GetNormalization(wsSrc, pBuf); 69ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann wsDst.ReleaseBuffer(nLen); 70ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann return nLen; 71e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov} 72