15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "encodings/compact_lang_det/win/normalizedunicodetext.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <tchar.h> 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <windows.h> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <winnls.h> 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "encodings/compact_lang_det/win/cld_scopedptr.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Function prototypes copied from MSDN. 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef BOOL (WINAPI *IsNormalizedStringFunction)(NORM_FORM NormForm, 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LPCWSTR lpSrcString, 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cwSrcLength); 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef int (WINAPI *NormalizeStringFunction)(NORM_FORM NormForm, 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LPCWSTR lpSrcString, 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cwSrcLength, 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LPWSTR lpDstString, 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cwDstLength); 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A class to provide an access to Normaliz.dll functions. 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// New normalization API implemented in Normaliz.dll is available starting 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// from Windows XP SP2, that's why we have to bind to it dynamically. 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class NormalizationAPI { 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Creates fully initialized NormalizationAPI object. 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Loads DLL and binds all referenced functions. 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NormalizationAPI() 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : library_(_T("Normaliz.dll")) { 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (library_.IsValid()) { 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) is_normalized_string_.Bind(library_.handle(), "IsNormalizedString"); 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) normalize_string_.Bind(library_.handle(), "NormalizeString"); 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Proxy functions for the ones loaded from DLL. 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BOOL IsNormalizedString(NORM_FORM NormForm, LPCWSTR lpSrcString, 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cwSrcLength) { 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!is_normalized_string_.IsValid()) 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return FALSE; 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return is_normalized_string_.function()(NormForm, lpSrcString, cwSrcLength); 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int NormalizeString(NORM_FORM NormForm, LPCWSTR lpSrcString, int cwSrcLength, 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LPWSTR lpDstString, int cwDstLength) { 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!normalize_string_.IsValid()) { 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ::SetLastError(ERROR_INVALID_FUNCTION); 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0; 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return normalize_string_.function()(NormForm, lpSrcString, cwSrcLength, 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) lpDstString, cwDstLength); 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if all functions were bound successfully. 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This implies that library_ itself was loaded successfully. 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool IsValid() const { 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return is_normalized_string_.IsValid() && normalize_string_.IsValid(); 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Holds a handle to loaded Normaliz.dll. 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ScopedLibrary library_; 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Pointers to the functions loaded from Normaliz.dll. 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FunctionFromDll<IsNormalizedStringFunction> is_normalized_string_; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FunctionFromDll<NormalizeStringFunction> normalize_string_; 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(NormalizationAPI); 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static NormalizationAPI normalization_api; 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// NormalizedUnicodeText 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NormalizedUnicodeText::NormalizedUnicodeText() 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : normalized_text_(NULL) { 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DWORD NormalizedUnicodeText::Normalize(NORM_FORM normalization_form, 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const WCHAR* text) { 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DWORD result = 0; 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) normalized_text_ = TryToNormalizeText(normalization_form, text, &result); 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return result; 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const WCHAR* NormalizedUnicodeText::TryToNormalizeText( 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NORM_FORM normalization_form, const WCHAR* text, DWORD *error_code) { 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!text) { 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_.reset(); 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return text; 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) _ASSERT(NULL != error_code); 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!error_code) 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return text; 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!normalization_api.IsValid()) { 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Fall back to the previous version of normalization API. 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int folded_text_size = ::FoldStringW(MAP_PRECOMPOSED, text, -1, NULL, 0); 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!folded_text_size) { 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *error_code = ::GetLastError(); 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return text; 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_.reset(new WCHAR[folded_text_size]); 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!text_.get()) { 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *error_code = ERROR_OUTOFMEMORY; 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return text; 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int folding_result = 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ::FoldStringW(MAP_PRECOMPOSED, text, -1, text_.get(), folded_text_size); 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!folding_result) { 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *error_code = ::GetLastError(); 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_.reset(); 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return text; 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return text_.get(); 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // No need to allocate anything when text is already normalized. 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (normalization_api.IsNormalizedString(normalization_form, text, -1)) 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return text; 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Get the first approximation for the buffer size required to store 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // normalized text. 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int normalized_text_size_guess = 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) normalization_api.NormalizeString(normalization_form, text, -1, NULL, 0); 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (normalized_text_size_guess > 0) { 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_.reset(new WCHAR[normalized_text_size_guess]); 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!text_.get()) { 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *error_code = ERROR_OUTOFMEMORY; 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int normalized_text_size = 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) normalization_api.NormalizeString(normalization_form, text, -1, 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_.get(), 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) normalized_text_size_guess); 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (normalized_text_size > 0) { 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Text was successfully converted. 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return text_.get(); 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ERROR_INSUFFICIENT_BUFFER != ::GetLastError()) { 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *error_code = ::GetLastError(); 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Text cannot be normalized, use the original. 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // By the way, ERROR_SUCCESS is a puzzling case. 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // MSDN says 'The action completed successfully but yielded no results'. 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Does this mean that output buffer was not changed? 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Anyway, just in case, also return the original text. 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Try again with the corrected buffer size. 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) normalized_text_size_guess = -normalized_text_size; 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Use the original text in case of any problem with normalization. 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_.reset(); 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return text; 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 173