15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "encodings/compact_lang_det/win/normalizedunicodetext.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <tchar.h>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <windows.h>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <winnls.h>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "encodings/compact_lang_det/win/cld_scopedptr.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Pointer types for the two functions that are looked up by name in
// Normaliz.dll (see NormalizationAPI below).
// The prototypes are copied from MSDN.
typedef BOOL (WINAPI *IsNormalizedStringFunction)(NORM_FORM NormForm,
                                                  LPCWSTR lpSrcString,
                                                  int cwSrcLength);
typedef int (WINAPI *NormalizeStringFunction)(NORM_FORM NormForm,
                                              LPCWSTR lpSrcString,
                                              int cwSrcLength,
                                              LPWSTR lpDstString,
                                              int cwDstLength);
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A class to provide an access to Normaliz.dll functions.
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// New normalization API implemented in Normaliz.dll is available starting
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// from Windows XP SP2, that's why we have to bind to it dynamically.
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class NormalizationAPI {
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Creates fully initialized NormalizationAPI object.
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Loads DLL and binds all referenced functions.
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NormalizationAPI()
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      : library_(_T("Normaliz.dll")) {
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (library_.IsValid()) {
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      is_normalized_string_.Bind(library_.handle(), "IsNormalizedString");
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      normalize_string_.Bind(library_.handle(), "NormalizeString");
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Proxy functions for the ones loaded from DLL.
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BOOL IsNormalizedString(NORM_FORM NormForm, LPCWSTR lpSrcString,
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          int cwSrcLength) {
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!is_normalized_string_.IsValid())
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return FALSE;
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return is_normalized_string_.function()(NormForm, lpSrcString, cwSrcLength);
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int NormalizeString(NORM_FORM NormForm, LPCWSTR lpSrcString, int cwSrcLength,
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      LPWSTR lpDstString, int cwDstLength) {
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!normalize_string_.IsValid()) {
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ::SetLastError(ERROR_INVALID_FUNCTION);
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return 0;
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return normalize_string_.function()(NormForm, lpSrcString, cwSrcLength,
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                        lpDstString, cwDstLength);
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if all functions were bound successfully.
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This implies that library_ itself was loaded successfully.
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool IsValid() const {
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return is_normalized_string_.IsValid() && normalize_string_.IsValid();
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Holds a handle to loaded Normaliz.dll.
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ScopedLibrary library_;
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Pointers to the functions loaded from Normaliz.dll.
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FunctionFromDll<IsNormalizedStringFunction> is_normalized_string_;
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FunctionFromDll<NormalizeStringFunction> normalize_string_;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(NormalizationAPI);
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// The file-local NormalizationAPI instance used by the code below.
// Its constructor tries to load Normaliz.dll and bind the normalization
// functions; check normalization_api.IsValid() before relying on them.
static NormalizationAPI normalization_api;
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// NormalizedUnicodeText
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NormalizedUnicodeText::NormalizedUnicodeText()
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : normalized_text_(NULL) {
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)DWORD NormalizedUnicodeText::Normalize(NORM_FORM normalization_form,
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       const WCHAR* text) {
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DWORD result = 0;
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  normalized_text_ = TryToNormalizeText(normalization_form, text, &result);
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return result;
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const WCHAR* NormalizedUnicodeText::TryToNormalizeText(
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NORM_FORM normalization_form, const WCHAR* text, DWORD *error_code) {
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!text) {
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    text_.reset();
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return text;
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  _ASSERT(NULL != error_code);
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!error_code)
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return text;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!normalization_api.IsValid()) {
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Fall back to the previous version of normalization API.
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int folded_text_size = ::FoldStringW(MAP_PRECOMPOSED, text, -1, NULL, 0);
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!folded_text_size) {
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *error_code = ::GetLastError();
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return text;
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    text_.reset(new WCHAR[folded_text_size]);
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!text_.get()) {
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *error_code = ERROR_OUTOFMEMORY;
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return text;
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int folding_result =
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ::FoldStringW(MAP_PRECOMPOSED, text, -1, text_.get(), folded_text_size);
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!folding_result) {
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *error_code = ::GetLastError();
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      text_.reset();
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return text;
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return text_.get();
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // No need to allocate anything when text is already normalized.
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (normalization_api.IsNormalizedString(normalization_form, text, -1))
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return text;
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Get the first approximation for the buffer size required to store
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // normalized text.
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int normalized_text_size_guess =
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      normalization_api.NormalizeString(normalization_form, text, -1, NULL, 0);
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (normalized_text_size_guess > 0) {
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    text_.reset(new WCHAR[normalized_text_size_guess]);
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!text_.get()) {
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *error_code = ERROR_OUTOFMEMORY;
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int normalized_text_size =
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        normalization_api.NormalizeString(normalization_form, text, -1,
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                          text_.get(),
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                          normalized_text_size_guess);
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (normalized_text_size > 0) {
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Text was successfully converted.
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return text_.get();
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ERROR_INSUFFICIENT_BUFFER != ::GetLastError()) {
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      *error_code = ::GetLastError();
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Text cannot be normalized, use the original.
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // By the way, ERROR_SUCCESS is a puzzling case.
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // MSDN says 'The action completed successfully but yielded no results'.
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Does this mean that output buffer was not changed?
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Anyway, just in case, also return the original text.
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Try again with the corrected buffer size.
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    normalized_text_size_guess = -normalized_text_size;
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Use the original text in case of any problem with normalization.
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  text_.reset();
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return text;
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
173