normalizedunicodetext.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "encodings/compact_lang_det/win/normalizedunicodetext.h"
6
7#include <tchar.h>
8#include <windows.h>
9#include <winnls.h>
10
11#include "encodings/compact_lang_det/win/cld_scopedptr.h"
12
13
14namespace {
15
16// Function prototypes copied from MSDN.
17typedef BOOL (WINAPI *IsNormalizedStringFunction)(NORM_FORM NormForm,
18                                                  LPCWSTR lpSrcString,
19                                                  int cwSrcLength);
20typedef int (WINAPI *NormalizeStringFunction)(NORM_FORM NormForm,
21                                              LPCWSTR lpSrcString,
22                                              int cwSrcLength,
23                                              LPWSTR lpDstString,
24                                              int cwDstLength);
25
26// A class to provide an access to Normaliz.dll functions.
27// New normalization API implemented in Normaliz.dll is available starting
28// from Windows XP SP2, that's why we have to bind to it dynamically.
29class NormalizationAPI {
30 public:
31  // Creates fully initialized NormalizationAPI object.
32  // Loads DLL and binds all referenced functions.
33  NormalizationAPI()
34      : library_(_T("Normaliz.dll")) {
35    if (library_.IsValid()) {
36      is_normalized_string_.Bind(library_.handle(), "IsNormalizedString");
37      normalize_string_.Bind(library_.handle(), "NormalizeString");
38    }
39  }
40
41  // Proxy functions for the ones loaded from DLL.
42  BOOL IsNormalizedString(NORM_FORM NormForm, LPCWSTR lpSrcString,
43                          int cwSrcLength) {
44    if (!is_normalized_string_.IsValid())
45      return FALSE;
46    return is_normalized_string_.function()(NormForm, lpSrcString, cwSrcLength);
47  }
48  int NormalizeString(NORM_FORM NormForm, LPCWSTR lpSrcString, int cwSrcLength,
49                      LPWSTR lpDstString, int cwDstLength) {
50    if (!normalize_string_.IsValid()) {
51      ::SetLastError(ERROR_INVALID_FUNCTION);
52      return 0;
53    }
54    return normalize_string_.function()(NormForm, lpSrcString, cwSrcLength,
55                                        lpDstString, cwDstLength);
56  }
57
58  // Returns true if all functions were bound successfully.
59  // This implies that library_ itself was loaded successfully.
60  bool IsValid() const {
61    return is_normalized_string_.IsValid() && normalize_string_.IsValid();
62  }
63
64 private:
65  // Holds a handle to loaded Normaliz.dll.
66  ScopedLibrary library_;
67  // Pointers to the functions loaded from Normaliz.dll.
68  FunctionFromDll<IsNormalizedStringFunction> is_normalized_string_;
69  FunctionFromDll<NormalizeStringFunction> normalize_string_;
70
71  DISALLOW_COPY_AND_ASSIGN(NormalizationAPI);
72};
73
74static NormalizationAPI normalization_api;
75
76}  // namespace
77
78
79// NormalizedUnicodeText
80
81NormalizedUnicodeText::NormalizedUnicodeText()
82    : normalized_text_(NULL) {
83}
84
85
86DWORD NormalizedUnicodeText::Normalize(NORM_FORM normalization_form,
87                                       const WCHAR* text) {
88  DWORD result = 0;
89  normalized_text_ = TryToNormalizeText(normalization_form, text, &result);
90  return result;
91}
92
93
94const WCHAR* NormalizedUnicodeText::TryToNormalizeText(
95    NORM_FORM normalization_form, const WCHAR* text, DWORD *error_code) {
96  if (!text) {
97    text_.reset();
98    return text;
99  }
100  _ASSERT(NULL != error_code);
101  if (!error_code)
102    return text;
103
104  if (!normalization_api.IsValid()) {
105    // Fall back to the previous version of normalization API.
106    int folded_text_size = ::FoldStringW(MAP_PRECOMPOSED, text, -1, NULL, 0);
107    if (!folded_text_size) {
108      *error_code = ::GetLastError();
109      return text;
110    }
111
112    text_.reset(new WCHAR[folded_text_size]);
113    if (!text_.get()) {
114      *error_code = ERROR_OUTOFMEMORY;
115      return text;
116    }
117
118    int folding_result =
119        ::FoldStringW(MAP_PRECOMPOSED, text, -1, text_.get(), folded_text_size);
120    if (!folding_result) {
121      *error_code = ::GetLastError();
122      text_.reset();
123      return text;
124    }
125
126    return text_.get();
127  }
128
129  // No need to allocate anything when text is already normalized.
130  if (normalization_api.IsNormalizedString(normalization_form, text, -1))
131    return text;
132
133  // Get the first approximation for the buffer size required to store
134  // normalized text.
135  int normalized_text_size_guess =
136      normalization_api.NormalizeString(normalization_form, text, -1, NULL, 0);
137
138  while (normalized_text_size_guess > 0) {
139    text_.reset(new WCHAR[normalized_text_size_guess]);
140    if (!text_.get()) {
141      *error_code = ERROR_OUTOFMEMORY;
142      break;
143    }
144
145    int normalized_text_size =
146        normalization_api.NormalizeString(normalization_form, text, -1,
147                                          text_.get(),
148                                          normalized_text_size_guess);
149
150    if (normalized_text_size > 0) {
151      // Text was successfully converted.
152      return text_.get();
153    }
154
155    if (ERROR_INSUFFICIENT_BUFFER != ::GetLastError()) {
156      *error_code = ::GetLastError();
157      // Text cannot be normalized, use the original.
158      // By the way, ERROR_SUCCESS is a puzzling case.
159      // MSDN says 'The action completed successfully but yielded no results'.
160      // Does this mean that output buffer was not changed?
161      // Anyway, just in case, also return the original text.
162      break;
163    }
164
165    // Try again with the corrected buffer size.
166    normalized_text_size_guess = -normalized_text_size;
167  }
168
169  // Use the original text in case of any problem with normalization.
170  text_.reset();
171  return text;
172}
173