// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_
6#define ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_
7
8#include <tchar.h>
9#include <windows.h>
10
11#include "encodings/compact_lang_det/win/cld_scopedptr.h"
12
13
#if (WINVER < 0x0600)
// Unicode normalization forms accepted by the Win32 normalization APIs.
// Copied from winnls.h, which only declares NORM_FORM when WINVER >= 0x0600;
// we're not using the latest SDK yet.  The tag name, the enumerator names and
// their numeric values must stay identical to the SDK definition so that the
// two declarations are interchangeable.
typedef enum _NORM_FORM {
  NormalizationOther  = 0,  // Not a supported normalization form.
  NormalizationC = 0x1,     // Form C:  canonical composition.
  NormalizationD = 0x2,     // Form D:  canonical decomposition.
  NormalizationKC = 0x5,    // Form KC: compatibility composition.
  NormalizationKD = 0x6     // Form KD: compatibility decomposition.
} NORM_FORM;
#endif
24
25
26// Gives you back a normalized version of the input text.  Normalization is
27// performed to the specified form.
28// Instance lifetime should be within the lifetime span of the 'text'.
29class NormalizedUnicodeText {
30 public:
31  // Creates an empty instance of NormalizedUnicodeText.
32  NormalizedUnicodeText();
33
34  // Creates a fully initialized instance of NormalizedUnicodeText.
35  // [in] normalization_form - normalization rule set (see MSDN for details).
36  // [in] text - zero-terminated UTF-16 encoded string.
37  // Returns 0 in case of success, Win32 error code in case of failure.
38  //     In case of failure, get() returns the original text.
39  DWORD Normalize(NORM_FORM normalization_form, const WCHAR* text);
40
41  // Returns pointer to the normalized text.
42  const WCHAR* get() const { return normalized_text_; }
43
44 private:
45  // Normalizes 'text' by the 'normalization_form' rules.
46  // [in] normalization_form - normalization rule set (see MSDN for details).
47  // [in] text - zero-terminated UTF-16 encoded string.
48  // [out] error_code - Win32 error code.
49  const WCHAR* TryToNormalizeText(NORM_FORM normalization_form,
50                                  const WCHAR* text, DWORD *error_code);
51
52  // Pointer to the normalized text.
53  const WCHAR* normalized_text_;
54  // When the source text is already normalized by the requested normalization
55  // form, text_ is not used and normalized_text_ just points to the source
56  // text. When the source text requres normalization, text_ contains normalized
57  // version of the source text and normalized_text_ points to this buffer.
58  // Since CLD requires NormalizationC form and the overwhelming majority of all
59  // texts in the Internet is already normalized to this form, it's expected
60  // that this class will not introduce any runtime memory overhead.
61  scoped_array<WCHAR> text_;
62
63  DISALLOW_COPY_AND_ASSIGN(NormalizedUnicodeText);
64};
65
66
67#endif  // ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_
68