// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <tchar.h>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <windows.h>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "encodings/compact_lang_det/win/cld_scopedptr.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
#if (WINVER < 0x0600)
// Fallback declaration of NORM_FORM, copied from winnls.h, for builds that do
// not yet use the latest SDK.  The enumerator names and values must stay
// identical to the SDK's so that they can be passed to the Win32
// normalization functions unchanged.
typedef enum _NORM_FORM {
  NormalizationOther = 0,   // Not supported.
  NormalizationC = 0x1,     // Form C: canonical composition.
  NormalizationD = 0x2,     // Form D: canonical decomposition.
  NormalizationKC = 0x5,    // Form KC: compatibility composition.
  NormalizationKD = 0x6     // Form KD: compatibility decomposition.
} NORM_FORM;
#endif
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Gives you back a normalized version of the input text.  Normalization is
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// performed to the specified form.
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Instance lifetime should be within the lifetime span of the 'text'.
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class NormalizedUnicodeText {
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Creates an empty instance of NormalizedUnicodeText.
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NormalizedUnicodeText();
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Creates a fully initialized instance of NormalizedUnicodeText.
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // [in] normalization_form - normalization rule set (see MSDN for details).
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // [in] text - zero-terminated UTF-16 encoded string.
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns 0 in case of success, Win32 error code in case of failure.
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //     In case of failure, get() returns the original text.
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DWORD Normalize(NORM_FORM normalization_form, const WCHAR* text);
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns pointer to the normalized text.
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const WCHAR* get() const { return normalized_text_; }
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Normalizes 'text' by the 'normalization_form' rules.
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // [in] normalization_form - normalization rule set (see MSDN for details).
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // [in] text - zero-terminated UTF-16 encoded string.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // [out] error_code - Win32 error code.
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const WCHAR* TryToNormalizeText(NORM_FORM normalization_form,
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  const WCHAR* text, DWORD *error_code);
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Pointer to the normalized text.
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const WCHAR* normalized_text_;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // When the source text is already normalized by the requested normalization
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // form, text_ is not used and normalized_text_ just points to the source
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // text. When the source text requres normalization, text_ contains normalized
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // version of the source text and normalized_text_ points to this buffer.
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Since CLD requires NormalizationC form and the overwhelming majority of all
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // texts in the Internet is already normalized to this form, it's expected
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that this class will not introduce any runtime memory overhead.
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  scoped_array<WCHAR> text_;
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(NormalizedUnicodeText);
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_
68