15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <tchar.h> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <windows.h> 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "encodings/compact_lang_det/win/cld_scopedptr.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if (WINVER < 0x0600) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copied from winnls.h, we're not using the latest SDK yet. 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef enum _NORM_FORM { 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NormalizationOther = 0, 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NormalizationC = 0x1, 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NormalizationD = 0x2, 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NormalizationKC = 0x5, 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NormalizationKD = 0x6 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} NORM_FORM; 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Gives you back a normalized version of the input text. Normalization is 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// performed to the specified form. 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Instance lifetime should be within the lifetime span of the 'text'. 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class NormalizedUnicodeText { 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Creates an empty instance of NormalizedUnicodeText. 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NormalizedUnicodeText(); 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Creates a fully initialized instance of NormalizedUnicodeText. 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // [in] normalization_form - normalization rule set (see MSDN for details). 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // [in] text - zero-terminated UTF-16 encoded string. 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns 0 in case of success, Win32 error code in case of failure. 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // In case of failure, get() returns the original text. 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DWORD Normalize(NORM_FORM normalization_form, const WCHAR* text); 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns pointer to the normalized text. 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const WCHAR* get() const { return normalized_text_; } 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Normalizes 'text' by the 'normalization_form' rules. 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // [in] normalization_form - normalization rule set (see MSDN for details). 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // [in] text - zero-terminated UTF-16 encoded string. 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // [out] error_code - Win32 error code. 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const WCHAR* TryToNormalizeText(NORM_FORM normalization_form, 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const WCHAR* text, DWORD *error_code); 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Pointer to the normalized text. 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const WCHAR* normalized_text_; 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // When the source text is already normalized by the requested normalization 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // form, text_ is not used and normalized_text_ just points to the source 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // text. When the source text requres normalization, text_ contains normalized 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // version of the source text and normalized_text_ points to this buffer. 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Since CLD requires NormalizationC form and the overwhelming majority of all 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // texts in the Internet is already normalized to this form, it's expected 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // that this class will not introduce any runtime memory overhead. 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_array<WCHAR> text_; 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(NormalizedUnicodeText); 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // ENCODINGS_COMPACT_LANG_DET_WIN_NORMALIZEDUNICODETEXT_H_ 68