// Locale support (codecvt) -*- C++ -*-

// Copyright (C) 2000-2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

//
// ISO C++ 14882: 22.2.1.5 Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>

/** @file ext/codecvt_specializations.h
 *  This file is a GNU extension to the Standard C++ Library.
 */

#ifndef _EXT_CODECVT_SPECIALIZATIONS_H
#define _EXT_CODECVT_SPECIALIZATIONS_H 1

#include <bits/c++config.h>
#include <locale>
#include <iconv.h>

namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

/// Extension to use iconv for dealing with character encodings.
// This includes conversions and comparisons between various character
// sets. This object encapsulates data that may need to be shared between
// char_traits, codecvt and ctype.
50 class encoding_state 51 { 52 public: 53 // Types: 54 // NB: A conversion descriptor subsumes and enhances the 55 // functionality of a simple state type such as mbstate_t. 56 typedef iconv_t descriptor_type; 57 58 protected: 59 // Name of internal character set encoding. 60 std::string _M_int_enc; 61 62 // Name of external character set encoding. 63 std::string _M_ext_enc; 64 65 // Conversion descriptor between external encoding to internal encoding. 66 descriptor_type _M_in_desc; 67 68 // Conversion descriptor between internal encoding to external encoding. 69 descriptor_type _M_out_desc; 70 71 // The byte-order marker for the external encoding, if necessary. 72 int _M_ext_bom; 73 74 // The byte-order marker for the internal encoding, if necessary. 75 int _M_int_bom; 76 77 // Number of external bytes needed to construct one complete 78 // character in the internal encoding. 79 // NB: -1 indicates variable, or stateful, encodings. 80 int _M_bytes; 81 82 public: 83 explicit 84 encoding_state() 85 : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0) 86 { } 87 88 explicit 89 encoding_state(const char* __int, const char* __ext, 90 int __ibom = 0, int __ebom = 0, int __bytes = 1) 91 : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0), 92 _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes) 93 { init(); } 94 95 // 21.1.2 traits typedefs 96 // p4 97 // typedef STATE_T state_type 98 // requires: state_type shall meet the requirements of 99 // CopyConstructible types (20.1.3) 100 // NB: This does not preserve the actual state of the conversion 101 // descriptor member, but it does duplicate the encoding 102 // information. 103 encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0) 104 { construct(__obj); } 105 106 // Need assignment operator as well. 
107 encoding_state& 108 operator=(const encoding_state& __obj) 109 { 110 construct(__obj); 111 return *this; 112 } 113 114 ~encoding_state() 115 { destroy(); } 116 117 bool 118 good() const throw() 119 { 120 const descriptor_type __err = (iconv_t)(-1); 121 bool __test = _M_in_desc && _M_in_desc != __err; 122 __test &= _M_out_desc && _M_out_desc != __err; 123 return __test; 124 } 125 126 int 127 character_ratio() const 128 { return _M_bytes; } 129 130 const std::string 131 internal_encoding() const 132 { return _M_int_enc; } 133 134 int 135 internal_bom() const 136 { return _M_int_bom; } 137 138 const std::string 139 external_encoding() const 140 { return _M_ext_enc; } 141 142 int 143 external_bom() const 144 { return _M_ext_bom; } 145 146 const descriptor_type& 147 in_descriptor() const 148 { return _M_in_desc; } 149 150 const descriptor_type& 151 out_descriptor() const 152 { return _M_out_desc; } 153 154 protected: 155 void 156 init() 157 { 158 const descriptor_type __err = (iconv_t)(-1); 159 const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size(); 160 if (!_M_in_desc && __have_encodings) 161 { 162 _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str()); 163 if (_M_in_desc == __err) 164 std::__throw_runtime_error(__N("encoding_state::_M_init " 165 "creating iconv input descriptor failed")); 166 } 167 if (!_M_out_desc && __have_encodings) 168 { 169 _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str()); 170 if (_M_out_desc == __err) 171 std::__throw_runtime_error(__N("encoding_state::_M_init " 172 "creating iconv output descriptor failed")); 173 } 174 } 175 176 void 177 construct(const encoding_state& __obj) 178 { 179 destroy(); 180 _M_int_enc = __obj._M_int_enc; 181 _M_ext_enc = __obj._M_ext_enc; 182 _M_ext_bom = __obj._M_ext_bom; 183 _M_int_bom = __obj._M_int_bom; 184 _M_bytes = __obj._M_bytes; 185 init(); 186 } 187 188 void 189 destroy() throw() 190 { 191 const descriptor_type __err = (iconv_t)(-1); 192 if (_M_in_desc && _M_in_desc 
!= __err) 193 { 194 iconv_close(_M_in_desc); 195 _M_in_desc = 0; 196 } 197 if (_M_out_desc && _M_out_desc != __err) 198 { 199 iconv_close(_M_out_desc); 200 _M_out_desc = 0; 201 } 202 } 203 }; 204 205 /// encoding_char_traits 206 // Custom traits type with encoding_state for the state type, and the 207 // associated fpos<encoding_state> for the position type, all other 208 // bits equivalent to the required char_traits instantiations. 209 template<typename _CharT> 210 struct encoding_char_traits : public std::char_traits<_CharT> 211 { 212 typedef encoding_state state_type; 213 typedef typename std::fpos<state_type> pos_type; 214 }; 215 216_GLIBCXX_END_NAMESPACE_VERSION 217} // namespace 218 219 220namespace std _GLIBCXX_VISIBILITY(default) 221{ 222_GLIBCXX_BEGIN_NAMESPACE_VERSION 223 224 using __gnu_cxx::encoding_state; 225 226 /// codecvt<InternT, _ExternT, encoding_state> specialization. 227 // This partial specialization takes advantage of iconv to provide 228 // code conversions between a large number of character encodings. 
229 template<typename _InternT, typename _ExternT> 230 class codecvt<_InternT, _ExternT, encoding_state> 231 : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state> 232 { 233 public: 234 // Types: 235 typedef codecvt_base::result result; 236 typedef _InternT intern_type; 237 typedef _ExternT extern_type; 238 typedef __gnu_cxx::encoding_state state_type; 239 typedef state_type::descriptor_type descriptor_type; 240 241 // Data Members: 242 static locale::id id; 243 244 explicit 245 codecvt(size_t __refs = 0) 246 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 247 { } 248 249 explicit 250 codecvt(state_type& __enc, size_t __refs = 0) 251 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 252 { } 253 254 protected: 255 virtual 256 ~codecvt() { } 257 258 virtual result 259 do_out(state_type& __state, const intern_type* __from, 260 const intern_type* __from_end, const intern_type*& __from_next, 261 extern_type* __to, extern_type* __to_end, 262 extern_type*& __to_next) const; 263 264 virtual result 265 do_unshift(state_type& __state, extern_type* __to, 266 extern_type* __to_end, extern_type*& __to_next) const; 267 268 virtual result 269 do_in(state_type& __state, const extern_type* __from, 270 const extern_type* __from_end, const extern_type*& __from_next, 271 intern_type* __to, intern_type* __to_end, 272 intern_type*& __to_next) const; 273 274 virtual int 275 do_encoding() const throw(); 276 277 virtual bool 278 do_always_noconv() const throw(); 279 280 virtual int 281 do_length(state_type&, const extern_type* __from, 282 const extern_type* __end, size_t __max) const; 283 284 virtual int 285 do_max_length() const throw(); 286 }; 287 288 template<typename _InternT, typename _ExternT> 289 locale::id 290 codecvt<_InternT, _ExternT, encoding_state>::id; 291 292 // This adaptor works around the signature problems of the second 293 // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 294 // uses 
'char**', which matches the POSIX 1003.1-2001 standard. 295 // Using this adaptor, g++ will do the work for us. 296 template<typename _Tp> 297 inline size_t 298 __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*), 299 iconv_t __cd, char** __inbuf, size_t* __inbytes, 300 char** __outbuf, size_t* __outbytes) 301 { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); } 302 303 template<typename _InternT, typename _ExternT> 304 codecvt_base::result 305 codecvt<_InternT, _ExternT, encoding_state>:: 306 do_out(state_type& __state, const intern_type* __from, 307 const intern_type* __from_end, const intern_type*& __from_next, 308 extern_type* __to, extern_type* __to_end, 309 extern_type*& __to_next) const 310 { 311 result __ret = codecvt_base::error; 312 if (__state.good()) 313 { 314 const descriptor_type& __desc = __state.out_descriptor(); 315 const size_t __fmultiple = sizeof(intern_type); 316 size_t __fbytes = __fmultiple * (__from_end - __from); 317 const size_t __tmultiple = sizeof(extern_type); 318 size_t __tbytes = __tmultiple * (__to_end - __to); 319 320 // Argument list for iconv specifies a byte sequence. Thus, 321 // all to/from arrays must be brutally casted to char*. 322 char* __cto = reinterpret_cast<char*>(__to); 323 char* __cfrom; 324 size_t __conv; 325 326 // Some encodings need a byte order marker as the first item 327 // in the byte stream, to designate endian-ness. The default 328 // value for the byte order marker is NULL, so if this is 329 // the case, it's not necessary and we can just go on our 330 // merry way. 
331 int __int_bom = __state.internal_bom(); 332 if (__int_bom) 333 { 334 size_t __size = __from_end - __from; 335 intern_type* __cfixed = static_cast<intern_type*> 336 (__builtin_alloca(sizeof(intern_type) * (__size + 1))); 337 __cfixed[0] = static_cast<intern_type>(__int_bom); 338 char_traits<intern_type>::copy(__cfixed + 1, __from, __size); 339 __cfrom = reinterpret_cast<char*>(__cfixed); 340 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 341 &__fbytes, &__cto, &__tbytes); 342 } 343 else 344 { 345 intern_type* __cfixed = const_cast<intern_type*>(__from); 346 __cfrom = reinterpret_cast<char*>(__cfixed); 347 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, 348 &__cto, &__tbytes); 349 } 350 351 if (__conv != size_t(-1)) 352 { 353 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 354 __to_next = reinterpret_cast<extern_type*>(__cto); 355 __ret = codecvt_base::ok; 356 } 357 else 358 { 359 if (__fbytes < __fmultiple * (__from_end - __from)) 360 { 361 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 362 __to_next = reinterpret_cast<extern_type*>(__cto); 363 __ret = codecvt_base::partial; 364 } 365 else 366 __ret = codecvt_base::error; 367 } 368 } 369 return __ret; 370 } 371 372 template<typename _InternT, typename _ExternT> 373 codecvt_base::result 374 codecvt<_InternT, _ExternT, encoding_state>:: 375 do_unshift(state_type& __state, extern_type* __to, 376 extern_type* __to_end, extern_type*& __to_next) const 377 { 378 result __ret = codecvt_base::error; 379 if (__state.good()) 380 { 381 const descriptor_type& __desc = __state.in_descriptor(); 382 const size_t __tmultiple = sizeof(intern_type); 383 size_t __tlen = __tmultiple * (__to_end - __to); 384 385 // Argument list for iconv specifies a byte sequence. Thus, 386 // all to/from arrays must be brutally casted to char*. 
387 char* __cto = reinterpret_cast<char*>(__to); 388 size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0, 389 &__cto, &__tlen); 390 391 if (__conv != size_t(-1)) 392 { 393 __to_next = reinterpret_cast<extern_type*>(__cto); 394 if (__tlen == __tmultiple * (__to_end - __to)) 395 __ret = codecvt_base::noconv; 396 else if (__tlen == 0) 397 __ret = codecvt_base::ok; 398 else 399 __ret = codecvt_base::partial; 400 } 401 else 402 __ret = codecvt_base::error; 403 } 404 return __ret; 405 } 406 407 template<typename _InternT, typename _ExternT> 408 codecvt_base::result 409 codecvt<_InternT, _ExternT, encoding_state>:: 410 do_in(state_type& __state, const extern_type* __from, 411 const extern_type* __from_end, const extern_type*& __from_next, 412 intern_type* __to, intern_type* __to_end, 413 intern_type*& __to_next) const 414 { 415 result __ret = codecvt_base::error; 416 if (__state.good()) 417 { 418 const descriptor_type& __desc = __state.in_descriptor(); 419 const size_t __fmultiple = sizeof(extern_type); 420 size_t __flen = __fmultiple * (__from_end - __from); 421 const size_t __tmultiple = sizeof(intern_type); 422 size_t __tlen = __tmultiple * (__to_end - __to); 423 424 // Argument list for iconv specifies a byte sequence. Thus, 425 // all to/from arrays must be brutally casted to char*. 426 char* __cto = reinterpret_cast<char*>(__to); 427 char* __cfrom; 428 size_t __conv; 429 430 // Some encodings need a byte order marker as the first item 431 // in the byte stream, to designate endian-ness. The default 432 // value for the byte order marker is NULL, so if this is 433 // the case, it's not necessary and we can just go on our 434 // merry way. 
435 int __ext_bom = __state.external_bom(); 436 if (__ext_bom) 437 { 438 size_t __size = __from_end - __from; 439 extern_type* __cfixed = static_cast<extern_type*> 440 (__builtin_alloca(sizeof(extern_type) * (__size + 1))); 441 __cfixed[0] = static_cast<extern_type>(__ext_bom); 442 char_traits<extern_type>::copy(__cfixed + 1, __from, __size); 443 __cfrom = reinterpret_cast<char*>(__cfixed); 444 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 445 &__flen, &__cto, &__tlen); 446 } 447 else 448 { 449 extern_type* __cfixed = const_cast<extern_type*>(__from); 450 __cfrom = reinterpret_cast<char*>(__cfixed); 451 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 452 &__flen, &__cto, &__tlen); 453 } 454 455 456 if (__conv != size_t(-1)) 457 { 458 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 459 __to_next = reinterpret_cast<intern_type*>(__cto); 460 __ret = codecvt_base::ok; 461 } 462 else 463 { 464 if (__flen < static_cast<size_t>(__from_end - __from)) 465 { 466 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 467 __to_next = reinterpret_cast<intern_type*>(__cto); 468 __ret = codecvt_base::partial; 469 } 470 else 471 __ret = codecvt_base::error; 472 } 473 } 474 return __ret; 475 } 476 477 template<typename _InternT, typename _ExternT> 478 int 479 codecvt<_InternT, _ExternT, encoding_state>:: 480 do_encoding() const throw() 481 { 482 int __ret = 0; 483 if (sizeof(_ExternT) <= sizeof(_InternT)) 484 __ret = sizeof(_InternT) / sizeof(_ExternT); 485 return __ret; 486 } 487 488 template<typename _InternT, typename _ExternT> 489 bool 490 codecvt<_InternT, _ExternT, encoding_state>:: 491 do_always_noconv() const throw() 492 { return false; } 493 494 template<typename _InternT, typename _ExternT> 495 int 496 codecvt<_InternT, _ExternT, encoding_state>:: 497 do_length(state_type&, const extern_type* __from, 498 const extern_type* __end, size_t __max) const 499 { return std::min(__max, static_cast<size_t>(__end - __from)); } 500 501 // 
_GLIBCXX_RESOLVE_LIB_DEFECTS 502 // 74. Garbled text for codecvt::do_max_length 503 template<typename _InternT, typename _ExternT> 504 int 505 codecvt<_InternT, _ExternT, encoding_state>:: 506 do_max_length() const throw() 507 { return 1; } 508 509_GLIBCXX_END_NAMESPACE_VERSION 510} // namespace 511 512#endif 513