1/* 2 * xsltlocale.c: locale handling 3 * 4 * Reference: 5 * RFC 3066: Tags for the Identification of Languages 6 * http://www.ietf.org/rfc/rfc3066.txt 7 * ISO 639-1, ISO 3166-1 8 * 9 * Author: Nick Wellnhofer 10 * winapi port: Roumen Petrov 11 */ 12 13#define IN_LIBXSLT 14#include "libxslt.h" 15 16#include <string.h> 17#include <libxml/xmlmemory.h> 18 19#include "xsltlocale.h" 20#include "xsltutils.h" 21 22#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2 23#define newlocale __newlocale 24#define freelocale __freelocale 25#define strxfrm_l __strxfrm_l 26#define LC_COLLATE_MASK (1 << LC_COLLATE) 27#endif 28 29#define ISALPHA(c) ((c & 0xc0) == 0x40 && (unsigned)((c & 0x1f) - 1) < 26) 30#define TOUPPER(c) (c & ~0x20) 31#define TOLOWER(c) (c | 0x20) 32 33/*without terminating null character*/ 34#define XSLTMAX_ISO639LANGLEN 8 35#define XSLTMAX_ISO3166CNTRYLEN 8 36 /* <lang>-<cntry> */ 37#define XSLTMAX_LANGTAGLEN (XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN) 38 39static const xmlChar* xsltDefaultRegion(const xmlChar *localeName); 40 41#ifdef XSLT_LOCALE_WINAPI 42xmlRMutexPtr xsltLocaleMutex = NULL; 43 44struct xsltRFC1766Info_s { 45 /*note typedef unsigned char xmlChar !*/ 46 xmlChar tag[XSLTMAX_LANGTAGLEN+1]; 47 /*note typedef LCID xsltLocale !*/ 48 xsltLocale lcid; 49}; 50typedef struct xsltRFC1766Info_s xsltRFC1766Info; 51 52static int xsltLocaleListSize = 0; 53static xsltRFC1766Info *xsltLocaleList = NULL; 54 55 56static xsltLocale 57xslt_locale_WINAPI(const xmlChar *languageTag) { 58 int k; 59 xsltRFC1766Info *p = xsltLocaleList; 60 61 for (k=0; k<xsltLocaleListSize; k++, p++) 62 if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid; 63 return((xsltLocale)0); 64} 65 66static void xsltEnumSupportedLocales(void); 67#endif 68 69/** 70 * xsltNewLocale: 71 * @languageTag: RFC 3066 language tag 72 * 73 * Creates a new locale of an opaque system dependent type based on the 74 * language tag. 75 * 76 * Returns the locale or NULL on error or if no matching locale was found 77 */ 78xsltLocale 79xsltNewLocale(const xmlChar *languageTag) { 80#ifdef XSLT_LOCALE_XLOCALE 81 xsltLocale locale; 82 char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */ 83 const xmlChar *p = languageTag; 84 const char *region = NULL; 85 char *q = localeName; 86 int i, llen; 87 88 /* Convert something like "pt-br" to "pt_BR.utf8" */ 89 90 if (languageTag == NULL) 91 return(NULL); 92 93 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) 94 *q++ = TOLOWER(*p++); 95 96 if (i == 0) 97 return(NULL); 98 99 llen = i; 100 *q++ = '_'; 101 102 if (*p) { 103 if (*p++ != '-') 104 return(NULL); 105 106 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) 107 *q++ = TOUPPER(*p++); 108 109 if (i == 0 || *p) 110 return(NULL); 111 112 memcpy(q, ".utf8", 6); 113 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 114 if (locale != NULL) 115 return(locale); 116 117 /* Continue without using country code */ 118 119 q = localeName + llen + 1; 120 } 121 122 /* Try locale without territory, e.g. for Esperanto (eo) */ 123 124 memcpy(q, ".utf8", 6); 125 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 126 if (locale != NULL) 127 return(locale); 128 129 /* Try to find most common country for language */ 130 131 if (llen != 2) 132 return(NULL); 133 134 region = (char *)xsltDefaultRegion((xmlChar *)localeName); 135 if (region == NULL) 136 return(NULL); 137 138 q = localeName + llen + 1; 139 *q++ = region[0]; 140 *q++ = region[1]; 141 memcpy(q, ".utf8", 6); 142 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 143 144 return(locale); 145#endif 146 147#ifdef XSLT_LOCALE_WINAPI 148{ 149 xsltLocale locale = (xsltLocale)0; 150 xmlChar localeName[XSLTMAX_LANGTAGLEN+1]; 151 xmlChar *q = localeName; 152 const xmlChar *p = languageTag; 153 int i, llen; 154 const xmlChar *region = NULL; 155 156 if (languageTag == NULL) goto end; 157 158 xsltEnumSupportedLocales(); 159 160 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) 161 *q++ = TOLOWER(*p++); 162 if (i == 0) goto end; 163 164 llen = i; 165 *q++ = '-'; 166 if (*p) { /*if country tag is given*/ 167 if (*p++ != '-') goto end; 168 169 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) 170 *q++ = TOUPPER(*p++); 171 if (i == 0 || *p) goto end; 172 173 *q = '\0'; 174 locale = xslt_locale_WINAPI(localeName); 175 if (locale != (xsltLocale)0) goto end; 176 } 177 /* Try to find most common country for language */ 178 region = xsltDefaultRegion(localeName); 179 if (region == NULL) goto end; 180 181 strcpy(localeName + llen + 1, region); 182 locale = xslt_locale_WINAPI(localeName); 183end: 184 return(locale); 185} 186#endif 187 188#ifdef XSLT_LOCALE_NONE 189 return(NULL); 190#endif 191} 192 193static const xmlChar* 194xsltDefaultRegion(const xmlChar *localeName) { 195 xmlChar c; 196 /* region should be xmlChar, but gcc warns on all string assignments */ 197 const char *region = NULL; 198 199 c = localeName[1]; 200 /* This is based on the locales from glibc 2.3.3 */ 201 202 switch (localeName[0]) { 203 case 'a': 204 if (c == 'a' || c == 'm') region = "ET"; 205 else if (c == 'f') region = "ZA"; 206 else if (c == 'n') region = "ES"; 207 else if (c == 'r') region = "AE"; 208 else if (c == 'z') region = "AZ"; 209 break; 210 case 'b': 211 if (c == 'e') region = "BY"; 212 else if (c == 'g') region = "BG"; 213 else if (c == 'n') region = "BD"; 214 else if (c == 'r') region = "FR"; 215 else if (c == 's') region = "BA"; 216 break; 217 case 'c': 218 if (c == 'a') region = "ES"; 219 else if (c == 's') region = "CZ"; 220 else if (c == 'y') region = "GB"; 221 break; 222 case 'd': 223 if (c == 'a') region = "DK"; 224 else if (c == 'e') region = "DE"; 225 break; 226 case 'e': 227 if (c == 'l') region = "GR"; 228 else if (c == 'n' || c == 'o') region = "US"; 229 else if (c == 's' || c == 'u') region = "ES"; 230 else if (c == 't') region = "EE"; 231 break; 232 case 'f': 233 if (c == 'a') region = "IR"; 234 else if (c == 'i') region = "FI"; 235 else if (c == 'o') region = "FO"; 236 else if (c == 'r') region = "FR"; 237 break; 238 case 'g': 239 if (c == 'a') region = "IE"; 240 else if (c == 'l') region = "ES"; 241 else if (c == 'v') region = "GB"; 242 break; 243 case 'h': 244 if (c == 'e') region = "IL"; 245 else if (c == 'i') region = "IN"; 246 else if (c == 'r') region = "HT"; 247 else if (c == 'u') region = "HU"; 248 break; 249 case 'i': 250 if (c == 'd') region = "ID"; 251 else if (c == 's') region = "IS"; 252 else if (c == 't') region = "IT"; 253 else if (c == 'w') region = "IL"; 254 break; 255 case 'j': 256 if (c == 'a') region = "JP"; 257 break; 258 case 'k': 259 if (c == 'l') region = "GL"; 260 else if (c == 'o') region = "KR"; 261 else if (c == 'w') region = "GB"; 262 break; 263 case 'l': 264 if (c == 't') region = "LT"; 265 else if (c == 'v') region = "LV"; 266 break; 267 case 'm': 268 if (c == 'k') region = "MK"; 269 else if (c == 'l' || c == 'r') region = "IN"; 270 else if (c == 'n') region = "MN"; 271 else if (c == 's') region = "MY"; 272 else if (c == 't') region = "MT"; 273 break; 274 case 'n': 275 if (c == 'b' || c == 'n' || c == 'o') region = "NO"; 276 else if (c == 'e') region = "NP"; 277 else if (c == 'l') region = "NL"; 278 break; 279 case 'o': 280 if (c == 'm') region = "ET"; 281 break; 282 case 'p': 283 if (c == 'a') region = "IN"; 284 else if (c == 'l') region = "PL"; 285 else if (c == 't') region = "PT"; 286 break; 287 case 'r': 288 if (c == 'o') region = "RO"; 289 else if (c == 'u') region = "RU"; 290 break; 291 case 's': 292 switch (c) { 293 case 'e': region = "NO"; break; 294 case 'h': region = "YU"; break; 295 case 'k': region = "SK"; break; 296 case 'l': region = "SI"; break; 297 case 'o': region = "ET"; break; 298 case 'q': region = "AL"; break; 299 case 't': region = "ZA"; break; 300 case 'v': region = "SE"; break; 301 } 302 break; 303 case 't': 304 if (c == 'a' || c == 'e') region = "IN"; 305 else if (c == 'h') region = "TH"; 306 else if (c == 'i') region = "ER"; 307 else if (c == 'r') region = "TR"; 308 else if (c == 't') region = "RU"; 309 break; 310 case 'u': 311 if (c == 'k') region = "UA"; 312 else if (c == 'r') region = "PK"; 313 break; 314 case 'v': 315 if (c == 'i') region = "VN"; 316 break; 317 case 'w': 318 if (c == 'a') region = "BE"; 319 break; 320 case 'x': 321 if (c == 'h') region = "ZA"; 322 break; 323 case 'z': 324 if (c == 'h') region = "CN"; 325 else if (c == 'u') region = "ZA"; 326 break; 327 } 328 return((xmlChar *)region); 329} 330 331/** 332 * xsltFreeLocale: 333 * @locale: the locale to free 334 * 335 * Frees a locale created with xsltNewLocale 336 */ 337void 338xsltFreeLocale(xsltLocale locale) { 339#ifdef XSLT_LOCALE_XLOCALE 340 freelocale(locale); 341#endif 342} 343 344/** 345 * xsltStrxfrm: 346 * @locale: locale created with xsltNewLocale 347 * @string: UTF-8 string to transform 348 * 349 * Transforms a string according to locale. The transformed string must then be 350 * compared with xsltLocaleStrcmp and freed with xmlFree. 351 * 352 * Returns the transformed string or NULL on error 353 */ 354xsltLocaleChar * 355xsltStrxfrm(xsltLocale locale, const xmlChar *string) 356{ 357#ifdef XSLT_LOCALE_NONE 358 return(NULL); 359#else 360 size_t xstrlen, r; 361 xsltLocaleChar *xstr; 362 363#ifdef XSLT_LOCALE_XLOCALE 364 xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1; 365 xstr = (xsltLocaleChar *) xmlMalloc(xstrlen); 366 if (xstr == NULL) { 367 xsltTransformError(NULL, NULL, NULL, 368 "xsltStrxfrm : out of memory error\n"); 369 return(NULL); 370 } 371 372 r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale); 373#endif 374 375#ifdef XSLT_LOCALE_WINAPI 376 xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0); 377 if (xstrlen == 0) { 378 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n"); 379 return(NULL); 380 } 381 xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar)); 382 if (xstr == NULL) { 383 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n"); 384 return(NULL); 385 } 386 r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen); 387 if (r == 0) { 388 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n"); 389 xmlFree(xstr); 390 return(NULL); 391 } 392 return(xstr); 393#endif /* XSLT_LOCALE_WINAPI */ 394 395 if (r >= xstrlen) { 396 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n"); 397 xmlFree(xstr); 398 return(NULL); 399 } 400 401 return(xstr); 402#endif /* XSLT_LOCALE_NONE */ 403} 404 405/** 406 * xsltLocaleStrcmp: 407 * @locale: a locale identifier 408 * @str1: a string transformed with xsltStrxfrm 409 * @str2: a string transformed with xsltStrxfrm 410 * 411 * Compares two strings transformed with xsltStrxfrm 412 * 413 * Returns a value < 0 if str1 sorts before str2, 414 * a value > 0 if str1 sorts after str2, 415 * 0 if str1 and str2 are equal wrt sorting 416 */ 417int 418xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) { 419 (void)locale; 420#ifdef XSLT_LOCALE_WINAPI 421{ 422 int ret; 423 if (str1 == str2) return(0); 424 if (str1 == NULL) return(-1); 425 if (str2 == NULL) return(1); 426 ret = CompareStringW(locale, 0, str1, -1, str2, -1); 427 if (ret == 0) { 428 xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n"); 429 return(0); 430 } 431 return(ret - 2); 432} 433#else 434 return(xmlStrcmp(str1, str2)); 435#endif 436} 437 438#ifdef XSLT_LOCALE_WINAPI 439/** 440 * xsltCountSupportedLocales: 441 * @lcid: not used 442 * 443 * callback used to count locales 444 * 445 * Returns TRUE 446 */ 447BOOL CALLBACK 448xsltCountSupportedLocales(LPSTR lcid) { 449 (void) lcid; 450 ++xsltLocaleListSize; 451 return(TRUE); 452} 453 454/** 455 * xsltIterateSupportedLocales: 456 * @lcid: not used 457 * 458 * callback used to track locales 459 * 460 * Returns TRUE if not at the end of the array 461 */ 462BOOL CALLBACK 463xsltIterateSupportedLocales(LPSTR lcid) { 464 static int count = 0; 465 xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1]; 466 xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1]; 467 int k, l; 468 xsltRFC1766Info *p = xsltLocaleList + count; 469 470 k = sscanf(lcid, "%lx", (long*)&p->lcid); 471 if (k < 1) goto end; 472 /*don't count terminating null character*/ 473 k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang )); 474 if (--k < 1) goto end; 475 l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry)); 476 if (--l < 1) goto end; 477 478 { /*fill results*/ 479 xmlChar *q = p->tag; 480 memcpy(q, iso639lang, k); 481 q += k; 482 *q++ = '-'; 483 memcpy(q, iso3136ctry, l); 484 q += l; 485 *q = '\0'; 486 } 487 ++count; 488end: 489 return((count < xsltLocaleListSize) ? TRUE : FALSE); 490} 491 492 493static void 494xsltEnumSupportedLocales(void) { 495 xmlRMutexLock(xsltLocaleMutex); 496 if (xsltLocaleListSize <= 0) { 497 size_t len; 498 499 EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED); 500 501 len = xsltLocaleListSize * sizeof(xsltRFC1766Info); 502 xsltLocaleList = xmlMalloc(len); 503 memset(xsltLocaleList, 0, len); 504 EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED); 505 } 506 xmlRMutexUnlock(xsltLocaleMutex); 507} 508 509#endif /*def XSLT_LOCALE_WINAPI*/ 510