1/*
2 * xsltlocale.c: locale handling
3 *
4 * Reference:
5 * RFC 3066: Tags for the Identification of Languages
6 * http://www.ietf.org/rfc/rfc3066.txt
7 * ISO 639-1, ISO 3166-1
8 *
9 * Author: Nick Wellnhofer
10 * winapi port: Roumen Petrov
11 */
12
13#define IN_LIBXSLT
14#include "libxslt.h"
15
16#include <string.h>
17#include <libxml/xmlmemory.h>
18
19#include "xsltlocale.h"
20#include "xsltutils.h"
21
22#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
23#define newlocale __newlocale
24#define freelocale __freelocale
25#define strxfrm_l __strxfrm_l
26#define LC_COLLATE_MASK (1 << LC_COLLATE)
27#endif
28
/*
 * ASCII-only, locale-independent character helpers.
 *
 * ISALPHA(c): non-zero if c is in 'A'..'Z' or 'a'..'z'. The first test
 *             restricts c to 0x40..0x7f, the second checks that the low five
 *             bits are in 1..26.
 * TOUPPER(c): clears bit 0x20; only meaningful for ASCII letters.
 * TOLOWER(c): sets bit 0x20; only meaningful for ASCII letters.
 *
 * Arguments are fully parenthesized so that expression arguments (e.g. a
 * conditional expression) expand correctly. ISALPHA evaluates its argument
 * twice, so do not pass expressions with side effects to it.
 */
#define ISALPHA(c) ((((c) & 0xc0) == 0x40) && ((unsigned)(((c) & 0x1f) - 1) < 26))
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
32
33/*without terminating null character*/
34#define XSLTMAX_ISO639LANGLEN		8
35#define XSLTMAX_ISO3166CNTRYLEN		8
36					/* <lang>-<cntry> */
37#define XSLTMAX_LANGTAGLEN		(XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)
38
39static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
40
41#ifdef XSLT_LOCALE_WINAPI
42xmlRMutexPtr xsltLocaleMutex = NULL;
43
44struct xsltRFC1766Info_s {
45      /*note typedef unsigned char xmlChar !*/
46    xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
47      /*note typedef LCID xsltLocale !*/
48    xsltLocale lcid;
49};
50typedef struct xsltRFC1766Info_s xsltRFC1766Info;
51
52static int xsltLocaleListSize = 0;
53static xsltRFC1766Info *xsltLocaleList = NULL;
54
55
56static xsltLocale
57xslt_locale_WINAPI(const xmlChar *languageTag) {
58    int k;
59    xsltRFC1766Info *p = xsltLocaleList;
60
61    for (k=0; k<xsltLocaleListSize; k++, p++)
62	if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
63    return((xsltLocale)0);
64}
65
66static void xsltEnumSupportedLocales(void);
67#endif
68
69/**
70 * xsltNewLocale:
71 * @languageTag: RFC 3066 language tag
72 *
73 * Creates a new locale of an opaque system dependent type based on the
74 * language tag.
75 *
76 * Returns the locale or NULL on error or if no matching locale was found
77 */
78xsltLocale
79xsltNewLocale(const xmlChar *languageTag) {
80#ifdef XSLT_LOCALE_XLOCALE
81    xsltLocale locale;
82    char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
83    const xmlChar *p = languageTag;
84    const char *region = NULL;
85    char *q = localeName;
86    int i, llen;
87
88    /* Convert something like "pt-br" to "pt_BR.utf8" */
89
90    if (languageTag == NULL)
91    	return(NULL);
92
93    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
94	*q++ = TOLOWER(*p++);
95
96    if (i == 0)
97    	return(NULL);
98
99    llen = i;
100    *q++ = '_';
101
102    if (*p) {
103    	if (*p++ != '-')
104    	    return(NULL);
105
106	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
107	    *q++ = TOUPPER(*p++);
108
109    	if (i == 0 || *p)
110    	    return(NULL);
111
112        memcpy(q, ".utf8", 6);
113        locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
114        if (locale != NULL)
115            return(locale);
116
117        /* Continue without using country code */
118
119        q = localeName + llen + 1;
120    }
121
122    /* Try locale without territory, e.g. for Esperanto (eo) */
123
124    memcpy(q, ".utf8", 6);
125    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
126    if (locale != NULL)
127        return(locale);
128
129    /* Try to find most common country for language */
130
131    if (llen != 2)
132        return(NULL);
133
134    region = (char *)xsltDefaultRegion((xmlChar *)localeName);
135    if (region == NULL)
136        return(NULL);
137
138    q = localeName + llen + 1;
139    *q++ = region[0];
140    *q++ = region[1];
141    memcpy(q, ".utf8", 6);
142    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
143
144    return(locale);
145#endif
146
147#ifdef XSLT_LOCALE_WINAPI
148{
149    xsltLocale    locale = (xsltLocale)0;
150    xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
151    xmlChar       *q = localeName;
152    const xmlChar *p = languageTag;
153    int           i, llen;
154    const xmlChar *region = NULL;
155
156    if (languageTag == NULL) goto end;
157
158    xsltEnumSupportedLocales();
159
160    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
161	*q++ = TOLOWER(*p++);
162    if (i == 0) goto end;
163
164    llen = i;
165    *q++ = '-';
166    if (*p) { /*if country tag is given*/
167	if (*p++ != '-') goto end;
168
169	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
170	    *q++ = TOUPPER(*p++);
171	if (i == 0 || *p) goto end;
172
173	*q = '\0';
174	locale = xslt_locale_WINAPI(localeName);
175	if (locale != (xsltLocale)0) goto end;
176    }
177    /* Try to find most common country for language */
178    region = xsltDefaultRegion(localeName);
179    if (region == NULL) goto end;
180
181    strcpy(localeName + llen + 1, region);
182    locale = xslt_locale_WINAPI(localeName);
183end:
184    return(locale);
185}
186#endif
187
188#ifdef XSLT_LOCALE_NONE
189    return(NULL);
190#endif
191}
192
193static const xmlChar*
194xsltDefaultRegion(const xmlChar *localeName) {
195    xmlChar c;
196    /* region should be xmlChar, but gcc warns on all string assignments */
197    const char *region = NULL;
198
199    c = localeName[1];
200    /* This is based on the locales from glibc 2.3.3 */
201
202    switch (localeName[0]) {
203        case 'a':
204            if (c == 'a' || c == 'm') region = "ET";
205            else if (c == 'f') region = "ZA";
206            else if (c == 'n') region = "ES";
207            else if (c == 'r') region = "AE";
208            else if (c == 'z') region = "AZ";
209            break;
210        case 'b':
211            if (c == 'e') region = "BY";
212            else if (c == 'g') region = "BG";
213            else if (c == 'n') region = "BD";
214            else if (c == 'r') region = "FR";
215            else if (c == 's') region = "BA";
216            break;
217        case 'c':
218            if (c == 'a') region = "ES";
219            else if (c == 's') region = "CZ";
220            else if (c == 'y') region = "GB";
221            break;
222        case 'd':
223            if (c == 'a') region = "DK";
224            else if (c == 'e') region = "DE";
225            break;
226        case 'e':
227            if (c == 'l') region = "GR";
228            else if (c == 'n' || c == 'o') region = "US";
229            else if (c == 's' || c == 'u') region = "ES";
230            else if (c == 't') region = "EE";
231            break;
232        case 'f':
233            if (c == 'a') region = "IR";
234            else if (c == 'i') region = "FI";
235            else if (c == 'o') region = "FO";
236            else if (c == 'r') region = "FR";
237            break;
238        case 'g':
239            if (c == 'a') region = "IE";
240            else if (c == 'l') region = "ES";
241            else if (c == 'v') region = "GB";
242            break;
243        case 'h':
244            if (c == 'e') region = "IL";
245            else if (c == 'i') region = "IN";
246            else if (c == 'r') region = "HT";
247            else if (c == 'u') region = "HU";
248            break;
249        case 'i':
250            if (c == 'd') region = "ID";
251            else if (c == 's') region = "IS";
252            else if (c == 't') region = "IT";
253            else if (c == 'w') region = "IL";
254            break;
255        case 'j':
256            if (c == 'a') region = "JP";
257            break;
258        case 'k':
259            if (c == 'l') region = "GL";
260            else if (c == 'o') region = "KR";
261            else if (c == 'w') region = "GB";
262            break;
263        case 'l':
264            if (c == 't') region = "LT";
265            else if (c == 'v') region = "LV";
266            break;
267        case 'm':
268            if (c == 'k') region = "MK";
269            else if (c == 'l' || c == 'r') region = "IN";
270            else if (c == 'n') region = "MN";
271            else if (c == 's') region = "MY";
272            else if (c == 't') region = "MT";
273            break;
274        case 'n':
275            if (c == 'b' || c == 'n' || c == 'o') region = "NO";
276            else if (c == 'e') region = "NP";
277            else if (c == 'l') region = "NL";
278            break;
279        case 'o':
280            if (c == 'm') region = "ET";
281            break;
282        case 'p':
283            if (c == 'a') region = "IN";
284            else if (c == 'l') region = "PL";
285            else if (c == 't') region = "PT";
286            break;
287        case 'r':
288            if (c == 'o') region = "RO";
289            else if (c == 'u') region = "RU";
290            break;
291        case 's':
292            switch (c) {
293                case 'e': region = "NO"; break;
294                case 'h': region = "YU"; break;
295                case 'k': region = "SK"; break;
296                case 'l': region = "SI"; break;
297                case 'o': region = "ET"; break;
298                case 'q': region = "AL"; break;
299                case 't': region = "ZA"; break;
300                case 'v': region = "SE"; break;
301            }
302            break;
303        case 't':
304            if (c == 'a' || c == 'e') region = "IN";
305            else if (c == 'h') region = "TH";
306            else if (c == 'i') region = "ER";
307            else if (c == 'r') region = "TR";
308            else if (c == 't') region = "RU";
309            break;
310        case 'u':
311            if (c == 'k') region = "UA";
312            else if (c == 'r') region = "PK";
313            break;
314        case 'v':
315            if (c == 'i') region = "VN";
316            break;
317        case 'w':
318            if (c == 'a') region = "BE";
319            break;
320        case 'x':
321            if (c == 'h') region = "ZA";
322            break;
323        case 'z':
324            if (c == 'h') region = "CN";
325            else if (c == 'u') region = "ZA";
326            break;
327    }
328    return((xmlChar *)region);
329}
330
331/**
332 * xsltFreeLocale:
333 * @locale: the locale to free
334 *
335 * Frees a locale created with xsltNewLocale
336 */
337void
338xsltFreeLocale(xsltLocale locale) {
339#ifdef XSLT_LOCALE_XLOCALE
340    freelocale(locale);
341#endif
342}
343
344/**
345 * xsltStrxfrm:
346 * @locale: locale created with xsltNewLocale
347 * @string: UTF-8 string to transform
348 *
349 * Transforms a string according to locale. The transformed string must then be
350 * compared with xsltLocaleStrcmp and freed with xmlFree.
351 *
352 * Returns the transformed string or NULL on error
353 */
354xsltLocaleChar *
355xsltStrxfrm(xsltLocale locale, const xmlChar *string)
356{
357#ifdef XSLT_LOCALE_NONE
358    return(NULL);
359#else
360    size_t xstrlen, r;
361    xsltLocaleChar *xstr;
362
363#ifdef XSLT_LOCALE_XLOCALE
364    xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
365    xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
366    if (xstr == NULL) {
367	xsltTransformError(NULL, NULL, NULL,
368	    "xsltStrxfrm : out of memory error\n");
369	return(NULL);
370    }
371
372    r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
373#endif
374
375#ifdef XSLT_LOCALE_WINAPI
376    xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0);
377    if (xstrlen == 0) {
378        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
379        return(NULL);
380    }
381    xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
382    if (xstr == NULL) {
383        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
384        return(NULL);
385    }
386    r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen);
387    if (r == 0) {
388        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
389        xmlFree(xstr);
390        return(NULL);
391    }
392    return(xstr);
393#endif /* XSLT_LOCALE_WINAPI */
394
395    if (r >= xstrlen) {
396	xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
397        xmlFree(xstr);
398        return(NULL);
399    }
400
401    return(xstr);
402#endif /* XSLT_LOCALE_NONE */
403}
404
405/**
406 * xsltLocaleStrcmp:
407 * @locale: a locale identifier
408 * @str1: a string transformed with xsltStrxfrm
409 * @str2: a string transformed with xsltStrxfrm
410 *
411 * Compares two strings transformed with xsltStrxfrm
412 *
413 * Returns a value < 0 if str1 sorts before str2,
414 *         a value > 0 if str1 sorts after str2,
415 *         0 if str1 and str2 are equal wrt sorting
416 */
417int
418xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
419    (void)locale;
420#ifdef XSLT_LOCALE_WINAPI
421{
422    int ret;
423    if (str1 == str2) return(0);
424    if (str1 == NULL) return(-1);
425    if (str2 == NULL) return(1);
426    ret = CompareStringW(locale, 0, str1, -1, str2, -1);
427    if (ret == 0) {
428        xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
429        return(0);
430    }
431    return(ret - 2);
432}
433#else
434    return(xmlStrcmp(str1, str2));
435#endif
436}
437
438#ifdef XSLT_LOCALE_WINAPI
439/**
440 * xsltCountSupportedLocales:
441 * @lcid: not used
442 *
443 * callback used to count locales
444 *
445 * Returns TRUE
446 */
447BOOL CALLBACK
448xsltCountSupportedLocales(LPSTR lcid) {
449    (void) lcid;
450    ++xsltLocaleListSize;
451    return(TRUE);
452}
453
454/**
455 * xsltIterateSupportedLocales:
456 * @lcid: not used
457 *
458 * callback used to track locales
459 *
460 * Returns TRUE if not at the end of the array
461 */
462BOOL CALLBACK
463xsltIterateSupportedLocales(LPSTR lcid) {
464    static int count = 0;
465    xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
466    xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
467    int        k, l;
468    xsltRFC1766Info *p = xsltLocaleList + count;
469
470    k = sscanf(lcid, "%lx", (long*)&p->lcid);
471    if (k < 1) goto end;
472    /*don't count terminating null character*/
473    k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang ));
474    if (--k < 1) goto end;
475    l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry));
476    if (--l < 1) goto end;
477
478    {  /*fill results*/
479	xmlChar    *q = p->tag;
480	memcpy(q, iso639lang, k);
481	q += k;
482	*q++ = '-';
483	memcpy(q, iso3136ctry, l);
484	q += l;
485	*q = '\0';
486    }
487    ++count;
488end:
489    return((count < xsltLocaleListSize) ? TRUE : FALSE);
490}
491
492
493static void
494xsltEnumSupportedLocales(void) {
495    xmlRMutexLock(xsltLocaleMutex);
496    if (xsltLocaleListSize <= 0) {
497	size_t len;
498
499	EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
500
501	len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
502	xsltLocaleList = xmlMalloc(len);
503	memset(xsltLocaleList, 0, len);
504	EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
505    }
506    xmlRMutexUnlock(xsltLocaleMutex);
507}
508
509#endif /*def XSLT_LOCALE_WINAPI*/
510