1a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o/* Determine a canonical name for the current locale's character encoding. 2a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 3b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o Copyright (C) 2000-2003 Free Software Foundation, Inc. 4a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 5a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o This program is free software; you can redistribute it and/or modify it 6a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o under the terms of the GNU Library General Public License as published 7a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o by the Free Software Foundation; either version 2, or (at your option) 8a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o any later version. 9a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 10a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o This program is distributed in the hope that it will be useful, 11a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o but WITHOUT ANY WARRANTY; without even the implied warranty of 12a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o Library General Public License for more details. 14a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 15a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o You should have received a copy of the GNU Library General Public 16a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o License along with this program; if not, write to the Free Software 17a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 18a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o USA. */ 19a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 20a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o/* Written by Bruno Haible <bruno@clisp.org>. */ 21a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 22a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#ifdef HAVE_CONFIG_H 23a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# include <config.h> 24a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 25a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 26b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o/* Specification. */ 27b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o#include "localcharset.h" 28b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o 29a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if HAVE_STDDEF_H 30a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# include <stddef.h> 31a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 32a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 33a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#include <stdio.h> 34a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if HAVE_STRING_H 35a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# include <string.h> 36a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#else 37a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# include <strings.h> 38a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 39a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if HAVE_STDLIB_H 40a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# include <stdlib.h> 41a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 42a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 43a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if defined _WIN32 || defined __WIN32__ 44a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# undef WIN32 /* avoid warning on mingw32 */ 45a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# define WIN32 46a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 47a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 48a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if defined __EMX__ 49a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o/* Assume EMX program runs on OS/2, even if compiled under DOS. */ 50a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# define OS2 51a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 52a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 53a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if !defined WIN32 54a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# if HAVE_LANGINFO_CODESET 55a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# include <langinfo.h> 56a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# else 57a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# if HAVE_SETLOCALE 58a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# include <locale.h> 59a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# endif 60a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# endif 61a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#elif defined WIN32 62a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# define WIN32_LEAN_AND_MEAN 63a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# include <windows.h> 64a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 65a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if defined OS2 66a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# define INCL_DOS 67a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# include <os2.h> 68a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 69a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 70b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o#if ENABLE_RELOCATABLE 71b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o# include "relocatable.h" 72b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o#else 73b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o# define relocate(pathname) (pathname) 74b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o#endif 75b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o 76a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__ 77a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Win32, OS/2, DOS */ 78a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# define ISSLASH(C) ((C) == '/' || (C) == '\\') 79a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 80a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 81a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#ifndef DIRECTORY_SEPARATOR 82a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# define DIRECTORY_SEPARATOR '/' 83a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 84a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 85a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#ifndef ISSLASH 86a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR) 87a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 88a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 89b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o#if HAVE_DECL_GETC_UNLOCKED 90a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# undef getc 91a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# define getc getc_unlocked 92a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 93a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 94a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o/* The following static variable is declared 'volatile' to avoid a 95a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o possible multithread problem in the function get_charset_aliases. If we 96a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o are running in a threaded environment, and if two threads initialize 97a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 'charset_aliases' simultaneously, both will produce the same value, 98a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o and everything will be ok if the two assignments to 'charset_aliases' 99a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o are atomic. But I don't know what will happen if the two assignments mix. */ 100a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if __STDC__ != 1 101a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# define volatile /* empty */ 102a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 103a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o/* Pointer to the contents of the charset.alias file, if it has already been 104a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o read, else NULL. Its format is: 105a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0' */ 106a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'ostatic const char * volatile charset_aliases; 107a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 108a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o/* Return a pointer to the contents of the charset.alias file. */ 109a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'ostatic const char * 110a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'oget_charset_aliases () 111a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o{ 112a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o const char *cp; 113a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 114a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o cp = charset_aliases; 115a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (cp == NULL) 116a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 117b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o#if !(defined VMS || defined WIN32) 118a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o FILE *fp; 119b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o const char *dir = relocate (LIBDIR); 120a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o const char *base = "charset.alias"; 121a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o char *file_name; 122a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 123a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Concatenate dir and base into freshly allocated file_name. */ 124a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 125a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o size_t dir_len = strlen (dir); 126a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o size_t base_len = strlen (base); 127a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1])); 128a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o file_name = (char *) malloc (dir_len + add_slash + base_len + 1); 129a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (file_name != NULL) 130a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 131a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o memcpy (file_name, dir, dir_len); 132a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (add_slash) 133a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o file_name[dir_len] = DIRECTORY_SEPARATOR; 134a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o memcpy (file_name + dir_len + add_slash, base, base_len + 1); 135a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 136a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 137a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 138a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL) 139a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Out of memory or file not found, treat it as empty. */ 140a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o cp = ""; 141a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o else 142a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 143a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Parse the file's contents. */ 144a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o int c; 145a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o char buf1[50+1]; 146a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o char buf2[50+1]; 147a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o char *res_ptr = NULL; 148a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o size_t res_size = 0; 149a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o size_t l1, l2; 150a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 151a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o for (;;) 152a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 153a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o c = getc (fp); 154a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (c == EOF) 155a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o break; 156a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (c == '\n' || c == ' ' || c == '\t') 157a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o continue; 158a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (c == '#') 159a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 160a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Skip comment, to end of line. */ 161a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o do 162a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o c = getc (fp); 163a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o while (!(c == EOF || c == '\n')); 164a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (c == EOF) 165a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o break; 166a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o continue; 167a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 168a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o ungetc (c, fp); 169a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (fscanf (fp, "%50s %50s", buf1, buf2) < 2) 170a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o break; 171a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o l1 = strlen (buf1); 172a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o l2 = strlen (buf2); 173a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (res_size == 0) 174a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 175a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o res_size = l1 + 1 + l2 + 1; 176a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o res_ptr = (char *) malloc (res_size + 1); 177a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 178a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o else 179a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 180a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o res_size += l1 + 1 + l2 + 1; 181a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o res_ptr = (char *) realloc (res_ptr, res_size + 1); 182a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 183a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (res_ptr == NULL) 184a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 185a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Out of memory. */ 186a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o res_size = 0; 187a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o break; 188a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 189a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); 190a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o strcpy (res_ptr + res_size - (l2 + 1), buf2); 191a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 192a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o fclose (fp); 193a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (res_size == 0) 194a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o cp = ""; 195a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o else 196a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 197a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o *(res_ptr + res_size) = '\0'; 198a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o cp = res_ptr; 199a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 200a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 201a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 20245e338f5332a54295893dba2e32cc093d1316f60Jim Meyering free (file_name); 203a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 204a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#else 205a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 206b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o# if defined VMS 207b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o /* To avoid the troubles of an extra file charset.alias_vms in the 208b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o sources of many GNU packages, simply inline the aliases here. */ 209b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o /* The list of encodings is taken from the OpenVMS 7.3-1 documentation 210b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "Compaq C Run-Time Library Reference Manual for OpenVMS systems" 211b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o section 10.7 "Handling Different Character Sets". */ 212b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" 213b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "ISO8859-2" "\0" "ISO-8859-2" "\0" 214b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "ISO8859-5" "\0" "ISO-8859-5" "\0" 215b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "ISO8859-7" "\0" "ISO-8859-7" "\0" 216b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "ISO8859-8" "\0" "ISO-8859-8" "\0" 217b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "ISO8859-9" "\0" "ISO-8859-9" "\0" 218b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o /* Japanese */ 219b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "eucJP" "\0" "EUC-JP" "\0" 220b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "SJIS" "\0" "SHIFT_JIS" "\0" 221b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "DECKANJI" "\0" "DEC-KANJI" "\0" 222b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "SDECKANJI" "\0" "EUC-JP" "\0" 223b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o /* Chinese */ 224b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "eucTW" "\0" "EUC-TW" "\0" 225b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "DECHANYU" "\0" "DEC-HANYU" "\0" 226b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "DECHANZI" "\0" "GB2312" "\0" 227b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o /* Korean */ 228b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o "DECKOREAN" "\0" "EUC-KR" "\0"; 229b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o# endif 230b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o 231b0cacab066000b940551d59aad3e4553d4bad268Theodore Ts'o# if defined WIN32 232a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* To avoid the troubles of installing a separate file in the same 233a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o directory as the DLL and of retrieving the DLL's directory at 234a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o runtime, simply inline the aliases here. */ 235a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 236a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o cp = "CP936" "\0" "GBK" "\0" 237a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP1361" "\0" "JOHAB" "\0" 238a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP20127" "\0" "ASCII" "\0" 239a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP20866" "\0" "KOI8-R" "\0" 240a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP21866" "\0" "KOI8-RU" "\0" 241a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28591" "\0" "ISO-8859-1" "\0" 242a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28592" "\0" "ISO-8859-2" "\0" 243a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28593" "\0" "ISO-8859-3" "\0" 244a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28594" "\0" "ISO-8859-4" "\0" 245a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28595" "\0" "ISO-8859-5" "\0" 246a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28596" "\0" "ISO-8859-6" "\0" 247a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28597" "\0" "ISO-8859-7" "\0" 248a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28598" "\0" "ISO-8859-8" "\0" 249a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28599" "\0" "ISO-8859-9" "\0" 250a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o "CP28605" "\0" "ISO-8859-15" "\0"; 251a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# endif 252a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 253a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 254a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o charset_aliases = cp; 255a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 256a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 257a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o return cp; 258a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o} 259a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 260a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o/* Determine the current locale's character encoding, and canonicalize it 261a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o into one of the canonical names listed in config.charset. 262a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o The result must not be freed; it is statically allocated. 263a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o If the canonical name cannot be determined, the result is a non-canonical 264a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o name. */ 265a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 266a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#ifdef STATIC 267a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'oSTATIC 268a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 269a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'oconst char * 270a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'olocale_charset () 271a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o{ 272a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o const char *codeset; 273a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o const char *aliases; 274a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 275a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#if !(defined WIN32 || defined OS2) 276a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 277a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# if HAVE_LANGINFO_CODESET 278a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 279a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Most systems support nl_langinfo (CODESET) nowadays. */ 280a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o codeset = nl_langinfo (CODESET); 281a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 282a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# else 283a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 284a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* On old systems which lack it, use setlocale or getenv. */ 285a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o const char *locale = NULL; 286a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 287a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* But most old systems don't have a complete set of locales. Some 288a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o (like SunOS 4 or DJGPP) have only the C locale. Therefore we don't 289a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o use setlocale here; it would return "C" when it doesn't support the 290a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o locale name the user has set. */ 291a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# if HAVE_SETLOCALE && 0 292a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o locale = setlocale (LC_CTYPE, NULL); 293a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# endif 294a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (locale == NULL || locale[0] == '\0') 295a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 296a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o locale = getenv ("LC_ALL"); 297a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (locale == NULL || locale[0] == '\0') 298a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 299a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o locale = getenv ("LC_CTYPE"); 300a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (locale == NULL || locale[0] == '\0') 301a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o locale = getenv ("LANG"); 302a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 303a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 304a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 305a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* On some old systems, one used to set locale = "iso8859_1". On others, 306a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o you set it to "language_COUNTRY.charset". In any case, we resolve it 307a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o through the charset.alias file. */ 308a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o codeset = locale; 309a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 310a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o# endif 311a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 312a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#elif defined WIN32 313a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 314a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o static char buf[2 + 10 + 1]; 315a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 316a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Woe32 has a function returning the locale's codepage as a number. */ 317a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o sprintf (buf, "CP%u", GetACP ()); 318a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o codeset = buf; 319a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 320a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#elif defined OS2 321a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 322a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o const char *locale; 323a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o static char buf[2 + 10 + 1]; 324a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o ULONG cp[3]; 325a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o ULONG cplen; 326a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 327a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Allow user to override the codeset, as set in the operating system, 328a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o with standard language environment variables. */ 329a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o locale = getenv ("LC_ALL"); 330a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (locale == NULL || locale[0] == '\0') 331a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 332a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o locale = getenv ("LC_CTYPE"); 333a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (locale == NULL || locale[0] == '\0') 334a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o locale = getenv ("LANG"); 335a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 336a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (locale != NULL && locale[0] != '\0') 337a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 338a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* If the locale name contains an encoding after the dot, return it. */ 339a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o const char *dot = strchr (locale, '.'); 340a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 341a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (dot != NULL) 342a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 343a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o const char *modifier; 344a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 345a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o dot++; 346a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Look for the possible @... trailer and remove it, if any. */ 347a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o modifier = strchr (dot, '@'); 348a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (modifier == NULL) 349a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o return dot; 350a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (modifier - dot < sizeof (buf)) 351a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 352a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o memcpy (buf, dot, modifier - dot); 353a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o buf [modifier - dot] = '\0'; 354a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o return buf; 355a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 356a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 357a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 358a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Resolve through the charset.alias file. */ 359a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o codeset = locale; 360a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 361a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o else 362a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 363a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* OS/2 has a function returning the locale's codepage as a number. */ 364a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (DosQueryCp (sizeof (cp), cp, &cplen)) 365a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o codeset = ""; 366a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o else 367a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 368a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o sprintf (buf, "CP%u", cp[0]); 369a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o codeset = buf; 370a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 371a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 372a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 373a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o#endif 374a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 375a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (codeset == NULL) 376a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* The canonical name cannot be determined. */ 377a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o codeset = ""; 378a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 379a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Resolve alias. */ 380a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o for (aliases = get_charset_aliases (); 381a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o *aliases != '\0'; 382a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1) 383a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (strcmp (codeset, aliases) == 0 384a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o || (aliases[0] == '*' && aliases[1] == '\0')) 385a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o { 386a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o codeset = aliases + strlen (aliases) + 1; 387a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o break; 388a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o } 389a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 390a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o /* Don't return an empty string. GNU libc and GNU libiconv interpret 391a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o the empty string as denoting "the locale's character encoding", 392a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o thus GNU libiconv would call this function a second time. */ 393a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o if (codeset[0] == '\0') 394a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o codeset = "ASCII"; 395a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o 396a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o return codeset; 397a04eba3f8868af1d9b7b504d3d430c55ed3dc777Theodore Ts'o} 398