1/*
2**********************************************************************
3*   Copyright (C) 1997-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File resbund.cpp
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   02/05/97    aliu        Fixed bug in chopLocale.  Added scanForLocaleInFile
13*                           based on code taken from scanForLocale.  Added
14*                           constructor which attempts to read resource bundle
15*                           from a specific file, without searching other files.
16*   02/11/97    aliu        Added UErrorCode return values to constructors. Fixed
17*                           infinite loops in scanForFile and scanForLocale.
18*                           Modified getRawResourceData to not delete storage in
19*                           localeData and resourceData which it doesn't own.
20*                           Added Mac compatibility #ifdefs for tellp() and
21*                           ios::nocreate.
22*   03/04/97    aliu        Modified to use ExpandingDataSink objects instead of
23*                           the highly inefficient ostrstream objects.
24*   03/13/97    aliu        Rewrote to load in entire resource bundle and store
25*                           it as a Hashtable of ResourceBundleData objects.
26*                           Added state table to govern parsing of files.
27*                           Modified to load locale index out of new file distinct
28*                           from default.txt.
29*   03/25/97    aliu        Modified to support 2-d arrays, needed for timezone data.
30*                           Added support for custom file suffixes.  Again, needed
31*                           to support timezone data.  Improved error handling to
32*                           detect duplicate tags and subtags.
33*   04/07/97    aliu        Fixed bug in getHashtableForLocale().  Fixed handling
34*                           of failing UErrorCode values on entry to API methods.
35*                           Fixed bugs in getArrayItem() for negative indices.
36*   04/29/97    aliu        Update to use new Hashtable deletion protocol.
37*   05/06/97    aliu        Flattened kTransitionTable for HP compiler.
38*                           Fixed usage of CharString.
39* 06/11/99      stephen     Removed parsing of .txt files.
40*                           Reworked to use new binary format.
41*                           Cleaned up.
42* 06/14/99      stephen     Removed methods taking a filename suffix.
43* 06/22/99      stephen     Added missing T_FileStream_close in parse()
44* 11/09/99      weiv        Added getLocale(), rewritten constructForLocale()
45* March 2000    weiv        complete overhaul.
46******************************************************************************
47*/
48
49#include "unicode/utypes.h"
50#include "unicode/resbund.h"
51
52#include "mutex.h"
53#include "uassert.h"
54#include "umutex.h"
55
56#include "uresimp.h"
57
58U_NAMESPACE_BEGIN
59
60/*-----------------------------------------------------------------------------
61 * Implementation Notes
62 *
63 * Resource bundles are read in once, and thereafter cached.
64 * ResourceBundle statically keeps track of which files have been
65 * read, so we are guaranteed that each file is read at most once.
66 * Resource bundles can be loaded from different data directories and
67 * will be treated as distinct, even if they are for the same locale.
68 *
69 * Resource bundles are lightweight objects, which have pointers to
70 * one or more shared Hashtable objects containing all the data.
71 * Copying would be cheap, but there is no copy constructor, since
72 * there wasn't one in the original API.
73 *
74 * The ResourceBundle parsing mechanism is implemented as a transition
75 * network, for easy maintenance and modification.  The network is
76 * implemented as a matrix (instead of in code) to make this even
77 * easier.  The matrix contains Transition objects.  Each Transition
78 * object describes a destination node and an action to take before
79 * moving to the destination node.  The source node is encoded by the
80 * index of the object in the array that contains it.  The pieces
81 * needed to understand the transition network are the enums for node
82 * IDs and actions, the parse() method, which walks through the
83 * network and implements the actions, and the network itself.  The
84 * network guarantees certain conditions, for example, that a new
85 * resource will not be closed until one has been opened first; or
86 * that data will not be stored into a TaggedList until a TaggedList
87 * has been created.  Nonetheless, the code in parse() does some
 * consistency checks as it runs the network, and fails with a
 * U_INTERNAL_PROGRAM_ERROR if one of these checks fails.  If the input
 * data has a bad format, a U_INVALID_FORMAT_ERROR is returned.  If you
 * see a U_INTERNAL_PROGRAM_ERROR the transition matrix has a bug in
92 * it.
93 *
94 * Old functionality of multiple locales in a single file is still
95 * supported.  For this reason, LOCALE names override FILE names.  If
96 * data for en_US is located in the en.txt file, once it is loaded,
97 * the code will not care where it came from (other than remembering
98 * which directory it came from).  However, if there is an en_US
99 * resource in en_US.txt, that will take precedence.  There is no
100 * limit to the number or type of resources that can be stored in a
101 * file, however, files are only searched in a specific way.  If
102 * en_US_CA is requested, then first en_US_CA.txt is searched, then
103 * en_US.txt, then en.txt, then default.txt.  So it only makes sense
104 * to put certain locales in certain files.  In this example, it would
105 * be logical to put en_US_CA, en_US, and en into the en.txt file,
106 * since they would be found there if asked for.  The extreme example
107 * is to place all locale resources into default.txt, which should
108 * also work.
109 *
110 * Inheritance is implemented.  For example, xx_YY_zz inherits as
111 * follows: xx_YY_zz, xx_YY, xx, default.  Inheritance is implemented
112 * as an array of hashtables.  There will be from 1 to 4 hashtables in
113 * the array.
114 *
115 * Fallback files are implemented.  The fallback pattern is Language
116 * Country Variant (LCV) -> LC -> L.  Fallback is first done for the
117 * requested locale.  Then it is done for the default locale, as
118 * returned by Locale::getDefault().  Then the special file
119 * default.txt is searched for the default locale.  The overall FILE
120 * fallback path is LCV -> LC -> L -> dLCV -> dLC -> dL -> default.
121 *
122 * Note that although file name searching includes the default locale,
123 * once a ResourceBundle object is constructed, the inheritance path
124 * no longer includes the default locale.  The path is LCV -> LC -> L
125 * -> default.
126 *
127 * File parsing is lazy.  Nothing is parsed unless it is called for by
128 * someone.  So when a ResourceBundle for xx_YY_zz is constructed,
129 * only that locale is parsed (along with anything else in the same
130 * file).  Later, if the FooBar tag is asked for, and if it isn't
131 * found in xx_YY_zz, then xx_YY.txt will be parsed and checked, and
132 * so forth, until the chain is exhausted or the tag is found.
133 *
134 * Thread-safety is implemented around caches, both the cache that
 * stores all the resource data, and the cache that stores flags
136 * indicating whether or not a file has been visited.  These caches
137 * delete their storage at static cleanup time, when the process
138 * quits.
139 *
140 * ResourceBundle supports TableCollation as a special case.  This
141 * involves having special ResourceBundle objects which DO own their
142 * data, since we don't want large collation rule strings in the
143 * ResourceBundle cache (these are already cached in the
144 * TableCollation cache).  TableCollation files (.ctx files) have the
145 * same format as normal resource data files, with a different
146 * interpretation, from the standpoint of ResourceBundle.  .ctx files
147 * are loaded into otherwise ordinary ResourceBundle objects.  They
148 * don't inherit (that's implemented by TableCollation) and they own
149 * their data (as mentioned above).  However, they still support
150 * possible multiple locales in a single .ctx file.  (This is in
151 * practice a bad idea, since you only want the one locale you're
152 * looking for, and only one tag will be present
153 * ("CollationElements"), so you don't need an inheritance chain of
154 * multiple locales.)  Up to 4 locale resources will be loaded from a
155 * .ctx file; everything after the first 4 is ignored (parsed and
156 * deleted).  (Normal .txt files have no limit.)  Instead of being
157 * loaded into the cache, and then looked up as needed, the locale
158 * resources are read straight into the ResourceBundle object.
159 *
160 * The Index, which used to reside in default.txt, has been moved to a
161 * new file, index.txt.  This file contains a slightly modified format
162 * with the addition of the "InstalledLocales" tag; it looks like:
163 *
164 * Index {
165 *   InstalledLocales {
166 *     ar
167 *     ..
168 *     zh_TW
169 *   }
170 * }
171 */
172//-----------------------------------------------------------------------------
173
174UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle)
175
176ResourceBundle::ResourceBundle(UErrorCode &err)
177                                :UObject(), fLocale(NULL)
178{
179    fResource = ures_open(0, Locale::getDefault().getName(), &err);
180}
181
182ResourceBundle::ResourceBundle(const ResourceBundle &other)
183                              :UObject(other), fLocale(NULL)
184{
185    UErrorCode status = U_ZERO_ERROR;
186
187    if (other.fResource) {
188        fResource = ures_copyResb(0, other.fResource, &status);
189    } else {
190        /* Copying a bad resource bundle */
191        fResource = NULL;
192    }
193}
194
195ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err)
196                               :UObject(), fLocale(NULL)
197{
198    if (res) {
199        fResource = ures_copyResb(0, res, &err);
200    } else {
201        /* Copying a bad resource bundle */
202        fResource = NULL;
203    }
204}
205
206ResourceBundle::ResourceBundle(const char* path, const Locale& locale, UErrorCode& err)
207                               :UObject(), fLocale(NULL)
208{
209    fResource = ures_open(path, locale.getName(), &err);
210}
211
212
213ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
214{
215    if(this == &other) {
216        return *this;
217    }
218    if(fResource != 0) {
219        ures_close(fResource);
220        fResource = NULL;
221    }
222    if (fLocale != NULL) {
223        delete fLocale;
224        fLocale = NULL;
225    }
226    UErrorCode status = U_ZERO_ERROR;
227    if (other.fResource) {
228        fResource = ures_copyResb(0, other.fResource, &status);
229    } else {
230        /* Copying a bad resource bundle */
231        fResource = NULL;
232    }
233    return *this;
234}
235
236ResourceBundle::~ResourceBundle()
237{
238    if(fResource != 0) {
239        ures_close(fResource);
240    }
241    if(fLocale != NULL) {
242      delete(fLocale);
243    }
244}
245
246ResourceBundle *
247ResourceBundle::clone() const {
248    return new ResourceBundle(*this);
249}
250
251UnicodeString ResourceBundle::getString(UErrorCode& status) const {
252    int32_t len = 0;
253    const UChar *r = ures_getString(fResource, &len, &status);
254    return UnicodeString(TRUE, r, len);
255}
256
257const uint8_t *ResourceBundle::getBinary(int32_t& len, UErrorCode& status) const {
258    return ures_getBinary(fResource, &len, &status);
259}
260
261const int32_t *ResourceBundle::getIntVector(int32_t& len, UErrorCode& status) const {
262    return ures_getIntVector(fResource, &len, &status);
263}
264
265uint32_t ResourceBundle::getUInt(UErrorCode& status) const {
266    return ures_getUInt(fResource, &status);
267}
268
269int32_t ResourceBundle::getInt(UErrorCode& status) const {
270    return ures_getInt(fResource, &status);
271}
272
273const char *ResourceBundle::getName(void) const {
274    return ures_getName(fResource);
275}
276
277const char *ResourceBundle::getKey(void) const {
278    return ures_getKey(fResource);
279}
280
281UResType ResourceBundle::getType(void) const {
282    return ures_getType(fResource);
283}
284
285int32_t ResourceBundle::getSize(void) const {
286    return ures_getSize(fResource);
287}
288
289UBool ResourceBundle::hasNext(void) const {
290    return ures_hasNext(fResource);
291}
292
293void ResourceBundle::resetIterator(void) {
294    ures_resetIterator(fResource);
295}
296
297ResourceBundle ResourceBundle::getNext(UErrorCode& status) {
298    UResourceBundle r;
299
300    ures_initStackObject(&r);
301    ures_getNextResource(fResource, &r, &status);
302    ResourceBundle res(&r, status);
303    if (U_SUCCESS(status)) {
304        ures_close(&r);
305    }
306    return res;
307}
308
309UnicodeString ResourceBundle::getNextString(UErrorCode& status) {
310    int32_t len = 0;
311    const UChar* r = ures_getNextString(fResource, &len, 0, &status);
312    return UnicodeString(TRUE, r, len);
313}
314
315UnicodeString ResourceBundle::getNextString(const char ** key, UErrorCode& status) {
316    int32_t len = 0;
317    const UChar* r = ures_getNextString(fResource, &len, key, &status);
318    return UnicodeString(TRUE, r, len);
319}
320
321ResourceBundle ResourceBundle::get(int32_t indexR, UErrorCode& status) const {
322    UResourceBundle r;
323
324    ures_initStackObject(&r);
325    ures_getByIndex(fResource, indexR, &r, &status);
326    ResourceBundle res(&r, status);
327    if (U_SUCCESS(status)) {
328        ures_close(&r);
329    }
330    return res;
331}
332
333UnicodeString ResourceBundle::getStringEx(int32_t indexS, UErrorCode& status) const {
334    int32_t len = 0;
335    const UChar* r = ures_getStringByIndex(fResource, indexS, &len, &status);
336    return UnicodeString(TRUE, r, len);
337}
338
339ResourceBundle ResourceBundle::get(const char* key, UErrorCode& status) const {
340    UResourceBundle r;
341
342    ures_initStackObject(&r);
343    ures_getByKey(fResource, key, &r, &status);
344    ResourceBundle res(&r, status);
345    if (U_SUCCESS(status)) {
346        ures_close(&r);
347    }
348    return res;
349}
350
351ResourceBundle ResourceBundle::getWithFallback(const char* key, UErrorCode& status){
352    UResourceBundle r;
353    ures_initStackObject(&r);
354    ures_getByKeyWithFallback(fResource, key, &r, &status);
355    ResourceBundle res(&r, status);
356    if(U_SUCCESS(status)){
357        ures_close(&r);
358    }
359    return res;
360}
361UnicodeString ResourceBundle::getStringEx(const char* key, UErrorCode& status) const {
362    int32_t len = 0;
363    const UChar* r = ures_getStringByKey(fResource, key, &len, &status);
364    return UnicodeString(TRUE, r, len);
365}
366
367const char*
368ResourceBundle::getVersionNumber()  const
369{
370    return ures_getVersionNumberInternal(fResource);
371}
372
373void ResourceBundle::getVersion(UVersionInfo versionInfo) const {
374    ures_getVersion(fResource, versionInfo);
375}
376
377static UMutex gLocaleLock = U_MUTEX_INITIALIZER;
378const Locale &ResourceBundle::getLocale(void) const {
379    Mutex lock(&gLocaleLock);
380    if (fLocale != NULL) {
381        return *fLocale;
382    }
383    UErrorCode status = U_ZERO_ERROR;
384    const char *localeName = ures_getLocaleInternal(fResource, &status);
385    ResourceBundle *ncThis = const_cast<ResourceBundle *>(this);
386    ncThis->fLocale = new Locale(localeName);
387    return ncThis->fLocale != NULL ? *ncThis->fLocale : Locale::getDefault();
388}
389
390const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
391{
392  return ures_getLocaleByType(fResource, type, &status);
393}
394
395U_NAMESPACE_END
396//eof
397