1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 2011-2016, International Business Machines Corporation
6 * All Rights Reserved.
7 *******************************************************************************
8 */
9package com.ibm.icu.util;
10
11import java.util.ArrayList;
12import java.util.Arrays;
13import java.util.Collections;
14import java.util.HashMap;
15import java.util.List;
16import java.util.Map;
17import java.util.Set;
18import java.util.TreeSet;
19
20import com.ibm.icu.impl.ICUData;
21import com.ibm.icu.impl.ICUResourceBundle;
22
23/**
24 * <code>Region</code> is the class representing a Unicode Region Code, also known as a
25 * Unicode Region Subtag, which is defined based upon the BCP 47 standard. We often think of
 * "regions" as "countries" when defining the characteristics of a locale.  There are different
 * types of region codes that are important to distinguish.
28 * <p>
29 *  Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or
30 *  selected economic and other grouping" as defined in
31 *  UN M.49 (http://unstats.un.org/unsd/methods/m49/m49regin.htm).
32 *  These are typically 3-digit codes, but contain some 2-letter codes, such as the LDML code QO
33 *  added for Outlying Oceania.  Not all UNM.49 codes are defined in LDML, but most of them are.
34 *  Macroregions are represented in ICU by one of three region types: WORLD ( region code 001 ),
35 *  CONTINENTS ( regions contained directly by WORLD ), and SUBCONTINENTS ( things contained directly
36 *  by a continent ).
37 *  <p>
38 *  TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also
39 *  include areas that are not separate countries, such as the code "AQ" for Antarctica or the code
40 *  "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate
41 *  codes. The codes are typically 2-letter codes aligned with the ISO 3166 standard, but BCP47 allows
42 *  for the use of 3-digit codes in the future.
43 *  <p>
44 *  UNKNOWN - The code ZZ is defined by Unicode LDML for use to indicate that the Region is unknown,
45 *  or that the value supplied as a region was invalid.
46 *  <p>
47 *  DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage,
48 *  usually due to a country splitting into multiple territories or changing its name.
49 *  <p>
50 *  GROUPING - A widely understood grouping of territories that has a well defined membership such
 *  that a region code has been assigned for it.  Some of these are UNM.49 codes that don't fall into
52 *  the world/continent/sub-continent hierarchy, while others are just well known groupings that have
53 *  their own region code. Region "EU" (European Union) is one such region code that is a grouping.
54 *  Groupings will never be returned by the getContainingRegion() API, since a different type of region
55 *  ( WORLD, CONTINENT, or SUBCONTINENT ) will always be the containing region instead.
56 *
57 * @author       John Emmons
58 * @stable ICU 50
59 */
60
61public class Region implements Comparable<Region> {
62
63    /**
64     * RegionType is an enumeration defining the different types of regions.  Current possible
65     * values are WORLD, CONTINENT, SUBCONTINENT, TERRITORY, GROUPING, DEPRECATED, and UNKNOWN.
66     *
67     * @stable ICU 50
68     */
69
70    public enum RegionType {
71        /**
72         * Type representing the unknown region.
73         * @stable ICU 50
74         */
75        UNKNOWN,
76
77        /**
78         * Type representing a territory.
79         * @stable ICU 50
80         */
81        TERRITORY,
82
83        /**
84         * Type representing the whole world.
85         * @stable ICU 50
86         */
87        WORLD,
88        /**
89         * Type representing a continent.
90         * @stable ICU 50
91         */
92        CONTINENT,
93        /**
94         * Type representing a sub-continent.
95         * @stable ICU 50
96         */
97        SUBCONTINENT,
98        /**
99         * Type representing a grouping of territories that is not to be used in
100         * the normal WORLD/CONTINENT/SUBCONTINENT/TERRITORY containment tree.
101         * @stable ICU 50
102         */
103        GROUPING,
104        /**
105         * Type representing a region whose code has been deprecated, usually
106         * due to a country splitting into multiple territories or changing its name.
107         * @stable ICU 50
108         */
109        DEPRECATED,
110    }
111
    // Identifier of this region; this is what toString() returns and what compareTo() orders by.
    private String id;
    // Numeric code of this region, or -1 when loadRegionData() found no numeric code for it.
    private int code;
    // Classification of this region; assigned (and possibly reassigned) while the region data is loaded.
    private RegionType type;
    // Parent of this region in the containment tree; stays null when no non-GROUPING parent is recorded.
    private Region containingRegion = null;
    // Direct children of this region, kept sorted by region ID via compareTo().
    private Set<Region> containedRegions = new TreeSet<Region>();
    // Replacement regions; only populated by loadRegionData() for regions it marks as DEPRECATED.
    private List<Region> preferredValues = null;

    // Set to true at the end of loadRegionData() so that the data is only loaded once.
    private static boolean regionDataIsLoaded = false;

    private static Map<String,Region> regionIDMap = null;       // Map from region ID to the region
    private static Map<Integer,Region> numericCodeMap = null;   // Map from numeric code to the region
    private static Map<String,Region> regionAliases = null;     // Map from alias (territory alias or 3-letter code) to the region

    private static ArrayList<Region> regions = null;            // This is the main data structure where the Regions are stored.
    // One set of regions per RegionType, indexed by RegionType.ordinal().
    private static ArrayList<Set<Region>> availableRegions = null;

    // Region IDs that receive special treatment when the region types are assigned.
    private static final String UNKNOWN_REGION_ID = "ZZ";
    private static final String OUTLYING_OCEANIA_REGION_ID = "QO";
    private static final String WORLD_ID = "001";
131
132    /*
133     * Private default constructor.  Use factory methods only.
134     */
135    private Region () {}
136
137    /*
138     * Initializes the region data from the ICU resource bundles.  The region data
139     * contains the basic relationships such as which regions are known, what the numeric
140     * codes are, any known aliases, and the territory containment data.
141     *
142     * If the region data has already loaded, then this method simply returns without doing
143     * anything meaningful.
144     *
145     */
146    private static synchronized void loadRegionData() {
147
148        if ( regionDataIsLoaded ) {
149            return;
150        }
151
152        regionAliases = new HashMap<String,Region>();
153        regionIDMap = new HashMap<String,Region>();
154        numericCodeMap = new HashMap<Integer,Region>();
155
156        availableRegions = new ArrayList<Set<Region>>(RegionType.values().length);
157
158
159        UResourceBundle metadataAlias = null;
160        UResourceBundle territoryAlias = null;
161        UResourceBundle codeMappings = null;
162        UResourceBundle idValidity = null;
163        UResourceBundle regionList = null;
164        UResourceBundle regionRegular = null;
165        UResourceBundle regionMacro = null;
166        UResourceBundle regionUnknown = null;
167        UResourceBundle worldContainment = null;
168        UResourceBundle territoryContainment = null;
169        UResourceBundle groupingContainment = null;
170
171        UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER);
172        metadataAlias = metadata.get("alias");
173        territoryAlias = metadataAlias.get("territory");
174
175        UResourceBundle supplementalData = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER);
176        codeMappings = supplementalData.get("codeMappings");
177        idValidity = supplementalData.get("idValidity");
178        regionList = idValidity.get("region");
179        regionRegular = regionList.get("regular");
180        regionMacro = regionList.get("macroregion");
181        regionUnknown = regionList.get("unknown");
182
183        territoryContainment = supplementalData.get("territoryContainment");
184        worldContainment = territoryContainment.get("001");
185        groupingContainment = territoryContainment.get("grouping");
186
187        String[] continentsArr = worldContainment.getStringArray();
188        List<String> continents = Arrays.asList(continentsArr);
189        String[] groupingArr = groupingContainment.getStringArray();
190        List<String> groupings = Arrays.asList(groupingArr);
191        List<String> regionCodes = new ArrayList<String>();
192
193        List<String> allRegions = new ArrayList<String>();
194        allRegions.addAll(Arrays.asList(regionRegular.getStringArray()));
195        allRegions.addAll(Arrays.asList(regionMacro.getStringArray()));
196        allRegions.add(regionUnknown.getString());
197
198        for ( String r : allRegions ) {
199            int rangeMarkerLocation = r.indexOf("~");
200            if ( rangeMarkerLocation > 0 ) {
201                StringBuilder regionName = new StringBuilder(r);
202                char endRange = regionName.charAt(rangeMarkerLocation+1);
203                regionName.setLength(rangeMarkerLocation);
204                char lastChar = regionName.charAt(rangeMarkerLocation-1);
205                while ( lastChar <= endRange ) {
206                    String newRegion = regionName.toString();
207                    regionCodes.add(newRegion);
208                    lastChar++;
209                    regionName.setCharAt(rangeMarkerLocation-1,lastChar);
210                }
211            } else {
212                regionCodes.add(r);
213            }
214        }
215
216        regions = new ArrayList<Region>(regionCodes.size());
217
218        // First process the region codes and create the master array of regions.
219        for ( String id : regionCodes) {
220            Region r = new Region();
221            r.id = id;
222            r.type = RegionType.TERRITORY; // Only temporary - figure out the real type later once the aliases are known.
223            regionIDMap.put(id, r);
224            if ( id.matches("[0-9]{3}")) {
225                r.code = Integer.valueOf(id).intValue();
226                numericCodeMap.put(r.code, r);
227                r.type = RegionType.SUBCONTINENT;
228            } else {
229                r.code = -1;
230            }
231            regions.add(r);
232        }
233
234
235        // Process the territory aliases
236        for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) {
237            UResourceBundle res = territoryAlias.get(i);
238            String aliasFrom = res.getKey();
239            String aliasTo = res.get("replacement").getString();
240
241            if ( regionIDMap.containsKey(aliasTo) && !regionIDMap.containsKey(aliasFrom) ) { // This is just an alias from some string to a region
242                regionAliases.put(aliasFrom, regionIDMap.get(aliasTo));
243            } else {
244                Region r;
245                if ( regionIDMap.containsKey(aliasFrom) ) {  // This is a deprecated region
246                    r = regionIDMap.get(aliasFrom);
247                } else { // Deprecated region code not in the master codes list - so need to create a deprecated region for it.
248                    r = new Region();
249                    r.id = aliasFrom;
250                    regionIDMap.put(aliasFrom, r);
251                    if ( aliasFrom.matches("[0-9]{3}")) {
252                        r.code = Integer.valueOf(aliasFrom).intValue();
253                        numericCodeMap.put(r.code, r);
254                    } else {
255                        r.code = -1;
256                    }
257                    regions.add(r);
258                }
259                r.type = RegionType.DEPRECATED;
260                List<String> aliasToRegionStrings = Arrays.asList(aliasTo.split(" "));
261                r.preferredValues = new ArrayList<Region>();
262                for ( String s : aliasToRegionStrings ) {
263                    if (regionIDMap.containsKey(s)) {
264                        r.preferredValues.add(regionIDMap.get(s));
265                    }
266                }
267            }
268        }
269
270        // Process the code mappings - This will allow us to assign numeric codes to most of the territories.
271        for ( int i = 0 ; i < codeMappings.getSize(); i++ ) {
272            UResourceBundle mapping = codeMappings.get(i);
273            if ( mapping.getType() == UResourceBundle.ARRAY ) {
274                String [] codeMappingStrings = mapping.getStringArray();
275                String codeMappingID = codeMappingStrings[0];
276                Integer codeMappingNumber = Integer.valueOf(codeMappingStrings[1]);
277                String codeMapping3Letter = codeMappingStrings[2];
278
279                if ( regionIDMap.containsKey(codeMappingID)) {
280                    Region r = regionIDMap.get(codeMappingID);
281                    r.code = codeMappingNumber.intValue();
282                    numericCodeMap.put(r.code, r);
283                    regionAliases.put(codeMapping3Letter, r);
284                }
285            }
286        }
287
288        // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
289        Region r;
290        if ( regionIDMap.containsKey(WORLD_ID)) {
291            r = regionIDMap.get(WORLD_ID);
292            r.type = RegionType.WORLD;
293        }
294
295        if ( regionIDMap.containsKey(UNKNOWN_REGION_ID)) {
296            r = regionIDMap.get(UNKNOWN_REGION_ID);
297            r.type = RegionType.UNKNOWN;
298        }
299
300        for ( String continent : continents ) {
301            if (regionIDMap.containsKey(continent)) {
302                r = regionIDMap.get(continent);
303                r.type = RegionType.CONTINENT;
304            }
305        }
306
307        for ( String grouping : groupings ) {
308            if (regionIDMap.containsKey(grouping)) {
309                r = regionIDMap.get(grouping);
310                r.type = RegionType.GROUPING;
311            }
312        }
313
314        // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
315        // even though it looks like a territory code.  Need to handle it here.
316
317        if ( regionIDMap.containsKey(OUTLYING_OCEANIA_REGION_ID)) {
318            r = regionIDMap.get(OUTLYING_OCEANIA_REGION_ID);
319            r.type = RegionType.SUBCONTINENT;
320        }
321
322        // Load territory containment info from the supplemental data.
323        for ( int i = 0 ; i < territoryContainment.getSize(); i++ ) {
324            UResourceBundle mapping = territoryContainment.get(i);
325            String parent = mapping.getKey();
326            if (parent.equals("containedGroupings") || parent.equals("deprecated")) {
327                continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip.
328                // #11232 is to do something useful with these.
329            }
330            Region parentRegion = regionIDMap.get(parent);
331            for ( int j = 0 ; j < mapping.getSize(); j++ ) {
332                String child = mapping.getString(j);
333                Region childRegion = regionIDMap.get(child);
334                if ( parentRegion != null && childRegion != null ) {
335
336                    // Add the child region to the set of regions contained by the parent
337                    parentRegion.containedRegions.add(childRegion);
338
339                    // Set the parent region to be the containing region of the child.
340                    // Regions of type GROUPING can't be set as the parent, since another region
341                    // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
342                    if ( parentRegion.getType() != RegionType.GROUPING) {
343                        childRegion.containingRegion = parentRegion;
344                    }
345                }
346            }
347        }
348
349        // Create the availableRegions lists
350
351        for (int i = 0 ; i < RegionType.values().length ; i++) {
352            availableRegions.add(new TreeSet<Region>());
353        }
354
355        for ( Region ar : regions ) {
356            Set<Region> currentSet = availableRegions.get(ar.type.ordinal());
357            currentSet.add(ar);
358            availableRegions.set(ar.type.ordinal(),currentSet);
359        }
360
361        regionDataIsLoaded = true;
362    }
363
364    /** Returns a Region using the given region ID.  The region ID can be either a 2-letter ISO code,
365     * 3-letter ISO code,  UNM.49 numeric code, or other valid Unicode Region Code as defined by the CLDR.
366     * @param id The id of the region to be retrieved.
367     * @return The corresponding region.
368     * @throws NullPointerException if the supplied id is null.
369     * @throws IllegalArgumentException if the supplied ID cannot be canonicalized to a Region ID that is known by ICU.
370     * @stable ICU 50
371     */
372
373    public static Region getInstance(String id) {
374
375        if ( id == null ) {
376            throw new NullPointerException();
377        }
378
379        loadRegionData();
380
381        Region r = regionIDMap.get(id);
382
383        if ( r == null ) {
384            r = regionAliases.get(id);
385        }
386
387        if ( r == null ) {
388            throw new IllegalArgumentException("Unknown region id: " + id);
389        }
390
391        if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) {
392            r = r.preferredValues.get(0);
393        }
394
395        return r;
396    }
397
398
399    /** Returns a Region using the given numeric code as defined by UNM.49
400     * @param code The numeric code of the region to be retrieved.
401     * @return The corresponding region.
402     * @throws IllegalArgumentException if the supplied numeric code is not recognized.
403     * @stable ICU 50
404     */
405
406    public static Region getInstance(int code) {
407
408        loadRegionData();
409
410        Region r = numericCodeMap.get(code);
411
412        if ( r == null ) { // Just in case there's an alias that's numeric, try to find it.
413            String pad = "";
414            if ( code < 10 ) {
415                pad = "00";
416            } else if ( code < 100 ) {
417                pad = "0";
418            }
419            String id = pad + Integer.toString(code);
420            r = regionAliases.get(id);
421        }
422
423        if ( r == null ) {
424            throw new IllegalArgumentException("Unknown region code: " + code);
425        }
426
427        if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) {
428            r = r.preferredValues.get(0);
429        }
430
431        return r;
432    }
433
434
435    /** Used to retrieve all available regions of a specific type.
436     *
437     * @param type The type of regions to be returned ( TERRITORY, MACROREGION, etc. )
438     * @return An unmodifiable set of all known regions that match the given type.
439     * @stable ICU 50
440     */
441
442    public static Set<Region> getAvailable(RegionType type) {
443
444        loadRegionData();
445        return Collections.unmodifiableSet(availableRegions.get(type.ordinal()));
446    }
447
448
449    /** Used to determine the macroregion that geographically contains this region.
450     *
451     * @return The region that geographically contains this region.  Returns NULL if this region is
452     *  code "001" (World) or "ZZ" (Unknown region).  For example, calling this method with region "IT" (Italy)
453     *  returns the region "039" (Southern Europe).
454     * @stable ICU 50
455     */
456
457    public Region getContainingRegion() {
458        loadRegionData();
459        return containingRegion;
460    }
461
462    /** Used to determine the macroregion that geographically contains this region and that matches the given type.
463     *
464     * @return The region that geographically contains this region and matches the given type.  May return NULL if
465     *  no containing region can be found that matches the given type.  For example, calling this method with region "IT" (Italy)
466     *  and type CONTINENT returns the region "150" (Europe).
467     * @stable ICU 50
468     */
469
470    public Region getContainingRegion(RegionType type) {
471        loadRegionData();
472        if ( containingRegion == null ) {
473            return null;
474        }
475        if ( containingRegion.type.equals(type)) {
476            return containingRegion;
477        } else {
478            return containingRegion.getContainingRegion(type);
479        }
480    }
481
482    /** Used to determine the sub-regions that are contained within this region.
483     *
484     * @return An unmodifiable set containing all the regions that are immediate children
485     * of this region in the region hierarchy.  These returned regions could be either macro
486     * regions, territories, or a mixture of the two, depending on the containment data as defined
487     * in CLDR.  This API may return an empty set if this region doesn't have any sub-regions.
488     * For example, calling this method with region "150" (Europe) returns a set containing
489     * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe)
490     * - "154" (Northern Europe) and "155" (Western Europe).
491     *
492     * @stable ICU 50
493     */
494
495    public Set<Region> getContainedRegions() {
496        loadRegionData();
497        return Collections.unmodifiableSet(containedRegions);
498    }
499
500    /** Used to determine all the regions that are contained within this region and that match the given type
501     *
502     * @return An unmodifiable set containing all the regions that are children of this region
503     * anywhere in the region hierarchy and match the given type.  This API may return an empty set
504     * if this region doesn't have any sub-regions that match the given type.
505     * For example, calling this method with region "150" (Europe) and type "TERRITORY" returns a set
506     *  containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. )
507     * @stable ICU 50
508     */
509
510    public Set<Region> getContainedRegions(RegionType type) {
511
512        loadRegionData();
513
514        Set<Region> result = new TreeSet<Region>();
515        Set<Region> cr = getContainedRegions();
516
517        for ( Region r : cr ) {
518            if ( r.getType() == type ) {
519                result.add(r);
520            } else {
521                result.addAll(r.getContainedRegions(type));
522            }
523        }
524        return Collections.unmodifiableSet(result);
525    }
526
527    /**
528     * @return For deprecated regions, return an unmodifiable list of the regions that are the preferred replacement regions for this region.
529     * Returns null for a non-deprecated region.  For example, calling this method with region "SU" (Soviet Union) would
530     * return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc...
531     *
532     * @stable ICU 50
533     */
534    public List<Region> getPreferredValues() {
535
536        loadRegionData();
537
538        if ( type == RegionType.DEPRECATED) {
539            return Collections.unmodifiableList(preferredValues);
540        } else {
541            return null;
542        }
543    }
544
545    /**
546     * @return Returns true if this region contains the supplied other region anywhere in the region hierarchy.
547     *
548     * @stable ICU 50
549     */
550    public boolean contains(Region other) {
551
552        loadRegionData();
553
554        if (containedRegions.contains(other)) {
555            return true;
556        } else {
557            for (Region cr : containedRegions) {
558                if (cr.contains(other)) {
559                    return true;
560                }
561            }
562        }
563
564        return false;
565    }
566
567    /** Returns the string representation of this region
568     *
569     * @return The string representation of this region, which is its ID.
570     *
571     * @stable ICU 50
572     */
573
574    public String toString() {
575        return id;
576    }
577
578    /**
579     * Returns the numeric code for this region
580     *
581     * @return The numeric code for this region. Returns a negative value if the given region does not have a numeric
582     *         code assigned to it. This is a very rare case and only occurs for a few very small territories.
583     *
584     * @stable ICU 50
585     */
586
587    public int getNumericCode() {
588        return code;
589    }
590
591    /** Returns this region's type.
592     *
593     * @return This region's type classification, such as MACROREGION or TERRITORY.
594     *
595     * @stable ICU 50
596     */
597
598    public RegionType getType() {
599        return type;
600    }
601
602    /**
603     * {@inheritDoc}
604     * @stable ICU 50
605     */
606    public int compareTo(Region other) {
607        return id.compareTo(other.id);
608    }
609}
610