146b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wangfrom __future__ import absolute_import, division, unicode_literals
246b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
346b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wangfrom types import ModuleType
446b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
546b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wangfrom six import text_type
646b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
746b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wangtry:
846b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang    import xml.etree.cElementTree as default_etree
946b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wangexcept ImportError:
1046b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang    import xml.etree.ElementTree as default_etree
1146b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
1246b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
# Names exported by ``from <this module> import *``.
__all__ = [
    "default_etree",
    "MethodDispatcher",
    "isSurrogatePair",
    "surrogatePairToCodepoint",
    "moduleFactoryFactory",
    "supports_lone_surrogates",
]
1646b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
1746b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
1846b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
1946b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang# caught by the below test. In general this would be any platform
2046b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang# using UTF-16 as its encoding of unicode strings, such as
2146b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang# Jython. This is because UTF-16 itself is based on the use of such
2246b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang# surrogates, and there is no mechanism to further escape such
2346b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang# escapes.
# Probe, once at import time, whether a string literal holding a lone
# surrogate can be built on this interpreter. The eval() calls only ever see
# the constant literals written here, never external input.
try:
    _x = eval('"\\uD800"')
    if not isinstance(_x, text_type):
        # We need this with u"" because of http://bugs.jython.org/issue2039
        _x = eval('u"\\uD800"')
        # Explicit raise rather than ``assert``: asserts are stripped under
        # ``python -O``, which would let a failed probe report success.
        if not isinstance(_x, text_type):
            raise TypeError("lone surrogate literal did not evaluate to text")
except Exception:
    # Any ordinary failure means no support. KeyboardInterrupt and SystemExit
    # are deliberately not swallowed here.
    supports_lone_surrogates = False
else:
    supports_lone_surrogates = True
3446b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
3546b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
class MethodDispatcher(dict):
    """Dict with 2 special properties:

    On initiation, keys that are lists, sets, frozensets or tuples are
    expanded into multiple keys, so accessing any one of the items in the
    original collection returns the matching value:

        md = MethodDispatcher({("foo", "bar"): "baz"})
        md["foo"] == "baz"
        md["bar"] == "baz"

    ``items`` may be a dict or an iterable of ``(key, value)`` pairs.

    Looking up a missing key with ``md[key]`` does not raise ``KeyError``;
    it returns the ``default`` attribute, which is ``None`` until changed.
    """

    def __init__(self, items=()):
        # A dict iterates over its keys only, so unpacking ``name, value``
        # from it directly would split each key instead of yielding pairs.
        if isinstance(items, dict):
            items = items.items()

        # Using _dictEntries instead of directly assigning to self is about
        # twice as fast. Please do careful performance testing before changing
        # anything here.
        _dictEntries = []
        for name, value in items:
            if isinstance(name, (list, tuple, frozenset, set)):
                # One entry per member, all sharing the same value.
                for item in name:
                    _dictEntries.append((item, value))
            else:
                _dictEntries.append((name, value))
        dict.__init__(self, _dictEntries)
        # Value returned by __getitem__ for keys that are not present.
        self.default = None

    def __getitem__(self, key):
        """Return the value stored for key, or ``self.default`` if absent."""
        return dict.get(self, key, self.default)
6546b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
6646b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds
6946b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
def isSurrogatePair(data):
    """Return True if data is exactly a high surrogate followed by a low one.

    data must have length 2, with its first item in U+D800..U+DBFF and its
    second item in U+DC00..U+DFFF; anything else gives False.
    """
    if len(data) != 2:
        return False
    # Check the leading unit first so the trailing unit is only inspected
    # when the leading one qualifies.
    if not 0xD800 <= ord(data[0]) <= 0xDBFF:
        return False
    return 0xDC00 <= ord(data[1]) <= 0xDFFF
7446b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
7546b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
def surrogatePairToCodepoint(data):
    """Combine the surrogate pair in data[0], data[1] into one code point.

    No validation is done here; the two items are assumed to already be a
    high and a low surrogate.
    """
    high_offset = ord(data[0]) - 0xD800
    low_offset = ord(data[1]) - 0xDC00
    # The high surrogate supplies the upper bits (shifted by 10, i.e. times
    # 0x400) and the low surrogate the lower bits, on top of the 0x10000 base.
    return 0x10000 + (high_offset << 10) + low_offset
8046b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
8146b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang# Module Factory Factory (no, this isn't Java, I know)
8246b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang# Here to stop this being duplicated all over the place.
8346b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
8446b43bff003ceda46cf9a5d40a47f7674996d2e0Zhen Wang
def moduleFactoryFactory(factory):
    """Wrap factory in a caching function that returns module objects.

    factory is called as ``factory(baseModule, *args, **kwargs)`` and must
    return a mapping of names to objects. The returned ``moduleFactory``
    copies that mapping into a new module named ``_<baseModule name>_factory``
    and returns the module.

    Results are cached per base module name and per argument set, so repeated
    calls with the same arguments return the same module object, while calls
    with different arguments get a module built for those arguments. If an
    argument is unhashable the cache is bypassed and a new module is built on
    every call.
    """
    moduleCache = {}

    def moduleFactory(baseModule, *args, **kwargs):
        # Build the module name with the same string type ModuleType itself
        # uses for names (native str differs between Python 2 and 3 when
        # unicode_literals is in effect).
        if isinstance(ModuleType.__name__, type("")):
            name = "_%s_factory" % baseModule.__name__
        else:
            name = b"_%s_factory" % baseModule.__name__

        # Keyword names are unique, so sorting the items never has to
        # compare the values themselves.
        cacheKey = (name, args, tuple(sorted(kwargs.items())))
        try:
            return moduleCache[cacheKey]
        except KeyError:
            cacheable = True
        except TypeError:
            # Some argument is unhashable; it cannot be used as a cache key.
            cacheable = False

        mod = ModuleType(name)
        objs = factory(baseModule, *args, **kwargs)
        mod.__dict__.update(objs)
        if cacheable:
            moduleCache[cacheKey] = mod
        return mod

    return moduleFactory
104