from __future__ import absolute_import, division, unicode_literals

from types import ModuleType

from six import text_type

try:
    import xml.etree.cElementTree as default_etree
except ImportError:
    import xml.etree.ElementTree as default_etree


__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
           "surrogatePairToCodepoint", "moduleFactoryFactory",
           "supports_lone_surrogates"]


# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
    # eval() is applied to fixed literals only, never to external data.
    # Any exception raised while building or checking the literal is taken
    # to mean the platform cannot hold a lone surrogate in a text string.
    _x = eval('"\\uD800"')
    if not isinstance(_x, text_type):
        # We need this with u"" because of http://bugs.jython.org/issue2039
        _x = eval('u"\\uD800"')
        if not isinstance(_x, text_type):
            # Explicit raise instead of ``assert`` so the check survives -O.
            raise TypeError("lone surrogate literal is not a text string")
except Exception:
    # Deliberately broad, but no longer swallows KeyboardInterrupt/SystemExit.
    supports_lone_surrogates = False
else:
    supports_lone_surrogates = True


class MethodDispatcher(dict):
    """Dict with 2 special properties:

    On initiation, keys that are lists, sets or tuples are converted to
    multiple keys so accessing any one of the items in the original
    list-like object returns the matching value

    md = MethodDispatcher({("foo", "bar"): "baz"})
    md["foo"] == "baz"

    A default value which can be set through the default attribute.
    Subscript access (``md[key]``) returns that default instead of raising
    KeyError when the key is missing.
    """

    def __init__(self, items=()):
        """Build the dispatcher.

        ``items`` is either an iterable of ``(key, value)`` pairs or a dict.
        A key that is a list, tuple, set or frozenset is expanded so that
        each of its members maps to the same value.
        """
        # Iterating a dict directly yields its keys, which the pair
        # unpacking below would silently split apart; read its pairs instead.
        if isinstance(items, dict):
            items = items.items()

        # Using _dictEntries instead of directly assigning to self is about
        # twice as fast. Please do careful performance testing before changing
        # anything here.
        _dictEntries = []
        for name, value in items:
            if isinstance(name, (list, tuple, frozenset, set)):
                for item in name:
                    _dictEntries.append((item, value))
            else:
                _dictEntries.append((name, value))
        dict.__init__(self, _dictEntries)
        self.default = None

    def __getitem__(self, key):
        """Return the value stored for ``key``, or ``self.default``."""
        return dict.get(self, key, self.default)


# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds

def isSurrogatePair(data):
    """Return True if ``data`` is a high surrogate followed by a low one.

    ``data`` must have length exactly 2, with the first character in
    0xD800-0xDBFF and the second in 0xDC00-0xDFFF.
    """
    return (len(data) == 2 and
            0xD800 <= ord(data[0]) <= 0xDBFF and
            0xDC00 <= ord(data[1]) <= 0xDFFF)


def surrogatePairToCodepoint(data):
    """Combine the surrogate pair ``data`` into a single code point.

    No validation is performed here; use ``isSurrogatePair`` first when the
    input is not known to be a pair.
    """
    return (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
            (ord(data[1]) - 0xDC00))


# Module Factory Factory (no, this isn't Java, I know)
# Here to stop this being duplicated all over the place.

def moduleFactoryFactory(factory):
    """Wrap ``factory`` so that its result is exposed as a cached module.

    ``factory(baseModule, *args, **kwargs)`` must return a mapping of names
    to objects. The returned ``moduleFactory`` callable creates a module
    named ``_<baseModule.__name__>_factory``, fills it from that mapping
    and caches it. The cache key covers the module name together with the
    positional and keyword arguments, so different arguments yield
    different modules and identical arguments yield the same module object.
    """
    moduleCache = {}

    def moduleFactory(baseModule, *args, **kwargs):
        # Match the string type ModuleType itself uses for names (bytes on
        # Python 2 when unicode_literals is in effect, text otherwise).
        if isinstance(ModuleType.__name__, type("")):
            name = "_%s_factory" % baseModule.__name__
        else:
            name = b"_%s_factory" % baseModule.__name__

        # Keyword names are unique, so sorting never compares the values.
        cacheKey = (name, args, tuple(sorted(kwargs.items())))
        try:
            return moduleCache[cacheKey]
        except KeyError:
            cacheable = True
        except TypeError:
            # Unhashable argument: build a fresh module and skip the cache
            # rather than hand back a module built for other arguments.
            cacheable = False

        mod = ModuleType(name)
        objs = factory(baseModule, *args, **kwargs)
        mod.__dict__.update(objs)
        if cacheable:
            moduleCache[cacheKey] = mod
        return mod

    return moduleFactory