1# 2# ElementTree 3# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ 4# 5# limited xpath support for element trees 6# 7# history: 8# 2003-05-23 fl created 9# 2003-05-28 fl added support for // etc 10# 2003-08-27 fl fixed parsing of periods in element names 11# 2007-09-10 fl new selection engine 12# 2007-09-12 fl fixed parent selector 13# 2007-09-13 fl added iterfind; changed findall to return a list 14# 2007-11-30 fl added namespaces support 15# 2009-10-30 fl added child element value filter 16# 17# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. 18# 19# fredrik@pythonware.com 20# http://www.pythonware.com 21# 22# -------------------------------------------------------------------- 23# The ElementTree toolkit is 24# 25# Copyright (c) 1999-2009 by Fredrik Lundh 26# 27# By obtaining, using, and/or copying this software and/or its 28# associated documentation, you agree that you have read, understood, 29# and will comply with the following terms and conditions: 30# 31# Permission to use, copy, modify, and distribute this software and 32# its associated documentation for any purpose and without fee is 33# hereby granted, provided that the above copyright notice appears in 34# all copies, and that both that copyright notice and this permission 35# notice appear in supporting documentation, and that the name of 36# Secret Labs AB or the author not be used in advertising or publicity 37# pertaining to distribution of the software without specific, written 38# prior permission. 39# 40# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 41# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 42# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 43# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 44# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 45# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 46# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 47# OF THIS SOFTWARE. 48# -------------------------------------------------------------------- 49 50# Licensed to PSF under a Contributor Agreement. 51# See http://www.python.org/psf/license for licensing details. 52 53## 54# Implementation module for XPath support. There's usually no reason 55# to import this module directly; the <b>ElementTree</b> does this for 56# you, if needed. 57## 58 59import re 60 61xpath_tokenizer_re = re.compile( 62 "(" 63 "'[^']*'|\"[^\"]*\"|" 64 "::|" 65 "//?|" 66 "\.\.|" 67 "\(\)|" 68 "[/.*:\[\]\(\)@=])|" 69 "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" 70 "\s+" 71 ) 72 73def xpath_tokenizer(pattern, namespaces=None): 74 for token in xpath_tokenizer_re.findall(pattern): 75 tag = token[1] 76 if tag and tag[0] != "{" and ":" in tag: 77 try: 78 prefix, uri = tag.split(":", 1) 79 if not namespaces: 80 raise KeyError 81 yield token[0], "{%s}%s" % (namespaces[prefix], uri) 82 except KeyError: 83 raise SyntaxError("prefix %r not found in prefix map" % prefix) 84 else: 85 yield token 86 87def get_parent_map(context): 88 parent_map = context.parent_map 89 if parent_map is None: 90 context.parent_map = parent_map = {} 91 for p in context.root.iter(): 92 for e in p: 93 parent_map[e] = p 94 return parent_map 95 96def prepare_child(next, token): 97 tag = token[1] 98 def select(context, result): 99 for elem in result: 100 for e in elem: 101 if e.tag == tag: 102 yield e 103 return select 104 105def prepare_star(next, token): 106 def select(context, result): 107 for elem in result: 108 for e in elem: 109 yield e 110 return select 111 112def prepare_self(next, token): 113 def select(context, result): 114 for elem in result: 115 yield elem 116 return select 117 118def prepare_descendant(next, token): 119 token = next() 120 if token[0] == "*": 121 tag = "*" 122 elif not token[0]: 123 tag = token[1] 124 else: 125 raise SyntaxError("invalid descendant") 126 def select(context, result): 127 for elem in result: 128 for e in elem.iter(tag): 129 if e is not elem: 130 yield e 131 return select 132 133def prepare_parent(next, token): 134 def select(context, result): 135 # FIXME: raise error if .. is applied at toplevel? 136 parent_map = get_parent_map(context) 137 result_map = {} 138 for elem in result: 139 if elem in parent_map: 140 parent = parent_map[elem] 141 if parent not in result_map: 142 result_map[parent] = None 143 yield parent 144 return select 145 146def prepare_predicate(next, token): 147 # FIXME: replace with real parser!!! refs: 148 # http://effbot.org/zone/simple-iterator-parser.htm 149 # http://javascript.crockford.com/tdop/tdop.html 150 signature = [] 151 predicate = [] 152 while 1: 153 token = next() 154 if token[0] == "]": 155 break 156 if token[0] and token[0][:1] in "'\"": 157 token = "'", token[0][1:-1] 158 signature.append(token[0] or "-") 159 predicate.append(token[1]) 160 signature = "".join(signature) 161 # use signature to determine predicate type 162 if signature == "@-": 163 # [@attribute] predicate 164 key = predicate[1] 165 def select(context, result): 166 for elem in result: 167 if elem.get(key) is not None: 168 yield elem 169 return select 170 if signature == "@-='": 171 # [@attribute='value'] 172 key = predicate[1] 173 value = predicate[-1] 174 def select(context, result): 175 for elem in result: 176 if elem.get(key) == value: 177 yield elem 178 return select 179 if signature == "-" and not re.match("\d+$", predicate[0]): 180 # [tag] 181 tag = predicate[0] 182 def select(context, result): 183 for elem in result: 184 if elem.find(tag) is not None: 185 yield elem 186 return select 187 if signature == "-='" and not re.match("\d+$", predicate[0]): 188 # [tag='value'] 189 tag = predicate[0] 190 value = predicate[-1] 191 def select(context, result): 192 for elem in result: 193 for e in elem.findall(tag): 194 if "".join(e.itertext()) == value: 195 yield elem 196 break 197 return select 198 if signature == "-" or signature == "-()" or signature == "-()-": 199 # [index] or [last()] or [last()-index] 200 if signature == "-": 201 index = int(predicate[0]) - 1 202 else: 203 if predicate[0] != "last": 204 raise SyntaxError("unsupported function") 205 if signature == "-()-": 206 try: 207 index = int(predicate[2]) - 1 208 except ValueError: 209 raise SyntaxError("unsupported expression") 210 else: 211 index = -1 212 def select(context, result): 213 parent_map = get_parent_map(context) 214 for elem in result: 215 try: 216 parent = parent_map[elem] 217 # FIXME: what if the selector is "*" ? 218 elems = list(parent.findall(elem.tag)) 219 if elems[index] is elem: 220 yield elem 221 except (IndexError, KeyError): 222 pass 223 return select 224 raise SyntaxError("invalid predicate") 225 226ops = { 227 "": prepare_child, 228 "*": prepare_star, 229 ".": prepare_self, 230 "..": prepare_parent, 231 "//": prepare_descendant, 232 "[": prepare_predicate, 233 } 234 235_cache = {} 236 237class _SelectorContext: 238 parent_map = None 239 def __init__(self, root): 240 self.root = root 241 242# -------------------------------------------------------------------- 243 244## 245# Generate all matching objects. 246 247def iterfind(elem, path, namespaces=None): 248 # compile selector pattern 249 if path[-1:] == "/": 250 path = path + "*" # implicit all (FIXME: keep this?) 251 try: 252 selector = _cache[path] 253 except KeyError: 254 if len(_cache) > 100: 255 _cache.clear() 256 if path[:1] == "/": 257 raise SyntaxError("cannot use absolute path on element") 258 next = iter(xpath_tokenizer(path, namespaces)).next 259 token = next() 260 selector = [] 261 while 1: 262 try: 263 selector.append(ops[token[0]](next, token)) 264 except StopIteration: 265 raise SyntaxError("invalid path") 266 try: 267 token = next() 268 if token[0] == "/": 269 token = next() 270 except StopIteration: 271 break 272 _cache[path] = selector 273 # execute selector pattern 274 result = [elem] 275 context = _SelectorContext(elem) 276 for select in selector: 277 result = select(context, result) 278 return result 279 280## 281# Find first matching object. 282 283def find(elem, path, namespaces=None): 284 try: 285 return iterfind(elem, path, namespaces).next() 286 except StopIteration: 287 return None 288 289## 290# Find all matching objects. 291 292def findall(elem, path, namespaces=None): 293 return list(iterfind(elem, path, namespaces)) 294 295## 296# Find text for first matching object. 297 298def findtext(elem, path, default=None, namespaces=None): 299 try: 300 elem = iterfind(elem, path, namespaces).next() 301 return elem.text or "" 302 except StopIteration: 303 return default 304