#
# ElementTree
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
#
# limited xpath support for element trees
#
# history:
# 2003-05-23 fl created
# 2003-05-28 fl added support for // etc
# 2003-08-27 fl fixed parsing of periods in element names
# 2007-09-10 fl new selection engine
# 2007-09-12 fl fixed parent selector
# 2007-09-13 fl added iterfind; changed findall to return a list
# 2007-11-30 fl added namespaces support
# 2009-10-30 fl added child element value filter
#
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2009 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------

# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.

##
# Implementation module for XPath support. There's usually no reason
# to import this module directly; the <b>ElementTree</b> does this for
# you, if needed.
##

import re

# Tokenizer for the supported path syntax.  Each match has two groups:
# group 1 holds an operator or a quoted string, group 2 holds a tag name
# (optionally in "{uri}local" form).  Whitespace runs match with both
# groups empty.  Raw strings keep the regex escapes out of Python's own
# string-escape processing.
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|\"[^\"]*\"|"
    r"::|"
    r"//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )

##
# Splits a path expression into (operator, tag) pairs.
#
# @param pattern The path expression to tokenize.
# @param namespaces Optional mapping from prefix to namespace URI, used
#     to rewrite "prefix:local" tags as "{uri}local".
# @return A generator of 2-tuples; one of the two items is an empty
#     string, and both are empty for whitespace.
# @exception SyntaxError If a tag uses a prefix and no mapping was given,
#     or the prefix is not a key of the mapping.

def xpath_tokenizer(pattern, namespaces=None):
    for token in xpath_tokenizer_re.findall(pattern):
        tag = token[1]
        if tag and tag[0] != "{" and ":" in tag:
            prefix, local = tag.split(":", 1)
            if not namespaces:
                raise SyntaxError("prefix %r not found in prefix map" % prefix)
            # guard only the lookup, so nothing else can be mistaken for
            # a missing prefix
            try:
                uri = namespaces[prefix]
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix)
            yield token[0], "{%s}%s" % (uri, local)
        else:
            yield token

##
# Returns the child-to-parent mapping for a selector context, building
# it on first use and storing it on the context for later calls.
#
# @param context An object with a "root" element and a "parent_map"
#     attribute that is None until the mapping has been built.
# @return A dictionary mapping each element below context.root to its
#     parent.  The root itself is not a key.

def get_parent_map(context):
    parent_map = context.parent_map
    if parent_map is None:
        context.parent_map = parent_map = {}
        for parent in context.root.iter():
            for child in parent:
                parent_map[child] = parent
    return parent_map

##
# Builds the selector for a plain tag step.
#
# @param next Token source (not used by this step).
# @param token The current (operator, tag) pair; the tag is matched.
# @return A generator function select(context, result) that yields the
#     direct children of each element in result whose tag equals the tag.

def prepare_child(next, token):
    tag = token[1]
    def select(context, result):
        for elem in result:
            for child in elem:
                if child.tag == tag:
                    yield child
    return select

##
# Builds the selector for the "*" step.
#
# @param next Token source (not used by this step).
# @param token The current token (not used by this step).
# @return A generator function select(context, result) that yields
#     every direct child of each element in result.

def prepare_star(next, token):
    def select(context, result):
        for elem in result:
            for child in elem:
                yield child
    return select
11183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 11283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef prepare_self(next, token): 11383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def select(context, result): 11483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for elem in result: 11583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield elem 11683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return select 11783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 11883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef prepare_descendant(next, token): 11983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh token = next() 12083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if token[0] == "*": 12183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh tag = "*" 12283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh elif not token[0]: 12383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh tag = token[1] 12483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 12583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh raise SyntaxError("invalid descendant") 12683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def select(context, result): 12783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for elem in result: 12883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for e in elem.iter(tag): 12983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if e is not elem: 13083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield e 13183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return select 13283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 13383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef prepare_parent(next, token): 13483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def select(context, result): 13583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # FIXME: raise error if .. is applied at toplevel? 
13683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh parent_map = get_parent_map(context) 13783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh result_map = {} 13883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for elem in result: 13983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if elem in parent_map: 14083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh parent = parent_map[elem] 14183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if parent not in result_map: 14283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh result_map[parent] = None 14383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield parent 14483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return select 14583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 14683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehdef prepare_predicate(next, token): 14783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # FIXME: replace with real parser!!! refs: 14883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # http://effbot.org/zone/simple-iterator-parser.htm 14983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # http://javascript.crockford.com/tdop/tdop.html 15083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh signature = [] 15183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh predicate = [] 15283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh while 1: 15383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh token = next() 15483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if token[0] == "]": 15583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh break 15683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if token[0] and token[0][:1] in "'\"": 15783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh token = "'", token[0][1:-1] 15883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh signature.append(token[0] or "-") 15983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh predicate.append(token[1]) 16083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 
signature = "".join(signature) 16183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # use signature to determine predicate type 16283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if signature == "@-": 16383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # [@attribute] predicate 16483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh key = predicate[1] 16583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def select(context, result): 16683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for elem in result: 16783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if elem.get(key) is not None: 16883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield elem 16983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return select 17083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if signature == "@-='": 17183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # [@attribute='value'] 17283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh key = predicate[1] 17383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh value = predicate[-1] 17483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def select(context, result): 17583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for elem in result: 17683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if elem.get(key) == value: 17783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield elem 17883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return select 17983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if signature == "-" and not re.match("\d+$", predicate[0]): 18083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # [tag] 18183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh tag = predicate[0] 18283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def select(context, result): 18383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for elem in result: 18483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if elem.find(tag) is not None: 18583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew 
Hsieh yield elem 18683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return select 18783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if signature == "-='" and not re.match("\d+$", predicate[0]): 18883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # [tag='value'] 18983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh tag = predicate[0] 19083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh value = predicate[-1] 19183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def select(context, result): 19283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for elem in result: 19383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for e in elem.findall(tag): 19483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if "".join(e.itertext()) == value: 19583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield elem 19683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh break 19783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return select 19883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if signature == "-" or signature == "-()" or signature == "-()-": 19983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # [index] or [last()] or [last()-index] 20083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if signature == "-": 20183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh index = int(predicate[0]) - 1 20283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 20383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if predicate[0] != "last": 20483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh raise SyntaxError("unsupported function") 20583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if signature == "-()-": 20683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh try: 20783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh index = int(predicate[2]) - 1 20883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh except ValueError: 20983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh raise SyntaxError("unsupported expression") 
21083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh else: 21183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh index = -1 21283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh def select(context, result): 21383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh parent_map = get_parent_map(context) 21483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh for elem in result: 21583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh try: 21683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh parent = parent_map[elem] 21783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh # FIXME: what if the selector is "*" ? 21883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh elems = list(parent.findall(elem.tag)) 21983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh if elems[index] is elem: 22083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh yield elem 22183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh except (IndexError, KeyError): 22283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh pass 22383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh return select 22483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh raise SyntaxError("invalid predicate") 22583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 22683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehops = { 22783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh "": prepare_child, 22883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh "*": prepare_star, 22983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh ".": prepare_self, 23083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh "..": prepare_parent, 23183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh "//": prepare_descendant, 23283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh "[": prepare_predicate, 23383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh } 23483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 23583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh_cache = {} 23683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh 
##
# State shared by the selector steps of one query: the element the
# query started from, and the lazily built child-to-parent map.

class _SelectorContext:
    # filled in by get_parent_map on first use
    parent_map = None
    def __init__(self, root):
        self.root = root

# --------------------------------------------------------------------

##
# Generate all matching objects.
#
# @param elem The element the path is applied to.
# @param path The path expression.  A trailing "/" is treated as "/*".
# @param namespaces Optional mapping from prefix to namespace URI.
# @return An iterator over the matching elements; empty if the path
#     contains no tokens.
# @exception SyntaxError If the path starts with "/", ends in the middle
#     of a step, or a step cannot be compiled.

def iterfind(elem, path, namespaces=None):
    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*" # implicit all (FIXME: keep this?)
    # prefixes are expanded while compiling, so the compiled selector
    # depends on the prefix map as well as on the path text
    if namespaces is None:
        cache_key = (path, None)
    else:
        cache_key = (path, tuple(sorted(namespaces.items())))
    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        tokens = iter(xpath_tokenizer(path, namespaces))
        def advance():
            # the builtin works with both the Python 2 and the Python 3
            # iterator protocol
            return next(tokens)
        try:
            token = advance()
        except StopIteration:
            # no tokens at all: nothing can match
            return iter(())
        selector = []
        while 1:
            try:
                selector.append(ops[token[0]](advance, token))
            except StopIteration:
                raise SyntaxError("invalid path")
            try:
                token = advance()
                if token[0] == "/":
                    token = advance()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result

##
# Find first matching object.
#
# @param elem The element the path is applied to.
# @param path The path expression.
# @param namespaces Optional mapping from prefix to namespace URI.
# @return The first matching element, or None if nothing matches.

def find(elem, path, namespaces=None):
    return next(iterfind(elem, path, namespaces), None)

##
# Find all matching objects.
#
# @param elem The element the path is applied to.
# @param path The path expression.
# @param namespaces Optional mapping from prefix to namespace URI.
# @return A list of all matching elements, in the order iterfind
#     produces them.

def findall(elem, path, namespaces=None):
    return list(iterfind(elem, path, namespaces))

##
# Find text for first matching object.
#
# @param elem The element the path is applied to.
# @param path The path expression.
# @param default Value returned when nothing matches.
# @param namespaces Optional mapping from prefix to namespace URI.
# @return The text of the first matching element, an empty string if
#     that element's text is empty or None, or the default value if
#     there is no matching element.

def findtext(elem, path, default=None, namespaces=None):
    try:
        # builtin next() works on both Python 2.6+ and Python 3 iterators
        match = next(iterfind(elem, path, namespaces))
    except StopIteration:
        return default
    return match.text or ""