183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# ElementTree
383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# limited xpath support for element trees
683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# history:
883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# 2003-05-23 fl   created
983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# 2003-05-28 fl   added support for // etc
1083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# 2003-08-27 fl   fixed parsing of periods in element names
1183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# 2007-09-10 fl   new selection engine
1283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# 2007-09-12 fl   fixed parent selector
1383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# 2007-09-13 fl   added iterfind; changed findall to return a list
1483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# 2007-11-30 fl   added namespaces support
1583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# 2009-10-30 fl   added child element value filter
1683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
1783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Copyright (c) 2003-2009 by Fredrik Lundh.  All rights reserved.
1883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
1983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# fredrik@pythonware.com
2083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# http://www.pythonware.com
2183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
2283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# --------------------------------------------------------------------
2383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# The ElementTree toolkit is
2483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
2583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Copyright (c) 1999-2009 by Fredrik Lundh
2683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
2783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# By obtaining, using, and/or copying this software and/or its
2883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# associated documentation, you agree that you have read, understood,
2983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# and will comply with the following terms and conditions:
3083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
3183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Permission to use, copy, modify, and distribute this software and
3283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# its associated documentation for any purpose and without fee is
3383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# hereby granted, provided that the above copyright notice appears in
3483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# all copies, and that both that copyright notice and this permission
3583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# notice appear in supporting documentation, and that the name of
3683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Secret Labs AB or the author not be used in advertising or publicity
3783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# pertaining to distribution of the software without specific, written
3883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# prior permission.
3983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh#
4083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
4183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
4283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
4383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
4483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
4583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
4683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
4783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# OF THIS SOFTWARE.
4883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# --------------------------------------------------------------------
4983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
5083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Licensed to PSF under a Contributor Agreement.
5183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# See http://www.python.org/psf/license for licensing details.
5283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
5383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh##
5483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Implementation module for XPath support.  There's usually no reason
5583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# to import this module directly; the <b>ElementTree</b> does this for
5683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# you, if needed.
5783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh##
5883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
5983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport re
6083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
# Tokenizer pattern for the supported XPath subset.  With findall() every
# match produces a 2-tuple:
#   group 1 - an operator, or a quoted string literal (quotes included)
#   group 2 - a name, optionally preceded by a "{uri}" namespace part
# A whitespace-only match leaves both groups empty.
#
# Raw string literals are used so that regex escapes such as \. \( \[ \s
# reach the re module verbatim instead of relying on Python passing
# unrecognized string escapes through unchanged.
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|" r'"[^"]*"|'
    r"::|"
    r"//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )
7283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def xpath_tokenizer(pattern, namespaces=None):
    """Yield (operator, name) token pairs for the path expression *pattern*.

    A name of the form ``prefix:local`` (one that does not already start
    with ``{``) is rewritten to ``{uri}local``, where the uri is looked up
    under ``prefix`` in *namespaces*.  All other tokens are passed through
    as produced by xpath_tokenizer_re.

    Raises SyntaxError when a prefixed name is met and *namespaces* is
    empty/None or has no entry for the prefix.
    """
    for operator, name in xpath_tokenizer_re.findall(pattern):
        has_prefix = name and name[0] != "{" and ":" in name
        if not has_prefix:
            yield operator, name
            continue
        prefix, local = name.split(":", 1)
        if not namespaces:
            raise SyntaxError("prefix %r not found in prefix map" % prefix)
        try:
            uri = namespaces[prefix]
        except KeyError:
            raise SyntaxError("prefix %r not found in prefix map" % prefix)
        yield operator, "{%s}%s" % (uri, local)
8683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def get_parent_map(context):
    """Return the child -> parent mapping for *context*, building it once.

    If ``context.parent_map`` is not None it is returned as is.  Otherwise
    a new dict is stored on the context and filled by walking every
    element produced by ``context.root.iter()`` and recording each of its
    direct children.
    """
    cached = context.parent_map
    if cached is not None:
        return cached
    mapping = {}
    # published on the context before it is populated, as a cache for later calls
    context.parent_map = mapping
    for parent in context.root.iter():
        for child in parent:
            mapping[child] = parent
    return mapping
9583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def prepare_child(next, token):
    """Build a selector for direct children whose tag equals ``token[1]``.

    *next* is not used.  The returned ``select(context, result)`` is a
    generator; it walks each element of *result* and yields the children
    with the wanted tag, in document order per parent.
    """
    wanted = token[1]

    def select(context, result):
        for parent in result:
            for child in parent:
                if child.tag != wanted:
                    continue
                yield child

    return select
10483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def prepare_star(next, token):
    """Build a selector yielding every direct child, whatever its tag.

    Neither *next* nor *token* is inspected.  The returned
    ``select(context, result)`` is a generator over the children of each
    element in *result*.
    """
    def select(context, result):
        for parent in result:
            for child in parent:
                yield child

    return select
11183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def prepare_self(next, token):
    """Build the identity selector: it re-yields *result* unchanged.

    Neither *next* nor *token* is inspected.
    """
    def select(context, result):
        for current in result:
            yield current

    return select
11783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def prepare_descendant(next, token):
    """Build a selector for descendants at any depth.

    The step's name is taken from the token that follows: ``*`` selects
    every descendant, a plain name token selects descendants with that
    tag.  Any other following operator raises SyntaxError.  A
    StopIteration raised by *next* is not caught here.

    The returned ``select(context, result)`` is a generator; for each
    element of *result* it walks ``element.iter(tag)`` and skips the
    element itself.
    """
    following = next()
    operator = following[0]
    if operator == "*":
        wanted = "*"
    elif operator:
        raise SyntaxError("invalid descendant")
    else:
        wanted = following[1]

    def select(context, result):
        for start in result:
            for node in start.iter(wanted):
                if node is start:
                    # iter() includes the starting element; it is not its own descendant
                    continue
                yield node

    return select
13283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def prepare_parent(next, token):
    """Build a selector that maps each element to its parent.

    Neither *next* nor *token* is inspected.  The returned
    ``select(context, result)`` is a generator; parents are looked up in
    the map from get_parent_map(context).  Elements with no entry in that
    map are skipped, and each parent is yielded at most once.
    """
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parents = get_parent_map(context)
        emitted = set()
        for node in result:
            try:
                parent = parents[node]
            except KeyError:
                # no recorded parent for this element
                continue
            if parent in emitted:
                continue
            emitted.add(parent)
            yield parent

    return select
14583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def prepare_predicate(next, token):
    """Compile a ``[...]`` predicate into a selector function.

    Tokens are pulled from *next* (a zero-argument callable returning
    ``(operator, name)`` pairs) up to and including the closing ``]``.
    Whitespace-only tokens are skipped, so ``[@a = 'b']`` is read the same
    way as ``[@a='b']``.  *token* (the opening bracket) is not inspected.

    Supported forms:

        [@attribute]          element has the attribute
        [@attribute='value']  attribute equals the value
        [tag]                 elem.find(tag) finds something
        [tag='value']         some elem.findall(tag) match has exactly
                              this text (all nested text joined)
        [index]               1-based position among the parent's
                              children that share the element's tag
        [last()]              last among those siblings
        [last()-index]        the name token after "()" (e.g. "-1") is
                              passed to int() and used as an offset
                              from the last sibling

    The returned ``select(context, result)`` is a generator that filters
    the elements of *result*.

    Raises SyntaxError for an unsupported function, a non-integer
    ``last()`` offset, or any other predicate shape.  A StopIteration
    raised by *next* (token stream ended before ``]``) is not caught here.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while 1:
        token = next()
        if token[0] == "]":
            break
        if not token[0] and not token[1]:
            # whitespace-only match from the tokenizer: carries no meaning
            continue
        if token[0] and token[0][:1] in "'\"":
            # quoted literal: normalize the operator to "'" and strip the quotes
            token = "'", token[0][1:-1]
        # operators contribute themselves to the signature, names a "-"
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)
    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not re.match(r"\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem
        return select
    if signature == "-='" and not re.match(r"\d+$", predicate[0]):
        # [tag='value']
        tag = predicate[0]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                for e in elem.findall(tag):
                    if "".join(e.itertext()) == value:
                        yield elem
                        break
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # only all-digit names reach this point; convert to 0-based
            index = int(predicate[0]) - 1
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                try:
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression")
            else:
                index = -1
        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.findall(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    # no known parent, or position out of range: no match
                    pass
        return select
    raise SyntaxError("invalid predicate")
22583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
# Dispatch table: the operator part of a token (token[0]) selects the
# prepare_* function that compiles that path step.  Name tokens have an
# empty operator and therefore map to the child-by-tag step.
ops = {
    "": prepare_child,          # plain tag name
    "*": prepare_star,          # any child
    ".": prepare_self,          # current element
    "..": prepare_parent,       # parent element
    "//": prepare_descendant,   # descendants at any depth
    "[": prepare_predicate,     # start of a [...] filter
    }

# Compiled selector lists memoized by iterfind, which also empties this
# dict once it holds more than 100 entries.
_cache = dict()
23683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
23783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehclass _SelectorContext:
23883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    parent_map = None
23983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def __init__(self, root):
24083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.root = root
24183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
24283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# --------------------------------------------------------------------
24383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
24483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh##
24583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Generate all matching objects.
24683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def iterfind(elem, path, namespaces=None):
    """Return an iterable over the elements matching *path* from *elem*.

    *path* is tokenized with xpath_tokenizer() and compiled into a list of
    selector steps through the ``ops`` table; compiled lists are memoized
    in ``_cache``.  A trailing ``/`` is treated as ``/*``.  *namespaces*,
    if given, maps prefixes to URIs for ``prefix:name`` steps.

    The steps are chained lazily, so matching happens while the returned
    iterable is consumed.  An empty path yields nothing.

    Raises SyntaxError for a path starting with ``/`` or a path that ends
    in the middle of a step.
    """
    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*" # implicit all (FIXME: keep this?)
    # Prefix expansion is baked into the compiled steps, so the prefix map
    # has to be part of the cache key: the same path with another map must
    # not reuse a selector built for different URIs.
    if namespaces:
        cache_key = (path, frozenset(namespaces.items()))
    else:
        cache_key = (path, None)
    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        tokens = iter(xpath_tokenizer(path, namespaces))

        def next_token():
            # builtin next() exists on Python 2.6+ and 3.x, unlike the
            # iterator's .next method
            return next(tokens)

        try:
            token = next_token()
        except StopIteration:
            # no tokens at all (empty path): nothing can match
            return iter(())
        selector = []
        while 1:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                # a step needed more tokens than the path supplied
                raise SyntaxError("invalid path")
            try:
                token = next_token()
                if token[0] == "/":
                    # step separator: move on to the token starting the next step
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result
27983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
28083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh##
28183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Find first matching object.
28283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def find(elem, path, namespaces=None):
    """Return the first element matching *path*, or None if there is none.

    Arguments are passed straight to iterfind().
    """
    try:
        # builtin next() instead of the .next() method, so this works on
        # Python 2.6+ and 3.x alike
        return next(iterfind(elem, path, namespaces))
    except StopIteration:
        # raised either by the advance or by the iterfind() call itself
        return None
28883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
28983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh##
29083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Find all matching objects.
29183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def findall(elem, path, namespaces=None):
    """Return a list of all elements matching *path*, as produced by iterfind()."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)
29483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
29583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh##
29683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Find text for first matching object.
29783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching *path*.

    Returns *default* when nothing matches, and "" when the matching
    element's ``text`` is empty or None.
    """
    try:
        # builtin next() instead of the .next() method, so this works on
        # Python 2.6+ and 3.x alike
        match = next(iterfind(elem, path, namespaces))
    except StopIteration:
        return default
    return match.text or ""
304