1/*
2 * Copyright 2005 Maksim Orlovich <maksim@kde.org>
3 * Copyright (C) 2006 Apple Computer, Inc.
4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "XPathParser.h"
30
31#if ENABLE(XPATH)
32
33#include "ExceptionCode.h"
34#include "XPathEvaluator.h"
35#include "XPathException.h"
36#include "XPathNSResolver.h"
37#include "XPathStep.h"
38#include <wtf/StdLibExtras.h>
39#include <wtf/text/StringHash.h>
40
41int xpathyyparse(void*);
42
43using namespace WTF;
44using namespace Unicode;
45
46namespace WebCore {
47namespace XPath {
48
49class LocationPath;
50
51#include "XPathGrammar.h"
52
53Parser* Parser::currentParser = 0;
54
55enum XMLCat { NameStart, NameCont, NotPartOfName };
56
57typedef HashMap<String, Step::Axis> AxisNamesMap;
58
59static XMLCat charCat(UChar aChar)
60{
61    //### might need to add some special cases from the XML spec.
62
63    if (aChar == '_')
64        return NameStart;
65
66    if (aChar == '.' || aChar == '-')
67        return NameCont;
68    CharCategory category = Unicode::category(aChar);
69    if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
70        return NameStart;
71    if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
72        return NameCont;
73    return NotPartOfName;
74}
75
76static void setUpAxisNamesMap(AxisNamesMap& axisNames)
77{
78    struct AxisName {
79        const char* name;
80        Step::Axis axis;
81    };
82    const AxisName axisNameList[] = {
83        { "ancestor", Step::AncestorAxis },
84        { "ancestor-or-self", Step::AncestorOrSelfAxis },
85        { "attribute", Step::AttributeAxis },
86        { "child", Step::ChildAxis },
87        { "descendant", Step::DescendantAxis },
88        { "descendant-or-self", Step::DescendantOrSelfAxis },
89        { "following", Step::FollowingAxis },
90        { "following-sibling", Step::FollowingSiblingAxis },
91        { "namespace", Step::NamespaceAxis },
92        { "parent", Step::ParentAxis },
93        { "preceding", Step::PrecedingAxis },
94        { "preceding-sibling", Step::PrecedingSiblingAxis },
95        { "self", Step::SelfAxis }
96    };
97    for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
98        axisNames.set(axisNameList[i].name, axisNameList[i].axis);
99}
100
101static bool isAxisName(const String& name, Step::Axis& type)
102{
103    DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
104
105    if (axisNames.isEmpty())
106        setUpAxisNamesMap(axisNames);
107
108    AxisNamesMap::iterator it = axisNames.find(name);
109    if (it == axisNames.end())
110        return false;
111    type = it->second;
112    return true;
113}
114
115static bool isNodeTypeName(const String& name)
116{
117    DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
118    if (nodeTypeNames.isEmpty()) {
119        nodeTypeNames.add("comment");
120        nodeTypeNames.add("text");
121        nodeTypeNames.add("processing-instruction");
122        nodeTypeNames.add("node");
123    }
124    return nodeTypeNames.contains(name);
125}
126
127// Returns whether the current token can possibly be a binary operator, given
128// the previous token. Necessary to disambiguate some of the operators
129// (* (multiply), div, and, or, mod) in the [32] Operator rule
130// (check http://www.w3.org/TR/xpath#exprlex).
131bool Parser::isBinaryOperatorContext() const
132{
133    switch (m_lastTokenType) {
134    case 0:
135    case '@': case AXISNAME: case '(': case '[': case ',':
136    case AND: case OR: case MULOP:
137    case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
138    case EQOP: case RELOP:
139        return false;
140    default:
141        return true;
142    }
143}
144
145void Parser::skipWS()
146{
147    while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
148        ++m_nextPos;
149}
150
151Token Parser::makeTokenAndAdvance(int code, int advance)
152{
153    m_nextPos += advance;
154    return Token(code);
155}
156
157Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
158{
159    m_nextPos += advance;
160    return Token(code, val);
161}
162
163Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
164{
165    m_nextPos += advance;
166    return Token(code, val);
167}
168
169// Returns next char if it's there and interesting, 0 otherwise
170char Parser::peekAheadHelper()
171{
172    if (m_nextPos + 1 >= m_data.length())
173        return 0;
174    UChar next = m_data[m_nextPos + 1];
175    if (next >= 0xff)
176        return 0;
177    return next;
178}
179
180char Parser::peekCurHelper()
181{
182    if (m_nextPos >= m_data.length())
183        return 0;
184    UChar next = m_data[m_nextPos];
185    if (next >= 0xff)
186        return 0;
187    return next;
188}
189
190Token Parser::lexString()
191{
192    UChar delimiter = m_data[m_nextPos];
193    int startPos = m_nextPos + 1;
194
195    for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
196        if (m_data[m_nextPos] == delimiter) {
197            String value = m_data.substring(startPos, m_nextPos - startPos);
198            if (value.isNull())
199                value = "";
200            ++m_nextPos; // Consume the char.
201            return Token(LITERAL, value);
202        }
203    }
204
205    // Ouch, went off the end -- report error.
206    return Token(XPATH_ERROR);
207}
208
209Token Parser::lexNumber()
210{
211    int startPos = m_nextPos;
212    bool seenDot = false;
213
214    // Go until end or a non-digits character.
215    for (; m_nextPos < m_data.length(); ++m_nextPos) {
216        UChar aChar = m_data[m_nextPos];
217        if (aChar >= 0xff) break;
218
219        if (aChar < '0' || aChar > '9') {
220            if (aChar == '.' && !seenDot)
221                seenDot = true;
222            else
223                break;
224        }
225    }
226
227    return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
228}
229
230bool Parser::lexNCName(String& name)
231{
232    int startPos = m_nextPos;
233    if (m_nextPos >= m_data.length())
234        return false;
235
236    if (charCat(m_data[m_nextPos]) != NameStart)
237        return false;
238
239    // Keep going until we get a character that's not good for names.
240    for (; m_nextPos < m_data.length(); ++m_nextPos)
241        if (charCat(m_data[m_nextPos]) == NotPartOfName)
242            break;
243
244    name = m_data.substring(startPos, m_nextPos - startPos);
245    return true;
246}
247
248bool Parser::lexQName(String& name)
249{
250    String n1;
251    if (!lexNCName(n1))
252        return false;
253
254    skipWS();
255
256    // If the next character is :, what we just got it the prefix, if not,
257    // it's the whole thing.
258    if (peekAheadHelper() != ':') {
259        name = n1;
260        return true;
261    }
262
263    String n2;
264    if (!lexNCName(n2))
265        return false;
266
267    name = n1 + ":" + n2;
268    return true;
269}
270
271Token Parser::nextTokenInternal()
272{
273    skipWS();
274
275    if (m_nextPos >= m_data.length())
276        return Token(0);
277
278    char code = peekCurHelper();
279    switch (code) {
280    case '(': case ')': case '[': case ']':
281    case '@': case ',': case '|':
282        return makeTokenAndAdvance(code);
283    case '\'':
284    case '\"':
285        return lexString();
286    case '0': case '1': case '2': case '3': case '4':
287    case '5': case '6': case '7': case '8': case '9':
288        return lexNumber();
289    case '.': {
290        char next = peekAheadHelper();
291        if (next == '.')
292            return makeTokenAndAdvance(DOTDOT, 2);
293        if (next >= '0' && next <= '9')
294            return lexNumber();
295        return makeTokenAndAdvance('.');
296    }
297    case '/':
298        if (peekAheadHelper() == '/')
299            return makeTokenAndAdvance(SLASHSLASH, 2);
300        return makeTokenAndAdvance('/');
301    case '+':
302        return makeTokenAndAdvance(PLUS);
303    case '-':
304        return makeTokenAndAdvance(MINUS);
305    case '=':
306        return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
307    case '!':
308        if (peekAheadHelper() == '=')
309            return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
310        return Token(XPATH_ERROR);
311    case '<':
312        if (peekAheadHelper() == '=')
313            return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
314        return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
315    case '>':
316        if (peekAheadHelper() == '=')
317            return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
318        return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
319    case '*':
320        if (isBinaryOperatorContext())
321            return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
322        ++m_nextPos;
323        return Token(NAMETEST, "*");
324    case '$': { // $ QName
325        m_nextPos++;
326        String name;
327        if (!lexQName(name))
328            return Token(XPATH_ERROR);
329        return Token(VARIABLEREFERENCE, name);
330    }
331    }
332
333    String name;
334    if (!lexNCName(name))
335        return Token(XPATH_ERROR);
336
337    skipWS();
338    // If we're in an operator context, check for any operator names
339    if (isBinaryOperatorContext()) {
340        if (name == "and") //### hash?
341            return Token(AND);
342        if (name == "or")
343            return Token(OR);
344        if (name == "mod")
345            return Token(MULOP, NumericOp::OP_Mod);
346        if (name == "div")
347            return Token(MULOP, NumericOp::OP_Div);
348    }
349
350    // See whether we are at a :
351    if (peekCurHelper() == ':') {
352        m_nextPos++;
353        // Any chance it's an axis name?
354        if (peekCurHelper() == ':') {
355            m_nextPos++;
356
357            //It might be an axis name.
358            Step::Axis axis;
359            if (isAxisName(name, axis))
360                return Token(AXISNAME, axis);
361            // Ugh, :: is only valid in axis names -> error
362            return Token(XPATH_ERROR);
363        }
364
365        // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
366        skipWS();
367        if (peekCurHelper() == '*') {
368            m_nextPos++;
369            return Token(NAMETEST, name + ":*");
370        }
371
372        // Make a full qname.
373        String n2;
374        if (!lexNCName(n2))
375            return Token(XPATH_ERROR);
376
377        name = name + ":" + n2;
378    }
379
380    skipWS();
381    if (peekCurHelper() == '(') {
382        //note: we don't swallow the (here!
383
384        //either node type of function name
385        if (isNodeTypeName(name)) {
386            if (name == "processing-instruction")
387                return Token(PI, name);
388
389            return Token(NODETYPE, name);
390        }
391        //must be a function name.
392        return Token(FUNCTIONNAME, name);
393    }
394
395    // At this point, it must be NAMETEST.
396    return Token(NAMETEST, name);
397}
398
399Token Parser::nextToken()
400{
401    Token toRet = nextTokenInternal();
402    m_lastTokenType = toRet.type;
403    return toRet;
404}
405
406Parser::Parser()
407{
408    reset(String());
409}
410
411Parser::~Parser()
412{
413}
414
415void Parser::reset(const String& data)
416{
417    m_nextPos = 0;
418    m_data = data;
419    m_lastTokenType = 0;
420
421    m_topExpr = 0;
422    m_gotNamespaceError = false;
423}
424
425int Parser::lex(void* data)
426{
427    YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
428    Token tok = nextToken();
429
430    switch (tok.type) {
431    case AXISNAME:
432        yylval->axis = tok.axis;
433        break;
434    case MULOP:
435        yylval->numop = tok.numop;
436        break;
437    case RELOP:
438    case EQOP:
439        yylval->eqop = tok.eqop;
440        break;
441    case NODETYPE:
442    case PI:
443    case FUNCTIONNAME:
444    case LITERAL:
445    case VARIABLEREFERENCE:
446    case NUMBER:
447    case NAMETEST:
448        yylval->str = new String(tok.str);
449        registerString(yylval->str);
450        break;
451    }
452
453    return tok.type;
454}
455
456bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
457{
458    size_t colon = qName.find(':');
459    if (colon != notFound) {
460        if (!m_resolver)
461            return false;
462        namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
463        if (namespaceURI.isNull())
464            return false;
465        localName = qName.substring(colon + 1);
466    } else
467        localName = qName;
468
469    return true;
470}
471
472Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
473{
474    reset(statement);
475
476    m_resolver = resolver;
477
478    Parser* oldParser = currentParser;
479    currentParser = this;
480    int parseError = xpathyyparse(this);
481    currentParser = oldParser;
482
483    if (parseError) {
484        deleteAllValues(m_parseNodes);
485        m_parseNodes.clear();
486
487        HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
488        for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
489            deleteAllValues(**it);
490            delete *it;
491        }
492        m_predicateVectors.clear();
493
494        HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
495        for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
496            deleteAllValues(**it);
497            delete *it;
498        }
499        m_expressionVectors.clear();
500
501        deleteAllValues(m_strings);
502        m_strings.clear();
503
504        deleteAllValues(m_nodeTests);
505        m_nodeTests.clear();
506
507        m_topExpr = 0;
508
509        if (m_gotNamespaceError)
510            ec = NAMESPACE_ERR;
511        else
512            ec = XPathException::INVALID_EXPRESSION_ERR;
513        return 0;
514    }
515
516    ASSERT(m_parseNodes.size() == 1);
517    ASSERT(*m_parseNodes.begin() == m_topExpr);
518    ASSERT(m_expressionVectors.size() == 0);
519    ASSERT(m_predicateVectors.size() == 0);
520    ASSERT(m_strings.size() == 0);
521    ASSERT(m_nodeTests.size() == 0);
522
523    m_parseNodes.clear();
524    Expression* result = m_topExpr;
525    m_topExpr = 0;
526
527    return result;
528}
529
530void Parser::registerParseNode(ParseNode* node)
531{
532    if (node == 0)
533        return;
534
535    ASSERT(!m_parseNodes.contains(node));
536
537    m_parseNodes.add(node);
538}
539
540void Parser::unregisterParseNode(ParseNode* node)
541{
542    if (node == 0)
543        return;
544
545    ASSERT(m_parseNodes.contains(node));
546
547    m_parseNodes.remove(node);
548}
549
550void Parser::registerPredicateVector(Vector<Predicate*>* vector)
551{
552    if (vector == 0)
553        return;
554
555    ASSERT(!m_predicateVectors.contains(vector));
556
557    m_predicateVectors.add(vector);
558}
559
560void Parser::deletePredicateVector(Vector<Predicate*>* vector)
561{
562    if (vector == 0)
563        return;
564
565    ASSERT(m_predicateVectors.contains(vector));
566
567    m_predicateVectors.remove(vector);
568    delete vector;
569}
570
571
572void Parser::registerExpressionVector(Vector<Expression*>* vector)
573{
574    if (vector == 0)
575        return;
576
577    ASSERT(!m_expressionVectors.contains(vector));
578
579    m_expressionVectors.add(vector);
580}
581
582void Parser::deleteExpressionVector(Vector<Expression*>* vector)
583{
584    if (vector == 0)
585        return;
586
587    ASSERT(m_expressionVectors.contains(vector));
588
589    m_expressionVectors.remove(vector);
590    delete vector;
591}
592
593void Parser::registerString(String* s)
594{
595    if (s == 0)
596        return;
597
598    ASSERT(!m_strings.contains(s));
599
600    m_strings.add(s);
601}
602
603void Parser::deleteString(String* s)
604{
605    if (s == 0)
606        return;
607
608    ASSERT(m_strings.contains(s));
609
610    m_strings.remove(s);
611    delete s;
612}
613
614void Parser::registerNodeTest(Step::NodeTest* t)
615{
616    if (t == 0)
617        return;
618
619    ASSERT(!m_nodeTests.contains(t));
620
621    m_nodeTests.add(t);
622}
623
624void Parser::deleteNodeTest(Step::NodeTest* t)
625{
626    if (t == 0)
627        return;
628
629    ASSERT(m_nodeTests.contains(t));
630
631    m_nodeTests.remove(t);
632    delete t;
633}
634
635}
636}
637
638#endif // ENABLE(XPATH)
639