1/* 2 * Copyright 2005 Maksim Orlovich <maksim@kde.org> 3 * Copyright (C) 2006 Apple Computer, Inc. 4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28#include "config.h" 29#include "XPathParser.h" 30 31#if ENABLE(XPATH) 32 33#include "ExceptionCode.h" 34#include "StringHash.h" 35#include "XPathEvaluator.h" 36#include "XPathException.h" 37#include "XPathNSResolver.h" 38#include "XPathStep.h" 39#include <wtf/StdLibExtras.h> 40 41int xpathyyparse(void*); 42 43using namespace WTF; 44using namespace Unicode; 45 46namespace WebCore { 47namespace XPath { 48 49class LocationPath; 50 51#include "XPathGrammar.h" 52 53Parser* Parser::currentParser = 0; 54 55enum XMLCat { NameStart, NameCont, NotPartOfName }; 56 57typedef HashMap<String, Step::Axis> AxisNamesMap; 58 59static XMLCat charCat(UChar aChar) 60{ 61 //### might need to add some special cases from the XML spec. 62 63 if (aChar == '_') 64 return NameStart; 65 66 if (aChar == '.' || aChar == '-') 67 return NameCont; 68 CharCategory category = Unicode::category(aChar); 69 if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter)) 70 return NameStart; 71 if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit)) 72 return NameCont; 73 return NotPartOfName; 74} 75 76static void setUpAxisNamesMap(AxisNamesMap& axisNames) 77{ 78 struct AxisName { 79 const char* name; 80 Step::Axis axis; 81 }; 82 const AxisName axisNameList[] = { 83 { "ancestor", Step::AncestorAxis }, 84 { "ancestor-or-self", Step::AncestorOrSelfAxis }, 85 { "attribute", Step::AttributeAxis }, 86 { "child", Step::ChildAxis }, 87 { "descendant", Step::DescendantAxis }, 88 { "descendant-or-self", Step::DescendantOrSelfAxis }, 89 { "following", Step::FollowingAxis }, 90 { "following-sibling", Step::FollowingSiblingAxis }, 91 { "namespace", Step::NamespaceAxis }, 92 { "parent", Step::ParentAxis }, 93 { "preceding", Step::PrecedingAxis }, 94 { "preceding-sibling", Step::PrecedingSiblingAxis }, 95 { "self", Step::SelfAxis } 96 }; 97 for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i) 98 axisNames.set(axisNameList[i].name, axisNameList[i].axis); 99} 100 101static bool isAxisName(const String& name, Step::Axis& type) 102{ 103 DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ()); 104 105 if (axisNames.isEmpty()) 106 setUpAxisNamesMap(axisNames); 107 108 AxisNamesMap::iterator it = axisNames.find(name); 109 if (it == axisNames.end()) 110 return false; 111 type = it->second; 112 return true; 113} 114 115static bool isNodeTypeName(const String& name) 116{ 117 DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ()); 118 if (nodeTypeNames.isEmpty()) { 119 nodeTypeNames.add("comment"); 120 nodeTypeNames.add("text"); 121 nodeTypeNames.add("processing-instruction"); 122 nodeTypeNames.add("node"); 123 } 124 return nodeTypeNames.contains(name); 125} 126 127/* Returns whether the last parsed token matches the [32] Operator rule 128 * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate 129 * the tokens. 130 */ 131bool Parser::isOperatorContext() const 132{ 133 if (m_nextPos == 0) 134 return false; 135 136 switch (m_lastTokenType) { 137 case AND: case OR: case MULOP: 138 case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: 139 case EQOP: case RELOP: 140 case '@': case AXISNAME: case '(': case '[': 141 return false; 142 default: 143 return true; 144 } 145} 146 147void Parser::skipWS() 148{ 149 while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) 150 ++m_nextPos; 151} 152 153Token Parser::makeTokenAndAdvance(int code, int advance) 154{ 155 m_nextPos += advance; 156 return Token(code); 157} 158 159Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) 160{ 161 m_nextPos += advance; 162 return Token(code, val); 163} 164 165Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) 166{ 167 m_nextPos += advance; 168 return Token(code, val); 169} 170 171// Returns next char if it's there and interesting, 0 otherwise 172char Parser::peekAheadHelper() 173{ 174 if (m_nextPos + 1 >= m_data.length()) 175 return 0; 176 UChar next = m_data[m_nextPos + 1]; 177 if (next >= 0xff) 178 return 0; 179 return next; 180} 181 182char Parser::peekCurHelper() 183{ 184 if (m_nextPos >= m_data.length()) 185 return 0; 186 UChar next = m_data[m_nextPos]; 187 if (next >= 0xff) 188 return 0; 189 return next; 190} 191 192Token Parser::lexString() 193{ 194 UChar delimiter = m_data[m_nextPos]; 195 int startPos = m_nextPos + 1; 196 197 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { 198 if (m_data[m_nextPos] == delimiter) { 199 String value = m_data.substring(startPos, m_nextPos - startPos); 200 if (value.isNull()) 201 value = ""; 202 ++m_nextPos; // Consume the char. 203 return Token(LITERAL, value); 204 } 205 } 206 207 // Ouch, went off the end -- report error. 208 return Token(XPATH_ERROR); 209} 210 211Token Parser::lexNumber() 212{ 213 int startPos = m_nextPos; 214 bool seenDot = false; 215 216 // Go until end or a non-digits character. 217 for (; m_nextPos < m_data.length(); ++m_nextPos) { 218 UChar aChar = m_data[m_nextPos]; 219 if (aChar >= 0xff) break; 220 221 if (aChar < '0' || aChar > '9') { 222 if (aChar == '.' && !seenDot) 223 seenDot = true; 224 else 225 break; 226 } 227 } 228 229 return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); 230} 231 232bool Parser::lexNCName(String& name) 233{ 234 int startPos = m_nextPos; 235 if (m_nextPos >= m_data.length()) 236 return false; 237 238 if (charCat(m_data[m_nextPos]) != NameStart) 239 return false; 240 241 // Keep going until we get a character that's not good for names. 242 for (; m_nextPos < m_data.length(); ++m_nextPos) 243 if (charCat(m_data[m_nextPos]) == NotPartOfName) 244 break; 245 246 name = m_data.substring(startPos, m_nextPos - startPos); 247 return true; 248} 249 250bool Parser::lexQName(String& name) 251{ 252 String n1; 253 if (!lexNCName(n1)) 254 return false; 255 256 skipWS(); 257 258 // If the next character is :, what we just got it the prefix, if not, 259 // it's the whole thing. 260 if (peekAheadHelper() != ':') { 261 name = n1; 262 return true; 263 } 264 265 String n2; 266 if (!lexNCName(n2)) 267 return false; 268 269 name = n1 + ":" + n2; 270 return true; 271} 272 273Token Parser::nextTokenInternal() 274{ 275 skipWS(); 276 277 if (m_nextPos >= m_data.length()) 278 return Token(0); 279 280 char code = peekCurHelper(); 281 switch (code) { 282 case '(': case ')': case '[': case ']': 283 case '@': case ',': case '|': 284 return makeTokenAndAdvance(code); 285 case '\'': 286 case '\"': 287 return lexString(); 288 case '0': case '1': case '2': case '3': case '4': 289 case '5': case '6': case '7': case '8': case '9': 290 return lexNumber(); 291 case '.': { 292 char next = peekAheadHelper(); 293 if (next == '.') 294 return makeTokenAndAdvance(DOTDOT, 2); 295 if (next >= '0' && next <= '9') 296 return lexNumber(); 297 return makeTokenAndAdvance('.'); 298 } 299 case '/': 300 if (peekAheadHelper() == '/') 301 return makeTokenAndAdvance(SLASHSLASH, 2); 302 return makeTokenAndAdvance('/'); 303 case '+': 304 return makeTokenAndAdvance(PLUS); 305 case '-': 306 return makeTokenAndAdvance(MINUS); 307 case '=': 308 return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); 309 case '!': 310 if (peekAheadHelper() == '=') 311 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); 312 return Token(XPATH_ERROR); 313 case '<': 314 if (peekAheadHelper() == '=') 315 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); 316 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); 317 case '>': 318 if (peekAheadHelper() == '=') 319 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); 320 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); 321 case '*': 322 if (isOperatorContext()) 323 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); 324 ++m_nextPos; 325 return Token(NAMETEST, "*"); 326 case '$': { // $ QName 327 m_nextPos++; 328 String name; 329 if (!lexQName(name)) 330 return Token(XPATH_ERROR); 331 return Token(VARIABLEREFERENCE, name); 332 } 333 } 334 335 String name; 336 if (!lexNCName(name)) 337 return Token(XPATH_ERROR); 338 339 skipWS(); 340 // If we're in an operator context, check for any operator names 341 if (isOperatorContext()) { 342 if (name == "and") //### hash? 343 return Token(AND); 344 if (name == "or") 345 return Token(OR); 346 if (name == "mod") 347 return Token(MULOP, NumericOp::OP_Mod); 348 if (name == "div") 349 return Token(MULOP, NumericOp::OP_Div); 350 } 351 352 // See whether we are at a : 353 if (peekCurHelper() == ':') { 354 m_nextPos++; 355 // Any chance it's an axis name? 356 if (peekCurHelper() == ':') { 357 m_nextPos++; 358 359 //It might be an axis name. 360 Step::Axis axis; 361 if (isAxisName(name, axis)) 362 return Token(AXISNAME, axis); 363 // Ugh, :: is only valid in axis names -> error 364 return Token(XPATH_ERROR); 365 } 366 367 // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest 368 skipWS(); 369 if (peekCurHelper() == '*') { 370 m_nextPos++; 371 return Token(NAMETEST, name + ":*"); 372 } 373 374 // Make a full qname. 375 String n2; 376 if (!lexNCName(n2)) 377 return Token(XPATH_ERROR); 378 379 name = name + ":" + n2; 380 } 381 382 skipWS(); 383 if (peekCurHelper() == '(') { 384 //note: we don't swallow the (here! 385 386 //either node type of function name 387 if (isNodeTypeName(name)) { 388 if (name == "processing-instruction") 389 return Token(PI, name); 390 391 return Token(NODETYPE, name); 392 } 393 //must be a function name. 394 return Token(FUNCTIONNAME, name); 395 } 396 397 // At this point, it must be NAMETEST. 398 return Token(NAMETEST, name); 399} 400 401Token Parser::nextToken() 402{ 403 Token toRet = nextTokenInternal(); 404 m_lastTokenType = toRet.type; 405 return toRet; 406} 407 408Parser::Parser() 409{ 410 reset(String()); 411} 412 413void Parser::reset(const String& data) 414{ 415 m_nextPos = 0; 416 m_data = data; 417 m_lastTokenType = 0; 418 419 m_topExpr = 0; 420 m_gotNamespaceError = false; 421} 422 423int Parser::lex(void* data) 424{ 425 YYSTYPE* yylval = static_cast<YYSTYPE*>(data); 426 Token tok = nextToken(); 427 428 switch (tok.type) { 429 case AXISNAME: 430 yylval->axis = tok.axis; 431 break; 432 case MULOP: 433 yylval->numop = tok.numop; 434 break; 435 case RELOP: 436 case EQOP: 437 yylval->eqop = tok.eqop; 438 break; 439 case NODETYPE: 440 case PI: 441 case FUNCTIONNAME: 442 case LITERAL: 443 case VARIABLEREFERENCE: 444 case NUMBER: 445 case NAMETEST: 446 yylval->str = new String(tok.str); 447 registerString(yylval->str); 448 break; 449 } 450 451 return tok.type; 452} 453 454bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI) 455{ 456 int colon = qName.find(':'); 457 if (colon >= 0) { 458 if (!m_resolver) 459 return false; 460 namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon)); 461 if (namespaceURI.isNull()) 462 return false; 463 localName = qName.substring(colon + 1); 464 } else 465 localName = qName; 466 467 return true; 468} 469 470Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec) 471{ 472 reset(statement); 473 474 m_resolver = resolver; 475 476 Parser* oldParser = currentParser; 477 currentParser = this; 478 int parseError = xpathyyparse(this); 479 currentParser = oldParser; 480 481 if (parseError) { 482 deleteAllValues(m_parseNodes); 483 m_parseNodes.clear(); 484 485 HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end(); 486 for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) { 487 deleteAllValues(**it); 488 delete *it; 489 } 490 m_predicateVectors.clear(); 491 492 HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end(); 493 for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) { 494 deleteAllValues(**it); 495 delete *it; 496 } 497 m_expressionVectors.clear(); 498 499 deleteAllValues(m_strings); 500 m_strings.clear(); 501 502 deleteAllValues(m_nodeTests); 503 m_nodeTests.clear(); 504 505 m_topExpr = 0; 506 507 if (m_gotNamespaceError) 508 ec = NAMESPACE_ERR; 509 else 510 ec = XPathException::INVALID_EXPRESSION_ERR; 511 return 0; 512 } 513 514 ASSERT(m_parseNodes.size() == 1); 515 ASSERT(*m_parseNodes.begin() == m_topExpr); 516 ASSERT(m_expressionVectors.size() == 0); 517 ASSERT(m_predicateVectors.size() == 0); 518 ASSERT(m_strings.size() == 0); 519 ASSERT(m_nodeTests.size() == 0); 520 521 m_parseNodes.clear(); 522 Expression* result = m_topExpr; 523 m_topExpr = 0; 524 525 return result; 526} 527 528void Parser::registerParseNode(ParseNode* node) 529{ 530 if (node == 0) 531 return; 532 533 ASSERT(!m_parseNodes.contains(node)); 534 535 m_parseNodes.add(node); 536} 537 538void Parser::unregisterParseNode(ParseNode* node) 539{ 540 if (node == 0) 541 return; 542 543 ASSERT(m_parseNodes.contains(node)); 544 545 m_parseNodes.remove(node); 546} 547 548void Parser::registerPredicateVector(Vector<Predicate*>* vector) 549{ 550 if (vector == 0) 551 return; 552 553 ASSERT(!m_predicateVectors.contains(vector)); 554 555 m_predicateVectors.add(vector); 556} 557 558void Parser::deletePredicateVector(Vector<Predicate*>* vector) 559{ 560 if (vector == 0) 561 return; 562 563 ASSERT(m_predicateVectors.contains(vector)); 564 565 m_predicateVectors.remove(vector); 566 delete vector; 567} 568 569 570void Parser::registerExpressionVector(Vector<Expression*>* vector) 571{ 572 if (vector == 0) 573 return; 574 575 ASSERT(!m_expressionVectors.contains(vector)); 576 577 m_expressionVectors.add(vector); 578} 579 580void Parser::deleteExpressionVector(Vector<Expression*>* vector) 581{ 582 if (vector == 0) 583 return; 584 585 ASSERT(m_expressionVectors.contains(vector)); 586 587 m_expressionVectors.remove(vector); 588 delete vector; 589} 590 591void Parser::registerString(String* s) 592{ 593 if (s == 0) 594 return; 595 596 ASSERT(!m_strings.contains(s)); 597 598 m_strings.add(s); 599} 600 601void Parser::deleteString(String* s) 602{ 603 if (s == 0) 604 return; 605 606 ASSERT(m_strings.contains(s)); 607 608 m_strings.remove(s); 609 delete s; 610} 611 612void Parser::registerNodeTest(Step::NodeTest* t) 613{ 614 if (t == 0) 615 return; 616 617 ASSERT(!m_nodeTests.contains(t)); 618 619 m_nodeTests.add(t); 620} 621 622void Parser::deleteNodeTest(Step::NodeTest* t) 623{ 624 if (t == 0) 625 return; 626 627 ASSERT(m_nodeTests.contains(t)); 628 629 m_nodeTests.remove(t); 630 delete t; 631} 632 633} 634} 635 636#endif // ENABLE(XPATH) 637