1/* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18/* 19 * $Id: Lexer.java 524810 2007-04-02 15:51:55Z zongaro $ 20 */ 21package org.apache.xpath.compiler; 22 23import java.util.Vector; 24 25import org.apache.xml.utils.PrefixResolver; 26import org.apache.xpath.res.XPATHErrorResources; 27 28/** 29 * This class is in charge of lexical processing of the XPath 30 * expression into tokens. 31 */ 32class Lexer 33{ 34 35 /** 36 * The target XPath. 37 */ 38 private Compiler m_compiler; 39 40 /** 41 * The prefix resolver to map prefixes to namespaces in the XPath. 42 */ 43 PrefixResolver m_namespaceContext; 44 45 /** 46 * The XPath processor object. 47 */ 48 XPathParser m_processor; 49 50 /** 51 * This value is added to each element name in the TARGETEXTRA 52 * that is a 'target' (right-most top-level element name). 53 */ 54 static final int TARGETEXTRA = 10000; 55 56 /** 57 * Ignore this, it is going away. 58 * This holds a map to the m_tokenQueue that tells where the top-level elements are. 59 * It is used for pattern matching so the m_tokenQueue can be walked backwards. 60 * Each element that is a 'target', (right-most top level element name) has 61 * TARGETEXTRA added to it. 62 * 63 */ 64 private int m_patternMap[] = new int[100]; 65 66 /** 67 * Ignore this, it is going away. 68 * The number of elements that m_patternMap maps; 69 */ 70 private int m_patternMapSize; 71 72 /** 73 * Create a Lexer object. 74 * 75 * @param compiler The owning compiler for this lexer. 76 * @param resolver The prefix resolver for mapping qualified name prefixes 77 * to namespace URIs. 78 * @param xpathProcessor The parser that is processing strings to opcodes. 79 */ 80 Lexer(Compiler compiler, PrefixResolver resolver, 81 XPathParser xpathProcessor) 82 { 83 84 m_compiler = compiler; 85 m_namespaceContext = resolver; 86 m_processor = xpathProcessor; 87 } 88 89 /** 90 * Walk through the expression and build a token queue, and a map of the top-level 91 * elements. 92 * @param pat XSLT Expression. 93 * 94 * @throws javax.xml.transform.TransformerException 95 */ 96 void tokenize(String pat) throws javax.xml.transform.TransformerException 97 { 98 tokenize(pat, null); 99 } 100 101 /** 102 * Walk through the expression and build a token queue, and a map of the top-level 103 * elements. 104 * @param pat XSLT Expression. 105 * @param targetStrings Vector to hold Strings, may be null. 106 * 107 * @throws javax.xml.transform.TransformerException 108 */ 109 void tokenize(String pat, Vector targetStrings) 110 throws javax.xml.transform.TransformerException 111 { 112 113 m_compiler.m_currentPattern = pat; 114 m_patternMapSize = 0; 115 116 // This needs to grow too. Use a conservative estimate that the OpMapVector 117 // needs about five time the length of the input path expression - to a 118 // maximum of MAXTOKENQUEUESIZE*5. If the OpMapVector needs to grow, grow 119 // it freely (second argument to constructor). 120 int initTokQueueSize = ((pat.length() < OpMap.MAXTOKENQUEUESIZE) 121 ? pat.length() : OpMap.MAXTOKENQUEUESIZE) * 5; 122 m_compiler.m_opMap = new OpMapVector(initTokQueueSize, 123 OpMap.BLOCKTOKENQUEUESIZE * 5, 124 OpMap.MAPINDEX_LENGTH); 125 126 int nChars = pat.length(); 127 int startSubstring = -1; 128 int posOfNSSep = -1; 129 boolean isStartOfPat = true; 130 boolean isAttrName = false; 131 boolean isNum = false; 132 133 // Nesting of '[' so we can know if the given element should be 134 // counted inside the m_patternMap. 135 int nesting = 0; 136 137 // char[] chars = pat.toCharArray(); 138 for (int i = 0; i < nChars; i++) 139 { 140 char c = pat.charAt(i); 141 142 switch (c) 143 { 144 case '\"' : 145 { 146 if (startSubstring != -1) 147 { 148 isNum = false; 149 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 150 isAttrName = false; 151 152 if (-1 != posOfNSSep) 153 { 154 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 155 } 156 else 157 { 158 addToTokenQueue(pat.substring(startSubstring, i)); 159 } 160 } 161 162 startSubstring = i; 163 164 for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++); 165 166 if (c == '\"' && i < nChars) 167 { 168 addToTokenQueue(pat.substring(startSubstring, i + 1)); 169 170 startSubstring = -1; 171 } 172 else 173 { 174 m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE, 175 null); //"misquoted literal... expected double quote!"); 176 } 177 } 178 break; 179 case '\'' : 180 if (startSubstring != -1) 181 { 182 isNum = false; 183 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 184 isAttrName = false; 185 186 if (-1 != posOfNSSep) 187 { 188 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 189 } 190 else 191 { 192 addToTokenQueue(pat.substring(startSubstring, i)); 193 } 194 } 195 196 startSubstring = i; 197 198 for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++); 199 200 if (c == '\'' && i < nChars) 201 { 202 addToTokenQueue(pat.substring(startSubstring, i + 1)); 203 204 startSubstring = -1; 205 } 206 else 207 { 208 m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE, 209 null); //"misquoted literal... expected single quote!"); 210 } 211 break; 212 case 0x0A : 213 case 0x0D : 214 case ' ' : 215 case '\t' : 216 if (startSubstring != -1) 217 { 218 isNum = false; 219 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 220 isAttrName = false; 221 222 if (-1 != posOfNSSep) 223 { 224 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 225 } 226 else 227 { 228 addToTokenQueue(pat.substring(startSubstring, i)); 229 } 230 231 startSubstring = -1; 232 } 233 break; 234 case '@' : 235 isAttrName = true; 236 237 // fall-through on purpose 238 case '-' : 239 if ('-' == c) 240 { 241 if (!(isNum || (startSubstring == -1))) 242 { 243 break; 244 } 245 246 isNum = false; 247 } 248 249 // fall-through on purpose 250 case '(' : 251 case '[' : 252 case ')' : 253 case ']' : 254 case '|' : 255 case '/' : 256 case '*' : 257 case '+' : 258 case '=' : 259 case ',' : 260 case '\\' : // Unused at the moment 261 case '^' : // Unused at the moment 262 case '!' : // Unused at the moment 263 case '$' : 264 case '<' : 265 case '>' : 266 if (startSubstring != -1) 267 { 268 isNum = false; 269 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 270 isAttrName = false; 271 272 if (-1 != posOfNSSep) 273 { 274 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 275 } 276 else 277 { 278 addToTokenQueue(pat.substring(startSubstring, i)); 279 } 280 281 startSubstring = -1; 282 } 283 else if (('/' == c) && isStartOfPat) 284 { 285 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 286 } 287 else if ('*' == c) 288 { 289 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 290 isAttrName = false; 291 } 292 293 if (0 == nesting) 294 { 295 if ('|' == c) 296 { 297 if (null != targetStrings) 298 { 299 recordTokenString(targetStrings); 300 } 301 302 isStartOfPat = true; 303 } 304 } 305 306 if ((')' == c) || (']' == c)) 307 { 308 nesting--; 309 } 310 else if (('(' == c) || ('[' == c)) 311 { 312 nesting++; 313 } 314 315 addToTokenQueue(pat.substring(i, i + 1)); 316 break; 317 case ':' : 318 if (i>0) 319 { 320 if (posOfNSSep == (i - 1)) 321 { 322 if (startSubstring != -1) 323 { 324 if (startSubstring < (i - 1)) 325 addToTokenQueue(pat.substring(startSubstring, i - 1)); 326 } 327 328 isNum = false; 329 isAttrName = false; 330 startSubstring = -1; 331 posOfNSSep = -1; 332 333 addToTokenQueue(pat.substring(i - 1, i + 1)); 334 335 break; 336 } 337 else 338 { 339 posOfNSSep = i; 340 } 341 } 342 343 // fall through on purpose 344 default : 345 if (-1 == startSubstring) 346 { 347 startSubstring = i; 348 isNum = Character.isDigit(c); 349 } 350 else if (isNum) 351 { 352 isNum = Character.isDigit(c); 353 } 354 } 355 } 356 357 if (startSubstring != -1) 358 { 359 isNum = false; 360 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 361 362 if ((-1 != posOfNSSep) || 363 ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes()))) 364 { 365 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars); 366 } 367 else 368 { 369 addToTokenQueue(pat.substring(startSubstring, nChars)); 370 } 371 } 372 373 if (0 == m_compiler.getTokenQueueSize()) 374 { 375 m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null); //"Empty expression!"); 376 } 377 else if (null != targetStrings) 378 { 379 recordTokenString(targetStrings); 380 } 381 382 m_processor.m_queueMark = 0; 383 } 384 385 /** 386 * Record the current position on the token queue as long as 387 * this is a top-level element. Must be called before the 388 * next token is added to the m_tokenQueue. 389 * 390 * @param nesting The nesting count for the pattern element. 391 * @param isStart true if this is the start of a pattern. 392 * @param isAttrName true if we have determined that this is an attribute name. 393 * 394 * @return true if this is the start of a pattern. 395 */ 396 private boolean mapPatternElemPos(int nesting, boolean isStart, 397 boolean isAttrName) 398 { 399 400 if (0 == nesting) 401 { 402 if(m_patternMapSize >= m_patternMap.length) 403 { 404 int patternMap[] = m_patternMap; 405 int len = m_patternMap.length; 406 m_patternMap = new int[m_patternMapSize + 100]; 407 System.arraycopy(patternMap, 0, m_patternMap, 0, len); 408 } 409 if (!isStart) 410 { 411 m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA; 412 } 413 m_patternMap[m_patternMapSize] = 414 (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA; 415 416 m_patternMapSize++; 417 418 isStart = false; 419 } 420 421 return isStart; 422 } 423 424 /** 425 * Given a map pos, return the corresponding token queue pos. 426 * 427 * @param i The index in the m_patternMap. 428 * 429 * @return the token queue position. 430 */ 431 private int getTokenQueuePosFromMap(int i) 432 { 433 434 int pos = m_patternMap[i]; 435 436 return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos; 437 } 438 439 /** 440 * Reset token queue mark and m_token to a 441 * given position. 442 * @param mark The new position. 443 */ 444 private final void resetTokenMark(int mark) 445 { 446 447 int qsz = m_compiler.getTokenQueueSize(); 448 449 m_processor.m_queueMark = (mark > 0) 450 ? ((mark <= qsz) ? mark - 1 : mark) : 0; 451 452 if (m_processor.m_queueMark < qsz) 453 { 454 m_processor.m_token = 455 (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++); 456 m_processor.m_tokenChar = m_processor.m_token.charAt(0); 457 } 458 else 459 { 460 m_processor.m_token = null; 461 m_processor.m_tokenChar = 0; 462 } 463 } 464 465 /** 466 * Given a string, return the corresponding keyword token. 467 * 468 * @param key The keyword. 469 * 470 * @return An opcode value. 471 */ 472 final int getKeywordToken(String key) 473 { 474 475 int tok; 476 477 try 478 { 479 Integer itok = (Integer) Keywords.getKeyWord(key); 480 481 tok = (null != itok) ? itok.intValue() : 0; 482 } 483 catch (NullPointerException npe) 484 { 485 tok = 0; 486 } 487 catch (ClassCastException cce) 488 { 489 tok = 0; 490 } 491 492 return tok; 493 } 494 495 /** 496 * Record the current token in the passed vector. 497 * 498 * @param targetStrings Vector of string. 499 */ 500 private void recordTokenString(Vector targetStrings) 501 { 502 503 int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1); 504 505 resetTokenMark(tokPos + 1); 506 507 if (m_processor.lookahead('(', 1)) 508 { 509 int tok = getKeywordToken(m_processor.m_token); 510 511 switch (tok) 512 { 513 case OpCodes.NODETYPE_COMMENT : 514 targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT); 515 break; 516 case OpCodes.NODETYPE_TEXT : 517 targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT); 518 break; 519 case OpCodes.NODETYPE_NODE : 520 targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY); 521 break; 522 case OpCodes.NODETYPE_ROOT : 523 targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT); 524 break; 525 case OpCodes.NODETYPE_ANYELEMENT : 526 targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY); 527 break; 528 case OpCodes.NODETYPE_PI : 529 targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY); 530 break; 531 default : 532 targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY); 533 } 534 } 535 else 536 { 537 if (m_processor.tokenIs('@')) 538 { 539 tokPos++; 540 541 resetTokenMark(tokPos + 1); 542 } 543 544 if (m_processor.lookahead(':', 1)) 545 { 546 tokPos += 2; 547 } 548 549 targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos)); 550 } 551 } 552 553 /** 554 * Add a token to the token queue. 555 * 556 * 557 * @param s The token. 558 */ 559 private final void addToTokenQueue(String s) 560 { 561 m_compiler.getTokenQueue().addElement(s); 562 } 563 564 /** 565 * When a seperator token is found, see if there's a element name or 566 * the like to map. 567 * 568 * @param pat The XPath name string. 569 * @param startSubstring The start of the name string. 570 * @param posOfNSSep The position of the namespace seperator (':'). 571 * @param posOfScan The end of the name index. 572 * 573 * @throws javax.xml.transform.TransformerException 574 * 575 * @return -1 always. 576 */ 577 private int mapNSTokens(String pat, int startSubstring, int posOfNSSep, 578 int posOfScan) 579 throws javax.xml.transform.TransformerException 580 { 581 582 String prefix = ""; 583 584 if ((startSubstring >= 0) && (posOfNSSep >= 0)) 585 { 586 prefix = pat.substring(startSubstring, posOfNSSep); 587 } 588 String uName; 589 590 if ((null != m_namespaceContext) &&!prefix.equals("*") 591 &&!prefix.equals("xmlns")) 592 { 593 try 594 { 595 if (prefix.length() > 0) 596 uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix( 597 prefix); 598 else 599 { 600 601 // Assume last was wildcard. This is not legal according 602 // to the draft. Set the below to true to make namespace 603 // wildcards work. 604 if (false) 605 { 606 addToTokenQueue(":"); 607 608 String s = pat.substring(posOfNSSep + 1, posOfScan); 609 610 if (s.length() > 0) 611 addToTokenQueue(s); 612 613 return -1; 614 } 615 else 616 { 617 uName = 618 ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix( 619 prefix); 620 } 621 } 622 } 623 catch (ClassCastException cce) 624 { 625 uName = m_namespaceContext.getNamespaceForPrefix(prefix); 626 } 627 } 628 else 629 { 630 uName = prefix; 631 } 632 633 if ((null != uName) && (uName.length() > 0)) 634 { 635 addToTokenQueue(uName); 636 addToTokenQueue(":"); 637 638 String s = pat.substring(posOfNSSep + 1, posOfScan); 639 640 if (s.length() > 0) 641 addToTokenQueue(s); 642 } 643 else 644 { 645 // To older XPath code it doesn't matter if 646 // error() is called or errorForDOM3(). 647 m_processor.errorForDOM3(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE, 648 new String[] {prefix}); //"Prefix must resolve to a namespace: {0}"; 649 650/** old code commented out 17-Sep-2004 651// error("Could not locate namespace for prefix: "+prefix); 652// m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE, 653// new String[] {prefix}); //"Prefix must resolve to a namespace: {0}"; 654*/ 655 656 /*** Old code commented out 10-Jan-2001 657 addToTokenQueue(prefix); 658 addToTokenQueue(":"); 659 660 String s = pat.substring(posOfNSSep + 1, posOfScan); 661 662 if (s.length() > 0) 663 addToTokenQueue(s); 664 ***/ 665 } 666 667 return -1; 668 } 669} 670