1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18/*
19 * $Id: Lexer.java 524810 2007-04-02 15:51:55Z zongaro $
20 */
21package org.apache.xpath.compiler;
22
23import java.util.Vector;
24
25import org.apache.xml.utils.PrefixResolver;
26import org.apache.xpath.res.XPATHErrorResources;
27
28/**
29 * This class is in charge of lexical processing of the XPath
30 * expression into tokens.
31 */
32class Lexer
33{
34
35  /**
36   * The target XPath.
37   */
38  private Compiler m_compiler;
39
40  /**
41   * The prefix resolver to map prefixes to namespaces in the XPath.
42   */
43  PrefixResolver m_namespaceContext;
44
45  /**
46   * The XPath processor object.
47   */
48  XPathParser m_processor;
49
50  /**
51   * This value is added to each element name in the TARGETEXTRA
52   * that is a 'target' (right-most top-level element name).
53   */
54  static final int TARGETEXTRA = 10000;
55
56  /**
57   * Ignore this, it is going away.
58   * This holds a map to the m_tokenQueue that tells where the top-level elements are.
59   * It is used for pattern matching so the m_tokenQueue can be walked backwards.
60   * Each element that is a 'target', (right-most top level element name) has
61   * TARGETEXTRA added to it.
62   *
63   */
64  private int m_patternMap[] = new int[100];
65
66  /**
67   * Ignore this, it is going away.
68   * The number of elements that m_patternMap maps;
69   */
70  private int m_patternMapSize;
71
72  /**
73   * Create a Lexer object.
74   *
75   * @param compiler The owning compiler for this lexer.
76   * @param resolver The prefix resolver for mapping qualified name prefixes
77   *                 to namespace URIs.
78   * @param xpathProcessor The parser that is processing strings to opcodes.
79   */
80  Lexer(Compiler compiler, PrefixResolver resolver,
81        XPathParser xpathProcessor)
82  {
83
84    m_compiler = compiler;
85    m_namespaceContext = resolver;
86    m_processor = xpathProcessor;
87  }
88
89  /**
90   * Walk through the expression and build a token queue, and a map of the top-level
91   * elements.
92   * @param pat XSLT Expression.
93   *
94   * @throws javax.xml.transform.TransformerException
95   */
96  void tokenize(String pat) throws javax.xml.transform.TransformerException
97  {
98    tokenize(pat, null);
99  }
100
101  /**
102   * Walk through the expression and build a token queue, and a map of the top-level
103   * elements.
104   * @param pat XSLT Expression.
105   * @param targetStrings Vector to hold Strings, may be null.
106   *
107   * @throws javax.xml.transform.TransformerException
108   */
109  void tokenize(String pat, Vector targetStrings)
110          throws javax.xml.transform.TransformerException
111  {
112
113    m_compiler.m_currentPattern = pat;
114    m_patternMapSize = 0;
115
116    // This needs to grow too.  Use a conservative estimate that the OpMapVector
117    // needs about five time the length of the input path expression - to a
118    // maximum of MAXTOKENQUEUESIZE*5.  If the OpMapVector needs to grow, grow
119    // it freely (second argument to constructor).
120    int initTokQueueSize = ((pat.length() < OpMap.MAXTOKENQUEUESIZE)
121                                 ? pat.length() :  OpMap.MAXTOKENQUEUESIZE) * 5;
122    m_compiler.m_opMap = new OpMapVector(initTokQueueSize,
123                                         OpMap.BLOCKTOKENQUEUESIZE * 5,
124                                         OpMap.MAPINDEX_LENGTH);
125
126    int nChars = pat.length();
127    int startSubstring = -1;
128    int posOfNSSep = -1;
129    boolean isStartOfPat = true;
130    boolean isAttrName = false;
131    boolean isNum = false;
132
133    // Nesting of '[' so we can know if the given element should be
134    // counted inside the m_patternMap.
135    int nesting = 0;
136
137    // char[] chars = pat.toCharArray();
138    for (int i = 0; i < nChars; i++)
139    {
140      char c = pat.charAt(i);
141
142      switch (c)
143      {
144      case '\"' :
145      {
146        if (startSubstring != -1)
147        {
148          isNum = false;
149          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
150          isAttrName = false;
151
152          if (-1 != posOfNSSep)
153          {
154            posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
155          }
156          else
157          {
158            addToTokenQueue(pat.substring(startSubstring, i));
159          }
160        }
161
162        startSubstring = i;
163
164        for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
165
166        if (c == '\"' && i < nChars)
167        {
168          addToTokenQueue(pat.substring(startSubstring, i + 1));
169
170          startSubstring = -1;
171        }
172        else
173        {
174          m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
175                            null);  //"misquoted literal... expected double quote!");
176        }
177      }
178      break;
179      case '\'' :
180        if (startSubstring != -1)
181        {
182          isNum = false;
183          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
184          isAttrName = false;
185
186          if (-1 != posOfNSSep)
187          {
188            posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
189          }
190          else
191          {
192            addToTokenQueue(pat.substring(startSubstring, i));
193          }
194        }
195
196        startSubstring = i;
197
198        for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
199
200        if (c == '\'' && i < nChars)
201        {
202          addToTokenQueue(pat.substring(startSubstring, i + 1));
203
204          startSubstring = -1;
205        }
206        else
207        {
208          m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
209                            null);  //"misquoted literal... expected single quote!");
210        }
211        break;
212      case 0x0A :
213      case 0x0D :
214      case ' ' :
215      case '\t' :
216        if (startSubstring != -1)
217        {
218          isNum = false;
219          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
220          isAttrName = false;
221
222          if (-1 != posOfNSSep)
223          {
224            posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
225          }
226          else
227          {
228            addToTokenQueue(pat.substring(startSubstring, i));
229          }
230
231          startSubstring = -1;
232        }
233        break;
234      case '@' :
235        isAttrName = true;
236
237      // fall-through on purpose
238      case '-' :
239        if ('-' == c)
240        {
241          if (!(isNum || (startSubstring == -1)))
242          {
243            break;
244          }
245
246          isNum = false;
247        }
248
249      // fall-through on purpose
250      case '(' :
251      case '[' :
252      case ')' :
253      case ']' :
254      case '|' :
255      case '/' :
256      case '*' :
257      case '+' :
258      case '=' :
259      case ',' :
260      case '\\' :  // Unused at the moment
261      case '^' :  // Unused at the moment
262      case '!' :  // Unused at the moment
263      case '$' :
264      case '<' :
265      case '>' :
266        if (startSubstring != -1)
267        {
268          isNum = false;
269          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
270          isAttrName = false;
271
272          if (-1 != posOfNSSep)
273          {
274            posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
275          }
276          else
277          {
278            addToTokenQueue(pat.substring(startSubstring, i));
279          }
280
281          startSubstring = -1;
282        }
283        else if (('/' == c) && isStartOfPat)
284        {
285          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
286        }
287        else if ('*' == c)
288        {
289          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
290          isAttrName = false;
291        }
292
293        if (0 == nesting)
294        {
295          if ('|' == c)
296          {
297            if (null != targetStrings)
298            {
299              recordTokenString(targetStrings);
300            }
301
302            isStartOfPat = true;
303          }
304        }
305
306        if ((')' == c) || (']' == c))
307        {
308          nesting--;
309        }
310        else if (('(' == c) || ('[' == c))
311        {
312          nesting++;
313        }
314
315        addToTokenQueue(pat.substring(i, i + 1));
316        break;
317      case ':' :
318        if (i>0)
319        {
320          if (posOfNSSep == (i - 1))
321          {
322            if (startSubstring != -1)
323            {
324              if (startSubstring < (i - 1))
325                addToTokenQueue(pat.substring(startSubstring, i - 1));
326            }
327
328            isNum = false;
329            isAttrName = false;
330            startSubstring = -1;
331            posOfNSSep = -1;
332
333            addToTokenQueue(pat.substring(i - 1, i + 1));
334
335            break;
336          }
337          else
338          {
339            posOfNSSep = i;
340          }
341        }
342
343      // fall through on purpose
344      default :
345        if (-1 == startSubstring)
346        {
347          startSubstring = i;
348          isNum = Character.isDigit(c);
349        }
350        else if (isNum)
351        {
352          isNum = Character.isDigit(c);
353        }
354      }
355    }
356
357    if (startSubstring != -1)
358    {
359      isNum = false;
360      isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
361
362      if ((-1 != posOfNSSep) ||
363         ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
364      {
365        posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
366      }
367      else
368      {
369        addToTokenQueue(pat.substring(startSubstring, nChars));
370      }
371    }
372
373    if (0 == m_compiler.getTokenQueueSize())
374    {
375      m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null);  //"Empty expression!");
376    }
377    else if (null != targetStrings)
378    {
379      recordTokenString(targetStrings);
380    }
381
382    m_processor.m_queueMark = 0;
383  }
384
385  /**
386   * Record the current position on the token queue as long as
387   * this is a top-level element.  Must be called before the
388   * next token is added to the m_tokenQueue.
389   *
390   * @param nesting The nesting count for the pattern element.
391   * @param isStart true if this is the start of a pattern.
392   * @param isAttrName true if we have determined that this is an attribute name.
393   *
394   * @return true if this is the start of a pattern.
395   */
396  private boolean mapPatternElemPos(int nesting, boolean isStart,
397                                    boolean isAttrName)
398  {
399
400    if (0 == nesting)
401    {
402      if(m_patternMapSize >= m_patternMap.length)
403      {
404        int patternMap[] = m_patternMap;
405        int len = m_patternMap.length;
406        m_patternMap = new int[m_patternMapSize + 100];
407        System.arraycopy(patternMap, 0, m_patternMap, 0, len);
408      }
409      if (!isStart)
410      {
411        m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
412      }
413      m_patternMap[m_patternMapSize] =
414        (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
415
416      m_patternMapSize++;
417
418      isStart = false;
419    }
420
421    return isStart;
422  }
423
424  /**
425   * Given a map pos, return the corresponding token queue pos.
426   *
427   * @param i The index in the m_patternMap.
428   *
429   * @return the token queue position.
430   */
431  private int getTokenQueuePosFromMap(int i)
432  {
433
434    int pos = m_patternMap[i];
435
436    return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
437  }
438
439  /**
440   * Reset token queue mark and m_token to a
441   * given position.
442   * @param mark The new position.
443   */
444  private final void resetTokenMark(int mark)
445  {
446
447    int qsz = m_compiler.getTokenQueueSize();
448
449    m_processor.m_queueMark = (mark > 0)
450                              ? ((mark <= qsz) ? mark - 1 : mark) : 0;
451
452    if (m_processor.m_queueMark < qsz)
453    {
454      m_processor.m_token =
455        (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
456      m_processor.m_tokenChar = m_processor.m_token.charAt(0);
457    }
458    else
459    {
460      m_processor.m_token = null;
461      m_processor.m_tokenChar = 0;
462    }
463  }
464
465  /**
466   * Given a string, return the corresponding keyword token.
467   *
468   * @param key The keyword.
469   *
470   * @return An opcode value.
471   */
472  final int getKeywordToken(String key)
473  {
474
475    int tok;
476
477    try
478    {
479      Integer itok = (Integer) Keywords.getKeyWord(key);
480
481      tok = (null != itok) ? itok.intValue() : 0;
482    }
483    catch (NullPointerException npe)
484    {
485      tok = 0;
486    }
487    catch (ClassCastException cce)
488    {
489      tok = 0;
490    }
491
492    return tok;
493  }
494
495  /**
496   * Record the current token in the passed vector.
497   *
498   * @param targetStrings Vector of string.
499   */
500  private void recordTokenString(Vector targetStrings)
501  {
502
503    int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
504
505    resetTokenMark(tokPos + 1);
506
507    if (m_processor.lookahead('(', 1))
508    {
509      int tok = getKeywordToken(m_processor.m_token);
510
511      switch (tok)
512      {
513      case OpCodes.NODETYPE_COMMENT :
514        targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT);
515        break;
516      case OpCodes.NODETYPE_TEXT :
517        targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
518        break;
519      case OpCodes.NODETYPE_NODE :
520        targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
521        break;
522      case OpCodes.NODETYPE_ROOT :
523        targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
524        break;
525      case OpCodes.NODETYPE_ANYELEMENT :
526        targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
527        break;
528      case OpCodes.NODETYPE_PI :
529        targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
530        break;
531      default :
532        targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
533      }
534    }
535    else
536    {
537      if (m_processor.tokenIs('@'))
538      {
539        tokPos++;
540
541        resetTokenMark(tokPos + 1);
542      }
543
544      if (m_processor.lookahead(':', 1))
545      {
546        tokPos += 2;
547      }
548
549      targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos));
550    }
551  }
552
553  /**
554   * Add a token to the token queue.
555   *
556   *
557   * @param s The token.
558   */
559  private final void addToTokenQueue(String s)
560  {
561    m_compiler.getTokenQueue().addElement(s);
562  }
563
564  /**
565   * When a seperator token is found, see if there's a element name or
566   * the like to map.
567   *
568   * @param pat The XPath name string.
569   * @param startSubstring The start of the name string.
570   * @param posOfNSSep The position of the namespace seperator (':').
571   * @param posOfScan The end of the name index.
572   *
573   * @throws javax.xml.transform.TransformerException
574   *
575   * @return -1 always.
576   */
577  private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
578                          int posOfScan)
579           throws javax.xml.transform.TransformerException
580 {
581
582    String prefix = "";
583
584    if ((startSubstring >= 0) && (posOfNSSep >= 0))
585    {
586       prefix = pat.substring(startSubstring, posOfNSSep);
587    }
588    String uName;
589
590    if ((null != m_namespaceContext) &&!prefix.equals("*")
591            &&!prefix.equals("xmlns"))
592    {
593      try
594      {
595        if (prefix.length() > 0)
596          uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
597            prefix);
598        else
599        {
600
601          // Assume last was wildcard. This is not legal according
602          // to the draft. Set the below to true to make namespace
603          // wildcards work.
604          if (false)
605          {
606            addToTokenQueue(":");
607
608            String s = pat.substring(posOfNSSep + 1, posOfScan);
609
610            if (s.length() > 0)
611              addToTokenQueue(s);
612
613            return -1;
614          }
615          else
616          {
617            uName =
618              ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
619                prefix);
620          }
621        }
622      }
623      catch (ClassCastException cce)
624      {
625        uName = m_namespaceContext.getNamespaceForPrefix(prefix);
626      }
627    }
628    else
629    {
630      uName = prefix;
631    }
632
633    if ((null != uName) && (uName.length() > 0))
634    {
635      addToTokenQueue(uName);
636      addToTokenQueue(":");
637
638      String s = pat.substring(posOfNSSep + 1, posOfScan);
639
640      if (s.length() > 0)
641        addToTokenQueue(s);
642    }
643    else
644    {
645        // To older XPath code it doesn't matter if
646        // error() is called or errorForDOM3().
647		m_processor.errorForDOM3(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
648						 new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
649
650/** old code commented out 17-Sep-2004
651// error("Could not locate namespace for prefix: "+prefix);
652//		  m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
653//					 new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
654*/
655
656      /***  Old code commented out 10-Jan-2001
657      addToTokenQueue(prefix);
658      addToTokenQueue(":");
659
660      String s = pat.substring(posOfNSSep + 1, posOfScan);
661
662      if (s.length() > 0)
663        addToTokenQueue(s);
664      ***/
665    }
666
667    return -1;
668  }
669}
670