001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: Lexer.java 524810 2007-04-02 15:51:55Z zongaro $
020     */
021    package org.apache.xpath.compiler;
022    
023    import java.util.Vector;
024    
025    import org.apache.xml.utils.PrefixResolver;
026    import org.apache.xpath.res.XPATHErrorResources;
027    
028    /**
029     * This class is in charge of lexical processing of the XPath
030     * expression into tokens.
031     */
032    class Lexer
033    {
034    
035      /**
036       * The target XPath.
037       */
038      private Compiler m_compiler;
039    
040      /**
041       * The prefix resolver to map prefixes to namespaces in the XPath.
042       */
043      PrefixResolver m_namespaceContext;
044    
045      /**
046       * The XPath processor object.
047       */
048      XPathParser m_processor;
049    
050      /**
051       * This value is added to each element name in the TARGETEXTRA
052       * that is a 'target' (right-most top-level element name).
053       */
054      static final int TARGETEXTRA = 10000;
055    
056      /**
057       * Ignore this, it is going away.
058       * This holds a map to the m_tokenQueue that tells where the top-level elements are.
059       * It is used for pattern matching so the m_tokenQueue can be walked backwards.
060       * Each element that is a 'target', (right-most top level element name) has
061       * TARGETEXTRA added to it.
062       *
063       */
064      private int m_patternMap[] = new int[100];
065    
066      /**
067       * Ignore this, it is going away.
068       * The number of elements that m_patternMap maps;
069       */
070      private int m_patternMapSize;
071    
072      /**
073       * Create a Lexer object.
074       *
075       * @param compiler The owning compiler for this lexer.
076       * @param resolver The prefix resolver for mapping qualified name prefixes 
077       *                 to namespace URIs.
078       * @param xpathProcessor The parser that is processing strings to opcodes.
079       */
080      Lexer(Compiler compiler, PrefixResolver resolver,
081            XPathParser xpathProcessor)
082      {
083    
084        m_compiler = compiler;
085        m_namespaceContext = resolver;
086        m_processor = xpathProcessor;
087      }
088    
089      /**
090       * Walk through the expression and build a token queue, and a map of the top-level
091       * elements.
092       * @param pat XSLT Expression.
093       *
094       * @throws javax.xml.transform.TransformerException
095       */
096      void tokenize(String pat) throws javax.xml.transform.TransformerException
097      {
098        tokenize(pat, null);
099      }
100    
101      /**
102       * Walk through the expression and build a token queue, and a map of the top-level
103       * elements.
104       * @param pat XSLT Expression.
105       * @param targetStrings Vector to hold Strings, may be null.
106       *
107       * @throws javax.xml.transform.TransformerException
108       */
109      void tokenize(String pat, Vector targetStrings)
110              throws javax.xml.transform.TransformerException
111      {
112    
113        m_compiler.m_currentPattern = pat;
114        m_patternMapSize = 0; 
115    
116        // This needs to grow too.  Use a conservative estimate that the OpMapVector
117        // needs about five time the length of the input path expression - to a
118        // maximum of MAXTOKENQUEUESIZE*5.  If the OpMapVector needs to grow, grow
119        // it freely (second argument to constructor).
120        int initTokQueueSize = ((pat.length() < OpMap.MAXTOKENQUEUESIZE)
121                                     ? pat.length() :  OpMap.MAXTOKENQUEUESIZE) * 5;
122        m_compiler.m_opMap = new OpMapVector(initTokQueueSize,
123                                             OpMap.BLOCKTOKENQUEUESIZE * 5,
124                                             OpMap.MAPINDEX_LENGTH);
125    
126        int nChars = pat.length();
127        int startSubstring = -1; 
128        int posOfNSSep = -1;
129        boolean isStartOfPat = true;
130        boolean isAttrName = false;
131        boolean isNum = false;
132    
133        // Nesting of '[' so we can know if the given element should be
134        // counted inside the m_patternMap.
135        int nesting = 0;
136    
137        // char[] chars = pat.toCharArray();
138        for (int i = 0; i < nChars; i++)
139        {
140          char c = pat.charAt(i);
141    
142          switch (c)
143          {
144          case '\"' :
145          {
146            if (startSubstring != -1)
147            {
148              isNum = false;
149              isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
150              isAttrName = false;
151    
152              if (-1 != posOfNSSep)
153              {
154                posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
155              }
156              else
157              {
158                addToTokenQueue(pat.substring(startSubstring, i));
159              }
160            }
161    
162            startSubstring = i;
163    
164            for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
165    
166            if (c == '\"' && i < nChars)
167            {
168              addToTokenQueue(pat.substring(startSubstring, i + 1));
169    
170              startSubstring = -1;
171            }
172            else
173            {
174              m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
175                                null);  //"misquoted literal... expected double quote!");
176            }
177          }
178          break;
179          case '\'' :
180            if (startSubstring != -1)
181            {
182              isNum = false;
183              isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
184              isAttrName = false;
185    
186              if (-1 != posOfNSSep)
187              {
188                posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
189              }
190              else
191              {
192                addToTokenQueue(pat.substring(startSubstring, i));
193              }
194            }
195    
196            startSubstring = i;
197    
198            for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
199    
200            if (c == '\'' && i < nChars)
201            {
202              addToTokenQueue(pat.substring(startSubstring, i + 1));
203    
204              startSubstring = -1;
205            }
206            else
207            {
208              m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
209                                null);  //"misquoted literal... expected single quote!");
210            }
211            break;
212          case 0x0A :
213          case 0x0D :
214          case ' ' :
215          case '\t' :
216            if (startSubstring != -1)
217            {
218              isNum = false;
219              isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
220              isAttrName = false;
221    
222              if (-1 != posOfNSSep)
223              {
224                posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
225              }
226              else
227              {
228                addToTokenQueue(pat.substring(startSubstring, i));
229              }
230    
231              startSubstring = -1;
232            }
233            break;
234          case '@' :
235            isAttrName = true;
236    
237          // fall-through on purpose
238          case '-' :
239            if ('-' == c)
240            {
241              if (!(isNum || (startSubstring == -1)))
242              {
243                break;
244              }
245    
246              isNum = false;
247            }
248    
249          // fall-through on purpose
250          case '(' :
251          case '[' :
252          case ')' :
253          case ']' :
254          case '|' :
255          case '/' :
256          case '*' :
257          case '+' :
258          case '=' :
259          case ',' :
260          case '\\' :  // Unused at the moment
261          case '^' :  // Unused at the moment
262          case '!' :  // Unused at the moment
263          case '$' :
264          case '<' :
265          case '>' :
266            if (startSubstring != -1)
267            {
268              isNum = false;
269              isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
270              isAttrName = false;
271    
272              if (-1 != posOfNSSep)
273              {
274                posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
275              }
276              else
277              {
278                addToTokenQueue(pat.substring(startSubstring, i));
279              }
280    
281              startSubstring = -1;
282            }
283            else if (('/' == c) && isStartOfPat)
284            {
285              isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
286            }
287            else if ('*' == c)
288            {
289              isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
290              isAttrName = false;
291            }
292    
293            if (0 == nesting)
294            {
295              if ('|' == c)
296              {
297                if (null != targetStrings)
298                {
299                  recordTokenString(targetStrings);
300                }
301    
302                isStartOfPat = true;
303              }
304            }
305    
306            if ((')' == c) || (']' == c))
307            {
308              nesting--;
309            }
310            else if (('(' == c) || ('[' == c))
311            {
312              nesting++;
313            }
314    
315            addToTokenQueue(pat.substring(i, i + 1));
316            break;
317          case ':' :
318            if (i>0)
319            {
320              if (posOfNSSep == (i - 1))
321              {
322                if (startSubstring != -1)
323                {
324                  if (startSubstring < (i - 1))
325                    addToTokenQueue(pat.substring(startSubstring, i - 1));
326                }
327    
328                isNum = false;
329                isAttrName = false;
330                startSubstring = -1;
331                posOfNSSep = -1;
332    
333                addToTokenQueue(pat.substring(i - 1, i + 1));
334    
335                break;
336              }
337              else
338              {
339                posOfNSSep = i;
340              }
341            }
342    
343          // fall through on purpose
344          default :
345            if (-1 == startSubstring)
346            {
347              startSubstring = i;
348              isNum = Character.isDigit(c);
349            }
350            else if (isNum)
351            {
352              isNum = Character.isDigit(c);
353            }
354          }
355        }
356    
357        if (startSubstring != -1)
358        {
359          isNum = false;
360          isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
361    
362          if ((-1 != posOfNSSep) || 
363             ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
364          {
365            posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
366          }
367          else
368          {
369            addToTokenQueue(pat.substring(startSubstring, nChars));
370          }
371        }
372    
373        if (0 == m_compiler.getTokenQueueSize())
374        {
375          m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null);  //"Empty expression!");
376        }
377        else if (null != targetStrings)
378        {
379          recordTokenString(targetStrings);
380        }
381    
382        m_processor.m_queueMark = 0;
383      }
384    
385      /**
386       * Record the current position on the token queue as long as
387       * this is a top-level element.  Must be called before the
388       * next token is added to the m_tokenQueue.
389       *
390       * @param nesting The nesting count for the pattern element.
391       * @param isStart true if this is the start of a pattern.
392       * @param isAttrName true if we have determined that this is an attribute name.
393       *
394       * @return true if this is the start of a pattern.
395       */
396      private boolean mapPatternElemPos(int nesting, boolean isStart,
397                                        boolean isAttrName)
398      {
399    
400        if (0 == nesting)
401        {
402          if(m_patternMapSize >= m_patternMap.length)
403          {
404            int patternMap[] = m_patternMap;
405            int len = m_patternMap.length;
406            m_patternMap = new int[m_patternMapSize + 100];
407            System.arraycopy(patternMap, 0, m_patternMap, 0, len);
408          } 
409          if (!isStart)
410          {
411            m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
412          }
413          m_patternMap[m_patternMapSize] =
414            (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
415    
416          m_patternMapSize++;
417    
418          isStart = false;
419        }
420    
421        return isStart;
422      }
423    
424      /**
425       * Given a map pos, return the corresponding token queue pos.
426       *
427       * @param i The index in the m_patternMap.
428       *
429       * @return the token queue position.
430       */
431      private int getTokenQueuePosFromMap(int i)
432      {
433    
434        int pos = m_patternMap[i];
435    
436        return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
437      }
438    
439      /**
440       * Reset token queue mark and m_token to a
441       * given position.
442       * @param mark The new position.
443       */
444      private final void resetTokenMark(int mark)
445      {
446    
447        int qsz = m_compiler.getTokenQueueSize();
448    
449        m_processor.m_queueMark = (mark > 0)
450                                  ? ((mark <= qsz) ? mark - 1 : mark) : 0;
451    
452        if (m_processor.m_queueMark < qsz)
453        {
454          m_processor.m_token =
455            (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
456          m_processor.m_tokenChar = m_processor.m_token.charAt(0);
457        }
458        else
459        {
460          m_processor.m_token = null;
461          m_processor.m_tokenChar = 0;
462        }
463      }
464    
465      /**
466       * Given a string, return the corresponding keyword token.
467       *
468       * @param key The keyword.
469       *
470       * @return An opcode value.
471       */
472      final int getKeywordToken(String key)
473      {
474    
475        int tok;
476    
477        try
478        {
479          Integer itok = (Integer) Keywords.getKeyWord(key);
480    
481          tok = (null != itok) ? itok.intValue() : 0;
482        }
483        catch (NullPointerException npe)
484        {
485          tok = 0;
486        }
487        catch (ClassCastException cce)
488        {
489          tok = 0;
490        }
491    
492        return tok;
493      }
494    
495      /**
496       * Record the current token in the passed vector.
497       *
498       * @param targetStrings Vector of string.
499       */
500      private void recordTokenString(Vector targetStrings)
501      {
502    
503        int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
504    
505        resetTokenMark(tokPos + 1);
506    
507        if (m_processor.lookahead('(', 1))
508        {
509          int tok = getKeywordToken(m_processor.m_token);
510    
511          switch (tok)
512          {
513          case OpCodes.NODETYPE_COMMENT :
514            targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT);
515            break;
516          case OpCodes.NODETYPE_TEXT :
517            targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
518            break;
519          case OpCodes.NODETYPE_NODE :
520            targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
521            break;
522          case OpCodes.NODETYPE_ROOT :
523            targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
524            break;
525          case OpCodes.NODETYPE_ANYELEMENT :
526            targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
527            break;
528          case OpCodes.NODETYPE_PI :
529            targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
530            break;
531          default :
532            targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
533          }
534        }
535        else
536        {
537          if (m_processor.tokenIs('@'))
538          {
539            tokPos++;
540    
541            resetTokenMark(tokPos + 1);
542          }
543    
544          if (m_processor.lookahead(':', 1))
545          {
546            tokPos += 2;
547          }
548    
549          targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos));
550        }
551      }
552    
553      /**
554       * Add a token to the token queue.
555       *
556       *
557       * @param s The token.
558       */
559      private final void addToTokenQueue(String s)
560      {
561        m_compiler.getTokenQueue().addElement(s);
562      }
563    
564      /**
565       * When a seperator token is found, see if there's a element name or
566       * the like to map.
567       *
568       * @param pat The XPath name string.
569       * @param startSubstring The start of the name string.
570       * @param posOfNSSep The position of the namespace seperator (':').
571       * @param posOfScan The end of the name index.
572       *
573       * @throws javax.xml.transform.TransformerException
574       *
575       * @return -1 always.
576       */
577      private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
578                              int posOfScan)
579               throws javax.xml.transform.TransformerException
580     {
581    
582        String prefix = "";
583        
584        if ((startSubstring >= 0) && (posOfNSSep >= 0))
585        {
586           prefix = pat.substring(startSubstring, posOfNSSep);
587        }
588        String uName;
589    
590        if ((null != m_namespaceContext) &&!prefix.equals("*")
591                &&!prefix.equals("xmlns"))
592        {
593          try
594          {
595            if (prefix.length() > 0)
596              uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
597                prefix);
598            else
599            {
600    
601              // Assume last was wildcard. This is not legal according
602              // to the draft. Set the below to true to make namespace
603              // wildcards work.
604              if (false)
605              {
606                addToTokenQueue(":");
607    
608                String s = pat.substring(posOfNSSep + 1, posOfScan);
609    
610                if (s.length() > 0)
611                  addToTokenQueue(s);
612    
613                return -1;
614              }
615              else
616              {
617                uName =
618                  ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
619                    prefix);
620              }
621            }
622          }
623          catch (ClassCastException cce)
624          {
625            uName = m_namespaceContext.getNamespaceForPrefix(prefix);
626          }
627        }
628        else
629        {
630          uName = prefix;
631        }
632    
633        if ((null != uName) && (uName.length() > 0))
634        {
635          addToTokenQueue(uName);
636          addToTokenQueue(":");
637    
638          String s = pat.substring(posOfNSSep + 1, posOfScan);
639    
640          if (s.length() > 0)
641            addToTokenQueue(s);
642        }
643        else
644        {
645            // To older XPath code it doesn't matter if
646            // error() is called or errorForDOM3().
647                    m_processor.errorForDOM3(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
648                                                     new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
649    
650    /** old code commented out 17-Sep-2004
651    // error("Could not locate namespace for prefix: "+prefix);
652    //                m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
653    //                                       new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
654    */
655    
656          /***  Old code commented out 10-Jan-2001
657          addToTokenQueue(prefix);
658          addToTokenQueue(":");
659    
660          String s = pat.substring(posOfNSSep + 1, posOfScan);
661    
662          if (s.length() > 0)
663            addToTokenQueue(s);
664          ***/
665        }
666    
667        return -1;
668      }
669    }