001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: Lexer.java 524810 2007-04-02 15:51:55Z zongaro $
020 */
021 package org.apache.xpath.compiler;
022
023 import java.util.Vector;
024
025 import org.apache.xml.utils.PrefixResolver;
026 import org.apache.xpath.res.XPATHErrorResources;
027
028 /**
029 * This class is in charge of lexical processing of the XPath
030 * expression into tokens.
031 */
032 class Lexer
033 {
034
035 /**
036 * The target XPath.
037 */
038 private Compiler m_compiler;
039
040 /**
041 * The prefix resolver to map prefixes to namespaces in the XPath.
042 */
043 PrefixResolver m_namespaceContext;
044
045 /**
046 * The XPath processor object.
047 */
048 XPathParser m_processor;
049
050 /**
051 * This value is added to each element name in the TARGETEXTRA
052 * that is a 'target' (right-most top-level element name).
053 */
054 static final int TARGETEXTRA = 10000;
055
056 /**
057 * Ignore this, it is going away.
058 * This holds a map to the m_tokenQueue that tells where the top-level elements are.
059 * It is used for pattern matching so the m_tokenQueue can be walked backwards.
060 * Each element that is a 'target', (right-most top level element name) has
061 * TARGETEXTRA added to it.
062 *
063 */
064 private int m_patternMap[] = new int[100];
065
066 /**
067 * Ignore this, it is going away.
068 * The number of elements that m_patternMap maps;
069 */
070 private int m_patternMapSize;
071
072 /**
073 * Create a Lexer object.
074 *
075 * @param compiler The owning compiler for this lexer.
076 * @param resolver The prefix resolver for mapping qualified name prefixes
077 * to namespace URIs.
078 * @param xpathProcessor The parser that is processing strings to opcodes.
079 */
080 Lexer(Compiler compiler, PrefixResolver resolver,
081 XPathParser xpathProcessor)
082 {
083
084 m_compiler = compiler;
085 m_namespaceContext = resolver;
086 m_processor = xpathProcessor;
087 }
088
089 /**
090 * Walk through the expression and build a token queue, and a map of the top-level
091 * elements.
092 * @param pat XSLT Expression.
093 *
094 * @throws javax.xml.transform.TransformerException
095 */
096 void tokenize(String pat) throws javax.xml.transform.TransformerException
097 {
098 tokenize(pat, null);
099 }
100
101 /**
102 * Walk through the expression and build a token queue, and a map of the top-level
103 * elements.
104 * @param pat XSLT Expression.
105 * @param targetStrings Vector to hold Strings, may be null.
106 *
107 * @throws javax.xml.transform.TransformerException
108 */
109 void tokenize(String pat, Vector targetStrings)
110 throws javax.xml.transform.TransformerException
111 {
112
113 m_compiler.m_currentPattern = pat;
114 m_patternMapSize = 0;
115
116 // This needs to grow too. Use a conservative estimate that the OpMapVector
117 // needs about five time the length of the input path expression - to a
118 // maximum of MAXTOKENQUEUESIZE*5. If the OpMapVector needs to grow, grow
119 // it freely (second argument to constructor).
120 int initTokQueueSize = ((pat.length() < OpMap.MAXTOKENQUEUESIZE)
121 ? pat.length() : OpMap.MAXTOKENQUEUESIZE) * 5;
122 m_compiler.m_opMap = new OpMapVector(initTokQueueSize,
123 OpMap.BLOCKTOKENQUEUESIZE * 5,
124 OpMap.MAPINDEX_LENGTH);
125
126 int nChars = pat.length();
127 int startSubstring = -1;
128 int posOfNSSep = -1;
129 boolean isStartOfPat = true;
130 boolean isAttrName = false;
131 boolean isNum = false;
132
133 // Nesting of '[' so we can know if the given element should be
134 // counted inside the m_patternMap.
135 int nesting = 0;
136
137 // char[] chars = pat.toCharArray();
138 for (int i = 0; i < nChars; i++)
139 {
140 char c = pat.charAt(i);
141
142 switch (c)
143 {
144 case '\"' :
145 {
146 if (startSubstring != -1)
147 {
148 isNum = false;
149 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
150 isAttrName = false;
151
152 if (-1 != posOfNSSep)
153 {
154 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
155 }
156 else
157 {
158 addToTokenQueue(pat.substring(startSubstring, i));
159 }
160 }
161
162 startSubstring = i;
163
164 for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
165
166 if (c == '\"' && i < nChars)
167 {
168 addToTokenQueue(pat.substring(startSubstring, i + 1));
169
170 startSubstring = -1;
171 }
172 else
173 {
174 m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
175 null); //"misquoted literal... expected double quote!");
176 }
177 }
178 break;
179 case '\'' :
180 if (startSubstring != -1)
181 {
182 isNum = false;
183 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
184 isAttrName = false;
185
186 if (-1 != posOfNSSep)
187 {
188 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
189 }
190 else
191 {
192 addToTokenQueue(pat.substring(startSubstring, i));
193 }
194 }
195
196 startSubstring = i;
197
198 for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
199
200 if (c == '\'' && i < nChars)
201 {
202 addToTokenQueue(pat.substring(startSubstring, i + 1));
203
204 startSubstring = -1;
205 }
206 else
207 {
208 m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
209 null); //"misquoted literal... expected single quote!");
210 }
211 break;
212 case 0x0A :
213 case 0x0D :
214 case ' ' :
215 case '\t' :
216 if (startSubstring != -1)
217 {
218 isNum = false;
219 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
220 isAttrName = false;
221
222 if (-1 != posOfNSSep)
223 {
224 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
225 }
226 else
227 {
228 addToTokenQueue(pat.substring(startSubstring, i));
229 }
230
231 startSubstring = -1;
232 }
233 break;
234 case '@' :
235 isAttrName = true;
236
237 // fall-through on purpose
238 case '-' :
239 if ('-' == c)
240 {
241 if (!(isNum || (startSubstring == -1)))
242 {
243 break;
244 }
245
246 isNum = false;
247 }
248
249 // fall-through on purpose
250 case '(' :
251 case '[' :
252 case ')' :
253 case ']' :
254 case '|' :
255 case '/' :
256 case '*' :
257 case '+' :
258 case '=' :
259 case ',' :
260 case '\\' : // Unused at the moment
261 case '^' : // Unused at the moment
262 case '!' : // Unused at the moment
263 case '$' :
264 case '<' :
265 case '>' :
266 if (startSubstring != -1)
267 {
268 isNum = false;
269 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
270 isAttrName = false;
271
272 if (-1 != posOfNSSep)
273 {
274 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
275 }
276 else
277 {
278 addToTokenQueue(pat.substring(startSubstring, i));
279 }
280
281 startSubstring = -1;
282 }
283 else if (('/' == c) && isStartOfPat)
284 {
285 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
286 }
287 else if ('*' == c)
288 {
289 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
290 isAttrName = false;
291 }
292
293 if (0 == nesting)
294 {
295 if ('|' == c)
296 {
297 if (null != targetStrings)
298 {
299 recordTokenString(targetStrings);
300 }
301
302 isStartOfPat = true;
303 }
304 }
305
306 if ((')' == c) || (']' == c))
307 {
308 nesting--;
309 }
310 else if (('(' == c) || ('[' == c))
311 {
312 nesting++;
313 }
314
315 addToTokenQueue(pat.substring(i, i + 1));
316 break;
317 case ':' :
318 if (i>0)
319 {
320 if (posOfNSSep == (i - 1))
321 {
322 if (startSubstring != -1)
323 {
324 if (startSubstring < (i - 1))
325 addToTokenQueue(pat.substring(startSubstring, i - 1));
326 }
327
328 isNum = false;
329 isAttrName = false;
330 startSubstring = -1;
331 posOfNSSep = -1;
332
333 addToTokenQueue(pat.substring(i - 1, i + 1));
334
335 break;
336 }
337 else
338 {
339 posOfNSSep = i;
340 }
341 }
342
343 // fall through on purpose
344 default :
345 if (-1 == startSubstring)
346 {
347 startSubstring = i;
348 isNum = Character.isDigit(c);
349 }
350 else if (isNum)
351 {
352 isNum = Character.isDigit(c);
353 }
354 }
355 }
356
357 if (startSubstring != -1)
358 {
359 isNum = false;
360 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
361
362 if ((-1 != posOfNSSep) ||
363 ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
364 {
365 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
366 }
367 else
368 {
369 addToTokenQueue(pat.substring(startSubstring, nChars));
370 }
371 }
372
373 if (0 == m_compiler.getTokenQueueSize())
374 {
375 m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null); //"Empty expression!");
376 }
377 else if (null != targetStrings)
378 {
379 recordTokenString(targetStrings);
380 }
381
382 m_processor.m_queueMark = 0;
383 }
384
385 /**
386 * Record the current position on the token queue as long as
387 * this is a top-level element. Must be called before the
388 * next token is added to the m_tokenQueue.
389 *
390 * @param nesting The nesting count for the pattern element.
391 * @param isStart true if this is the start of a pattern.
392 * @param isAttrName true if we have determined that this is an attribute name.
393 *
394 * @return true if this is the start of a pattern.
395 */
396 private boolean mapPatternElemPos(int nesting, boolean isStart,
397 boolean isAttrName)
398 {
399
400 if (0 == nesting)
401 {
402 if(m_patternMapSize >= m_patternMap.length)
403 {
404 int patternMap[] = m_patternMap;
405 int len = m_patternMap.length;
406 m_patternMap = new int[m_patternMapSize + 100];
407 System.arraycopy(patternMap, 0, m_patternMap, 0, len);
408 }
409 if (!isStart)
410 {
411 m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
412 }
413 m_patternMap[m_patternMapSize] =
414 (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
415
416 m_patternMapSize++;
417
418 isStart = false;
419 }
420
421 return isStart;
422 }
423
424 /**
425 * Given a map pos, return the corresponding token queue pos.
426 *
427 * @param i The index in the m_patternMap.
428 *
429 * @return the token queue position.
430 */
431 private int getTokenQueuePosFromMap(int i)
432 {
433
434 int pos = m_patternMap[i];
435
436 return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
437 }
438
439 /**
440 * Reset token queue mark and m_token to a
441 * given position.
442 * @param mark The new position.
443 */
444 private final void resetTokenMark(int mark)
445 {
446
447 int qsz = m_compiler.getTokenQueueSize();
448
449 m_processor.m_queueMark = (mark > 0)
450 ? ((mark <= qsz) ? mark - 1 : mark) : 0;
451
452 if (m_processor.m_queueMark < qsz)
453 {
454 m_processor.m_token =
455 (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
456 m_processor.m_tokenChar = m_processor.m_token.charAt(0);
457 }
458 else
459 {
460 m_processor.m_token = null;
461 m_processor.m_tokenChar = 0;
462 }
463 }
464
465 /**
466 * Given a string, return the corresponding keyword token.
467 *
468 * @param key The keyword.
469 *
470 * @return An opcode value.
471 */
472 final int getKeywordToken(String key)
473 {
474
475 int tok;
476
477 try
478 {
479 Integer itok = (Integer) Keywords.getKeyWord(key);
480
481 tok = (null != itok) ? itok.intValue() : 0;
482 }
483 catch (NullPointerException npe)
484 {
485 tok = 0;
486 }
487 catch (ClassCastException cce)
488 {
489 tok = 0;
490 }
491
492 return tok;
493 }
494
495 /**
496 * Record the current token in the passed vector.
497 *
498 * @param targetStrings Vector of string.
499 */
500 private void recordTokenString(Vector targetStrings)
501 {
502
503 int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
504
505 resetTokenMark(tokPos + 1);
506
507 if (m_processor.lookahead('(', 1))
508 {
509 int tok = getKeywordToken(m_processor.m_token);
510
511 switch (tok)
512 {
513 case OpCodes.NODETYPE_COMMENT :
514 targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT);
515 break;
516 case OpCodes.NODETYPE_TEXT :
517 targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
518 break;
519 case OpCodes.NODETYPE_NODE :
520 targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
521 break;
522 case OpCodes.NODETYPE_ROOT :
523 targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
524 break;
525 case OpCodes.NODETYPE_ANYELEMENT :
526 targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
527 break;
528 case OpCodes.NODETYPE_PI :
529 targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
530 break;
531 default :
532 targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
533 }
534 }
535 else
536 {
537 if (m_processor.tokenIs('@'))
538 {
539 tokPos++;
540
541 resetTokenMark(tokPos + 1);
542 }
543
544 if (m_processor.lookahead(':', 1))
545 {
546 tokPos += 2;
547 }
548
549 targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos));
550 }
551 }
552
553 /**
554 * Add a token to the token queue.
555 *
556 *
557 * @param s The token.
558 */
559 private final void addToTokenQueue(String s)
560 {
561 m_compiler.getTokenQueue().addElement(s);
562 }
563
564 /**
565 * When a seperator token is found, see if there's a element name or
566 * the like to map.
567 *
568 * @param pat The XPath name string.
569 * @param startSubstring The start of the name string.
570 * @param posOfNSSep The position of the namespace seperator (':').
571 * @param posOfScan The end of the name index.
572 *
573 * @throws javax.xml.transform.TransformerException
574 *
575 * @return -1 always.
576 */
577 private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
578 int posOfScan)
579 throws javax.xml.transform.TransformerException
580 {
581
582 String prefix = "";
583
584 if ((startSubstring >= 0) && (posOfNSSep >= 0))
585 {
586 prefix = pat.substring(startSubstring, posOfNSSep);
587 }
588 String uName;
589
590 if ((null != m_namespaceContext) &&!prefix.equals("*")
591 &&!prefix.equals("xmlns"))
592 {
593 try
594 {
595 if (prefix.length() > 0)
596 uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
597 prefix);
598 else
599 {
600
601 // Assume last was wildcard. This is not legal according
602 // to the draft. Set the below to true to make namespace
603 // wildcards work.
604 if (false)
605 {
606 addToTokenQueue(":");
607
608 String s = pat.substring(posOfNSSep + 1, posOfScan);
609
610 if (s.length() > 0)
611 addToTokenQueue(s);
612
613 return -1;
614 }
615 else
616 {
617 uName =
618 ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
619 prefix);
620 }
621 }
622 }
623 catch (ClassCastException cce)
624 {
625 uName = m_namespaceContext.getNamespaceForPrefix(prefix);
626 }
627 }
628 else
629 {
630 uName = prefix;
631 }
632
633 if ((null != uName) && (uName.length() > 0))
634 {
635 addToTokenQueue(uName);
636 addToTokenQueue(":");
637
638 String s = pat.substring(posOfNSSep + 1, posOfScan);
639
640 if (s.length() > 0)
641 addToTokenQueue(s);
642 }
643 else
644 {
645 // To older XPath code it doesn't matter if
646 // error() is called or errorForDOM3().
647 m_processor.errorForDOM3(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
648 new String[] {prefix}); //"Prefix must resolve to a namespace: {0}";
649
650 /** old code commented out 17-Sep-2004
651 // error("Could not locate namespace for prefix: "+prefix);
652 // m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
653 // new String[] {prefix}); //"Prefix must resolve to a namespace: {0}";
654 */
655
656 /*** Old code commented out 10-Jan-2001
657 addToTokenQueue(prefix);
658 addToTokenQueue(":");
659
660 String s = pat.substring(posOfNSSep + 1, posOfScan);
661
662 if (s.length() > 0)
663 addToTokenQueue(s);
664 ***/
665 }
666
667 return -1;
668 }
669 }