001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: IncrementalSAXSource_Xerces.java 468653 2006-10-28 07:07:05Z minchau $
020 */
021
022 package org.apache.xml.dtm.ref;
023
024 import java.io.IOException;
025 import java.lang.reflect.Constructor;
026 import java.lang.reflect.Method;
027
028 import org.apache.xerces.parsers.SAXParser;
029 import org.apache.xml.res.XMLErrorResources;
030 import org.apache.xml.res.XMLMessages;
031
032 import org.xml.sax.InputSource;
033 import org.xml.sax.SAXException;
034 import org.xml.sax.XMLReader;
035
036
037 /** <p>IncrementalSAXSource_Xerces takes advantage of the fact that Xerces1
038 * incremental mode is already a coroutine of sorts, and just wraps our
039 * IncrementalSAXSource API around it.</p>
040 *
041 * <p>Usage example: See main().</p>
042 *
043 * <p>Status: Passes simple main() unit-test. NEEDS JAVADOC.</p>
044 * */
045 public class IncrementalSAXSource_Xerces
046 implements IncrementalSAXSource
047 {
048 //
049 // Reflection. To allow this to compile with both Xerces1 and Xerces2, which
050 // require very different methods and objects, we need to avoid static
051 // references to those APIs. So until Xerces2 is pervasive and we're willing
052 // to make it a prerequisite, we will rely upon relection.
053 //
054 Method fParseSomeSetup=null; // Xerces1 method
055 Method fParseSome=null; // Xerces1 method
056 Object fPullParserConfig=null; // Xerces2 pull control object
057 Method fConfigSetInput=null; // Xerces2 method
058 Method fConfigParse=null; // Xerces2 method
059 Method fSetInputSource=null; // Xerces2 pull control method
060 Constructor fConfigInputSourceCtor=null; // Xerces2 initialization method
061 Method fConfigSetByteStream=null; // Xerces2 initialization method
062 Method fConfigSetCharStream=null; // Xerces2 initialization method
063 Method fConfigSetEncoding=null; // Xerces2 initialization method
064 Method fReset=null; // Both Xerces1 and Xerces2, but diff. signatures
065
066 //
067 // Data
068 //
069 SAXParser fIncrementalParser;
070 private boolean fParseInProgress=false;
071
072 //
073 // Constructors
074 //
075
076 /** Create a IncrementalSAXSource_Xerces, and create a SAXParser
077 * to go with it. Xerces2 incremental parsing is only supported if
078 * this constructor is used, due to limitations in the Xerces2 API (as of
079 * Beta 3). If you don't like that restriction, tell the Xerces folks that
080 * there should be a simpler way to request incremental SAX parsing.
081 * */
082 public IncrementalSAXSource_Xerces()
083 throws NoSuchMethodException
084 {
085 try
086 {
087 // Xerces-2 incremental parsing support (as of Beta 3)
088 // ContentHandlers still get set on fIncrementalParser (to get
089 // conversion from XNI events to SAX events), but
090 // _control_ for incremental parsing must be exercised via the config.
091 //
092 // At this time there's no way to read the existing config, only
093 // to assert a new one... and only when creating a brand-new parser.
094 //
095 // Reflection is used to allow us to continue to compile against
096 // Xerces1. If/when we can abandon the older versions of the parser,
097 // this will simplify significantly.
098
099 // If we can't get the magic constructor, no need to look further.
100 Class xniConfigClass=ObjectFactory.findProviderClass(
101 "org.apache.xerces.xni.parser.XMLParserConfiguration",
102 ObjectFactory.findClassLoader(), true);
103 Class[] args1={xniConfigClass};
104 Constructor ctor=SAXParser.class.getConstructor(args1);
105
106 // Build the parser configuration object. StandardParserConfiguration
107 // happens to implement XMLPullParserConfiguration, which is the API
108 // we're going to want to use.
109 Class xniStdConfigClass=ObjectFactory.findProviderClass(
110 "org.apache.xerces.parsers.StandardParserConfiguration",
111 ObjectFactory.findClassLoader(), true);
112 fPullParserConfig=xniStdConfigClass.newInstance();
113 Object[] args2={fPullParserConfig};
114 fIncrementalParser = (SAXParser)ctor.newInstance(args2);
115
116 // Preload all the needed the configuration methods... I want to know they're
117 // all here before we commit to trying to use them, just in case the
118 // API changes again.
119 Class fXniInputSourceClass=ObjectFactory.findProviderClass(
120 "org.apache.xerces.xni.parser.XMLInputSource",
121 ObjectFactory.findClassLoader(), true);
122 Class[] args3={fXniInputSourceClass};
123 fConfigSetInput=xniStdConfigClass.getMethod("setInputSource",args3);
124
125 Class[] args4={String.class,String.class,String.class};
126 fConfigInputSourceCtor=fXniInputSourceClass.getConstructor(args4);
127 Class[] args5={java.io.InputStream.class};
128 fConfigSetByteStream=fXniInputSourceClass.getMethod("setByteStream",args5);
129 Class[] args6={java.io.Reader.class};
130 fConfigSetCharStream=fXniInputSourceClass.getMethod("setCharacterStream",args6);
131 Class[] args7={String.class};
132 fConfigSetEncoding=fXniInputSourceClass.getMethod("setEncoding",args7);
133
134 Class[] argsb={Boolean.TYPE};
135 fConfigParse=xniStdConfigClass.getMethod("parse",argsb);
136 Class[] noargs=new Class[0];
137 fReset=fIncrementalParser.getClass().getMethod("reset",noargs);
138 }
139 catch(Exception e)
140 {
141 // Fallback if this fails (implemented in createIncrementalSAXSource) is
142 // to attempt Xerces-1 incremental setup. Can't do tail-call in
143 // constructor, so create new, copy Xerces-1 initialization,
144 // then throw it away... Ugh.
145 IncrementalSAXSource_Xerces dummy=new IncrementalSAXSource_Xerces(new SAXParser());
146 this.fParseSomeSetup=dummy.fParseSomeSetup;
147 this.fParseSome=dummy.fParseSome;
148 this.fIncrementalParser=dummy.fIncrementalParser;
149 }
150 }
151
152 /** Create a IncrementalSAXSource_Xerces wrapped around
153 * an existing SAXParser. Currently this works only for recent
154 * releases of Xerces-1. Xerces-2 incremental is currently possible
155 * only if we are allowed to create the parser instance, due to
156 * limitations in the API exposed by Xerces-2 Beta 3; see the
157 * no-args constructor for that code.
158 *
159 * @exception if the SAXParser class doesn't support the Xerces
160 * incremental parse operations. In that case, caller should
161 * fall back upon the IncrementalSAXSource_Filter approach.
162 * */
163 public IncrementalSAXSource_Xerces(SAXParser parser)
164 throws NoSuchMethodException
165 {
166 // Reflection is used to allow us to compile against
167 // Xerces2. If/when we can abandon the older versions of the parser,
168 // this constructor will simply have to fail until/unless the
169 // Xerces2 incremental support is made available on previously
170 // constructed SAXParser instances.
171 fIncrementalParser=parser;
172 Class me=parser.getClass();
173 Class[] parms={InputSource.class};
174 fParseSomeSetup=me.getMethod("parseSomeSetup",parms);
175 parms=new Class[0];
176 fParseSome=me.getMethod("parseSome",parms);
177 // Fallback if this fails (implemented in createIncrementalSAXSource) is
178 // to use IncrementalSAXSource_Filter rather than Xerces-specific code.
179 }
180
181 //
182 // Factories
183 //
184 static public IncrementalSAXSource createIncrementalSAXSource()
185 {
186 try
187 {
188 return new IncrementalSAXSource_Xerces();
189 }
190 catch(NoSuchMethodException e)
191 {
192 // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded.
193 // Fall back on filtering solution.
194 IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter();
195 iss.setXMLReader(new SAXParser());
196 return iss;
197 }
198 }
199
200 static public IncrementalSAXSource
201 createIncrementalSAXSource(SAXParser parser) {
202 try
203 {
204 return new IncrementalSAXSource_Xerces(parser);
205 }
206 catch(NoSuchMethodException e)
207 {
208 // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded.
209 // Fall back on filtering solution.
210 IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter();
211 iss.setXMLReader(parser);
212 return iss;
213 }
214 }
215
216 //
217 // Public methods
218 //
219
220 // Register handler directly with the incremental parser
221 public void setContentHandler(org.xml.sax.ContentHandler handler)
222 {
223 // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
224 // %OPT% Cast at asignment?
225 ((XMLReader)fIncrementalParser).setContentHandler(handler);
226 }
227
228 // Register handler directly with the incremental parser
229 public void setLexicalHandler(org.xml.sax.ext.LexicalHandler handler)
230 {
231 // Not supported by all SAX2 parsers but should work in Xerces:
232 try
233 {
234 // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
235 // %OPT% Cast at asignment?
236 ((XMLReader)fIncrementalParser).setProperty("http://xml.org/sax/properties/lexical-handler",
237 handler);
238 }
239 catch(org.xml.sax.SAXNotRecognizedException e)
240 {
241 // Nothing we can do about it
242 }
243 catch(org.xml.sax.SAXNotSupportedException e)
244 {
245 // Nothing we can do about it
246 }
247 }
248
249 // Register handler directly with the incremental parser
250 public void setDTDHandler(org.xml.sax.DTDHandler handler)
251 {
252 // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
253 // %OPT% Cast at asignment?
254 ((XMLReader)fIncrementalParser).setDTDHandler(handler);
255 }
256
257 //================================================================
258 /** startParse() is a simple API which tells the IncrementalSAXSource
259 * to begin reading a document.
260 *
261 * @throws SAXException is parse thread is already in progress
262 * or parsing can not be started.
263 * */
264 public void startParse(InputSource source) throws SAXException
265 {
266 if (fIncrementalParser==null)
267 throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_NEEDS_SAXPARSER, null)); //"startParse needs a non-null SAXParser.");
268 if (fParseInProgress)
269 throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_WHILE_PARSING, null)); //"startParse may not be called while parsing.");
270
271 boolean ok=false;
272
273 try
274 {
275 ok = parseSomeSetup(source);
276 }
277 catch(Exception ex)
278 {
279 throw new SAXException(ex);
280 }
281
282 if(!ok)
283 throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_INIT_PARSER, null)); //"could not initialize parser with");
284 }
285
286
287 /** deliverMoreNodes() is a simple API which tells the coroutine
288 * parser that we need more nodes. This is intended to be called
289 * from one of our partner routines, and serves to encapsulate the
290 * details of how incremental parsing has been achieved.
291 *
292 * @param parsemore If true, tells the incremental parser to generate
293 * another chunk of output. If false, tells the parser that we're
294 * satisfied and it can terminate parsing of this document.
295 * @return Boolean.TRUE if the CoroutineParser believes more data may be available
296 * for further parsing. Boolean.FALSE if parsing ran to completion.
297 * Exception if the parser objected for some reason.
298 * */
299 public Object deliverMoreNodes (boolean parsemore)
300 {
301 if(!parsemore)
302 {
303 fParseInProgress=false;
304 return Boolean.FALSE;
305 }
306
307 Object arg;
308 try {
309 boolean keepgoing = parseSome();
310 arg = keepgoing ? Boolean.TRUE : Boolean.FALSE;
311 } catch (SAXException ex) {
312 arg = ex;
313 } catch (IOException ex) {
314 arg = ex;
315 } catch (Exception ex) {
316 arg = new SAXException(ex);
317 }
318 return arg;
319 }
320
321 // Private methods -- conveniences to hide the reflection details
322 private boolean parseSomeSetup(InputSource source)
323 throws SAXException, IOException, IllegalAccessException,
324 java.lang.reflect.InvocationTargetException,
325 java.lang.InstantiationException
326 {
327 if(fConfigSetInput!=null)
328 {
329 // Obtain input from SAX inputSource object, construct XNI version of
330 // that object. Logic adapted from Xerces2.
331 Object[] parms1={source.getPublicId(),source.getSystemId(),null};
332 Object xmlsource=fConfigInputSourceCtor.newInstance(parms1);
333 Object[] parmsa={source.getByteStream()};
334 fConfigSetByteStream.invoke(xmlsource,parmsa);
335 parmsa[0]=source.getCharacterStream();
336 fConfigSetCharStream.invoke(xmlsource,parmsa);
337 parmsa[0]=source.getEncoding();
338 fConfigSetEncoding.invoke(xmlsource,parmsa);
339
340 // Bugzilla5272 patch suggested by Sandy Gao.
341 // Has to be reflection to run with Xerces2
342 // after compilation against Xerces1. or vice
343 // versa, due to return type mismatches.
344 Object[] noparms=new Object[0];
345 fReset.invoke(fIncrementalParser,noparms);
346
347 parmsa[0]=xmlsource;
348 fConfigSetInput.invoke(fPullParserConfig,parmsa);
349
350 // %REVIEW% Do first pull. Should we instead just return true?
351 return parseSome();
352 }
353 else
354 {
355 Object[] parm={source};
356 Object ret=fParseSomeSetup.invoke(fIncrementalParser,parm);
357 return ((Boolean)ret).booleanValue();
358 }
359 }
360 // Would null work???
361 private static final Object[] noparms=new Object[0];
362 private static final Object[] parmsfalse={Boolean.FALSE};
363 private boolean parseSome()
364 throws SAXException, IOException, IllegalAccessException,
365 java.lang.reflect.InvocationTargetException
366 {
367 // Take next parsing step, return false iff parsing complete:
368 if(fConfigSetInput!=null)
369 {
370 Object ret=(Boolean)(fConfigParse.invoke(fPullParserConfig,parmsfalse));
371 return ((Boolean)ret).booleanValue();
372 }
373 else
374 {
375 Object ret=fParseSome.invoke(fIncrementalParser,noparms);
376 return ((Boolean)ret).booleanValue();
377 }
378 }
379
380
381 //================================================================
382 /** Simple unit test. Attempt coroutine parsing of document indicated
383 * by first argument (as a URI), report progress.
384 */
385 public static void main(String args[])
386 {
387 System.out.println("Starting...");
388
389 CoroutineManager co = new CoroutineManager();
390 int appCoroutineID = co.co_joinCoroutineSet(-1);
391 if (appCoroutineID == -1)
392 {
393 System.out.println("ERROR: Couldn't allocate coroutine number.\n");
394 return;
395 }
396 IncrementalSAXSource parser=
397 createIncrementalSAXSource();
398
399 // Use a serializer as our sample output
400 org.apache.xml.serialize.XMLSerializer trace;
401 trace=new org.apache.xml.serialize.XMLSerializer(System.out,null);
402 parser.setContentHandler(trace);
403 parser.setLexicalHandler(trace);
404
405 // Tell coroutine to begin parsing, run while parsing is in progress
406
407 for(int arg=0;arg<args.length;++arg)
408 {
409 try
410 {
411 InputSource source = new InputSource(args[arg]);
412 Object result=null;
413 boolean more=true;
414 parser.startParse(source);
415 for(result = parser.deliverMoreNodes(more);
416 result==Boolean.TRUE;
417 result = parser.deliverMoreNodes(more))
418 {
419 System.out.println("\nSome parsing successful, trying more.\n");
420
421 // Special test: Terminate parsing early.
422 if(arg+1<args.length && "!".equals(args[arg+1]))
423 {
424 ++arg;
425 more=false;
426 }
427
428 }
429
430 if (result instanceof Boolean && ((Boolean)result)==Boolean.FALSE)
431 {
432 System.out.println("\nParser ended (EOF or on request).\n");
433 }
434 else if (result == null) {
435 System.out.println("\nUNEXPECTED: Parser says shut down prematurely.\n");
436 }
437 else if (result instanceof Exception) {
438 throw new org.apache.xml.utils.WrappedRuntimeException((Exception)result);
439 // System.out.println("\nParser threw exception:");
440 // ((Exception)result).printStackTrace();
441 }
442
443 }
444
445 catch(SAXException e)
446 {
447 e.printStackTrace();
448 }
449 }
450
451 }
452
453
454 } // class IncrementalSAXSource_Xerces