001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: IncrementalSAXSource_Xerces.java 468653 2006-10-28 07:07:05Z minchau $
020     */
021    
022    package org.apache.xml.dtm.ref;
023    
024    import java.io.IOException;
025    import java.lang.reflect.Constructor;
026    import java.lang.reflect.Method;
027    
028    import org.apache.xerces.parsers.SAXParser;
029    import org.apache.xml.res.XMLErrorResources;
030    import org.apache.xml.res.XMLMessages;
031    
032    import org.xml.sax.InputSource;
033    import org.xml.sax.SAXException;
034    import org.xml.sax.XMLReader;
035    
036    
037    /** <p>IncrementalSAXSource_Xerces takes advantage of the fact that Xerces1
038     * incremental mode is already a coroutine of sorts, and just wraps our
039     * IncrementalSAXSource API around it.</p>
040     *
041     * <p>Usage example: See main().</p>
042     *
043     * <p>Status: Passes simple main() unit-test. NEEDS JAVADOC.</p>
044     * */
045    public class IncrementalSAXSource_Xerces
046      implements IncrementalSAXSource
047    {
048      //
049      // Reflection. To allow this to compile with both Xerces1 and Xerces2, which
050      // require very different methods and objects, we need to avoid static 
051      // references to those APIs. So until Xerces2 is pervasive and we're willing 
052      // to make it a prerequisite, we will rely upon relection.
053      //
054      Method fParseSomeSetup=null; // Xerces1 method
055      Method fParseSome=null; // Xerces1 method
056      Object fPullParserConfig=null; // Xerces2 pull control object
057      Method fConfigSetInput=null; // Xerces2 method
058      Method fConfigParse=null; // Xerces2 method
059      Method fSetInputSource=null; // Xerces2 pull control method
060      Constructor fConfigInputSourceCtor=null; // Xerces2 initialization method
061      Method fConfigSetByteStream=null; // Xerces2 initialization method
062      Method fConfigSetCharStream=null; // Xerces2 initialization method
063      Method fConfigSetEncoding=null; // Xerces2 initialization method
064      Method fReset=null; // Both Xerces1 and Xerces2, but diff. signatures
065      
066      //
067      // Data
068      //
069      SAXParser fIncrementalParser;
070      private boolean fParseInProgress=false;
071    
072      //
073      // Constructors
074      //
075    
076      /** Create a IncrementalSAXSource_Xerces, and create a SAXParser
077       * to go with it. Xerces2 incremental parsing is only supported if
078       * this constructor is used, due to limitations in the Xerces2 API (as of
079       * Beta 3). If you don't like that restriction, tell the Xerces folks that
080       * there should be a simpler way to request incremental SAX parsing.
081       * */
082      public IncrementalSAXSource_Xerces() 
083                    throws NoSuchMethodException
084            {
085                    try
086                    {
087                            // Xerces-2 incremental parsing support (as of Beta 3)
088                            // ContentHandlers still get set on fIncrementalParser (to get
089                            // conversion from XNI events to SAX events), but
090                            // _control_ for incremental parsing must be exercised via the config.
091                            // 
092                            // At this time there's no way to read the existing config, only 
093                            // to assert a new one... and only when creating a brand-new parser.
094                            //
095                            // Reflection is used to allow us to continue to compile against
096                            // Xerces1. If/when we can abandon the older versions of the parser,
097                            // this will simplify significantly.
098                            
099                            // If we can't get the magic constructor, no need to look further.
100                            Class xniConfigClass=ObjectFactory.findProviderClass(
101                                "org.apache.xerces.xni.parser.XMLParserConfiguration",
102                                ObjectFactory.findClassLoader(), true);
103                            Class[] args1={xniConfigClass};
104                            Constructor ctor=SAXParser.class.getConstructor(args1);
105                            
106                            // Build the parser configuration object. StandardParserConfiguration
107                            // happens to implement XMLPullParserConfiguration, which is the API
108                            // we're going to want to use.
109                            Class xniStdConfigClass=ObjectFactory.findProviderClass(
110                                "org.apache.xerces.parsers.StandardParserConfiguration",
111                                ObjectFactory.findClassLoader(), true);
112                            fPullParserConfig=xniStdConfigClass.newInstance();
113                            Object[] args2={fPullParserConfig};
114                            fIncrementalParser = (SAXParser)ctor.newInstance(args2);
115                            
116                            // Preload all the needed the configuration methods... I want to know they're
117                            // all here before we commit to trying to use them, just in case the
118                            // API changes again.
119                            Class fXniInputSourceClass=ObjectFactory.findProviderClass(
120                                "org.apache.xerces.xni.parser.XMLInputSource",
121                                ObjectFactory.findClassLoader(), true);
122                            Class[] args3={fXniInputSourceClass};
123                            fConfigSetInput=xniStdConfigClass.getMethod("setInputSource",args3);
124    
125                            Class[] args4={String.class,String.class,String.class};
126                            fConfigInputSourceCtor=fXniInputSourceClass.getConstructor(args4);
127                            Class[] args5={java.io.InputStream.class};
128                            fConfigSetByteStream=fXniInputSourceClass.getMethod("setByteStream",args5);
129                            Class[] args6={java.io.Reader.class};
130                            fConfigSetCharStream=fXniInputSourceClass.getMethod("setCharacterStream",args6);
131                            Class[] args7={String.class};
132                            fConfigSetEncoding=fXniInputSourceClass.getMethod("setEncoding",args7);
133    
134                            Class[] argsb={Boolean.TYPE};
135                            fConfigParse=xniStdConfigClass.getMethod("parse",argsb);                        
136                            Class[] noargs=new Class[0];
137                            fReset=fIncrementalParser.getClass().getMethod("reset",noargs);
138                    }
139                    catch(Exception e)
140                    {
141                // Fallback if this fails (implemented in createIncrementalSAXSource) is
142                            // to attempt Xerces-1 incremental setup. Can't do tail-call in
143                            // constructor, so create new, copy Xerces-1 initialization, 
144                            // then throw it away... Ugh.
145                            IncrementalSAXSource_Xerces dummy=new IncrementalSAXSource_Xerces(new SAXParser());
146                            this.fParseSomeSetup=dummy.fParseSomeSetup;
147                            this.fParseSome=dummy.fParseSome;
148                            this.fIncrementalParser=dummy.fIncrementalParser;
149                    }
150      }
151    
152      /** Create a IncrementalSAXSource_Xerces wrapped around
153       * an existing SAXParser. Currently this works only for recent
154       * releases of Xerces-1.  Xerces-2 incremental is currently possible
155       * only if we are allowed to create the parser instance, due to
156       * limitations in the API exposed by Xerces-2 Beta 3; see the
157       * no-args constructor for that code.
158       * 
159       * @exception if the SAXParser class doesn't support the Xerces
160       * incremental parse operations. In that case, caller should
161       * fall back upon the IncrementalSAXSource_Filter approach.
162       * */
163      public IncrementalSAXSource_Xerces(SAXParser parser) 
164        throws NoSuchMethodException  
165      {
166                    // Reflection is used to allow us to compile against
167                    // Xerces2. If/when we can abandon the older versions of the parser,
168                    // this constructor will simply have to fail until/unless the
169                    // Xerces2 incremental support is made available on previously
170                    // constructed SAXParser instances.
171        fIncrementalParser=parser;
172                    Class me=parser.getClass();
173        Class[] parms={InputSource.class};
174        fParseSomeSetup=me.getMethod("parseSomeSetup",parms);
175        parms=new Class[0];
176        fParseSome=me.getMethod("parseSome",parms);
177        // Fallback if this fails (implemented in createIncrementalSAXSource) is
178        // to use IncrementalSAXSource_Filter rather than Xerces-specific code.
179      }
180    
181      //
182      // Factories
183      //
184      static public IncrementalSAXSource createIncrementalSAXSource() 
185            {
186                    try
187                    {
188                            return new IncrementalSAXSource_Xerces();
189                    }
190                    catch(NoSuchMethodException e)
191                    {
192                            // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded.
193                            // Fall back on filtering solution.
194                            IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter();
195                            iss.setXMLReader(new SAXParser());
196                            return iss;
197                    }
198      }
199            
200      static public IncrementalSAXSource
201      createIncrementalSAXSource(SAXParser parser) {
202                    try
203                    {
204                            return new IncrementalSAXSource_Xerces(parser);
205                    }
206                    catch(NoSuchMethodException e)
207                    {
208                            // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded.
209                            // Fall back on filtering solution.
210                            IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter();
211                            iss.setXMLReader(parser);
212                            return iss;
213                    }
214      }
215    
216      //
217      // Public methods
218      //
219    
220      // Register handler directly with the incremental parser
221      public void setContentHandler(org.xml.sax.ContentHandler handler)
222      {
223        // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
224        // %OPT% Cast at asignment?
225        ((XMLReader)fIncrementalParser).setContentHandler(handler);
226      }
227    
228      // Register handler directly with the incremental parser
229      public void setLexicalHandler(org.xml.sax.ext.LexicalHandler handler)
230      {
231        // Not supported by all SAX2 parsers but should work in Xerces:
232        try 
233        {
234          // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
235          // %OPT% Cast at asignment?
236          ((XMLReader)fIncrementalParser).setProperty("http://xml.org/sax/properties/lexical-handler",
237                                         handler);
238        }
239        catch(org.xml.sax.SAXNotRecognizedException e)
240        {
241          // Nothing we can do about it
242        }
243        catch(org.xml.sax.SAXNotSupportedException e)
244        {
245          // Nothing we can do about it
246        }
247      }
248      
249      // Register handler directly with the incremental parser
250      public void setDTDHandler(org.xml.sax.DTDHandler handler)
251      {
252        // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
253        // %OPT% Cast at asignment?
254        ((XMLReader)fIncrementalParser).setDTDHandler(handler);
255      }
256    
257      //================================================================
258      /** startParse() is a simple API which tells the IncrementalSAXSource
259       * to begin reading a document.
260       *
261       * @throws SAXException is parse thread is already in progress
262       * or parsing can not be started.
263       * */
264      public void startParse(InputSource source) throws SAXException
265      {
266        if (fIncrementalParser==null)
267          throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_NEEDS_SAXPARSER, null)); //"startParse needs a non-null SAXParser.");
268        if (fParseInProgress)
269          throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_WHILE_PARSING, null)); //"startParse may not be called while parsing.");
270    
271        boolean ok=false;
272    
273        try
274        {
275          ok = parseSomeSetup(source);
276        }
277        catch(Exception ex)
278        {
279          throw new SAXException(ex);
280        }
281        
282        if(!ok)
283          throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_INIT_PARSER, null)); //"could not initialize parser with");
284      }
285    
286      
287      /** deliverMoreNodes() is a simple API which tells the coroutine
288       * parser that we need more nodes.  This is intended to be called
289       * from one of our partner routines, and serves to encapsulate the
290       * details of how incremental parsing has been achieved.
291       *
292       * @param parsemore If true, tells the incremental parser to generate
293       * another chunk of output. If false, tells the parser that we're
294       * satisfied and it can terminate parsing of this document.
295       * @return Boolean.TRUE if the CoroutineParser believes more data may be available
296       * for further parsing. Boolean.FALSE if parsing ran to completion.
297       * Exception if the parser objected for some reason.
298       * */
299      public Object deliverMoreNodes (boolean parsemore)
300      {
301        if(!parsemore)
302        {
303          fParseInProgress=false;
304          return Boolean.FALSE;
305        }
306    
307        Object arg;
308        try {
309          boolean keepgoing = parseSome();
310          arg = keepgoing ? Boolean.TRUE : Boolean.FALSE;
311        } catch (SAXException ex) {
312          arg = ex;
313        } catch (IOException ex) {
314          arg = ex;
315        } catch (Exception ex) {
316          arg = new SAXException(ex);
317        }
318        return arg;
319      }
320            
321            // Private methods -- conveniences to hide the reflection details
322            private boolean parseSomeSetup(InputSource source) 
323                    throws SAXException, IOException, IllegalAccessException, 
324                                             java.lang.reflect.InvocationTargetException,
325                                             java.lang.InstantiationException
326            {
327                    if(fConfigSetInput!=null)
328                    {
329                            // Obtain input from SAX inputSource object, construct XNI version of
330                            // that object. Logic adapted from Xerces2.
331                            Object[] parms1={source.getPublicId(),source.getSystemId(),null};
332                            Object xmlsource=fConfigInputSourceCtor.newInstance(parms1);
333                            Object[] parmsa={source.getByteStream()};
334                            fConfigSetByteStream.invoke(xmlsource,parmsa);
335                            parmsa[0]=source.getCharacterStream();
336                            fConfigSetCharStream.invoke(xmlsource,parmsa);
337                            parmsa[0]=source.getEncoding();
338                            fConfigSetEncoding.invoke(xmlsource,parmsa);
339    
340                            // Bugzilla5272 patch suggested by Sandy Gao.
341                            // Has to be reflection to run with Xerces2
342                            // after compilation against Xerces1. or vice
343                            // versa, due to return type mismatches.
344                            Object[] noparms=new Object[0];
345                            fReset.invoke(fIncrementalParser,noparms);
346                            
347                            parmsa[0]=xmlsource;
348                            fConfigSetInput.invoke(fPullParserConfig,parmsa);
349                            
350                            // %REVIEW% Do first pull. Should we instead just return true?
351                            return parseSome();
352                    }
353                    else
354                    {
355                            Object[] parm={source};
356                            Object ret=fParseSomeSetup.invoke(fIncrementalParser,parm);
357                            return ((Boolean)ret).booleanValue();
358                    }
359            }
360    //  Would null work???
361        private static final Object[] noparms=new Object[0];
362        private static final Object[] parmsfalse={Boolean.FALSE};
363        private boolean parseSome()
364                    throws SAXException, IOException, IllegalAccessException,
365                                             java.lang.reflect.InvocationTargetException
366            {
367                    // Take next parsing step, return false iff parsing complete:
368                    if(fConfigSetInput!=null)
369                    {
370                            Object ret=(Boolean)(fConfigParse.invoke(fPullParserConfig,parmsfalse));
371                            return ((Boolean)ret).booleanValue();
372                    }
373                    else
374                    {
375                            Object ret=fParseSome.invoke(fIncrementalParser,noparms);
376                            return ((Boolean)ret).booleanValue();
377                    }
378            }
379            
380    
381      //================================================================
382      /** Simple unit test. Attempt coroutine parsing of document indicated
383       * by first argument (as a URI), report progress.
384       */
385      public static void main(String args[])
386      {
387        System.out.println("Starting...");
388    
389        CoroutineManager co = new CoroutineManager();
390        int appCoroutineID = co.co_joinCoroutineSet(-1);
391        if (appCoroutineID == -1)
392        {
393          System.out.println("ERROR: Couldn't allocate coroutine number.\n");
394          return;
395        }
396        IncrementalSAXSource parser=
397          createIncrementalSAXSource();
398    
399        // Use a serializer as our sample output
400        org.apache.xml.serialize.XMLSerializer trace;
401        trace=new org.apache.xml.serialize.XMLSerializer(System.out,null);
402        parser.setContentHandler(trace);
403        parser.setLexicalHandler(trace);
404    
405        // Tell coroutine to begin parsing, run while parsing is in progress
406    
407        for(int arg=0;arg<args.length;++arg)
408        {
409          try
410          {
411            InputSource source = new InputSource(args[arg]);
412            Object result=null;
413            boolean more=true;
414            parser.startParse(source);
415            for(result = parser.deliverMoreNodes(more);
416                result==Boolean.TRUE;
417                result = parser.deliverMoreNodes(more))
418            {
419              System.out.println("\nSome parsing successful, trying more.\n");
420                
421              // Special test: Terminate parsing early.
422              if(arg+1<args.length && "!".equals(args[arg+1]))
423              {
424                ++arg;
425                more=false;
426              }
427                
428            }
429            
430            if (result instanceof Boolean && ((Boolean)result)==Boolean.FALSE)
431            {
432              System.out.println("\nParser ended (EOF or on request).\n");
433            }
434            else if (result == null) {
435              System.out.println("\nUNEXPECTED: Parser says shut down prematurely.\n");
436            }
437            else if (result instanceof Exception) {
438              throw new org.apache.xml.utils.WrappedRuntimeException((Exception)result);
439              //          System.out.println("\nParser threw exception:");
440              //          ((Exception)result).printStackTrace();
441            }
442            
443          }
444    
445          catch(SAXException e)
446          {
447            e.printStackTrace();
448          }
449        }
450        
451      }
452    
453      
454    } // class IncrementalSAXSource_Xerces