001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements. See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership. The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the  "License");
007     * you may not use this file except in compliance with the License.
008     * You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    /*
019     * $Id: DocumentCache.java 1225369 2011-12-28 22:54:01Z mrglavas $
020     */
021    
022    package org.apache.xalan.xsltc.dom;
023    
024    import java.io.File;
025    import java.io.PrintWriter;
026    import java.net.URL;
027    import java.net.URLConnection;
028    import java.net.URLDecoder;
029    import java.util.Date;
030    import java.util.Hashtable;
031    
032    import javax.xml.parsers.ParserConfigurationException;
033    import javax.xml.parsers.SAXParser;
034    import javax.xml.parsers.SAXParserFactory;
035    import javax.xml.transform.TransformerException;
036    import javax.xml.transform.sax.SAXSource;
037    
038    import org.apache.xalan.xsltc.DOM;
039    import org.apache.xalan.xsltc.DOMCache;
040    import org.apache.xalan.xsltc.DOMEnhancedForDTM;
041    import org.apache.xalan.xsltc.Translet;
042    import org.apache.xalan.xsltc.runtime.AbstractTranslet;
043    import org.apache.xalan.xsltc.runtime.BasisLibrary;
044    import org.apache.xalan.xsltc.runtime.Constants;
045    import org.apache.xml.utils.SystemIDResolver;
046    
047    import org.xml.sax.InputSource;
048    import org.xml.sax.SAXException;
049    import org.xml.sax.XMLReader;
050    
051    /**
052     * @author Morten Jorgensen
053     */
054    public final class DocumentCache implements DOMCache {
055    
056        private int       _size;
057        private Hashtable _references;
058        private String[]  _URIs;
059        private int       _count;
060        private int       _current;
061        private SAXParser _parser;
062        private XMLReader _reader;
063        private XSLTCDTMManager _dtmManager;
064    
065        private static final int REFRESH_INTERVAL = 1000;
066    
067        /*
068         * Inner class containing a DOMImpl object and DTD handler
069         */
070        public final class CachedDocument {
071            
072            // Statistics data
073            private long _firstReferenced;
074            private long _lastReferenced;
075            private long _accessCount;
076            private long _lastModified;
077            private long _lastChecked;
078            private long _buildTime;
079    
080            // DOM and DTD handler references
081            private DOMEnhancedForDTM _dom = null;
082            
083            /**
084             * Constructor - load document and initialise statistics
085             */
086            public CachedDocument(String uri) {
087                // Initialise statistics variables
088                final long stamp = System.currentTimeMillis();
089                _firstReferenced = stamp;
090                _lastReferenced  = stamp;
091                _accessCount     = 0;
092                loadDocument(uri);
093    
094                _buildTime = System.currentTimeMillis() - stamp;
095            }
096    
097            /**
098             * Loads the document and updates build-time (latency) statistics
099             */
100            public void loadDocument(String uri) {
101    
102                try {
103                    final long stamp = System.currentTimeMillis();
104                    _dom = (DOMEnhancedForDTM)_dtmManager.getDTM(
105                                     new SAXSource(_reader, new InputSource(uri)),
106                                     false, null, true, false);
107                    _dom.setDocumentURI(uri);
108    
109                    // The build time can be used for statistics for a better
110                    // priority algorithm (currently round robin).
111                    final long thisTime = System.currentTimeMillis() - stamp;
112                    if (_buildTime > 0)
113                        _buildTime = (_buildTime + thisTime) >>> 1;
114                    else
115                        _buildTime = thisTime;
116                }
117                catch (Exception e) {
118                    _dom = null;
119                }
120            }
121    
122            public DOM getDocument()       { return(_dom); }
123    
124            public long getFirstReferenced()   { return(_firstReferenced); }
125    
126            public long getLastReferenced()    { return(_lastReferenced); }
127    
128            public long getAccessCount()       { return(_accessCount); }
129    
130            public void incAccessCount()       { _accessCount++; }
131    
132            public long getLastModified()      { return(_lastModified); }
133    
134            public void setLastModified(long t){ _lastModified = t; }
135    
136            public long getLatency()           { return(_buildTime); }
137    
138            public long getLastChecked()       { return(_lastChecked); }
139            
140            public void setLastChecked(long t) { _lastChecked = t; }
141    
142            public long getEstimatedSize() {
143                if (_dom != null)
144                    return(_dom.getSize() << 5); // ???
145                else
146                    return(0);
147            }
148    
149        }
150    
151        /**
152         * DocumentCache constructor
153         */
154        public DocumentCache(int size) throws SAXException {
155            this(size, null);
156            try {
157                _dtmManager = (XSLTCDTMManager)XSLTCDTMManager.getDTMManagerClass()
158                                                              .newInstance();
159            } catch (Exception e) {
160                throw new SAXException(e);
161            }
162        }
163    
164        /**
165         * DocumentCache constructor
166         */
167        public DocumentCache(int size, XSLTCDTMManager dtmManager) throws SAXException {
168            _dtmManager = dtmManager;
169            _count = 0;
170            _current = 0;
171            _size  = size;
172            _references = new Hashtable(_size+2);
173            _URIs = new String[_size];
174    
175            try {
176                // Create a SAX parser and get the XMLReader object it uses
177                final SAXParserFactory factory = SAXParserFactory.newInstance();
178                try {
179                    factory.setFeature(Constants.NAMESPACE_FEATURE,true);
180                }
181                catch (Exception e) {
182                    factory.setNamespaceAware(true);
183                }
184                _parser = factory.newSAXParser();
185                _reader = _parser.getXMLReader();
186            }
187            catch (ParserConfigurationException e) {
188                BasisLibrary.runTimeError(BasisLibrary.NAMESPACES_SUPPORT_ERR);
189            }
190        }
191    
192        /**
193         * Returns the time-stamp for a document's last update
194         */
195        private final long getLastModified(String uri) {
196            try {
197                URL url = new URL(uri);
198                URLConnection connection = url.openConnection();
199                long timestamp = connection.getLastModified();
200                // Check for a "file:" URI (courtesy of Brian Ewins)
201                if (timestamp == 0){ // get 0 for local URI
202                    if ("file".equals(url.getProtocol())){
203                        File localfile = new File(URLDecoder.decode(url.getFile()));
204                        timestamp = localfile.lastModified();
205                    }
206                }
207                return(timestamp);
208            }
209            // Brutal handling of all exceptions
210            catch (Exception e) {
211                return(System.currentTimeMillis());
212            }
213        }
214    
215        /**
216         *
217         */
218        private CachedDocument lookupDocument(String uri) {
219            return((CachedDocument)_references.get(uri));
220        }
221    
222        /**
223         *
224         */
225        private synchronized void insertDocument(String uri, CachedDocument doc) {
226            if (_count < _size) {
227                // Insert out URI in circular buffer
228                _URIs[_count++] = uri;
229                _current = 0;
230            }
231            else {
232                // Remove oldest URI from reference Hashtable
233                _references.remove(_URIs[_current]);
234                // Insert our URI in circular buffer
235                _URIs[_current] = uri;
236                if (++_current >= _size) _current = 0;
237            }
238            _references.put(uri, doc);
239        }
240    
241        /**
242         *
243         */
244        private synchronized void replaceDocument(String uri, CachedDocument doc) {
245            CachedDocument old = (CachedDocument)_references.get(uri);
246            if (doc == null)
247                insertDocument(uri, doc);
248            else
249                _references.put(uri, doc);
250        }
251    
252        /**
253         * Returns a document either by finding it in the cache or
254         * downloading it and putting it in the cache.
255         */
256        public DOM retrieveDocument(String baseURI, String href, Translet trs) {
257            CachedDocument doc;
258    
259        String uri = href;
260        if (baseURI != null && baseURI.length() != 0) {
261            try {
262                uri = SystemIDResolver.getAbsoluteURI(uri, baseURI);
263            } catch (TransformerException te) {
264                // ignore    
265            }
266        }
267        
268            // Try to get the document from the cache first
269            if ((doc = lookupDocument(uri)) == null) {
270                doc = new CachedDocument(uri);
271                if (doc == null) return null; // better error handling needed!!!
272                doc.setLastModified(getLastModified(uri));
273                insertDocument(uri, doc);
274            }
275            // If the document is in the cache we must check if it is still valid
276            else {
277                long now = System.currentTimeMillis();
278                long chk = doc.getLastChecked();
279                doc.setLastChecked(now);
280                // Has the modification time for this file been checked lately?
281                if (now > (chk + REFRESH_INTERVAL)) {
282                    doc.setLastChecked(now);
283                    long last = getLastModified(uri);
284                    // Reload document if it has been modified since last download
285                    if (last > doc.getLastModified()) {
286                        doc = new CachedDocument(uri);
287                        if (doc == null) return null;
288                        doc.setLastModified(getLastModified(uri));
289                        replaceDocument(uri, doc);
290                    }
291                }
292                
293            }
294    
295            // Get the references to the actual DOM and DTD handler
296            final DOM dom = doc.getDocument();
297    
298            // The dom reference may be null if the URL pointed to a
299            // non-existing document
300            if (dom == null) return null;
301    
302            doc.incAccessCount(); // For statistics
303    
304            final AbstractTranslet translet = (AbstractTranslet)trs;
305    
306            // Give the translet an early opportunity to extract any
307            // information from the DOM object that it would like.
308            translet.prepassDocument(dom);
309    
310            return(doc.getDocument());
311        }
312    
313        /**
314         * Outputs the cache statistics
315         */
316        public void getStatistics(PrintWriter out) {
317            out.println("<h2>DOM cache statistics</h2><center><table border=\"2\">"+
318                        "<tr><td><b>Document URI</b></td>"+
319                        "<td><center><b>Build time</b></center></td>"+
320                        "<td><center><b>Access count</b></center></td>"+
321                        "<td><center><b>Last accessed</b></center></td>"+
322                        "<td><center><b>Last modified</b></center></td></tr>");
323    
324            for (int i=0; i<_count; i++) {
325                CachedDocument doc = (CachedDocument)_references.get(_URIs[i]);
326                out.print("<tr><td><a href=\""+_URIs[i]+"\">"+
327                          "<font size=-1>"+_URIs[i]+"</font></a></td>");
328                out.print("<td><center>"+doc.getLatency()+"ms</center></td>");
329                out.print("<td><center>"+doc.getAccessCount()+"</center></td>");
330                out.print("<td><center>"+(new Date(doc.getLastReferenced()))+
331                          "</center></td>");
332                out.print("<td><center>"+(new Date(doc.getLastModified()))+
333                          "</center></td>");
334                out.println("</tr>");
335            }
336    
337            out.println("</table></center>");
338        }
339    }