001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: DocumentCache.java 1225369 2011-12-28 22:54:01Z mrglavas $
020 */
021
022 package org.apache.xalan.xsltc.dom;
023
024 import java.io.File;
025 import java.io.PrintWriter;
026 import java.net.URL;
027 import java.net.URLConnection;
028 import java.net.URLDecoder;
029 import java.util.Date;
030 import java.util.Hashtable;
031
032 import javax.xml.parsers.ParserConfigurationException;
033 import javax.xml.parsers.SAXParser;
034 import javax.xml.parsers.SAXParserFactory;
035 import javax.xml.transform.TransformerException;
036 import javax.xml.transform.sax.SAXSource;
037
038 import org.apache.xalan.xsltc.DOM;
039 import org.apache.xalan.xsltc.DOMCache;
040 import org.apache.xalan.xsltc.DOMEnhancedForDTM;
041 import org.apache.xalan.xsltc.Translet;
042 import org.apache.xalan.xsltc.runtime.AbstractTranslet;
043 import org.apache.xalan.xsltc.runtime.BasisLibrary;
044 import org.apache.xalan.xsltc.runtime.Constants;
045 import org.apache.xml.utils.SystemIDResolver;
046
047 import org.xml.sax.InputSource;
048 import org.xml.sax.SAXException;
049 import org.xml.sax.XMLReader;
050
051 /**
052 * @author Morten Jorgensen
053 */
054 public final class DocumentCache implements DOMCache {
055
056 private int _size;
057 private Hashtable _references;
058 private String[] _URIs;
059 private int _count;
060 private int _current;
061 private SAXParser _parser;
062 private XMLReader _reader;
063 private XSLTCDTMManager _dtmManager;
064
065 private static final int REFRESH_INTERVAL = 1000;
066
067 /*
068 * Inner class containing a DOMImpl object and DTD handler
069 */
070 public final class CachedDocument {
071
072 // Statistics data
073 private long _firstReferenced;
074 private long _lastReferenced;
075 private long _accessCount;
076 private long _lastModified;
077 private long _lastChecked;
078 private long _buildTime;
079
080 // DOM and DTD handler references
081 private DOMEnhancedForDTM _dom = null;
082
083 /**
084 * Constructor - load document and initialise statistics
085 */
086 public CachedDocument(String uri) {
087 // Initialise statistics variables
088 final long stamp = System.currentTimeMillis();
089 _firstReferenced = stamp;
090 _lastReferenced = stamp;
091 _accessCount = 0;
092 loadDocument(uri);
093
094 _buildTime = System.currentTimeMillis() - stamp;
095 }
096
097 /**
098 * Loads the document and updates build-time (latency) statistics
099 */
100 public void loadDocument(String uri) {
101
102 try {
103 final long stamp = System.currentTimeMillis();
104 _dom = (DOMEnhancedForDTM)_dtmManager.getDTM(
105 new SAXSource(_reader, new InputSource(uri)),
106 false, null, true, false);
107 _dom.setDocumentURI(uri);
108
109 // The build time can be used for statistics for a better
110 // priority algorithm (currently round robin).
111 final long thisTime = System.currentTimeMillis() - stamp;
112 if (_buildTime > 0)
113 _buildTime = (_buildTime + thisTime) >>> 1;
114 else
115 _buildTime = thisTime;
116 }
117 catch (Exception e) {
118 _dom = null;
119 }
120 }
121
122 public DOM getDocument() { return(_dom); }
123
124 public long getFirstReferenced() { return(_firstReferenced); }
125
126 public long getLastReferenced() { return(_lastReferenced); }
127
128 public long getAccessCount() { return(_accessCount); }
129
130 public void incAccessCount() { _accessCount++; }
131
132 public long getLastModified() { return(_lastModified); }
133
134 public void setLastModified(long t){ _lastModified = t; }
135
136 public long getLatency() { return(_buildTime); }
137
138 public long getLastChecked() { return(_lastChecked); }
139
140 public void setLastChecked(long t) { _lastChecked = t; }
141
142 public long getEstimatedSize() {
143 if (_dom != null)
144 return(_dom.getSize() << 5); // ???
145 else
146 return(0);
147 }
148
149 }
150
151 /**
152 * DocumentCache constructor
153 */
154 public DocumentCache(int size) throws SAXException {
155 this(size, null);
156 try {
157 _dtmManager = (XSLTCDTMManager)XSLTCDTMManager.getDTMManagerClass()
158 .newInstance();
159 } catch (Exception e) {
160 throw new SAXException(e);
161 }
162 }
163
164 /**
165 * DocumentCache constructor
166 */
167 public DocumentCache(int size, XSLTCDTMManager dtmManager) throws SAXException {
168 _dtmManager = dtmManager;
169 _count = 0;
170 _current = 0;
171 _size = size;
172 _references = new Hashtable(_size+2);
173 _URIs = new String[_size];
174
175 try {
176 // Create a SAX parser and get the XMLReader object it uses
177 final SAXParserFactory factory = SAXParserFactory.newInstance();
178 try {
179 factory.setFeature(Constants.NAMESPACE_FEATURE,true);
180 }
181 catch (Exception e) {
182 factory.setNamespaceAware(true);
183 }
184 _parser = factory.newSAXParser();
185 _reader = _parser.getXMLReader();
186 }
187 catch (ParserConfigurationException e) {
188 BasisLibrary.runTimeError(BasisLibrary.NAMESPACES_SUPPORT_ERR);
189 }
190 }
191
192 /**
193 * Returns the time-stamp for a document's last update
194 */
195 private final long getLastModified(String uri) {
196 try {
197 URL url = new URL(uri);
198 URLConnection connection = url.openConnection();
199 long timestamp = connection.getLastModified();
200 // Check for a "file:" URI (courtesy of Brian Ewins)
201 if (timestamp == 0){ // get 0 for local URI
202 if ("file".equals(url.getProtocol())){
203 File localfile = new File(URLDecoder.decode(url.getFile()));
204 timestamp = localfile.lastModified();
205 }
206 }
207 return(timestamp);
208 }
209 // Brutal handling of all exceptions
210 catch (Exception e) {
211 return(System.currentTimeMillis());
212 }
213 }
214
215 /**
216 *
217 */
218 private CachedDocument lookupDocument(String uri) {
219 return((CachedDocument)_references.get(uri));
220 }
221
222 /**
223 *
224 */
225 private synchronized void insertDocument(String uri, CachedDocument doc) {
226 if (_count < _size) {
227 // Insert out URI in circular buffer
228 _URIs[_count++] = uri;
229 _current = 0;
230 }
231 else {
232 // Remove oldest URI from reference Hashtable
233 _references.remove(_URIs[_current]);
234 // Insert our URI in circular buffer
235 _URIs[_current] = uri;
236 if (++_current >= _size) _current = 0;
237 }
238 _references.put(uri, doc);
239 }
240
241 /**
242 *
243 */
244 private synchronized void replaceDocument(String uri, CachedDocument doc) {
245 CachedDocument old = (CachedDocument)_references.get(uri);
246 if (doc == null)
247 insertDocument(uri, doc);
248 else
249 _references.put(uri, doc);
250 }
251
252 /**
253 * Returns a document either by finding it in the cache or
254 * downloading it and putting it in the cache.
255 */
256 public DOM retrieveDocument(String baseURI, String href, Translet trs) {
257 CachedDocument doc;
258
259 String uri = href;
260 if (baseURI != null && baseURI.length() != 0) {
261 try {
262 uri = SystemIDResolver.getAbsoluteURI(uri, baseURI);
263 } catch (TransformerException te) {
264 // ignore
265 }
266 }
267
268 // Try to get the document from the cache first
269 if ((doc = lookupDocument(uri)) == null) {
270 doc = new CachedDocument(uri);
271 if (doc == null) return null; // better error handling needed!!!
272 doc.setLastModified(getLastModified(uri));
273 insertDocument(uri, doc);
274 }
275 // If the document is in the cache we must check if it is still valid
276 else {
277 long now = System.currentTimeMillis();
278 long chk = doc.getLastChecked();
279 doc.setLastChecked(now);
280 // Has the modification time for this file been checked lately?
281 if (now > (chk + REFRESH_INTERVAL)) {
282 doc.setLastChecked(now);
283 long last = getLastModified(uri);
284 // Reload document if it has been modified since last download
285 if (last > doc.getLastModified()) {
286 doc = new CachedDocument(uri);
287 if (doc == null) return null;
288 doc.setLastModified(getLastModified(uri));
289 replaceDocument(uri, doc);
290 }
291 }
292
293 }
294
295 // Get the references to the actual DOM and DTD handler
296 final DOM dom = doc.getDocument();
297
298 // The dom reference may be null if the URL pointed to a
299 // non-existing document
300 if (dom == null) return null;
301
302 doc.incAccessCount(); // For statistics
303
304 final AbstractTranslet translet = (AbstractTranslet)trs;
305
306 // Give the translet an early opportunity to extract any
307 // information from the DOM object that it would like.
308 translet.prepassDocument(dom);
309
310 return(doc.getDocument());
311 }
312
313 /**
314 * Outputs the cache statistics
315 */
316 public void getStatistics(PrintWriter out) {
317 out.println("<h2>DOM cache statistics</h2><center><table border=\"2\">"+
318 "<tr><td><b>Document URI</b></td>"+
319 "<td><center><b>Build time</b></center></td>"+
320 "<td><center><b>Access count</b></center></td>"+
321 "<td><center><b>Last accessed</b></center></td>"+
322 "<td><center><b>Last modified</b></center></td></tr>");
323
324 for (int i=0; i<_count; i++) {
325 CachedDocument doc = (CachedDocument)_references.get(_URIs[i]);
326 out.print("<tr><td><a href=\""+_URIs[i]+"\">"+
327 "<font size=-1>"+_URIs[i]+"</font></a></td>");
328 out.print("<td><center>"+doc.getLatency()+"ms</center></td>");
329 out.print("<td><center>"+doc.getAccessCount()+"</center></td>");
330 out.print("<td><center>"+(new Date(doc.getLastReferenced()))+
331 "</center></td>");
332 out.print("<td><center>"+(new Date(doc.getLastModified()))+
333 "</center></td>");
334 out.println("</tr>");
335 }
336
337 out.println("</table></center>");
338 }
339 }