View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.lang.xml.ast;
5   
6   import java.io.IOException;
7   import java.io.Reader;
8   import java.lang.reflect.InvocationHandler;
9   import java.lang.reflect.Method;
10  import java.lang.reflect.Proxy;
11  import java.util.ArrayList;
12  import java.util.Arrays;
13  import java.util.Collections;
14  import java.util.HashMap;
15  import java.util.Iterator;
16  import java.util.LinkedHashSet;
17  import java.util.List;
18  import java.util.Map;
19  import java.util.Set;
20  import java.util.Stack;
21  
22  import javax.xml.parsers.DocumentBuilder;
23  import javax.xml.parsers.ParserConfigurationException;
24  import javax.xml.parsers.SAXParser;
25  import javax.xml.parsers.SAXParserFactory;
26  
27  import net.sourceforge.pmd.lang.ast.ParseException;
28  import net.sourceforge.pmd.lang.ast.RootNode;
29  import net.sourceforge.pmd.lang.ast.xpath.Attribute;
30  import net.sourceforge.pmd.lang.xml.XmlParserOptions;
31  import net.sourceforge.pmd.util.CompoundIterator;
32  
33  import org.apache.xerces.dom.CoreDocumentImpl;
34  import org.apache.xerces.dom.EntityImpl;
35  import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl;
36  import org.w3c.dom.Attr;
37  import org.w3c.dom.CDATASection;
38  import org.w3c.dom.Comment;
39  import org.w3c.dom.Document;
40  import org.w3c.dom.DocumentType;
41  import org.w3c.dom.Element;
42  import org.w3c.dom.Entity;
43  import org.w3c.dom.EntityReference;
44  import org.w3c.dom.NamedNodeMap;
45  import org.w3c.dom.Node;
46  import org.w3c.dom.ProcessingInstruction;
47  import org.w3c.dom.Text;
48  import org.xml.sax.Attributes;
49  import org.xml.sax.InputSource;
50  import org.xml.sax.Locator;
51  import org.xml.sax.SAXException;
52  import org.xml.sax.XMLReader;
53  import org.xml.sax.ext.DefaultHandler2;
54  
55  public class XmlParser {
56      protected final XmlParserOptions parserOptions;
57      protected Map<Node, XmlNode> nodeCache = new HashMap<Node, XmlNode>();
58      
59      public XmlParser(XmlParserOptions parserOptions) {
60  	this.parserOptions = parserOptions;
61      }
62  
63      protected Document parseDocument(Reader reader) throws ParseException {
64  	nodeCache.clear();
65  	try {
66          SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
67          saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
68          saxParserFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
69          saxParserFactory.setNamespaceAware(parserOptions.isNamespaceAware());
70          saxParserFactory.setValidating(parserOptions.isValidating());
71          saxParserFactory.setXIncludeAware(parserOptions.isXincludeAware());
72          SAXParser saxParser = saxParserFactory.newSAXParser();
73  
74          LineNumberAwareSaxHandler handler = new LineNumberAwareSaxHandler(parserOptions);
75          XMLReader xmlReader = saxParser.getXMLReader();
76          xmlReader.setContentHandler(handler);
77          xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
78          xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", handler);
79          xmlReader.setEntityResolver(parserOptions.getEntityResolver());
80  
81          xmlReader.parse(new InputSource(reader));
82          return handler.getDocument();
83  	} catch (ParserConfigurationException e) {
84  	    throw new ParseException(e);
85  	} catch (SAXException e) {
86  	    throw new ParseException(e);
87  	} catch (IOException e) {
88  	    throw new ParseException(e);
89  	}
90      }
91  
92      /**
93       * SAX Handler to build a DOM Document with line numbers.
94       * @see http://eyalsch.wordpress.com/2010/11/30/xml-dom-2/
95       */
96      private static class LineNumberAwareSaxHandler extends DefaultHandler2 {
97          public static final String BEGIN_LINE = "pmd:beginLine";
98          public static final String BEGIN_COLUMN = "pmd:beginColumn";
99          public static final String END_LINE = "pmd:endLine";
100         public static final String END_COLUMN = "pmd:endColumn";
101 
102         private Stack<Node> nodeStack = new Stack<Node>();
103         private StringBuilder text = new StringBuilder();
104         private int beginLineText = -1;
105         private int beginColumnText = -1;
106         private Locator locator;
107         private final DocumentBuilder documentBuilder;
108         private final Document document;
109         private boolean cdataEnded = false;
110 
111         private boolean coalescing;
112         private boolean expandEntityReferences;
113         private boolean ignoringComments;
114         private boolean ignoringElementContentWhitespace;
115         private boolean namespaceAware;
116 
117         public LineNumberAwareSaxHandler(XmlParserOptions options) throws ParserConfigurationException {
118             // uses xerces directly, so that we can build a DTD / entities section
119             this.documentBuilder = new DocumentBuilderFactoryImpl().newDocumentBuilder();
120 
121             this.document = this.documentBuilder.newDocument();
122             this.coalescing = options.isCoalescing();
123             this.expandEntityReferences = options.isExpandEntityReferences();
124             this.ignoringComments = options.isIgnoringComments();
125             this.ignoringElementContentWhitespace = options.isIgnoringElementContentWhitespace();
126             this.namespaceAware = options.isNamespaceAware();
127         }
128 
129         public Document getDocument() {
130             return document;
131         }
132 
133         @Override
134         public void setDocumentLocator(Locator locator) {
135             this.locator = locator;
136         }
137         @Override
138         public void startElement(String uri, String localName, String qName, Attributes attributes)
139                 throws SAXException {
140             addTextIfNeeded(false);
141 
142             Element element;
143             if (namespaceAware) {
144                 element = document.createElementNS(uri, qName);
145             } else {
146                 element = document.createElement(qName);
147             }
148 
149             for (int i = 0; i < attributes.getLength(); i++) {
150                 String attQName = attributes.getQName(i);
151                 String attNamespaceURI = attributes.getURI(i);
152                 String attValue = attributes.getValue(i);
153                 Attr a;
154                 if (namespaceAware) {
155                     a = document.createAttributeNS(attNamespaceURI, attQName);
156                     element.setAttributeNodeNS(a);
157                 } else {
158                     a = document.createAttribute(attQName);
159                     element.setAttributeNode(a);
160                 }
161                 a.setValue(attValue);
162             }
163 
164             element.setUserData(BEGIN_LINE, locator.getLineNumber(), null);
165             element.setUserData(BEGIN_COLUMN, locator.getColumnNumber(), null);
166 
167             nodeStack.push(element);
168         }
169         private void addTextIfNeeded(boolean alwaysAdd) {
170             if (text.length() > 0) {
171                 addTextNode(text.toString(), cdataEnded || alwaysAdd);
172                 text.setLength(0);
173                 cdataEnded = false;
174             }
175         }
176         private void addTextNode(String s, boolean alwaysAdd) {
177             if (alwaysAdd || !ignoringElementContentWhitespace || s.trim().length() > 0) {
178                 Text textNode = document.createTextNode(s);
179                 textNode.setUserData(BEGIN_LINE, beginLineText, null);
180                 textNode.setUserData(BEGIN_COLUMN, beginColumnText, null);
181                 textNode.setUserData(END_LINE, locator.getLineNumber(), null);
182                 textNode.setUserData(END_COLUMN, locator.getColumnNumber(), null);
183                 appendChild(textNode);
184             }
185         }
186         @Override
187         public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
188             this.characters(ch, start, length);
189         }
190         @Override
191         public void characters(char[] ch, int start, int length) throws SAXException {
192             if (text.length() == 0) {
193                 beginLineText = locator.getLineNumber();
194                 beginColumnText = locator.getColumnNumber();
195             }
196             text.append(ch, start, length);
197         }
198         @Override
199         public void endElement(String uri, String localName, String qName) throws SAXException {
200             addTextIfNeeded(false);
201             Node element = nodeStack.pop();
202             element.setUserData(END_LINE, locator.getLineNumber(), null);
203             element.setUserData(END_COLUMN, locator.getColumnNumber(), null);
204             appendChild(element);
205         }
206         @Override
207         public void startDocument() throws SAXException {
208             document.setUserData(BEGIN_LINE, locator.getLineNumber(), null);
209             document.setUserData(BEGIN_COLUMN, locator.getColumnNumber(), null);
210         }
211         @Override
212         public void endDocument() throws SAXException {
213             addTextIfNeeded(false);
214             document.setUserData(END_LINE, locator.getLineNumber(), null);
215             document.setUserData(END_COLUMN, locator.getColumnNumber(), null);
216         }
217         @Override
218         public void startCDATA() throws SAXException {
219             if (!coalescing) {
220                 addTextIfNeeded(true);
221             }
222         }
223         @Override
224         public void endCDATA() throws SAXException {
225             if (!coalescing) {
226                 CDATASection cdataSection = document.createCDATASection(text.toString());
227                 cdataSection.setUserData(BEGIN_LINE, beginLineText, null);
228                 cdataSection.setUserData(BEGIN_COLUMN, beginColumnText, null);
229                 cdataSection.setUserData(END_LINE, locator.getLineNumber(), null);
230                 cdataSection.setUserData(END_COLUMN, locator.getColumnNumber(), null);
231                 appendChild(cdataSection);
232                 text.setLength(0);
233                 cdataEnded = true;
234             }
235         }
236         @Override
237         public void comment(char[] ch, int start, int length) throws SAXException {
238             if (!ignoringComments) {
239                 addTextIfNeeded(false);
240                 Comment comment = document.createComment(new String(ch, start, length));
241                 comment.setUserData(BEGIN_LINE, locator.getLineNumber(), null);
242                 comment.setUserData(BEGIN_COLUMN, locator.getColumnNumber(), null);
243                 comment.setUserData(END_LINE, locator.getLineNumber(), null);
244                 comment.setUserData(END_COLUMN, locator.getColumnNumber(), null);
245                 appendChild(comment);
246             }
247         }
248         @Override
249         public void startDTD(String name, String publicId, String systemId) throws SAXException {
250             DocumentType docType = documentBuilder
251                     .getDOMImplementation()
252                     .createDocumentType(name, publicId, systemId);
253             docType.setUserData(BEGIN_LINE, locator.getLineNumber(), null);
254             docType.setUserData(BEGIN_COLUMN, locator.getColumnNumber(), null);
255             document.appendChild(docType);
256         }
257         @Override
258         public void startEntity(String name) throws SAXException {
259             if (!expandEntityReferences) {
260                 addTextIfNeeded(false);
261             }
262         }
263         @Override
264         public void endEntity(String name) throws SAXException {
265             if (!expandEntityReferences) {
266                 EntityReference entity = document.createEntityReference(name);
267                 entity.setUserData(BEGIN_LINE, beginLineText, null);
268                 entity.setUserData(BEGIN_COLUMN, beginColumnText, null);
269                 entity.setUserData(END_LINE, locator.getLineNumber(), null);
270                 entity.setUserData(END_COLUMN, locator.getColumnNumber(), null);
271                 appendChild(entity);
272                 text.setLength(0); // throw the expanded entity text away
273             }
274         }
275         @Override
276         public void endDTD() throws SAXException {
277             DocumentType doctype = document.getDoctype();
278             doctype.setUserData(END_LINE, locator.getLineNumber(), null);
279             doctype.setUserData(END_COLUMN, locator.getColumnNumber(), null);
280         }
281         @Override
282         public void internalEntityDecl(String name, String value) throws SAXException {
283             Entity entity = new ChangeableEntity(document, name);
284             entity.appendChild(document.createTextNode(value));
285 
286             NamedNodeMap entities = document.getDoctype().getEntities();
287             entities.setNamedItem(entity);
288         }
289         @Override
290         public void processingInstruction(String target, String data) throws SAXException {
291             ProcessingInstruction pi = document.createProcessingInstruction(target, data);
292             appendChild(pi);
293         }
294         private void appendChild(Node node) {
295             if (nodeStack.isEmpty()) {
296                 document.appendChild(node);
297             } else {
298                 nodeStack.peek().appendChild(node);
299             }
300         }
301         private static class ChangeableEntity extends EntityImpl {
302             public ChangeableEntity(Document document, String name) {
303                 super((CoreDocumentImpl)document, name);
304                 flags = (short) (flags & ~READONLY); // make it changeable again, so that we can add a text node as child
305             }
306         }
307     }
308 
309 
310     public XmlNode parse(Reader reader) {
311 	Document document = parseDocument(reader);
312 	return createProxy(document);
313     }
314 
315     public XmlNode createProxy(Node node) {
316 	XmlNode proxy = nodeCache.get(node);
317 	if (proxy != null) {
318 	    return proxy;
319 	}
320 
321 	// TODO Change Parser interface to take ClassLoader?
322 	LinkedHashSet<Class<?>> interfaces = new LinkedHashSet<Class<?>>();
323 	interfaces.add(XmlNode.class);
324 	if (node instanceof Document) {
325 	    interfaces.add(RootNode.class);
326 	}
327 	addAllInterfaces(interfaces, node.getClass());
328 
329 	proxy = (XmlNode) Proxy.newProxyInstance(XmlParser.class.getClassLoader(), interfaces
330 		.toArray(new Class[interfaces.size()]), new XmlNodeInvocationHandler(node));
331 	nodeCache.put(node, proxy);
332 	return proxy;
333     }
334 
335     public void addAllInterfaces(Set<Class<?>> interfaces, Class<?> clazz) {
336 	interfaces.addAll(Arrays.asList((Class<?>[]) clazz.getInterfaces()));
337 	if (clazz.getSuperclass() != null) {
338 	    addAllInterfaces(interfaces, clazz.getSuperclass());
339 	}
340     }
341 
342     public class XmlNodeInvocationHandler implements InvocationHandler {
343 	private final Node node;
344 	private Object userData;
345 
346 	public XmlNodeInvocationHandler(Node node) {
347 	    this.node = node;
348 	}
349 
350 	public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
351 	    // XmlNode method?
352 	    if (method.getDeclaringClass().isAssignableFrom(XmlNode.class)
353 		    && !"java.lang.Object".equals(method.getDeclaringClass().getName())) {
354 		if ("jjtGetNumChildren".equals(method.getName())) {
355 		    return node.hasChildNodes() ? node.getChildNodes().getLength() : 0;
356 		} else if ("jjtGetChild".equals(method.getName())) {
357 		    return createProxy(node.getChildNodes().item(((Integer) args[0]).intValue()));
358 		} else if ("getImage".equals(method.getName())) {
359 		    if (node instanceof Text) {
360 			return ((Text) node).getData();
361 		    } else {
362 			return null;
363 		    }
364 		} else if ("jjtGetParent".equals(method.getName())) {
365 		    Node parent = node.getParentNode();
366 		    if (parent != null && !(parent instanceof Document)) {
367 			return createProxy(parent);
368 		    } else {
369 			return null;
370 		    }
371 		} else if ("getAttributeIterator".equals(method.getName())) {
372 		    List<Iterator<Attribute>> iterators = new ArrayList<Iterator<Attribute>>();
373 
374 		    // Expose DOM Attributes
375 		    final NamedNodeMap attributes = node.getAttributes();
376 		    iterators.add(new Iterator<Attribute>() {
377 			private int index;
378 
379 			public boolean hasNext() {
380 			    return attributes != null && index < attributes.getLength();
381 			}
382 
383 			public Attribute next() {
384 			    Node attributeNode = attributes.item(index++);
385 			    return new Attribute(createProxy(node), attributeNode.getNodeName(), attributeNode
386 				    .getNodeValue());
387 			}
388 
389 			public void remove() {
390 			    throw new UnsupportedOperationException();
391 			}
392 		    });
393 
394 		    // Expose Text/CDATA nodes to have an 'Image' attribute like AST Nodes
395 		    if (proxy instanceof Text) {
396 			iterators.add(Collections.singletonList(
397 				new Attribute((net.sourceforge.pmd.lang.ast.Node) proxy, "Image", ((Text) proxy)
398 					.getData())).iterator());
399 		    }
400 
401 		    // Expose Java Attributes
402 		    // iterators.add(new AttributeAxisIterator((net.sourceforge.pmd.lang.ast.Node) p));
403 
404 		    return new CompoundIterator<Attribute>(iterators.toArray(new Iterator[iterators.size()]));
405 		} else if ("getBeginLine".equals(method.getName())) {
406 		    return getUserData(LineNumberAwareSaxHandler.BEGIN_LINE);
407 		} else if ("getBeginColumn".equals(method.getName())) {
408             return getUserData(LineNumberAwareSaxHandler.BEGIN_COLUMN);
409 		} else if ("getEndLine".equals(method.getName())) {
410 		    return getUserData(LineNumberAwareSaxHandler.END_LINE);
411 		} else if ("getEndColumn".equals(method.getName())) {
412 		    return getUserData(LineNumberAwareSaxHandler.END_COLUMN);
413 		} else if ("getNode".equals(method.getName())) {
414 		    return node;
415 		} else if ("getUserData".equals(method.getName())) {
416 		    return userData;
417 		} else if ("setUserData".equals(method.getName())) {
418 		    userData = args[0];
419 		    return null;
420 		} else if ("isFindBoundary".equals(method.getName())) {
421 		    return false;
422 		}
423 		throw new UnsupportedOperationException("Method not supported for XmlNode: " + method);
424 	    }
425 	    // Delegate method
426 	    else {
427 		if ("toString".equals(method.getName())) {
428 		    String s = node.getNodeName();
429 		    s = s.replace("#", "");
430 		    return s;
431 		}
432 		Object result = method.invoke(node, args);
433 		return result;
434 	    }
435 	}
436 
437     private Integer getUserData(String key) {
438         if (node.getUserData(key) != null) {
439             return (Integer)node.getUserData(key);
440         }
441         return Integer.valueOf(-1);
442     }
443     }
444 }