View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.lang.xml;
5   
6   import java.io.ByteArrayOutputStream;
7   import java.io.PrintStream;
8   import java.io.StringReader;
9   import java.io.UnsupportedEncodingException;
10  import java.util.Iterator;
11  
12  import net.sourceforge.pmd.lang.Language;
13  import net.sourceforge.pmd.lang.LanguageVersionHandler;
14  import net.sourceforge.pmd.lang.Parser;
15  import net.sourceforge.pmd.lang.ast.Node;
16  import net.sourceforge.pmd.lang.ast.xpath.Attribute;
17  import net.sourceforge.pmd.lang.xml.ast.XmlNode;
18  import net.sourceforge.pmd.lang.xml.ast.XmlParser;
19  import net.sourceforge.pmd.util.StringUtil;
20  
21  import org.junit.Assert;
22  import org.junit.Test;
23  
24  /**
25   * Unit test for the {@link XmlParser}.
26   */
27  public class XmlParserTest {
28  
/**
 * Test document with an internal DTD subset. The DTD declares
 * {@code rootElement}, {@code child1} (with a required {@code test}
 * attribute), {@code child2} and the general entity {@code pmd}.
 * The body contains a comment, a reference to the {@code pmd} entity
 * and a CDATA section.
 */
29      private static final String XML_TEST =
30              "<?xml version=\"1.0\"?>\n" +
31              "<!DOCTYPE rootElement\n" +
32              "[\n" +
33              "<!ELEMENT rootElement (child1,child2)>\n" +
34              "<!ELEMENT child1 (#PCDATA)>\n" +
35              "<!ATTLIST child1 test CDATA #REQUIRED>\n" +
36              "<!ELEMENT child2 (#PCDATA)>\n" +
37              "\n" +
38              "<!ENTITY pmd \"Copyright: PMD\">\n" +
39              "]\n" +
40              ">\n" +
41              "<rootElement>\n" +
42              "    <!-- that's a comment -->\n" +
43              "    <child1 test=\"1\">entity: &pmd;\n" +
44              "    </child1>\n" +
45              "    <child2>\n" +
46              "      <![CDATA[ cdata section ]]>\n" +
47              "    </child2>\n" +
48              "</rootElement>";
49  
/**
 * Test document without a DTD whose elements all use the {@code pmd}
 * prefix, bound to the namespace {@code http://pmd.sf.net}. The body
 * contains a comment, the {@code &amp;} entity reference and a CDATA
 * section.
 */
50      private static final String XML_NAMESPACE_TEST =
51              "<?xml version=\"1.0\"?>\n" + 
52              "<pmd:rootElement xmlns:pmd=\"http://pmd.sf.net\">\n" + 
53              "    <!-- that's a comment -->\n" + 
54              "    <pmd:child1 test=\"1\">entity: &amp;\n" + 
55              "    </pmd:child1>\n" + 
56              "    <pmd:child2>\n" + 
57              "      <![CDATA[ cdata section ]]>\n" + 
58              "    </pmd:child2>\n" + 
59              "</pmd:rootElement>";
60  
/**
 * Test document that does not conform to its own internal DTD: the DTD
 * only allows a {@code child} element inside {@code rootElement}, but
 * the body contains an undeclared {@code invalidChild} element instead.
 */
61      private static final String XML_INVALID_WITH_DTD =
62              "<?xml version=\"1.0\"?>\n" +
63              "<!DOCTYPE rootElement\n" +
64              "[\n" +
65              "<!ELEMENT rootElement (child)>\n" +
66              "<!ELEMENT child (#PCDATA)>\n" +
67              "]\n" +
68              ">\n" +
69              "<rootElement>\n" +
70              "  <invalidChild></invalidChild>\n" +
71              "</rootElement>";
72  
73      /**
74       * See bug #1054:
75       * XML Rules ever report a line -1 and not the line/column where the error occurs
76       * @throws Exception any error
77       */
78      @Test
79      public void testLineNumbers() throws Exception {
80          LanguageVersionHandler xmlVersionHandler = Language.XML.getDefaultVersion().getLanguageVersionHandler();
81          Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions());
82          Node document = parser.parse(null, new StringReader(XML_TEST));
83  
84          assertNode(document, "document", 2);
85          assertLineNumbers(document, 1, 1, 19, 15);
86          Node dtdElement = document.jjtGetChild(0);
87          assertNode(dtdElement, "rootElement", 0);
88          assertLineNumbers(dtdElement, 3, 1, 10, 1);
89          Node rootElement = document.jjtGetChild(1);
90          assertNode(rootElement, "rootElement", 7);
91          assertLineNumbers(rootElement, 12, 14, 19, 15);
92          assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
93          assertLineNumbers(rootElement.jjtGetChild(0), 13, 5, 13, 30);
94          assertNode(rootElement.jjtGetChild(1), "comment", 0);
95          assertLineNumbers(rootElement.jjtGetChild(1), 13, 30, 13, 30);
96          assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
97          assertLineNumbers(rootElement.jjtGetChild(2), 14, 5, 14, 22);
98          Node child1 = rootElement.jjtGetChild(3);
99          assertNode(child1, "child1", 1, "test", "1");
100         assertLineNumbers(child1, 14, 22, 15, 14);
101         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
102         assertLineNumbers(child1.jjtGetChild(0), 14, 30, 15, 14);
103         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
104         assertLineNumbers(rootElement.jjtGetChild(4), 16, 5, 16, 13);
105         Node child2 = rootElement.jjtGetChild(5);
106         assertNode(child2, "child2", 3);
107         assertLineNumbers(child2, 16, 13, 18, 14);
108         assertTextNode(child2.jjtGetChild(0), "\\n      ");
109         assertLineNumbers(child2.jjtGetChild(0), 17, 7, 17, 16);
110         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
111         assertLineNumbers(child2.jjtGetChild(1), 17, 33, 17, 34);
112         assertTextNode(child2.jjtGetChild(2), "\\n    ");
113         assertLineNumbers(child2.jjtGetChild(2), 18, 5, 18, 14);
114         assertTextNode(rootElement.jjtGetChild(6), "\\n");
115         assertLineNumbers(rootElement.jjtGetChild(6), 19, 1, 19, 15);
116     }
117 
118     /**
119      * Verifies the default parsing behavior of the XML parser.
120      */
121     @Test
122     public void testDefaultParsing() {
123         LanguageVersionHandler xmlVersionHandler = Language.XML.getDefaultVersion().getLanguageVersionHandler();
124         Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions());
125         Node document = parser.parse(null, new StringReader(XML_TEST));
126 
127         assertNode(document, "document", 2);
128         Node dtdElement = document.jjtGetChild(0);
129         assertNode(dtdElement, "rootElement", 0);
130         Node rootElement = document.jjtGetChild(1);
131         assertNode(rootElement, "rootElement", 7);
132         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
133         assertNode(rootElement.jjtGetChild(1), "comment", 0);
134         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
135         Node child1 = rootElement.jjtGetChild(3);
136         assertNode(child1, "child1", 1, "test", "1");
137         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
138         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
139         Node child2 = rootElement.jjtGetChild(5);
140         assertNode(child2, "child2", 3);
141         assertTextNode(child2.jjtGetChild(0), "\\n      ");
142         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
143         assertTextNode(child2.jjtGetChild(2), "\\n    ");
144         assertTextNode(rootElement.jjtGetChild(6), "\\n");
145     }
146 
147     /**
148      * Verifies the parsing behavior of the XML parser with coalescing enabled.
149      */
150     @Test
151     public void testParsingCoalescingEnabled() {
152         LanguageVersionHandler xmlVersionHandler = Language.XML.getDefaultVersion().getLanguageVersionHandler();
153         XmlParserOptions parserOptions = new XmlParserOptions();
154         parserOptions.setCoalescing(true);
155         Parser parser = xmlVersionHandler.getParser(parserOptions);
156         Node document = parser.parse(null, new StringReader(XML_TEST));
157 
158         assertNode(document, "document", 2);
159         Node dtdElement = document.jjtGetChild(0);
160         assertNode(dtdElement, "rootElement", 0);
161         Node rootElement = document.jjtGetChild(1);
162         assertNode(rootElement, "rootElement", 7);
163         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
164         assertNode(rootElement.jjtGetChild(1), "comment", 0);
165         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
166         Node child1 = rootElement.jjtGetChild(3);
167         assertNode(child1, "child1", 1, "test", "1");
168         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
169         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
170         Node child2 = rootElement.jjtGetChild(5);
171         assertNode(child2, "child2", 1);
172         assertTextNode(child2.jjtGetChild(0), "\\n       cdata section \\n    ");
173         assertTextNode(rootElement.jjtGetChild(6), "\\n");
174     }
175 
176     /**
177      * Verifies the parsing behavior of the XML parser if entities are not expanded.
178      */
179     @Test
180     public void testParsingDoNotExpandEntities() {
181         LanguageVersionHandler xmlVersionHandler = Language.XML.getDefaultVersion().getLanguageVersionHandler();
182         XmlParserOptions parserOptions = new XmlParserOptions();
183         parserOptions.setExpandEntityReferences(false);
184         Parser parser = xmlVersionHandler.getParser(parserOptions);
185         Node document = parser.parse(null, new StringReader(XML_TEST));
186 
187         assertNode(document, "document", 2);
188         Node dtdElement = document.jjtGetChild(0);
189         assertNode(dtdElement, "rootElement", 0);
190         Node rootElement = document.jjtGetChild(1);
191         assertNode(rootElement, "rootElement", 7);
192         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
193         assertNode(rootElement.jjtGetChild(1), "comment", 0);
194         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
195         Node child1 = rootElement.jjtGetChild(3);
196         assertNode(child1, "child1", 3, "test", "1");
197         assertTextNode(child1.jjtGetChild(0), "entity: ");
198         assertNode(child1.jjtGetChild(1), "pmd", 1);
199         assertTextNode(child1.jjtGetChild(1).jjtGetChild(0), "Copyright: PMD");
200         assertTextNode(child1.jjtGetChild(2), "\\n    ");
201         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
202         Node child2 = rootElement.jjtGetChild(5);
203         assertNode(child2, "child2", 3);
204         assertTextNode(child2.jjtGetChild(0), "\\n      ");
205         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
206         assertTextNode(child2.jjtGetChild(2), "\\n    ");
207         assertTextNode(rootElement.jjtGetChild(6), "\\n");
208     }
209 
210     /**
211      * Verifies the parsing behavior of the XML parser if ignoring comments.
212      */
213     @Test
214     public void testParsingIgnoreComments() {
215         LanguageVersionHandler xmlVersionHandler = Language.XML.getDefaultVersion().getLanguageVersionHandler();
216         XmlParserOptions parserOptions = new XmlParserOptions();
217         parserOptions.setIgnoringComments(true);
218         Parser parser = xmlVersionHandler.getParser(parserOptions);
219         Node document = parser.parse(null, new StringReader(XML_TEST));
220 
221         assertNode(document, "document", 2);
222         Node dtdElement = document.jjtGetChild(0);
223         assertNode(dtdElement, "rootElement", 0);
224         Node rootElement = document.jjtGetChild(1);
225         assertNode(rootElement, "rootElement", 5);
226         assertTextNode(rootElement.jjtGetChild(0), "\\n    \\n    ");
227         Node child1 = rootElement.jjtGetChild(1);
228         assertNode(child1, "child1", 1, "test", "1");
229         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
230         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
231         Node child2 = rootElement.jjtGetChild(3);
232         assertNode(child2, "child2", 3);
233         assertTextNode(child2.jjtGetChild(0), "\\n      ");
234         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
235         assertTextNode(child2.jjtGetChild(2), "\\n    ");
236         assertTextNode(rootElement.jjtGetChild(4), "\\n");
237     }
238 
239     /**
240      * Verifies the parsing behavior of the XML parser if ignoring whitespaces in elements.
241      */
242     @Test
243     public void testParsingIgnoreElementContentWhitespace() {
244         LanguageVersionHandler xmlVersionHandler = Language.XML.getDefaultVersion().getLanguageVersionHandler();
245         XmlParserOptions parserOptions = new XmlParserOptions();
246         parserOptions.setIgnoringElementContentWhitespace(true);
247         Parser parser = xmlVersionHandler.getParser(parserOptions);
248         Node document = parser.parse(null, new StringReader(XML_TEST));
249 
250         assertNode(document, "document", 2);
251         Node dtdElement = document.jjtGetChild(0);
252         assertNode(dtdElement, "rootElement", 0);
253         Node rootElement = document.jjtGetChild(1);
254         assertNode(rootElement, "rootElement", 3);
255         assertNode(rootElement.jjtGetChild(0), "comment", 0);
256         Node child1 = rootElement.jjtGetChild(1);
257         assertNode(child1, "child1", 1, "test", "1");
258         assertTextNode(child1.jjtGetChild(0), "entity: Copyright: PMD\\n    ");
259         Node child2 = rootElement.jjtGetChild(2);
260         assertNode(child2, "child2", 3);
261         assertTextNode(child2.jjtGetChild(0), "\\n      ");
262         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
263         assertTextNode(child2.jjtGetChild(2), "\\n    ");
264     }
265 
266     /**
267      * Verifies the default parsing behavior of the XML parser with namespaces.
268      */
269     @Test
270     public void testDefaultParsingNamespaces() {
271         LanguageVersionHandler xmlVersionHandler = Language.XML.getDefaultVersion().getLanguageVersionHandler();
272         Parser parser = xmlVersionHandler.getParser(xmlVersionHandler.getDefaultParserOptions());
273         Node document = parser.parse(null, new StringReader(XML_NAMESPACE_TEST));
274 
275         assertNode(document, "document", 1);
276         Node rootElement = document.jjtGetChild(0);
277         assertNode(rootElement, "pmd:rootElement", 7);
278         Assert.assertEquals("http://pmd.sf.net", ((XmlNode)rootElement).getNode().getNamespaceURI());
279         Assert.assertEquals("pmd", ((XmlNode)rootElement).getNode().getPrefix());
280         Assert.assertEquals("rootElement", ((XmlNode)rootElement).getNode().getLocalName());
281         Assert.assertEquals("pmd:rootElement", ((XmlNode)rootElement).getNode().getNodeName());
282         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
283         assertNode(rootElement.jjtGetChild(1), "comment", 0);
284         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
285         Node child1 = rootElement.jjtGetChild(3);
286         assertNode(child1, "pmd:child1", 1, "test", "1");
287         assertTextNode(child1.jjtGetChild(0), "entity: &\\n    ");
288         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
289         Node child2 = rootElement.jjtGetChild(5);
290         assertNode(child2, "pmd:child2", 3);
291         assertTextNode(child2.jjtGetChild(0), "\\n      ");
292         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
293         assertTextNode(child2.jjtGetChild(2), "\\n    ");
294         assertTextNode(rootElement.jjtGetChild(6), "\\n");
295     }
296 
297     /**
298      * Verifies the default parsing behavior of the XML parser with namespaces but not namespace aware.
299      */
300     @Test
301     public void testParsingNotNamespaceAware() {
302         LanguageVersionHandler xmlVersionHandler = Language.XML.getDefaultVersion().getLanguageVersionHandler();
303         XmlParserOptions parserOptions = new XmlParserOptions();
304         parserOptions.setNamespaceAware(false);
305         Parser parser = xmlVersionHandler.getParser(parserOptions);
306         Node document = parser.parse(null, new StringReader(XML_NAMESPACE_TEST));
307 
308         assertNode(document, "document", 1);
309         Node rootElement = document.jjtGetChild(0);
310         assertNode(rootElement, "pmd:rootElement", 7, "xmlns:pmd", "http://pmd.sf.net");
311         Assert.assertNull(((XmlNode)rootElement).getNode().getNamespaceURI());
312         Assert.assertNull(((XmlNode)rootElement).getNode().getPrefix());
313         Assert.assertNull(((XmlNode)rootElement).getNode().getLocalName());
314         Assert.assertEquals("pmd:rootElement", ((XmlNode)rootElement).getNode().getNodeName());
315         assertTextNode(rootElement.jjtGetChild(0), "\\n    ");
316         assertNode(rootElement.jjtGetChild(1), "comment", 0);
317         assertTextNode(rootElement.jjtGetChild(2), "\\n    ");
318         Node child1 = rootElement.jjtGetChild(3);
319         assertNode(child1, "pmd:child1", 1, "test", "1");
320         assertTextNode(child1.jjtGetChild(0), "entity: &\\n    ");
321         assertTextNode(rootElement.jjtGetChild(4), "\\n    ");
322         Node child2 = rootElement.jjtGetChild(5);
323         assertNode(child2, "pmd:child2", 3);
324         assertTextNode(child2.jjtGetChild(0), "\\n      ");
325         assertTextNode(child2.jjtGetChild(1), " cdata section ", "cdata-section");
326         assertTextNode(child2.jjtGetChild(2), "\\n    ");
327         assertTextNode(rootElement.jjtGetChild(6), "\\n");
328     }
329 
330     /**
331      * Verifies the parsing behavior of the XML parser with validation on.
332      * @throws UnsupportedEncodingException error
333      */
334     @Test
335     public void testParsingWithValidation() throws UnsupportedEncodingException {
336         LanguageVersionHandler xmlVersionHandler = Language.XML.getDefaultVersion().getLanguageVersionHandler();
337         XmlParserOptions parserOptions = new XmlParserOptions();
338         parserOptions.setValidating(true);
339         Parser parser = xmlVersionHandler.getParser(parserOptions);
340         PrintStream oldErr = System.err;
341         try {
342             ByteArrayOutputStream bos = new ByteArrayOutputStream();
343             System.setErr(new PrintStream(bos));
344             Node document = parser.parse(null, new StringReader(XML_INVALID_WITH_DTD));
345             Assert.assertNotNull(document);
346             String output = bos.toString("UTF-8");
347             Assert.assertTrue(output.contains("Element type \"invalidChild\" must be declared."));
348             Assert.assertTrue(output.contains("The content of element type \"rootElement\" must match \"(child)\"."));
349             Assert.assertEquals(2, document.jjtGetNumChildren());
350             Assert.assertEquals("invalidChild", String.valueOf(document.jjtGetChild(1).jjtGetChild(1)));
351         } finally {
352             System.setErr(oldErr);
353         }
354     }
355 
356     /**
357      * Asserts a single node inclusive attributes.
358      * @param node the node
359      * @param toString the to String representation to expect
360      * @param childs number of childs
361      * @param atts attributes - each object pair forms one attribute: first name, then value.
362      */
363     private void assertNode(Node node, String toString, int childs, Object ... atts) {
364         Assert.assertEquals(toString, String.valueOf(node));
365         Assert.assertEquals(childs, node.jjtGetNumChildren());
366         Iterator<Attribute> attributeIterator = ((XmlNode)node).getAttributeIterator();
367         if (atts != null) {
368             for (int i = 0; i < atts.length; i += 2) {
369                 Assert.assertTrue(attributeIterator.hasNext());
370                 String name = String.valueOf(atts[i]);
371                 Object value = atts[i + 1];
372                 Attribute attribute = attributeIterator.next();
373                 Assert.assertEquals(name, attribute.getName());
374                 Assert.assertEquals(value, attribute.getValue());
375             }
376         }
377         Assert.assertFalse(attributeIterator.hasNext());
378     }
379 
380     /**
381      * Assert a single text node.
382      * @param node the node to check
383      * @param text the text to expect
384      */
385     private void assertTextNode(Node node, String text) {
386         assertTextNode(node, text, "text");
387     }
388 
389     /**
390      * Assert a single text node.
391      *
392      * @param node the node to check
393      * @param text the text to expect
394      * @param toString the to string representation
395      */
396     private void assertTextNode(Node node, String text, String toString) {
397         Assert.assertEquals(toString, String.valueOf(node));
398         Assert.assertEquals(0, node.jjtGetNumChildren());
399         Assert.assertEquals(text, StringUtil.escapeWhitespace(node.getImage()));
400         Iterator<Attribute> attributeIterator = ((XmlNode)node).getAttributeIterator();
401         Assert.assertTrue(attributeIterator.hasNext());
402         Attribute attribute = attributeIterator.next();
403         Assert.assertEquals("Image", attribute.getName());
404         Assert.assertEquals(text, StringUtil.escapeWhitespace(attribute.getValue()));
405         Assert.assertFalse(attributeIterator.hasNext());
406     }
407 
408     /**
409      * Assert the line numbers of a node.
410      *
411      * @param node the node
412      * @param beginLine the begin line
413      * @param beginColumn the begin column
414      * @param endLine the end line
415      * @param endColumn the end column
416      */
417     private void assertLineNumbers(Node node, int beginLine, int beginColumn, int endLine, int endColumn) {
418         Assert.assertEquals("begin line wrong", beginLine, node.getBeginLine());
419         Assert.assertEquals("begin column wrong", beginColumn, node.getBeginColumn());
420         Assert.assertEquals("end line wrong", endLine, node.getEndLine());
421         Assert.assertEquals("end column wrong", endColumn, node.getEndColumn());
422     }
423 }