View Javadoc
1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.lang.xml.ast;
5   
6   import java.util.Map;
7   import java.util.TreeMap;
8   import java.util.regex.Matcher;
9   import java.util.regex.Pattern;
10  
11  import org.apache.commons.lang3.StringUtils;
12  import org.w3c.dom.Document;
13  import org.w3c.dom.DocumentType;
14  import org.w3c.dom.NamedNodeMap;
15  import org.w3c.dom.Node;
16  import org.w3c.dom.NodeList;
17  import org.w3c.dom.ProcessingInstruction;
18  
19  /**
20   *
21   */
22  class DOMLineNumbers {
23      private final Document document;
24      private final String xmlString;
25      private Map<Integer, Integer> lines;
26  
27      public DOMLineNumbers(Document document, String xmlString) {
28          this.document = document;
29          this.xmlString = xmlString;
30      }
31      
32      public void determine() {
33          calculateLinesMap();
34          determineLocation(document, 0);
35      }
36      private int determineLocation(Node n, int index) {
37          int nextIndex = index;
38          if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
39              nextIndex = xmlString.indexOf("<!DOCTYPE", nextIndex);
40          } else if (n.getNodeType() == Node.COMMENT_NODE) {
41              nextIndex = xmlString.indexOf("<!--", nextIndex);
42          } else if (n.getNodeType() == Node.ELEMENT_NODE) {
43              nextIndex = xmlString.indexOf("<" + n.getNodeName(), nextIndex);
44          } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
45              nextIndex = xmlString.indexOf("<![CDATA[", nextIndex);
46          } else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
47              ProcessingInstruction pi = (ProcessingInstruction)n;
48              nextIndex = xmlString.indexOf("<?" + pi.getTarget(), nextIndex);
49          } else if (n.getNodeType() == Node.TEXT_NODE) {
50              String te = unexpandEntities(n, n.getNodeValue());
51              int newIndex = xmlString.indexOf(te, nextIndex);
52              if (newIndex > 0) {
53                  nextIndex = newIndex;
54              }
55          } else if (n.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
56              nextIndex = xmlString.indexOf("&" + n.getNodeName() + ";", nextIndex);
57          }
58          setBeginLocation(n, nextIndex);
59          if (n.hasChildNodes()) {
60              NodeList childs = n.getChildNodes();
61              for (int i = 0; i < childs.getLength(); i++) {
62                  nextIndex = determineLocation(childs.item(i), nextIndex);
63              }
64          }
65          if (n.getNodeType() == Node.ELEMENT_NODE) {
66              nextIndex += 2 + n.getNodeName().length() + 1; // </nodename>
67          } else if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
68              Node nextSibling = n.getNextSibling();
69              if (nextSibling.getNodeType() == Node.ELEMENT_NODE) {
70                  nextIndex = xmlString.indexOf("<" + nextSibling.getNodeName(), nextIndex) - 1;
71              } else if (nextSibling.getNodeType() == Node.COMMENT_NODE) {
72                  nextIndex = xmlString.indexOf("<!--", nextIndex);
73              } else {
74                  nextIndex = xmlString.indexOf(">", nextIndex);
75              }
76          } else if (n.getNodeType() == Node.COMMENT_NODE) {
77              nextIndex += 4 + 3; // <!-- and -->
78              nextIndex += n.getNodeValue().length();
79          } else if (n.getNodeType() == Node.TEXT_NODE) {
80              String te = unexpandEntities(n, n.getNodeValue());
81              nextIndex += te.length();
82          } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
83              nextIndex += "<![CDATA[".length() + n.getNodeValue().length() + "]]>".length();
84          } else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
85              ProcessingInstruction pi = (ProcessingInstruction)n;
86              nextIndex += "<?".length() + pi.getTarget().length() + "?>".length() + pi.getData().length();
87          }
88          setEndLocation(n, nextIndex - 1);
89          return nextIndex;
90      }
91  
92      private String unexpandEntities(Node n, String te) {
93          String result = te;
94          DocumentType doctype = n.getOwnerDocument().getDoctype();
95          // implicit entities
96          result = result.replaceAll(Matcher.quoteReplacement("&"), "&amp;");
97          result = result.replaceAll(Matcher.quoteReplacement("<"), "&lt;");
98          result = result.replaceAll(Matcher.quoteReplacement(">"), "&gt;");
99          result = result.replaceAll(Matcher.quoteReplacement("\""), "&quot;");
100         result = result.replaceAll(Matcher.quoteReplacement("'"), "&apos;");
101 
102         if (doctype != null) {
103             NamedNodeMap entities = doctype.getEntities();
104             String internalSubset = doctype.getInternalSubset();
105             if (internalSubset == null) {
106                 internalSubset = "";
107             }
108             for (int i = 0; i < entities.getLength(); i++) {
109                 Node item = entities.item(i);
110                 String entityName = item.getNodeName();
111                 Node firstChild = item.getFirstChild();
112                 if (firstChild != null) {
113                     result = result.replaceAll(Matcher.quoteReplacement(firstChild.getNodeValue()), "&" + entityName + ";");
114                 } else {
115                     Matcher m = Pattern.compile(Matcher.quoteReplacement("<!ENTITY " + entityName + " ") + "[']([^']*)[']>").matcher(internalSubset);
116                     if (m.find()) {
117                         result = result.replaceAll(Matcher.quoteReplacement(m.group(1)), "&" + entityName + ";");
118                     }
119                 }
120             }
121         }
122         return result;
123     }
124     private void setBeginLocation(Node n, int index) {
125         if (n != null) {
126             n.setUserData(XmlNode.BEGIN_LINE, toLine(index), null);
127             n.setUserData(XmlNode.BEGIN_COLUMN, toColumn(index), null);
128         }
129     }
130     private void setEndLocation(Node n, int index) {
131         if (n != null) {
132             n.setUserData(XmlNode.END_LINE, toLine(index), null);
133             n.setUserData(XmlNode.END_COLUMN, toColumn(index), null);
134         }
135     }
136     
137     private void calculateLinesMap() {
138         lines = new TreeMap<>();
139         int index = -1;
140         int count = StringUtils.countMatches(xmlString, "\n");
141         for (int line = 1; line <= count; line++) {
142             lines.put(line, index + 1);
143             index = xmlString.indexOf("\n", index + 1);
144         }
145         lines.put(count + 1, index + 1);
146     }
147     
148     private int toLine(int index) {
149         int line = 1;
150         for (Map.Entry<Integer, Integer> e : lines.entrySet()) {
151             line = e.getKey();
152             if (e.getValue() > index) {
153                 line--;
154                 break;
155             }
156         }
157         return line;
158     }
159     private int toColumn(int index) {
160         int line = toLine(index);
161         Integer lineStart = lines.get(line);
162         if (lineStart == null) {
163             lineStart = lines.get(lines.size() - 1);
164         }
165         int column = index - lineStart;
166         return column + 1;
167     }
168 
169 }