View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.StringReader;
7   import java.util.Properties;
8   
9   import net.sourceforge.pmd.lang.LanguageVersion;
10  import net.sourceforge.pmd.lang.LanguageVersionHandler;
11  import net.sourceforge.pmd.lang.TokenManager;
12  import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
13  import net.sourceforge.pmd.lang.java.ast.Token;
14  
15  public class JavaTokenizer implements Tokenizer {
16  
17      public static final String IGNORE_LITERALS = "ignore_literals";
18      public static final String IGNORE_IDENTIFIERS = "ignore_identifiers";
19      public static final String IGNORE_ANNOTATIONS = "ignore_annotations";
20      public static final String CPD_START = "\"CPD-START\"";
21      public static final String CPD_END = "\"CPD-END\"";
22  
23      private boolean ignoreAnnotations;
24      private boolean ignoreLiterals;
25      private boolean ignoreIdentifiers;
26  
27      public void setProperties(Properties properties) {
28          ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false"));
29          ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
30          ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
31      }
32  
33      public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
34          StringBuilder stringBuilder = sourceCode.getCodeBuffer();
35  
36          // Note that Java version is irrelevant for tokenizing
37          LanguageVersionHandler languageVersionHandler = LanguageVersion.JAVA_14.getLanguageVersionHandler();
38          String fileName = sourceCode.getFileName();
39          TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions()).getTokenManager(
40                  fileName, new StringReader(stringBuilder.toString()));
41          Token currentToken = (Token) tokenMgr.getNextToken();
42  
43          TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations);
44  
45          while (currentToken.image.length() > 0) {
46              discarder.updateState(currentToken);
47  
48              if (discarder.isDiscarding()) {
49                  currentToken = (Token) tokenMgr.getNextToken();
50                  continue;
51              }
52  
53              processToken(tokenEntries, fileName, currentToken);
54              currentToken = (Token) tokenMgr.getNextToken();
55          }
56          tokenEntries.add(TokenEntry.getEOF());
57      }
58  
59      private void processToken(Tokens tokenEntries, String fileName, Token currentToken) {
60          String image = currentToken.image;
61          if (ignoreLiterals
62                  && (currentToken.kind == JavaParserConstants.STRING_LITERAL
63                  || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
64                  || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL
65                  || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
66              image = String.valueOf(currentToken.kind);
67          }
68          if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
69              image = String.valueOf(currentToken.kind);
70          }
71          tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
72      }
73  
74      public void setIgnoreLiterals(boolean ignore) {
75          this.ignoreLiterals = ignore;
76      }
77  
78      public void setIgnoreIdentifiers(boolean ignore) {
79          this.ignoreIdentifiers = ignore;
80      }
81  
82      public void setIgnoreAnnotations(boolean ignoreAnnotations) {
83          this.ignoreAnnotations = ignoreAnnotations;
84      }
85  
86      /**
87       * The {@link TokenDiscarder} consumes token by token and maintains state.
88       * It can detect, whether the current token belongs to an annotation and whether
89       * the current token should be discarded by CPD.
90       * <p>
91       * By default, it discards semicolons, package and import statements, and enables CPD suppression.
92       * Optionally, all annotations can be ignored, too.
93       * </p>
94       */
95      private static class TokenDiscarder {
96          private boolean isAnnotation = false;
97          private boolean nextTokenEndsAnnotation = false;
98          private int annotationStack = 0;
99  
100         private boolean discardingSemicolon = false;
101         private boolean discardingKeywords = false;
102         private boolean discardingSuppressing = false;
103         private boolean discardingAnnotations = false;
104         private boolean ignoreAnnotations = false;
105 
106         public TokenDiscarder(boolean ignoreAnnotations) {
107             this.ignoreAnnotations = ignoreAnnotations;
108         }
109 
110         public void updateState(Token currentToken) {
111             detectAnnotations(currentToken);
112 
113             skipSemicolon(currentToken);
114             skipPackageAndImport(currentToken);
115             skipCPDSuppression(currentToken);
116             if (ignoreAnnotations) {
117                 skipAnnotations();
118             }
119         }
120 
121         public void skipPackageAndImport(Token currentToken) {
122             if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
123                 discardingKeywords = true;
124             } else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
125                 discardingKeywords = false;
126             }
127         }
128 
129         public void skipSemicolon(Token currentToken) {
130             if (currentToken.kind == JavaParserConstants.SEMICOLON) {
131                 discardingSemicolon = true;
132             } else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
133                 discardingSemicolon = false;
134             }
135         }
136 
137         public void skipCPDSuppression(Token currentToken) {
138             //if processing an annotation, look for a CPD-START or CPD-END
139             if (isAnnotation) {
140                 if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_START.equals(currentToken.image)) {
141                     discardingSuppressing = true;
142                 } else if (discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_END.equals(currentToken.image)) {
143                     discardingSuppressing = false;
144                 }
145             }
146         }
147 
148         public void skipAnnotations() {
149             if (!discardingAnnotations && isAnnotation) {
150                 discardingAnnotations = true;
151             } else if (discardingAnnotations && !isAnnotation) {
152                 discardingAnnotations = false;
153             }
154         }
155 
156         public boolean isDiscarding() {
157             boolean result = discardingSemicolon || discardingKeywords || discardingAnnotations || discardingSuppressing;
158             return result;
159         }
160 
161         public void detectAnnotations(Token currentToken) {
162             if (isAnnotation && nextTokenEndsAnnotation) {
163                 isAnnotation = false;
164                 nextTokenEndsAnnotation = false;
165             }
166             if (isAnnotation) {
167                 if (currentToken.kind == JavaParserConstants.LPAREN) {
168                     annotationStack++;
169                 } else if (currentToken.kind == JavaParserConstants.RPAREN) {
170                     annotationStack--;
171                     if (annotationStack == 0) {
172                         nextTokenEndsAnnotation = true;
173                     }
174                 } else if (annotationStack == 0 && currentToken.kind != JavaParserConstants.IDENTIFIER &&  currentToken.kind != JavaParserConstants.LPAREN) {
175                     isAnnotation = false;
176                 }
177             }
178             if (currentToken.kind == JavaParserConstants.AT) {
179                 isAnnotation = true;
180             }
181         }
182     }
183 }