View Javadoc
1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.StringReader;
7   import java.util.Properties;
8   
9   import net.sourceforge.pmd.lang.LanguageRegistry;
10  import net.sourceforge.pmd.lang.LanguageVersionHandler;
11  import net.sourceforge.pmd.lang.TokenManager;
12  import net.sourceforge.pmd.lang.java.JavaLanguageModule;
13  import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
14  import net.sourceforge.pmd.lang.java.ast.Token;
15  
16  public class JavaTokenizer implements Tokenizer {
17  
18      public static final String CPD_START = "\"CPD-START\"";
19      public static final String CPD_END = "\"CPD-END\"";
20  
21      private boolean ignoreAnnotations;
22      private boolean ignoreLiterals;
23      private boolean ignoreIdentifiers;
24  
25      public void setProperties(Properties properties) {
26          ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false"));
27          ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
28          ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
29      }
30  
31      public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
32          StringBuilder stringBuilder = sourceCode.getCodeBuffer();
33  
34          // Note that Java version is irrelevant for tokenizing
35          LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JavaLanguageModule.NAME).getVersion("1.4").getLanguageVersionHandler();
36          String fileName = sourceCode.getFileName();
37          TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions()).getTokenManager(
38                  fileName, new StringReader(stringBuilder.toString()));
39          Token currentToken = (Token) tokenMgr.getNextToken();
40  
41          TokenDiscarder discarder = new TokenDiscarder(ignoreAnnotations);
42  
43          while (currentToken.image.length() > 0) {
44              discarder.updateState(currentToken);
45  
46              if (discarder.isDiscarding()) {
47                  currentToken = (Token) tokenMgr.getNextToken();
48                  continue;
49              }
50  
51              processToken(tokenEntries, fileName, currentToken);
52              currentToken = (Token) tokenMgr.getNextToken();
53          }
54          tokenEntries.add(TokenEntry.getEOF());
55      }
56  
57      private void processToken(Tokens tokenEntries, String fileName, Token currentToken) {
58          String image = currentToken.image;
59          if (ignoreLiterals
60                  && (currentToken.kind == JavaParserConstants.STRING_LITERAL
61                  || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
62                  || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL
63                  || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
64              image = String.valueOf(currentToken.kind);
65          }
66          if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
67              image = String.valueOf(currentToken.kind);
68          }
69          tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
70      }
71  
72      public void setIgnoreLiterals(boolean ignore) {
73          this.ignoreLiterals = ignore;
74      }
75  
76      public void setIgnoreIdentifiers(boolean ignore) {
77          this.ignoreIdentifiers = ignore;
78      }
79  
80      public void setIgnoreAnnotations(boolean ignoreAnnotations) {
81          this.ignoreAnnotations = ignoreAnnotations;
82      }
83  
84      /**
85       * The {@link TokenDiscarder} consumes token by token and maintains state.
86       * It can detect, whether the current token belongs to an annotation and whether
87       * the current token should be discarded by CPD.
88       * <p>
89       * By default, it discards semicolons, package and import statements, and enables CPD suppression.
90       * Optionally, all annotations can be ignored, too.
91       * </p>
92       */
93      private static class TokenDiscarder {
94          private boolean isAnnotation = false;
95          private boolean nextTokenEndsAnnotation = false;
96          private int annotationStack = 0;
97  
98          private boolean discardingSemicolon = false;
99          private boolean discardingKeywords = false;
100         private boolean discardingSuppressing = false;
101         private boolean discardingAnnotations = false;
102         private boolean ignoreAnnotations = false;
103 
104         public TokenDiscarder(boolean ignoreAnnotations) {
105             this.ignoreAnnotations = ignoreAnnotations;
106         }
107 
108         public void updateState(Token currentToken) {
109             detectAnnotations(currentToken);
110 
111             skipSemicolon(currentToken);
112             skipPackageAndImport(currentToken);
113             skipCPDSuppression(currentToken);
114             if (ignoreAnnotations) {
115                 skipAnnotations();
116             }
117         }
118 
119         public void skipPackageAndImport(Token currentToken) {
120             if (currentToken.kind == JavaParserConstants.PACKAGE || currentToken.kind == JavaParserConstants.IMPORT) {
121                 discardingKeywords = true;
122             } else if (discardingKeywords && currentToken.kind == JavaParserConstants.SEMICOLON) {
123                 discardingKeywords = false;
124             }
125         }
126 
127         public void skipSemicolon(Token currentToken) {
128             if (currentToken.kind == JavaParserConstants.SEMICOLON) {
129                 discardingSemicolon = true;
130             } else if (discardingSemicolon && currentToken.kind != JavaParserConstants.SEMICOLON) {
131                 discardingSemicolon = false;
132             }
133         }
134 
135         public void skipCPDSuppression(Token currentToken) {
136             //if processing an annotation, look for a CPD-START or CPD-END
137             if (isAnnotation) {
138                 if (!discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_START.equals(currentToken.image)) {
139                     discardingSuppressing = true;
140                 } else if (discardingSuppressing && currentToken.kind == JavaParserConstants.STRING_LITERAL && CPD_END.equals(currentToken.image)) {
141                     discardingSuppressing = false;
142                 }
143             }
144         }
145 
146         public void skipAnnotations() {
147             if (!discardingAnnotations && isAnnotation) {
148                 discardingAnnotations = true;
149             } else if (discardingAnnotations && !isAnnotation) {
150                 discardingAnnotations = false;
151             }
152         }
153 
154         public boolean isDiscarding() {
155             boolean result = discardingSemicolon || discardingKeywords || discardingAnnotations || discardingSuppressing;
156             return result;
157         }
158 
159         public void detectAnnotations(Token currentToken) {
160             if (isAnnotation && nextTokenEndsAnnotation) {
161                 isAnnotation = false;
162                 nextTokenEndsAnnotation = false;
163             }
164             if (isAnnotation) {
165                 if (currentToken.kind == JavaParserConstants.LPAREN) {
166                     annotationStack++;
167                 } else if (currentToken.kind == JavaParserConstants.RPAREN) {
168                     annotationStack--;
169                     if (annotationStack == 0) {
170                         nextTokenEndsAnnotation = true;
171                     }
172                 } else if (annotationStack == 0 && currentToken.kind != JavaParserConstants.IDENTIFIER &&  currentToken.kind != JavaParserConstants.LPAREN) {
173                     isAnnotation = false;
174                 }
175             }
176             if (currentToken.kind == JavaParserConstants.AT) {
177                 isAnnotation = true;
178             }
179         }
180     }
181 }