View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import net.sourceforge.pmd.TargetJDK1_4;
7   import net.sourceforge.pmd.ast.JavaParserConstants;
8   import net.sourceforge.pmd.ast.JavaParserTokenManager;
9   import net.sourceforge.pmd.ast.Token;
10  
11  import java.io.StringReader;
12  import java.util.Properties;
13  
14  public class JavaTokenizer implements Tokenizer {
15  
16      public static final String IGNORE_LITERALS = "ignore_literals";
17      public static final String IGNORE_IDENTIFIERS = "ignore_identifiers";
18  
19      private boolean ignoreLiterals;
20      private boolean ignoreIdentifiers;
21  
22      public void setProperties(Properties properties) {
23          ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
24          ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
25      }
26  
27      public void tokenize(SourceCode tokens, Tokens tokenEntries) {
28          StringBuffer buffer = tokens.getCodeBuffer();
29  
30          /*
31          I'm doing a sort of State pattern thing here where
32          this goes into "discarding" mode when it hits an import or package
33          keyword and goes back into "accumulate mode" when it hits a semicolon.
34          This could probably be turned into some objects.
35          */
36          // Note that Java version is irrelevant for tokenizing
37          JavaParserTokenManager tokenMgr = new TargetJDK1_4().createJavaParserTokenManager(new StringReader(buffer.toString()));
38          Token currentToken = tokenMgr.getNextToken();
39          boolean inDiscardingState = false;
40          while (currentToken.image.length() > 0) {
41              if (currentToken.kind == JavaParserConstants.IMPORT || currentToken.kind == JavaParserConstants.PACKAGE) {
42                  inDiscardingState = true;
43                  currentToken = tokenMgr.getNextToken();
44                  continue;
45              }
46  
47              if (inDiscardingState && currentToken.kind == JavaParserConstants.SEMICOLON) {
48                  inDiscardingState = false;
49              }
50  
51              if (inDiscardingState) {
52                  currentToken = tokenMgr.getNextToken();
53                  continue;
54              }
55  
56              if (currentToken.kind != JavaParserConstants.SEMICOLON) {
57                  String image = currentToken.image;
58                  if (ignoreLiterals && (currentToken.kind == JavaParserConstants.STRING_LITERAL || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
59                          || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
60                      image = String.valueOf(currentToken.kind);
61                  }
62                  if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
63                      image = String.valueOf(currentToken.kind);
64                  }
65                  tokenEntries.add(new TokenEntry(image, tokens.getFileName(), currentToken.beginLine));
66              }
67  
68              currentToken = tokenMgr.getNextToken();
69          }
70          tokenEntries.add(TokenEntry.getEOF());
71      }
72  
73      public void setIgnoreLiterals(boolean ignore) {
74          this.ignoreLiterals = ignore;
75      }
76  
77      public void setIgnoreIdentifiers(boolean ignore) {
78          this.ignoreIdentifiers = ignore;
79      }
80  }