View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.BufferedReader;
7   import java.io.CharArrayReader;
8   import java.util.NoSuchElementException;
9   import java.util.StringTokenizer;
10  
11  /**
12   * This class does a best-guess try-anything tokenization.
13   *
14   * @author jheintz
15   */
16  public class AnyTokenizer implements Tokenizer {
17      public static final String TOKENS = " \t!#$%^&*(){}-=+<>/\\`~;:";
18  
19      public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
20          StringBuffer sb = sourceCode.getCodeBuffer();
21          BufferedReader reader = new BufferedReader(new CharArrayReader(sb.toString().toCharArray()));
22          try {
23              int lineNumber = 1;
24              String line = reader.readLine();
25              while (line != null) {
26                  StringTokenizer tokenizer = new StringTokenizer(line, TOKENS, true);
27                  try {
28                      String token = tokenizer.nextToken();
29                      while (token != null) {
30                          if (!token.equals(" ") && !token.equals("\t")) {
31                              tokenEntries.add(new TokenEntry(token, sourceCode.getFileName(), lineNumber));
32                          }
33                          token = tokenizer.nextToken();
34                      }
35                  } catch (NoSuchElementException ex) {
36                      // done with tokens
37                  }
38                  // advance iteration variables
39                  line = reader.readLine();
40                  lineNumber++;
41              }
42          } catch (Exception ex) {
43              ex.printStackTrace();
44          } finally {
45              try {
46                  reader.close();
47              } catch (Exception ex) {
48              }
49              tokenEntries.add(TokenEntry.getEOF());
50          }
51      }
52  }