View Javadoc
1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.Reader;
7   import java.io.StringReader;
8   
9   import net.sourceforge.pmd.lang.LanguageRegistry;
10  import net.sourceforge.pmd.lang.LanguageVersionHandler;
11  import net.sourceforge.pmd.lang.TokenManager;
12  import net.sourceforge.pmd.lang.ast.TokenMgrError;
13  import net.sourceforge.pmd.lang.python.PythonLanguageModule;
14  import net.sourceforge.pmd.lang.python.ast.Token;
15  import net.sourceforge.pmd.util.IOUtil;
16  
17  import org.apache.commons.io.IOUtils;
18  
19  /**
20   * The Python tokenizer.
21   */
22  public class PythonTokenizer implements Tokenizer {
23  
24      @Override
25      public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
26          StringBuilder buffer = sourceCode.getCodeBuffer();
27          Reader reader = null;
28          try {
29              LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(PythonLanguageModule.NAME)
30                      .getDefaultVersion().getLanguageVersionHandler();
31              reader = new StringReader(buffer.toString());
32              reader = IOUtil.skipBOM(reader);
33              TokenManager tokenManager = languageVersionHandler.getParser(
34                      languageVersionHandler.getDefaultParserOptions()).getTokenManager(sourceCode.getFileName(), reader);
35              Token currentToken = (Token) tokenManager.getNextToken();
36              while (currentToken.image.length() > 0) {
37                  tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine));
38                  currentToken = (Token) tokenManager.getNextToken();
39              }
40              tokenEntries.add(TokenEntry.getEOF());
41              System.err.println("Added " + sourceCode);
42          } catch (TokenMgrError err) {
43              err.printStackTrace();
44              System.err.println("Skipping " + sourceCode + " due to parse error");
45              tokenEntries.add(TokenEntry.getEOF());
46          } finally {
47              IOUtils.closeQuietly(reader);
48          }
49      }
50  }