View Javadoc
1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.StringReader;
7   import java.util.Properties;
8   import java.util.logging.Level;
9   import java.util.logging.Logger;
10  
11  import net.sourceforge.pmd.lang.ast.SimpleCharStream;
12  import net.sourceforge.pmd.lang.plsql.ast.PLSQLParserConstants;
13  import net.sourceforge.pmd.lang.plsql.ast.PLSQLParserTokenManager;
14  import net.sourceforge.pmd.lang.plsql.ast.Token;
15  
16  public class PLSQLTokenizer implements Tokenizer{
17      private final static Logger LOGGER = Logger.getLogger(PLSQLTokenizer.class.getName());
18  
19      public static final String IGNORE_COMMENTS = "ignore_comments";
20      public static final String IGNORE_IDENTIFIERS = "ignore_identifiers";
21      public static final String IGNORE_LITERALS = "ignore_literals";
22  
23      private boolean ignoreComments;
24      private boolean ignoreIdentifiers;
25      private boolean ignoreLiterals;
26  
27      public void setProperties(Properties properties) {
28  		/* The Tokenizer is derived from PLDoc, in which comments are very important
29  		 * When looking for duplication, we are probably not interested in comment variation,
30  		 * so we shall default ignoreComments to true
31  		*/
32          ignoreComments = Boolean.parseBoolean(properties.getProperty(IGNORE_COMMENTS, "true"));
33          ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
34          ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
35      }
36  
37      public void setIgnoreComments(boolean ignore) {
38  	this.ignoreComments = ignore;
39      }
40  
41      public void setIgnoreLiterals(boolean ignore) {
42  	this.ignoreLiterals = ignore;
43      }
44  
45      public void setIgnoreIdentifiers(boolean ignore) {
46  	this.ignoreIdentifiers = ignore;
47      }
48  
49          /**
50           * Read Reader from SourceCode and output an ordered tree of PLSQL tokens.
51           * @param sourceCode PLSQL source in file, string or database (any suitable object that can return
52           * a Reader).
53           * @param tokenEntries  Derived based on PLSQL Abstract Syntax Tree (derived from PLDOc parser.) 
54           */
55  	public void tokenize (SourceCode sourceCode, Tokens tokenEntries )
56  	{
57          long encounteredTokens = 0;
58          long addedTokens = 0;
59  
60          if (LOGGER.isLoggable(Level.FINE)) {
61      		LOGGER.fine("PLSQLTokenizer: ignoreComments=="+ignoreComments);
62      		LOGGER.fine("PLSQLTokenizer: ignoreIdentifiers=="+ignoreIdentifiers);
63      		LOGGER.fine("PLSQLTokenizer: ignoreLiterals=="+ignoreLiterals);
64          }
65  
66  		String fileName = sourceCode.getFileName();
67  		StringBuilder sb = sourceCode.getCodeBuffer();
68  
69  		PLSQLParserTokenManager tokenMgr = new PLSQLParserTokenManager( new SimpleCharStream( new StringReader(sb.toString()))); 
70  		Token currentToken = tokenMgr.getNextToken();
71  		while (currentToken.image.length()  > 0)
72  		{
73  			String image = currentToken.image;
74  
75                          encounteredTokens++;
76  			if (ignoreComments && 
77  			    ( currentToken.kind == PLSQLParserConstants.SINGLE_LINE_COMMENT
78  			    ||currentToken.kind == PLSQLParserConstants.MULTI_LINE_COMMENT
79  			    ||currentToken.kind == PLSQLParserConstants.FORMAL_COMMENT
80  			    ||currentToken.kind == PLSQLParserConstants.COMMENT
81  			    ||currentToken.kind == PLSQLParserConstants.IN_MULTI_LINE_COMMENT
82  			    ||currentToken.kind == PLSQLParserConstants.IN_FORMAL_COMMENT
83  				)
84  				) {
85  				image = String.valueOf(currentToken.kind);
86  			}
87  
88  			if (ignoreIdentifiers && 
89  			    currentToken.kind == PLSQLParserConstants.IDENTIFIER
90  				) {
91  				image = String.valueOf(currentToken.kind);
92  			}
93  
94  			if (ignoreLiterals
95  				&& (   
96  					   currentToken.kind == PLSQLParserConstants.UNSIGNED_NUMERIC_LITERAL 
97  					|| currentToken.kind == PLSQLParserConstants.FLOAT_LITERAL
98  					|| currentToken.kind == PLSQLParserConstants.INTEGER_LITERAL
99  					|| currentToken.kind == PLSQLParserConstants.CHARACTER_LITERAL
100 				    || currentToken.kind == PLSQLParserConstants.STRING_LITERAL
101 					|| currentToken.kind == PLSQLParserConstants.QUOTED_LITERAL
102 					)
103 				) {
104 				image = String.valueOf(currentToken.kind);
105 			}
106 
107 			tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
108                         addedTokens++;
109 			currentToken = tokenMgr.getNextToken();
110 		}
111 		tokenEntries.add(TokenEntry.getEOF() );
112 		if (LOGGER.isLoggable(Level.FINE)) {
113             LOGGER.fine(sourceCode.getFileName() 
114                         + ": encountered " + encounteredTokens + " tokens;"
115                         + " added " + addedTokens + " tokens"
116                        );
117 		}
118 	}
119 
120 
121 
122 }
123 
124