View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.StringReader;
7   import java.util.Properties;
8   import java.util.logging.Logger;
9   
10  import net.sourceforge.pmd.lang.ast.SimpleCharStream;
11  import net.sourceforge.pmd.lang.plsql.ast.PLSQLParserConstants;
12  import net.sourceforge.pmd.lang.plsql.ast.PLSQLParserTokenManager;
13  import net.sourceforge.pmd.lang.plsql.ast.Token;
14  
15  public class PLSQLTokenizer implements Tokenizer{
16      private final static Logger LOGGER = Logger.getLogger(PLSQLTokenizer.class.getName());
17  
18      public static final String IGNORE_COMMENTS = "ignore_comments";
19      public static final String IGNORE_IDENTIFIERS = "ignore_identifiers";
20      public static final String IGNORE_LITERALS = "ignore_literals";
21  
22      private boolean ignoreComments;
23      private boolean ignoreIdentifiers;
24      private boolean ignoreLiterals;
25  
26      public void setProperties(Properties properties) {
27  		/* The Tokenizer is derived from PLDoc, in which comments are very important
28  		 * When looking for duplication, we are probably not interested in comment variation,
29  		 * so we shall default ignoreComments to true
30  		*/
31          ignoreComments = Boolean.parseBoolean(properties.getProperty(IGNORE_COMMENTS, "true"));
32          ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
33          ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
34      }
35  
36      public void setIgnoreComments(boolean ignore) {
37  	this.ignoreComments = ignore;
38      }
39  
40      public void setIgnoreLiterals(boolean ignore) {
41  	this.ignoreLiterals = ignore;
42      }
43  
44      public void setIgnoreIdentifiers(boolean ignore) {
45  	this.ignoreIdentifiers = ignore;
46      }
47  
48          /**
49           * Read Reader from SourceCode and output an ordered tree of PLSQL tokens.
50           * @param sourceCode PLSQL source in file, string or database (any suitable object that can return
51           * a Reader).
52           * @param tokenEntries  Derived based on PLSQL Abstract Syntax Tree (derived from PLDOc parser.) 
53           */
54  	public void tokenize (SourceCode sourceCode, Tokens tokenEntries )
55  	{
56          long encounteredTokens = 0, addedTokens = 0;
57  
58  		LOGGER.fine("PLSQLTokenizer: ignoreComments=="+ignoreComments);
59  		LOGGER.fine("PLSQLTokenizer: ignoreIdentifiers=="+ignoreIdentifiers);
60  		LOGGER.fine("PLSQLTokenizer: ignoreLiterals=="+ignoreLiterals);
61  
62  		String fileName = sourceCode.getFileName();
63  		StringBuilder sb = sourceCode.getCodeBuffer();
64  
65  		PLSQLParserTokenManager tokenMgr = new PLSQLParserTokenManager( new SimpleCharStream( new StringReader(sb.toString()))); 
66  		Token currentToken = tokenMgr.getNextToken();
67  		while (currentToken.image.length()  > 0)
68  		{
69  			String image = currentToken.image;
70  
71                          encounteredTokens++;
72  			if (ignoreComments && 
73  			    ( currentToken.kind == PLSQLParserConstants.SINGLE_LINE_COMMENT
74  			    ||currentToken.kind == PLSQLParserConstants.MULTI_LINE_COMMENT
75  			    ||currentToken.kind == PLSQLParserConstants.FORMAL_COMMENT
76  			    ||currentToken.kind == PLSQLParserConstants.COMMENT
77  			    ||currentToken.kind == PLSQLParserConstants.IN_MULTI_LINE_COMMENT
78  			    ||currentToken.kind == PLSQLParserConstants.IN_FORMAL_COMMENT
79  				)
80  				) {
81  				image = String.valueOf(currentToken.kind);
82  			}
83  
84  			if (ignoreIdentifiers && 
85  			    (currentToken.kind == PLSQLParserConstants.IDENTIFIER
86  				)
87  				) {
88  				image = String.valueOf(currentToken.kind);
89  			}
90  
91  			if (ignoreLiterals
92  				&& (   
93  					   currentToken.kind == PLSQLParserConstants.UNSIGNED_NUMERIC_LITERAL 
94  					|| currentToken.kind == PLSQLParserConstants.FLOAT_LITERAL
95  					|| currentToken.kind == PLSQLParserConstants.INTEGER_LITERAL
96  					|| currentToken.kind == PLSQLParserConstants.CHARACTER_LITERAL
97  				    || currentToken.kind == PLSQLParserConstants.STRING_LITERAL
98  					|| currentToken.kind == PLSQLParserConstants.QUOTED_LITERAL
99  					)
100 				) {
101 				image = String.valueOf(currentToken.kind);
102 			}
103 
104 			tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
105                         addedTokens++;
106 			currentToken = tokenMgr.getNextToken();
107 		}
108 		tokenEntries.add(TokenEntry.getEOF() );
109             LOGGER.fine(sourceCode.getFileName() 
110                         + ": encountered " + encounteredTokens + " tokens;"
111                         + " added " + addedTokens + " tokens"
112                        );
113 	}
114 
115 
116 
117 }
118 
119