View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.File;
7   import java.io.FileNotFoundException;
8   import java.io.IOException;
9   import java.util.HashSet;
10  import java.util.Iterator;
11  import java.util.List;
12  import java.util.Map;
13  import java.util.Properties;
14  import java.util.Set;
15  import java.util.TreeMap;
16  
17  import net.sourceforge.pmd.util.FileFinder;
18  
19  public class CPD {
20  
21      private Map<String, SourceCode> source = new TreeMap<String, SourceCode>();
22      private CPDListener listener = new CPDNullListener();
23      private Tokens tokens = new Tokens();
24      private int minimumTileSize;
25      private MatchAlgorithm matchAlgorithm;
26      private Language language;
27      private boolean skipDuplicates;
28      public static boolean debugEnable = false;
29      private String encoding = System.getProperty("file.encoding");
30  
31  
32      public CPD(int minimumTileSize, Language language) {
33          this.minimumTileSize = minimumTileSize;
34          this.language = language;
35      }
36  
37      public void skipDuplicates() {
38          this.skipDuplicates = true;
39      }
40  
41      public void setCpdListener(CPDListener cpdListener) {
42          this.listener = cpdListener;
43      }
44  
45      public void setEncoding(String encoding) {
46          this.encoding = encoding;
47      }
48  
49      public void go() {
50          TokenEntry.clearImages();
51          matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener);
52          matchAlgorithm.findMatches();
53      }
54  
55      public Iterator<Match> getMatches() {
56          return matchAlgorithm.matches();
57      }
58  
59      public void add(File file) throws IOException {
60          add(1, file);
61      }
62  
63      public void addAllInDirectory(String dir) throws IOException {
64          addDirectory(dir, false);
65      }
66  
67      public void addRecursively(String dir) throws IOException {
68          addDirectory(dir, true);
69      }
70  
71      public void add(List<File> files) throws IOException {
72          for (File f: files) {
73              add(files.size(), f);
74          }
75      }
76  
77      private void addDirectory(String dir, boolean recurse) throws IOException {
78          if (!(new File(dir)).exists()) {
79              throw new FileNotFoundException("Couldn't find directory " + dir);
80          }
81          FileFinder finder = new FileFinder();
82          // TODO - could use SourceFileSelector here
83          add(finder.findFilesFrom(dir, language.getFileFilter(), recurse));
84      }
85  
86      private Set<String> current = new HashSet<String>();
87  
88      private void add(int fileCount, File file) throws IOException {
89  
90          if (skipDuplicates) {
91              // TODO refactor this thing into a separate class
92              String signature = file.getName() + '_' + file.length();
93              if (current.contains(signature)) {
94                  System.err.println("Skipping " + file.getAbsolutePath() + " since it appears to be a duplicate file and --skip-duplicate-files is set");
95                  return;
96              }
97              current.add(signature);
98          }
99  
100         if (!file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) {
101             System.err.println("Skipping " + file + " since it appears to be a symlink");
102             return;
103         }
104 
105         listener.addedFile(fileCount, file);
106         SourceCode sourceCode = new SourceCode(new SourceCode.FileCodeLoader(file, encoding));
107         language.getTokenizer().tokenize(sourceCode, tokens);
108         source.put(sourceCode.getFileName(), sourceCode);
109     }
110 
111     public static Renderer getRendererFromString(String name, String encoding) {
112         if (name.equalsIgnoreCase("text") || name.equals("")) {
113             return new SimpleRenderer();
114         } else if ("xml".equals(name)) {
115             return new XMLRenderer(encoding);
116         }  else if ("csv".equals(name)) {
117             return new CSVRenderer();
118         }  else if ("vs".equals(name)) {
119             return new VSRenderer();
120         }
121         try {
122             return (Renderer) Class.forName(name).newInstance();
123         } catch (Exception e) {
124             System.out.println("Can't find class '" + name + "', defaulting to SimpleRenderer.");
125         }
126         return new SimpleRenderer();
127     }
128 
129     private static boolean findBooleanSwitch(String[] args, String name) {
130         for (int i = 0; i < args.length; i++) {
131             if (args[i].equals(name)) {
132                 return true;
133             }
134         }
135         return false;
136     }
137 
138     private static String findRequiredStringValue(String[] args, String name) {
139         for (int i = 0; i < args.length; i++) {
140             if (args[i].equals(name)) {
141                 return args[i + 1];
142             }
143         }
144         System.out.println("No " + name + " value passed in");
145         usage();
146         throw new RuntimeException();
147     }
148 
149     private static String findOptionalStringValue(String[] args, String name, String defaultValue) {
150         for (int i = 0; i < args.length; i++) {
151             if (args[i].equals(name)) {
152                 return args[i + 1];
153             }
154         }
155         return defaultValue;
156     }
157 
158     private static void setSystemProperties(String[] args) {
159         boolean ignoreLiterals = findBooleanSwitch(args, "--ignore-literals"),
160         ignoreIdentifiers = findBooleanSwitch(args, "--ignore-identifiers");
161         Properties properties = System.getProperties();
162         if (ignoreLiterals) {
163             properties.setProperty(JavaTokenizer.IGNORE_LITERALS, "true");
164         }
165         if (ignoreIdentifiers) {
166             properties.setProperty(JavaTokenizer.IGNORE_IDENTIFIERS, "true");
167         }
168         System.setProperties(properties);
169     }
170 
171     public static void main(String[] args) {
172         if (args.length == 0) {
173             usage();
174         }
175 
176         try {
177 
178             String languageString = findOptionalStringValue(args, "--language", "java");
179             String formatString = findOptionalStringValue(args, "--format", "text");
180             String encodingString = findOptionalStringValue(args, "--encoding", System.getProperty("file.encoding"));
181             int minimumTokens = Integer.parseInt(findRequiredStringValue(args, "--minimum-tokens"));
182             LanguageFactory f = new LanguageFactory();
183             // Pass extra paramteters as System properties to allow language
184             // implementation to retrieve their associate values...
185             setSystemProperties(args);
186 
187             Language language = f.createLanguage(languageString);
188             Renderer renderer = CPD.getRendererFromString(formatString, encodingString);
189             CPD cpd = new CPD(minimumTokens, language);
190             cpd.setEncoding(encodingString);
191 
192             boolean skipDuplicateFiles = findBooleanSwitch(args, "--skip-duplicate-files");
193             if (skipDuplicateFiles) {
194                 cpd.skipDuplicates();
195             }
196             /* FIXME: Improve this !!!	*/
197             boolean missingFiles = true;
198             for (int position = 0; position < args.length; position++) {
199                 if (args[position].equals("--files")) {
200                 	cpd.addRecursively(args[position + 1]);
201                 	if ( missingFiles ) {
202                         missingFiles = false;
203                     }
204                 }
205             }
206 
207             if ( missingFiles ) {
208 	            System.out.println("No " + "--files" + " value passed in");
209 	            usage();
210 	            throw new RuntimeException();
211             }
212 
213             cpd.go();
214             System.out.println(renderer.render(cpd.getMatches()));
215         } catch (Exception e) {
216             e.printStackTrace();
217         }
218     }
219 
220     private static void usage() {
221         System.out.println("Usage:");
222         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
223         System.out.println("i.e: ");
224         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
225         System.out.println("or: ");
226         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
227         System.out.println("or: ");
228         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
229     }
230 
231 }