View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.util.ArrayList;
7   import java.util.Collections;
8   import java.util.HashMap;
9   import java.util.Iterator;
10  import java.util.List;
11  import java.util.Map;
12  
13  public class MatchAlgorithm {
14  
15      private final static int MOD = 37;
16      private int lastHash;
17      private int lastMod = 1;
18  
19      private List<Match> matches;
20      private Map<String, SourceCode> source;
21      private Tokens tokens;
22      private List<TokenEntry> code;
23      private CPDListener cpdListener;
24      private int min;
25  
26      public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min) {
27          this(sourceCode, tokens, min, new CPDNullListener());
28      }
29  
30      public MatchAlgorithm(Map<String, SourceCode> sourceCode, Tokens tokens, int min, CPDListener listener) {
31          this.source = sourceCode;
32          this.tokens = tokens;
33          this.code = tokens.getTokens();
34          this.min = min;
35          this.cpdListener = listener;
36          for (int i = 0; i < min; i++) {
37              lastMod *= MOD;
38          }
39      }
40  
41      public void setListener(CPDListener listener) {
42          this.cpdListener = listener;
43      }
44  
45      public Iterator<Match> matches() {
46          return matches.iterator();
47      }
48  
49      public TokenEntry tokenAt(int offset, TokenEntry m) {
50          return code.get(offset + m.getIndex());
51      }
52  
53      public int getMinimumTileSize() {
54          return this.min;
55      }
56  
57      public void findMatches() {
58          cpdListener.phaseUpdate(CPDListener.HASH);
59          Map<TokenEntry, Object> markGroups = hash();
60  
61          cpdListener.phaseUpdate(CPDListener.MATCH);
62          MatchCollector matchCollector = new MatchCollector(this);
63          for (Iterator<Object> i = markGroups.values().iterator(); i.hasNext();) {
64              Object o = i.next();
65              if (o instanceof List) {
66                  List<TokenEntry> l = (List<TokenEntry>) o;
67  
68                  Collections.reverse(l);
69                  matchCollector.collect(l);
70              }
71              i.remove();
72          }
73          cpdListener.phaseUpdate(CPDListener.GROUPING);
74          matches = matchCollector.getMatches();
75          matchCollector = null;
76          for (Match match : matches) {
77              Iterator<TokenEntry> occurrences = match.iterator();
78              if (occurrences.hasNext()) {
79                  TokenEntry mark = occurrences.next();
80                  match.setLineCount(tokens.getLineCount(mark, match));
81                  int start = mark.getBeginLine();
82                  int end = start + match.getLineCount() - 1;
83                  SourceCode sourceCode = source.get(mark.getTokenSrcID());
84                  match.setSourceCodeSlice(sourceCode.getSlice(start, end));
85              }
86          }
87          cpdListener.phaseUpdate(CPDListener.DONE);
88      }
89  
90      private Map<TokenEntry, Object> hash() {
91          Map<TokenEntry, Object> markGroups = new HashMap<TokenEntry, Object>(tokens.size());
92          for (int i = code.size() - 1; i >= 0; i--) {
93              TokenEntry token = code.get(i);
94              if (token != TokenEntry.EOF) {
95                  int last = tokenAt(min, token).getIdentifier();
96                  lastHash = MOD * lastHash + token.getIdentifier() - lastMod * last;
97                  token.setHashCode(lastHash);
98                  Object o = markGroups.get(token);
99  
100                 // Note that this insertion method is worthwhile since the vast majority
101                 // markGroup keys will have only one value.
102                 if (o == null) {
103                     markGroups.put(token, token);
104                 } else if (o instanceof TokenEntry) {
105                     List<TokenEntry> l = new ArrayList<TokenEntry>();
106                     l.add((TokenEntry) o);
107                     l.add(token);
108                     markGroups.put(token, l);
109                 } else {
110                     List<TokenEntry> l = (List<TokenEntry>) o;
111                     l.add(token);
112                 }
113             } else {
114                 lastHash = 0;
115                 for (int end = Math.max(0, i - min + 1); i > end; i--) {
116                     token = code.get(i - 1);
117                     lastHash = MOD * lastHash + token.getIdentifier();
118                     if (token == TokenEntry.EOF) {
119                         break;
120                     }
121                 }
122             }
123         }
124         return markGroups;
125     }
126 }