=====================================================================
Found a 56 line (309 tokens) duplication in the following files:
Starting at line 771 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/SiteCapturer.java
Starting at line 142 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/WikiCapturer.java
worker = new WikiCapturer ();
if (0 >= args.length)
{
url = (String)JOptionPane.showInputDialog (
null,
"Enter the URL to capture:",
"Web Site",
JOptionPane.PLAIN_MESSAGE,
null,
null,
"http://htmlparser.sourceforge.net/wiki");
if (null != url)
worker.setSource (url);
else
System.exit (1);
}
else
worker.setSource (args[0]);
if (1 >= args.length)
{
url = worker.getSource ();
source = new URL (url);
path = new File (new File ("." + File.separator), source.getHost () + File.separator).getCanonicalPath ();
target = new File (path);
chooser = new JFileChooser (target);
chooser.setDialogType (JFileChooser.SAVE_DIALOG);
chooser.setFileSelectionMode (JFileChooser.DIRECTORIES_ONLY);
chooser.setSelectedFile (target); // this doesn't frickin' work
chooser.setMultiSelectionEnabled (false);
chooser.setDialogTitle ("Target Directory");
ret = chooser.showSaveDialog (null);
if (ret == JFileChooser.APPROVE_OPTION)
worker.setTarget (chooser.getSelectedFile ().getAbsolutePath ());
else
System.exit (1);
}
else
worker.setTarget (args[1]);
if (2 >= args.length)
{
capture = (Boolean)JOptionPane.showInputDialog (
null,
"Should resources be captured:",
"Capture Resources",
JOptionPane.PLAIN_MESSAGE,
null,
new Object[] { Boolean.TRUE, Boolean.FALSE},
Boolean.TRUE);
if (null != capture)
worker.setCaptureResources (capture.booleanValue ());
else
System.exit (1);
}
else
worker.setCaptureResources ((Boolean.valueOf (args[2]).booleanValue ()));
worker.setFilter (
=====================================================================
Found a 56 line (297 tokens) duplication in the following files:
Starting at line 135 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java
Starting at line 199 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java
theirs = sb.toString ();
match = -1;
for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++)
{
node = (Node)mNodes.elementAt (i);
ours = node.getText ();
if (match (theirs, ours))
{
match = i;
break;
}
}
if (-1 == match)
{
node = (Node)mNodes.elementAt (mIndex);
ours = node.getText ();
System.out.println ("theirs: " + theirs);
Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
System.out.println ("ours " + cursor + ": " + ours);
}
else
{
boolean skipped = false;
for (int i = mIndex; i < match; i++)
{
ours = ((Node)mNodes.elementAt (i)).toHtml ();
if (0 != ours.trim ().length ())
{
if (!skipped)
System.out.println ("skipping:");
System.out.println (ours);
skipped = true;
}
}
if (skipped)
{
System.out.println ("to match:");
node = (Node)mNodes.elementAt (match);
Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
System.out.println ("@" + cursor + ": " + node.toHtml ());
}
// System.out.println (" match: " + theirs);
mIndex = match + 1;
}
}
/**
* Callback for a start tag lexeme.
* @param t The tag extracted from the page.
* @param a The attributes parsed out of the tag.
* @param pos The position in the page.
* Note: This differs from the Lexer concept of position which is an
* absolute location in the HTML input stream. This position is the character
* position if the text from the page were displayed in a browser.
*/
public void handleStartTag (HTML.Tag t, MutableAttributeSet a, int pos)
=====================================================================
Found a 56 line (273 tokens) duplication in the following files:
Starting at line 290 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java
Starting at line 366 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java
(new Thread (stream)).start ();
assertTrue ("mark not supported", stream.markSupported ());
for (int i = 0; i < 1000; i++)
{
b = stream.read ();
bytes1.add (new Byte ((byte)b));
}
stream.reset ();
for (int i = 0; i < 1000; i++)
{
b = stream.read ();
bytes2.add (new Byte ((byte)b));
}
index = 0;
while (index < bytes1.size ())
{
assertEquals ("bytes differ at position " + index, bytes1.get (index), bytes2.get (index));
index++;
}
bytes1.clear ();
bytes2.clear ();
stream.mark (1000); // the 1000 is ignored
for (int i = 0; i < 1000; i++)
{
b = stream.read ();
bytes1.add (new Byte ((byte)b));
}
stream.reset ();
for (int i = 0; i < 1000; i++)
{
b = stream.read ();
bytes2.add (new Byte ((byte)b));
}
stream.close ();
index = 0;
while (index < bytes1.size ())
{
assertEquals ("bytes differ at position " + (index + 1000), bytes1.get (index), bytes2.get (index));
index++;
}
}
catch (MalformedURLException murle)
{
fail ("bad url " + link);
}
}
/**
* Test close.
*/
public void testClose () throws IOException
=====================================================================
Found a 70 line (272 tokens) duplication in the following files:
Starting at line 118 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/filterbuilder/wrappers/OrFilterWrapper.java
Starting at line 118 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/filterbuilder/wrappers/AndFilterWrapper.java
mFilter = (AndFilter)filter;
}
/**
 * Fetch the filters contained in the wrapped node filter.
 * @return The predicates reported by the wrapped filter.
 */
public NodeFilter[] getSubNodeFilters ()
{
    NodeFilter[] predicates;

    predicates = this.mFilter.getPredicates ();
    return (predicates);
}
/**
 * Hand the given filters to the wrapped node filter as its predicates.
 * @param filters The filters to pass on to the wrapped filter.
 */
public void setSubNodeFilters (NodeFilter[] filters)
{
    this.mFilter.setPredicates (filters);
}
/**
* Convert this filter into Java code.
* Output whatever text necessary and return the variable name.
* @param out The output buffer.
* @param context Three integers as follows:
* indent level - the number of spaces to insert at the beginning of each line
* filter number - the next available filter number
* filter array number - the next available array of filters number
* @return The variable name to use when referencing this filter (usually "filter" + context[1]++)
*/
public String toJavaCode (StringBuffer out, int[] context)
{
String array;
NodeFilter[] predicates;
String[] names;
String ret;
predicates = mFilter.getPredicates ();
array = null; // stoopid Java compiler
if (0 != predicates.length)
{
names = new String[predicates.length];
for (int i = 0; i < predicates.length; i++)
{
names[i] = ((Filter)predicates[i]).toJavaCode (out, context);
}
array = "array" + context[2]++;
spaces (out, context[0]);
out.append ("NodeFilter[] ");
out.append (array);
out.append (" = new NodeFilter[");
out.append (predicates.length);
out.append ("];");
newline (out);
for (int i = 0; i < predicates.length; i++)
{
spaces (out, context[0]);
out.append (array);
out.append ("[");
out.append (i);
out.append ("] = ");
out.append (names[i]);
out.append (";");
newline (out);
}
}
ret = "filter" + context[1]++;
spaces (out, context[0]);
out.append ("AndFilter ");
=====================================================================
Found a 52 line (244 tokens) duplication in the following files:
Starting at line 268 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java
Starting at line 332 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java
ours = ((Attribute)(((Tag)node).getAttributesEx ().elementAt (0))).getName ().substring (1);
if (match (theirs, ours))
{
match = i;
break;
}
}
}
if (-1 == match)
{
node = (Node)mNodes.elementAt (mIndex);
ours = node.getText ();
System.out.println ("theirs: " + theirs);
Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
System.out.println ("ours " + cursor + ": " + ours);
}
else
{
boolean skipped = false;
for (int i = mIndex; i < match; i++)
{
ours = ((Node)mNodes.elementAt (i)).toHtml ();
if (0 != ours.trim ().length ())
{
if (!skipped)
System.out.println ("skipping:");
System.out.println (ours);
skipped = true;
}
}
if (skipped)
{
System.out.println ("to match:");
node = (Node)mNodes.elementAt (match);
Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
System.out.println ("@" + cursor + ": " + node.toHtml ());
}
// System.out.println (" match: " + theirs);
mIndex = match + 1;
}
}
/**
* Callback for a non-composite tag.
* @param t The tag extracted from the page.
* @param a The attributes parsed out of the tag.
* @param pos The position in the page.
* Note: This differs from the Lexer concept of position which is an
* absolute location in the HTML input stream. This position is the character
* position if the text from the page were displayed in a browser.
*/
public void handleSimpleTag (HTML.Tag t, MutableAttributeSet a, int pos)
=====================================================================
Found a 51 line (243 tokens) duplication in the following files:
Starting at line 269 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java
Starting at line 403 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java
if (match (theirs, ours))
{
match = i;
break;
}
}
}
if (-1 == match)
{
node = (Node)mNodes.elementAt (mIndex);
ours = node.getText ();
System.out.println ("theirs: " + theirs);
Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
System.out.println ("ours " + cursor + ": " + ours);
}
else
{
boolean skipped = false;
for (int i = mIndex; i < match; i++)
{
ours = ((Node)mNodes.elementAt (i)).toHtml ();
if (0 != ours.trim ().length ())
{
if (!skipped)
System.out.println ("skipping:");
System.out.println (ours);
skipped = true;
}
}
if (skipped)
{
System.out.println ("to match:");
node = (Node)mNodes.elementAt (match);
Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
System.out.println ("@" + cursor + ": " + node.toHtml ());
}
// System.out.println (" match: " + theirs);
mIndex = match + 1;
}
}
/**
* Callback for an error condition.
* @param errorMsg The error condition as a text message.
* @param pos The position in the page.
* Note: This differs from the Lexer concept of position which is an
* absolute location in the HTML input stream. This position is the character
* position if the text from the page were displayed in a browser.
*/
public void handleError (String errorMsg, int pos)
=====================================================================
Found a 46 line (234 tokens) duplication in the following files:
Starting at line 292 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/HTMLParserUtilsTest.java
Starting at line 373 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/HTMLParserUtilsTest.java
NodeFilter filter = new NodeClassFilter (Div.class);
String[] tmpSplitTags = ParserUtils.splitTags("Begin ALL OK", filter);
assertStringEquals(
"modified text",
"Begin * ALL OK",
new String(tmpSplitTags[0] + '*' + tmpSplitTags[1])
);
tmpSplitTags = ParserUtils.splitTags("Begin ALL OK", filter, false, false);
assertStringEquals(
"modified text",
"Begin * +12.5
* ALL OK",
new String(tmpSplitTags[0] + '*' + tmpSplitTags[1] + '*' + tmpSplitTags[2])
);
tmpSplitTags = ParserUtils.splitTags("Begin ALL OK", filter, true, false);
assertStringEquals(
"modified text",
"Begin * +12.5 * ALL OK",
new String(tmpSplitTags[0] + '*' + tmpSplitTags[1] + '*' + tmpSplitTags[2])
);
tmpSplitTags = ParserUtils.splitTags("Begin ALL OK", filter, false, true);
assertStringEquals(
"modified text",
"Begin * ALL OK",
new String(tmpSplitTags[0] + '*' + tmpSplitTags[1])
);
assertStringEquals(
"modified text",
" ALL OK",
ParserUtils.trimTags(" ALL OK", filter)
);
assertStringEquals(
"modified text",
" +12.5
ALL OK",
ParserUtils.trimTags(" ALL OK", filter, false, false)
);
assertStringEquals(
"modified text",
" +12.5 ALL OK",
ParserUtils.trimTags(" ALL OK", filter, true, false)
);
assertStringEquals(
"modified text",
" ALL OK",
ParserUtils.trimTags(" ALL OK", filter, false, true)
);
NodeFilter filterTableRow = new NodeClassFilter(TableRow.class);
=====================================================================
Found a 45 line (228 tokens) duplication in the following files:
Starting at line 145 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java
Starting at line 274 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java
}
}
if (-1 == match)
{
node = (Node)mNodes.elementAt (mIndex);
ours = node.getText ();
System.out.println ("theirs: " + theirs);
Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
System.out.println ("ours " + cursor + ": " + ours);
}
else
{
boolean skipped = false;
for (int i = mIndex; i < match; i++)
{
ours = ((Node)mNodes.elementAt (i)).toHtml ();
if (0 != ours.trim ().length ())
{
if (!skipped)
System.out.println ("skipping:");
System.out.println (ours);
skipped = true;
}
}
if (skipped)
{
System.out.println ("to match:");
node = (Node)mNodes.elementAt (match);
Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ());
System.out.println ("@" + cursor + ": " + node.toHtml ());
}
// System.out.println (" match: " + theirs);
mIndex = match + 1;
}
}
/**
* Callback for an end tag lexeme.
* @param t The tag extracted from the page.
* @param pos The position in the page.
* Note: This differs from the Lexer concept of position which is an
* absolute location in the HTML input stream. This position is the character
* position if the text from the page were displayed in a browser.
*/
public void handleEndTag (HTML.Tag t, int pos)
=====================================================================
Found a 33 line (197 tokens) duplication in the following files:
Starting at line 835 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java
Starting at line 1083 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java
StringBuffer output = new StringBuffer();
String dummyString = createDummyString (' ', input.length());
// loop inside the tags of the same type
NodeList links = getLinks (input, filter, recursive);
for (int j=0; jFor example if you call trimChars("<DIV> +12.5 </DIV>", "<>DIV/ "),
*
you obtain a string "+12.5" as output (<,>,D,I,V,/ and space char are chars that must be removed).
*
For example if you call trimChars("<DIV> Trim All Chars Also The Ones Inside The String </DIV>", "<>DIV/ "),
*
you obtain a string "TrimAllCharsAlsoTheOnesInsideTheString" as output (all the spaces inside the string are removed).
* @param input The string in input.
* @param charsToBeRemoved The chars to be removed.
* @return The string as output.
*/
public static String trimChars (String input, String charsToBeRemoved)
=====================================================================
Found a 38 line (184 tokens) duplication in the following files:
Starting at line 826 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/lexer/Page.java
Starting at line 103 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/LinkProcessor.java
return (string);
}
/**
* @deprecated Use Page.constructUrl() instead.
*/
public URL constructUrl(String link, String base)
throws MalformedURLException {
String path;
boolean modified;
boolean absolute;
int index;
URL url; // constructed URL combining relative link and base
url = new URL (new URL (base), link);
path = url.getFile ();
modified = false;
absolute = link.startsWith ("/");
if (!absolute) { // we prefer to fix incorrect relative links
// this doesn't fix them all, just the ones at the start
while (path.startsWith ("/.")) {
if (path.startsWith ("/../")) {
path = path.substring (3);
modified = true;
}
else if (path.startsWith ("/./") || path.startsWith("/.")) {
path = path.substring (2);
modified = true;
} else break;
}
}
// fix backslashes
while (-1 != (index = path.indexOf ("/\\"))) {
path = path.substring (0, index + 1) + path.substring (index + 2);
modified = true;
}
if (modified)
url = new URL (url, path);
return url;
=====================================================================
Found a 49 line (183 tokens) duplication in the following files:
Starting at line 118 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java
Starting at line 263 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java
if (!((Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound)))
{
str.append(input.charAt(index));
toBeAdd=false;
}
else
if (!toBeAdd)
toBeAdd=true;
// finished to parse one string
if (toBeAdd && (str.length()!=0)) {
minCapacity++;
output.ensureCapacity(minCapacity);
if (output.add(str.toString()))
str = new StringBuffer();
else
minCapacity--;
}
}
// add the last string
if (str.length()!=0) {
minCapacity++;
output.ensureCapacity(minCapacity);
if (output.add(str.toString()))
str = new StringBuffer();
else
minCapacity--;
}
output.trimToSize();
Object[] outputObj = output.toArray();
String[] outputStr = new String[output.size()];
for (int i=0; iFor example if you call trimSpaces("<DIV> +12.5 </DIV>", "<>DIV/"),
*
you obtain a string "+12.5" as output (space chars and <,>,D,I,V,/ are chars that must be removed).
*
For example if you call trimSpaces("<DIV> Trim All Spaces Also The Ones Inside The String </DIV>", "<>DIV/"),
*
you obtain a string "TrimAllSpacesAlsoTheOnesInsideTheString" as output (all the spaces inside the string are removed).
* @param input The string in input.
* @param charsToBeRemoved The chars to be removed.
* @return The string as output.
*/
public static String trimSpaces (String input, String charsToBeRemoved)
=====================================================================
Found a 43 line (182 tokens) duplication in the following files:
Starting at line 1209 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java
Starting at line 1314 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java
String string;
random = new Random ();
list = getReferences ();
stimulus = new StringBuffer ();
response = new StringBuffer ();
for (int i = 0; i < 1000; i++)
{
for (int j = 0; j < 10; j++)
{
// some random characters
for (int k = 0; k < 10; k++)
{
character = (char)random.nextInt (127);
if (character >= ' ')
{
if ('&' == character)
{
stimulus.append (character);
response.append ("&");
}
else if ('"' == character)
{
stimulus.append (character);
response.append (""");
}
else if ('<' == character)
{
stimulus.append (character);
response.append ("<");
}
else if ('>' == character)
{
stimulus.append (character);
response.append (">");
}
else
{
stimulus.append (character);
response.append (character);
}
}
}
=====================================================================
Found a 49 line (182 tokens) duplication in the following files:
Starting at line 118 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java
Starting at line 408 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java
if (charFound)
{
str.append(input.charAt(index));
toBeAdd=false;
}
else
if (!toBeAdd)
toBeAdd=true;
// finished to parse one string
if (toBeAdd && (str.length()!=0)) {
minCapacity++;
output.ensureCapacity(minCapacity);
if (output.add(str.toString()))
str = new StringBuffer();
else
minCapacity--;
}
}
// add the last string
if (str.length()!=0) {
minCapacity++;
output.ensureCapacity(minCapacity);
if (output.add(str.toString()))
str = new StringBuffer();
else
minCapacity--;
}
output.trimToSize();
Object[] outputObj = output.toArray();
String[] outputStr = new String[output.size()];
for (int i=0; iFor example if you call trimButChars("<DIV> +12.5 </DIV>", "+.1234567890"),
*
you obtain a string "+12.5" as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that must not be removed).
*
For example if you call trimButChars("<DIV> +1 2 . 5 </DIV>", "+.1234567890"),
*
you obtain a string "+12.5" as output (the spaces between 1 and 2, 2 and ., . and 5 are removed).
* @param input The string in input.
* @param charsDoNotBeRemoved The chars that must not be removed.
* @return The string as output.
*/
public static String trimButChars (String input, String charsDoNotBeRemoved)
=====================================================================
Found a 28 line (178 tokens) duplication in the following files:
Starting at line 984 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java
Starting at line 1088 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java
NodeList links = getLinks (input, filter, recursive);
for (int j=0; jjStartTagBegin) && (kEndTagEnd\n");
for (int i = 0;i\n";
paramsMap.put(paramsData[i][0],paramsData[i][1]);
}
testHTML+=
"\n"+
"