===================================================================== Found a 56 line (309 tokens) duplication in the following files: Starting at line 771 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/SiteCapturer.java Starting at line 142 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/WikiCapturer.java worker = new WikiCapturer (); if (0 >= args.length) { url = (String)JOptionPane.showInputDialog ( null, "Enter the URL to capture:", "Web Site", JOptionPane.PLAIN_MESSAGE, null, null, "http://htmlparser.sourceforge.net/wiki"); if (null != url) worker.setSource (url); else System.exit (1); } else worker.setSource (args[0]); if (1 >= args.length) { url = worker.getSource (); source = new URL (url); path = new File (new File ("." + File.separator), source.getHost () + File.separator).getCanonicalPath (); target = new File (path); chooser = new JFileChooser (target); chooser.setDialogType (JFileChooser.SAVE_DIALOG); chooser.setFileSelectionMode (JFileChooser.DIRECTORIES_ONLY); chooser.setSelectedFile (target); // this doesn't frickin' work chooser.setMultiSelectionEnabled (false); chooser.setDialogTitle ("Target Directory"); ret = chooser.showSaveDialog (null); if (ret == JFileChooser.APPROVE_OPTION) worker.setTarget (chooser.getSelectedFile ().getAbsolutePath ()); else System.exit (1); } else worker.setTarget (args[1]); if (2 >= args.length) { capture = (Boolean)JOptionPane.showInputDialog ( null, "Should resources be captured:", "Capture Resources", JOptionPane.PLAIN_MESSAGE, null, new Object[] { Boolean.TRUE, Boolean.FALSE}, Boolean.TRUE); if (null != capture) worker.setCaptureResources (capture.booleanValue ()); else System.exit (1); } else worker.setCaptureResources ((Boolean.valueOf (args[2]).booleanValue ())); worker.setFilter ( ===================================================================== Found a 56 line (297 tokens) duplication in the following files: Starting at line 135 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java Starting at line 199 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java theirs = sb.toString (); match = -1; for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++) { node = (Node)mNodes.elementAt (i); ours = node.getText (); if (match (theirs, ours)) { match = i; break; } } if (-1 == match) { node = (Node)mNodes.elementAt (mIndex); ours = node.getText (); System.out.println ("theirs: " + theirs); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("ours " + cursor + ": " + ours); } else { boolean skipped = false; for (int i = mIndex; i < match; i++) { ours = ((Node)mNodes.elementAt (i)).toHtml (); if (0 != ours.trim ().length ()) { if (!skipped) System.out.println ("skipping:"); System.out.println (ours); skipped = true; } } if (skipped) { System.out.println ("to match:"); node = (Node)mNodes.elementAt (match); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("@" + cursor + ": " + node.toHtml ()); } // System.out.println (" match: " + theirs); mIndex = match + 1; } } /** * Callback for a start tag lexeme. * @param t The tag extracted from the page. * @param a The attributes parsed out of the tag. * @param pos The position in the page. * Note: This differs from the Lexer concept of position which is an * absolute location in the HTML input stream. This position is the character * position if the text from the page were displayed in a browser. */ public void handleStartTag (HTML.Tag t, MutableAttributeSet a, int pos) ===================================================================== Found a 56 line (273 tokens) duplication in the following files: Starting at line 290 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java Starting at line 366 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java (new Thread (stream)).start (); assertTrue ("mark not supported", stream.markSupported ()); for (int i = 0; i < 1000; i++) { b = stream.read (); bytes1.add (new Byte ((byte)b)); } stream.reset (); for (int i = 0; i < 1000; i++) { b = stream.read (); bytes2.add (new Byte ((byte)b)); } index = 0; while (index < bytes1.size ()) { assertEquals ("bytes differ at position " + index, bytes1.get (index), bytes2.get (index)); index++; } bytes1.clear (); bytes2.clear (); stream.mark (1000); // the 1000 is ignored for (int i = 0; i < 1000; i++) { b = stream.read (); bytes1.add (new Byte ((byte)b)); } stream.reset (); for (int i = 0; i < 1000; i++) { b = stream.read (); bytes2.add (new Byte ((byte)b)); } stream.close (); index = 0; while (index < bytes1.size ()) { assertEquals ("bytes differ at position " + (index + 1000), bytes1.get (index), bytes2.get (index)); index++; } } catch (MalformedURLException murle) { fail ("bad url " + link); } } /** * Test close. */ public void testClose () throws IOException ===================================================================== Found a 70 line (272 tokens) duplication in the following files: Starting at line 118 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/filterbuilder/wrappers/OrFilterWrapper.java Starting at line 118 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/filterbuilder/wrappers/AndFilterWrapper.java mFilter = (AndFilter)filter; } /** * Get the underlying node filter's subordinate filters. * @return The node filter object's contained filters. */ public NodeFilter[] getSubNodeFilters () { return (mFilter.getPredicates ()); } /** * Assign the underlying node filter's subordinate filters. * @param filters The filters to insert into the underlying node filter. */ public void setSubNodeFilters (NodeFilter[] filters) { mFilter.setPredicates (filters); } /** * Convert this filter into Java code. * Output whatever text necessary and return the variable name. * @param out The output buffer. * @param context Three integers as follows: *
  • indent level - the number of spaces to insert at the beginning of each line
  • *
  • filter number - the next available filter number
  • *
  • filter array number - the next available array of filters number
  • * @return The variable name to use when referencing this filter (usually "filter" + context[1]++) */ public String toJavaCode (StringBuffer out, int[] context) { String array; NodeFilter[] predicates; String[] names; String ret; predicates = mFilter.getPredicates (); array = null; // stoopid Java compiler if (0 != predicates.length) { names = new String[predicates.length]; for (int i = 0; i < predicates.length; i++) { names[i] = ((Filter)predicates[i]).toJavaCode (out, context); } array = "array" + context[2]++; spaces (out, context[0]); out.append ("NodeFilter[] "); out.append (array); out.append (" = new NodeFilter["); out.append (predicates.length); out.append ("];"); newline (out); for (int i = 0; i < predicates.length; i++) { spaces (out, context[0]); out.append (array); out.append ("["); out.append (i); out.append ("] = "); out.append (names[i]); out.append (";"); newline (out); } } ret = "filter" + context[1]++; spaces (out, context[0]); out.append ("AndFilter "); ===================================================================== Found a 52 line (244 tokens) duplication in the following files: Starting at line 268 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java Starting at line 332 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java ours = ((Attribute)(((Tag)node).getAttributesEx ().elementAt (0))).getName ().substring (1); if (match (theirs, ours)) { match = i; break; } } } if (-1 == match) { node = (Node)mNodes.elementAt (mIndex); ours = node.getText (); System.out.println ("theirs: " + theirs); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("ours " + cursor + ": " + ours); } else { boolean skipped = false; for (int i = mIndex; i < match; i++) { ours = ((Node)mNodes.elementAt (i)).toHtml (); if (0 != ours.trim ().length ()) { if (!skipped) System.out.println ("skipping:"); System.out.println (ours); skipped = true; } } if (skipped) { System.out.println ("to match:"); node = (Node)mNodes.elementAt (match); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("@" + cursor + ": " + node.toHtml ()); } // System.out.println (" match: " + theirs); mIndex = match + 1; } } /** * Callback for a non-composite tag. * @param t The tag extracted from the page. * @param a The attributes parsed out of the tag. * @param pos The position in the page. * Note: This differs from the Lexer concept of position which is an * absolute location in the HTML input stream. This position is the character * position if the text from the page were displayed in a browser. */ public void handleSimpleTag (HTML.Tag t, MutableAttributeSet a, int pos) ===================================================================== Found a 51 line (243 tokens) duplication in the following files: Starting at line 269 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java Starting at line 403 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java if (match (theirs, ours)) { match = i; break; } } } if (-1 == match) { node = (Node)mNodes.elementAt (mIndex); ours = node.getText (); System.out.println ("theirs: " + theirs); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("ours " + cursor + ": " + ours); } else { boolean skipped = false; for (int i = mIndex; i < match; i++) { ours = ((Node)mNodes.elementAt (i)).toHtml (); if (0 != ours.trim ().length ()) { if (!skipped) System.out.println ("skipping:"); System.out.println (ours); skipped = true; } } if (skipped) { System.out.println ("to match:"); node = (Node)mNodes.elementAt (match); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("@" + cursor + ": " + node.toHtml ()); } // System.out.println (" match: " + theirs); mIndex = match + 1; } } /** * Callback for an error condition. * @param errorMsg The error condition as a text message. * @param pos The position in the page. * Note: This differs from the Lexer concept of position which is an * absolute location in the HTML input stream. This position is the character * position if the text from the page were displayed in a browser. */ public void handleError (String errorMsg, int pos) ===================================================================== Found a 46 line (234 tokens) duplication in the following files: Starting at line 292 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/HTMLParserUtilsTest.java Starting at line 373 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/HTMLParserUtilsTest.java NodeFilter filter = new NodeClassFilter (Div.class); String[] tmpSplitTags = ParserUtils.splitTags("Begin
    +12.5
    ALL OK", filter); assertStringEquals( "modified text", "Begin * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1]) ); tmpSplitTags = ParserUtils.splitTags("Begin
    +12.5
    ALL OK", filter, false, false); assertStringEquals( "modified text", "Begin *
    +12.5
    * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1] + '*' + tmpSplitTags[2]) ); tmpSplitTags = ParserUtils.splitTags("Begin
    +12.5
    ALL OK", filter, true, false); assertStringEquals( "modified text", "Begin * +12.5 * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1] + '*' + tmpSplitTags[2]) ); tmpSplitTags = ParserUtils.splitTags("Begin
    +12.5
    ALL OK", filter, false, true); assertStringEquals( "modified text", "Begin * ALL OK", new String(tmpSplitTags[0] + '*' + tmpSplitTags[1]) ); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags("
    +12.5
    ALL OK", filter) ); assertStringEquals( "modified text", "
    +12.5
    ALL OK", ParserUtils.trimTags("
    +12.5
    ALL OK", filter, false, false) ); assertStringEquals( "modified text", " +12.5 ALL OK", ParserUtils.trimTags("
    +12.5
    ALL OK", filter, true, false) ); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags("
    +12.5
    ALL OK", filter, false, true) ); NodeFilter filterTableRow = new NodeClassFilter(TableRow.class); ===================================================================== Found a 45 line (228 tokens) duplication in the following files: Starting at line 145 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java Starting at line 274 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java } } if (-1 == match) { node = (Node)mNodes.elementAt (mIndex); ours = node.getText (); System.out.println ("theirs: " + theirs); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("ours " + cursor + ": " + ours); } else { boolean skipped = false; for (int i = mIndex; i < match; i++) { ours = ((Node)mNodes.elementAt (i)).toHtml (); if (0 != ours.trim ().length ()) { if (!skipped) System.out.println ("skipping:"); System.out.println (ours); skipped = true; } } if (skipped) { System.out.println ("to match:"); node = (Node)mNodes.elementAt (match); Cursor cursor = new Cursor (((AbstractNode)node).getPage (), node.getStartPosition ()); System.out.println ("@" + cursor + ": " + node.toHtml ()); } // System.out.println (" match: " + theirs); mIndex = match + 1; } } /** * Callback for an end tag lexeme. * @param t The tag extracted from the page. * @param pos The position in the page. * Note: This differs from the Lexer concept of position which is an * absolute location in the HTML input stream. This position is the character * position if the text from the page were displayed in a browser. */ public void handleEndTag (HTML.Tag t, int pos) ===================================================================== Found a 33 line (197 tokens) duplication in the following files: Starting at line 835 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java Starting at line 1083 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java StringBuffer output = new StringBuffer(); String dummyString = createDummyString (' ', input.length()); // loop inside the tags of the same type NodeList links = getLinks (input, filter, recursive); for (int j=0; jFor example if you call trimChars("<DIV> +12.5 </DIV>", "<>DIV/ "), *
    you obtain a string "+12.5" as output (<,>,D,I,V,/ and space char are chars that must be removed). *
    For example if you call trimChars("<DIV> Trim All Chars Also The Ones Inside The String </DIV>", "<>DIV/ "), *
    you obtain a string "TrimAllCharsAlsoTheOnesInsideTheString" as output (all the spaces inside the string are removed). * @param input The string in input. * @param charsToBeRemoved The chars to be removed. * @return The string as output. */ public static String trimChars (String input, String charsToBeRemoved) ===================================================================== Found a 38 line (184 tokens) duplication in the following files: Starting at line 826 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/lexer/Page.java Starting at line 103 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/LinkProcessor.java return (string); } /** * @deprecated Use Page.constructUrl() instead. */ public URL constructUrl(String link, String base) throws MalformedURLException { String path; boolean modified; boolean absolute; int index; URL url; // constructed URL combining relative link and base url = new URL (new URL (base), link); path = url.getFile (); modified = false; absolute = link.startsWith ("/"); if (!absolute) { // we prefer to fix incorrect relative links // this doesn't fix them all, just the ones at the start while (path.startsWith ("/.")) { if (path.startsWith ("/../")) { path = path.substring (3); modified = true; } else if (path.startsWith ("/./") || path.startsWith("/.")) { path = path.substring (2); modified = true; } else break; } } // fix backslashes while (-1 != (index = path.indexOf ("/\\"))) { path = path.substring (0, index + 1) + path.substring (index + 2); modified = true; } if (modified) url = new URL (url, path); return url; ===================================================================== Found a 49 line (183 tokens) duplication in the following files: Starting at line 118 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java Starting at line 263 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java if (!((Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound))) { str.append(input.charAt(index)); toBeAdd=false; } else if (!toBeAdd) toBeAdd=true; // finished to parse one string if (toBeAdd && (str.length()!=0)) { minCapacity++; output.ensureCapacity(minCapacity); if (output.add(str.toString())) str = new StringBuffer(); else minCapacity--; } } // add the last string if (str.length()!=0) { minCapacity++; output.ensureCapacity(minCapacity); if (output.add(str.toString())) str = new StringBuffer(); else minCapacity--; } output.trimToSize(); Object[] outputObj = output.toArray(); String[] outputStr = new String[output.size()]; for (int i=0; iFor example if you call trimSpaces("<DIV> +12.5 </DIV>", "<>DIV/"), *
    you obtain a string "+12.5" as output (space chars and <,>,D,I,V,/ are chars that must be removed). *
    For example if you call trimSpaces("<DIV> Trim All Spaces Also The Ones Inside The String </DIV>", "<>DIV/"), *
    you obtain a string "TrimAllSpacesAlsoTheOnesInsideTheString" as output (all the spaces inside the string are removed). * @param input The string in input. * @param charsToBeRemoved The chars to be removed. * @return The string as output. */ public static String trimSpaces (String input, String charsToBeRemoved) ===================================================================== Found a 43 line (182 tokens) duplication in the following files: Starting at line 1209 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java Starting at line 1314 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java String string; random = new Random (); list = getReferences (); stimulus = new StringBuffer (); response = new StringBuffer (); for (int i = 0; i < 1000; i++) { for (int j = 0; j < 10; j++) { // some random characters for (int k = 0; k < 10; k++) { character = (char)random.nextInt (127); if (character >= ' ') { if ('&' == character) { stimulus.append (character); response.append ("&"); } else if ('"' == character) { stimulus.append (character); response.append ("""); } else if ('<' == character) { stimulus.append (character); response.append ("<"); } else if ('>' == character) { stimulus.append (character); response.append (">"); } else { stimulus.append (character); response.append (character); } } } ===================================================================== Found a 49 line (182 tokens) duplication in the following files: Starting at line 118 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java Starting at line 408 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java if (charFound) { str.append(input.charAt(index)); toBeAdd=false; } else if (!toBeAdd) toBeAdd=true; // finished to parse one string if (toBeAdd && (str.length()!=0)) { minCapacity++; output.ensureCapacity(minCapacity); if (output.add(str.toString())) str = new StringBuffer(); else minCapacity--; } } // add the last string if (str.length()!=0) { minCapacity++; output.ensureCapacity(minCapacity); if (output.add(str.toString())) str = new StringBuffer(); else minCapacity--; } output.trimToSize(); Object[] outputObj = output.toArray(); String[] outputStr = new String[output.size()]; for (int i=0; iFor example if you call trimButChars("<DIV> +12.5 </DIV>", "+.1234567890"), *
    you obtain a string "+12.5" as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed). *
    For example if you call trimButChars("<DIV> +1 2 . 5 </DIV>", "+.1234567890"), *
    you obtain a string "+12.5" as output (the spaces between 1 and 2, 2 and ., . and 5 are removed). * @param input The string in input. * @param charsDoNotBeRemoved The chars that do not be removed. * @return The string as output. */ public static String trimButChars (String input, String charsDoNotBeRemoved) ===================================================================== Found a 28 line (178 tokens) duplication in the following files: Starting at line 984 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java Starting at line 1088 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java NodeList links = getLinks (input, filter, recursive); for (int j=0; jjStartTagBegin) && (kEndTagEnd\n"); for (int i = 0;i\n"; paramsMap.put(paramsData[i][0],paramsData[i][1]); } testHTML+= "\n"+ ""; createParser(testHTML); parseAndAssertNodeCount(3); assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); AppletTag appletTag = (AppletTag)node[0]; ===================================================================== Found a 36 line (137 tokens) duplication in the following files: Starting at line 1256 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java Starting at line 1395 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java while (0 != character); // some more random characters for (int k = 0; k < 10; k++) { character = (char)random.nextInt (127); if (character >= ' ') { if ('&' == character) { stimulus.append (character); response.append ("&"); } else if ('"' == character) { stimulus.append (character); response.append ("""); } else if ('<' == character) { stimulus.append (character); response.append ("<"); } else if ('>' == character) { stimulus.append (character); response.append (">"); } else { stimulus.append (character); response.append (character); } } } } string = Translate.decode (response.toString ()); ===================================================================== Found a 32 line (131 tokens) duplication in the following files: Starting at line 1220 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java Starting at line 1258 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java for (int k = 0; k < 10; k++) { character = (char)random.nextInt (127); if (character >= ' ') { if ('&' == character) { stimulus.append (character); response.append ("&"); } else if ('"' == character) { stimulus.append (character); response.append ("""); } else if ('<' == character) { stimulus.append (character); response.append ("<"); } else if ('>' == character) { stimulus.append (character); response.append (">"); } else { stimulus.append (character); response.append (character); } } } ===================================================================== Found a 39 line (128 tokens) duplication in the following files: Starting at line 572 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/beans/StringBean.java Starting at line 293 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/beans/FilterBean.java } } /** * Get the current connection. * @return The connection that the parser has or null if it * hasn't been set or the parser hasn't been constructed yet. */ public URLConnection getConnection () { return ((null != mParser) ? mParser.getConnection () : null); } /** * Set the parser's connection. * The text from the URL will be fetched, which may be expensive, so this * property should be set last. * @param connection New value of property Connection. */ public void setConnection (URLConnection connection) { String url; URLConnection conn; url = getURL (); conn = getConnection (); if (((null == conn) && (null != connection)) || ((null != conn) && !conn.equals (connection))) { try { if (null == mParser) mParser = new Parser (connection); else mParser.setConnection (connection); mPropertySupport.firePropertyChange ( PROP_URL_PROPERTY, url, getURL ()); mPropertySupport.firePropertyChange ( PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); ===================================================================== Found a 38 line (127 tokens) duplication in the following files: Starting at line 458 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/beans/StringBean.java Starting at line 249 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/beans/FilterBean.java } /** * Get the current URL. * @return The URL from which text has been extracted, or null * if this property has not been set yet. */ public String getURL () { return ((null != mParser) ? mParser.getURL () : null); } /** * Set the URL to extract strings from. * The text from the URL will be fetched, which may be expensive, so this * property should be set last. * @param url The URL that text should be fetched from. */ public void setURL (String url) { String old; URLConnection conn; old = getURL (); conn = getConnection (); if (((null == old) && (null != url)) || ((null != old) && !old.equals (url))) { try { if (null == mParser) mParser = new Parser (url); else mParser.setURL (url); mPropertySupport.firePropertyChange ( PROP_URL_PROPERTY, old, getURL ()); mPropertySupport.firePropertyChange ( PROP_CONNECTION_PROPERTY, conn, mParser.getConnection ()); ===================================================================== Found a 39 line (125 tokens) duplication in the following files: Starting at line 179 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/nodes/TextNode.java Starting at line 206 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/nodes/RemarkNode.java ret.append ("Rem ("); ret.append (startpos); ret.append (","); ret.append (endpos); ret.append ("): "); for (int i = 0; i < mText.length (); i++) { c = mText.charAt (i); switch (c) { case '\t': ret.append ("\\t"); break; case '\n': ret.append ("\\n"); break; case '\r': ret.append ("\\r"); break; default: ret.append (c); } if (77 <= ret.length ()) { ret.append ("..."); break; } } } return (ret.toString ()); } /** * Remark visiting code. * @param visitor The NodeVisitor object to invoke * visitRemarkNode() on. */ public void accept (NodeVisitor visitor) ===================================================================== Found a 33 line (123 tokens) duplication in the following files: Starting at line 338 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/HTMLParserUtilsTest.java Starting at line 419 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/HTMLParserUtilsTest.java NodeFilter filterTableColumn = new NodeClassFilter(TableColumn.class); OrFilter filterOr = new OrFilter(filterTableRow, filterTableColumn); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags(" +12.5 ALL OK", filterOr) ); assertStringEquals( "modified text", " +12.5 ALL OK", ParserUtils.trimTags(" +12.5 ALL OK", filterOr, false, false) ); assertStringEquals( "modified text", " +12.5 ALL OK", ParserUtils.trimTags(" +12.5 ALL OK", filterOr, true, false) ); assertStringEquals( "modified text", " ALL OK", ParserUtils.trimTags(" +12.5 ALL OK", filterOr, false, true) ); } catch (Exception e) { String msg = e.getMessage (); if (null == msg) msg = e.getClass ().getName (); fail (msg); } } public void testTagsComplexMethods() { ===================================================================== Found a 19 line (122 tokens) duplication in the following files: Starting at line 180 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java Starting at line 446 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java source = new StringSource (reference); assertTrue ("not markable", source.markSupported ()); buffer = new StringBuffer (reference.length ()); for (int i = 0; i < 25; i++) buffer.append ((char)source.read ()); source.mark (88); for (int i = 0; i < 25; i++) source.read (); source.reset (); while (-1 != (c = source.read ())) buffer.append ((char)c); assertTrue ("string incorrect", reference.equals (buffer.toString ())); source.close (); } /** * Test skipping a StringSource. */ public void testStringSourceSkip () throws IOException ===================================================================== Found a 29 line (121 tokens) duplication in the following files: Starting at line 174 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/BeanTest.java Starting at line 202 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/utilTests/BeanTest.java public void testSerializableScanners () throws IOException, ClassNotFoundException, ParserException { Parser parser; Vector vector; NodeIterator enumeration; byte[] data; parser = new Parser ("http://htmlparser.sourceforge.net/test/example.html"); enumeration = parser.elements (); vector = new Vector (50); while (enumeration.hasMoreNodes ()) vector.addElement (enumeration.nextNode ()); data = pickle (parser); parser = (Parser)unpickle (data); enumeration = parser.elements (); while (enumeration.hasMoreNodes ()) assertEquals ( "Nodes before and after serialization differ", ((Node)vector.remove (0)).toHtml (), enumeration.nextNode ().toHtml ()); } public void testSerializableStringBean () ===================================================================== Found a 25 line (120 tokens) duplication in the following files: Starting at line 89 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tags/AppletTag.java Starting at line 87 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tags/ObjectTag.java public Hashtable createObjectParamsTable () { NodeList kids; Node node; Tag tag; String paramName; String paramValue; Hashtable ret; ret = new Hashtable (); kids = getChildren (); if (null != kids) for (int i = 0; i < kids.size (); i++) { node = children.elementAt(i); if (node instanceof Tag) { tag = (Tag)node; if (tag.getTagName().equals ("PARAM")) { paramName = tag.getAttribute ("NAME"); if (null != paramName && 0 != paramName.length ()) { paramValue = tag.getAttribute ("VALUE"); ret.put (paramName.toUpperCase(),paramValue); ===================================================================== Found a 24 line (120 tokens) duplication in the following files: Starting at line 597 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/SiteCapturer.java Starting at line 640 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/SiteCapturer.java link = getFrameLocation (); // check if it needs to be captured if (isToBeCaptured (link)) { // add the link to a list to be processed if (mFinished.contains (link)) html = true; else if (mPages.contains (link)) html = true; else if (mCopied.contains (link)) html = false; else if (mImages.contains (link)) html = false; else { // this test is expensive, do it reluctantly html = isHtml (link); if (html) mPages.add (link); else mImages.add (link); } // alter the link if (html || (!html && getCaptureResources ())) link = makeLocalLink (link, mParser.getLexer ().getPage ().getUrl ()); ===================================================================== Found a 20 line (119 tokens) duplication in the following files: Starting at line 254 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java Starting at line 383 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/KitTest.java public void handleSimpleTag (HTML.Tag t, MutableAttributeSet a, int pos) { String theirs; Node node; int match; String ours; theirs = t.toString (); match = -1; for (int i = mIndex; i < Math.min (mIndex + 25, mNodes.size ()); i++) { node = (Node)mNodes.elementAt (i); if (node instanceof Tag) { ours = ((Attribute)(((Tag)node).getAttributesEx ().elementAt (0))).getName (); if (match (theirs, ours)) { match = i; break; } ===================================================================== Found a 12 line (115 tokens) duplication in the following files: Starting at line 546 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java Starting at line 565 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java Starting at line 584 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Only node should be an HTML node",node[0] instanceof Html); Html html = (Html)node[0]; assertTrue("HTML node should have one child",1 == html.getChildCount ()); assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag); BodyTag body = (BodyTag)html.getChild(0); assertTrue("BODY node should have one child",1 == body.getChildCount ()); assertTrue("Only node should be a string node",body.getChild(0) instanceof Text); Text stringNode = (Text)body.getChild(0); String actual = stringNode.getText(); assertEquals("Third node has incorrect text","text<>\ntext",actual); ===================================================================== Found a 53 line (111 tokens) duplication in the following files: Starting at line 319 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/filterbuilder/wrappers/StringFilterWrapper.java Starting at line 301 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/parserapplications/filterbuilder/wrappers/RegexFilterWrapper.java } // // DocumentListener interface // /** * Handle an insert update event. * @param e Details about the insert event. */ public void insertUpdate (DocumentEvent e) { Document doc; doc = e.getDocument (); try { mFilter.setPattern (doc.getText (0, doc.getLength ())); } catch (BadLocationException ble) { ble.printStackTrace (); } } /** * Handle a remove update event. * @param e Details about the remove event. */ public void removeUpdate (DocumentEvent e) { Document doc; doc = e.getDocument (); try { mFilter.setPattern (doc.getText (0, doc.getLength ())); } catch (BadLocationException ble) { ble.printStackTrace (); } } /** * Handle a change update event. * @param e Details about the change event. */ public void changedUpdate (DocumentEvent e) { // plain text components don't fire these events } } ===================================================================== Found a 19 line (109 tokens) duplication in the following files: Starting at line 333 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/ParserTest.java Starting at line 384 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/ParserTest.java NodeIterator enumeration; path = System.getProperty ("user.dir"); if (!path.endsWith (File.separator)) path += File.separator; file = new File (path + "delete_me.html"); try { out = new PrintWriter (new FileWriter (file)); out.println (""); out.println (""); out.println (""); out.println ("test"); out.println (""); out.println (""); out.println (""); out.println ("This is a test page "); out.println (""); out.println (""); ===================================================================== Found a 17 line (109 tokens) duplication in the following files: Starting at line 132 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java Starting at line 398 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java source = new StringSource (reference); buffer = new StringBuffer (reference.length ()); while (-1 != (c = source.read ())) buffer.append ((char)c); assertTrue ("string incorrect", reference.equals (buffer.toString ())); source.reset (); buffer.setLength (0); while (-1 != (c = source.read ())) buffer.append ((char)c); assertTrue ("string incorrect", reference.equals (buffer.toString ())); source.close (); } /** * Test resetting a StringSource in the middle of reading. */ public void testStringSourceMidReset () throws IOException ===================================================================== Found a 10 line (107 tokens) duplication in the following files: Starting at line 47 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/tagTests/AppletTagTest.java Starting at line 73 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/tagTests/AppletTagTest.java Starting at line 155 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/tagTests/AppletTagTest.java public void testChangeAppletClass() throws ParserException { String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; Hashtable paramsMap = new Hashtable(); String testHTML = new String("\n"); for (int i = 0;i\n"; paramsMap.put(paramsData[i][0],paramsData[i][1]); } testHTML+= ===================================================================== Found a 17 line (106 tokens) duplication in the following files: Starting at line 156 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java Starting at line 422 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java source = new StringSource (reference); buffer = new StringBuffer (reference.length ()); for (int i = 0; i < 25; i++) buffer.append ((char)source.read ()); source.reset (); for (int i = 0; i < 25; i++) source.read (); while (-1 != (c = source.read ())) buffer.append ((char)c); assertTrue ("string incorrect", reference.equals (buffer.toString ())); source.close (); } /** * Test mark/reset of a StringSource in the middle of reading. */ public void testStringSourceMarkReset () throws IOException ===================================================================== Found a 19 line (106 tokens) duplication in the following files: Starting at line 180 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/AttributeTests.java Starting at line 268 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/AttributeTests.java attribute = new PageAttribute (); attribute.setName ("name"); attribute.setAssignment ("="); attribute.setValue ("topFrame"); attribute.setQuote ('"'); assertTrue ("should not be standalone", !attribute.isStandAlone ()); assertTrue ("should not be whitespace", !attribute.isWhitespace ()); assertTrue ("should be valued", attribute.isValued ()); assertTrue ("should not be empty", !attribute.isEmpty ()); attributes.add (attribute); tag = new TagNode (null, 0, 0, attributes); html = ""; assertStringEquals ("tag contents", html, tag.toHtml ()); } /** * Test simple value. */ public void testParseParameters() { ===================================================================== Found a 10 line (103 tokens) duplication in the following files: Starting at line 528 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java Starting at line 546 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Only node should be an HTML node",node[0] instanceof Html); Html html = (Html)node[0]; assertTrue("HTML node should have one child",1 == html.getChildCount ()); assertTrue("Only node should be an BODY node",html.getChild(0) instanceof BodyTag); BodyTag body = (BodyTag)html.getChild(0); assertTrue("BODY node should have one child",1 == body.getChildCount ()); assertTrue("Only node should be a string node",body.getChild(0) instanceof Text); Text stringNode = (Text)body.getChild(0); ===================================================================== Found a 15 line (102 tokens) duplication in the following files: Starting at line 212 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java Starting at line 478 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java source = new StringSource (reference); buffer = new StringBuffer (reference.length ()); for (int i = 0; i < part1.length (); i++) buffer.append ((char)source.read ()); source.skip (part2.length ()); while (-1 != (c = source.read ())) buffer.append ((char)c); assertTrue ("string incorrect", (part1 + part3).equals (buffer.toString ())); source.close (); } /** * Test multi-byte read with a StringSource. */ public void testStringSourceMultByte () throws IOException ===================================================================== Found a 34 line (100 tokens) duplication in the following files: Starting at line 146 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/nodes/TextNode.java Starting at line 173 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/nodes/RemarkNode.java while (start.getPosition () < endpos) { try { c = mPage.getCharacter (start); switch (c) { case '\t': ret.append ("\\t"); break; case '\n': ret.append ("\\n"); break; case '\r': ret.append ("\\r"); break; default: ret.append (c); } } catch (ParserException pe) { // not really expected, but we're only doing toString, so ignore } if (77 <= ret.length ()) { ret.append ("..."); break; } } } else { ret.append ("Rem ("); ===================================================================== Found a 15 line (100 tokens) duplication in the following files: Starting at line 132 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java Starting at line 422 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java ); parser.setNodeFactory ( new PrototypicalNodeFactory ( new Tag[] { new CustomTag (), new AnotherTag (true), })); parseAndAssertNodeCount(1); assertType("node",CustomTag.class,node[0]); CustomTag customTag = (CustomTag)node[0]; assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartPosition ()); assertEquals("ending loc",8,customTag.getEndPosition ()); ===================================================================== Found a 16 line (100 tokens) duplication in the following files: Starting at line 343 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java Starting at line 630 of /home/tom/pmd/pmd-web/src/htmlparser/src/org/htmlparser/util/ParserUtils.java public static String trimCharsBeginEnd (String input, String charsToBeRemoved) { String output = new String(); int begin=0; int end=input.length()-1; boolean charFound=false; boolean ok=true; for (int index=begin; (index