Skip to content

Commit

Permalink
[Java] Handle comment nodes (#764)
Browse files: browse the repository at this point in the history
  • Loading branch information
tushuhei authored Oct 21, 2024
1 parent 9f14646 commit 02fb71c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
3 changes: 2 additions & 1 deletion java/src/main/java/com/google/budoux/HTMLProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.util.Set;
import java.util.stream.Collectors;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
Expand Down Expand Up @@ -156,7 +157,7 @@ public void head(Node node, int depth) {

@Override
public void tail(Node node, int depth) {
if (node.nodeName().equals("body") || node instanceof TextNode) {
if (node.nodeName().equals("body") || node instanceof TextNode || node instanceof Comment) {
return;
}
// assume node instanceof Element;
Expand Down
8 changes: 8 additions & 0 deletions java/src/test/java/com/google/budoux/HTMLProcessorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,12 @@ public void testGetTextWhiteSpaceAcrossElements() {
String result = HTMLProcessor.getText(html);
assertEquals(" 1 2 ", result);
}

/**
 * Resolves four phrases against HTML that has a comment node between "def" and "ghi", and
 * expects the wrapped output to contain the phrases joined by the separator with no comment
 * text in it.
 */
@Test
public void testResolveWithComments() {
  final String separator = "<wbr>";
  final String markup = "abcdef<!-- comments should be ignored-->ghijkl";
  final List<String> chunks = Arrays.asList("abc", "def", "ghi", "jkl");

  final String actual = HTMLProcessor.resolve(chunks, markup, separator);

  // The expected markup is built after resolve(), matching the original evaluation order.
  final String expected = this.wrap("abc<wbr>def<wbr>ghi<wbr>jkl");
  assertEquals(expected, actual);
}
}

0 comments on commit 02fb71c

Please sign in to comment.