From 02fb71ce79ac9473a9b4550d5502a45f47f514db Mon Sep 17 00:00:00 2001
From: Shuhei Iitsuka
Date: Mon, 21 Oct 2024 17:40:24 +0900
Subject: [PATCH] [Java] Handle comment nodes (#764)

---
 java/src/main/java/com/google/budoux/HTMLProcessor.java   | 3 ++-
 .../test/java/com/google/budoux/HTMLProcessorTest.java    | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/java/src/main/java/com/google/budoux/HTMLProcessor.java b/java/src/main/java/com/google/budoux/HTMLProcessor.java
index 2e504863..f79e47fb 100644
--- a/java/src/main/java/com/google/budoux/HTMLProcessor.java
+++ b/java/src/main/java/com/google/budoux/HTMLProcessor.java
@@ -32,6 +32,7 @@
 import java.util.Set;
 import java.util.stream.Collectors;
 import org.jsoup.Jsoup;
+import org.jsoup.nodes.Comment;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.nodes.Node;
@@ -156,7 +157,7 @@ public void head(Node node, int depth) {
 
     @Override
     public void tail(Node node, int depth) {
-      if (node.nodeName().equals("body") || node instanceof TextNode) {
+      if (node.nodeName().equals("body") || node instanceof TextNode || node instanceof Comment) {
         return;
       }
       // assume node instanceof Element;
diff --git a/java/src/test/java/com/google/budoux/HTMLProcessorTest.java b/java/src/test/java/com/google/budoux/HTMLProcessorTest.java
index 4790eafe..bb9bd579 100644
--- a/java/src/test/java/com/google/budoux/HTMLProcessorTest.java
+++ b/java/src/test/java/com/google/budoux/HTMLProcessorTest.java
@@ -136,4 +136,12 @@ public void testGetTextWhiteSpaceAcrossElements() {
     String result = HTMLProcessor.getText(html);
     assertEquals(" 1 2 ", result);
   }
+
+  @Test
+  public void testResolveWithComments() {
+    List phrases = Arrays.asList("abc", "def", "ghi", "jkl");
+    String html = "abcdefghijkl";
+    String result = HTMLProcessor.resolve(phrases, html, "");
+    assertEquals(this.wrap("abcdefghijkl"), result);
+  }
 }