Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable js resource tests and update with jsoup latest code #45

Merged
merged 21 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
branches: [ "release" ]
paths-ignore: [ "**.md" ]
pull_request:
branches: [ "release" ]
branches: [ "release", "develop" ]
paths-ignore: [ "**.md" ]

jobs:
Expand Down Expand Up @@ -64,6 +64,7 @@ jobs:
# ORG_GRADLE_PROJECT_signingInMemoryKeyPassword: ${{ secrets.GPG_KEY_PASSWORD }}
#
deploy_docs:
if: github.ref == 'refs/heads/release'
runs-on: macos-latest
needs:
- build
Expand Down
11 changes: 10 additions & 1 deletion ksoup-test/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ val generateBuildConfigFile: Task by tasks.creating {
}
}

tasks.all {
tasks.configureEach {
if (name != generateBuildConfigFile.name && !name.contains("publish", ignoreCase = true)) {
dependsOn(generateBuildConfigFile.name)
}
Expand All @@ -34,4 +34,13 @@ kotlin {
this.kotlin.srcDir(layout.buildDirectory.file(rootPath))
}
}
// Configure the browser test task of the Kotlin/JS (IR) target.
js(IR) {
browser {
testTask {
useMocha {
// Mocha timeout for the browser tests, set to 9 seconds.
// NOTE(review): presumably raised because JS tests now load resources over the network — confirm.
timeout = "9s"
}
}
}
}
}
1 change: 1 addition & 0 deletions ksoup-test/module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ test-dependencies:
- $libs.codepoints
- $libs.kotlinx.coroutines.test
- $libs.kotlinx.datetime
- $libs.stately.concurrent

settings:
kotlin:
Expand Down
4 changes: 0 additions & 4 deletions ksoup-test/test/com/fleeksoft/ksoup/GzipTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ import kotlin.test.assertEquals
class GzipTest {
@Test
fun testReadGzipFile() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val gzipFileStr = TestHelper.readGzipResource("htmltests/gzip.html.gz").readAll()
.toString(charset = Charsets.UTF8)
val expected = """<title>Gzip test</title>
Expand Down
21 changes: 11 additions & 10 deletions ksoup-test/test/com/fleeksoft/ksoup/TestHelper.kt
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,24 @@ import korlibs.io.stream.readAll
object TestHelper {

suspend fun readGzipResource(file: String): SyncStream {
return readGzipFile(getResourceAbsolutePath(file))
return readGzipFile(getResourceAbsolutePath(file).uniVfs)
}

fun getResourceAbsolutePath(resourceName: String): String {
if (Platform.current == PlatformType.WINDOWS) {
if (Platform.isWindows()) {
return "../../../../testResources/$resourceName"
} else if (Platform.isJS()) {
return "https://raw.githubusercontent.com/fleeksoft/ksoup/release/ksoup-test/testResources/$resourceName"
}
return "${BuildConfig.PROJECT_ROOT}/ksoup-test/testResources/$resourceName"
}

suspend fun getFileAsString(file: VfsFile): String {
val bytes: ByteArray =
if (file.fullName.endsWith(".gz")) {
readGzipFile(file.absolutePath).readAll()
} else {
readFile(file.absolutePath).readAll()
}
val bytes: ByteArray = if (file.fullName.endsWith(".gz")) {
readGzipFile(file).readAll()
} else {
readFile(file).readAll()
}
return bytes.decodeToString()
}

Expand All @@ -36,9 +37,9 @@ object TestHelper {

suspend fun pathToStream(file: VfsFile): SyncStream {
return if (file.fullName.endsWith(".gz")) {
readGzipFile(file.absolutePath)
readGzipFile(file)
} else {
readFile(file.absolutePath)
readFile(file)
}
}
}
90 changes: 54 additions & 36 deletions ksoup-test/test/com/fleeksoft/ksoup/helper/DataUtilTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,6 @@ class DataUtilTest {

@Test
fun supportsBOMinFiles() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
var input = TestHelper.getResourceAbsolutePath("bomtests/bom_utf16be.html")
var doc: Document =
Ksoup.parseFile(filePath = input, baseUri = "http://example.com", charsetName = null)
Expand All @@ -187,7 +183,7 @@ class DataUtilTest {
assertTrue(doc.title().contains("UTF-16LE"))
assertTrue(doc.text().contains("가각갂갃간갅"))

if (Platform.isJS()) {
if (Platform.isJS() || Platform.isWindows()) {
// FIXME: UTF-32 charset not supported
return@runTest
}
Expand All @@ -203,32 +199,58 @@ class DataUtilTest {
}

@Test
fun supportsUTF8BOM() = runTest {
if (Platform.isJS()) {
// js resource access issue
fun streamerSupportsBOMinFiles() = runTest {
    // test files from http://www.i18nl10n.com/korean/utftest/
    val sharedParser = Parser.htmlParser()

    // Stream-parses one BOM fixture with a null charset, then checks the title and body text.
    suspend fun verifyBomFixture(resourceName: String, expectedTitlePart: String) {
        val fixture = TestHelper.getResourceAbsolutePath(resourceName).uniVfs
        val parsed: Document = DataUtil.streamParser(
            file = fixture,
            baseUri = "http://example.com",
            charset = null,
            parser = sharedParser,
        ).complete()
        assertTrue(parsed.title().contains(expectedTitlePart))
        assertTrue(parsed.text().contains("가각갂갃간갅"))
    }

    verifyBomFixture("bomtests/bom_utf16be.html", "UTF-16BE")
    verifyBomFixture("bomtests/bom_utf16le.html", "UTF-16LE")

    if (Platform.isJS() || Platform.isWindows()) {
        // FIXME: UTF-32 charset not supported
        return@runTest
    }

    verifyBomFixture("bomtests/bom_utf32be.html", "UTF-32BE")
    verifyBomFixture("bomtests/bom_utf32le.html", "UTF-32LE")
}

@Test
fun supportsUTF8BOM() = runTest {
    // Parses the bom_utf8.html test resource and checks that its <title> reads "OK".
    val bomFilePath: String = TestHelper.getResourceAbsolutePath("bomtests/bom_utf8.html")
    val parsed: Document = Ksoup.parseFile(bomFilePath, "http://example.com", null)
    val titleText = parsed.head().select("title").text()
    assertEquals("OK", titleText)
}

@Test
fun noExtraNULLBytes() {
val b =
"<html><head><meta charset=\"UTF-8\"></head><body><div><u>ü</u>ü</div></body></html>".toByteArray(
Charsets.UTF8,
)
val b = "<html><head><meta charset=\"UTF-8\"></head><body><div><u>ü</u>ü</div></body></html>"
.toByteArray(Charsets.UTF8)
val doc = Ksoup.parse(b.openSync(), baseUri = "", charsetName = null)
assertFalse(doc.outerHtml().contains("\u0000"))
}

@Test
fun supportsZippedUTF8BOM() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input: String = TestHelper.getResourceAbsolutePath("bomtests/bom_utf8.html.gz")
val doc: Document =
Ksoup.parseFile(
Expand All @@ -243,6 +265,22 @@ class DataUtilTest {
)
}

@Test
fun streamerSupportsZippedUTF8BOM() = runTest {
    // Stream-parse the bom_utf8.html.gz fixture with a null charset and check title and body text.
    val file = TestHelper.getResourceAbsolutePath("bomtests/bom_utf8.html.gz").uniVfs
    val doc = DataUtil.streamParser(
        file = file,
        baseUri = "http://example.com",
        charset = null,
        parser = Parser.htmlParser(),
    ).complete()
    assertEquals("OK", doc.head().select("title").text())
    assertEquals(
        "There is a UTF8 BOM at the top (before the XML decl). If not read correctly, will look like a non-joining space.",
        doc.body().text(),
    )
}

@Test
fun supportsXmlCharsetDeclaration() {
val encoding = "iso-8859-1"
Expand All @@ -259,10 +297,6 @@ class DataUtilTest {

@Test
fun loadsGzipFile() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input: String = TestHelper.getResourceAbsolutePath("htmltests/gzip.html.gz")
val doc: Document = Ksoup.parseFile(filePath = input, charsetName = null)
doc.toString()
Expand All @@ -272,10 +306,6 @@ class DataUtilTest {

@Test
fun loadsZGzipFile() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
// compressed on win, with z suffix
val input: String = TestHelper.getResourceAbsolutePath("htmltests/gzip.html.z")
val doc: Document = Ksoup.parseFile(filePath = input, charsetName = null)
Expand All @@ -285,10 +315,6 @@ class DataUtilTest {

@Test
fun handlesFakeGzipFile() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input: String = TestHelper.getResourceAbsolutePath("htmltests/fake-gzip.html.gz")
val doc: Document = Ksoup.parseFile(filePath = input, charsetName = null)
assertEquals("This is not gzipped", doc.title())
Expand All @@ -297,10 +323,6 @@ class DataUtilTest {

@Test
fun handlesChunkedInputStream() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val inputFile: String = TestHelper.getResourceAbsolutePath("htmltests/large.html.gz")
val input: String = TestHelper.getFileAsString(inputFile.uniVfs)
// val stream = VaryingBufferReader(BufferReader(input))
Expand All @@ -313,10 +335,6 @@ class DataUtilTest {

@Test
fun handlesUnlimitedRead() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val inputFile: String = TestHelper.getResourceAbsolutePath("htmltests/large.html.gz")
val input: String = TestHelper.getFileAsString(inputFile.uniVfs)
val byteBuffer: ByteArray = DataUtil.readToByteBuffer(input.openSync(), 0)
Expand Down
58 changes: 13 additions & 45 deletions ksoup-test/test/com/fleeksoft/ksoup/integration/ParseTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,17 @@ import kotlin.test.assertTrue
class ParseTest {
@Test
fun testHtml5Charset() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
if (Platform.isApple()) {
// apple don't support gb2312 or gbk
if (Platform.isApple() || Platform.isWindows()) {
// Apple and Windows targets don't support gb2312 or gbk
return@runTest
}
// test that <meta charset="gb2312"> works
var input = TestHelper.getResourceAbsolutePath("htmltests/meta-charset-1.html")
var doc: Document =
parseFile(
filePath = input,
baseUri = "http://example.com/",
charsetName = null,
) // gb2312, has html5 <meta charset>
var doc: Document = parseFile(
filePath = input,
baseUri = "http://example.com/",
charsetName = null,
) // gb2312, has html5 <meta charset>
if (Platform.isJS()) {
// FIXME: on js it is returning GBK
assertEquals("GBK", doc.outputSettings().charset().name.uppercase())
Expand Down Expand Up @@ -85,10 +80,6 @@ class ParseTest {

@Test
fun testLowercaseUtf8Charset() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input = TestHelper.getResourceAbsolutePath("htmltests/lowercase-charset-test.html")
val doc: Document = parseFile(filePath = input, charsetName = null)
val form = doc.select("#form").first()
Expand All @@ -98,11 +89,6 @@ class ParseTest {

@Test
fun testXwiki() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
// https://github.com/jhy/jsoup/issues/1324
// this tests that when in CharacterReader we hit a buffer while marked, we preserve the mark when buffered up and can rewind
val input = TestHelper.getResourceAbsolutePath("htmltests/xwiki-1324.html.gz")
val doc: Document =
Expand All @@ -122,21 +108,15 @@ class ParseTest {

@Test
fun testXwikiExpanded() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
// https://github.com/jhy/jsoup/issues/1324
// this tests that if there is a huge illegal character reference, we can get through a buffer and rewind, and still catch that it's an invalid reference,
// and the parse tree is correct.
val parser = Parser.htmlParser()
val doc =
parse(
syncStream = TestHelper.resourceFilePathToStream("htmltests/xwiki-edit.html.gz"),
baseUri = "https://localhost/",
charsetName = "UTF-8",
parser = parser.setTrackErrors(100),
)
val doc = parse(
syncStream = TestHelper.resourceFilePathToStream("htmltests/xwiki-edit.html.gz"),
baseUri = "https://localhost/",
charsetName = "UTF-8",
parser = parser.setTrackErrors(100),
)
val errors = parser.getErrors()
assertEquals("XWiki Jetty HSQLDB 12.1-SNAPSHOT", doc.select("#xwikiplatformversion").text())
assertEquals(0, errors.size) // not an invalid reference because did not look legit
Expand All @@ -150,10 +130,6 @@ class ParseTest {

@Test
fun testWikiExpandedFromString() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input = TestHelper.getResourceAbsolutePath("htmltests/xwiki-edit.html.gz")
val html = TestHelper.getFileAsString(input.uniVfs)
val doc = parse(html)
Expand All @@ -165,10 +141,6 @@ class ParseTest {

@Test
fun testWikiFromString() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val input = TestHelper.getResourceAbsolutePath("htmltests/xwiki-1324.html.gz")
val html = TestHelper.getFileAsString(input.uniVfs)
val doc = parse(html)
Expand All @@ -180,10 +152,6 @@ class ParseTest {

@Test
fun testFileParseNoCharsetMethod() = runTest {
if (Platform.isJS()) {
// js resource access issue
return@runTest
}
val file = TestHelper.getResourceAbsolutePath("htmltests/xwiki-1324.html.gz")
val doc: Document = parseFile(file)
assertEquals("XWiki Jetty HSQLDB 12.1-SNAPSHOT", doc.select("#xwikiplatformversion").text())
Expand Down
Loading
Loading