Skip to content

Commit

Permalink
KAFKA-17672 Run quarantined tests separately (apache#17329)
Browse files Browse the repository at this point in the history
Introduce new quarantinedTest that excludes tests tagged with "flaky". Also introduce two new build parameters "maxQuarantineTestRetries" and "maxQuarantineTestRetryFailures".

Reviewers: Chia-Ping Tsai <[email protected]>
  • Loading branch information
mumrah authored Oct 6, 2024
1 parent f1d6549 commit d38a90d
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 14 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,18 +130,19 @@ jobs:
timeout ${TIMEOUT_MINUTES}m ./gradlew --build-cache --continue --no-scan \
-PtestLoggingEvents=started,passed,skipped,failed \
-PmaxParallelForks=2 \
-PmaxTestRetries=1 -PmaxTestRetryFailures=10 \
-PmaxTestRetries=1 -PmaxTestRetryFailures=3 \
-PmaxQuarantineTestRetries=3 -PmaxQuarantineTestRetryFailures=0 \
-PcommitId=xxxxxxxxxxxxxxxx \
test
quarantinedTest test
exitcode="$?"
echo "exitcode=$exitcode" >> $GITHUB_OUTPUT
- name: Archive JUnit reports
- name: Archive JUnit HTML reports
uses: actions/upload-artifact@v4
id: junit-upload-artifact
with:
name: junit-reports-${{ matrix.java }}
path: |
**/build/reports/tests/test/*
**/build/reports/tests/*
compression-level: 9
if-no-files-found: ignore
- name: Archive Thread Dumps
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/deflake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ jobs:
-PtestLoggingEvents=started,passed,skipped,failed \
-PignoreFailures=true -PmaxParallelForks=2 \
-Pkafka.cluster.test.repeat=${{ inputs.test-repeat }} \
${{ inputs.test-module }}:test --tests ${{ inputs.test-pattern }}
-PmaxTestRetries=${{ inputs.test-repeat }} -PmaxTestRetryFailures=0 \
-PmaxQuarantineTestRetries=${{ inputs.test-repeat }} -PmaxQuarantineTestRetryFailures=0 \
${{ inputs.test-module }}:test ${{ inputs.test-module }}:quarantinedTest --tests ${{ inputs.test-pattern }}
exitcode="$?"
echo "exitcode=$exitcode" >> $GITHUB_OUTPUT
- name: Archive JUnit reports
Expand Down
24 changes: 16 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,16 @@ Follow instructions in https://kafka.apache.org/quickstart
./gradlew docsJar # builds both (if applicable) javadoc and scaladoc jars for each module

### Run unit/integration tests ###
./gradlew test # runs both unit and integration tests
./gradlew test # runs both unit and integration tests
./gradlew unitTest
./gradlew integrationTest
./gradlew quarantinedTest # runs the quarantined tests


### Force re-running tests without code change ###
./gradlew test --rerun
./gradlew unitTest --rerun
./gradlew integrationTest --rerun
./gradlew test --rerun-tasks
./gradlew unitTest --rerun-tasks
./gradlew integrationTest --rerun-tasks

### Running a particular unit/integration test ###
./gradlew clients:test --tests RequestResponseTest
Expand All @@ -64,11 +66,17 @@ to `log4j.logger.org.apache.kafka=INFO` and then run:
And you should see `INFO` level logs in the file under the `clients/build/test-results/test` directory.

### Specifying test retries ###
By default, each failed test is retried once up to a maximum of five retries per test run. Tests are retried at the end of the test task. Adjust these parameters in the following way:
By default, each failed test is retried once up to a maximum of three total retries per test run.
Tests are retried at the end of the test task. Adjust these parameters in the following way:

./gradlew test -PmaxTestRetries=1 -PmaxTestRetryFailures=5

See [Test Retry Gradle Plugin](https://github.com/gradle/test-retry-gradle-plugin) for more details.
./gradlew test -PmaxTestRetries=1 -PmaxTestRetryFailures=3

Additionally, quarantined tests are automatically retried three times up to a total of
20 retries per run. This is controlled by similar parameters.

./gradlew test -PmaxQuarantineTestRetries=3 -PmaxQuarantineTestRetryFailures=20

See [Test Retry Gradle Plugin](https://github.com/gradle/test-retry-gradle-plugin) for and [build.yml](.github/workflows/build.yml) more details.

### Generating test coverage reports ###
Generate coverage reports for the whole project:
Expand Down
63 changes: 62 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ ext {
userMaxTestRetries = project.hasProperty('maxTestRetries') ? maxTestRetries.toInteger() : 0
userMaxTestRetryFailures = project.hasProperty('maxTestRetryFailures') ? maxTestRetryFailures.toInteger() : 0

userMaxQuarantineTestRetries = project.hasProperty('maxQuarantineTestRetries') ? maxQuarantineTestRetries.toInteger() : 0
userMaxQuarantineTestRetryFailures = project.hasProperty('maxQuarantineTestRetryFailures') ? maxQuarantineTestRetryFailures.toInteger() : 0

skipSigning = project.hasProperty('skipSigning') && skipSigning.toBoolean()
shouldSign = !skipSigning && !version.endsWith("SNAPSHOT")

Expand Down Expand Up @@ -501,6 +504,7 @@ subprojects {

useJUnitPlatform {
includeEngines 'junit-jupiter'
excludeTags 'flaky'
}

develocity {
Expand All @@ -524,7 +528,7 @@ subprojects {
if (ext.isGithubActions) {
def dest = rootProject.layout.buildDirectory.dir("junit-xml/${project.name}").get().asFile
println "Copy JUnit XML for ${project.name} to $dest"
ant.copy(todir: "$dest") {
ant.copy(todir: "$dest/test") {
ant.fileset(dir: "${test.reports.junitXml.entryPoint}")
}

Expand All @@ -537,6 +541,62 @@ subprojects {
}
}

task quarantinedTest(type: Test, dependsOn: compileJava) {
ext {
isGithubActions = System.getenv('GITHUB_ACTIONS') != null
}

// Disable caching and up-to-date for this task. We always want quarantined tests
// to run and never want to cache their results. Since we do this, we can avoid
// explicitly failing the build like we do in "test" with ext.hadFailure.
outputs.upToDateWhen { false }
outputs.cacheIf { false }

maxParallelForks = maxTestForks
ignoreFailures = userIgnoreFailures

maxHeapSize = defaultMaxHeapSize
jvmArgs = defaultJvmArgs

// KAFKA-17433 Used by deflake.yml github action to repeat individual tests
systemProperty("kafka.cluster.test.repeat", project.findProperty("kafka.cluster.test.repeat"))

testLogging {
events = userTestLoggingEvents ?: testLoggingEvents
showStandardStreams = userShowStandardStreams ?: testShowStandardStreams
exceptionFormat = testExceptionFormat
displayGranularity = 0
}
logTestStdout.rehydrate(delegate, owner, this)()

useJUnitPlatform {
includeEngines 'junit-jupiter'
includeTags 'flaky'
}

develocity {
testRetry {
maxRetries = userMaxQuarantineTestRetries
maxFailures = userMaxQuarantineTestRetryFailures
}
}

// This closure will copy JUnit XML files out of the sub-project's build directory and into
// a top-level build/junit-xml directory. This is necessary to avoid reporting on tests which
// were not run, but instead were restored via FROM-CACHE. See KAFKA-17479 for more details.
doLast {
if (ext.isGithubActions) {
def dest = rootProject.layout.buildDirectory.dir("junit-xml/${project.name}").get().asFile
println "Copy JUnit XML for ${project.name} to $dest"
ant.copy(todir: "$dest/quarantinedTest", failonerror: "false") {
ant.fileset(dir: "${quarantinedTest.reports.junitXml.entryPoint}") {
ant.include(name: "**/*.xml")
}
}
}
}
}

task integrationTest(type: Test, dependsOn: compileJava) {
maxParallelForks = maxTestForks
ignoreFailures = userIgnoreFailures
Expand Down Expand Up @@ -3507,6 +3567,7 @@ project(':connect:runtime') {
testImplementation project(':storage')
testImplementation project(':connect:test-plugins')
testImplementation project(':server-common').sourceSets.test.output
testImplementation project(':test-common:test-common-api')

testImplementation libs.junitJupiter
testImplementation libs.mockitoCore
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.admin.ConsumerGroupListing;
import org.apache.kafka.common.test.api.Flaky;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.apache.kafka.connect.runtime.SourceConnectorConfig;
import org.apache.kafka.connect.runtime.distributed.DistributedConfig;
Expand Down Expand Up @@ -331,6 +332,7 @@ public void testAlterSinkConnectorOffsets() throws Exception {
alterAndVerifySinkConnectorOffsets(baseSinkConnectorConfigs(), connect.kafka());
}

@Flaky("KAFKA-15914")
@Test
public void testAlterSinkConnectorOffsetsOverriddenConsumerGroupId() throws Exception {
Map<String, String> connectorConfigs = baseSinkConnectorConfigs();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.apache.kafka.common.errors.CoordinatorLoadInProgressException
import org.apache.kafka.common.message.AllocateProducerIdsResponseData
import org.apache.kafka.common.protocol.Errors
import org.apache.kafka.common.requests.AllocateProducerIdsResponse
import org.apache.kafka.common.test.api.Flaky
import org.apache.kafka.common.utils.{MockTime, Time}
import org.apache.kafka.server.NodeToControllerChannelManager
import org.apache.kafka.server.common.ProducerIdsBlock
Expand All @@ -40,6 +41,7 @@ import java.util.concurrent.{ConcurrentLinkedQueue, CountDownLatch, Executors, T
import scala.collection.mutable
import scala.util.{Failure, Success, Try}

@Flaky("KAFKA-17654")
class ProducerIdManagerTest {

var brokerToController: NodeToControllerChannelManager = mock(classOf[NodeToControllerChannelManager])
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.kafka.common.test.api;

import org.junit.jupiter.api.Tag;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

@Target({ElementType.TYPE, ElementType.METHOD})
@Retention(RetentionPolicy.RUNTIME)
@Tag("flaky")
public @interface Flaky {
/**
* Required reference to a KAFKA Jira ticket.
*/
String value();

/**
* Optional comment describing the reason for quarantined.
*/
String comment() default "";
}

0 comments on commit d38a90d

Please sign in to comment.