From fbec8e2ad1dcbebbbc96491f8b6b208f5b3ac91f Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Tue, 14 Feb 2023 09:17:21 -0800 Subject: [PATCH] Reduce flakiness on Windows for BwoB tests `BuildWithoutTheBytesIntegrationTest` has been flaky on Windows. Example error leads to file system errors. ``` 1) incrementalBuild_treeArtifacts_correctlyProducesNewTree(com.google.devtools.build.lib.remote.BuildWithoutTheBytesIntegrationTest) java.io.IOException: C:/b/iahsqj7l/execroot/io_bazel/_tmp/b474bcb9956744b234827924ec01952d/remote.pid_file (Permission denied) at com.google.devtools.build.lib.windows.WindowsFileSystem.delete(WindowsFileSystem.java:60) at com.google.devtools.build.lib.vfs.Path.delete(Path.java:587) at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.bestEffortDeleteTreesBelow(BuildIntegrationTestCase.java:387) at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.cleanUp(BuildIntegrationTestCase.java:364) ... 26 trimmed Caused by: java.nio.file.AccessDeniedException: C:/b/iahsqj7l/execroot/io_bazel/_tmp/b474bcb9956744b234827924ec01952d/remote.pid_file at com.google.devtools.build.lib.windows.WindowsFileOperations.deletePath(WindowsFileOperations.java:279) at com.google.devtools.build.lib.windows.WindowsFileSystem.delete(WindowsFileSystem.java:56) ... 30 more 2) downloadToplevel_treeArtifacts(com.google.devtools.build.lib.remote.BuildWithoutTheBytesIntegrationTest) java.io.IOException: C:/b/iahsqj7l/execroot/io_bazel/_tmp/b474bcb9956744b234827924ec01952d/remote.work_path already exists at com.google.devtools.build.lib.remote.util.IntegrationTestUtils.ensureMkdir(IntegrationTestUtils.java:116) at com.google.devtools.build.lib.remote.util.IntegrationTestUtils.startWorker(IntegrationTestUtils.java:87) at com.google.devtools.build.lib.remote.util.IntegrationTestUtils.startWorker(IntegrationTestUtils.java:77) at com.google.devtools.build.lib.remote.BuildWithoutTheBytesIntegrationTest.setupOptions(BuildWithoutTheBytesIntegrationTest.java:49) at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.createRuntimeWrapper(BuildIntegrationTestCase.java:317) at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.createFilesAndMocks(BuildIntegrationTestCase.java:234) ``` This PR fixes 1) by not using pid but checking port status directly to wait for the remote worker to start up and 2) by cleaning up resources used by remote worker after each test case. Closes #17479. PiperOrigin-RevId: 509549826 Change-Id: Icef2be46d5f2a6025cad0cefcf766ae16b4552d8 --- .../google/devtools/build/lib/remote/BUILD | 2 + .../lib/remote/util/IntegrationTestUtils.java | 69 ++++++++++++------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/src/test/java/com/google/devtools/build/lib/remote/BUILD b/src/test/java/com/google/devtools/build/lib/remote/BUILD index a65bcfc0ba3d25..ab9e4802ce682e 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/BUILD +++ b/src/test/java/com/google/devtools/build/lib/remote/BUILD @@ -149,6 +149,7 @@ java_test( size = "large", srcs = ["BuildWithoutTheBytesIntegrationTest.java"], shard_count = 5, + tags = ["requires-network"], runtime_deps = [ "//third_party/grpc-java:grpc-jar", ], @@ -173,6 +174,7 @@ java_test( java_test( name = "DiskCacheIntegrationTest", srcs = ["DiskCacheIntegrationTest.java"], + tags = ["requires-network"], runtime_deps = [ "//third_party/grpc-java:grpc-jar", ], diff --git a/src/test/java/com/google/devtools/build/lib/remote/util/IntegrationTestUtils.java b/src/test/java/com/google/devtools/build/lib/remote/util/IntegrationTestUtils.java index 44e204f06b53f7..e2d2df95f93f8c 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/util/IntegrationTestUtils.java +++ b/src/test/java/com/google/devtools/build/lib/remote/util/IntegrationTestUtils.java @@ -25,8 +25,14 @@ import java.io.File; import java.io.IOException; import java.net.DatagramSocket; +import java.net.InetSocketAddress; import java.net.ServerSocket; import java.net.SocketException; +import java.nio.channels.SocketChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Comparator; import java.util.Random; /** Integration test utilities. */ @@ -73,6 +79,30 @@ public static int pickUnusedRandomPort() throws IOException, InterruptedExceptio throw new IOException("Failed to find available port"); } + private static void waitForPortOpen(Subprocess process, int port) + throws IOException, InterruptedException { + var addr = new InetSocketAddress("localhost", port); + var timeout = new IOException("Timed out when waiting for port to open"); + for (var i = 0; i < 20; ++i) { + if (!process.isAlive()) { + var message = new String(process.getErrorStream().readAllBytes(), UTF_8); + throw new IOException("Failed to start worker: " + message); + } + + try { + try (var socketChannel = SocketChannel.open()) { + socketChannel.configureBlocking(/* block= */ true); + socketChannel.connect(addr); + } + return; + } catch (IOException e) { + timeout.addSuppressed(e); + Thread.sleep(1000); + } + } + throw timeout; + } + public static WorkerInstance startWorker() throws IOException, InterruptedException { return startWorker(/* useHttp= */ false); } @@ -82,7 +112,6 @@ public static WorkerInstance startWorker(boolean useHttp) PathFragment testTmpDir = PathFragment.create(tmpDirFile().getAbsolutePath()); PathFragment workPath = testTmpDir.getRelative("remote.work_path"); PathFragment casPath = testTmpDir.getRelative("remote.cas_path"); - PathFragment pidPath = testTmpDir.getRelative("remote.pid_file"); int workerPort = pickUnusedRandomPort(); ensureMkdir(workPath); ensureMkdir(casPath); @@ -94,20 +123,10 @@ public static WorkerInstance startWorker(boolean useHttp) workerPath, "--work_path=" + workPath.getSafePathString(), "--cas_path=" + casPath.getSafePathString(), - (useHttp ? "--http_listen_port=" : "--listen_port=") + workerPort, - "--pid_file=" + pidPath)) + (useHttp ? "--http_listen_port=" : "--listen_port=") + workerPort)) .start(); - - File pidFile = new File(pidPath.getSafePathString()); - while (!pidFile.exists()) { - if (!workerProcess.isAlive()) { - String message = new String(workerProcess.getErrorStream().readAllBytes(), UTF_8); - throw new IOException("Failed to start worker: " + message); - } - Thread.sleep(1); - } - - return new WorkerInstance(workerProcess, workerPort, workPath, casPath, pidPath); + waitForPortOpen(workerProcess, workerPort); + return new WorkerInstance(workerProcess, workerPort, workPath, casPath); } private static void ensureMkdir(PathFragment path) throws IOException { @@ -125,23 +144,25 @@ public static class WorkerInstance { private final int port; private final PathFragment workPath; private final PathFragment casPath; - private final PathFragment pidPath; private WorkerInstance( - Subprocess process, - int port, - PathFragment workPath, - PathFragment casPath, - PathFragment pidPath) { + Subprocess process, int port, PathFragment workPath, PathFragment casPath) { this.process = process; this.port = port; this.workPath = workPath; this.casPath = casPath; - this.pidPath = pidPath; } - public void stop() { + public void stop() throws IOException { process.destroyAndWait(); + deleteDir(workPath); + deleteDir(casPath); + } + + private static void deleteDir(PathFragment path) throws IOException { + try (var stream = Files.walk(Paths.get(path.getSafePathString()))) { + stream.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); + } } public int getPort() { @@ -155,9 +176,5 @@ public PathFragment getWorkPath() { public PathFragment getCasPath() { return casPath; } - - public PathFragment getPidPath() { - return pidPath; - } } }