From 394966b22b54bdde62ea6dfb83bc97223a63d3fb Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 20 Oct 2023 17:48:21 +0900 Subject: [PATCH] GH-38345: [Release] Use local test data for verification if possible (#38362) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change We have external test data repositories, apache/arrow-testing and apache/parquet-testing. We use them as submodules. apache/arrow may not use the latest revisions of the test data repositories, but our verification script always uses the latest revisions, which may cause test failures. ### What changes are included in this PR? Use local test data if it exists. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #38345 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- dev/release/verify-release-candidate.sh | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 0c6ac075b636e..287c557fb5ef4 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -959,12 +959,26 @@ ensure_source_directory() { fi fi - # Ensure that the testing repositories are cloned - if [ ! -d "${ARROW_SOURCE_DIR}/testing/data" ]; then - git clone https://github.com/apache/arrow-testing.git ${ARROW_SOURCE_DIR}/testing + # Ensure that the testing repositories are prepared + if [ ! -d ${ARROW_SOURCE_DIR}/testing/data ]; then + if [ -d ${SOURCE_DIR}/../../testing/data ]; then + cp -a ${SOURCE_DIR}/../../testing/ ${ARROW_SOURCE_DIR}/ + else + git clone \ + https://github.com/apache/arrow-testing.git \ + ${ARROW_SOURCE_DIR}/testing + fi fi - if [ ! -d "${ARROW_SOURCE_DIR}/cpp/submodules/parquet-testing/data" ]; then - git clone https://github.com/apache/parquet-testing.git ${ARROW_SOURCE_DIR}/cpp/submodules/parquet-testing + if [ ! 
-d ${ARROW_SOURCE_DIR}/cpp/submodules/parquet-testing/data ]; then + if [ -d ${SOURCE_DIR}/../../cpp/submodules/parquet-testing/data ]; then + cp -a \ + ${SOURCE_DIR}/../../cpp/submodules/parquet-testing/ \ + ${ARROW_SOURCE_DIR}/cpp/submodules/ + else + git clone \ + https://github.com/apache/parquet-testing.git \ + ${ARROW_SOURCE_DIR}/cpp/submodules/parquet-testing + fi fi export ARROW_TEST_DATA=$ARROW_SOURCE_DIR/testing/data