From 80551c34a865fbb338344a6d742c056e5f6ee66e Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 13 Sep 2024 15:12:45 +0900 Subject: [PATCH] GH-44062: [Dev][Archery][Integration] Reduce needless test matrix If we enable C++, Java and Rust, we use the following patterns: | Producer | Consumer | |----------|----------| | C++ | C++ | | C++ | Java | | C++ | Rust | | Java | C++ | | Java | Java | | Java | Rust | | Rust | C++ | | Rust | Java | | Rust | Rust | In apache/arrow, the following patterns are redundant because they should be done in apache/arrow-rs: | Producer | Consumer | |----------|----------| | Rust | Rust | In apache/arrow-rs, the following patterns are redundant because they should be done in apache/arrow: | Producer | Consumer | |----------|----------| | C++ | C++ | | C++ | Java | | Java | C++ | | Java | Java | Add `--target-language` option. We can specify target languages by this. (We can specify `--target-language` multiple times.) Here are expected usages: In apache/arrow: * `--target-language=cpp` * `--target-language=csharp` * `--target-language=go` * `--target-language=java` * `--target-language=js` In apache/arrow-rs: * `--target-language=rust` Here is an example in apache/arrow-rs: T: Languages specified by `--target-language` * rust O: Languages not specified by `--target-language` * cpp * csharp * go * java * js * nanoarrow Used matrix: | Producer | Consumer | |----------|----------| | Rust | Rust | | Rust | C++ | | Rust | C# | | Rust | Go | | Rust | Java | | Rust | JS | | Rust | nanoarrow| | C++ | Rust | | C# | Rust | | Go | Rust | | Java | Rust | | JS | Rust | | nanoarrow| Rust | --- ci/scripts/integration_arrow.sh | 5 ++ dev/archery/archery/cli.py | 2 + dev/archery/archery/integration/runner.py | 56 ++++++++++++++++++----- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh index 3050ad3111198..1fea8e97daf94 100755 --- a/ci/scripts/integration_arrow.sh +++ 
b/ci/scripts/integration_arrow.sh @@ -63,10 +63,15 @@ time archery integration \ --run-ipc \ --run-flight \ --with-cpp=$([ "$ARROW_INTEGRATION_CPP" == "ON" ] && echo "1" || echo "0") \ + --target-language=cpp \ --with-csharp=$([ "$ARROW_INTEGRATION_CSHARP" == "ON" ] && echo "1" || echo "0") \ + --target-language=csharp \ --with-go=$([ "$ARROW_INTEGRATION_GO" == "ON" ] && echo "1" || echo "0") \ + --target-language=go \ --with-java=$([ "$ARROW_INTEGRATION_JAVA" == "ON" ] && echo "1" || echo "0") \ + --target-language=java \ --with-js=$([ "$ARROW_INTEGRATION_JS" == "ON" ] && echo "1" || echo "0") \ + --target-language=js \ --gold-dirs=$gold_dir/0.14.1 \ --gold-dirs=$gold_dir/0.17.1 \ --gold-dirs=$gold_dir/1.0.0-bigendian \ diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index cd746f9c4499a..a89a13af68409 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -744,6 +744,8 @@ def _set_default(opt, default): @click.option('--with-rust', type=bool, default=False, help='Include Rust in integration tests', envvar="ARCHERY_INTEGRATION_WITH_RUST") +@click.option('--target-language', multiple=True, + help=('Target languages in this integration tests')) @click.option('--write_generated_json', default="", help='Generate test JSON to indicated path') @click.option('--run-ipc', is_flag=True, default=False, diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index ca5febca9f801..7877411412bf4 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -67,12 +67,13 @@ class IntegrationRunner(object): def __init__(self, json_files, flight_scenarios: List[Scenario], - testers: List[Tester], tempdir=None, - debug=False, stop_on_error=True, gold_dirs=None, + testers: List[Tester], other_testers: List[Tester], + tempdir=None, debug=False, stop_on_error=True, gold_dirs=None, serial=False, match=None, **unused_kwargs): self.json_files = json_files 
self.flight_scenarios = flight_scenarios self.testers = testers + self.other_testers = other_testers self.temp_dir = tempdir or tempfile.mkdtemp() self.debug = debug self.stop_on_error = stop_on_error @@ -100,6 +101,20 @@ def run_ipc(self): producer, consumer, self._produce_consume, self.json_files) + for producer, consumer in itertools.product( + filter(lambda t: t.PRODUCER, self.testers), + filter(lambda t: t.CONSUMER, self.other_testers)): + self._compare_ipc_implementations( + producer, consumer, self._produce_consume, + self.json_files) + + for producer, consumer in itertools.product( + filter(lambda t: t.PRODUCER, self.other_testers), + filter(lambda t: t.CONSUMER, self.testers)): + self._compare_ipc_implementations( + producer, consumer, self._produce_consume, + self.json_files) + if self.gold_dirs: for gold_dir, consumer in itertools.product( self.gold_dirs, @@ -124,7 +139,7 @@ def run_flight(self): """ servers = filter(lambda t: t.FLIGHT_SERVER, self.testers) clients = filter(lambda t: (t.FLIGHT_CLIENT and t.CONSUMER), - self.testers) + self.testers + self.other_testers) for server, client in itertools.product(servers, clients): self._compare_flight_implementations(server, client) log('\n') @@ -138,6 +153,14 @@ def run_c_data(self): filter(lambda t: t.C_DATA_SCHEMA_EXPORTER, self.testers), filter(lambda t: t.C_DATA_SCHEMA_IMPORTER, self.testers)): self._compare_c_data_implementations(producer, consumer) + for producer, consumer in itertools.product( + filter(lambda t: t.C_DATA_SCHEMA_EXPORTER, self.testers), + filter(lambda t: t.C_DATA_SCHEMA_IMPORTER, self.other_testers)): + self._compare_c_data_implementations(producer, consumer) + for producer, consumer in itertools.product( + filter(lambda t: t.C_DATA_SCHEMA_EXPORTER, self.other_testers), + filter(lambda t: t.C_DATA_SCHEMA_IMPORTER, self.testers)): + self._compare_c_data_implementations(producer, consumer) log('\n') def _gold_tests(self, gold_dir): @@ -560,31 +583,39 @@ def get_static_json_files(): def 
run_all_tests(with_cpp=True, with_java=True, with_js=True, with_csharp=True, with_go=True, with_rust=False, with_nanoarrow=False, run_ipc=False, run_flight=False, - run_c_data=False, tempdir=None, **kwargs): + run_c_data=False, tempdir=None, target_languages=[], + **kwargs): tempdir = tempdir or tempfile.mkdtemp(prefix='arrow-integration-') testers: List[Tester] = [] + other_testers: List[Tester] = [] + + def append_tester(language, tester): + if len(target_languages) == 0 or language in target_languages: + testers.append(tester) + else: + other_testers.append(tester) if with_cpp: - testers.append(CppTester(**kwargs)) + append_tester("cpp", CppTester(**kwargs)) if with_java: - testers.append(JavaTester(**kwargs)) + append_tester("java", JavaTester(**kwargs)) if with_js: - testers.append(JSTester(**kwargs)) + append_tester("js", JSTester(**kwargs)) if with_csharp: - testers.append(CSharpTester(**kwargs)) + append_tester("csharp", CSharpTester(**kwargs)) if with_go: - testers.append(GoTester(**kwargs)) + append_tester("go", GoTester(**kwargs)) if with_nanoarrow: - testers.append(NanoarrowTester(**kwargs)) + append_tester("nanoarrow", NanoarrowTester(**kwargs)) if with_rust: - testers.append(RustTester(**kwargs)) + append_tester("rust", RustTester(**kwargs)) static_json_files = get_static_json_files() generated_json_files = datagen.get_generated_json_files(tempdir=tempdir) @@ -666,7 +697,8 @@ def run_all_tests(with_cpp=True, with_java=True, with_js=True, ), ] - runner = IntegrationRunner(json_files, flight_scenarios, testers, **kwargs) + runner = IntegrationRunner(json_files, flight_scenarios, testers, + other_testers, **kwargs) if run_ipc: runner.run_ipc() if run_flight: