Skip to content

Commit

Permalink
Merge pull request #1909 from microsoft/jumin/fix_pyspark_test
Browse files: browse the repository at this point in the history
Fix pyspark test
  • Loading branch information
miguelgfierro authored Mar 30, 2023
2 parents b3a19c2 + d309100 commit 72a5100
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 29 deletions.
34 changes: 11 additions & 23 deletions .github/actions/azureml-test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ inputs:
RG:
required: false
type: string
default: "recommenders_project_resources"
default: "recommenders_project_resources"
# AzureML workspace name
WS:
required: false
type: string
default: "azureml-test-workspace"
default: "azureml-test-workspace"
# test logs path
TEST_LOGS_PATH:
required: false
Expand All @@ -70,7 +70,7 @@ runs:
steps:
- name: Setup python
uses: actions/setup-python@v4
with:
with:
python-version: "3.8"
- name: Install azureml-core and azure-cli on a GitHub hosted server
shell: bash
Expand All @@ -92,9 +92,9 @@ runs:
python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py --clustername ${{inputs.CPU_CLUSTER_NAME}}
--subid ${{inputs.AZUREML_TEST_SUBID}} --reponame "recommenders" --branch ${{ github.ref }}
--rg ${{inputs.RG}} --wsname ${{inputs.WS}} --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}}
--testlogs ${{inputs.TEST_LOGS_PATH}} --testkind ${{inputs.TEST_KIND}}
--testlogs ${{inputs.TEST_LOGS_PATH}} --testkind ${{inputs.TEST_KIND}}
--conda_pkg_python ${{inputs.PYTHON_VERSION}} --testgroup ${{inputs.TEST_GROUP}}
--disable-warnings
--disable-warnings
- name: Submit GPU tests to AzureML
shell: bash
if: contains(inputs.TEST_GROUP, 'gpu')
Expand All @@ -108,24 +108,12 @@ runs:
- name: Submit PySpark tests to AzureML
shell: bash
if: contains(inputs.TEST_GROUP, 'spark')
run: |
echo $JAVA_HOME
echo $PYSPARK_PYTHON
echo $PYSPARK_DRIVER_PYTHON
export PYSPARK_PYTHON=$Python_ROOT_DIR/bin/python
export PYSPARK_DRIVER_PYTHON=$Python_ROOT_DIR/bin/python
echo $PYSPARK_PYTHON
echo $PYSPARK_DRIVER_PYTHON
sudo apt install openjdk-17-jre -q
JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
echo $JAVA_HOME
unset SPARK_HOME
env
python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py --clustername ${{inputs.CPU_CLUSTER_NAME}} \
--subid ${{inputs.AZUREML_TEST_SUBID}} --reponame "recommenders" --branch ${{ github.ref }} \
--rg ${{inputs.RG}} --wsname ${{inputs.WS}} --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}} \
--testlogs ${{inputs.TEST_LOGS_PATH}} --add_spark_dependencies --testkind ${{inputs.TEST_KIND}} \
--conda_pkg_python ${{inputs.PYTHON_VERSION}} --testgroup ${{inputs.TEST_GROUP}} \
run: >-
python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py --clustername ${{inputs.CPU_CLUSTER_NAME}}
--subid ${{inputs.AZUREML_TEST_SUBID}} --reponame "recommenders" --branch ${{ github.ref }}
--rg ${{inputs.RG}} --wsname ${{inputs.WS}} --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}}
--testlogs ${{inputs.TEST_LOGS_PATH}} --add_spark_dependencies --testkind ${{inputs.TEST_KIND}}
--conda_pkg_python ${{inputs.PYTHON_VERSION}} --testgroup ${{inputs.TEST_GROUP}}
--disable-warnings
- name: Print test logs
shell: bash
Expand Down
6 changes: 5 additions & 1 deletion recommenders/utils/spark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def start_or_get_spark(
Args:
app_name (str): set name of the application
url (str): URL for spark master
memory (str): size of memory for spark driver
memory (str): size of memory for spark driver. This will be ignored if spark.driver.memory is set in config.
config (dict): dictionary of configuration options
packages (list): list of packages to install
jars (list): list of jar files to add
Expand Down Expand Up @@ -65,5 +65,9 @@ def start_or_get_spark(
if config is None or "spark.driver.memory" not in config:
spark_opts.append('config("spark.driver.memory", "{}")'.format(memory))

# Set larger stack size
spark_opts.append('config("spark.executor.extraJavaOptions", "-Xss4m")')
spark_opts.append('config("spark.driver.extraJavaOptions", "-Xss4m")')

spark_opts.append("getOrCreate()")
return eval(".".join(spark_opts))
9 changes: 4 additions & 5 deletions tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,6 @@ def create_run_config(
conda_dep.add_channel("conda-forge")
conda_dep.add_conda_package(conda_pkg_jdk)
conda_dep.add_pip_package("recommenders[dev,examples,spark]")
# run_azuremlcompute.environment_variables = {
# "PYSPARK_PYTHON": "$Python_ROOT_DIR/bin/python",
# "PYSPARK_DRIVER_PYTHON": "$Python_ROOT_DIR/bin/python",
# }
else:
conda_dep.add_pip_package("recommenders[dev,examples]")

Expand Down Expand Up @@ -270,7 +266,6 @@ def submit_experiment_to_azureml(
script=test,
run_config=run_config,
arguments=arguments,
# docker_runtime_config=dc
)
run = experiment.submit(script_run_config)
# waits only for configuration to complete
Expand Down Expand Up @@ -483,9 +478,13 @@ def create_arg_parser():
)

# add helpful information to experiment on Azure
run.tag("Python", args.conda_pkg_python)
run.tag("RepoName", args.reponame)
run.tag("Branch", args.branch)
run.tag("PR", args.pr)
run.tag("script", args.test)
run.tag("testgroup", args.testgroup)
run.tag("testkind", args.testkind)

# download logs file from AzureML
run.download_file(name="test_logs", output_file_path=args.testlogs)
Expand Down
6 changes: 6 additions & 0 deletions tests/integration/examples/test_notebooks_pyspark.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import sys
import pytest

Expand Down Expand Up @@ -79,6 +80,11 @@ def test_mmlspark_lightgbm_criteo_integration(notebooks, output_notebook, kernel
)
def test_benchmark_movielens_pyspark(notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg):
notebook_path = notebooks["benchmark_movielens"]

os.environ["PYSPARK_PYTHON"] = sys.executable
os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
os.environ.pop("SPARK_HOME", None)

pm.execute_notebook(
notebook_path,
output_notebook,
Expand Down

0 comments on commit 72a5100

Please sign in to comment.