#**********************
# Variable definitions
#**********************
SHELL := /bin/sh
# Set variables if testing locally
ifeq ($(IS_RELEASE_BUILD),)
	SPARK_VERSION := 3.5
	PROCESSOR := cpu
	FRAMEWORK_VERSION := py39
	SM_VERSION := 1.0
	USE_CASE := processing
	BUILD_CONTEXT := ./spark/${USE_CASE}/${SPARK_VERSION}/py3
	export SPARK_ACCOUNT_ID=$(AWS_ACCOUNT_ID)
	export INTEG_TEST_ACCOUNT=$(INTEG_TEST_ACCOUNT_ID)
	export INTEG_TEST_ROLE=$(SAGEMAKER_ROLE)
	export DEST_REPO=$(SPARK_REPOSITORY)
endif
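# For a local (non-release) run, the environment is expected to supply AWS_ACCOUNT_ID,
# INTEG_TEST_ACCOUNT_ID, SAGEMAKER_ROLE, and SPARK_REPOSITORY (consumed above), plus
# AWS_PARTITION, REGION, AWS_DOMAIN, and VERSION (consumed below).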
ROLE := arn:${AWS_PARTITION}:iam::$(INTEG_TEST_ACCOUNT):role/$(INTEG_TEST_ROLE)
IMAGE_URI := $(SPARK_ACCOUNT_ID).dkr.ecr.$(REGION).$(AWS_DOMAIN)/$(DEST_REPO):$(VERSION)
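# With illustrative values, IMAGE_URI expands to something like:
#   123456789012.dkr.ecr.us-west-2.amazonaws.com/sagemaker-spark:latest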
# Default target. (There is no standalone `test` target; `test-all` runs local and SageMaker tests.)
all: build test-all
# Sets up the pipenv environment and copies the build files (Pipfile, pyproject.toml,
# setup.py) for the selected framework version into the Docker build context.
init:
	python --version
	pip install --upgrade pip
	# pipenv > 2022.4.8 fails to build smspark
	python -m pip install pipenv==2022.4.8
	cp smsparkbuild/${FRAMEWORK_VERSION}/Pipfile .
	cp smsparkbuild/${FRAMEWORK_VERSION}/pyproject.toml .
	cp smsparkbuild/${FRAMEWORK_VERSION}/setup.py .
	pipenv install
	cp Pipfile ${BUILD_CONTEXT}
	cp Pipfile.lock ${BUILD_CONTEXT}
	cp setup.py ${BUILD_CONTEXT}
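# Note: `pipenv install` resolves the Pipfile copied above and writes Pipfile.lock,
# which is then copied into ${BUILD_CONTEXT} alongside Pipfile and setup.py.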
# Builds the container's Python library wheel and copies it into the Docker build context.
build-container-library: init
	python setup.py bdist_wheel
	cp -- dist/*.whl ${BUILD_CONTEXT}
install-container-library: init
	# Temporarily ignore urllib3: bumping the urllib3 version would introduce a circular dependency.
	# Temporarily ignore py==1.1.0: pytest-parallel depends on it, but the module is no longer maintained.
	# Once py is removed from pytest-parallel's dependencies, 51457 should only impact the local tests.
	# For more info: https://github.com/pytest-dev/py/issues/287
	pipenv run safety check -i 43975 -i 51457 -i 39611 -i 62044 -i 65647 -i 66742 # https://github.com/pyupio/safety
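# Each `-i <ID>` tells `safety check` to ignore that vulnerability ID; the IDs above are
# the accepted exceptions described in the comments.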
build-static-config:
	./scripts/fetch-ec2-instance-type-info.sh --region ${REGION} --use-case ${USE_CASE} --spark-version ${SPARK_VERSION} \
	--processor ${PROCESSOR} --framework-version ${FRAMEWORK_VERSION} --sm-version ${SM_VERSION}
# Builds the Docker image.
build: build-container-library build-static-config
	./scripts/build.sh --region ${REGION} --use-case ${USE_CASE} --spark-version ${SPARK_VERSION} \
	--processor ${PROCESSOR} --framework-version ${FRAMEWORK_VERSION} --sm-version ${SM_VERSION}
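# Example invocation (illustrative values; REGION, VERSION, and the account variables must
# already be set in the environment or passed on the command line):
#   make build REGION=us-west-2 VERSION=latest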
# Compiles Scala test JAR
# (Requires SBT: `brew install sbt`)
build-test-scala:
	cd test/resources/code/scala/hello-scala-spark; sbt package
# Compiles Java test JAR
# (Requires Maven: `brew install maven`)
build-test-java:
	cd test/resources/code/java/hello-java-spark; mvn package
build-tests: init build-test-scala build-test-java
lint: init
	pipenv run black --check ./src
	pipenv run black --check ./test
	pipenv run mypy --follow-imports=skip src/smspark # see mypy.ini for configuration
	pipenv run flake8 src # see .flake8 for configuration
test-unit: install-container-library
	pipenv run python -m pytest -s -vv test/unit
# Only runs local tests.
test-local: install-container-library build-tests
	pipenv run python -m pytest -s -vv test/integration/local --repo=$(DEST_REPO) --tag=$(VERSION) --role=$(ROLE) --durations=0
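# Example invocation (illustrative values, assuming the image has already been built locally):
#   make test-local DEST_REPO=sagemaker-spark VERSION=latest REGION=us-west-2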
# Only runs SageMaker tests.
# Uses pytest-parallel to run tests in parallel: https://pypi.org/project/pytest-parallel/
test-sagemaker: build-tests
	# Separate `pytest` invocation without parallelization:
	# history server tests can't run in parallel, since they use the same container name.
	pipenv run pytest --reruns 3 -s -vv test/integration/history \
	--repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
	--spark-version=$(SPARK_VERSION) \
	--framework-version=$(FRAMEWORK_VERSION) \
	--role $(ROLE) \
	--image_uri $(IMAGE_URI) \
	--region ${REGION} \
	--domain ${AWS_DOMAIN}
	# OBJC_DISABLE_INITIALIZE_FORK_SAFETY: https://github.com/ansible/ansible/issues/32499#issuecomment-341578864
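	# `--workers auto` (pytest-parallel) runs one worker process per available CPU.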
	OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES pipenv run pytest --workers auto --reruns 3 -s -vv test/integration/sagemaker \
	--repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
	--spark-version=$(SPARK_VERSION) \
	--framework-version=$(FRAMEWORK_VERSION) \
	--role $(ROLE) \
	--account-id ${INTEG_TEST_ACCOUNT} \
	--image_uri $(IMAGE_URI) \
	--region ${REGION} \
	--domain ${AWS_DOMAIN}
test-sagemaker-history-server: build-tests
	pipenv run pytest --reruns 3 -s -vv test/integration/history \
	--repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
	--spark-version=$(SPARK_VERSION) \
	--framework-version=$(FRAMEWORK_VERSION) \
	--role $(ROLE) \
	--image_uri $(IMAGE_URI) \
	--region ${REGION} \
	--domain ${AWS_DOMAIN}
test-sagemaker-processing: build-tests
	# OBJC_DISABLE_INITIALIZE_FORK_SAFETY: https://github.com/ansible/ansible/issues/32499#issuecomment-341578864
	OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES pipenv run pytest --workers auto --reruns 3 -s -vv test/integration/sagemaker \
	--repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
	--spark-version=$(SPARK_VERSION) \
	--framework-version=$(FRAMEWORK_VERSION) \
	--role $(ROLE) \
	--account-id ${INTEG_TEST_ACCOUNT} \
	--image_uri $(IMAGE_URI) \
	--region ${REGION} \
	--domain ${AWS_DOMAIN}
# Kept in a separate target because it runs only in the prod stage.
test-prod:
	pipenv run pytest -s -vv test/integration/tag \
	--repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
	--spark-version=$(SPARK_VERSION) \
	--framework-version=$(FRAMEWORK_VERSION) \
	--role $(ROLE) \
	--image_uri $(IMAGE_URI) \
	--region ${REGION} \
	--domain ${AWS_DOMAIN}
# Runs local tests and SageMaker tests.
test-all: test-local test-sagemaker
# Installs the sagemaker Python SDK, since it's used in the SageMaker tests.
install-sdk:
	cp smsparkbuild/${FRAMEWORK_VERSION}/Pipfile .
	cp smsparkbuild/${FRAMEWORK_VERSION}/pyproject.toml .
	cp smsparkbuild/${FRAMEWORK_VERSION}/setup.py .
	pip install --upgrade pip
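	# Quote the requirement: an unquoted `>` would be treated by the shell as output redirection.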
	pip install --upgrade "sagemaker>=2.9.0"
# Cleans up Docker containers, networks, and local build artifacts.
clean:
	docker-compose down || true
	docker kill $$(docker ps -q) || true
	docker rm $$(docker ps -a -q) || true
	docker network rm $$(docker network ls -q) || true
	rm ${BUILD_CONTEXT}/*.whl || true
	rm -rf dist || true
	rm -rf build || true
	rm -f Pipfile
	rm -f Pipfile.lock
	rm -f setup.py
	rm -f pyproject.toml
# Removes compiled Scala SBT artifacts
clean-test-scala:
	cd test/resources/code/scala/hello-scala-spark; sbt clean; rm -r project/ target/ lib_managed/
# Removes compiled Java Maven artifacts
clean-test-java:
	cd test/resources/code/java/hello-java-spark; mvn clean
clean-tests: clean-test-scala clean-test-java
release:
	./scripts/publish.sh --region ${REGION} --use-case ${USE_CASE} --spark-version ${SPARK_VERSION} \
	--processor ${PROCESSOR} --framework-version ${FRAMEWORK_VERSION} --sm-version ${SM_VERSION}
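# Example invocation (illustrative value; assumes publish credentials are configured):
#   make release REGION=us-west-2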
# Targets that don't create a file with the same name as the target.
.PHONY: all init build build-container-library install-container-library build-static-config \
	build-test-scala build-test-java build-tests lint test-unit test-local test-sagemaker \
	test-sagemaker-history-server test-sagemaker-processing test-prod test-all install-sdk \
	clean clean-test-scala clean-test-java clean-tests release