-
Notifications
You must be signed in to change notification settings - Fork 835
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
initial PySpark PMML Example including updates to wrappers
- Loading branch information
1 parent
ea85b7e
commit db6636b
Showing
19 changed files
with
9,444 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
API_TYPE=REST | ||
SERVICE_TYPE=MODEL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"features":[ | ||
{ | ||
"name":"_c", | ||
"dtype":"INT", | ||
"ftype":"continuous", | ||
"range":[0,255], | ||
"repeat":784 | ||
} | ||
], | ||
"targets":[ | ||
{ | ||
"name":"class", | ||
"dtype":"FLOAT", | ||
"ftype":"continuous", | ||
"range":[0,1], | ||
"repeat":10 | ||
} | ||
] | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,251 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Train MNIST Model using pySpark" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from tensorflow.examples.tutorials.mnist import input_data\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"mnist = input_data.read_data_sets('data/MNIST_data', one_hot=False)\n", | ||
"X = (mnist.train.images * 225).astype(int)\n", | ||
"X_y = np.concatenate((X,np.expand_dims(mnist.train.labels,1)),axis=1)\n", | ||
"np.savetxt(\"mnist_train.csv\", X_y, fmt='%i', delimiter=\",\")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from pyspark.ml import Pipeline\n", | ||
"from pyspark.ml.classification import LogisticRegression, LogisticRegressionModel\n", | ||
"from pyspark.ml.feature import VectorAssembler\n", | ||
"\n", | ||
"df = sqlContext.read.csv(\"./mnist_train.csv\",inferSchema=True)\n", | ||
"\n", | ||
"df = df = df.withColumnRenamed(\"_c784\",\"label\")\n", | ||
"\n", | ||
"assembler = (VectorAssembler()\n", | ||
" .setInputCols(df.columns[0:784])\n", | ||
" .setOutputCol(\"features\"))\n", | ||
"\n", | ||
"lr = LogisticRegression(maxIter=10, regParam=0.01)\n", | ||
"\n", | ||
"pipeline = Pipeline(stages=[assembler, lr])\n", | ||
"model = pipeline.fit(df)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from jpmml_sparkml import toPMMLBytes\n", | ||
"\n", | ||
"pmmlBytes = toPMMLBytes(sc, df, model)\n", | ||
"f = open('model.pmml', 'wb')\n", | ||
"f.write(pmmlBytes)\n", | ||
"f.close()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!mv model.pmml pyspark-pmml-evaluator/src/main/resources" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Build Image with S2I" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!s2i build . seldonio/seldon-core-s2i-java-build pyspark-test:0.1 --runtime-image seldonio/seldon-core-s2i-java-runtime" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Test with Docker" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!docker run --name \"pyspark_predictor\" -d --rm -p 5000:5000 pyspark-test:0.1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!cd ../../../wrappers/testing && make build_protos" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!python ../../../wrappers/testing/tester.py contract.json 0.0.0.0 5000 -p -t" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!docker rm pyspark_predictor --force" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Test in Minikube" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!minikube start --memory 4096 --feature-gates=CustomResourceValidation=true --extra-config=apiserver.Authorization.Mode=RBAC" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!kubectl create clusterrolebinding kube-system-cluster-admin --clusterrole=cluster-admin --serviceaccount=kube-system:default" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!helm init" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!helm install ../../../helm-charts/seldon-core-crd --name seldon-core-crd --set usage_metrics.enabled=true\n", | ||
"!helm install ../../../helm-charts/seldon-core --name seldon-core" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!eval $(minikube docker-env) && s2i build . seldonio/seldon-core-s2i-java-build pyspark-test:0.1 --runtime-image seldonio/seldon-core-s2i-java-runtime" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!kubectl create -f mnist_deployment.json" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Wait until ready (replicas == replicasAvailable)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!kubectl get seldondeployments seldon-deployment-example -o jsonpath='{.status}'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!python ../../../util/api_tester/api-tester.py contract.json \\\n", | ||
" `minikube ip` `kubectl get svc -l app=seldon-apiserver-container-app -o jsonpath='{.items[0].spec.ports[0].nodePort}'` \\\n", | ||
" --oauth-key oauth-key --oauth-secret oauth-secret -p" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!minikube delete" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
{ | ||
"apiVersion": "machinelearning.seldon.io/v1alpha1", | ||
"kind": "SeldonDeployment", | ||
"metadata": { | ||
"labels": { | ||
"app": "seldon" | ||
}, | ||
"name": "seldon-deployment-example" | ||
}, | ||
"spec": { | ||
"annotations": { | ||
"project_name": "Pyspark PMML Example", | ||
"deployment_version": "0.1" | ||
}, | ||
"name": "pyspark-pmml-deployment", | ||
"oauth_key": "oauth-key", | ||
"oauth_secret": "oauth-secret", | ||
"predictors": [ | ||
{ | ||
"componentSpec": { | ||
"spec": { | ||
"containers": [ | ||
{ | ||
"image": "pyspark-test:0.1", | ||
"imagePullPolicy": "IfNotPresent", | ||
"name": "mnist-classifier", | ||
"resources": { | ||
"requests": { | ||
"memory": "1Mi" | ||
} | ||
} | ||
} | ||
], | ||
"terminationGracePeriodSeconds": 20 | ||
} | ||
}, | ||
"graph": { | ||
"children": [], | ||
"name": "mnist-classifier", | ||
"endpoint": { | ||
"type" : "REST" | ||
}, | ||
"type": "MODEL" | ||
}, | ||
"name": "pmml-predictor", | ||
"replicas": 1, | ||
"annotations": { | ||
"predictor_version" : "0.1" | ||
} | ||
} | ||
] | ||
} | ||
} |
Oops, something went wrong.