initial PySpark PMML Example including updates to wrappers
ukclivecox committed May 23, 2018
1 parent ea85b7e commit db6636b
Showing 19 changed files with 9,444 additions and 21 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -81,3 +81,9 @@ wrappers/s2i/python/Dockerfile
wrappers/s2i/python/_wrappers

.Rhistory
examples/models/pyspark_pmml/metastore_db
examples/models/pyspark_pmml/data
examples/models/pyspark_pmml/derby.log
examples/models/pyspark_pmml/mnist_train.csv
examples/models/pyspark_pmml/src/main/resources/model.pmml
examples/models/pyspark_pmml/.gitignore
2 changes: 2 additions & 0 deletions examples/models/pyspark_pmml/.s2i/environment
@@ -0,0 +1,2 @@
API_TYPE=REST
SERVICE_TYPE=MODEL
22 changes: 22 additions & 0 deletions examples/models/pyspark_pmml/contract.json
@@ -0,0 +1,22 @@
{
"features":[
{
"name":"_c",
"dtype":"INT",
"ftype":"continuous",
"range":[0,255],
"repeat":784
}
],
"targets":[
{
"name":"class",
"dtype":"FLOAT",
"ftype":"continuous",
"range":[0,1],
"repeat":10
}
]
}
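The single feature entry above uses repeat to stand in for the 784 pixel columns that Spark's CSV reader names _c0 through _c783, each an integer in [0,255]; the target is a 10-way class probability vector. As a rough sketch of the request payload the tester scripts derive from this contract (assuming the generic data.ndarray request format used in other Seldon examples, not something specified by this commit):

import json
import numpy as np

# 784 integer pixel values in [0, 255], one per "_c" feature in contract.json
row = np.random.randint(0, 256, size=784).tolist()

# Assumed request shape: the generic Seldon ndarray payload
payload = {"data": {"ndarray": [row]}}
print(json.dumps(payload)[:120])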


251 changes: 251 additions & 0 deletions examples/models/pyspark_pmml/mnist.ipynb
@@ -0,0 +1,251 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train MNIST Model using pySpark"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.examples.tutorials.mnist import input_data\n",
"import numpy as np\n",
"\n",
"mnist = input_data.read_data_sets('data/MNIST_data', one_hot=False)\n",
"X = (mnist.train.images * 225).astype(int)\n",
"X_y = np.concatenate((X,np.expand_dims(mnist.train.labels,1)),axis=1)\n",
"np.savetxt(\"mnist_train.csv\", X_y, fmt='%i', delimiter=\",\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.ml import Pipeline\n",
"from pyspark.ml.classification import LogisticRegression, LogisticRegressionModel\n",
"from pyspark.ml.feature import VectorAssembler\n",
"\n",
"df = sqlContext.read.csv(\"./mnist_train.csv\",inferSchema=True)\n",
"\n",
"df = df = df.withColumnRenamed(\"_c784\",\"label\")\n",
"\n",
"assembler = (VectorAssembler()\n",
" .setInputCols(df.columns[0:784])\n",
" .setOutputCol(\"features\"))\n",
"\n",
"lr = LogisticRegression(maxIter=10, regParam=0.01)\n",
"\n",
"pipeline = Pipeline(stages=[assembler, lr])\n",
"model = pipeline.fit(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from jpmml_sparkml import toPMMLBytes\n",
"\n",
"pmmlBytes = toPMMLBytes(sc, df, model)\n",
"f = open('model.pmml', 'wb')\n",
"f.write(pmmlBytes)\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!mv model.pmml pyspark-pmml-evaluator/src/main/resources"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Build Image with S2I"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!s2i build . seldonio/seldon-core-s2i-java-build pyspark-test:0.1 --runtime-image seldonio/seldon-core-s2i-java-runtime"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test with Docker"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!docker run --name \"pyspark_predictor\" -d --rm -p 5000:5000 pyspark-test:0.1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!cd ../../../wrappers/testing && make build_protos"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!python ../../../wrappers/testing/tester.py contract.json 0.0.0.0 5000 -p -t"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!docker rm pyspark_predictor --force"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test in Minikube"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!minikube start --memory 4096 --feature-gates=CustomResourceValidation=true --extra-config=apiserver.Authorization.Mode=RBAC"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl create clusterrolebinding kube-system-cluster-admin --clusterrole=cluster-admin --serviceaccount=kube-system:default"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!helm init"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!helm install ../../../helm-charts/seldon-core-crd --name seldon-core-crd --set usage_metrics.enabled=true\n",
"!helm install ../../../helm-charts/seldon-core --name seldon-core"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!eval $(minikube docker-env) && s2i build . seldonio/seldon-core-s2i-java-build pyspark-test:0.1 --runtime-image seldonio/seldon-core-s2i-java-runtime"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl create -f mnist_deployment.json"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Wait until ready (replicas == replicasAvailable)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl get seldondeployments seldon-deployment-example -o jsonpath='{.status}'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!python ../../../util/api_tester/api-tester.py contract.json \\\n",
" `minikube ip` `kubectl get svc -l app=seldon-apiserver-container-app -o jsonpath='{.items[0].spec.ports[0].nodePort}'` \\\n",
" --oauth-key oauth-key --oauth-secret oauth-secret -p"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!minikube delete"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
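The Docker test above drives the wrapped model with the shared tester script. For reference, a hand-rolled equivalent might look like the sketch below; it assumes the wrapper exposes POST /predict and accepts a form-encoded json field carrying a data.ndarray payload, which is the convention the tester follows rather than anything introduced by this commit:

import json
import numpy as np
import requests

# Contract-conformant MNIST row: 784 ints in [0, 255]
row = np.random.randint(0, 256, size=784).tolist()
request = {"data": {"ndarray": [row]}}

# Assumption: the s2i Java runtime serves REST predictions on /predict
response = requests.post("http://0.0.0.0:5000/predict",
                         data={"json": json.dumps(request)})
print(response.json())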
53 changes: 53 additions & 0 deletions examples/models/pyspark_pmml/mnist_deployment.json
@@ -0,0 +1,53 @@
{
"apiVersion": "machinelearning.seldon.io/v1alpha1",
"kind": "SeldonDeployment",
"metadata": {
"labels": {
"app": "seldon"
},
"name": "seldon-deployment-example"
},
"spec": {
"annotations": {
"project_name": "Pyspark PMML Example",
"deployment_version": "0.1"
},
"name": "h2o-deployment",
"oauth_key": "oauth-key",
"oauth_secret": "oauth-secret",
"predictors": [
{
"componentSpec": {
"spec": {
"containers": [
{
"image": "pyspark-test:0.1",
"imagePullPolicy": "IfNotPresent",
"name": "mnist-classifier",
"resources": {
"requests": {
"memory": "1Mi"
}
}
}
],
"terminationGracePeriodSeconds": 20
}
},
"graph": {
"children": [],
"name": "mnist-classifier",
"endpoint": {
"type" : "REST"
},
"type": "MODEL"
},
"name": "pmml-predictor",
"replicas": 1,
"annotations": {
"predictor_version" : "0.1"
}
}
]
}
}
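When this SeldonDeployment is running in minikube, the api-tester call in the notebook authenticates with the oauth_key/oauth_secret above and posts a prediction through the Seldon API front-end. A rough sketch of that flow, assuming the front-end's client-credentials token endpoint and /api/v0.1/predictions route (the host and nodePort below are hypothetical placeholders for the output of minikube ip and the apiserver service nodePort):

import json
import numpy as np
import requests

API_HOST = "192.168.99.100"  # hypothetical: substitute the output of minikube ip
NODE_PORT = 30032            # hypothetical: substitute the seldon-apiserver nodePort

# Assumption: OAuth client-credentials grant against the API front-end
token = requests.post(
    "http://%s:%d/oauth/token" % (API_HOST, NODE_PORT),
    auth=("oauth-key", "oauth-secret"),
    data={"grant_type": "client_credentials"},
).json()["access_token"]

# Contract-conformant request routed to the mnist-classifier graph above
row = np.random.randint(0, 256, size=784).tolist()
response = requests.post(
    "http://%s:%d/api/v0.1/predictions" % (API_HOST, NODE_PORT),
    headers={"Authorization": "Bearer " + token,
             "Content-Type": "application/json"},
    data=json.dumps({"data": {"ndarray": [row]}}),
)
print(response.json())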
