Skip to content

Commit

Permalink
Merge pull request #1374 from tkatila/e2e-gpu-tf
Browse files Browse the repository at this point in the history
e2e: gpu: add a basic tensorflow test
  • Loading branch information
hj-johannes-lee authored Aug 22, 2023
2 parents a70651f + 4212145 commit 6861ef5
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 2 deletions.
26 changes: 26 additions & 0 deletions deployments/gpu_tensorflow_test/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Pod that runs /code/training.py once in a container that requests one
# gpu.intel.com/i915 resource.
apiVersion: v1
kind: Pod
metadata:
name: training-pod
spec:
# The script is meant to run to completion once; do not restart the container.
restartPolicy: Never
containers:
- name: testcontainer
# When applied through kustomization.yaml, the images entry there replaces
# this tag with 1.2.0-gpu.
image: intel/intel-extension-for-tensorflow:latest
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
command: ["/bin/sh", "-c"]
args: ["python /code/training.py"]
resources:
# One GPU resource, with the request equal to the limit.
limits:
gpu.intel.com/i915: 1
requests:
gpu.intel.com/i915: 1
volumeMounts:
# training.py is provided through the "code" volume below.
- mountPath: /code
name: code
volumes:
# ConfigMap generated from training.py by kustomization.yaml.
- configMap:
name: training-code
name: code
11 changes: 11 additions & 0 deletions deployments/gpu_tensorflow_test/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Packages training.py into the training-code ConfigMap that deployment.yaml
# mounts at /code.
configMapGenerator:
- name: training-code
files:
- training.py

resources:
- deployment.yaml

# Pin the TensorFlow image to a fixed release instead of the tag written in
# deployment.yaml.
images:
- name: intel/intel-extension-for-tensorflow
newTag: 1.2.0-gpu
61 changes: 61 additions & 0 deletions deployments/gpu_tensorflow_test/training.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright 2018 The TensorFlow Authors.
# Copyright 2023 Intel Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# original code from:
# https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l02c01_celsius_to_fahrenheit.ipynb
# this is slightly modified to run explicitly with XPU devices

import tensorflow as tf
import intel_extension_for_tensorflow as itex
import numpy as np

# Smoke test: fit a single-neuron Celsius -> Fahrenheit model on the first XPU
# device and verify one prediction. Every failure raises, so the interpreter
# exits with a non-zero status and the run is reported as failed.

print("BACKENDS: ", str(itex.get_backend()))

devs = tf.config.list_physical_devices('XPU')

print(devs)

if not devs:
    raise Exception("No devices found")

# Request placement on the first XPU device explicitly.
with tf.device("/xpu:0"):
    celsius_q = np.array([-40, -10, 0, 8, 15, 22, 38], dtype=float)
    fahrenheit_a = np.array([-40, 14, 32, 46, 59, 72, 100], dtype=float)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(units=1, input_shape=[1])
    ])

    model.compile(loss='mean_squared_error',
                  optimizer=tf.keras.optimizers.Adam(0.1))

    model.fit(celsius_q, fahrenheit_a, epochs=500, verbose=False)

    print("model trained")

    # Pass an ndarray rather than a bare Python list so the input is an
    # unambiguous batch containing one sample.
    test = np.array([100.0], dtype=float)
    p = model.predict(test)

if len(p) != 1:
    raise Exception("invalid result obj")

# The prediction for the single sample comes back as an array; extract a real
# Python float so the comparison and the "%f" formatting below do not depend
# on implicit array-to-scalar conversion (deprecated since NumPy 1.25).
prediction = float(np.ravel(p[0])[0])

# 100 C is 212 F; accept +/- 1 degree of training error.
if 211 <= prediction <= 213:
    print("inference ok: %f" % prediction)
else:
    raise Exception("bad prediction %f" % prediction)

print("SUCCESS")
25 changes: 23 additions & 2 deletions test/e2e/gpu/gpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/utils"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -35,8 +36,10 @@ import (
)

const (
kustomizationYaml = "deployments/gpu_plugin/kustomization.yaml"
containerName = "testcontainer"
kustomizationYaml = "deployments/gpu_plugin/kustomization.yaml"
containerName = "testcontainer"
// tfKustomizationYaml is the repo-relative path of the kustomization that
// deploys the TensorFlow training pod.
tfKustomizationYaml = "deployments/gpu_tensorflow_test/kustomization.yaml"
// tfPodName matches metadata.name of the Pod in
// deployments/gpu_tensorflow_test/deployment.yaml.
tfPodName = "training-pod"
)

func init() {
Expand Down Expand Up @@ -118,5 +121,23 @@ func describe() {

framework.Logf("found card and renderD from the log")
})

// Applies the TensorFlow kustomization into the test namespace and waits up
// to 300 seconds for the training pod to succeed. On failure the container
// log is attached to the assertion message.
ginkgo.It("run a small workload on the GPU", func(ctx context.Context) {
	kustomYaml, err := utils.LocateRepoFile(tfKustomizationYaml)
	if err != nil {
		framework.Failf("unable to locate %q: %v", tfKustomizationYaml, err)
	}

	ginkgo.By("submitting demo deployment")

	// kubectl -k takes the directory holding kustomization.yaml, not the file.
	e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "apply", "-k", filepath.Dir(kustomYaml))

	ginkgo.By("waiting the pod to finish")

	err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, tfPodName, f.Namespace.Name, 300*time.Second)
	// HaveOccurred is the error-specific matcher: it type-checks the value
	// and prints the error itself on failure, unlike a bare BeNil.
	gomega.Expect(err).NotTo(gomega.HaveOccurred(), utils.GetPodLogs(ctx, f, tfPodName, containerName))

	framework.Logf("tensorflow execution succeeded!")
})
})
}

0 comments on commit 6861ef5

Please sign in to comment.