diff --git a/deployments/gpu_tensorflow_test/deployment.yaml b/deployments/gpu_tensorflow_test/deployment.yaml
new file mode 100644
index 000000000..472c85f49
--- /dev/null
+++ b/deployments/gpu_tensorflow_test/deployment.yaml
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: training-pod
+spec:
+  restartPolicy: Never
+  containers:
+  - name: testcontainer
+    image: intel/intel-extension-for-tensorflow:latest
+    imagePullPolicy: IfNotPresent
+    securityContext:
+      allowPrivilegeEscalation: false
+    command: ["/bin/sh", "-c"]
+    args: ["python /code/training.py"]
+    resources:
+      limits:
+        gpu.intel.com/i915: 1
+      requests:
+        gpu.intel.com/i915: 1
+    volumeMounts:
+    - mountPath: /code
+      name: code
+  volumes:
+  - configMap:
+      name: training-code
+    name: code
diff --git a/deployments/gpu_tensorflow_test/kustomization.yaml b/deployments/gpu_tensorflow_test/kustomization.yaml
new file mode 100644
index 000000000..eb361008d
--- /dev/null
+++ b/deployments/gpu_tensorflow_test/kustomization.yaml
@@ -0,0 +1,11 @@
+configMapGenerator:
+- name: training-code
+  files:
+    - training.py
+
+resources:
+  - deployment.yaml
+
+images:
+  - name: intel/intel-extension-for-tensorflow
+    newTag: 1.2.0-gpu
diff --git a/deployments/gpu_tensorflow_test/training.py b/deployments/gpu_tensorflow_test/training.py
new file mode 100644
index 000000000..77c284ce2
--- /dev/null
+++ b/deployments/gpu_tensorflow_test/training.py
@@ -0,0 +1,61 @@
+# Copyright 2018 The TensorFlow Authors.
+# Copyright 2023 Intel Corporation. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# original code from:
+# https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l02c01_celsius_to_fahrenheit.ipynb
+# this is slightly modified to run explicitly with XPU devices
+
+import tensorflow as tf
+import intel_extension_for_tensorflow as itex
+import numpy as np
+
+print("BACKENDS: ", str(itex.get_backend()))
+
+devs = tf.config.list_physical_devices('XPU')
+
+print(devs)
+
+if not devs:
+    raise Exception("No devices found")
+
+with tf.device("/xpu:0"):
+    celsius_q = np.array([-40, -10, 0, 8, 15, 22, 38], dtype=float)
+    fahrenheit_a = np.array([-40, 14, 32, 46, 59, 72, 100], dtype=float)
+
+    model = tf.keras.Sequential([
+        tf.keras.layers.Dense(units=1, input_shape=[1])
+    ])
+
+    model.compile(loss='mean_squared_error',
+                  optimizer=tf.keras.optimizers.Adam(0.1))
+
+    history = model.fit(celsius_q, fahrenheit_a, epochs=500, verbose=False)
+
+    print("model trained")
+
+    test = [100.0]
+    p = model.predict(test)
+
+    if len(p) != 1:
+        raise Exception("invalid result obj")
+
+    prediction = p[0]
+
+    if prediction >= 211 and prediction <= 213:
+        print("inference ok: %f" % prediction)
+    else:
+        raise Exception("bad prediction %f" % prediction)
+
+    print("SUCCESS")
diff --git a/test/e2e/gpu/gpu.go b/test/e2e/gpu/gpu.go
index c6bbab78c..ff3d3c639 100644
--- a/test/e2e/gpu/gpu.go
+++ b/test/e2e/gpu/gpu.go
@@ -22,6 +22,7 @@ import (
 	"github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/utils"
 
 	"github.com/onsi/ginkgo/v2"
+	"github.com/onsi/gomega"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -35,8 +36,10 @@ import (
 )
 
 const (
-	kustomizationYaml = "deployments/gpu_plugin/kustomization.yaml"
-	containerName     = "testcontainer"
+	kustomizationYaml   = "deployments/gpu_plugin/kustomization.yaml"
+	containerName       = "testcontainer"
+	tfKustomizationYaml = "deployments/gpu_tensorflow_test/kustomization.yaml"
+	tfPodName           = "training-pod"
 )
 
 func init() {
@@ -118,5 +121,23 @@ func describe() {
 
 			framework.Logf("found card and renderD from the log")
 		})
+
+		ginkgo.It("run a small workload on the GPU", func(ctx context.Context) {
+			kustomYaml, err := utils.LocateRepoFile(tfKustomizationYaml)
+			if err != nil {
+				framework.Failf("unable to locate %q: %v", tfKustomizationYaml, err)
+			}
+
+			ginkgo.By("submitting demo deployment")
+
+			e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "apply", "-k", filepath.Dir(kustomYaml))
+
+			ginkgo.By("waiting the pod to finish")
+
+			err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, tfPodName, f.Namespace.Name, 300*time.Second)
+			gomega.Expect(err).To(gomega.BeNil(), utils.GetPodLogs(ctx, f, tfPodName, containerName))
+
+			framework.Logf("tensorflow execution succeeded!")
+		})
 	})
 }