From b6b4fb04b7102cdd58aa0811ae6afc638914e15a Mon Sep 17 00:00:00 2001 From: bjodom Date: Wed, 26 Apr 2023 14:46:59 -0700 Subject: [PATCH] Add files via upload --- .../gemm_oneMKL_SYCL/00_GEMM/00_GEMM.ipynb | 35 ++ .../00_GEMM/01_GEMM_DPCPP_Buffers.ipynb | 311 ++++++++++++++++++ .../00_GEMM/02_GEMM_DPCPP_USM.ipynb | 255 ++++++++++++++ .../gemm_oneMKL_SYCL/00_GEMM/GEMM_OMP.ipynb | 223 +++++++++++++ .../00_GEMM/lab/dpcpp_gemm_buffers.cpp | 107 ++++++ .../00_GEMM/lab/dpcpp_gemm_usm.cpp | 124 +++++++ .../gemm_oneMKL_SYCL/00_GEMM/lab/omp_gemm.cpp | 95 ++++++ Libraries/gemm_oneMKL_SYCL/00_GEMM/q | 52 +++ .../00_GEMM/run_gemm_buffers.sh | 7 + .../gemm_oneMKL_SYCL/00_GEMM/run_gemm_omp.sh | 5 + .../gemm_oneMKL_SYCL/00_GEMM/run_gemm_usm.sh | 5 + .../00_GEMM/src/dpcpp_gemm_buffers.cpp | 99 ++++++ .../00_GEMM/src/dpcpp_gemm_usm.cpp | 123 +++++++ .../gemm_oneMKL_SYCL/00_GEMM/src/omp_gemm.cpp | 95 ++++++ Libraries/gemm_oneMKL_SYCL/oneMKL_Intro.ipynb | 149 +++++++++ 15 files changed, 1685 insertions(+) create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/00_GEMM.ipynb create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/01_GEMM_DPCPP_Buffers.ipynb create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/02_GEMM_DPCPP_USM.ipynb create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/GEMM_OMP.ipynb create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/dpcpp_gemm_buffers.cpp create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/dpcpp_gemm_usm.cpp create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/omp_gemm.cpp create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/q create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_buffers.sh create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_omp.sh create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_usm.sh create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/src/dpcpp_gemm_buffers.cpp create mode 100644 Libraries/gemm_oneMKL_SYCL/00_GEMM/src/dpcpp_gemm_usm.cpp create mode 100644 
Libraries/gemm_oneMKL_SYCL/00_GEMM/src/omp_gemm.cpp create mode 100644 Libraries/gemm_oneMKL_SYCL/oneMKL_Intro.ipynb diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/00_GEMM.ipynb b/Libraries/gemm_oneMKL_SYCL/00_GEMM/00_GEMM.ipynb new file mode 100644 index 0000000000..84accfef2a --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/00_GEMM.ipynb @@ -0,0 +1,35 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Matrix Multiplication (GEMM)\n", + "### [DPC++ (Buffers)](./01_GEMM_DPCPP_Buffers.ipynb)\n", + "### [DPC++ (Unified Shared Memory)](./02_GEMM_DPCPP_USM.ipynb)\n", + "### [OpenMP Offload](./GEMM_OMP.ipynb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (Intel® oneAPI 2023.0)", + "language": "python", + "name": "c009-intel_distribution_of_python_3_oneapi-beta05-python" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/01_GEMM_DPCPP_Buffers.ipynb b/Libraries/gemm_oneMKL_SYCL/00_GEMM/01_GEMM_DPCPP_Buffers.ipynb new file mode 100644 index 0000000000..3c0891e3f2 --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/01_GEMM_DPCPP_Buffers.ipynb @@ -0,0 +1,311 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GEMM (Using DPC++ Buffers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following example shows a simple matrix multiplication program using __DPC++__ with the __buffer/accessor__ style of memory management. Follow along with the instructions of the lab to build and run the program. The lab requires a mixture of observing key components, and making simple modifications." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting lab/dpcpp_gemm_buffers.cpp\n" + ] + } + ], + "source": [ + "%%writefile lab/dpcpp_gemm_buffers.cpp\n", + "//==============================================================\n", + "// Copyright © 2023 Intel Corporation\n", + "//\n", + "// SPDX-License-Identifier: MIT\n", + "// =============================================================\n", + "\n", + "#include \n", + "#include \n", + "\n", + "//# sycl namespace\n", + "#include \n", + "using namespace sycl;\n", + "\n", + "//# oneMKL DPC++ interface for BLAS functions\n", + "\n", + "#include \"oneapi/mkl/blas.hpp\" \n", + "// # shorten mkl namespace\n", + "namespace mkl = oneapi::mkl; \n", + "\n", + "//# The following project performs matrix multiplication using oneMKL / DPC++ with buffers.\n", + "//# We will execute the simple operation A * B = C\n", + "//# The matrix B is set equal to the identity matrix such that A * B = A * I\n", + "//# After performing the computation, we will verify A * I = C -> A = C\n", + "\n", + "\n", + "\n", + "int main() {\n", + "\n", + " //# dimensions\n", + " int m = 3, n = 3, k = 3;\n", + " //# leading dimensions\n", + " int ldA = 3, ldB = 3, ldC = 3;\n", + " //# scalar multipliers\n", + " double alpha = 1.0, beta = 1.0;\n", + " //# transpose status of matrices\n", + " mkl::transpose transA = mkl::transpose::nontrans;\n", + " mkl::transpose transB = mkl::transpose::nontrans;\n", + " //# matrix data\n", + " std::vector A = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};\n", + " std::vector B = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};\n", + " std::vector C = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};\n", + " \n", + " //### Step 1 - Observe the definition of an asynchronous exception handler.\n", + " //# This function object will later be supplied to the queue.\n", + " //# It is designed to handle errors thrown 
while device code executes.\n", + " auto async_handler = [](sycl::exception_list exceptions) {\n", + " for (std::exception_ptr const &e : exceptions) {\n", + " try {\n", + " std::rethrow_exception(e);\n", + " }\n", + " catch (sycl::exception const &e) {\n", + " std::cout << \"Caught asynchronous SYCL exception: \" << e.what() << std::endl;\n", + " }\n", + " }\n", + " };\n", + "\n", + " //### Step 2 - Create a device object. (create device and q in one step)\n", + " //# Device selectors are used to specify the type of a device.\n", + " //# Uncomment _one_ of the following three lines to select a device.\n", + " queue q(default_selector_v, async_handler); //# default_selector returns a device based on a performance heuristic\n", + " // queue q(cpu_selector_v); //# cpu_selector returns a cpu device\n", + " // queue q(gpu_selector_v); //# gpu_selector returns a gpu device\n", + " // queue q;\n", + " //# Print actual device used\n", + " std::cout << \"Device: \" << q.get_device().get_info() << \"\\n\";\n", + "\n", + " //### Step 4 - Create buffers to hold our matrix data.\n", + " //# Buffer objects can be constructed given a container\n", + " //# Observe the creation of buffers for matrices A and B.\n", + " //# Try and create a third buffer for matrix C called C_buffer.\n", + " //# The solution is shown in the hidden cell below.\n", + " buffer A_buffer(A);\n", + " buffer B_buffer(B);\n", + " /* define C_buffer below */\n", + " buffer C_buffer(C);\n", + " \n", + "\n", + " //### Step 5 - Execute gemm operation.\n", + " //# Here, we need only pass in our queue and other familiar matrix multiplication parameters.\n", + " //# This includes the dimensions and data buffers for matrices A, B, and C.\n", + " mkl::blas::gemm(q, transA, transB, m, n, k, alpha, A_buffer, ldA, B_buffer, ldB, beta, C_buffer, ldC);\n", + "\n", + " //# we cannot explicitly transfer memory to/from the device when using buffers\n", + " //# that is why we must use this operation to ensure result data is 
returned to the host\n", + " q.wait_and_throw(); //# block until operation completes, throw any errors\n", + "\n", + " //### Step 6 - Observe creation of accessors to retrieve data from A_buffer and C_buffer.\n", + " accessor A_acc(A_buffer,read_only);\n", + " accessor C_acc(C_buffer,read_only);\n", + "\n", + " int status = 0;\n", + "\n", + " // verify C matrix using accessor to observe values held in C_buffer\n", + " std::cout << \"\\n\";\n", + " std::cout << \"C = \\n\";\n", + " for (int i = 0; i < m; ++i) {\n", + " for (int j = 0; j < n; ++j) {\n", + " if (A_acc[i*m+j] != C_acc[i*m+j]) status = 1;\n", + " std::cout << C_acc[i*m+j] << \" \";\n", + " }\n", + " std::cout << \"\\n\";\n", + " }\n", + " std::cout << \"\\n\";\n", + "\n", + " status == 0 ? std::cout << \"Verified: A = C\\n\" : std::cout << \"Failed: A != C\\n\";\n", + " return status;\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Solutions - click the three dots below to reveal" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Step 4 - The correct line is\n", + "```sycl::buffer C_buffer(C);```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build and Run\n", + "Select the cell below and click Run ▶ to compile and execute the code above:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job has been submitted to Intel(R) DevCloud and will execute soon.\n", + "\n", + " If you do not see result in 60 seconds, please restart the Jupyter kernel:\n", + " Kernel -> 'Restart Kernel and Clear All Outputs...' 
and then try again\n", + "\n", + "Job ID Name User Time Use S Queue\n", + "------------------------- ---------------- --------------- -------- - -----\n", + "2281801.v-qsvr-1 ...ub-singleuser u51369 00:01:08 R jupyterhub \n", + "2281830.v-qsvr-1 STDIN u51369 00:00:06 R batch \n", + "2281854.v-qsvr-1 ...mm_buffers.sh u51369 0 Q batch \n", + "\n", + "Waiting for Output ██████████ Done⬇\n", + "\n", + "########################################################################\n", + "# Date: Thu 20 Apr 2023 03:27:43 PM PDT\n", + "# Job ID: 2281854.v-qsvr-1.aidevcloud\n", + "# User: u51369\n", + "# Resources: cput=75:00:00,neednodes=1:gpu:ppn=2,nodes=1:gpu:ppn=2,walltime=06:00:00\n", + "########################################################################\n", + "\n", + "## u51369 is compiling oneMKL_introduction Module0 -- gemm with buffers - 1 of 3 dpcpp_gemm_buffers.cpp\n", + "\n", + "########################################################################\n", + "# End of output for job 2281854.v-qsvr-1.aidevcloud\n", + "# Date: Thu 20 Apr 2023 03:27:48 PM PDT\n", + "########################################################################\n", + "\n", + "lab/dpcpp_gemm_buffers.cpp:7:10: fatal error: 'sycl/sycl.hpp' file not found\n", + "#include //# sycl namespace\n", + " ^~~~~~~~~~~~~~~\n", + "1 error generated.\n", + "Job Completed in 10 seconds.\n" + ] + } + ], + "source": [ + "! chmod 755 q; chmod 755 run_gemm_buffers.sh;if [ -x \"$(command -v qsub)\" ]; then ./q run_gemm_buffers.sh; else ./run_gemm_buffers.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The build instructions for this sample can be found in the ```run_gemm_buffers.sh``` script. Consider using the [Link Line Advisor](https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html) to help you create compile and link lines for your oneMKL projects." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

Survey

\n", + "\n", + "[We would appreciate any feedback you’d care to give, so that we can improve the overall training quality and experience. Thanks! ](https://intel.az1.qualtrics.com/jfe/form/SV_cCpY08ARDi6NhfT)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

Reset Notebook

\n", + "\n", + "##### Should you be experiencing any issues with your notebook or just want to start fresh run the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e9f5a54d77f541bf981b6aebea899d57", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Button(description='Reset Notebook', icon='check', style=ButtonStyle(), tooltip='This will upda…" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import display, Markdown, clear_output\n", + "import ipywidgets as widgets\n", + "button = widgets.Button(\n", + " description='Reset Notebook',\n", + " disabled=False,\n", + " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", + " tooltip='This will update this notebook, overwriting any changes.',\n", + " icon='check' # (FontAwesome names without the `fa-` prefix)\n", + ")\n", + "out = widgets.Output()\n", + "def on_button_clicked(_):\n", + " # \"linking function with output\"\n", + " with out:\n", + " # what happens when we press the button\n", + " clear_output()\n", + " !rsync -a --size-only /data/oneapi_workshop/Intel_oneAPI_MKL_Training/00_GEMM/ ~/Intel_oneAPI_MKL_Training/00_GEMM/\n", + " print('Notebook reset -- now click reload on browser.')\n", + "# linking button and function together using a button's method\n", + "button.on_click(on_button_clicked)\n", + "# displaying button and its output together\n", + "widgets.VBox([button,out])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (Intel® oneAPI 2023.0)", + "language": "python", + "name": "c009-intel_distribution_of_python_3_oneapi-beta05-python" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/02_GEMM_DPCPP_USM.ipynb b/Libraries/gemm_oneMKL_SYCL/00_GEMM/02_GEMM_DPCPP_USM.ipynb new file mode 100644 index 0000000000..d00466d7c5 --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/02_GEMM_DPCPP_USM.ipynb @@ -0,0 +1,255 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GEMM (Using DPC++ Unified Shared Memory)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following example shows a simple matrix multiplication program using __DPC++__ with the __unified shared memory__ style of memory management. Follow along with the instructions of the lab to build and run the program. The lab requires a mixture of observing key components, and making simple modifications." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting lab/dpcpp_gemm_usm.cpp\n" + ] + } + ], + "source": [ + "%%writefile lab/dpcpp_gemm_usm.cpp\n", + "//==============================================================\n", + "// Copyright © 2020 Intel Corporation\n", + "//\n", + "// SPDX-License-Identifier: MIT\n", + "// =============================================================\n", + "#include \n", + "#include \n", + "#include //# sycl namespace\n", + "#include \"oneapi/mkl/blas.hpp\" //# oneMKL DPC++ interface for BLAS functions\n", + "\n", + "//# The following project performs matrix multiplication using oneMKL / DPC++ with Unified Shared Memory (USM)\n", + "//# We will execute the simple operation A * B = C\n", + "//# The matrix B is set equal to the identity matrix such that A * B = A * I\n", + "//# After performing the computation, we will verify A * I = C -> A = C\n", + "\n", + "using namespace sycl;\n", + "namespace mkl = oneapi::mkl; //# shorten mkl namespace\n", + "\n", + "int main() {\n", + "\n", + " //# dimensions\n", + " int m = 3, n = 3, k = 3;\n", + " //# leading dimensions\n", + " int ldA = 3, ldB = 3, ldC = 3;\n", + " //# scalar multipliers\n", + " double alpha = 1.0, beta = 1.0;\n", + " //# transpose status of matrices\n", + " mkl::transpose transA = mkl::transpose::nontrans;\n", + " mkl::transpose transB = mkl::transpose::nontrans;\n", + "\n", + " //### Step 1 - Observe the definition of an asynchronous exception handler.\n", + " //# This function object will later be supplied to the queue.\n", + " //# It is designed to handle errors thrown while device code executes.\n", + " auto async_handler = [](sycl::exception_list exceptions) {\n", + " for (std::exception_ptr const &e : exceptions) {\n", + " try {\n", + " std::rethrow_exception(e);\n", + " }\n", + " catch (sycl::exception const &e) {\n", + " std::cout << \"Caught 
asynchronous SYCL exception: \" << e.what() << std::endl;\n", + " }\n", + " }\n", + " };\n", + "\n", + " //### Step 2 - Create a device object.\n", + " //# Device selectors are used to specify the type of a device.\n", + " //# Uncomment _one_ of the following three lines to select a device.\n", + " // sycl::device device = sycl::device(sycl::default_selector()); //# default_selector returns a device based on a performance heuristic\n", + " // sycl::device device = sycl::device(sycl::cpu_selector()); //# cpu_selector returns a cpu device\n", + " // sycl::device device = sycl::device(sycl::gpu_selector()); //# gpu_selector returns a gpu device\n", + " std::cout << \"Device: \" << device.get_info() << \"\\n\";\n", + "\n", + " //### Step 3 - Create a queue object.\n", + " //# A queue accepts a single device, and optionally, an exception handler.\n", + " //# Uncomment the following line to initialize a queue with our device and handler.\n", + " // sycl::queue queue(device, async_handler);\n", + "\n", + " //### Step 4 - Create a sycl event and allocate USM\n", + " //# The later execution of the gemm operation is tied to this event\n", + " //# The gemm operation will also make use of a vector of sycl events we can call 'gemm_dependencies'\n", + " sycl::event gemm_done;\n", + " std::vector gemm_dependencies;\n", + " //# Here, we allocate USM pointers for each matrix, using the special 'malloc_shared' function\n", + " //# Make sure to template the function with the correct precision, and pass in our queue to the function call\n", + " double *A_usm = sycl::malloc_shared(m * k, queue);\n", + " double *B_usm = sycl::malloc_shared(k * n, queue);\n", + " double *C_usm = sycl::malloc_shared(m * n, queue);\n", + "\n", + " //# define matrix A as the 3x3 matrix\n", + " //# {{ 1, 2, 3}, {4, 5, 6}, {7, 8, 9}}\n", + " for (int i = 0; i < m; i++) {\n", + " for (int j = 0; j < k; j++) {\n", + " A_usm[i*m+j] = (double)(i*m+j) + 1.0;\n", + " }\n", + " }\n", + " \n", + " //# define matrix B 
as the identity matrix\n", + " for (int i = 0; i < k; i++) {\n", + " for (int j = 0; j < n; j++) {\n", + " if (i == j) B_usm[i*k+j] = 1.0;\n", + " else B_usm[i*k+j] = 0.0;\n", + " }\n", + " }\n", + " \n", + " //# initialize C as a 0 matrix\n", + " for (int i = 0; i < m; i++) {\n", + " for (int j = 0; j < n; j++) {\n", + " C_usm[i*m+j] = 0.0;\n", + " }\n", + " }\n", + "\n", + " //### Step 5 - Execute gemm operation.\n", + " //# Here, we fill in the familiar parameters for the gemm operation.\n", + " //# However, we must also pass in the queue as the first parameter.\n", + " //# We must also pass in our list of dependencies as the final parameter.\n", + " //# We are also passing in our USM pointers as opposed to a buffer or raw data pointer.\n", + " gemm_done = mkl::blas::gemm(queue, transA, transB, m, n, k, alpha, A_usm, ldA, B_usm, ldB, beta, C_usm, ldC, gemm_dependencies);\n", + "\n", + " //# We must now wait for the given event to finish before accessing any data involved in the operation\n", + " //# Otherwise, we may access data before the operation has completed, or before it has been returned to the host\n", + " gemm_done.wait();\n", + "\n", + " int status = 0;\n", + "\n", + " //# verify C matrix using USM data\n", + " std::cout << \"\\n\";\n", + " std::cout << \"C = \\n\";\n", + " for (int i = 0; i < m; ++i) {\n", + " for (int j = 0; j < n; ++j) {\n", + " if (A_usm[i*m+j] != C_usm[i*m+j]) status = 1;\n", + " std::cout << C_usm[i*m+j] << \" \";\n", + " }\n", + " std::cout << \"\\n\";\n", + " }\n", + " std::cout << \"\\n\";\n", + "\n", + " //# free usm pointers\n", + " sycl::free(A_usm, queue);\n", + " sycl::free(B_usm, queue);\n", + " sycl::free(C_usm, queue);\n", + "\n", + " status == 0 ? 
std::cout << \"Verified: A = C\\n\" : std::cout << \"Failed: A != C\\n\";\n", + " return status;\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build and Run\n", + "Select the cell below and click Run ▶ to compile and execute the code above:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! chmod 755 q; chmod 755 run_gemm_usm.sh;if [ -x \"$(command -v qsub)\" ]; then ./q run_gemm_usm.sh; else ./run_gemm_usm.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The build instructions for this sample can be found in the ```run_gemm_usm.sh``` script. Consider using the [Link Line Advisor](https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html) to help you create compile and link lines for your oneMKL projects." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

Survey

\n", + "\n", + "[We would appreciate any feedback you’d care to give, so that we can improve the overall training quality and experience. Thanks! ](https://intel.az1.qualtrics.com/jfe/form/SV_3JyBEAtwk5YIg85)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

Reset Notebook

\n", + "\n", + "##### Should you be experiencing any issues with your notebook or just want to start fresh run the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "from IPython.display import display, Markdown, clear_output\n", + "import ipywidgets as widgets\n", + "button = widgets.Button(\n", + " description='Reset Notebook',\n", + " disabled=False,\n", + " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", + " tooltip='This will update this notebook, overwriting any changes.',\n", + " icon='check' # (FontAwesome names without the `fa-` prefix)\n", + ")\n", + "out = widgets.Output()\n", + "def on_button_clicked(_):\n", + " # \"linking function with output\"\n", + " with out:\n", + " # what happens when we press the button\n", + " clear_output()\n", + " !rsync -a --size-only /data/oneapi_workshop/Intel_oneAPI_MKL_Training/00_GEMM/ ~/Intel_oneAPI_MKL_Training/00_GEMM/\n", + " print('Notebook reset -- now click reload on browser.')\n", + "# linking button and function together using a button's method\n", + "button.on_click(on_button_clicked)\n", + "# displaying button and its output together\n", + "widgets.VBox([button,out])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (Intel® oneAPI 2023.0)", + "language": "python", + "name": "c009-intel_distribution_of_python_3_oneapi-beta05-python" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/GEMM_OMP.ipynb b/Libraries/gemm_oneMKL_SYCL/00_GEMM/GEMM_OMP.ipynb new file mode 100644 index 0000000000..98f02e76a6 --- /dev/null +++ 
b/Libraries/gemm_oneMKL_SYCL/00_GEMM/GEMM_OMP.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GEMM (Using OpenMP Offload)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following example shows a simple matrix multiplication program using __OpenMP Offload__. Follow along with the instructions of the lab to build and run the program. The lab requires a mixture of observing key components, and making simple modifications." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile lab/omp_gemm.cpp\n", + "//==============================================================\n", + "// Copyright © 2020 Intel Corporation\n", + "//\n", + "// SPDX-License-Identifier: MIT\n", + "// =============================================================\n", + "#include \n", + "#include \"mkl.h\" //# main mkl header\n", + "#include \"mkl_omp_offload.h\" //# mkl OMP Offload interface\n", + "\n", + "int dnum = 0;\n", + "\n", + "int main() {\n", + "\n", + " //# dimensions\n", + " MKL_INT m = 3, n = 3, k = 3;\n", + " //# leading dimensions\n", + " MKL_INT ldA = k, ldB = n, ldC = n;\n", + " //# scalar multipliers\n", + " double alpha = 1.0;\n", + " double beta = 1.0;\n", + " //# matrix data\n", + " double *A = (double *)malloc(m * k * sizeof(double));\n", + " double *B = (double *)malloc(k * n * sizeof(double));\n", + " double *C = (double *)malloc(m * n * sizeof(double));\n", + "\n", + " //# define matrix A as the 3x3 matrix\n", + " //# {{ 1, 2, 3}, {4, 5, 6}, {7, 8, 9}}\n", + " for (int i = 0; i < m; i++) {\n", + " for (int j = 0; j < k; j++) {\n", + " A[i*m+j] = (double)(i*m+j) + 1.0;\n", + " }\n", + " }\n", + "\n", + " //# define matrix B as the identity matrix\n", + " for (int i = 0; i < k; i++) {\n", + " for (int j = 0; j < n; j++) {\n", + " if (i == j) B[i*k+j] = 1.0;\n", + " else B[i*k+j] = 0.0;\n", + " }\n", + " }\n", + "\n", + " 
//# initialize C as a 0 matrix\n", + " for (int i = 0; i < m; i++) {\n", + " for (int j = 0; j < n; j++) {\n", + " C[i*m+j] = 0.0;\n", + " }\n", + " }\n", + "\n", + " MKL_INT sizeA = m*k;\n", + " MKL_INT sizeB = k*n;\n", + " MKL_INT sizeC = m*n;\n", + " \n", + " //# Below are the two compiler directives necessary to offload the GEMM operation\n", + " //# we are using 'dgemm' to specify we are using double-precision values\n", + " \n", + " //# The outer directive maps input data (matrices A & B) 'to' the device.\n", + " //# It also maps output data (matrix C) 'from' the device so that the results of the operation are returned.\n", + " //# Finally, this directive specifies device number 0, which should interact with an available GPU.\n", + " \n", + " //# The inner directive dispatches the correct version of the contained operation, again specifying the device number.\n", + " //# This directive also uses the 'use_devce_ptr' statement to specify the data we are working with (in this case, arrays A, B, & C).\n", + " \n", + " //# Uncomment the two 'pragma' lines below. (Do not remove the '#' character)\n", + " \n", + " //#pragma omp target data map(to:A[0:sizeA],B[0:sizeB]) map(from:C[0:sizeC]) device(dnum)\n", + " {\n", + " //#pragma omp target variant dispatch device(dnum) use_device_ptr(A, B, C)\n", + " {\n", + " dgemm(\"N\", \"N\", &m, &n, &k, &alpha, A, &ldA, B, &ldB, &beta, C, &ldC);\n", + " }\n", + " }\n", + "\n", + " int status = 0;\n", + "\n", + " //# verify C matrix\n", + " printf(\"\\n\");\n", + " printf(\"C = \\n\");\n", + " for (int i = 0; i < m; i++) {\n", + " for (int j = 0; j < n; j++) {\n", + " if (A[i*m+j] != C[i*m+j]) status = 1;\n", + " printf(\"%0.0f \", C[i*m+j]);\n", + " }\n", + " printf(\"\\n\");\n", + " }\n", + " printf(\"\\n\");\n", + "\n", + " //# free matrix data\n", + " free(A);\n", + " free(B);\n", + " free(C);\n", + "\n", + " status == 0 ? 
printf(\"Verified: A = C\\n\") : printf(\"Failed: A != C\\n\");\n", + "\n", + " return status;\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build and Run\n", + "Select the cell below and click Run ▶ to compile and execute the code above:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! chmod 755 q; chmod 755 run_gemm_omp.sh;if [ -x \"$(command -v qsub)\" ]; then ./q run_gemm_omp.sh; else ./run_gemm_omp.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The build instructions for this sample can be found in the ```run_gemm_omp.sh``` script. Consider using the [Link Line Advisor](https://software.intel.com/content/www/us/en/develop/articles/intel-mkl-link-line-advisor.html) to help you create compile and link lines for your oneMKL projects.\n", + "\n", + "To validate your runs of OpenMP, consider setting the following environment variables\n", + "* ```LIBOMPTARGET_PROFILE=T``` - Enables OpenMP profiling, also prints the chosen device\n", + "* ```LIBOMPTARGET_DEBUG=1``` - Forces OpenMP to dump debug info\n", + "* ```LIBOMPTARGET_PLUGIN=OpenCL``` - Target OpenCL backend instead of Level0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

Survey

\n", + "\n", + "[We would appreciate any feedback you’d care to give, so that we can improve the overall training quality and experience. Thanks! ](https://intel.az1.qualtrics.com/jfe/form/SV_55Ollfy7wQ1kuNL)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

Reset Notebook

\n", + "\n", + "##### Should you be experiencing any issues with your notebook or just want to start fresh run the below cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "from IPython.display import display, Markdown, clear_output\n", + "import ipywidgets as widgets\n", + "button = widgets.Button(\n", + " description='Reset Notebook',\n", + " disabled=False,\n", + " button_style='', # 'success', 'info', 'warning', 'danger' or ''\n", + " tooltip='This will update this notebook, overwriting any changes.',\n", + " icon='check' # (FontAwesome names without the `fa-` prefix)\n", + ")\n", + "out = widgets.Output()\n", + "def on_button_clicked(_):\n", + " # \"linking function with output\"\n", + " with out:\n", + " # what happens when we press the button\n", + " clear_output()\n", + " !rsync -a --size-only /data/oneapi_workshop/Intel_oneAPI_MKL_Training/00_GEMM/ ~/Intel_oneAPI_MKL_Training/00_GEMM/\n", + " print('Notebook reset -- now click reload on browser.')\n", + "# linking button and function together using a button's method\n", + "button.on_click(on_button_clicked)\n", + "# displaying button and its output together\n", + "widgets.VBox([button,out])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7 (Intel® oneAPI)", + "language": "python", + "name": "c009-intel_distribution_of_python_3_oneapi-beta05-python" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/dpcpp_gemm_buffers.cpp b/Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/dpcpp_gemm_buffers.cpp new file mode 100644 index 0000000000..ada8faa8e9 --- /dev/null +++ 
b/Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/dpcpp_gemm_buffers.cpp @@ -0,0 +1,107 @@ +//============================================================== +// Copyright © 2023 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#include +#include + +//# sycl namespace +#include +using namespace sycl; + +//# oneMKL DPC++ interface for BLAS functions + +#include "oneapi/mkl/blas.hpp" +// # shorten mkl namespace +namespace mkl = oneapi::mkl; + +//# The following project performs matrix multiplication using oneMKL / DPC++ with buffers. +//# We will execute the simple operation A * B = C +//# The matrix B is set equal to the identity matrix such that A * B = A * I +//# After performing the computation, we will verify A * I = C -> A = C + + + +int main() { + + //# dimensions + int m = 3, n = 3, k = 3; + //# leading dimensions + int ldA = 3, ldB = 3, ldC = 3; + //# scalar multipliers + double alpha = 1.0, beta = 1.0; + //# transpose status of matrices + mkl::transpose transA = mkl::transpose::nontrans; + mkl::transpose transB = mkl::transpose::nontrans; + //# matrix data + std::vector A = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; + std::vector B = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0}; + std::vector C = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + + //### Step 1 - Observe the definition of an asynchronous exception handler. + //# This function object will later be supplied to the queue. + //# It is designed to handle errors thrown while device code executes. + auto async_handler = [](sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } + catch (sycl::exception const &e) { + std::cout << "Caught asynchronous SYCL exception: " << e.what() << std::endl; + } + } + }; + + //### Step 2 - Create a device object. (create device and q in one step) + //# Device selectors are used to specify the type of a device. 
+ //# Uncomment _one_ of the following three lines to select a device. + queue q(default_selector_v, async_handler); //# default_selector returns a device based on a performance heuristic + // queue q(cpu_selector_v); //# cpu_selector returns a cpu device + // queue q(gpu_selector_v); //# gpu_selector returns a gpu device + // queue q; + //# Print actual device used + std::cout << "Device: " << q.get_device().get_info() << "\n"; + + //### Step 4 - Create buffers to hold our matrix data. + //# Buffer objects can be constructed given a container + //# Observe the creation of buffers for matrices A and B. + //# Try and create a third buffer for matrix C called C_buffer. + //# The solution is shown in the hidden cell below. + buffer A_buffer(A); + buffer B_buffer(B); + /* define C_buffer below */ + buffer C_buffer(C); + + + //### Step 5 - Execute gemm operation. + //# Here, we need only pass in our queue and other familiar matrix multiplication parameters. + //# This includes the dimensions and data buffers for matrices A, B, and C. + mkl::blas::gemm(q, transA, transB, m, n, k, alpha, A_buffer, ldA, B_buffer, ldB, beta, C_buffer, ldC); + + //# we cannot explicitly transfer memory to/from the device when using buffers + //# that is why we must use this operation to ensure result data is returned to the host + q.wait_and_throw(); //# block until operation completes, throw any errors + + //### Step 6 - Observe creation of accessors to retrieve data from A_buffer and C_buffer. + accessor A_acc(A_buffer,read_only); + accessor C_acc(C_buffer,read_only); + + int status = 0; + + // verify C matrix using accessor to observe values held in C_buffer + std::cout << "\n"; + std::cout << "C = \n"; + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) { + if (A_acc[i*m+j] != C_acc[i*m+j]) status = 1; + std::cout << C_acc[i*m+j] << " "; + } + std::cout << "\n"; + } + std::cout << "\n"; + + status == 0 ? 
std::cout << "Verified: A = C\n" : std::cout << "Failed: A != C\n"; + return status; +} diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/dpcpp_gemm_usm.cpp b/Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/dpcpp_gemm_usm.cpp new file mode 100644 index 0000000000..3d44473fb7 --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/dpcpp_gemm_usm.cpp @@ -0,0 +1,124 @@ +//============================================================== +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include //# sycl namespace +#include "oneapi/mkl/blas.hpp" //# oneMKL DPC++ interface for BLAS functions + +//# The following project performs matrix multiplication using oneMKL / DPC++ with Unified Shared Memory (USM) +//# We will execute the simple operation A * B = C +//# The matrix B is set equal to the identity matrix such that A * B = A * I +//# After performing the computation, we will verify A * I = C -> A = C + +using namespace sycl; +namespace mkl = oneapi::mkl; //# shorten mkl namespace + +int main() { + + //# dimensions + int m = 3, n = 3, k = 3; + //# leading dimensions + int ldA = 3, ldB = 3, ldC = 3; + //# scalar multipliers + double alpha = 1.0, beta = 1.0; + //# transpose status of matrices + mkl::transpose transA = mkl::transpose::nontrans; + mkl::transpose transB = mkl::transpose::nontrans; + + //### Step 1 - Observe the definition of an asynchronous exception handler. + //# This function object will later be supplied to the queue. + //# It is designed to handle errors thrown while device code executes. + auto async_handler = [](sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } + catch (sycl::exception const &e) { + std::cout << "Caught asynchronous SYCL exception: " << e.what() << std::endl; + } + } + }; + + //### Step 2 - Create a device object. 
+ //# Device selectors are used to specify the type of a device. + //# Uncomment _one_ of the following three lines to select a device. + // sycl::device device = sycl::device(sycl::default_selector()); //# default_selector returns a device based on a performance heuristic + // sycl::device device = sycl::device(sycl::cpu_selector()); //# cpu_selector returns a cpu device + // sycl::device device = sycl::device(sycl::gpu_selector()); //# gpu_selector returns a gpu device + std::cout << "Device: " << device.get_info() << "\n"; + + //### Step 3 - Create a queue object. + //# A queue accepts a single device, and optionally, an exception handler. + //# Uncomment the following line to initialize a queue with our device and handler. + // sycl::queue queue(device, async_handler); + + //### Step 4 - Create a sycl event and allocate USM + //# The later execution of the gemm operation is tied to this event + //# The gemm operation will also make use of a vector of sycl events we can call 'gemm_dependencies' + sycl::event gemm_done; + std::vector gemm_dependencies; + //# Here, we allocate USM pointers for each matrix, using the special 'malloc_shared' function + //# Make sure to template the function with the correct precision, and pass in our queue to the function call + double *A_usm = sycl::malloc_shared(m * k, queue); + double *B_usm = sycl::malloc_shared(k * n, queue); + double *C_usm = sycl::malloc_shared(m * n, queue); + + //# define matrix A as the 3x3 matrix + //# {{ 1, 2, 3}, {4, 5, 6}, {7, 8, 9}} + for (int i = 0; i < m; i++) { + for (int j = 0; j < k; j++) { + A_usm[i*m+j] = (double)(i*m+j) + 1.0; + } + } + + //# define matrix B as the identity matrix + for (int i = 0; i < k; i++) { + for (int j = 0; j < n; j++) { + if (i == j) B_usm[i*k+j] = 1.0; + else B_usm[i*k+j] = 0.0; + } + } + + //# initialize C as a 0 matrix + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + C_usm[i*m+j] = 0.0; + } + } + + //### Step 5 - Execute gemm operation. 
+ //# Here, we fill in the familiar parameters for the gemm operation. + //# However, we must also pass in the queue as the first parameter. + //# We must also pass in our list of dependencies as the final parameter. + //# We are also passing in our USM pointers as opposed to a buffer or raw data pointer. + gemm_done = mkl::blas::gemm(queue, transA, transB, m, n, k, alpha, A_usm, ldA, B_usm, ldB, beta, C_usm, ldC, gemm_dependencies); + + //# We must now wait for the given event to finish before accessing any data involved in the operation + //# Otherwise, we may access data before the operation has completed, or before it has been returned to the host + gemm_done.wait(); + + float status = 0.0; + + //# verify C matrix using USM data + std::cout << "\n"; + std::cout << "C = \n"; + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) { + if (A_usm[i*m+j] != C_usm[i*m+j]) status = 1; + std::cout << C_usm[i*m+j] << " "; + } + std::cout << "\n"; + } + std::cout << "\n"; + + //# free usm pointers + sycl::free(A_usm, queue); + sycl::free(B_usm, queue); + sycl::free(C_usm, queue); + + status == 0 ? 
std::cout << "Verified: A = C\n" : std::cout << "Failed: A != C\n"; + return status; +} diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/omp_gemm.cpp b/Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/omp_gemm.cpp new file mode 100644 index 0000000000..a0d1f0e4b0 --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/lab/omp_gemm.cpp @@ -0,0 +1,95 @@ +//============================================================== +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include "mkl.h" //# main mkl header +#include "mkl_omp_offload.h" //# mkl OMP Offload interface + +int dnum = 0; + +int main() { + + //# dimensions + MKL_INT m = 3, n = 3, k = 3; + //# leading dimensions + MKL_INT ldA = k, ldB = n, ldC = n; + //# scalar multipliers + double alpha = 1.0; + double beta = 1.0; + //# matrix data + double *A = (double *)malloc(m * k * sizeof(double)); + double *B = (double *)malloc(k * n * sizeof(double)); + double *C = (double *)malloc(m * n * sizeof(double)); + + //# define matrix A as the 3x3 matrix + //# {{ 1, 2, 3}, {4, 5, 6}, {7, 8, 9}} + for (int i = 0; i < m; i++) { + for (int j = 0; j < k; j++) { + A[i*m+j] = (double)(i*m+j) + 1.0; + } + } + + //# define matrix B as the identity matrix + for (int i = 0; i < k; i++) { + for (int j = 0; j < n; j++) { + if (i == j) B[i*k+j] = 1.0; + else B[i*k+j] = 0.0; + } + } + + //# initialize C as a 0 matrix + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + C[i*m+j] = 0.0; + } + } + + MKL_INT sizeA = m*k; + MKL_INT sizeB = k*n; + MKL_INT sizeC = m*n; + + //# Below are the two compiler directives necessary to offload the GEMM operation + //# we are using 'dgemm' to specify we are using double-precision values + + //# The outer directive maps input data (matrices A & B) 'to' the device. + //# It also maps output data (matrix C) 'from' the device so that the results of the operation are returned. 
+ //# Finally, this directive specifies device number 0, which should interact with an available GPU. + + //# The inner directive dispatches the correct version of the contained operation, again specifying the device number. + //# This directive also uses the 'use_device_ptr' statement to specify the data we are working with (in this case, arrays A, B, & C). + + //# Uncomment the two 'pragma' lines below. (Do not remove the '#' character) + + #pragma omp target data map(to:A[0:sizeA],B[0:sizeB]) map(from:C[0:sizeC]) device(dnum) + { + #pragma omp target variant dispatch device(dnum) use_device_ptr(A, B, C) + { + dgemm("N", "N", &m, &n, &k, &alpha, A, &ldA, B, &ldB, &beta, C, &ldC); + } + } + + int status = 0; + + //# verify C matrix + printf("\n"); + printf("C = \n"); + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + if (A[i*m+j] != C[i*m+j]) status = 1; + printf("%0.0f ", C[i*m+j]); + } + printf("\n"); + } + printf("\n"); + + //# free matrix data + free(A); + free(B); + free(C); + + status == 0 ? printf("Verified: A = C\n") : printf("Failed: A != C\n"); + + return status; +} diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/q b/Libraries/gemm_oneMKL_SYCL/00_GEMM/q new file mode 100644 index 0000000000..b06e66b787 --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/q @@ -0,0 +1,52 @@ +#!/bin/bash +#========================================== +# Copyright © 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT +#========================================== +# Script to submit job in Intel(R) DevCloud +# Version: 0.7 +#========================================== +if [ -z "$1" ]; then + echo "Missing script argument, Usage: ./q run.sh" +elif [ ! -f "$1" ]; then + echo "File $1 does not exist" +else + echo "Job has been submitted to Intel(R) DevCloud and will execute soon." + echo "" + echo " If you do not see result in 60 seconds, please restart the Jupyter kernel:" + echo " Kernel -> 'Restart Kernel and Clear All Outputs...' 
and then try again" + echo "" + script=$1 + # Remove old output files + rm *.sh.* > /dev/null 2>&1 + # Submit job using qsub + qsub_id=`qsub -l nodes=1:gpu:ppn=2 -d . $script` + job_id="$(cut -d'.' -f1 <<<"$qsub_id")" + # Print qstat output + qstat + # Wait for output file to be generated and display + echo "" + echo -ne "Waiting for Output " + until [ -f $script.o$job_id ]; do + sleep 1 + echo -ne "█" + ((timeout++)) + # Timeout if no output file generated within 60 seconds + if [ $timeout == 60 ]; then + echo "" + echo "" + echo "TimeOut 60 seconds: Job is still queued for execution, check for output file later ($script.o$job_id)" + echo "" + break + fi + done + # Print output and error file content if exist + if [ -n "$(find -name '*.sh.o'$job_id)" ]; then + echo " Done⬇" + cat $script.o$job_id + cat $script.e$job_id + echo "Job Completed in $timeout seconds." + rm *.sh.*$job_id > /dev/null 2>&1 + fi +fi diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_buffers.sh b/Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_buffers.sh new file mode 100644 index 0000000000..b7a4bdc88f --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_buffers.sh @@ -0,0 +1,7 @@ +#!/bin/bash +source /opt/intel/oneapi/setvars.sh > /dev/null 2>&1 +/bin/echo "##" $(whoami) is compiling oneMKL_introduction Module0 -- gemm with buffers - 1 of 3 dpcpp_gemm_buffers.cpp + +icpx -fsycl -fsycl-device-code-split=per_kernel -DMKL_ILP64 -I$MKLROOT/include -L$MKLROOT/lib/intel64 -lmkl_sycl -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl lab/dpcpp_gemm_buffers.cpp + +if [ $? 
-eq 0 ]; then ./a.out; fi diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_omp.sh b/Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_omp.sh new file mode 100644 index 0000000000..25be751795 --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_omp.sh @@ -0,0 +1,5 @@ +#!/bin/bash +source /opt/intel/oneapi/setvars.sh > /dev/null 2>&1 +/bin/echo "##" $(whoami) is compiling oneMKL_introduction Module0 -- gemm with openmp - 3 of 3 omp_gemm.cpp +icx lab/omp_gemm.cpp -fsycl-device-code-split=per_kernel -DMKL_ILP64 -m64 -I/opt/intel/oneapi/mkl/2021.1-beta10/include -fsycl -fiopenmp -fopenmp-targets=spir64 -mllvm -vpo-paropt-use-raw-dev-ptr -L/opt/intel/oneapi/mkl/2021.1-beta10/lib/intel64 -lmkl_sycl -Wl,--start-group -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lsycl -lOpenCL -lpthread -ldl -lm -lstdc++ +if [ $? -eq 0 ]; then ./a.out; fi diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_usm.sh b/Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_usm.sh new file mode 100644 index 0000000000..a6e3aa6443 --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/run_gemm_usm.sh @@ -0,0 +1,5 @@ +#!/bin/bash +source /opt/intel/oneapi/setvars.sh > /dev/null 2>&1 +/bin/echo "##" $(whoami) is compiling oneMKL_introduction Module0 -- gemm with usm - 2 of 3 dpcpp_gemm_usm.cpp +dpcpp lab/dpcpp_gemm_usm.cpp -fsycl-device-code-split=per_kernel -DMKL_ILP64 -I$MKLROOT/include -L$MKLROOT/lib/intel64 -lmkl_sycl -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl +if [ $? 
-eq 0 ]; then ./a.out; fi diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/src/dpcpp_gemm_buffers.cpp b/Libraries/gemm_oneMKL_SYCL/00_GEMM/src/dpcpp_gemm_buffers.cpp new file mode 100644 index 0000000000..db13e2bed6 --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/src/dpcpp_gemm_buffers.cpp @@ -0,0 +1,99 @@ +//============================================================== +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include //# sycl namespace +#include "oneapi/mkl/blas.hpp" //# oneMKL DPC++ interface for BLAS functions + +//# The following project performs matrix multiplication using oneMKL / DPC++ with buffers. +//# We will execute the simple operation A * B = C +//# The matrix B is set equal to the identity matrix such that A * B = A * I +//# After performing the computation, we will verify A * I = C -> A = C + +namespace mkl = oneapi::mkl; //# shorten mkl namespace + +int main() { + + //# dimensions + int m = 3, n = 3, k = 3; + //# leading dimensions + int ldA = 3, ldB = 3, ldC = 3; + //# scalar multipliers + double alpha = 1.0, beta = 1.0; + //# transpose status of matrices + mkl::transpose transA = mkl::transpose::nontrans; + mkl::transpose transB = mkl::transpose::nontrans; + //# matrix data + std::vector A = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; + std::vector B = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0}; + std::vector C = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + + //### Step 1 - Observe the definition of an asynchronous exception handler. + //# This function object will later be supplied to the queue. + //# It is designed to handle errors thrown while device code executes. 
+ auto async_handler = [](sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } + catch (sycl::exception const &e) { + std::cout << "Caught asynchronous SYCL exception: " << e.what() << std::endl; + } + } + }; + + //### Step 2 - Create a device object. + //# Device selectors are used to specify the type of a device. + //# Uncomment _one_ of the following three lines to select a device. + // sycl::device device = sycl::device(sycl::default_selector()); //# default_selector returns a device based on a performance heuristic + // sycl::device device = sycl::device(sycl::cpu_selector()); //# cpu_selector returns a cpu device + // sycl::device device = sycl::device(sycl::gpu_selector()); //# gpu_selector returns a gpu device + std::cout << "Device: " << device.get_info() << "\n"; + + //### Step 3 - Create a queue object. + //# A queue accepts a single device, and optionally, an exception handler. + //# Uncomment the following line to initialize a queue with our device and handler. + // sycl::queue queue(device, async_handler); + + //### Step 4 - Create buffers to hold our matrix data. + //# Buffer objects can be constructed given a container + //# Observe the creation of buffers for matrices A and B. + //# Try and create a third buffer for matrix C called C_buffer. + //# The solution is shown in the hidden cell below. + sycl::buffer A_buffer(A); + sycl::buffer B_buffer(B); + /* define C_buffer here */ + + //### Step 5 - Execute gemm operation. + //# Here, we need only pass in our queue and other familiar matrix multiplication parameters. + //# This includes the dimensions and data buffers for matrices A, B, and C. 
+ mkl::blas::gemm(queue, transA, transB, m, n, k, alpha, A_buffer, ldA, B_buffer, ldB, beta, C_buffer, ldC); + + //# we cannot explicitly transfer memory to/from the device when using buffers + //# that is why we must use this operation to ensure result data is returned to the host + queue.wait_and_throw(); //# block until operation completes, throw any errors + + //### Step 6 - Observe creation of accessors to retrieve data from A_buffer and C_buffer. + sycl::host_accessor A_acc(A_buffer, sycl::read_only); + sycl::host_accessor C_acc(C_buffer, sycl::read_only); + + int status = 0; + + // verify C matrix using accessor to observe values held in C_buffer + std::cout << "\n"; + std::cout << "C = \n"; + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) { + if (A_acc[i*m+j] != C_acc[i*m+j]) status = 1; + std::cout << C_acc[i*m+j] << " "; + } + std::cout << "\n"; + } + std::cout << "\n"; + + status == 0 ? std::cout << "Verified: A = C\n" : std::cout << "Failed: A != C\n"; + return status; +} \ No newline at end of file diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/src/dpcpp_gemm_usm.cpp b/Libraries/gemm_oneMKL_SYCL/00_GEMM/src/dpcpp_gemm_usm.cpp new file mode 100644 index 0000000000..faecdf25d0 --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/src/dpcpp_gemm_usm.cpp @@ -0,0 +1,123 @@ +//============================================================== +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include +#include //# sycl namespace +#include "oneapi/mkl/blas.hpp" //# oneMKL DPC++ interface for BLAS functions + +//# The following project performs matrix multiplication using oneMKL / DPC++ with Unified Shared Memory (USM) +//# We will execute the simple operation A * B = C +//# The matrix B is set equal to the identity matrix such that A * B = A * I +//# After performing the computation, we will verify A * I = C -> A = C + +namespace mkl = 
oneapi::mkl; //# shorten mkl namespace + +int main() { + + //# dimensions + int m = 3, n = 3, k = 3; + //# leading dimensions + int ldA = 3, ldB = 3, ldC = 3; + //# scalar multipliers + double alpha = 1.0, beta = 1.0; + //# transpose status of matrices + mkl::transpose transA = mkl::transpose::nontrans; + mkl::transpose transB = mkl::transpose::nontrans; + + //### Step 1 - Observe the definition of an asynchronous exception handler. + //# This function object will later be supplied to the queue. + //# It is designed to handle errors thrown while device code executes. + auto async_handler = [](sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } + catch (sycl::exception const &e) { + std::cout << "Caught asynchronous SYCL exception: " << e.what() << std::endl; + } + } + }; + + //### Step 2 - Create a device object. + //# Device selectors are used to specify the type of a device. + //# Uncomment _one_ of the following three lines to select a device. + // sycl::device device = sycl::device(sycl::default_selector()); //# default_selector returns a device based on a performance heuristic + // sycl::device device = sycl::device(sycl::cpu_selector()); //# cpu_selector returns a cpu device + // sycl::device device = sycl::device(sycl::gpu_selector()); //# gpu_selector returns a gpu device + std::cout << "Device: " << device.get_info() << "\n"; + + //### Step 3 - Create a queue object. + //# A queue accepts a single device, and optionally, an exception handler. + //# Uncomment the following line to initialize a queue with our device and handler. 
+ // sycl::queue queue(device, async_handler); + + //### Step 4 - Create a sycl event and allocate USM + //# The later execution of the gemm operation is tied to this event + //# The gemm operation will also make use of a vector of sycl events we can call 'gemm_dependencies' + sycl::event gemm_done; + std::vector gemm_dependencies; + //# Here, we allocate USM pointers for each matrix, using the special 'malloc_shared' function + //# Make sure to template the function with the correct precision, and pass in our queue to the function call + double *A_usm = sycl::malloc_shared(m * k, queue); + double *B_usm = sycl::malloc_shared(k * n, queue); + double *C_usm = sycl::malloc_shared(m * n, queue); + + //# define matrix A as the 3x3 matrix + //# {{ 1, 2, 3}, {4, 5, 6}, {7, 8, 9}} + for (int i = 0; i < m; i++) { + for (int j = 0; j < k; j++) { + A_usm[i*m+j] = (double)(i*m+j) + 1.0; + } + } + + //# define matrix B as the identity matrix + for (int i = 0; i < k; i++) { + for (int j = 0; j < n; j++) { + if (i == j) B_usm[i*k+j] = 1.0; + else B_usm[i*k+j] = 0.0; + } + } + + //# initialize C as a 0 matrix + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + C_usm[i*m+j] = 0.0; + } + } + + //### Step 5 - Execute gemm operation. + //# Here, we fill in the familiar parameters for the gemm operation. + //# However, we must also pass in the queue as the first parameter. + //# We must also pass in our list of dependencies as the final parameter. + //# We are also passing in our USM pointers as opposed to a buffer or raw data pointer. 
+ gemm_done = mkl::blas::gemm(queue, transA, transB, m, n, k, alpha, A_usm, ldA, B_usm, ldB, beta, C_usm, ldC, gemm_dependencies); + + //# We must now wait for the given event to finish before accessing any data involved in the operation + //# Otherwise, we may access data before the operation has completed, or before it has been returned to the host + gemm_done.wait(); + + int status = 0; + + //# verify C matrix using USM data + std::cout << "\n"; + std::cout << "C = \n"; + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) { + if (A_usm[i*m+j] != C_usm[i*m+j]) status = 1; + std::cout << C_usm[i*m+j] << " "; + } + std::cout << "\n"; + } + std::cout << "\n"; + + //# free usm pointers + sycl::free(A_usm, queue); + sycl::free(B_usm, queue); + sycl::free(C_usm, queue); + + status == 0 ? std::cout << "Verified: A = C\n" : std::cout << "Failed: A != C\n"; + return status; +} \ No newline at end of file diff --git a/Libraries/gemm_oneMKL_SYCL/00_GEMM/src/omp_gemm.cpp b/Libraries/gemm_oneMKL_SYCL/00_GEMM/src/omp_gemm.cpp new file mode 100644 index 0000000000..ef1413928e --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/00_GEMM/src/omp_gemm.cpp @@ -0,0 +1,95 @@ +//============================================================== +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +#include +#include "mkl.h" //# main mkl header +#include "mkl_omp_offload.h" //# mkl OMP Offload interface + +int dnum = 0; + +int main() { + + //# dimensions + MKL_INT m = 3, n = 3, k = 3; + //# leading dimensions + MKL_INT ldA = k, ldB = n, ldC = n; + //# scalar multipliers + double alpha = 1.0; + double beta = 1.0; + //# matrix data + double *A = (double *)malloc(m * k * sizeof(double)); + double *B = (double *)malloc(k * n * sizeof(double)); + double *C = (double *)malloc(m * n * sizeof(double)); + + //# define matrix A as the 3x3 matrix + //# {{ 1, 2, 3}, {4, 5, 6}, {7, 8, 9}} + for (int i = 0; i < 
m; i++) { + for (int j = 0; j < k; j++) { + A[i*m+j] = (double)(i*m+j) + 1.0; + } + } + + //# define matrix B as the identity matrix + for (int i = 0; i < k; i++) { + for (int j = 0; j < n; j++) { + if (i == j) B[i*k+j] = 1.0; + else B[i*k+j] = 0.0; + } + } + + //# initialize C as a 0 matrix + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + C[i*m+j] = 0.0; + } + } + + MKL_INT sizeA = m*k; + MKL_INT sizeB = k*n; + MKL_INT sizeC = m*n; + + //# Below are the two compiler directives necessary to offload the GEMM operation + //# we are using 'dgemm' to specify we are using double-precision values + + //# The outer directive maps input data (matrices A & B) 'to' the device. + //# It also maps output data (matrix C) 'from' the device so that the results of the operation are returned. + //# Finally, this directive specifies device number 0, which should interact with an available GPU. + + //# The inner directive dispatches the correct version of the contained operation, again specifying the device number. + //# This directive also uses the 'use_device_ptr' statement to specify the data we are working with (in this case, arrays A, B, & C). + + //# Uncomment the two 'pragma' lines below. (Do not remove the '#' character) + + //#pragma omp target data map(to:A[0:sizeA],B[0:sizeB]) map(from:C[0:sizeC]) device(dnum) + { + //#pragma omp target variant dispatch device(dnum) use_device_ptr(A, B, C) + { + dgemm("N", "N", &m, &n, &k, &alpha, A, &ldA, B, &ldB, &beta, C, &ldC); + } + } + + int status = 0; + + //# verify C matrix + printf("\n"); + printf("C = \n"); + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + if (A[i*m+j] != C[i*m+j]) status = 1; + printf("%0.0f ", C[i*m+j]); + } + printf("\n"); + } + printf("\n"); + + //# free matrix data + free(A); + free(B); + free(C); + + status == 0 ? 
printf("Verified: A = C\n") : printf("Failed: A != C\n"); + + return status; +} \ No newline at end of file diff --git a/Libraries/gemm_oneMKL_SYCL/oneMKL_Intro.ipynb b/Libraries/gemm_oneMKL_SYCL/oneMKL_Intro.ipynb new file mode 100644 index 0000000000..48fc9c776e --- /dev/null +++ b/Libraries/gemm_oneMKL_SYCL/oneMKL_Intro.ipynb @@ -0,0 +1,149 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Intel oneAPI MKL Training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Sections\n", + "* [Learning Objectives](#Learning-Objectives)\n", + "* [oneMKL Overview](#oneMKL-Overview)\n", + "* [Prerequisites](#Prerequisites)\n", + "* [oneMKL With DPC++](#oneMKL-With-DPC++)\n", + "* [oneMKL With OpenMP Offload](#oneMKL-With-OpenMP-Offload)\n", + "* [Modules](#Modules)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning Objectives\n", + "* Understand how the __Intel oneAPI Math Kernel Library (oneMKL)__ fits within the __oneAPI programming model__ for heterogeneous computing\n", + "* Know the difference between the __Data Parallel C++ (DPC++)__ and __OpenMP Offload__ approaches to oneMKL and when to use each one\n", + "* Get __hands-on__ experience with common oneMKL routines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## oneMKL Overview\n", + "oneMKL provides a set of optimized scientific computing routines familiar to users of the Intel Math Kernel Library (MKL). This toolkit extends heterogeneous computing functionality via the DPC++ and OpenMP Offload interfaces.\n", + "\n", + "Each interface follows a specific use case. Generally, users creating new data-parallel projects *or* migrating CUDA or OpenCL projects should opt for DPC++, while those updating legacy C or Fortran code should use OpenMP Offload." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "The following courses prime the reader on the use of oneAPI with DPC++ and with OpenMP Offload. The courses also provide motivation for the use of each method and serve as a foundation for the learnings in this lab.\n", + "\n", + "* [Essentials of Data Parallel C++](https://software.intel.com/content/www/us/en/develop/tools/oneapi/training/dpc-essentials.html)\n", + "* [OpenMP* Offload Basics](https://software.intel.com/content/www/us/en/develop/tools/oneapi/training/openmp-offload.html)\n", + "\n", + "oneMKL simplifies the use of the oneAPI programming model and handles much of the work for users. As such it is *not* necessary to work through all of the training modules in the *Essentials of Data Parallel C++* lab. Below is the list of minimum recommended training modules for DPC++ before starting this lab.\n", + "\n", + "* oneAPI_Essentials/01_oneAPI_Intro\n", + "* oneAPI_Essentials/02_DPCPP_Program_structure\n", + "* oneAPI_Essentials/03_DPCPP_Unified_Shared_Memory\n", + "\n", + "As for the OpenMP Offload approach, it will be worthwhile to view all training modules in the OpenMP Offload Basics lab." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## oneMKL With DPC++\n", + "The oneMKL DPC++ interface allows DPC++ programs to take advantage of oneMKL routines. When working with the DPC++ we must keep track of a few important components, including:\n", + "\n", + "* __Device(s)__ on which oneMKL functions will execute\n", + "* __Queue__ to schedule submission of tasks to device(s)\n", + "\n", + "oneMKL also supports different DPC++ memory management models.\n", + "\n", + "1. __Buffers__ and __accessors__\n", + "2. __Unified shared memory__\n", + "\n", + "A typical DPC++ program requires the user to create a __kernel__, contained within a __command group__. 
The user must then submit the __command group__ to the __queue__, scheduling its execution on the given __device__. \n", + "\n", + "oneMKL provides a simpler path. Instead of the traditional approach, the user need only create a __queue__ and pass it to a oneMKL function call. The function selects a pre-written kernel, optimized for the chosen device, and submits it to our queue. There is *no* need to write a __kernel__ or __command group__." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## oneMKL With OpenMP Offload\n", + "The OpenMP Offload approach interfaces well with existing C code, allowing programs to execute on GPUs with __minimal__ changes to the source. OpenMP Offload utilizes __directives__ in the form of ```#pragma``` statements. The *OpenMP Offload Basics* lab linked above explores these directives in greater details. The following modules will explain how to target the OpenMP Offload interface for oneMKL, and how to set up the necessary ```#pragma``` statements for each routine." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Modules\n", + "Each module is a self contained lab explaining the usage of a specific oneMKL routine. Further, each module shows the usage of a given operation under three different paradigms:\n", + "1. DPC++ with buffer/accessor memory model\n", + "2. DPC++ with unified shared memory model\n", + "3. OpenMP Offload\n", + "\n", + "### 00 - [Matrix Multiplication (GEMM)](./00_GEMM/00_GEMM.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "As is shown in the above modules, oneMKL enables users by providing an easy way to utilize heterogeneous computing platforms. 
Whether it be DPC++ for new applications or OpenMP Offload for legacy code, oneMKL provides a means to accelerate scientific computing workloads.\n", + "\n", + "Hopefully, you can now:\n", + "\n", + "* Understand the use of oneMKL within the oneAPI framework\n", + "* Utilize DPC++ to take advantage of heterogeneous computing systems\n", + "* Execute oneMKL routines on a GPU with OpenMP Offload" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "

Survey

\n", + "\n", + "[We would appreciate any feedback you’d care to give, so that we can improve the overall training quality and experience. Thanks! ](https://intel.az1.qualtrics.com/jfe/form/SV_3elZDqbEP3ZcXC5)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7 (Intel® oneAPI)", + "language": "python", + "name": "c009-intel_distribution_of_python_3_oneapi-beta05-python" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}