diff --git a/_static/css/theme_overrides.css b/_static/css/theme_overrides.css index 4042241e..b951b7d7 100644 --- a/_static/css/theme_overrides.css +++ b/_static/css/theme_overrides.css @@ -1,11 +1,31 @@ -.wy-side-nav-search, .wy-nav-top { - background: #007833; -} - .wy-nav-content { max-width: 1200px; } +/* Adds whitespace between OLCF logo and Docs Home link */ +body > div > nav > div > div.wy-side-nav-search > a > img { + padding-bottom: 10px; +} + +/* Clicking on the OLCF logo does nothing + (disable RTD theme's default behavior) */ +body > div > nav > div > div.wy-side-nav-search > a{ + pointer-events: none; + cursor: default; +} + +/* Supersede the above block, and allow the Docs Home link to be clickable */ +body > div > nav > div > div.wy-side-nav-search > a > a{ + pointer-events: auto !important; + cursor: pointer !important; + color: grey !important; +} + +/* Don't let the color of the Docs Home link change. */ +body > div > nav > div > div.wy-side-nav-search > a > a:visited{ + color: grey !important; +} + /* override table width restrictions */ @media screen and (min-width: 767px) { @@ -16,6 +36,6 @@ } .wy-table-responsive { - overflow: visible !important; + overflow: auto !important; } } diff --git a/_static/js/custom.js b/_static/js/custom.js index 105724a0..0a7aae3e 100644 --- a/_static/js/custom.js +++ b/_static/js/custom.js @@ -1,4 +1,5 @@ $( document ).ready(function() { + // Create link and text for navigation back to the OLCF home page var olcf_link = document.createElement("a"); var olcf_text = document.createTextNode("OLCF Home Page"); @@ -17,4 +18,32 @@ $( document ).ready(function() { aside.appendChild(separator); aside.appendChild(olcf_link); + // Insert Project Name "OLCF User Documentation" below html_logo in sidebar navigation + var project_name_link = document.createElement("a"); + var project_name_text = document.createTextNode(" OLCF User Documentation"); + project_name_link.appendChild(project_name_text); + project_name_link.setAttribute("href", "https://docs.olcf.ornl.gov"); + project_name_link.classList.add("icon"); + project_name_link.classList.add("icon-home"); + wysidenavsearch = document.querySelector("body > div > nav > div > div.wy-side-nav-search > a"); + wysidenavsearch.appendChild(project_name_link); + + + // For any external links in the main navigation, append the FontAwesome external link icon. + function iconize_external_links(nav_level){ + a_elements = nav_level.getElementsByTagName("A"); + for (var i = 0; i < a_elements.length; ++i){ + if (a_elements[i].getAttribute("href").includes("http")){ + var icon = document.createElement("i"); + icon.classList.add("fa"); + icon.classList.add("fa-external-link"); + var spacer = document.createTextNode(" "); + a_elements[i].appendChild(spacer); + a_elements[i].appendChild(icon); + } + } + } + + iconize_external_links(document.querySelector("body > div > nav > div > div.wy-menu.wy-menu-vertical")) + }); diff --git a/accounts/olcf_policy_guide.rst b/accounts/olcf_policy_guide.rst index 2818733d..2ff2d261 100644 --- a/accounts/olcf_policy_guide.rst +++ b/accounts/olcf_policy_guide.rst @@ -202,7 +202,7 @@ Data Management Policy - Principal Investigators (Industry) - All Users - **Title:** Data Management Policy **Version:** 14.01 + **Title:** Data Management Policy **Version:** 20.02 Introduction ------------ @@ -223,23 +223,27 @@ categories: those intended for user data and those intended for project data. 
Within each of the two categories, we provide different sub-areas, each with an intended purpose: -+----------------------------------------------------------------------------------------------------+---------------------+----------------------------+ -| Purpose | Storage Area | Path | -+====================================================================================================+=====================+============================+ -| Long-term data for routine access that is unrelated to a project | *User Home* | ``$HOME`` | -+----------------------------------------------------------------------------------------------------+---------------------+----------------------------+ -| Long-term data for archival access that is unrelated to a project | *User Archive* | ``/home/$USER`` | -+----------------------------------------------------------------------------------------------------+---------------------+----------------------------+ -| Long-term project data for routine access that's shared with other project members | *Project Home* | ``/ccs/proj/[projid]`` | -+----------------------------------------------------------------------------------------------------+---------------------+----------------------------+ -| Short-term project data for fast, batch-job access that you don't want to share | *Member Work* | ``$MEMBERWORK/[projid]`` | -+----------------------------------------------------------------------------------------------------+---------------------+----------------------------+ -| Short-term project data for fast, batch-job access that's shared with other project members | *Project Work* | ``$PROJWORK/[projid]`` | -+----------------------------------------------------------------------------------------------------+---------------------+----------------------------+ -| Short-term project data for fast, batch-job access that's shared with those outside your project | *World Work* | ``$WORLDWORK/[projid]`` | -+----------------------------------------------------------------------------------------------------+---------------------+----------------------------+ -| Long-term project data for archival access that's shared with other project members | *Project Archive* | ``/proj/[projid]`` | -+----------------------------------------------------------------------------------------------------+---------------------+----------------------------+ ++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ +| Purpose | Storage Area | Path | ++==================================================================================================+===================+============================================+ +| Long-term data for routine access that is unrelated to a project | *User Home* | ``/ccs/home/[userid]`` | ++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ +| Long-term data for archival access that is unrelated to a project | *User Archive* | ``/home/[userid]`` | ++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ +| Long-term project data for routine access that's shared with other project members | *Project Home* | ``/ccs/proj/[projid]`` | 
++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ +| Short-term project data for fast, batch job access that you don't want to share | *Member Work* | ``/gpfs/alpine/[projid]/scratch/[userid]`` | ++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ +| Short-term project data for fast, batch job access that's shared with other project members | *Project Work* | ``/gpfs/alpine/[projid]/proj-shared`` | ++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ +| Short-term project data for fast, batch job access that's shared with those outside your project | *World Work* | ``/gpfs/alpine/[projid]/world-shared`` | ++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ +| Long-term project data for archival access that you don't want to share | *Member Archive* | ``/hpss/prod/[projid]/users/$USER`` | ++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ +| Long-term project data for archival access that's shared with other project members | *Project Archive* | ``/hpss/prod/[projid]/proj-shared`` | ++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ +| Long-term project data for archival access that's shared with those outside your project | *World Archive* | ``/hpss/prod/[projid]/world-shared`` | ++--------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------+ For more information about using the data storage archiving systems, please refer to the pages on :ref:`data-storage-and-transfers`. @@ -261,9 +265,15 @@ User Archive The High Performance Storage System (HPSS) is the tape-archive storage system at the OLCF and is the storage technology that supports the User Archive areas. HPSS is intended for data that do not require day-to-day -access. See the section :ref:`retention-policy` for -more details on applicable quotas, backups, purge, and retention -timeframes. +access. + +.. note:: + Use of this directory for data storage is deprecated in favor of storing + data in the User, Project, and World Archive directories. For new users, + this directory is a "link farm" with symlinks to that user's /hpss/prod + directories. Data for existing users remains in this directory but should + be moved into a User/Project/World Archive directory, at which time this + directory will automatically convert to a link farm. Project Home ^^^^^^^^^^^^ @@ -280,74 +290,97 @@ quotas, backups, purge, and retention timeframes. Member Work ^^^^^^^^^^^ -Project members get an individual Member Work directory for each -associated project; these reside in the center-wide, high-capacity -Spectrum Scale file system on large, fast disk areas intended for -global (parallel) access to temporary/scratch storage. Member Work -directories are provided commonly across all systems. 
Because of the
-scratch nature of the file system, it is not backed up and files are
-automatically purged on a regular basis. Files should not be retained in
-this file system for long, but rather should be migrated to Project Home
-or Project Archive space as soon as the files are not actively being
-used. If a file system associated with your Member Work directory is
-nearing capacity, the OLCF may contact you to request that you reduce
-the size of your Member Work directory. See the section :ref:`retention-policy`
-for more details on applicable quotas, backups,
-purge, and retention timeframes.
+Project members get an individual Member Work directory for each associated
+project; these reside in the center-wide, high-capacity Spectrum Scale file
+system on large, fast disk areas intended for global (parallel) access to
+temporary/scratch storage. Member Work areas are not shared with other
+users of the system and are intended for project data that the user does
+not want to make available to other users. Member Work directories are
+provided commonly across all systems. Because of the scratch nature of the
+file system, it is not backed up and files are automatically purged on a
+regular basis. Files should not be retained in this file system for long,
+but rather should be migrated to Project Home or Project Archive space as
+soon as the files are not actively being used. If a file system associated
+with your Member Work directory is nearing capacity, the OLCF may contact
+you to request that you reduce the size of your Member Work directory. See
+the section :ref:`retention-policy` for more details on applicable quotas,
+backups, purge, and retention timeframes.

Project Work
^^^^^^^^^^^^

-Individual Project Work directories reside in the center-wide,
-high-capacity Spectrum Scale file system on large, fast disk areas intended for
-global (parallel) access to temporary/scratch storage. Project Work
-directories are provided commonly across most systems. Because of the
-scratch nature of the file system, it is not backed up. If a file system
-associated with Project Work storage is nearing capacity, the OLCF may
-contact the PI of the project to request that he or she reduce the size
-of the Project Work directory. See the section :ref:`retention-policy`
-for more details on applicable quotas, backups, purge,
-and retention timeframes.
+Each project is granted a Project Work directory; these reside in the
+center-wide, high-capacity Spectrum Scale file system on large, fast disk
+areas intended for global (parallel) access to temporary/scratch storage.
+Project Work directories can be accessed by all members of a project and
+are intended for sharing data within a project. Project Work directories
+are provided commonly across most systems. Because of the scratch nature of
+the file system, it is not backed up and files are automatically purged on
+a regular basis. Files should not be retained in this file system for long,
+but rather should be migrated to Project Home or Project Archive space as
+soon as the files are not actively being used. If a file system associated
+with Project Work storage is nearing capacity, the OLCF may contact the PI
+of the project to request that he or she reduce the size of the Project
+Work directory. See the section :ref:`retention-policy` for more details on
+applicable quotas, backups, purge, and retention timeframes.
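+
+For example, results that are no longer changing could be bundled out of the
+Project Work area and into the Project Archive area with ``htar`` before they
+are purged. A minimal sketch is shown below; the directory and archive names
+are placeholders used only for illustration:
+
+.. code-block:: bash
+
+    # Move into the Project Work area and bundle a finished results directory
+    # into a single archive file stored in the Project Archive area on HPSS
+    cd /gpfs/alpine/[projid]/proj-shared
+    htar -cvf /hpss/prod/[projid]/proj-shared/results_jan2020.tar results_jan2020/
+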
World Work
^^^^^^^^^^

Each project has a World Work directory that resides in the center-wide,
-high-capacity Spectrum Scale file system on large, fast disk areas intended for
-global (parallel) access to temporary/scratch storage. World Work
-directories are provided commonly across most systems. Because of the
-scratch nature of the file system, it is not backed up. If a file system
-associated with World Work storage is nearing capacity, the OLCF may
-contact the PI of the project to request that he or she reduce the size
-of the World Work directory. See the section :ref:`retention-policy`
-for more details on applicable quotas, backups, purge,
-and retention timeframes.
+high-capacity Spectrum Scale file system on large, fast disk areas intended
+for global (parallel) access to temporary/scratch storage. World Work areas
+can be accessed by all users of the system and are intended for sharing of
+data between projects. World Work directories are provided commonly across
+most systems. Because of the scratch nature of the file system, it is not
+backed up and files are automatically purged on a regular basis. Files
+should not be retained in this file system for long, but rather should be
+migrated to Project Home or Project Archive space as soon as the files are
+not actively being used. If a file system associated with World Work
+storage is nearing capacity, the OLCF may contact the PI of the project to
+request that he or she reduce the size of the World Work directory. See the
+section :ref:`retention-policy` for more details on applicable quotas,
+backups, purge, and retention timeframes.
+
+Member Archive
+^^^^^^^^^^^^^^
+
+Project members get an individual Member Archive directory for each
+associated project; these reside on the High Performance Storage System
+(HPSS), OLCF's tape-archive storage system. Member Archive areas are not
+shared with other users of the system and are intended for project data
+that the user does not want to make available to other users. HPSS is
+intended for data that do not require day-to-day access. Users should not
+store data unrelated to OLCF projects on HPSS. Users should periodically
+review files and remove unneeded ones. See the section
+:ref:`retention-policy` for more details on applicable quotas, backups,
+purge, and retention timeframes.

Project Archive
^^^^^^^^^^^^^^^

-The High Performance Storage System (HPSS) is the tape-archive storage
-system at the OLCF and is the storage technology that supports the User
-Archive areas. HPSS is intended for data that do not require day-to-day
-access. Project Archive areas are shared between all users of the
-project. Users should not store data unrelated to OLCF projects on HPSS.
-Project members should also periodically review files and remove
-unneeded ones. See the section :ref:`retention-policy`
-for more details on applicable quotas, backups, purge, and retention
-timeframes.
-
-Local Scratch Storage
-^^^^^^^^^^^^^^^^^^^^^
-
-A large, fast disk area intended for parallel access to temporary
-storage in the form of scratch directories may be provided on a limited
-number of systems. This area is local to a specific system. This
-directory is, for example, intended to hold output generated by a user's
-job. Because of the scratch nature of the file system, it is not backed
-up and files are automatically purged on a regular basis. Files should
-not be retained in this file system and should be migrated to archival
-storage as soon as the files are not actively being used.
Quotas may be -instituted on a machine-by-machine basis if deemed necessary. +Each project is granted a Project Archive directory; these reside on the +High Performance Storage System (HPSS), OLCF's tape-archive storage system. +Project Archive directories are shared among all members of a project and +are intended for sharing data within a project. HPSS is intended for data +that do not require day-to-day access. Users should not store data +unrelated to OLCF projects on HPSS. Project members should also +periodically review files and remove unneeded ones. See the section +:ref:`retention-policy` for more details on applicable quotas, backups, +purge, and retention timeframes. + +World Archive +^^^^^^^^^^^^^ + +Each project is granted a World Archive directory; these reside on the High +Performance Storage System (HPSS), OLCF's tape-archive storage system. +World Archive areas are shared among all users of the system and are +intended for sharing data between projects. HPSS is intended for data that +do not require day-to-day access. Users should not store data unrelated to +OLCF projects on HPSS. Users should periodically review files and remove +unneeded ones. See the section :ref:`retention-policy` for more details on +applicable quotas, backups, purge, and retention timeframes. + .. _retention-policy: @@ -363,29 +396,35 @@ available at the OLCF. **User-Centric Storage Areas** -+--------------+-----------------+------+-----------------+------------+---------+--------+-----------+ -| Area | Path | Type | Permissions | Quota | Backups | Purged | Retention | -+==============+=================+======+=================+============+=========+========+===========+ -| User Home | ``$HOME`` | NFS | User-controlled | 50 GB | Yes | No | 90 days | -+--------------+-----------------+------+-----------------+------------+---------+--------+-----------+ -| User Archive | ``/home/user`` | HPSS | User-controlled | 2TB | No | No | 90 days | -+--------------+-----------------+------+-----------------+------------+---------+--------+-----------+ ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Area | Path | Type | Permissions | Quota | Backups | Purged | Retention | On Compute Nodes | ++=====================+=============================================+================+=============+========+=========+=========+============+==================+ +| User Home | ``/ccs/home/[userid]`` | NFS | User set | 50 GB | Yes | No | 90 days | Read-only | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| User Archive [#f1]_ | ``/home/[userid]`` | HPSS | User set | 2TB | No | No | 90 days | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| User Archive [#f2]_ | ``/home/[userid]`` | HPSS | 700 | N/A | N/A | N/A | N/A | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ **Project-Centric Storage Areas** -+-----------------+---------------------------+---------------+-----------------+---------------+---------+---------+-----------+ -| Area | Path | Type | Permissions | Quota | Backups | Purged | Retention | 
-+=================+===========================+===============+=================+===============+=========+=========+===========+ -| Project Home | ``/ccs/proj/[projid]`` | NFS | 770 | 50 GB | Yes | No | 90 days | -+-----------------+---------------------------+---------------+-----------------+---------------+---------+---------+-----------+ -| Member Work | ``$MEMBERWORK/[projid]`` | Spectrum Scale| 700 [#f1]_ | 50 TB | No | 90 days | [#f2]_ | -+-----------------+---------------------------+---------------+-----------------+---------------+---------+---------+-----------+ -| Project Work | ``$PROJWORK/projid]`` | Spectrum Scale| 770 | 50 TB | No | 90 days | [#f2]_ | -+-----------------+---------------------------+---------------+-----------------+---------------+---------+---------+-----------+ -| World Work | ``$WORLDWORK/[projid]`` | Spectrum Scale| 775 | 50 TB | No | 90 days | [#f2]_ | -+-----------------+---------------------------+---------------+-----------------+---------------+---------+---------+-----------+ -| Project Archive | ``/proj/[projid]`` | HPSS | 770 | 100 TB | No | No | 90 days | -+-----------------+---------------------------+---------------+-----------------+---------------+---------+---------+-----------+ ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Area | Path | Type | Permissions | Quota | Backups | Purged | Retention | On Compute Nodes | ++=====================+=============================================+================+=============+========+=========+=========+============+==================+ +| Project Home | ``/ccs/proj/[projid]`` | NFS | 770 | 50 GB | Yes | No | 90 days | Read-only | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Member Work | ``/gpfs/alpine/[projid]/scratch/[userid]`` | Spectrum Scale | 700 [#f3]_ | 50 TB | No | 90 days | N/A [#f4]_ | Yes | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Project Work | ``/gpfs/alpine/[projid]/proj-shared`` | Spectrum Scale | 770 | 50 TB | No | 90 days | N/A [#f4]_ | Yes | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| World Work | ``/gpfs/alpine/[projid]/world-shared`` | Spectrum Scale | 775 | 50 TB | No | 90 days | N/A [#f4]_ | Yes | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Member Archive | ``/hpss/prod/[projid]/users/$USER`` | HPSS | 700 | 100 TB | No | No | 90 days | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Project Archive | ``/hpss/prod/[projid]/proj-shared`` | HPSS | 770 | 100 TB | No | No | 90 days | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| World Archive | ``/hpss/prod/[projid]/world-shared`` | HPSS | 775 | 100 TB | No | No | 90 days | No | 
++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ | *Area -* The general name of storage area. | *Path -* The path (symlink) to the storage area's directory. @@ -402,9 +441,21 @@ available at the OLCF. .. rubric:: Footnotes -.. [#f1] Permissions on Member Work directories can be controlled to an extent by project members. By default, only the project member has any accesses, but accesses can be granted to other project members by setting group permissions accordingly on the Member Work directory. The parent directory of the Member Work directory prevents accesses by "UNIX-others" and cannot be changed (security measures). +.. [#f1] This entry is for legacy User Archive directories which contained user data on January 14, 2020. There is also a quota/limit of 2,000 files on this directory. + +.. [#f2] User Archive directories that were created (or had no user data) after January 14, 2020. Settings other than permissions are not applicable because directories are root-owned and contain no user files. + +.. [#f3] Permissions on Member Work directories can be controlled to an extent by project members. By default, only the project member has any accesses, but accesses can be granted to other project members by setting group permissions accordingly on the Member Work directory. The parent directory of the Member Work directory prevents accesses by "UNIX-others" and cannot be changed (security measures). + +.. [#f4] Retention is not applicable as files will follow purge cycle. + +On Summit, Rhea and the DTNs, additional paths to the various project-centric work areas are available +via the following symbolic links and/or environment variables: + +- Member Work Directory: ``/gpfs/alpine/scratch/[userid]/[projid]`` or ``$MEMBERWORK/[projid]`` +- Project Work Directory: ``/gpfs/alpine/proj-shared/[projid]`` or ``$PROJWORK/[projid]`` +- World Work Directory: ``/gpfs/alpine/world-shared/[projid]`` or ``$WORLDWORK/[projid]`` -.. [#f2] Retention is not applicable as files will follow purge cycle. Data Retention Overview ^^^^^^^^^^^^^^^^^^^^^^^ @@ -446,6 +497,14 @@ For sensitive projects only, all data related to the project must be purged from all OLCF computing resources within 30 days of the project’s end or termination date. +Transfer of Member Work and Member Archive Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Although the Member Work and Member Archive directories are for storage +of data a user does not want to make available to other users on the +system, files in these directories are still considered project data +and can be reassigned to another user at the PI's request. + Data Purges ^^^^^^^^^^^ diff --git a/conf.py b/conf.py index 8b09ed50..cfda4035 100644 --- a/conf.py +++ b/conf.py @@ -46,6 +46,10 @@ # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +# To avoid errors with older versions of Sphinx and Sphinx RTD theme, explicitly +# specify the master document. 
+master_doc = 'index' + # -- Options for HTML output ------------------------------------------------- @@ -70,6 +74,10 @@ 'js/custom.js', ] +html_logo = 'images/olcf_logo.png' + +html_favicon = 'images/favicon.ico' + html_context = { 'vcs_pageview_mode': 'edit', 'display_github': True, @@ -86,6 +94,8 @@ 'sticky_navigation': True, 'navigation_depth': 4, 'style_external_links': True, + 'style_nav_header_background': '#efefef', + 'logo_only': True, } diff --git a/contributing/index.rst b/contributing/index.rst index ffa73bc2..2cd4cb7c 100644 --- a/contributing/index.rst +++ b/contributing/index.rst @@ -37,7 +37,8 @@ Setup authoring environment $ git clone https://github.com//olcf-user-docs.git #. Point your master branch to track upstream:: - + + $ cd olcf-user-docs $ git remote add olcf https://github.com/olcf/olcf-user-docs.git $ git fetch olcf $ git branch --set-upstream-to=olcf/master diff --git a/data/archiving.rst b/data/archiving.rst index d563d1db..aea3776b 100644 --- a/data/archiving.rst +++ b/data/archiving.rst @@ -101,17 +101,11 @@ HPSS, you might use: and ``hsi mput`` can be used to retrieve multiple files. To send a file to HPSS, you might use: - ``hsi put a.out`` - -To put a file in a pre-existing directory on hpss: - - - ``hsi “cd MyHpssDir; put a.out”`` + ``hsi put a.out : /hpss/prod/[projid]/users/[userid]/a.out`` To retrieve one, you might use: - - ``hsi get /proj/projectid/a.out`` + ``hsi get /hpss/prod/[projid]/proj-shared/a.out`` Here is a list of commonly used hsi commands. @@ -159,24 +153,24 @@ As with the standard Unix ``tar`` utility the ``-c``, ``-x``, and ``-t`` options, respectively, function to create, extract, and list tar archive files. The ``-K`` option verifies an existing tarfile in HPSS and the ``-X`` option can be used to re-create the index file for an existing archive. For example, to -store all files in the directory ``dir1`` to a file named ``allfiles.tar`` on -HPSS, use the command: +store all files in the directory ``dir1`` to a file named +``/hpss/prod/[projid]/users/[userid]/allfiles.tar`` on HPSS, use the command: - ``htar -cvf allfiles.tar dir1/*`` + ``htar -cvf /hpss/prod/[projid]/users/[userid]/allfiles.tar dir1/*`` To retrieve these files: - ``htar -xvf allfiles.tar`` + ``htar -xvf /hpss/prod/[projid]/users/[userid]/allfiles.tar`` ``htar`` will overwrite files of the same name in the target directory. **When possible, extract only the files you need from large archives.** To display the names of the files in the ``project1.tar`` archive file within the HPSS home directory: - ``htar -vtf project1.tar`` + ``htar -vtf /hpss/prod/[projid]/users/[userid]/project1.tar`` To extract only one file, ``executable.out``, from the ``project1`` directory in -the Archive file called ``project1.tar``: +the Archive file called `` /hpss/prod/[projid]/users/[userid]/project1.tar``: ``htar -xm -f project1.tar project1/ executable.out`` @@ -184,7 +178,7 @@ To extract all files from the ``project1/src`` directory in the archive file called ``project1.tar``, and use the time of extraction as the modification time, use the following command: - ``htar -xm -f project1.tar project1/src`` + ``htar -xm -f /hpss/prod/[projid]/users/[userid]/project1.tar project1/src`` HTAR Limitations ----------------- @@ -217,7 +211,9 @@ Maximum Number of Files per Archive 1 million For example, when attempting to HTAR a directory with one member file larger that 64GB, the following error message will appear: - ``htar -cvf hpss_test.tar hpss_test/`` +.. 
code:: + + $ htar -cvf /hpss/prod/[projid]/users/[userid]/hpss_test.tar hpss_test/ INFO: File too large for htar to handle: hpss_test/75GB.dat (75161927680 bytes) ERROR: 1 oversize member files found - please correct and retry diff --git a/data/policies.rst b/data/policies.rst index 06e226ee..61bf75d3 100644 --- a/data/policies.rst +++ b/data/policies.rst @@ -9,32 +9,62 @@ Policy A brief description of each area and basic guidelines to follow are provided in the table below: -+-------------------+---------------------------------------------+---------------------------+-------------+---------+---------+-------+--------------------------+ -| *Name* | Path | Type | Permissions | Backups | Purged | Quota | Mounted on Compute nodes | -+===================+=============================================+===========================+=============+=========+=========+=======+==========================+ -| *User Home* | ``$HOME`` | NFS | User Set | yes | no | 50GB | Read-only | -+-------------------+---------------------------------------------+---------------------------+-------------+---------+---------+-------+--------------------------+ -| *Project Home* | ``/ccs/proj/[projid]`` | NFS | 770 | yes | no | 50GB | Read-only | -+-------------------+---------------------------------------------+---------------------------+-------------+---------+---------+-------+--------------------------+ -| *User Archive* | ``/home/$USER`` | HPSS | User Set | no | no | 2TB | No | -+-------------------+---------------------------------------------+---------------------------+-------------+---------+---------+-------+--------------------------+ -| *Project Archive* | ``/proj/[projid]`` | HPSS | 770 | no | no | 100TB | No | -+-------------------+---------------------------------------------+---------------------------+-------------+---------+---------+-------+--------------------------+ -| *Member Work* | ``/gpfs/alpine/scratch/[userid]/[projid]/`` | Spectrum Scale (ex. GPFS) | 700 | no | 90 days | 50TB | Yes | -+-------------------+---------------------------------------------+---------------------------+-------------+---------+---------+-------+--------------------------+ -| *Project Work* | ``/gpfs/alpine/proj-shared/[projid]`` | Spectrum Scale (ex. GPFS) | 770 | no | 90 days | 50TB | Yes | -+-------------------+---------------------------------------------+---------------------------+-------------+---------+---------+-------+--------------------------+ -| *World Work* | ``/gpfs/alpine/world-shared/[projid]`` | Spectrum Scale (ex. 
GPFS) | 775 | no | 90 days | 50TB | Yes | -+-------------------+---------------------------------------------+---------------------------+-------------+---------+---------+-------+--------------------------+ - - -On Summit paths to the various project-centric work storage areas are simplified -by the use of environment variables that point to the proper directory on a -per-user basis: - -- Member Work Directory: ``$MEMBERWORK/[projid]`` -- Project Work Directory: ``$PROJWORK/[projid]`` -- World Work Directory: ``$WORLDWORK/[projid]`` ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Area | Path | Type | Permissions | Quota | Backups | Purged | Retention | On Compute Nodes | ++=====================+=============================================+================+=============+========+=========+=========+============+==================+ +| User Home | ``/ccs/home/[userid]`` | NFS | User set | 50 GB | Yes | No | 90 days | Read-only | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| User Archive [#f1]_ | ``/home/[userid]`` | HPSS | User set | 2TB | No | No | 90 days | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| User Archive [#f2]_ | ``/home/[userid]`` | HPSS | 700 | N/A | N/A | N/A | N/A | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Project Home | ``/ccs/proj/[projid]`` | NFS | 770 | 50 GB | Yes | No | 90 days | Read-only | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Member Work | ``/gpfs/alpine/[projid]/scratch/[userid]`` | Spectrum Scale | 700 [#f3]_ | 50 TB | No | 90 days | N/A [#f4]_ | Yes | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Project Work | ``/gpfs/alpine/[projid]/proj-shared`` | Spectrum Scale | 770 | 50 TB | No | 90 days | N/A [#f4]_ | Yes | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| World Work | ``/gpfs/alpine/[projid]/world-shared`` | Spectrum Scale | 775 | 50 TB | No | 90 days | N/A [#f4]_ | Yes | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Member Archive | ``/hpss/prod/[projid]/users/$USER`` | HPSS | 700 | 100 TB | No | No | 90 days | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Project Archive | ``/hpss/prod/[projid]/proj-shared`` | HPSS | 770 | 100 TB | No | No | 90 days | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| World Archive | ``/hpss/prod/[projid]/world-shared`` | HPSS | 775 | 100 TB | No | No | 90 days | No | 
++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ + +| *Area -* The general name of storage area. +| *Path -* The path (symlink) to the storage area's directory. +| *Type -* The underlying software technology supporting the storage area. +| *Permissions -* UNIX Permissions enforced on the storage area's top-level directory. +| *Quota -* The limits placed on total number of bytes and/or files in the storage area. +| *Backups -* States if the data is automatically duplicated for disaster recovery purposes. +| *Purged -* Period of time, post-file-access, after which a file will be marked as eligible for permanent deletion. +| *Retention -* Period of time, post-account-deactivation or post-project-end, after which data will be marked as eligible for permanent deletion. +| *On Compute Nodes -* Is this filesystem available on compute nodes (yes, no, or available but read-only) + + **Important!** Files within "Work" directories (i.e., Member Work, + Project Work, World Work) are *not* backed up and are *purged* on a + regular basis according to the timeframes listed above. + +.. rubric:: Footnotes + +.. [#f1] This entry is for legacy User Archive directories which contained user data on January 14, 2020. + +.. [#f2] User Archive directories that were created (or had no user data) after January 14, 2020. Settings other than permissions are not applicable because directories are root-owned and contain no user files. + +.. [#f3] Permissions on Member Work directories can be controlled to an extent by project members. By default, only the project member has any accesses, but accesses can be granted to other project members by setting group permissions accordingly on the Member Work directory. The parent directory of the Member Work directory prevents accesses by "UNIX-others" and cannot be changed (security measures). + +.. [#f4] Retention is not applicable as files will follow purge cycle. + + + +On Summit, Rhea and the DTNs, additional paths to the various project-centric work areas are available +via the following symbolic links and/or environment variables: + +- Member Work Directory: ``/gpfs/alpine/scratch/[userid]/[projid]`` or ``$MEMBERWORK/[projid]`` +- Project Work Directory: ``/gpfs/alpine/proj-shared/[projid]`` or ``$PROJWORK/[projid]`` +- World Work Directory: ``/gpfs/alpine/world-shared/[projid]`` or ``$WORLDWORK/[projid]`` Information ============ diff --git a/data/project_centric.rst b/data/project_centric.rst index 2a9622e4..289d801d 100644 --- a/data/project_centric.rst +++ b/data/project_centric.rst @@ -11,11 +11,11 @@ store code, data, and other files related to their project. 
Project Home Directories (NFS) =============================== -============== ====================== ==== =========== ======= ====== ===== -*Name* Path Type Permissions Backups Purged Quota -============== ====================== ==== =========== ======= ====== ===== -*Project Home* ``/ccs/proj/[projid]`` NFS 770 yes no 50GB -============== ====================== ==== =========== ======= ====== ===== ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Area | Path | Type | Permissions | Quota | Backups | Purged | Retention | On Compute Nodes | ++=====================+=============================================+================+=============+========+=========+=========+============+==================+ +| Project Home | ``/ccs/proj/[projid]`` | NFS | 770 | 50 GB | Yes | No | 90 days | Read-only | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ Projects are provided with a Project Home storage area in the NFS-mounted filesystem. This area is intended for storage of data, code, @@ -46,6 +46,31 @@ should also be members of that group-specific project. For example, all members of project “ABC123” should be members of the “abc123” UNIX group. +Project Home Backups +--------------------- + +If you accidentally delete files from your project home directory +(``/ccs/proj/[projid]``), you may be able to retrieve them. Online backups +are performed at regular intervals. Hourly backups for the past 24 hours, +daily backups for the last 7 days, and once-weekly backups are available. It is +possible that the deleted files are available in one of those backups. The +backup directories are named ``hourly.*``, ``daily.*``, and ``weekly.*`` where +``*`` is the date/time stamp of backup creation. For example, +``hourly.2020-01-01-0905`` is an hourly backup made on January 1st, 2020 at +9:05 AM. + +The backups are accessed via the ``.snapshot`` subdirectory. Note that ``ls`` +alone (or even ``ls -a``) will not show the ``.snapshot`` subdirectory exists, +though ``ls .snapshot`` will show its contents. The ``.snapshot`` feature is +available in any subdirectory of your project home directory and will show the +online backups available for that subdirectory. + +To retrieve a backup, simply copy it into your desired destination with the +``cp`` command. + +Project Work Areas +=================== + Three Project Work Areas to Facilitate Collaboration ----------------------------------------------------- @@ -55,16 +80,29 @@ directories, *Project Work* directories, and *World Work* directories. Each directory should be used for storing files generated by computationally-intensive HPC jobs related to a project. 
-+----------------+--------------------------------------------+-----------------+-------------+---------+-----------+-------+ -| *Name* | Path | Type | Permissions | Backups | Purged | Quota | -+================+============================================+=================+=============+=========+===========+=======+ -| *Member Work* | ``/gpfs/alpine/scratch/[userid]/[projid]`` | Spectrum Scale | 700 | no | 90 days | 50TB | -+----------------+--------------------------------------------+-----------------+-------------+---------+-----------+-------+ -| *Project Work* | ``/gpfs/alpine/proj-shared/[projid]`` | Spectrum Scale | 770 | no | 90 days | 50TB | -+----------------+--------------------------------------------+-----------------+-------------+---------+-----------+-------+ -| *World Work* | ``/gpfs/alpine/world-shared/[projid]`` | Spectrum Scale | 775 | no | 90 days | 50TB | -+----------------+--------------------------------------------+-----------------+-------------+---------+-----------+-------+ ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Area | Path | Type | Permissions | Quota | Backups | Purged | Retention | On Compute Nodes | ++=====================+=============================================+================+=============+========+=========+=========+============+==================+ +| Member Work | ``/gpfs/alpine/[projid]/scratch/[userid]`` | Spectrum Scale | 700 [#f1]_ | 50 TB | No | 90 days | N/A [#f2]_ | Yes | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Project Work | ``/gpfs/alpine/[projid]/proj-shared`` | Spectrum Scale | 770 | 50 TB | No | 90 days | N/A [#f2]_ | Yes | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| World Work | ``/gpfs/alpine/[projid]/world-shared`` | Spectrum Scale | 775 | 50 TB | No | 90 days | N/A [#f2]_ | Yes | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +.. rubric:: Footnotes + +.. [#f1] Permissions on Member Work directories can be controlled to an extent by project members. By default, only the project member has any accesses, but accesses can be granted to other project members by setting group permissions accordingly on the Member Work directory. The parent directory of the Member Work directory prevents accesses by "UNIX-others" and cannot be changed (security measures). + +.. [#f2] Retention is not applicable as files will follow purge cycle. + + +On Summit, Rhea and the DTNs, additional paths to the various project-centric work areas are available +via the following symbolic links and/or environment variables: + +- Member Work Directory: ``/gpfs/alpine/scratch/[userid]/[projid]`` or ``$MEMBERWORK/[projid]`` +- Project Work Directory: ``/gpfs/alpine/proj-shared/[projid]`` or ``$PROJWORK/[projid]`` +- World Work Directory: ``/gpfs/alpine/world-shared/[projid]`` or ``$WORLDWORK/[projid]`` The difference between the three lies in the accessibility of the data to project members and to researchers outside of the project. Member @@ -102,27 +140,56 @@ to Project Archive areas (HPSS) or to an off-site location. 
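+
+As an illustration, a file can be copied off the work areas before it is
+purged, either into Project Home or into the project's archival space on HPSS
+(described in the next section). The project ID ``abc123`` and the file names
+below are hypothetical:
+
+.. code-block:: bash
+
+    # Copy a result from the Member Work area to Project Home (NFS)
+    cp $MEMBERWORK/abc123/run42/output.dat /ccs/proj/abc123/
+
+    # Or archive it in the Project Archive area on HPSS with hsi
+    hsi put $MEMBERWORK/abc123/run42/output.dat : /hpss/prod/abc123/proj-shared/output.dat
+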
Project Archive Directories
============================

-================= ================== ==== =========== ======= ====== =====
-*Name*            Path               Type Permissions Backups Purged Quota
-================= ================== ==== =========== ======= ====== =====
-*Project Archive* ``/proj/[projid]`` HPSS 770         no      no     100TB
-================= ================== ==== =========== ======= ====== =====
-
Projects are also allocated project-specific archival space on the High
Performance Storage System (HPSS). The default quota is shown on the
-table above. If a higher quota is needed, contact the User Assistance
+table below. If a higher quota is needed, contact the User Assistance
Center.

-The Project Archive space on HPSS is intended for storage of data not
-immediately needed in either Project Home (NFS) areas nor Project Work
-(Alpine) areas, and to serve as a location to store backup copies of
-project-related files.
+Three Project Archive Areas Facilitate Collaboration on Archival Data
+----------------------------------------------------------------------
+
+To facilitate collaboration among researchers, the OLCF provides (3)
+distinct types of project-centric archival storage areas: *Member Archive*
+directories, *Project Archive* directories, and *World Archive* directories.
+These directories should be used for storage of data not immediately needed
+in either the Project Home (NFS) areas or Project Work (Alpine) areas and
+to serve as a location to store backup copies of project-related files.
+
++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+
+| Area                | Path                                        | Type           | Permissions | Quota  | Backups | Purged  | Retention  | On Compute Nodes |
++=====================+=============================================+================+=============+========+=========+=========+============+==================+
+| Member Archive      | ``/hpss/prod/[projid]/users/$USER``         | HPSS           | 700         | 100 TB | No      | No      | 90 days    | No               |
++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+
+| Project Archive     | ``/hpss/prod/[projid]/proj-shared``         | HPSS           | 770         | 100 TB | No      | No      | 90 days    | No               |
++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+
+| World Archive       | ``/hpss/prod/[projid]/world-shared``        | HPSS           | 775         | 100 TB | No      | No      | 90 days    | No               |
++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+
+
+As with the three project work areas, the difference between these three areas
+lies in the accessibility of data to project members and to researchers outside
+of the project. Member Archive directories are accessible only by an individual
+project member by default, Project Archive directories are accessible by all
+project members, and World Archive directories are readable by any user on the
+system.

-Project Archive Path
----------------------
+Permissions
+^^^^^^^^^^^
+
+UNIX Permissions on each project-centric archive storage area differ
+according to the area’s intended collaborative use. Under this setup,
+the process of sharing data with other researchers amounts to simply
+ensuring that the data resides in the proper archive directory.
+
+- Member Archive Directory: ``700``
+- Project Archive Directory: ``770``
+- World Archive Directory: ``775``

-The project archive directories are located at ``/proj/pjt000`` (where
-``pjt000`` is your Project ID).
+For example, if you have data that must be restricted only to yourself,
+keep them in your Member Archive directory for that project (and leave the
+default permissions unchanged). If you have data that you intend to
+share with researchers within your project, keep them in the project’s
+Project Archive directory. If you have data that you intend to share with
+researchers outside of a project, keep them in the project’s World Archive
+directory.

Project Archive Access
-----------------------
diff --git a/data/storage_overview.rst b/data/storage_overview.rst
index 802a1995..48ba0352 100644
--- a/data/storage_overview.rst
+++ b/data/storage_overview.rst
@@ -16,16 +16,17 @@ The storage area to use in any given situation depends upon the activity you
wish to carry out. Each user has a User Home area on a Network File System
(NFS) and a User Archive area on the archival High Performance Storage System
(HPSS). These user storage areas are intended to house user-specific files. Each project
-has a Project Home area on NFS, multiple Project Work areas on Spectrum Scale,
-and a Project Archive area on HPSS. These project storage areas are intended to
+has a Project Home area on NFS, multiple Work areas on Spectrum Scale, and
+multiple Archive areas on HPSS. These project storage areas are intended to
house project-centric files.

We have defined several areas as listed below by function:

- **User Home:** Long-term data for routine access that is unrelated to a
  project. It is mounted on compute nodes of Summit as read only

-- **User Archive:** Long-term data for archival access that is unrelated to a
-  project.
+- **User Archive:** A "link farm" with symbolic links to a user's project
+  directories on HPSS. (Previously this was for non-project data on HPSS; such
+  use is now deprecated.)

- **Project Home:** Long-term project data for routine access that's shared
  with other project members. It is mounted on compute nodes of Summit as read
@@ -38,11 +39,17 @@ function:
  shared with other project members.

- **World Work:** Short-term project data for fast, batch-job access that's
-  shared with OLCF users outside your project.
+  shared with users outside your project.
+
+- **Member Archive:** Long-term project data for archival access that's shared
+  with other project members.

- **Project Archive:** Long-term project data for archival access that's
  shared with other project members.

+- **World Archive:** Long-term project data for archival access that's shared
+  with users outside your project.
+
.. _alpine-ibm-spectrum-scale-filesystem:

diff --git a/data/transferring.rst b/data/transferring.rst
index 7da82fd8..75ffd58d 100644
--- a/data/transferring.rst
+++ b/data/transferring.rst
@@ -102,6 +102,10 @@ See the manual pages for more information:

* By default, ``rsync`` checks if the transfer of the data was successful.

+.. note::
+   Standard file transfer protocol (FTP) and remote copy (RCP) should not be used to transfer files to the NCCS high-performance computing (HPC) systems due to security concerns.
+
+
Using Globus from your local machine
=====================================

diff --git a/data/user_centric.rst b/data/user_centric.rst
index 4decc2cb..b895ca4e 100644
--- a/data/user_centric.rst
+++ b/data/user_centric.rst
@@ -9,17 +9,22 @@ resources and lists relevant polices.
**User-Centric Storage Areas** -+--------------+-----------------+------+-----------------+-------------+---------+--------+-----------+ -| Area | Path | Type | Permissions | Quota | Backups | Purged | Retention | -+==============+=================+======+=================+=============+=========+========+===========+ -| User Home | ``$HOME`` | NFS | User-controlled | 50 GB | Yes | No | 90 days | -+--------------+-----------------+------+-----------------+-------------+---------+--------+-----------+ -| User Archive | ``/home/$USER`` | HPSS | User-controlled | 2 TB [#f1]_ | **No** | No | 90 days | -+--------------+-----------------+------+-----------------+-------------+---------+--------+-----------+ ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| Area | Path | Type | Permissions | Quota | Backups | Purged | Retention | On Compute Nodes | ++=====================+=============================================+================+=============+========+=========+=========+============+==================+ +| User Home | ``/ccs/home/[userid]`` | NFS | User set | 50 GB | Yes | No | 90 days | Read-only | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| User Archive [#f1]_ | ``/home/[userid]`` | HPSS | User set | 2TB | No | No | 90 days | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ +| User Archive [#f2]_ | ``/home/[userid]`` | HPSS | 700 | N/A | N/A | N/A | N/A | No | ++---------------------+---------------------------------------------+----------------+-------------+--------+---------+---------+------------+------------------+ .. rubric:: footnotes -.. [#f1] In addition, there is a quota/limit of 2,000 files on this directory. + +.. [#f1] This entry is for legacy User Archive directories which contained user data on January 14, 2020. There is also a quota/limit of 2,000 files on this directory. + +.. [#f2] User Archive directories that were created (or had no user data) after January 14, 2020. Settings other than permissions are not applicable because directories are root-owned and contain no user files. .. _user-home-directories-nfs: @@ -61,6 +66,28 @@ permissions on their home directories, although it is recommended that permissions be set to as restrictive as possible (without interfering with your work). +User Home Backups +----------------- + +If you accidentally delete files from your home directory +(``/ccs/home/$USER``), you may be able to retrieve them. Online backups are +performed at regular intervals. Hourly backups for the past 24 hours, daily +backups for the last 7 days, and once-weekly backups are available. It is +possible that the deleted files are available in one of those backups. The +backup directories are named ``hourly.*``, ``daily.*``, and ``weekly.*`` where +``*`` is the date/time stamp of backup creation. For example, +``hourly.2020-01-01-0905`` is an hourly backup made on January 1st, 2020 at +9:05 AM. + +The backups are accessed via the ``.snapshot`` subdirectory. Note that ``ls`` +alone (or even ``ls -a``) will not show the ``.snapshot`` subdirectory exists, +though ``ls .snapshot`` will show its contents. 
The ``.snapshot`` feature is +available in any subdirectory of your home directory and will show the online +backups available for that subdirectory. + +To retrieve a backup, simply copy it into your desired destination with the +``cp`` command. + User Website Directory ---------------------- @@ -74,6 +101,17 @@ Assistance Center at help@olcf.ornl.gov. User Archive Directories (HPSS) ================================ +.. note:: + Use of User Archive areas for data storage is deprecated as of January 14, 2020. + The user archive area for any user account created after that date (or for any + user archive directory that is empty of user files after that date) will contain + only symlinks to the top-level directories for each of the user's projects on + HPSS. Users with existing data in a User Archive directory are encouraged to + move that data to an appropriate project-based directory as soon as possible. + + The information below is simply for reference for those users with existing + data in User Archive directories. + The High Performance Storage System (HPSS) at the OLCF provides longer-term storage for the large amounts of data created on the OLCF compute systems. The mass storage facility consists of tape and disk storage components, servers, and diff --git a/images/favicon.ico b/images/favicon.ico new file mode 100644 index 00000000..03c3df50 Binary files /dev/null and b/images/favicon.ico differ diff --git a/images/gpu_hackathons.jpg b/images/gpu_hackathons.jpg new file mode 100644 index 00000000..d5858160 Binary files /dev/null and b/images/gpu_hackathons.jpg differ diff --git a/images/projects_allocation_overview.png b/images/projects_allocation_overview.png index ca74c202..509b5987 100644 Binary files a/images/projects_allocation_overview.png and b/images/projects_allocation_overview.png differ diff --git a/images/summit-node-1rs-1task-1gpu-example.png b/images/summit-node-1rs-1task-1gpu-example.png deleted file mode 100644 index 33353275..00000000 Binary files a/images/summit-node-1rs-1task-1gpu-example.png and /dev/null differ diff --git a/images/titan-node-1task-1gpu.png b/images/titan-node-1task-1gpu.png deleted file mode 100644 index d29f6041..00000000 Binary files a/images/titan-node-1task-1gpu.png and /dev/null differ diff --git a/images/training.jpeg b/images/training.jpeg new file mode 100644 index 00000000..10bc41b1 Binary files /dev/null and b/images/training.jpeg differ diff --git a/index.rst b/index.rst index a142ee54..0c7b0b64 100644 --- a/index.rst +++ b/index.rst @@ -6,6 +6,15 @@ OLCF User Documentation ======================= +.. note:: + The `OLCF User Assistance Center + `_ remains open and + available for support. Users should follow normal support procedures when + reporting issues or requesting help. + + * `Submit a Support Ticket `_ + * Email us at help@olcf.ornl.gov + This technical documentation is a reference for the user community to efficiently use OLCF compute and storage resources. diff --git a/software/UMS/Flux.rst b/software/UMS/Flux.rst new file mode 100644 index 00000000..8f6d07af --- /dev/null +++ b/software/UMS/Flux.rst @@ -0,0 +1,8 @@ +.. _flux: + +***** +Flux +***** + +(Placeholder for User-Managed Software documentation from the `Flux Framework +`_ project.) diff --git a/software/UMS/index.rst b/software/UMS/index.rst new file mode 100644 index 00000000..1da36030 --- /dev/null +++ b/software/UMS/index.rst @@ -0,0 +1,10 @@ +.. _UMS: + +###################### +User-Managed Software +###################### + +.. 
toctree:: + :maxdepth: 2 + + Flux diff --git a/software/analytics/ibm-wml-ce.rst b/software/analytics/ibm-wml-ce.rst index e84d58fb..5f4bf103 100644 --- a/software/analytics/ibm-wml-ce.rst +++ b/software/analytics/ibm-wml-ce.rst @@ -6,9 +6,8 @@ IBM Watson Machine Learning CE Getting Started =============== -IBM Watson Machine Learning Community Edition 1.6.1 is provided on Summit -through the module ``ibm-wml-ce``. This module includes a license for IBM -Distributed Deep Learning (DDL) allowing execution across up to 954 nodes. +IBM Watson Machine Learning Community Edition is provided on Summit +through the module ``ibm-wml-ce``. To access the IBM WML CE packages use the ``module load`` command: @@ -16,20 +15,51 @@ To access the IBM WML CE packages use the ``module load`` command: module load ibm-wml-ce -This will activate a conda environment which is pre-loaded with the following -packages, and their dependencies: +Loading a specific version of the module is recommended to future-proof +scripts against software updates. The following commands can be used to +find and load specific module versions: -* `IBM DDL 1.4.0 `_ +.. code-block:: bash -* `Tensorflow 1.14 `_ + [user@login2.summit ~]$ module avail ibm-wml-ce -* `Pytorch 1.1.0 `_ + ------------------------- /sw/summit/modulefiles/core -------------------------- + ibm-wml-ce/1.6.1-1 ibm-wml-ce/1.6.2-0 ibm-wml-ce/1.6.2-3 + ibm-wml-ce/1.6.1-2 ibm-wml-ce/1.6.2-1 ibm-wml-ce/1.7.0-1 (D) + ibm-wml-ce/1.6.1-3 ibm-wml-ce/1.6.2-2 + ... + [user@login2.summit ~]$ module load ibm-wml-ce/1.6.2-3 -* `Caffe(IBM-enhanced) 1.0.0 `_ +For more information on loading modules, including loading specific verions, +see: :ref:`environment-management-with-lmod` -* `Horovod (IBM-DDL Backend) `_ +This will activate a conda environment which is pre-loaded with the following +packages, and their dependencies: -For a complete list of packages and their versions please see: `WMLC CE Software Packages `_. +.. 
table:: + :widths: 20 40 40 35 + + +--------------------+--------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------+ + | IBM WML CE Version | ibm-wml-ce/1.6.1 | ibm-wml-ce/1.6.2 | ibm-wml-ce/1.7.0 + + +--------------------+--------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------+ + | Package | `IBM DDL 1.4.0 `_ | `IBM DDL 1.5.0 `_ | `IBM DDL 1.5.1 `_ + + | +--------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------+ + | | `Tensorflow 1.14 `_ | `Tensorflow 1.15 `_ | `Tensorflow 2.1 `_ + + | +--------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------+ + | | `Pytorch 1.1.0 `_ | `Pytorch 1.2.0 `_ | `Pytorch 1.3.1 `_ + + | +--------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------+ + | | `Caffe(IBM-enhanced) 1.0.0 `_ | `Caffe (IBM-enhanced) 1.0.0 `_ | `Caffe (IBM-enhanced) 1.0.0 `_ + + | +--------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------+ + | | `Horovod @9f87459 (IBM-DDL Backend) `_ | `Horovod v0.18.2 (IBM-DDL Backend) `_ | `Horovod v0.19 (NCCL Backend) `_ | + +--------------------+--------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------+ + | Complete List | `1.6.1 Software Packages `_ | `1.6.2 Software Packages `_ | `1.7.0 Software Packages `_ | + 
+--------------------+--------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------+ + +.. note:: + + WML-CE was the topic of the October 2019 User Conference Call, presented by + Bryant Nelson and Brad Nemanich of IBM. + (`slides `__ | `recording `__) Running DDL Jobs ================ @@ -43,8 +73,8 @@ for performance. Basic DDL BSUB Script --------------------- -The following bsub script will run a distributed Tensorflow resnet50 training job -across 2 nodes. +The following bsub script will run a distributed Tensorflow resnet50 +training job across 2 nodes. .. code-block:: bash :caption: script.bash @@ -59,7 +89,7 @@ across 2 nodes. module load ibm-wml-ce - ddlrun python $CONDA_PREFIX/tf_cnn_benchmarks/tf_cnn_benchmarks.py --variable_update=ddl --model=resnet50 + ddlrun python $CONDA_PREFIX/horovod/examples/tensorflow2_synthetic_benchmark.py ``bsub`` is used to launch the script as follows: @@ -83,7 +113,7 @@ is doing. The following is the first line of the output from the above script: .. code-block:: console $ module load ibm-wml-ce - (ibm-wml-ce-1.6.1) $ ddlrun python $CONDA_PREFIX/tf_cnn_benchmarks/tf_cnn_benchmarks.py --variable_update=ddl --model=resnet50 + (ibm-wml-ce-1.6.1-1) $ ddlrun python $CONDA_PREFIX/tf_cnn_benchmarks/tf_cnn_benchmarks.py --variable_update=ddl --model=resnet50 + /autofs/nccs-svm1_sw/summit/.swci/1-compute/opt/spack/20180914/linux-rhel7-ppc64le/xl-16.1.1-3/spectrum-mpi-10.3.0.1-20190611-aqjt3jo53mogrrhcrd2iufr435azcaha/bin/mpirun \ -x LSB_JOBID -x PATH -x PYTHONPATH -x LD_LIBRARY_PATH -x LSB_MCPU_HOSTS -x NCCL_LL_THRESHOLD=0 -x NCCL_TREE_THRESHOLD=0 \ -disable_gdr -gpu --rankfile /tmp/DDLRUN/DDLRUN.xoObgjtixZfp/RANKFILE -x "DDL_OPTIONS=-mode p:6x2x1x1 " -n 12 \ @@ -103,17 +133,24 @@ launching a distributed job. Setting up Custom Environments ============================== -The ``IBM-WML-CE-1.6.1`` conda environment is read-only. Therefore, users +The IBM-WML-CE conda environment is read-only. Therefore, users cannot install any additional packages that may be needed. If users need -any additional conda or pip packages, they can clone the ``IBM-WML-CE-1.6.1`` +any additional conda or pip packages, they can clone the IBM-WML-CE conda environment into their home directory and then add any packages they need. +.. note:: + + The conda environment includes a module revision number, the 'X' in + ``ibm-wml-ce-1.7.0-X``. The name of the active environment can be found in + the prompt string, or ``conda env list`` can be used to see what conda + environments are available. + .. code-block:: console $ module load ibm-wml-ce - (ibm-wml-ce-1.6.1) $ conda create --name cloned_env --clone ibm-wml-ce-1.6.1 - (ibm-wml-ce-1.6.1) $ conda activate cloned_env + (ibm-wml-ce-1.7.0-X) $ conda create --name cloned_env --clone ibm-wml-ce-1.7.0-X + (ibm-wml-ce-1.7.0-X) $ conda activate cloned_env (cloned_env) $ By default this should create the cloned environment in @@ -125,16 +162,9 @@ will ensure that all of the conda settings remain the same. .. 
code-block:: console $ module load ibm-wml-ce - (ibm-wml-ce-1.6.1) $ conda activate cloned_env + (ibm-wml-ce-1.7.0-X) $ conda activate cloned_env (cloned_env) $ -To use Horovod with the IBM DDL backend in a cloned environment, the user must -``pip`` install Horovod using the following command: - -.. code-block:: bash - - HOROVOD_CUDA_HOME="${CONDA_PREFIX}" HOROVOD_GPU_ALLREDUCE=DDL pip install --no-cache-dir git+https://github.com/horovod/horovod.git@9f87459ead9ebb7331e1cd9cf8e9a5543ecfb784 - Best DDL Performance ==================== @@ -146,9 +176,8 @@ techniques. Reserving Whole Racks --------------------- -When making node reservations for DDL jobs, it is best to reserve nodes in a -rack-contiguous manner. IBM DDL optimizes communication with knowledge of the -node layout. +When making node reservations for DDL jobs, it can sometimes improve +performance to reserve nodes in a rack-contiguous manner. In order to instruct BSUB to reserve nodes in the same rack, expert mode must be used (``-csm y``), and the user needs to explicitly specify the reservation @@ -186,44 +215,6 @@ We can break the reservation string down to understand each piece. * The ``maxcus=1`` specifies that the nodes can come from at most 1 rack. -Best DDL Arguments ------------------- - -Summit is comprised of 256 racks of 18 nodes with 6 GPUs each. For more -information about the hardware of Summit please see: :ref:`system-overview`. - -DDL works best with topological knowledge of the cluster. -``GPUs per Node X Nodes per Rack X Racks Per Aisle X Aisles`` Some of this -information can be acquired automatically, but some has to be specified -by the user. - -To get the best performance reservations should be made in multiples of 18, -and the user should pass topology arguments to ``DDLRUN``. - -* ``--nodes 18`` informs DDL that there are 18 nodes per rack. Specifying 18 - nodes per rack gave the best performance in preliminary testing, but it may - be that logically splitting racks in half (``--nodes 9``) or logically - grouping racks (``--nodes 36``) could lead to better performance on other - workloads. - -* ``--racks 4`` informs DDL that there are 4 racks per aisle. Summit is a - fat tree, but preliminary testing showed that grouping racks into logical - aisles of 4 racks gave the best performance. - -* ``--aisles 2`` informs DDL that there are 2 total aisles. - ``Nodes X Racks X Aisles`` must equal the total number of nodes in the LSF - reservation. - -If running on 144 nodes, the following ``ddlrun`` command should -give good performance. - -.. code-block:: bash - - ddlrun --nodes 18 --racks 4 --aisles 2 python script.py - -For more information on ``ddlrun``, please see: `DDLRUN `_. - - Example =================== @@ -250,7 +241,7 @@ The following LSF script can be used to reproduce the results for 144 nodes: #BSUB -o /ccs/home/user/job%J.out #BSUB -e /ccs/home/user/job%J.out - module load ibm-wml-ce + module load ibm-wml-ce/1.6.2-2 ddlrun --nodes 18 --racks 4 --aisles 2 python $CONDA_PREFIX/tf_cnn_benchmarks/tf_cnn_benchmarks.py \ --variable_update=horovod\ diff --git a/software/index.rst b/software/index.rst index b13a410a..5dcab274 100644 --- a/software/index.rst +++ b/software/index.rst @@ -1,14 +1,16 @@ .. _software-at-the-olcf: -##################### -Software at the OLCF -##################### +######### +Software +######### .. 
toctree:: :maxdepth: 2 + software-news analytics/index python profiling/index + UMS/index diff --git a/software/profiling/TAU.rst b/software/profiling/TAU.rst index 3b3bd9f9..acb9d49a 100644 --- a/software/profiling/TAU.rst +++ b/software/profiling/TAU.rst @@ -708,3 +708,15 @@ Static Phase .. image:: /images/tau_static_phases.png :align: center + +OpenMP Offload +============== + +- Initially compile your application without TAU and create a dynamic binary +- Use all the compiler options requiried for OpenMP offload + +- Then execute, for example with XL compiler, 1 resource set, 1 MPI process with 2 OpenMP threads, and 2 GPUs: + +.. code:: + + jsrun --smpiargs="-gpu" -n 1 -a 2 -c 2 -r 1 -g 2 -b packed:2 tau_exec -T cupti,papi,openmp,xl -cupti ./a.out diff --git a/software/software-news.rst b/software/software-news.rst new file mode 100644 index 00000000..33543f73 --- /dev/null +++ b/software/software-news.rst @@ -0,0 +1,173 @@ + +************* +Software News +************* + +This page lists significant changes to software provided on OLCF systems. The +most recent changes are listed first. + +.. raw:: html + +

Summit: Software Installation/Default Software Changes (March 10, 2020)

+ +The following modules will be installed as default on March 10, 2020. The new +stack requires the latest version of Spectrum MPI and as a result, previous +versions have been deprecated. + +.. csv-table:: + :header: "Package", "Current Default", "New Default" + + "cuda", "10.1.168", "10.1.243" + "spectrum-mpi", "10.3.0.1-20190611", "10.3.1.2-20200121" + "hdf5", "1.10.3", "1.10.4" + "pgi", "19.4", "19.9" + "xl", "16.1.1-3", "16.1.1-5" + "ibm-wml-ce", "1.6.2-3", "1.7.0-1" + +In addition, the following new packages have been installed and are available for use: + +.. csv-table:: + :header: "Package", "New Version" + + "pgi", "20.1" + "xl", "16.1.1-6" + "kokkos", "3.0.0" + +Finally, the FFTW installations on Summit for the XL compiler have been rebuilt +using ``-O2`` to address an issue observed when running the FFTW suite using +the default optimization options. All builds of the ``fftw`` package that use +the XL compiler have been rebuilt. + +If you encounter any issues, please contact help@olcf.ornl.gov. + +---- + +.. raw:: html + +

Rhea: OpenMPI Upgrade (February 18, 2020)

+ +On February 18, 2020, Rhea’s default OpenMPI will be updated to version 3.1.4. +Due to underlying library changes that will be made on the same day, following +the change, all codes should be rebuilt against the updated version. + +.. csv-table:: + :header: "Package", "Current Default", "New Default" + + "OpenMPI", "3.1.3", "3.1.4" + + +---- + +.. raw:: html + +

All Systems: Python2 End of Life (January 01, 2020)

+ +On January 1, 2020, Python 2 will reach its end of life and will no longer be +supported by the project’s core developers. On this date, the OLCF will also +end its support for Python 2. Users reliant on Python 2 should port code to +Python 3 for forward compatibility with OLCF systems and many open source +packages. Python 2 modules will not be removed on January 1, but will no longer +receive maintenance or regular updates. + +While default Python modules on OLCF systems are already set to Python 3, we +recommend all users follow PEP394 by explicitly invoking either ‘python2’ or +‘python3’ instead of simply ‘python’. Python 2 Conda Environments and user +installations of Python 2 will remain as options for using Python 2 on OLCF +systems. + +Official documentation for porting from Python 2 to Python3 can be found at: +``_ + +General information and a list of open source packages dropping support for +Python 2 can be found at: ``_ + +---- + +.. raw:: html + +

Summit: Software Upgrade (July 16, 2019)

+ +The following modules will be installed and will become the default on July 16, +2019. The new stack requires Spectrum MPI 10.3 PTF 1 and as a result previous +versions of Spectrum MPI have been deprecated. + +.. csv-table:: + :header: "Package", "Default" + + "cuda", "10.1.168" + "spectrum-mpi", "10.3.0.1-20190716" + +Details about the software stack upgrade can be found in the `IBM Service Pack 3.1 site `_ and the `Spectrum MPI 10.3.0.1 release notes `_. + +---- + +.. raw:: html + +

Summit: Software Installation/Default Software Changes (May 21, 2019)

+ +The following modules will be installed as default on May 21, 2019. The new +stack requires Spectrum MPI 10.3 and as a result previous versions of Spectrum +MPI have been deprecated. + +.. csv-table:: + :header: "Package", "Default" + + "xl", "16.1.1.3" + "cuda", "10.1.105" + "essl", "6.2.0-20190419" + "spectrum-mpi", "10.3.0.0-20190419" + +---- + +.. raw:: html + +

Rhea: Default Software Changes (March 12, 2019)

+ +The following modules will become the default on March 12, 2019. + +.. csv-table:: + :header: "Package", "Default" + + "intel", "19.0.0" + "pgi", "18.10" + "gcc", "6.2.0" + "cuda", "10.0.3" + "openmpi", "3.1.3" + "anaconda", "5.3.0" + "adios", "1.11.1" + "atlas", "3.10.2" + "boost", "1.67.0" + "fftw", "3.3.8" + "hdf5", "1.10.3" + "nco", "4.6.9" + "netcdf", "4.6.1" + "netcdf-fortran", "4.4.4" + "netcdf-cxx", "4.3.0" + "parallel-netcdf", "1.8.0" + +---- + +.. raw:: html + +

Summit: Default Software Changes (March 12, 2019)

+ +The following modules will become the default on March 12, 2019. + +.. csv-table:: + :header: "Package", "Current Default", "New Default" + + "spectrum-mpi", "unset", "10.2.0.11-20190201" + "xl", "16.1.1-1", "16.1.1-2" + "pgi", "unset", "18.10" + +In addition, the following default Spectrum MPI settings will be changed to +address issues resolved with the February 19, 2019 software upgrade: + +.. csv-table:: + :header: "Environment Variable", "Current Default", "New Default" + + "OMP_MCA_io", "romio314", "romio321" + "OMPI_MCA_coll_ibm_xml_disable_cache", "1", "unset" + "PAMI_PMIX_USE_OLD_MAPCACHE", "1", "unset" + + diff --git a/systems/summit_user_guide.rst b/systems/summit_user_guide.rst index be8b0425..e391f295 100644 --- a/systems/summit_user_guide.rst +++ b/systems/summit_user_guide.rst @@ -1101,6 +1101,8 @@ If you would like to have your default shell changed, please contact the `OLCF User Assistance Center `__ at help@nccs.gov. +.. _environment-management-with-lmod: + Environment Management with Lmod -------------------------------- @@ -2571,81 +2573,6 @@ It's recommended to explicitly specify ``jsrun`` options. This most often includes ``--nrs``,\ ``--cpu_per_rs``, ``--gpu_per_rs``, ``--tasks_per_rs``, ``--bind``, and ``--launch_distribution``. -Aprun to jsrun -"""""""""""""" - -Mapping aprun commands used on Titan to Summit's jsrun is only possible -in simple single GPU cases. The following table shows some basic single -GPU examples that could be executed on Titan or Summit. In the single -node examples, each resource set will resemble a Titan node containing a -single GPU and one or more cores. Although not required in each case, -common jsrun flags (resource set count, GPUs per resource set, tasks per -resource set, cores per resource set, binding) are included in each -example for reference. The jsrun ``-n`` flag can be used to increase the -number of resource sets needed. Multiple resource sets can be created on -a single node. If each MPI task requires a single GPU, up to 6 resource -sets could be created on a single node. - -+-------------------------+-------------+--------------------+-----------------+-------------------------------------+ -| GPUs per Resource Set | MPI Tasks | Threads per Task | aprun | jsrun | -+=========================+=============+====================+=================+=====================================+ -| 1 | 1 | 0 | aprun -n1 | jsrun -n1 -g1 -a1 -c1 | -+-------------------------+-------------+--------------------+-----------------+-------------------------------------+ -| 1 | 2 | 0 | aprun -n2 | jsrun -n1 -g1 -a2 -c1 | -+-------------------------+-------------+--------------------+-----------------+-------------------------------------+ -| 1 | 1 | 4 | aprun -n1 -d4 | jsrun -n1 -g1 -a1 -c4 -bpacked:4 | -+-------------------------+-------------+--------------------+-----------------+-------------------------------------+ -| 1 | 2 | 8 | aprun -n2 -d8 | jsrun -n1 -g1 -a2 -c16 -bpacked:8 | -+-------------------------+-------------+--------------------+-----------------+-------------------------------------+ - -The jsrun ``-n`` flag can be used to increase the number of resource -sets needed. Multiple resource sets can be created on a single node. If -each MPI task requires a single GPU, up to 6 resource sets could be -created on a single node. - -For cases when the number of tasks per resource set (i.e. the ``-a`` -flag) is greater than one, the job must use ``-alloc_flags "gpumps"``. -This allows multiple tasks to share the same GPU. 
- -The following example images show how a single-gpu/single-task job -would be placed on a single Titan and Summit node. On Summit, the red -box represents a resource set created by jsrun. The resource set looks -similar to a Titan node, containing a single GPU, a single core, and -memory. - -+--------------+-------------------------+ -| Titan Node | Summit Node | -+==============+=========================+ -| aprun -n1 | jsrun -n1 -g1 -a1 -c1 | -+--------------+-------------------------+ -| |image18| | |image19| | -+--------------+-------------------------+ - -.. |image18| image:: /images/titan-node-1task-1gpu.png - :class: normal aligncenter -.. |image19| image:: /images/summit-node-1rs-1task-1gpu-example.png - :class: normal aligncenter - -Because Summit's nodes are much larger than Titan's were, 6 single-gpu -resource sets can be created on a single Summit node. The following -image shows how six single-gpu, single-task resource sets would be -placed on a node by default. In the example, the command -``jsrun -n6 -g1 -a1 -c1`` is used to create six single-gpu resource sets -on the node. Each resource set is indicated by differing colors. Notice, -the ``-n`` flag is all that changed between the above single resource -set example. The ``-n`` flag tells jsrun to create six resource sets. - -.. figure:: https://www.olcf.ornl.gov/wp-content/uploads/2018/03/summit-2node-1taskpergpu.png - :align: center -  - ``jsrun -n 6 -g 1 -a 1 -c 1`` starts 6 resource sets, each indicated by - differing colors. Each resource contains 1 GPU, 1 Core, and memory. The - red resource set contains GPU 0 and Core 0. The purple resource set - contains GPU 3 and Core 84. ``-n 6`` tells jsrun how many resource sets to - create. In this example, each resource set is similar to a single Titan - node. - - jsrun Examples ^^^^^^^^^^^^^^ @@ -2942,6 +2869,43 @@ mapped to the hardware. https://code.ornl.gov/t4p/Hello_jsrun A screencast showing how to use Hello\_jsrun is also available: https://vimeo.com/261038849 +Job Step Viewer +""""""""""""""" + +`Job Step Viewer `__ provides a graphical view of an application's runtime layout on Summit. +It allows users to preview and quickly iterate with multiple ``jsrun`` options to +understand and optimize job launch. + +For bug reports or suggestions, please email help@olcf.ornl.gov. + +Usage +_____ + +1. Request a Summit allocation + * ``bsub -W 10 -nnodes 2 -P $OLCF_PROJECT_ID -Is $SHELL`` +2. Load the ``job-step-viewer`` module + * ``module load job-step-viewer`` +3. Test out a ``jsrun`` line by itself, or provide an executable as normal + * ``jsrun -n12 -r6 -c7 -g1 -a1 EOMP_NUM_THREADS=7 -brs`` +4. Visit the provided URL + * https://jobstepviewer.olcf.ornl.gov/summit/871957-1 + +.. note:: + Most Terminal applications have built-in shortcuts to directly open + web addresses in the default browser. + + * MacOS Terminal.app: hold Command (⌘) and double-click on the URL + * iTerm2: hold Command (⌘) and single-click on the URL + +Limitations +___________ + +* (currently) Only available on Summit +* (currently) Compiled with XL toolchain only +* Does not support MPMD-mode via ERF +* OpenMP only supported with use of the ``OMP_NUM_THREADS`` environment variable. + + jsrunVisualizer """"""""""""""" @@ -3162,11 +3126,95 @@ please see the `Vampir Software Page + Framework: pml + -------------------------------------------------------------------------- + PML pami cannot be selected + +This is due to an incompatibility in the 2019.x versions of Nsight Compute with +Spectrum MPI. 
As a workaround, you can disable CUDA hooks in Spectrum MPI using + +:: + + jsrun -n 1 -a 1 -g 1 --smpiargs="-disable_gpu_hooks" nv-nsight-cu-cli ./a.out + +Unfortunately, this is incompatible with using CUDA-aware MPI in your application. + +This will be resolved in a future release of CUDA. + +CUDA hook error when program uses CUDA without first calling MPI_Init() +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Serial applications, that are not MPI enabled, often face the following +issue when compiled with Spectrum MPI's wrappers and run with jsrun: + +:: + + CUDA Hook Library: Failed to find symbol mem_find_dreg_entries, ./a.out: undefined symbol: __PAMI_Invalidate_region + +The same issue can occur if CUDA API calls that interact with the GPU +(e.g. allocating memory) are called before MPI_Init() in an MPI enabled +application. Depending on context, this error can either be harmless or +it can be fatal. + +The reason this occurs is that the PAMI messaging backend, used by Spectrum +MPI by default, has a "CUDA hook" that records GPU memory allocations. +This record is used later during CUDA-aware MPI calls to efficiently detect +whether a given message is sent from the CPU or the GPU. This is done by +design in the IBM implementation and is unlikely to be changed. + +There are two main ways to work around this problem. If CUDA-aware MPI is +not a relevant factor for your work (which is naturally true for serial +applications) then you can simply disable the CUDA hook with: + +:: + + --smpiargs="-disable_gpu_hooks" + +as an argument to jsrun. Note that this is not compatible with the ``-gpu`` +argument to ``--smpiargs``, since that is what enables CUDA-aware MPI and +the CUDA-aware MPI functionality depends on the CUDA hook. + +If you do need CUDA-aware MPI functionality, then the only known working +solution to this problem is to refactor your code so that no CUDA calls +occur before MPI_Init(). (This includes any libraries or programming models +such as OpenACC or OpenMP that would use CUDA behind the scenes.) While it +is not explicitly codified in the standard, it is worth noting that the major +MPI implementations all recommend doing as little as possible before MPI_Init(), +and this recommendation is consistent with that. + Spindle is not currently supported ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -3412,8 +3460,7 @@ Simultaneous backgrounded jsruns (Resolved: May 21, 2019) We have seen occasional errors from batch jobs with multiple simultaneous backgrounded jsrun commands. Jobs may see pmix errors -during the noted failures. +during the noted failures. -------------- diff --git a/training/index.rst b/training/index.rst index 647532d2..c4935aeb 100644 --- a/training/index.rst +++ b/training/index.rst @@ -1,13 +1,20 @@ .. _training: +.. figure:: ../images/training.jpeg + :align: center + :width: 100% + ######### Training ######### +The OLCF provides training to our users in a variety of ways. By following the links below, you can find upcoming training events, self-guided tutorials, slides and recordings from past training events, information on upcoming GPU hackathons, and the OLCF Vimeo channel. -* `Tutorials `_ -* `Archive `_ -* `Calendar `_ -* `Hackathons `_ -* `OLCF Training Channel `_ +.. 
toctree:: + :maxdepth: 1 + OLCF Training Calendar + Tutorials + training_archive + olcf_gpu_hackathons + OLCF Vimeo Channel diff --git a/training/olcf_gpu_hackathons.rst b/training/olcf_gpu_hackathons.rst new file mode 100644 index 00000000..a5b2b475 --- /dev/null +++ b/training/olcf_gpu_hackathons.rst @@ -0,0 +1,112 @@ +.. I used html for the section headings to avoid individual entries in the associated menu (TP) + +.. figure:: /images/gpu_hackathons.jpg + :align: center + :width: 100% + +=================== +OLCF GPU Hackathons +=================== + +Each year, the Oak Ridge Leadership Computing Facility (OLCF) works with our +vendor partners to organize a series of GPU hackathons at a number of host +locations around the world. The table below lists the OLCF-supported events +scheduled for 2020. + +.. table:: + :align: center + :width: 80% + + +-------------------------------------------------+-------------------+-------------------------+ + | Location | Proposal Deadline | Event Dates | + +=================================================+===================+=========================+ + | San Diego Supercomputing Center - San Diego, CA | March 11 | May 11 - 15 | + +-------------------------------------------------+-------------------+-------------------------+ + | Princeton University - Princeton, NJ | March 2 | June 8 - 12 | + +-------------------------------------------------+-------------------+-------------------------+ + | NERSC - Oakland, CA | March 11 | July 13 - 17 | + +-------------------------------------------------+-------------------+-------------------------+ + | Brookhaven National Laboratory - Upton, NY | June 17 | August 17 - 21 | + +-------------------------------------------------+-------------------+-------------------------+ + | OLCF - Knoxville, TN | August 26 | October 26 - 30 | + +-------------------------------------------------+-------------------+-------------------------+ + + +.. note:: + The OLCF-supported events are a subset of a larger number of GPU hackathons organized within the community. For the full list, please visit `gpuhackathons.org `__ + +.. raw:: html + +

What is a GPU hackathon?

+
+A GPU hackathon is a 5-day coding event in which teams of developers port their
+own applications to run on GPUs, or optimize their applications that already
+run on GPUs. Each team consists of 3 or more developers who are intimately
+familiar with (some part of) their application, and they work alongside 1 or
+more mentors with GPU programming expertise. Our mentors come from
+universities, national laboratories, supercomputing centers, and industry
+partners.
+
+.. note::
+    There are a variety of programming models available to program GPUs (e.g. CUDA, OpenACC, OpenMP offloading, etc.) and you are welcome to use any of them at these events.
+
+.. raw:: html
+

Why Participate?

+
+If you want or need to get your code running (or optimized) on a GPU-accelerated
+system, these hackathons offer a unique opportunity to set aside 5 days,
+surround yourself with experts in the field, and push toward your development
+goals. By the end of the event, each team should have their code running (or
+more optimized) on GPUs, or at least have a clear roadmap of how to get there.
+
+.. raw:: html
+

Target audience

+
+We are looking for teams of 3-6 developers with a scalable** application to
+port to (or optimize on) GPUs. Collectively, the team should know the
+application intimately.
+
+[** We say scalable here because we're typically looking for codes intended to
+run on multiple nodes (e.g. MPI-enabled), although porting/optimizing such
+codes on a single node during the events is encouraged whenever possible.]
+
+.. note::
+    Please keep in mind that we are looking for teams with plans to develop GPU code - not to simply run their code on GPUs.
+
+.. raw:: html
+

Ok, so how do I attend?

+ +First, you must decide which event you'd like to attend (see table of events +above for locations and proposal deadlines), and then submit a short proposal +form describing your application and team. The organizing committee will then +review all proposals after the call for that event closes and select the teams +they believe are best suited for the event. + +To submit a proposal, please visit `gpuhackathons.org/events +`__, click on the event you'd like to attend, +and submit the form. + +.. raw:: html + +

Costs?

+ +Events are free for the selected participants. The organizers will provide a +meeting room, lunches, mentors, and access to compute resources. + +.. raw:: html + +

Want to be a mentor?

+ +If you would like more information about how you can volunteer to mentor a team +at an upcoming GPU hackathon, please visit our `Become a Mentor +`__ page. + +.. raw:: html + +

Who can I contact with questions?

+ +If you have any questions about the OLCF GPU Hackathon Series, please contact +Tom Papatheodore (`papatheodore@ornl.gov `__). diff --git a/training/training_archive.rst b/training/training_archive.rst new file mode 100644 index 00000000..1c1845a5 --- /dev/null +++ b/training/training_archive.rst @@ -0,0 +1,115 @@ +**************** +Training Archive +**************** + +The table below lists presentations given at previous OLCF training events. For a list of upcoming training events, please visit the `OLCF Training Calendar `__ + +.. I used a csv-table here because the normal table format was difficult to use with such long entries. To add a new entry, copy and paste the following template, replacing the "REPLACE_*" placeholders and adding urls: +.. "REPLACE_DATE", "REPLACE_TITLE", "REPLACE_PRESENTER", `REPLACE_EVENT <>`__, (`slides <>`__ | `recording <>`__) + +.. csv-table:: + :header: "Date", "Title", "Speaker", "Event", "Presentation" + + "2020-03-25", "Job Step Viewer", "Jack Morrison (OLCF)", `March 2020 OLCF User Conference Call `__, (`slides `__) + "2020-03-18", "CUDA Optimizations (Part 1)", "Bob Crovella (NVIDIA)", `Fundamental CUDA Optimization (Part 1) `__, (`slides `__ | `recording `__) + "2020-03-10", "Nsight Compute", "Felix Schmitt (NVIDIA)", `NVIDIA Profiling Tools - Nsight Compute `__, (`slides `__ | `recording `__) + "2020-03-09", "Nsight Systems", "Holly Wilper (NVIDIA)", `NVIDIA Profiling Tools - Nsight Systems `__, (`slides `__ | `recording `__) + "2020-02-26", "OLCF Overview for New Users", "Bill Renaud (OLCF)", `February 2020 OLCF User Conference Call `__, (`slides `__) + "2020-02-19", "CUDA Shared Memory", "Bob Crovella (NVIDIA)", `CUDA Shared Memory `__, (`slides `__ | `recording `__) + "2020-02-18", "Explicit Resource Files (ERFs)", "Tom Papatheodore (OLCF)", `jsrun Tutorial `__, (`slides `__ | `recording `__) + "2020-02-18", "Multiple jsrun Commands", "Chris Fuson (OLCF)", `jsrun Tutorial `__, (`slides `__ | `recording `__) + "2020-02-18", "jsrun Basics", "Jack Morrison (OLCF)", `jsrun Tutorial `__, (`slides `__ | `recording `__) + "2020-02-10", "Scaling Up Deep Learning Applications on Summit", "Junqi Yin (OLCF)", `Scaling Up Deep Learning Applications on Summit `__, (`slides `__ | `recording `__) + "2020-02-10", "NCCL on Summit", "Sylvain Jeaugey (NVIDIA)", `Scaling Up Deep Learning Applications on Summit `__, (`slides `__ | `recording `__) + "2020-02-10", "Introduction to Watson Machine Learning CE", "Brad Nemanich & Bryant Nelson (IBM)", `Scaling Up Deep Learning Applications on Summit `__, (`slides `__ | `recording `__) + "2020-01-29", "MyOLCF - A New Self-Service Portal for OLCF Users", "Adam Carlyle (OLCF)", `January 2020 OLCF User Conference Call `__, (`slides `__) + "2020-01-15", "Introduction to CUDA C++", "Bob Crovella (NVIDIA)", `Introduction to CUDA C++ `__, (`slides `__ | `recording `__) + "2019-10-30", "Distributed Deep Learning on Summit", "Brad Nemanich & Bryant Nelson (IBM)", `October 2019 OLCF User Conference Call - Distributed Deep Learning on Summit `__, (`slides `__ | `recording `__) + "2019-09-06", "Intro to AMD GPU Programming with HIP", "Damon McDougall, Chip Freitag, Joe Greathouse, Nicholas Malaya, Noah Wolfe, Noel Chalmers, Scott Moe, Rene van Oostrum, Nick Curtis (AMD)", `Intro to AMD GPU Programming with HIP `__, (`slides `__ | `recording `__) + "2019-08-28", "Intro to Slurm", "Chris Fuson (OLCF)", `August 2019 OLCF User Conference Call - Intro to Slurm `__, (`slides `__ | `recording `__) + "2019-08-09", "Profiling Tools Training 
Workshop: Issues and Lessons Learned", "George Markomanolis & Mike Brim (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-08", "Optimizing Dynamical Cluster Approximation on the Summit Supercomputer", "Ronnie Chatterjee (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-08", "Advanced Score-P", "Mike Brim (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-08", "Performance Analysis with Scalasca", "George Makomanolis (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-08", "Performance Analysis with Tau", "George Makomanolis (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-07", "Introduction to Extrae/Paraver", "George Makomanolis (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-07", "NVIDIA Profilers", "Jeff Larkin (NVIDIA)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-07", "Intro to Scalasca", "George Makomanolis (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-07", "Intro to Score-P", "George Makomanolis (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-07", "Intro to Tau", "George Makomanolis (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-08-07", "Introduction to Performance Analysis Concepts", "George Makomanolis (OLCF)", `Profiling Tools Workshop `__, (`slides `__) + "2019-06-19", "OLCF Best Practices", "Bill Renaud (OLCF)", `June 2019 OLCF User Conference Call - OLCF Best Practices `__, (`slides `__ | `recording `__) + "2019-06-11", "Linux Command Line Productivity Tools", "Ketan Maheshwari (OLCF)", `Linux Command Line Productivity Tools `__, (`slides `__) + "2019-06-07", "Introduction to AMD GPU Programming with HIP", "Damon McDougall, Chip Freitag, Joe Greathouse, Nicholas Malaya, Noah Wolfe, Noel Chalmers, Scott Moe, Rene van Oostrum, Nick Curtis (AMD)", `Introduction to AMD GPU Programming with HIP `__, (`slides `__ | `recording `__) + "2019-05-20", "Job Scheduler/Launcher", "Chris Fuson (OLCF)", `Introduction to Summit `__, (`slides `__) + "2019-05-20", "Programming Environment", "Matt Belhorn (OLCF)", `Introduction to Summit `__, (`slides `__) + "2019-05-20", "File Systems & Data Transfers", "George Markomanolis (OLCF)", `Introduction to Summit `__, (`slides `__) + "2019-05-20", "Summit System Overview", "Tom Papatheodore (OLCF)", `Introduction to Summit `__, (`slides `__) + "2019-04-11", "Introduction to NVIDIA Profilers on Summit", "Tom Papatheodore (OLCF) & Jeff Larkin (NVIDIA)", `Introduction to NVIDIA Profilers on Summit `__, (`slides `__ | `recording 1 `__ `recording 2 `__) + "2019-02-13", "CAAR Porting Experience: RAPTOR", "Ramanan Sankaran (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-13", "CAAR Porting Experience: LS-DALTON", "Ashleigh Barnes (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__) + "2019-02-13", "CAAR Porting Experience: FLASH", "Austin Harris (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-13", "Network Features & MPI Tuning", "Christopher Zimmer (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-13", "Burst Buffers / NVMe / SSDs", "Christopher Zimmer (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-13", "Burst Buffers / NVMe / SSDs", "George Markomanolis (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-13", "GPFS / Spectrum Scale", "George 
Markomanolis (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__) + "2019-02-13", "Arm Tools", "Nick Forrington (ARM)", `Summit Training Workshop (February 2019) `__, (`slides `__) + "2019-02-12", "Summit Node Performance", "Wayne Joubert (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-12", "Using V100 Tensor Cores", "Jeff Larkin (NVIDIA)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-12", "NVIDIA Profilers", "Jeff Larkin (NVIDIA)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-12", "GPU-Accelerated Libraries", "Jeff Larkin (NVIDIA)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-12", "CUDA-Aware MPI & GPUDirect", "Steve Abbott (NVIDIA)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-12", "Programming Methods for Summit's Multi-GPU Nodes", "Steve Abbott (NVIDIA)", `Summit Training Workshop (February 2019) `__, (`slides `__) + "2019-02-12", "CUDA Unified Memory", "Steve Abbott (NVIDIA)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-11", "Summit System Overview", "Scott Atchley (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-11", "Storage Areas & Data Transfers", "George Markomanolis (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-11", "Programming Environment", "Matt Belhorn (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-11", "Resource Scheduler & Job Launcher", "Chris Fuson (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-11", "Python on Summit", "Matt Belhorn (OLCF)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2019-02-11", "Practical Tips for Running on Summit", "David Appelhans (IBM)", `Summit Training Workshop (February 2019) `__, (`slides `__ | `recording `__) + "2018-12-06", "ML/DL Frameworks on Summit", "Junqi Yin (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-06", "Experiences Porting XGC to Summit", "Ed Dazevedo (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-06", "E3SM Application Readiness Experiences on Summit", "Matt Norman (OLCF)", `Summit Training Workshop `__, (`recording `__) + "2018-12-06", "CAAR Porting Experience: QMCPACK", "Andreas Tillack (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-06", "Python Environments", "Matt Belhorn (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-06", "Mixing OpenMP & OpenACC", "Lixiang Eric Luo (IBM)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-06", "ARM MAP/Performance Reports", "Nick Forrington (ARM)", `Summit Training Workshop `__, (`recording `__) + "2018-12-06", "Debugging: ARM DDT", "Nick Forrington (ARM)", `Summit Training Workshop `__, (`recording `__) + "2018-12-05", "Summit Node Performance", "Wayne Joubert (OLCF)", `Summit Training Workshop `__, (`recording `__) + "2018-12-05", "Targeting GPUs Using GPU Directives on Summit with GenASiS: A Simple and Effective Fortran Experience", "Reuben Budiardja (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-05", "Experiences Using the Volta Tensor Cores", "Wayne Joubert (OLCF)", 
`Summit Training Workshop `__, (`recording `__) + "2018-12-05", "IBM Power9 SMT Deep Dive", "Brian Thompto (IBM)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-05", "Network Features & MPI Tuning", "Christopher Zimmer (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-05", "NVMe / Burst Buffers", "Christopher Zimmer (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-05", "NVMe / Burst Buffers", "George Markomanolis (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-05", "Spectrum Scale - GPFS", "George Markomanolis (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-04", "Directive-Based GPU Programming", "Oscar Hernandez (OLCF)", `Summit Training Workshop `__, (`recording `__) + "2018-12-04", "Using V100 Tensor Cores", "Jeff Larkin (NVIDIA)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-04", "NVIDIA Profilers", "Jeff Larkin (NVIDIA)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-04", "GPU-Accelerated Libraries", "Jeff Larkin (NVIDIA)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-04", "Targeting Summit's Multi-GPU Nodes", "Steve Abbott (NVIDIA)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-04", "GPU Direct, RDMA, CUDA-Aware MPI", "Steve Abbott (NVIDIA)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-04", "CUDA Unified Memory", "Jeff Larkin (NVIDIA)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-03", "Experiences Porting/Optimizing Codes for Acceptance Testing", "Bob Walkup (IBM)", `Summit Training Workshop `__, (`slides `__ | `recording 1 `__ `recording 2 `__) + "2018-12-03", "Practical Tips for Running on Summit", "David Appelhans (IBM)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-03", "Summit Scheduler & Job Launcher", "Chris Fuson (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-03", "Storage Areas & Data Transfers", "George Markomanolis (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-03", "Summit Programming Environment", "Matt Belhorn (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-03", "IBM Power9", "Brian Thompto (IBM)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-03", "NVIDIA V100", "Jeff Larkin (NVIDIA)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-12-03", "Summit System Overview", "Scott Atchley (OLCF)", `Summit Training Workshop `__, (`slides `__ | `recording `__) + "2018-11-05", "Programming Methods for Summit's Multi-GPU Nodes", "Jeff Larkin & Steve Abbott (NVIDIA)", `Programming Methods for Summit's Multi-GPU Nodes `__, (`slides `__ | `recording 1 `__ `recording 2 `__) + "2018-06-28", "Intro to OpenACC", "Steve Abbott (NVIDIA)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-28", "Intro to CUDA", "Jeff Larkin (NVIDIA)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-28", "Intro to GPU Computing", "Jeff Larkin (NVIDIA)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-27", "Advanced UNIX & Shell Scripting", "Bill Renaud (OLCF)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-27", "Intro to MPI", "Brian Smith (OLCF)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-27", "Intro to 
OpenMP", "Dmitry Liakh & Markus Eisenbach (OLCF)", `Introduction to HPC `__, (`slides `__ | `recording 1 `__ `recording 2 `__) + "2018-06-27", "Intro to Parallel Computing", "John Levesque (Cray)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-27", "Intro to git", "Jack Morrison & James Wynne (OLCF)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-26", "Intro to UNIX", "Bill Renaud (OLCF)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-26", "Intro to vim", "Jack Morrison (OLCF)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-26", "Intro to C", "Tom Papatheodore (OLCF)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2018-06-26", "Intro to Fortran", "Bronson Messer (OLCF)", `Introduction to HPC `__, (`slides `__ | `recording `__) + "2017-06-19", "Intro to CUDA C/C++", "Tom Papatheodore (OLCF)", `Introduction to CUDA C/C++ `__, (`slides `__) +