diff --git a/modules/agent_files/templates/xorg.conf.erb b/modules/agent_files/templates/xorg.conf.erb
new file mode 100644
--- /dev/null
+++ b/modules/agent_files/templates/xorg.conf.erb
@@ -0,0 +1,56 @@
+Section "ServerLayout"
+    Identifier     "Layout0"
+    InputDevice    "Keyboard0" "CoreKeyboard"
+    InputDevice    "Mouse0" "CorePointer"
+EndSection
+
+Section "Files"
+EndSection
+
+Section "InputDevice"
+    # generated from default
+    Identifier     "Mouse0"
+    Driver         "mouse"
+    Option         "Protocol" "auto"
+    Option         "Device" "/dev/psaux"
+    Option         "Emulate3Buttons" "no"
+    Option         "ZAxisMapping" "4 5"
+EndSection
+
+Section "InputDevice"
+    # generated from default
+    Identifier     "Keyboard0"
+    Driver         "kbd"
+EndSection
+
+Section "Monitor"
+    Identifier     "Monitor0"
+    VendorName     "Unknown"
+    ModelName      "Unknown"
+    HorizSync       28.0 - 33.0
+    VertRefresh     43.0 - 72.0
+    Option         "DPMS"
+EndSection
+
+# Always make sure the BusID below matches the GPU of the machine.
+# It can be obtained with: nvidia-xconfig --query-gpu-info
+# The current BusID is the one corresponding to the GRID K520
+# found on AWS machines of the g2 series.
+# The nvidia-xconfig command cannot be run from within
+# erb files since they are evaluated before the
+# dependency resolution. See:
+# https://github.com/ros-infrastructure/buildfarm_deployment/pull/211#discussion_r279920242
+Section "Device"
+    Identifier     "Device0"
+    Driver         "nvidia"
+    VendorName     "NVIDIA Corporation"
+    BoardName      "GRID K520"
+    BusID          "PCI:0:3:0"
+EndSection
+
+Section "Screen"
+    Identifier     "Default Screen"
+    Device         "Device0"
+    Monitor        "Monitor0"
+    Option         "AllowEmptyInitialConfiguration" "True"
+EndSection
diff --git a/modules/profile/files/jenkins/agent_gpu/etc/lightdm/lightdm.conf b/modules/profile/files/jenkins/agent_gpu/etc/lightdm/lightdm.conf
new file mode 100644
index 00000000..261f6081
--- /dev/null
+++ b/modules/profile/files/jenkins/agent_gpu/etc/lightdm/lightdm.conf
@@ -0,0 +1,2 @@
+[SeatDefaults]
+display-setup-script=/etc/lightdm/xhost.sh
diff --git a/modules/profile/files/jenkins/agent_gpu/etc/lightdm/xhost.sh b/modules/profile/files/jenkins/agent_gpu/etc/lightdm/xhost.sh
new file mode 100644
index 00000000..1e36935c
--- /dev/null
+++ b/modules/profile/files/jenkins/agent_gpu/etc/lightdm/xhost.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+xhost +si:localuser:jenkins-agent
diff --git a/modules/profile/files/jenkins/agent_gpu/nvidia-docker.list b/modules/profile/files/jenkins/agent_gpu/nvidia-docker.list
new file mode 100644
index 00000000..1c9d6fb8
--- /dev/null
+++ b/modules/profile/files/jenkins/agent_gpu/nvidia-docker.list
@@ -0,0 +1,3 @@
+deb https://nvidia.github.io/libnvidia-container/ubuntu16.04/$(ARCH) /
+deb https://nvidia.github.io/nvidia-container-runtime/ubuntu16.04/$(ARCH) /
+deb https://nvidia.github.io/nvidia-docker/ubuntu16.04/$(ARCH) /
diff --git a/modules/profile/manifests/jenkins/agent_gpu.pp b/modules/profile/manifests/jenkins/agent_gpu.pp
new file mode 100644
--- /dev/null
+++ b/modules/profile/manifests/jenkins/agent_gpu.pp
@@ -0,0 +1,112 @@
+# Jenkins GPU Agent Profile
+#
+# Profile class that adds GPU support to a Jenkins agent node: it installs the
+# NVIDIA driver and nvidia-docker2, writes /etc/X11/xorg.conf and runs lightdm
+# with a display setup script that grants the jenkins-agent user access to X.
+# This profile should only ever be declared with an include into a role or site manifest.
+#
+# @example
+#   include profile::jenkins::agent_gpu
+#
+# This class takes no parameters.
+class profile::jenkins::agent_gpu {
+
+  include apt
+
+  # Provides xhost, which /etc/lightdm/xhost.sh calls.
+  package { 'x11-xserver-utils':
+    ensure => installed,
+  }
+
+  if $facts['ec2_instance_id'] {
+    package { 'linux-aws':
+      ensure => installed,
+      # When running in EC2 the AWS kernel needs to be installed before
+      # compiling the nvidia driver.
+      # TODO(nuclearsandwich) Does the xorg.conf really depend on the kernel or
+      # is it implicit based on drivers?
+      before => [ File['/etc/X11/xorg.conf'], Package['nvidia-375'] ],
+    }
+  }
+
+  package { 'xserver-xorg-dev':
+    ensure => installed,
+  }
+
+  # The kernel and its headers must be updated before
+  # the nvidia driver is compiled.
+  package { 'nvidia-375':
+    ensure => installed,
+  }
+
+  file { '/etc/X11/xorg.conf':
+    content => template('agent_files/xorg.conf.erb'),
+    # Plain configuration file: it is read by the X server, never executed.
+    mode    => '0644',
+    require => [
+      Package['lightdm'],
+      Package['nvidia-375'],
+      Package['x11-xserver-utils'],
+      Package['xserver-xorg-dev'],
+    ],
+  }
+
+  apt::key { 'nvidia_docker_key':
+    source => 'https://nvidia.github.io/nvidia-docker/gpgkey',
+    id     => 'C95B321B61E88C1809C4F759DDCAE044F796ECB0',
+  }
+
+  file { '/etc/apt/sources.list.d/nvidia-docker.list':
+    source  => 'puppet:///modules/profile/jenkins/agent_gpu/nvidia-docker.list',
+    require => Apt::Key['nvidia_docker_key'],
+    notify  => Exec['apt_update'],
+  }
+
+  package { 'nvidia-docker2':
+    ensure  => installed,
+    # nvidia-docker2 comes from the repository configured above, so the package
+    # index must be refreshed before the installation is attempted.
+    require => [
+      File['/etc/apt/sources.list.d/nvidia-docker.list'],
+      Exec['apt_update'],
+    ],
+  }
+
+  package { 'lightdm':
+    ensure => installed,
+  }
+
+  file { '/etc/lightdm/xhost.sh':
+    source  => 'puppet:///modules/profile/jenkins/agent_gpu/etc/lightdm/xhost.sh',
+    mode    => '0744',
+    require => [ Package['lightdm'], Package['x11-xserver-utils'] ],
+  }
+
+  # The next two resources work together: the first creates lightdm.conf only
+  # when it does not exist yet, the second ensures that display-setup-script
+  # is set whether or not the file was already there.
+
+  file { '/etc/lightdm/lightdm.conf':
+    ensure  => 'present',
+    source  => 'puppet:///modules/profile/jenkins/agent_gpu/etc/lightdm/lightdm.conf',
+    replace => 'no', # this is the important property
+    require => [ File['/etc/lightdm/xhost.sh'], File['/etc/X11/xorg.conf'] ],
+  }
+
+  file_line { '/etc/lightdm/lightdm.conf':
+    ensure  => present,
+    require => File['/etc/lightdm/lightdm.conf'],
+    line    => 'display-setup-script=/etc/lightdm/xhost.sh',
+    # Replace an existing display-setup-script entry instead of appending a
+    # second, conflicting one.
+    match   => '^display-setup-script=',
+    notify  => Service['lightdm'],
+    path    => '/etc/lightdm/lightdm.conf',
+  }
+
+  service { 'lightdm':
+    ensure     => running,
+    enable     => true,
+    hasrestart => true,
+  }
+}
diff --git a/modules/role/manifests/buildfarm/agent_gpu.pp b/modules/role/manifests/buildfarm/agent_gpu.pp
new file mode 100644
index 00000000..a015e4f7
--- /dev/null
+++ b/modules/role/manifests/buildfarm/agent_gpu.pp
@@ -0,0 +1,6 @@
+class role::buildfarm::agent_gpu {
+  # Find the other instances
+  include profile::ros::base
+  include profile::jenkins::agent
+  include profile::jenkins::agent_gpu
+}