forked from opencontainers/runtime-spec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
runtime_config_linux.go
297 lines (267 loc) · 10.5 KB
/
runtime_config_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
package specs
import "os"
// LinuxStateDirectory holds the container's state information
const LinuxStateDirectory = "/run/opencontainer/containers"
// LinuxRuntimeSpec is the full specification for linux containers.
type LinuxRuntimeSpec struct {
RuntimeSpec
// LinuxRuntime is platform specific configuration for linux based containers.
Linux LinuxRuntime `json:"linux"`
}
// LinuxRuntime hosts the Linux-only runtime information
type LinuxRuntime struct {
// UIDMapping specifies user mappings for supporting user namespaces on linux.
UIDMappings []IDMapping `json:"uidMappings"`
// GIDMapping specifies group mappings for supporting user namespaces on linux.
GIDMappings []IDMapping `json:"gidMappings"`
// Rlimits specifies rlimit options to apply to the container's process.
Rlimits []Rlimit `json:"rlimits"`
// Sysctl are a set of key value pairs that are set for the container on start
Sysctl map[string]string `json:"sysctl"`
// Resources contain cgroup information for handling resource constraints
// for the container
Resources *Resources `json:"resources"`
// CgroupsPath specifies the path to cgroups that are created and/or joined by the container.
// The path is expected to be relative to the cgroups mountpoint.
// If resources are specified, the cgroups at CgroupsPath will be updated based on resources.
CgroupsPath string `json:"cgroupsPath"`
// Namespaces contains the namespaces that are created and/or joined by the container
Namespaces []Namespace `json:"namespaces"`
// Devices are a list of device nodes that are created and enabled for the container
Devices []Device `json:"devices"`
// ApparmorProfile specified the apparmor profile for the container.
ApparmorProfile string `json:"apparmorProfile"`
// SelinuxProcessLabel specifies the selinux context that the container process is run as.
SelinuxProcessLabel string `json:"selinuxProcessLabel"`
// Seccomp specifies the seccomp security settings for the container.
Seccomp Seccomp `json:"seccomp"`
// RootfsPropagation is the rootfs mount propagation mode for the container
RootfsPropagation string `json:"rootfsPropagation"`
}
// Namespace is the configuration for a linux namespace
type Namespace struct {
// Type is the type of Linux namespace
Type NamespaceType `json:"type"`
// Path is a path to an existing namespace persisted on disk that can be joined
// and is of the same type
Path string `json:"path"`
}
// NamespaceType is one of the linux namespaces
type NamespaceType string
const (
// PIDNamespace for isolating process IDs
PIDNamespace NamespaceType = "pid"
// NetworkNamespace for isolating network devices, stacks, ports, etc
NetworkNamespace = "network"
// MountNamespace for isolating mount points
MountNamespace = "mount"
// IPCNamespace for isolating System V IPC, POSIX message queues
IPCNamespace = "ipc"
// UTSNamespace for isolating hostname and NIS domain name
UTSNamespace = "uts"
// UserNamespace for isolating user and group IDs
UserNamespace = "user"
)
// IDMapping specifies UID/GID mappings
type IDMapping struct {
// HostID is the UID/GID of the host user or group
HostID uint32 `json:"hostID"`
// ContainerID is the UID/GID of the container's user or group
ContainerID uint32 `json:"containerID"`
// Size is the length of the range of IDs mapped between the two namespaces
Size uint32 `json:"size"`
}
// Rlimit type and restrictions
type Rlimit struct {
// Type of the rlimit to set
Type string `json:"type"`
// Hard is the hard limit for the specified type
Hard uint64 `json:"hard"`
// Soft is the soft limit for the specified type
Soft uint64 `json:"soft"`
}
// HugepageLimit structure corresponds to limiting kernel hugepages
type HugepageLimit struct {
// Pagesize is the hugepage size
Pagesize string `json:"pageSize"`
// Limit is the limit of "hugepagesize" hugetlb usage
Limit uint64 `json:"limit"`
}
// InterfacePriority for network interfaces
type InterfacePriority struct {
// Name is the name of the network interface
Name string `json:"name"`
// Priority for the interface
Priority int64 `json:"priority"`
}
// blockIODevice holds major:minor format supported in blkio cgroup
type blockIODevice struct {
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
}
// WeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice
type WeightDevice struct {
blockIODevice
// Weight is the bandwidth rate for the device, range is from 10 to 1000
Weight uint16 `json:"weight"`
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
LeafWeight uint16 `json:"leafWeight"`
}
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
type ThrottleDevice struct {
blockIODevice
// Rate is the IO rate limit per cgroup per device
Rate uint64 `json:"rate"`
}
// BlockIO for Linux cgroup 'blkio' resource management
type BlockIO struct {
// Specifies per cgroup weight, range is from 10 to 1000
Weight uint16 `json:"blkioWeight"`
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
LeafWeight uint16 `json:"blkioLeafWeight"`
// Weight per cgroup per device, can override BlkioWeight
WeightDevice []*WeightDevice `json:"blkioWeightDevice"`
// IO read rate limit per cgroup per device, bytes per second
ThrottleReadBpsDevice []*ThrottleDevice `json:"blkioThrottleReadBpsDevice"`
// IO write rate limit per cgroup per device, bytes per second
ThrottleWriteBpsDevice []*ThrottleDevice `json:"blkioThrottleWriteBpsDevice"`
// IO read rate limit per cgroup per device, IO per second
ThrottleReadIOPSDevice []*ThrottleDevice `json:"blkioThrottleReadIOPSDevice"`
// IO write rate limit per cgroup per device, IO per second
ThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkioThrottleWriteIOPSDevice"`
}
// Memory for Linux cgroup 'memory' resource management
type Memory struct {
// Memory limit (in bytes)
Limit int64 `json:"limit"`
// Memory reservation or soft_limit (in bytes)
Reservation int64 `json:"reservation"`
// Total memory usage (memory + swap); set `-1' to disable swap
Swap int64 `json:"swap"`
// Kernel memory limit (in bytes)
Kernel int64 `json:"kernel"`
// How aggressive the kernel will swap memory pages. Range from 0 to 100. Set -1 to use system default
Swappiness int64 `json:"swappiness"`
}
// CPU for Linux cgroup 'cpu' resource management
type CPU struct {
// CPU shares (relative weight vs. other cgroups with cpu shares)
Shares int64 `json:"shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period
Quota int64 `json:"quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default
Period int64 `json:"period"`
// How many time CPU will use in realtime scheduling (in usecs)
RealtimeRuntime int64 `json:"realtimeRuntime"`
// CPU period to be used for realtime scheduling (in usecs)
RealtimePeriod int64 `json:"realtimePeriod"`
// CPU to use within the cpuset
Cpus string `json:"cpus"`
// MEM to use within the cpuset
Mems string `json:"mems"`
}
// Pids for Linux cgroup 'pids' resource management (Linux 4.3)
type Pids struct {
// Maximum number of PIDs. A value < 0 implies "no limit".
Limit int64 `json:"limit"`
}
// Network identification and priority configuration
type Network struct {
// Set class identifier for container's network packets
ClassID string `json:"classId"`
// Set priority of network traffic for container
Priorities []InterfacePriority `json:"priorities"`
}
// Resources has container runtime resource constraints
type Resources struct {
// DisableOOMKiller disables the OOM killer for out of memory conditions
DisableOOMKiller bool `json:"disableOOMKiller"`
// Memory restriction configuration
Memory Memory `json:"memory"`
// CPU resource restriction configuration
CPU CPU `json:"cpu"`
// Task resource restriction configuration.
Pids Pids `json:"pids"`
// BlockIO restriction configuration
BlockIO BlockIO `json:"blockIO"`
// Hugetlb limit (in bytes)
HugepageLimits []HugepageLimit `json:"hugepageLimits"`
// Network restriction configuration
Network Network `json:"network"`
}
// Device represents the information on a Linux special device file
type Device struct {
// Path to the device.
Path string `json:"path"`
// Device type, block, char, etc.
Type rune `json:"type"`
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
// Cgroup permissions format, rwm.
Permissions string `json:"permissions"`
// FileMode permission bits for the device.
FileMode os.FileMode `json:"fileMode"`
// UID of the device.
UID uint32 `json:"uid"`
// Gid of the device.
GID uint32 `json:"gid"`
}
// Seccomp represents syscall restrictions
type Seccomp struct {
DefaultAction Action `json:"defaultAction"`
Architectures []Arch `json:"architectures"`
Syscalls []*Syscall `json:"syscalls"`
}
// Additional architectures permitted to be used for system calls
// By default only the native architecture of the kernel is permitted
type Arch string
const (
ArchX86 Arch = "SCMP_ARCH_X86"
ArchX86_64 Arch = "SCMP_ARCH_X86_64"
ArchX32 Arch = "SCMP_ARCH_X32"
ArchARM Arch = "SCMP_ARCH_ARM"
ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
ArchMIPS Arch = "SCMP_ARCH_MIPS"
ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
)
// Action taken upon Seccomp rule match
type Action string
const (
ActKill Action = "SCMP_ACT_KILL"
ActTrap Action = "SCMP_ACT_TRAP"
ActErrno Action = "SCMP_ACT_ERRNO"
ActTrace Action = "SCMP_ACT_TRACE"
ActAllow Action = "SCMP_ACT_ALLOW"
)
// Operator used to match syscall arguments in Seccomp
type Operator string
const (
OpNotEqual Operator = "SCMP_CMP_NE"
OpLessThan Operator = "SCMP_CMP_LT"
OpLessEqual Operator = "SCMP_CMP_LE"
OpEqualTo Operator = "SCMP_CMP_EQ"
OpGreaterEqual Operator = "SCMP_CMP_GE"
OpGreaterThan Operator = "SCMP_CMP_GT"
OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ"
)
// Arg used for matching specific syscall arguments in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
ValueTwo uint64 `json:"valueTwo"`
Op Operator `json:"op"`
}
// Syscall is used to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
}