Skip to content

Commit

Permalink
Merge pull request #43 from YJDoc2/main
Browse files Browse the repository at this point in the history
Add comments to create.rs
  • Loading branch information
utam0k authored Jun 4, 2021
2 parents 25fbcc8 + 0113e72 commit 5cb6000
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 5 deletions.
11 changes: 11 additions & 0 deletions docs/doc-draft.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ These are references to various documentations and specifications, which can be
- [OCI runtime specification] : The specification for a container runtime. Any OCI compliant runtime must follow this.
- [runc man pages] : has information on various commandline options supported by runc, can be used to understand commands and their options.
- [cgroups man page](https://man7.org/linux/man-pages/man7/cgroups.7.html) : contains information about cgroups, their creation, deletion etc.
- [pseudoterminal man page](https://man7.org/linux/man-pages/man7/pty.7.html) : Information about the pseudoterminal system, useful to understand console_socket parameter in create subcommand
- [Unix Sockets man page](https://man7.org/linux/man-pages/man7/unix.7.html) : Useful to understand sockets
- [prctl man page](https://man7.org/linux/man-pages/man2/prctl.2.html) : Process control man pages
- [OCI Linux spec](https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md) : Linux specific section of OCI Spec

---

Expand Down Expand Up @@ -49,5 +53,12 @@ On invoking Youki, main function parses args passed to it, which contains direct

From there it matches the subcommand argument against the possible subcommands and takes the appropriate action, such as creating a new container, deleting a container, etc.

### create container

One thing to note is that, in the end, a container is just another process in Linux. It has its own specific control groups and namespaces, using which the program executing in it can be given the impression that it is running on a complete system; but on the system on which it is running, it is just another process, and has attributes such as pid, file descriptors, etc. associated with it like any other process.

When given the create command, Youki will load the specification, configuration, sockets etc.,
fork the process into a parent and a child (C1), fork the child process again (C2), apply the limits, namespaces etc. to the child of the child (C2), and run the command/program in C2. After the command/program is finished, C2 returns. C1 waits for C2 to exit, after which it also exits.

[oci runtime specification]: https://github.com/opencontainers/runtime-spec/blob/master/runtime.md
[runc man pages]: https://github.com/opencontainers/runc/blob/master/man/runc.8.md
52 changes: 47 additions & 5 deletions src/create.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//! This handles the creation of a new container
use std::fs;
use std::path::{Path, PathBuf};
use std::process;
Expand All @@ -19,19 +20,34 @@ use crate::tty;
use crate::utils;
use crate::{capabilities, command::Command};

/// This is the main structure which stores various commandline options given by
/// high-level container runtime
// NOTE: with clap's derive, the `///` doc comments on this struct and its fields
// are also used as the generated help text, so editing them changes `--help` output.
// Maintainer-only remarks therefore go in plain `//` comments.
#[derive(Clap, Debug)]
pub struct Create {
/// File to write pid of the container created
// Optional. Note that, in the end, a container is just another process,
// so it has a pid like any other process.
#[clap(short, long)]
pid_file: Option<String>,
/// path to the bundle directory, containing config.json and root filesystem
// Defaults to the current directory ("."). `exec` changes into this directory
// and loads "config.json" from it.
#[clap(short, long, default_value = ".")]
bundle: PathBuf,
/// Unix socket (file) path , which will receive file descriptor of the writing end of the pseudoterminal
// Optional. `exec` only sets up the console file descriptors when this is given.
#[clap(short, long)]
console_socket: Option<String>,
/// name of the container instance to be started
// No `short`/`long` attribute, so clap treats this as a positional argument.
// `exec` joins it onto the root path to form the container's state directory.
pub container_id: String,
}

// One thing to note is that, in the end, a container is just another process in Linux.
// It has its own specific control groups and namespaces, using which the program executing
// in it can be given the impression that it is running on a complete system; but on the
// system on which it is running, it is just another process, and has attributes such as
// pid, file descriptors, etc. associated with it like any other process.
impl Create {
/// Starts a new container process
pub fn exec(&self, root_path: PathBuf, command: impl Command) -> Result<()> {
// create a directory for the container to store state etc.
// if already present, return error
let bundle_canonicalized = fs::canonicalize(&self.bundle)
.unwrap_or_else(|_| panic!("failed to canonicalied {:?}", &self.bundle));
let container_dir = root_path.join(&self.container_id);
Expand All @@ -41,16 +57,21 @@ impl Create {
bail!("{} already exists", self.container_id)
}

// change directory to the bundle directory, and load configuration,
// copy that to the container's directory
unistd::chdir(&self.bundle)?;

let spec = oci_spec::Spec::load("config.json")?;
fs::copy("config.json", container_dir.join("config.json"))?;
log::debug!("spec: {:?}", spec);

// convert path to absolute path, as relative path will be evaluated
// relative to where youki command is executed, and will be difficult to manipulate
let container_dir = fs::canonicalize(container_dir)?;
unistd::chdir(&*container_dir)?;

log::debug!("{:?}", &container_dir);

let container = Container::new(
&self.container_id,
ContainerStatus::Creating,
Expand All @@ -61,9 +82,10 @@ impl Create {
container.save()?;

let mut notify_socket: NotifyListener = NotifyListener::new(&container_dir)?;

// convert path of root file system of the container to absolute path
let rootfs = fs::canonicalize(&spec.root.path)?;

// if socket file path is given in commandline options,
// get file descriptors of console and console socket
let (csocketfd, _consolefd) = {
if let Some(console_socket) = &self.console_socket {
let (csocketfd, consolefd) =
Expand All @@ -83,13 +105,16 @@ impl Create {
container,
command,
)?;
// run_container forks the process, so after it returns, exit if we are in the
// parent process, as the work of creating the container is done
if let Process::Parent(_) = process {
process::exit(0);
}
// if in the child process after fork, then just return
Ok(())
}
}

/// Fork the process and actually start the container process
fn run_container<P: AsRef<Path>>(
pid_file: Option<P>,
notify_socket: &mut NotifyListener,
Expand All @@ -99,13 +124,18 @@ fn run_container<P: AsRef<Path>>(
container: Container,
command: impl Command,
) -> Result<Process> {
// disable core dump for the process, check https://man7.org/linux/man-pages/man2/prctl.2.html for more information
prctl::set_dumpable(false).unwrap();

// get Linux specific section of OCI spec,
// refer https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md for more information
let linux = spec.linux.as_ref().unwrap();
let namespaces: Namespaces = linux.namespaces.clone().into();

let cgroups_path = utils::get_cgroup_path(&linux.cgroups_path, container.id());
let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path)?;

// first fork, which creates process, which will later create actual container process
match fork::fork_first(
pid_file,
namespaces
Expand All @@ -115,8 +145,11 @@ fn run_container<P: AsRef<Path>>(
&container,
cmanager,
)? {
// In the parent process, which called run_container
Process::Parent(parent) => Ok(Process::Parent(parent)),
// in child process
Process::Child(child) => {
// set limits and namespaces to the process
for rlimit in spec.process.rlimits.iter() {
command.set_rlimit(rlimit)?
}
Expand All @@ -125,22 +158,29 @@ fn run_container<P: AsRef<Path>>(
let without = sched::CloneFlags::CLONE_NEWUSER;
namespaces.apply_unshare(without)?;

// set up tty if specified
if let Some(csocketfd) = csocketfd {
tty::ready(csocketfd)?;
}

// set namespaces
namespaces.apply_setns()?;

// fork second time, which will later create container
match fork::fork_init(child)? {
Process::Child(child) => Ok(Process::Child(child)),
Process::Child(_child) => unreachable!(),
// This is actually the child process after fork
Process::Init(mut init) => {
// setup args and env vars as in the spec
let spec_args: &Vec<String> = &spec.process.args.clone();
let envs: &Vec<String> = &spec.process.env.clone();
// prepare process
init_process(spec, command, rootfs, namespaces)?;
init.ready()?;
notify_socket.wait_for_container_start()?;

// actually run the command / program to be run in container
utils::do_exec(&spec_args[0], spec_args, envs)?;
// the command / program is done executing
container.update_status(ContainerStatus::Stopped)?.save()?;

Ok(Process::Init(init))
Expand All @@ -152,6 +192,7 @@ fn run_container<P: AsRef<Path>>(
}
}

/// setup hostname, rootfs for the container process
fn init_process(
spec: oci_spec::Spec,
command: impl Command,
Expand All @@ -173,6 +214,7 @@ fn init_process(
.contains(sched::CloneFlags::CLONE_NEWUSER),
)?;

// change the root of filesystem of the process to the rootfs
command.pivot_rootfs(&rootfs)?;

command.set_id(Uid::from_raw(proc.user.uid), Gid::from_raw(proc.user.gid))?;
Expand Down

0 comments on commit 5cb6000

Please sign in to comment.