Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(vmm): call KVMCLOCK_CTRL when pausing #4460

Merged
merged 8 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ and this project adheres to
`VIRTIO_NET_F_RX_MRGBUF` support to the `virtio-net` device. When this feature
is negotiated, guest `virtio-net` driver can perform more efficient memory
management which in turn improves RX and TX performance.
- [#4460](https://github.com/firecracker-microvm/firecracker/pull/4460): Add a
call to
[`KVM_KVMCLOCK_CTRL`](https://docs.kernel.org/virt/kvm/api.html#kvm-kvmclock-ctrl)
after pausing vCPUs on x86_64 architectures. This ioctl sets a flag in the KVM
state of the vCPU indicating that it has been paused by the host userspace. In
guests that use kvmclock, the soft lockup watchdog checks this flag. If it is
set, it won't trigger the lockup condition. Calling the ioctl for guests that
don't use kvmclock will fail. These failures are not fatal. We log the failure
and increase the `vcpu.kvmclock_ctrl_fails` metric.

### Changed

Expand Down
12 changes: 12 additions & 0 deletions resources/seccomp/x86_64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -1254,6 +1254,18 @@
}
]
},
{
"syscall": "ioctl",
"args": [
bchalios marked this conversation as resolved.
Show resolved Hide resolved
{
"index": 1,
"type": "dword",
"op": "eq",
"val": 44717,
"comment": "KVM_KVMCLOCK_CTRL. We call this after pausing vCPUs to avoid soft lockups in the guest."
}
]
},
{
"syscall": "sched_yield",
"comment": "Used by the rust standard library in std::sync::mpmc. Firecracker uses mpsc channels from this module for inter-thread communication"
Expand Down
3 changes: 3 additions & 0 deletions src/vmm/src/logger/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,8 @@ pub struct VcpuMetrics {
pub exit_mmio_write: SharedIncMetric,
/// Number of errors during this VCPU's run.
pub failures: SharedIncMetric,
/// Number of times that the `KVM_KVMCLOCK_CTRL` ioctl failed.
bchalios marked this conversation as resolved.
Show resolved Hide resolved
pub kvmclock_ctrl_fails: SharedIncMetric,
/// Provides Min/max/sum for KVM exits handling input IO.
pub exit_io_in_agg: LatencyAggregateMetrics,
/// Provides Min/max/sum for KVM exits handling output IO.
Expand All @@ -797,6 +799,7 @@ impl VcpuMetrics {
exit_mmio_read: SharedIncMetric::new(),
exit_mmio_write: SharedIncMetric::new(),
failures: SharedIncMetric::new(),
kvmclock_ctrl_fails: SharedIncMetric::new(),
exit_io_in_agg: LatencyAggregateMetrics::new(),
exit_io_out_agg: LatencyAggregateMetrics::new(),
exit_mmio_read_agg: LatencyAggregateMetrics::new(),
Expand Down
11 changes: 9 additions & 2 deletions src/vmm/src/vstate/vcpu/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,15 @@ impl Vcpu {
.send(VcpuResponse::Paused)
.expect("vcpu channel unexpectedly closed");

// TODO: we should call `KVM_KVMCLOCK_CTRL` here to make sure
// TODO continued: the guest soft lockup watchdog does not panic on Resume.
// Calling `KVM_KVMCLOCK_CTRL` to make sure the guest softlockup watchdog
// does not panic on resume, see https://docs.kernel.org/virt/kvm/api.html .
// We do not want to fail if the call is not successful, because depending
// that may be acceptable depending on the workload.
#[cfg(target_arch = "x86_64")]
if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() {
METRICS.vcpu.kvmclock_ctrl_fails.inc();
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
}

// Move to 'paused' state.
state = StateMachine::next(Self::paused);
Expand Down
10 changes: 9 additions & 1 deletion tests/framework/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Generic utility functions that are used in the framework."""
import errno
import functools
import json
import logging
Expand Down Expand Up @@ -453,7 +454,9 @@ def get_process_pidfd(pid):
"""Get a pidfd file descriptor for the process with PID `pid`

Will return a pid file descriptor for the process with PID `pid` if it is
still alive. If the process has already exited it will return `None`.
still alive. If the process has already exited we will receive either a
`ProcessLookupError` exception or and an `OSError` exception with errno `EINVAL`.
In these cases, we will return `None`.

Any other error while calling the system call, will raise an OSError
exception.
Expand All @@ -462,6 +465,11 @@ def get_process_pidfd(pid):
pidfd = os.pidfd_open(pid)
except ProcessLookupError:
return None
except OSError as err:
if err.errno == errno.EINVAL:
return None
roypat marked this conversation as resolved.
Show resolved Hide resolved

raise

return pidfd

Expand Down
1 change: 1 addition & 0 deletions tests/host_tools/fcmetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ def validate_fc_metrics(metrics):
"exit_mmio_read",
"exit_mmio_write",
"failures",
"kvmclock_ctrl_fails",
{"exit_io_in_agg": latency_agg_metrics_fields},
{"exit_io_out_agg": latency_agg_metrics_fields},
{"exit_mmio_read_agg": latency_agg_metrics_fields},
Expand Down
38 changes: 38 additions & 0 deletions tests/integration_tests/functional/test_pause_resume.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
"""Basic tests scenarios for snapshot save/restore."""

import platform
import time

import pytest


Expand Down Expand Up @@ -127,3 +130,38 @@ def test_pause_resume_preboot(uvm_nano):
# Try to resume microvm when not running, it must fail.
with pytest.raises(RuntimeError, match=expected_err):
basevm.api.vm.patch(state="Resumed")


@pytest.mark.skipif(
platform.machine() != "x86_64", reason="Only x86_64 supports pvclocks."
)
def test_kvmclock_ctrl(uvm_plain_any):
"""
Test that pausing vCPUs does not trigger a soft lock-up
"""

microvm = uvm_plain_any
microvm.help.enable_console()
microvm.spawn()
microvm.basic_config()
microvm.add_net_iface()
microvm.start()

# Launch reproducer in host
# This launches `ls -R /` in a loop inside the guest. The command writes its output in the
# console. This detail is important as it writing in the console seems to increase the probability
# that we will pause the execution inside the kernel and cause a lock up. Setting KVM_CLOCK_CTRL
# bit that informs the guest we're pausing the vCPUs, should avoid that lock up.
microvm.ssh.check_output(
"timeout 60 sh -c 'while true; do ls -R /; done' > /dev/ttyS0 2>&1 < /dev/null &"
)

for _ in range(12):
microvm.api.vm.patch(state="Paused")
time.sleep(5)
microvm.api.vm.patch(state="Resumed")
roypat marked this conversation as resolved.
Show resolved Hide resolved

dmesg = microvm.ssh.check_output("dmesg").stdout
assert "rcu_sched self-detected stall on CPU" not in dmesg
assert "rcu_preempt detected stalls on CPUs/tasks" not in dmesg
assert "BUG: soft lockup -" not in dmesg
Loading