Skip to content

Commit

Permalink
KVM: x86/mmu: Flush TLBs after zap in TDP MMU PF handler
Browse files Browse the repository at this point in the history
When the TDP MMU is allowed to handle page faults in parallel there is
the possibility of a race where an SPTE is cleared and then immediately
replaced with a present SPTE pointing to a different PFN, before the
TLBs can be flushed. This race would violate architectural specs. Ensure
that the TLBs are flushed properly before other threads are allowed to
install any present value for the SPTE.

Reviewed-by: Peter Feiner <[email protected]>
Signed-off-by: Ben Gardon <[email protected]>

Message-Id: <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
  • Loading branch information
Ben Gardon authored and bonzini committed Feb 4, 2021
1 parent 9a77daa commit 08f07c8
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 10 deletions.
21 changes: 20 additions & 1 deletion arch/x86/kvm/mmu/spte.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,25 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
PT64_EPT_EXECUTABLE_MASK)
#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT PT64_SECOND_AVAIL_BITS_SHIFT

/*
 * REMOVED_SPTE - non-present placeholder for an SPTE that is mid-update.
 *
 * A thread that does not hold the MMU lock exclusively, but needs to carry
 * out a multi-step operation on an SPTE, may park the SPTE at this value in
 * the meantime. Any other thread that observes it should leave the SPTE
 * alone.
 *
 * Only bit 59 is set. The value is treated as non-present by both AMD and
 * Intel CPUs, and because the pfn bits are all zero it does not introduce
 * an L1TF vulnerability.
 *
 * Used exclusively by the TDP MMU.
 */
#define REMOVED_SPTE (1ULL << 59)

/* Report whether @spte holds exactly the REMOVED_SPTE placeholder value. */
static inline bool is_removed_spte(u64 spte)
{
	const u64 placeholder = REMOVED_SPTE;

	return spte == placeholder;
}

/*
* In some cases, we need to preserve the GFN of a non-present or reserved
* SPTE when we usurp the upper five bits of the physical address space to
Expand Down Expand Up @@ -187,7 +206,7 @@ static inline bool is_access_track_spte(u64 spte)

/*
 * Report whether @pte should be treated as a present shadow PTE.
 *
 * A PTE counts as present when it is non-zero and is neither an MMIO SPTE
 * (per is_mmio_spte()) nor the temporary REMOVED_SPTE placeholder (per
 * is_removed_spte()). The removed-SPTE check must be part of the one and
 * only return statement; an earlier return without it would leave the check
 * unreachable and let a frozen SPTE be treated as present.
 */
static inline bool is_shadow_present_pte(u64 pte)
{
	return (pte != 0) && !is_mmio_spte(pte) && !is_removed_spte(pte);
}

static inline bool is_large_pte(u64 pte)
Expand Down
63 changes: 54 additions & 9 deletions arch/x86/kvm/mmu/tdp_mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,15 +427,19 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
*/
if (!was_present && !is_present) {
/*
* If this change does not involve a MMIO SPTE, it is
* unexpected. Log the change, though it should not impact the
* guest since both the former and current SPTEs are nonpresent.
* If this change does not involve a MMIO SPTE or removed SPTE,
* it is unexpected. Log the change, though it should not
* impact the guest since both the former and current SPTEs
* are nonpresent.
*/
if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
if (WARN_ON(!is_mmio_spte(old_spte) &&
!is_mmio_spte(new_spte) &&
!is_removed_spte(new_spte)))
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
"should not be replaced with another,\n"
"different nonpresent SPTE, unless one or both\n"
"are MMIO SPTEs.\n"
"are MMIO SPTEs, or the new SPTE is\n"
"a temporary removed SPTE.\n"
"as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
as_id, gfn, old_spte, new_spte, level);
return;
Expand Down Expand Up @@ -486,6 +490,13 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,

lockdep_assert_held_read(&kvm->mmu_lock);

/*
* Do not change removed SPTEs. Only the thread that froze the SPTE
* may modify it.
*/
if (iter->old_spte == REMOVED_SPTE)
return false;

if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte,
new_spte) != iter->old_spte)
return false;
Expand All @@ -496,6 +507,34 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
return true;
}

/*
 * tdp_mmu_zap_spte_atomic - Zap the SPTE under @iter and flush TLBs before
 *			     any other thread can install a new value.
 * @kvm: kvm instance
 * @iter: a tdp_iter instance currently on the SPTE that should be zapped
 *
 * Returns: true if the SPTE was frozen, flushed and cleared; false if
 *	    tdp_mmu_set_spte_atomic() could not install the frozen value, in
 *	    which case no flush is issued and the SPTE is not cleared here.
 */
static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
					   struct tdp_iter *iter)
{
	/*
	 * Freeze the SPTE by setting it to a special,
	 * non-present value. This will stop other threads from
	 * immediately installing a present entry in its place
	 * before the TLBs are flushed.
	 */
	if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
		return false;

	/* Flush the whole range mapped by an entry at this level. */
	kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
					   KVM_PAGES_PER_HPAGE(iter->level));

	/*
	 * No other thread can overwrite the removed SPTE as they
	 * must either wait on the MMU lock or use
	 * tdp_mmu_set_spte_atomic which will not overwrite the
	 * special removed SPTE value. No bookkeeping is needed
	 * here since the SPTE is going from non-present
	 * to non-present.
	 *
	 * Access the SPTE pointer through rcu_dereference(), as the other
	 * SPTE writers in this file do, rather than dereferencing
	 * iter->sptep directly.
	 */
	WRITE_ONCE(*rcu_dereference(iter->sptep), 0);

	return true;
}


/*
* __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
Expand Down Expand Up @@ -523,6 +562,15 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,

lockdep_assert_held_write(&kvm->mmu_lock);

/*
* No thread should be using this function to set SPTEs to the
* temporary removed SPTE value.
* If operating under the MMU lock in read mode, tdp_mmu_set_spte_atomic
* should be used. If operating under the MMU lock in write mode, the
* use of the removed SPTE should not be necessary.
*/
WARN_ON(iter->old_spte == REMOVED_SPTE);

WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);

__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
Expand Down Expand Up @@ -790,12 +838,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
*/
if (is_shadow_present_pte(iter.old_spte) &&
is_large_pte(iter.old_spte)) {
if (!tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, 0))
if (!tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter))
break;

kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
KVM_PAGES_PER_HPAGE(iter.level));

/*
* The iter must explicitly re-read the spte here
* because the new value informs the !present
Expand Down

0 comments on commit 08f07c8

Please sign in to comment.