/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include <asm/percpu.h>
#include <asm/segment.h>
#include "kvm-asm-offsets.h"
#include "run_flags.h"

#define WORD_SIZE (BITS_PER_LONG / 8)

#define VCPU_RAX        __VCPU_REGS_RAX * WORD_SIZE
#define VCPU_RCX        __VCPU_REGS_RCX * WORD_SIZE
#define VCPU_RDX        __VCPU_REGS_RDX * WORD_SIZE
#define VCPU_RBX        __VCPU_REGS_RBX * WORD_SIZE
/* Intentionally omit RSP as it's context switched by hardware */
#define VCPU_RBP        __VCPU_REGS_RBP * WORD_SIZE
#define VCPU_RSI        __VCPU_REGS_RSI * WORD_SIZE
#define VCPU_RDI        __VCPU_REGS_RDI * WORD_SIZE

#ifdef CONFIG_X86_64
#define VCPU_R8         __VCPU_REGS_R8  * WORD_SIZE
#define VCPU_R9         __VCPU_REGS_R9  * WORD_SIZE
#define VCPU_R10        __VCPU_REGS_R10 * WORD_SIZE
#define VCPU_R11        __VCPU_REGS_R11 * WORD_SIZE
#define VCPU_R12        __VCPU_REGS_R12 * WORD_SIZE
#define VCPU_R13        __VCPU_REGS_R13 * WORD_SIZE
#define VCPU_R14        __VCPU_REGS_R14 * WORD_SIZE
#define VCPU_R15        __VCPU_REGS_R15 * WORD_SIZE
#endif
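
/*
 * Each VCPU_<reg> constant above is the byte offset of that guest register
 * within the "unsigned long" array passed to __vmx_vcpu_run() as @regs
 * (e.g. VCPU_RCX == __VCPU_REGS_RCX * 8 on x86-64).
 */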

.macro VMX_DO_EVENT_IRQOFF call_insn call_target
        /*
         * Unconditionally create a stack frame.  Getting the correct RSP onto
         * the stack (for x86-64) would take two instructions anyway, and RBP
         * can be used to restore RSP to make objtool happy (see below).
         */
        push %_ASM_BP
        mov %_ASM_SP, %_ASM_BP
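
        /*
         * The pushes below, together with the return address pushed by
         * \call_insn, build the interrupt stack frame the callee's IRET will
         * unwind: RIP, CS, RFLAGS and, on x86-64, also RSP and SS.
         */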

#ifdef CONFIG_X86_64
        /*
         * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
         * creating the synthetic interrupt stack frame for the IRQ/NMI.
         */
        and  $-16, %rsp
        push $__KERNEL_DS
        push %rbp
#endif
        pushf
        push $__KERNEL_CS
        \call_insn \call_target

        /*
         * "Restore" RSP from RBP, even though IRET has already unwound RSP to
         * the correct value.  objtool doesn't know the callee will IRET and,
         * without the explicit restore, thinks the stack is getting walloped.
         * Using an unwind hint is problematic due to x86-64's dynamic alignment.
         */
        leave
        RET
.endm
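
/*
 * VMX_DO_EVENT_IRQOFF is instantiated below by vmx_do_nmi_irqoff (direct call
 * to asm_exc_nmi_kvm_vmx) and by vmx_do_interrupt_irqoff (CALL_NOSPEC through
 * the handler address in _ASM_ARG1).
 */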

.section .noinstr.text, "ax"

/**
 * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
 * @vmx:        struct vcpu_vmx *
 * @regs:       unsigned long * (to guest registers)
 * @flags:      VMX_RUN_VMRESUME:       use VMRESUME instead of VMLAUNCH
 *              VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
 *              VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO: vCPU can access host MMIO
 *
 * Returns:
 *      0 on VM-Exit, 1 on VM-Fail
 */
SYM_FUNC_START(__vmx_vcpu_run)
        push %_ASM_BP
        mov  %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
        push %r15
        push %r14
        push %r13
        push %r12
#else
        push %edi
        push %esi
#endif
        push %_ASM_BX

        /* Save @vmx for SPEC_CTRL handling */
        push %_ASM_ARG1

        /* Save @flags (used for VMLAUNCH vs. VMRESUME and mitigations). */
        push %_ASM_ARG3

        /*
         * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
         * @regs is needed after VM-Exit to save the guest's register values.
         */
        push %_ASM_ARG2
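
        /*
         * Stack layout at this point:
         *
         *              (%_ASM_SP)      @regs
         *    WORD_SIZE(%_ASM_SP)       @flags
         *  2*WORD_SIZE(%_ASM_SP)       @vmx
         */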

        lea (%_ASM_SP), %_ASM_ARG2
        call vmx_update_host_rsp

        ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL

        /*
         * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
         * host's, write the MSR.
         *
         * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
         * there must not be any returns or indirect branches between this code
         * and vmentry.
         */
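
        /*
         * The open-coded sequence below is roughly (C-style pseudocode, names
         * approximate):
         *
         *      if (vmx->spec_ctrl != this_cpu(x86_spec_ctrl_current))
         *              WRMSR(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
         *
         * without emitting any returns or indirect branches.
         */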
        mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
#ifdef CONFIG_X86_64
        mov VMX_spec_ctrl(%rdi), %rdx
        cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx
        je .Lspec_ctrl_done
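        /* WRMSR wants the value in EDX:EAX (MSR index in ECX); split RDX up. */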
        movl %edx, %eax
        shr $32, %rdx
#else
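        /*
         * 32-bit: compare the 64-bit guest and host SPEC_CTRL values in two
         * halves.  XOR zeroes matching halves and OR-ing the results sets ZF
         * only if the full values are identical.  The guest value is left in
         * EDX:EAX for the WRMSR below.
         */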
        mov VMX_spec_ctrl(%edi), %eax
        mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx
        xor %eax, %ecx
        mov VMX_spec_ctrl + 4(%edi), %edx
        mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edi
        xor %edx, %edi
        or %edi, %ecx
        je .Lspec_ctrl_done
#endif
        mov $MSR_IA32_SPEC_CTRL, %ecx
        wrmsr

.Lspec_ctrl_done:

        /*
         * Since vmentry is serializing on affected CPUs, there's no need for
         * an LFENCE to stop speculation from skipping the wrmsr.
         */

        /* Load @regs to RAX. */
        mov (%_ASM_SP), %_ASM_AX

        /* Load guest registers.  Don't clobber flags. */
        mov VCPU_RCX(%_ASM_AX), %_ASM_CX
        mov VCPU_RDX(%_ASM_AX), %_ASM_DX
        mov VCPU_RBX(%_ASM_AX), %_ASM_BX
        mov VCPU_RBP(%_ASM_AX), %_ASM_BP
        mov VCPU_RSI(%_ASM_AX), %_ASM_SI
        mov VCPU_RDI(%_ASM_AX), %_ASM_DI
#ifdef CONFIG_X86_64
        mov VCPU_R8 (%_ASM_AX),  %r8
        mov VCPU_R9 (%_ASM_AX),  %r9
        mov VCPU_R10(%_ASM_AX), %r10
        mov VCPU_R11(%_ASM_AX), %r11
        mov VCPU_R12(%_ASM_AX), %r12
        mov VCPU_R13(%_ASM_AX), %r13
        mov VCPU_R14(%_ASM_AX), %r14
        mov VCPU_R15(%_ASM_AX), %r15
#endif
        /* Load guest RAX.  This kills the @regs pointer! */
        mov VCPU_RAX(%_ASM_AX), %_ASM_AX

        /*
         * Note, ALTERNATIVE_2 works in reverse order.  If CLEAR_CPU_BUF_VM is
         * enabled, do VERW unconditionally.  Otherwise, if CLEAR_CPU_BUF_VM_MMIO
         * is enabled, check @flags to see if the vCPU can access host MMIO and,
         * if so, do VERW.  Else, do nothing (no mitigations needed/enabled).
         */
        ALTERNATIVE_2 "",                                                                         \
                      __stringify(testl $VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO, WORD_SIZE(%_ASM_SP); \
                                  jz .Lskip_mmio_verw;                                            \
                                  VERW;                                                           \
                                  .Lskip_mmio_verw:),                                             \
                      X86_FEATURE_CLEAR_CPU_BUF_VM_MMIO,                                          \
                      __stringify(VERW), X86_FEATURE_CLEAR_CPU_BUF_VM

        /* Check @flags to see if VMLAUNCH or VMRESUME is needed. */
        testl $VMX_RUN_VMRESUME, WORD_SIZE(%_ASM_SP)
        jz .Lvmlaunch

        /*
         * After a successful VMRESUME/VMLAUNCH, control flow "magically"
         * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
         * So this isn't a typical function and objtool needs to be told to
         * save the unwind state here and restore it below.
         */
        UNWIND_HINT_SAVE

/*
 * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
 * the 'vmx_vmexit' label below.
 */
.Lvmresume:
        vmresume
        jmp .Lvmfail

.Lvmlaunch:
        vmlaunch
        jmp .Lvmfail

        _ASM_EXTABLE(.Lvmresume, .Lfixup)
        _ASM_EXTABLE(.Lvmlaunch, .Lfixup)

SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)

        /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
        UNWIND_HINT_RESTORE
        ENDBR

        /* Temporarily save guest's RAX. */
        push %_ASM_AX

        /* Reload @regs to RAX; it's one slot up due to the push of guest RAX. */
        mov WORD_SIZE(%_ASM_SP), %_ASM_AX

        /* Save all guest registers, including RAX from the stack */
        pop           VCPU_RAX(%_ASM_AX)
        mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
        mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
        mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
        mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
        mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
        mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
        mov %r8,  VCPU_R8 (%_ASM_AX)
        mov %r9,  VCPU_R9 (%_ASM_AX)
        mov %r10, VCPU_R10(%_ASM_AX)
        mov %r11, VCPU_R11(%_ASM_AX)
        mov %r12, VCPU_R12(%_ASM_AX)
        mov %r13, VCPU_R13(%_ASM_AX)
        mov %r14, VCPU_R14(%_ASM_AX)
        mov %r15, VCPU_R15(%_ASM_AX)
#endif

        /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
        xor %ebx, %ebx

.Lclear_regs:
        /* Discard @regs.  The register is irrelevant, it just can't be RBX. */
        pop %_ASM_AX

        /*
         * Clear all general purpose registers except RSP and RBX to prevent
         * speculative use of the guest's values, even those that are reloaded
         * via the stack.  In theory, an L1 cache miss when restoring registers
         * could lead to speculative execution with the guest's values.
         * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
         * free.  RSP and RBX are exempt as RSP is restored by hardware during
         * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
         * value.
         */
        xor %eax, %eax
        xor %ecx, %ecx
        xor %edx, %edx
        xor %ebp, %ebp
        xor %esi, %esi
        xor %edi, %edi
#ifdef CONFIG_X86_64
        xor %r8d,  %r8d
        xor %r9d,  %r9d
        xor %r10d, %r10d
        xor %r11d, %r11d
        xor %r12d, %r12d
        xor %r13d, %r13d
        xor %r14d, %r14d
        xor %r15d, %r15d
#endif

        /*
         * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
         * the first unbalanced RET after vmexit!
         *
         * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
         * entries and (in some cases) RSB underflow.
         *
         * eIBRS has its own protection against poisoned RSB, so it doesn't
         * need the RSB filling sequence.  But it does need to be enabled, and a
         * single call to retire, before the first unbalanced RET.
         */

        /*
         * %_ASM_CX is only a scratch register for FILL_RETURN_BUFFER; all GPRs
         * except RSP and RBX were zeroed above, so clobbering it is harmless.
         */
        FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
                           X86_FEATURE_RSB_VMEXIT_LITE

        pop %_ASM_ARG2  /* @flags */
        pop %_ASM_ARG1  /* @vmx */

        call vmx_spec_ctrl_restore_host

        /* Clear branch history (BHB) to mitigate BHI, if that mitigation is on. */
        CLEAR_BRANCH_HISTORY_VMEXIT

        /* Put return value in AX */
        mov %_ASM_BX, %_ASM_AX

        pop %_ASM_BX
#ifdef CONFIG_X86_64
        pop %r12
        pop %r13
        pop %r14
        pop %r15
#else
        pop %esi
        pop %edi
#endif
        pop %_ASM_BP
        RET

.Lfixup:
        cmpb $0, _ASM_RIP(kvm_rebooting)
        jne .Lvmfail
        ud2
.Lvmfail:
        /* VM-Fail: set return value to 1 */
        mov $1, %_ASM_BX
        jmp .Lclear_regs

SYM_FUNC_END(__vmx_vcpu_run)

SYM_FUNC_START(vmx_do_nmi_irqoff)
        VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(vmx_do_nmi_irqoff)

#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT

/**
 * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
 * @field:      VMCS field encoding that failed
 * @fault:      %true if the VMREAD faulted, %false if it failed
 *
 * Save and restore volatile registers across a call to vmread_error().  Note,
 * all parameters are passed on the stack.
 */
SYM_FUNC_START(vmread_error_trampoline)
        push %_ASM_BP
        mov  %_ASM_SP, %_ASM_BP

        push %_ASM_AX
        push %_ASM_CX
        push %_ASM_DX
#ifdef CONFIG_X86_64
        push %rdi
        push %rsi
        push %r8
        push %r9
        push %r10
        push %r11
#endif

        /*
         * Load @field and @fault to arg1 and arg2 respectively.  Both are
         * passed on the stack, above the return address and the saved RBP,
         * i.e. at 2*WORD_SIZE and 3*WORD_SIZE off of RBP.
         */
        mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
        mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1

        call vmread_error_trampoline2

        /* Zero out @fault, which will be popped into the result register. */
        _ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)

#ifdef CONFIG_X86_64
        pop %r11
        pop %r10
        pop %r9
        pop %r8
        pop %rsi
        pop %rdi
#endif
        pop %_ASM_DX
        pop %_ASM_CX
        pop %_ASM_AX
        pop %_ASM_BP

        RET
SYM_FUNC_END(vmread_error_trampoline)
#endif

.section .text, "ax"

#ifndef CONFIG_X86_FRED

SYM_FUNC_START(vmx_do_interrupt_irqoff)
        VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)

#endif