arch/x86/kvm/svm/vmenter.S
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/frame.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include "kvm-asm-offsets.h"

#define WORD_SIZE (BITS_PER_LONG / 8)

/* Intentionally omit RAX as it's context switched by hardware */
#define VCPU_RCX        (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE)
#define VCPU_RDX        (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE)
#define VCPU_RBX        (SVM_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE)
/* Intentionally omit RSP as it's context switched by hardware */
#define VCPU_RBP        (SVM_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE)
#define VCPU_RSI        (SVM_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE)
#define VCPU_RDI        (SVM_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE)

#ifdef CONFIG_X86_64
#define VCPU_R8         (SVM_vcpu_arch_regs + __VCPU_REGS_R8  * WORD_SIZE)
#define VCPU_R9         (SVM_vcpu_arch_regs + __VCPU_REGS_R9  * WORD_SIZE)
#define VCPU_R10        (SVM_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE)
#define VCPU_R11        (SVM_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE)
#define VCPU_R12        (SVM_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE)
#define VCPU_R13        (SVM_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE)
#define VCPU_R14        (SVM_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE)
#define VCPU_R15        (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE)
#endif
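/*
 * Example expansion (on 64-bit, WORD_SIZE is 8 and __VCPU_REGS_RCX is 1):
 * VCPU_RCX evaluates to SVM_vcpu_arch_regs + 8, i.e. the byte offset of the
 * guest's RCX within struct vcpu_svm as generated into kvm-asm-offsets.h.
 */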

#define SVM_vmcb01_pa   (SVM_vmcb01 + KVM_VMCB_pa)

.section .noinstr.text, "ax"
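/*
 * Everything below lives in the noinstr section: between VM-Entry preparation
 * and the point where host state is fully restored, no instrumentation
 * (tracing, kprobes, etc.) may run.
 */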

.macro RESTORE_GUEST_SPEC_CTRL
        /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
        ALTERNATIVE_2 "", \
                "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \
                "", X86_FEATURE_V_SPEC_CTRL
801:
.endm
.macro RESTORE_GUEST_SPEC_CTRL_BODY
800:
        /*
         * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
         * host's, write the MSR.  This is kept out-of-line so that the common
         * case does not have to jump.
         *
         * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
         * there must not be any returns or indirect branches between this code
         * and vmentry.
         */
#ifdef CONFIG_X86_64
        mov SVM_spec_ctrl(%rdi), %rdx
        cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx
        je 801b
        movl %edx, %eax
        shr $32, %rdx
#else
        mov SVM_spec_ctrl(%edi), %eax
        mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx
        xor %eax, %ecx
        mov SVM_spec_ctrl + 4(%edi), %edx
        mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %esi
        xor %edx, %esi
        or %esi, %ecx
        je 801b
#endif
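        /*
         * Both paths above leave the guest value with the low half in EAX and
         * the high half in EDX; WRMSR writes EDX:EAX to the MSR indexed by ECX.
         */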
        mov $MSR_IA32_SPEC_CTRL, %ecx
        wrmsr
        jmp 801b
.endm

.macro RESTORE_HOST_SPEC_CTRL
        /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
        ALTERNATIVE_2 "", \
                "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \
                "", X86_FEATURE_V_SPEC_CTRL
901:
.endm
.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req
900:
        /* Same for after vmexit.  */
        mov $MSR_IA32_SPEC_CTRL, %ecx

        /*
         * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
         * if it was not intercepted during guest execution.
         */
        cmpb $0, \spec_ctrl_intercepted
        jnz 998f
        rdmsr
        movl %eax, SVM_spec_ctrl(%_ASM_DI)
        movl %edx, SVM_spec_ctrl + 4(%_ASM_DI)
998:
        /* Now restore the host value of the MSR if different from the guest's.  */
#ifdef CONFIG_X86_64
        mov PER_CPU_VAR(x86_spec_ctrl_current), %rdx
        cmp SVM_spec_ctrl(%rdi), %rdx
        je 901b
        movl %edx, %eax
        shr $32, %rdx
#else
        mov PER_CPU_VAR(x86_spec_ctrl_current), %eax
        mov SVM_spec_ctrl(%edi), %esi
        xor %eax, %esi
        mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edx
        mov SVM_spec_ctrl + 4(%edi), %edi
        xor %edx, %edi
        or %edi, %esi
        je 901b
#endif
        wrmsr
        jmp 901b
.endm

#define SVM_CLEAR_CPU_BUFFERS \
        ALTERNATIVE "", __CLEAR_CPU_BUFFERS, X86_FEATURE_CLEAR_CPU_BUF_VM

/**
 * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
 * @svm:        struct vcpu_svm *
 * @spec_ctrl_intercepted: bool
 */
SYM_FUNC_START(__svm_vcpu_run)
        push %_ASM_BP
        mov  %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
        push %r15
        push %r14
        push %r13
        push %r12
#else
        push %edi
        push %esi
#endif
        push %_ASM_BX

        /*
         * Save variables needed after vmexit on the stack, in reverse
         * order compared to when they are needed.
         */

        /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL.  */
        push %_ASM_ARG2

        /* Needed to restore access to percpu variables.  */
        __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa)

        /* Finally save @svm. */
        push %_ASM_ARG1

.ifnc _ASM_ARG1, _ASM_DI
        /*
         * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
         * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
         */
        mov %_ASM_ARG1, %_ASM_DI
.endif
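        /*
         * On 64-bit, _ASM_ARG1 already is RDI, so the .ifnc block above
         * assembles to nothing; on 32-bit the arguments arrive in EAX, EDX
         * and ECX, so @svm has to be moved into EDI explicitly.
         */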

        /* Clobbers RAX, RCX, RDX (and ESI on 32-bit), consumes RDI (@svm). */
        RESTORE_GUEST_SPEC_CTRL

        /*
         * Use a single vmcb (vmcb01 because it's always valid) for
         * context switching guest state via VMLOAD/VMSAVE, that way
         * the state doesn't need to be copied between vmcb01 and
         * vmcb02 when switching vmcbs for nested virtualization.
         */
        mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
1:      vmload %_ASM_AX
2:

        /* Get svm->current_vmcb->pa into RAX. */
        mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
        mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
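        /*
         * Unlike the VMLOAD above, VMRUN uses svm->current_vmcb, which is
         * vmcb02 rather than vmcb01 while a nested guest is being run.
         */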

        /* Load guest registers. */
        mov VCPU_RCX(%_ASM_DI), %_ASM_CX
        mov VCPU_RDX(%_ASM_DI), %_ASM_DX
        mov VCPU_RBX(%_ASM_DI), %_ASM_BX
        mov VCPU_RBP(%_ASM_DI), %_ASM_BP
        mov VCPU_RSI(%_ASM_DI), %_ASM_SI
#ifdef CONFIG_X86_64
        mov VCPU_R8 (%_ASM_DI),  %r8
        mov VCPU_R9 (%_ASM_DI),  %r9
        mov VCPU_R10(%_ASM_DI), %r10
        mov VCPU_R11(%_ASM_DI), %r11
        mov VCPU_R12(%_ASM_DI), %r12
        mov VCPU_R13(%_ASM_DI), %r13
        mov VCPU_R14(%_ASM_DI), %r14
        mov VCPU_R15(%_ASM_DI), %r15
#endif
        mov VCPU_RDI(%_ASM_DI), %_ASM_DI

        /* Clobbers EFLAGS.ZF */
        SVM_CLEAR_CPU_BUFFERS

        /* Enter guest mode */
3:      vmrun %_ASM_AX
4:
        /* Pop @svm to RAX while it's the only available register. */
        pop %_ASM_AX

        /* Save all guest registers.  */
        mov %_ASM_CX,   VCPU_RCX(%_ASM_AX)
        mov %_ASM_DX,   VCPU_RDX(%_ASM_AX)
        mov %_ASM_BX,   VCPU_RBX(%_ASM_AX)
        mov %_ASM_BP,   VCPU_RBP(%_ASM_AX)
        mov %_ASM_SI,   VCPU_RSI(%_ASM_AX)
        mov %_ASM_DI,   VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
        mov %r8,  VCPU_R8 (%_ASM_AX)
        mov %r9,  VCPU_R9 (%_ASM_AX)
        mov %r10, VCPU_R10(%_ASM_AX)
        mov %r11, VCPU_R11(%_ASM_AX)
        mov %r12, VCPU_R12(%_ASM_AX)
        mov %r13, VCPU_R13(%_ASM_AX)
        mov %r14, VCPU_R14(%_ASM_AX)
        mov %r15, VCPU_R15(%_ASM_AX)
#endif

        /* @svm can stay in RDI from now on.  */
        mov %_ASM_AX, %_ASM_DI

        mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
5:      vmsave %_ASM_AX
6:

        /* Restores GSBASE among other things, allowing access to percpu data.  */
        pop %_ASM_AX
7:      vmload %_ASM_AX
8:

        /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
        FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
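        /*
         * FILL_RETURN_BUFFER (gated on X86_FEATURE_RSB_VMEXIT, using %_ASM_AX
         * as scratch) overwrites the RSB with benign entries so that entries
         * planted by the guest cannot steer the host's return predictions.
         */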

        /*
         * Clobbers RAX, RCX, RDX (and ESI, EDI on 32-bit), consumes RDI (@svm)
         * and RSP (pointer to @spec_ctrl_intercepted).
         */
        RESTORE_HOST_SPEC_CTRL

        /*
         * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
         * untrained as soon as we exit the VM and are back to the
         * kernel. This should be done before re-enabling interrupts
         * because interrupt handlers won't sanitize 'ret' if the return is
         * from the kernel.
         */
        UNTRAIN_RET_VM

        /*
         * Clear all general purpose registers except RSP and RAX to prevent
         * speculative use of the guest's values, even those that are reloaded
         * via the stack.  In theory, an L1 cache miss when restoring registers
         * could lead to speculative execution with the guest's values.
         * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
         * free.  RSP and RAX are exempt as they are restored by hardware
         * during VM-Exit.
         */
        xor %ecx, %ecx
        xor %edx, %edx
        xor %ebx, %ebx
        xor %ebp, %ebp
        xor %esi, %esi
        xor %edi, %edi
#ifdef CONFIG_X86_64
        xor %r8d,  %r8d
        xor %r9d,  %r9d
        xor %r10d, %r10d
        xor %r11d, %r11d
        xor %r12d, %r12d
        xor %r13d, %r13d
        xor %r14d, %r14d
        xor %r15d, %r15d
#endif

        /* "Pop" @spec_ctrl_intercepted.  */
        pop %_ASM_BX

        pop %_ASM_BX

#ifdef CONFIG_X86_64
        pop %r12
        pop %r13
        pop %r14
        pop %r15
#else
        pop %esi
        pop %edi
#endif
        pop %_ASM_BP
        RET

        RESTORE_GUEST_SPEC_CTRL_BODY
        RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP)
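/*
 * Fixup targets for the exception table entries below: a fault on VMLOAD,
 * VMRUN or VMSAVE resumes at the corresponding continuation label (2, 4, 6
 * or 8) if kvm_rebooting is set (faults are expected when SVM is being
 * disabled for reboot); otherwise the ud2 reports a fatal bug.
 */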

10:     cmpb $0, _ASM_RIP(kvm_rebooting)
        jne 2b
        ud2
30:     cmpb $0, _ASM_RIP(kvm_rebooting)
        jne 4b
        ud2
50:     cmpb $0, _ASM_RIP(kvm_rebooting)
        jne 6b
        ud2
70:     cmpb $0, _ASM_RIP(kvm_rebooting)
        jne 8b
        ud2

        _ASM_EXTABLE(1b, 10b)
        _ASM_EXTABLE(3b, 30b)
        _ASM_EXTABLE(5b, 50b)
        _ASM_EXTABLE(7b, 70b)

SYM_FUNC_END(__svm_vcpu_run)

#ifdef CONFIG_KVM_AMD_SEV


#ifdef CONFIG_X86_64
#define SEV_ES_GPRS_BASE 0x300
#define SEV_ES_RBX      (SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE)
#define SEV_ES_RBP      (SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE)
#define SEV_ES_RSI      (SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE)
#define SEV_ES_RDI      (SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE)
#define SEV_ES_R12      (SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE)
#define SEV_ES_R13      (SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE)
#define SEV_ES_R14      (SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE)
#define SEV_ES_R15      (SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE)
#endif
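/*
 * Example expansion: __VCPU_REGS_RBX is 3 and WORD_SIZE is 8, so SEV_ES_RBX
 * is 0x318, i.e. RBX's slot within the GPR block that starts at offset
 * SEV_ES_GPRS_BASE (0x300) in the host save area.
 */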

/**
 * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
 * @svm:        struct vcpu_svm *
 * @spec_ctrl_intercepted: bool
 * @hostsa:     host save area (passed in RDX), used to preserve GPRs across VMRUN
 */
SYM_FUNC_START(__svm_sev_es_vcpu_run)
        FRAME_BEGIN

        /*
         * Save non-volatile (callee-saved) registers to the host save area.
         * Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not
         * saved on VMRUN.
         */
        mov %rbp, SEV_ES_RBP (%rdx)
        mov %r15, SEV_ES_R15 (%rdx)
        mov %r14, SEV_ES_R14 (%rdx)
        mov %r13, SEV_ES_R13 (%rdx)
        mov %r12, SEV_ES_R12 (%rdx)
        mov %rbx, SEV_ES_RBX (%rdx)

        /*
         * Save volatile registers that hold arguments that are needed after
         * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted).
         */
        mov %rdi, SEV_ES_RDI (%rdx)
        mov %rsi, SEV_ES_RSI (%rdx)

        /* Clobbers RAX, RCX, and RDX (@hostsa), consumes RDI (@svm). */
        RESTORE_GUEST_SPEC_CTRL

        /* Get svm->current_vmcb->pa into RAX. */
        mov SVM_current_vmcb(%rdi), %rax
        mov KVM_VMCB_pa(%rax), %rax

        /* Clobbers EFLAGS.ZF */
        SVM_CLEAR_CPU_BUFFERS

        /* Enter guest mode */
1:      vmrun %rax
2:
        /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
        FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT

        /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */
        RESTORE_HOST_SPEC_CTRL

        /*
         * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
         * untrained as soon as we exit the VM and are back to the
         * kernel. This should be done before re-enabling interrupts
         * because interrupt handlers won't sanitize RET if the return is
         * from the kernel.
         */
        UNTRAIN_RET_VM

        FRAME_END
        RET

        RESTORE_GUEST_SPEC_CTRL_BODY
        RESTORE_HOST_SPEC_CTRL_BODY %sil

3:      cmpb $0, kvm_rebooting(%rip)
        jne 2b
        ud2

        _ASM_EXTABLE(1b, 3b)

SYM_FUNC_END(__svm_sev_es_vcpu_run)
#endif /* CONFIG_KVM_AMD_SEV */