/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
#include <asm/cpufeatures.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
#include <asm/ptrace-abi.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>

/*

 x86 function call convention, 64-bit:
 -------------------------------------
  arguments           |  callee-saved      | extra caller-saved | return
 [callee-clobbered]   |                    | [callee-clobbered] |
 ---------------------------------------------------------------------------
 rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11             | rax, rdx [**]

 ( rsp is obviously invariant across normal function calls. (gcc can 'merge'
   functions when it sees tail-call optimization possibilities.) rflags is
   clobbered. Arguments that do not fit in registers are passed on the stack. )

 [*]  In the frame-pointers case rbp is fixed to the stack frame.

 [**] For struct return values wider than 64 bits the return convention is a
      bit more complex: structures up to 128 bits wide are returned directly
      in rax, rdx. For structures larger than that (3 words or more) the
      caller puts a pointer to an on-stack return struct [allocated in the
      caller's stack frame] into the first argument - i.e. into rdi; all
      other arguments shift up by one in this case. Fortunately this case
      is rare in the kernel.
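
 As an illustration (not part of the ABI description above): for
 'long foo(long a, long b, long c)', 'a' arrives in rdi, 'b' in rsi and
 'c' in rdx, and the result comes back in rax.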

For 32-bit we have the following conventions - kernel is built with
-mregparm=3 and -freg-struct-return:

 x86 function calling convention, 32-bit:
 ----------------------------------------
  arguments         | callee-saved        | extra caller-saved | return
 [callee-clobbered] |                     | [callee-clobbered] |
 -------------------------------------------------------------------------
 eax edx ecx        | ebx edi esi ebp [*] | <none>             | eax, edx [**]

 ( here too esp is obviously invariant across normal function calls. eflags
   is clobbered. Arguments that do not fit in registers are passed on the
   stack. )

 [*]  In the frame-pointers case ebp is fixed to the stack frame.

 [**] We build with -freg-struct-return, which on 32-bit means semantics
      similar to 64-bit: edx can be used for a second return value (i.e.
      covering integer and structure sizes up to 64 bits). Beyond that it
      gets more complex and more expensive: 3-word or larger struct returns
      get done in the caller's frame and the pointer to the return struct
      goes into regparm0, i.e. eax - the other arguments shift up and the
      function's register parameters degenerate to regparm=2 in essence.
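
 As an illustration: with -mregparm=3, 'long foo(long a, long b, long c)'
 receives 'a' in eax, 'b' in edx and 'c' in ecx; a fourth argument would
 already be passed on the stack.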

*/

#ifdef CONFIG_X86_64

/*
 * 64-bit system call stack frame layout defines and helpers,
 * for assembly code:
 */

.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 unwind_hint=1
        .if \save_ret
        pushq   %rsi            /* pt_regs->si */
        movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
        movq    %rdi, 8(%rsp)   /* pt_regs->di (overwriting original return address) */
        /* We just clobbered the return address - use the IRET frame for unwinding: */
        UNWIND_HINT_IRET_REGS offset=3*8
        .else
        pushq   %rdi            /* pt_regs->di */
        pushq   %rsi            /* pt_regs->si */
        .endif
        pushq   \rdx            /* pt_regs->dx */
        pushq   \rcx            /* pt_regs->cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
        pushq   %r9             /* pt_regs->r9 */
        pushq   %r10            /* pt_regs->r10 */
        pushq   %r11            /* pt_regs->r11 */
        pushq   %rbx            /* pt_regs->bx */
        pushq   %rbp            /* pt_regs->bp */
        pushq   %r12            /* pt_regs->r12 */
        pushq   %r13            /* pt_regs->r13 */
        pushq   %r14            /* pt_regs->r14 */
        pushq   %r15            /* pt_regs->r15 */

        .if \unwind_hint
        UNWIND_HINT_REGS
        .endif

        .if \save_ret
        pushq   %rsi            /* return address on top of stack */
        .endif
.endm
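
/*
 * After PUSH_REGS (with save_ret=0) the stack mirrors the GPR area of
 * struct pt_regs; a sketch of the resulting layout, lowest address last:
 *
 *      pt_regs->di     (pushed first, highest address)
 *      pt_regs->si
 *      pt_regs->dx
 *      ...
 *      pt_regs->r15    <- %rsp
 */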

.macro CLEAR_REGS clear_callee=1
        /*
         * Sanitize registers of values that a speculation attack might
         * otherwise want to exploit. The lower registers are likely clobbered
         * well before they could be put to use in a speculative execution
         * gadget.
         */
        xorl    %esi,  %esi     /* nospec si  */
        xorl    %edx,  %edx     /* nospec dx  */
        xorl    %ecx,  %ecx     /* nospec cx  */
        xorl    %r8d,  %r8d     /* nospec r8  */
        xorl    %r9d,  %r9d     /* nospec r9  */
        xorl    %r10d, %r10d    /* nospec r10 */
        xorl    %r11d, %r11d    /* nospec r11 */
        .if \clear_callee
        xorl    %ebx,  %ebx     /* nospec bx  */
        xorl    %ebp,  %ebp     /* nospec bp  */
        xorl    %r12d, %r12d    /* nospec r12 */
        xorl    %r13d, %r13d    /* nospec r13 */
        xorl    %r14d, %r14d    /* nospec r14 */
        xorl    %r15d, %r15d    /* nospec r15 */
        .endif
.endm

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_callee=1 unwind_hint=1
        PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret, unwind_hint=\unwind_hint
        CLEAR_REGS clear_callee=\clear_callee
.endm
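
/*
 * Typical invocation, shown for illustration only (the 64-bit idtentry
 * paths use it along these lines):
 *
 *      PUSH_AND_CLEAR_REGS save_ret=1
 */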

.macro POP_REGS pop_rdi=1
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
        popq %r11
        popq %r10
        popq %r9
        popq %r8
        popq %rax
        popq %rcx
        popq %rdx
        popq %rsi
        .if \pop_rdi
        popq %rdi
        .endif
.endm

#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION

/*
 * MITIGATION_PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
 * halves:
 */
#define PTI_USER_PGTABLE_BIT            PAGE_SHIFT
#define PTI_USER_PGTABLE_MASK           (1 << PTI_USER_PGTABLE_BIT)
#define PTI_USER_PCID_BIT               X86_CR3_PTI_PCID_USER_BIT
#define PTI_USER_PCID_MASK              (1 << PTI_USER_PCID_BIT)
#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
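
/*
 * Illustration: the user CR3 value is derived from the kernel one purely
 * with bit operations - PTI_USER_PGTABLE_BIT (PAGE_SHIFT, i.e. bit 12 with
 * 4k pages) selects the user half of the 8k PGD allocation, and
 * PTI_USER_PCID_BIT selects the user ASID - so switching needs no
 * pagetable walk.
 */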

.macro SET_NOFLUSH_BIT  reg:req
        bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm

.macro ADJUST_KERNEL_CR3 reg:req
        ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
        /* Clear the PCID and the MITIGATION_PAGE_TABLE_ISOLATION bit, point CR3 at kernel pagetables: */
        andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        mov     %cr3, \scratch_reg
        ADJUST_KERNEL_CR3 \scratch_reg
        mov     \scratch_reg, %cr3
.Lend_\@:
.endm
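
/*
 * Sketch of a typical invocation (the syscall entry path does something
 * along these lines while the user stack pointer is parked elsewhere):
 *
 *      SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
 */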

#define THIS_CPU_user_pcid_flush_mask   \
        PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)

.macro SWITCH_TO_USER_CR3 scratch_reg:req scratch_reg2:req
        mov     %cr3, \scratch_reg

        ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

        /*
         * Test if the ASID needs a flush.
         */
        movq    \scratch_reg, \scratch_reg2
        andq    $(0x7FF), \scratch_reg          /* mask ASID */
        bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jnc     .Lnoflush_\@

        /* Flush needed, clear the bit */
        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        movq    \scratch_reg2, \scratch_reg
        jmp     .Lwrcr3_pcid_\@

.Lnoflush_\@:
        movq    \scratch_reg2, \scratch_reg
        SET_NOFLUSH_BIT \scratch_reg

.Lwrcr3_pcid_\@:
        /* Flip the ASID to the user version */
        orq     $(PTI_USER_PCID_MASK), \scratch_reg

.Lwrcr3_\@:
        /* Flip the PGD to the user version */
        orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
        mov     \scratch_reg, %cr3
.endm

.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        SWITCH_TO_USER_CR3 \scratch_reg \scratch_reg2
.Lend_\@:
.endm

.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
        pushq   %rax
        SWITCH_TO_USER_CR3 scratch_reg=\scratch_reg scratch_reg2=%rax
        popq    %rax
.Lend_\@:
.endm

.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
        ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
        movq    %cr3, \scratch_reg
        movq    \scratch_reg, \save_reg
        /*
         * Test the user pagetable bit. If set, then the user page tables
         * are active. If clear, CR3 already has the kernel page tables
         * active.
         */
        bt      $PTI_USER_PGTABLE_BIT, \scratch_reg
        jnc     .Ldone_\@

        ADJUST_KERNEL_CR3 \scratch_reg
        movq    \scratch_reg, %cr3

.Ldone_\@:
.endm

/* Restore CR3 from a kernel context. May restore a user CR3 value. */
.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

        /*
         * If CR3 contained the kernel page tables at the paranoid exception
         * entry, then there is nothing to restore as CR3 is not modified while
         * handling the exception.
         */
        bt      $PTI_USER_PGTABLE_BIT, \save_reg
        jnc     .Lend_\@

        ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

        /*
         * Check if there's a pending flush for the user ASID we're
         * about to set.
         */
        movq    \save_reg, \scratch_reg
        andq    $(0x7FF), \scratch_reg          /* mask ASID */
        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        jc      .Lwrcr3_\@

        SET_NOFLUSH_BIT \save_reg

.Lwrcr3_\@:
        movq    \save_reg, %cr3
.Lend_\@:
.endm
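
/*
 * The two macros above are meant to be used as a pair; a sketch of the
 * paranoid entry/exit usage (the register choices are illustrative):
 *
 *      SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 *      ... handle the exception ...
 *      PARANOID_RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
 */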

#else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=n: */

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
.endm

#endif

/*
 * IBRS kernel mitigation for Spectre_v2.
 *
 * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
 * the regs it uses (AX, CX, DX). Must be called before the first RET
 * instruction. (NOTE! UNTRAIN_RET includes a RET instruction.)
 *
 * The optional argument is used to save/restore the current value,
 * which is used on the paranoid paths.
 *
 * Assumes x86_spec_ctrl_{base,current} have SPEC_CTRL_IBRS set.
 */
.macro IBRS_ENTER save_reg
#ifdef CONFIG_MITIGATION_IBRS_ENTRY
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
        movl    $MSR_IA32_SPEC_CTRL, %ecx

.ifnb \save_reg
        rdmsr                           /* MSR value arrives in edx:eax */
        shl     $32, %rdx
        or      %rdx, %rax              /* combine into one 64-bit value */
        mov     %rax, \save_reg         /* save the old SPEC_CTRL value */
        test    $SPEC_CTRL_IBRS, %eax
        jz      .Ldo_wrmsr_\@
        lfence
        jmp     .Lend_\@
.Ldo_wrmsr_\@:
.endif

        movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
        movl    %edx, %eax              /* low half */
        shr     $32, %rdx               /* high half - wrmsr takes edx:eax */
        wrmsr
.Lend_\@:
#endif
.endm

/*
 * Similar to IBRS_ENTER; requires kernel GS and CR3 and clobbers the AX,
 * CX and DX regs. Must be called after the last RET.
 */
.macro IBRS_EXIT save_reg
#ifdef CONFIG_MITIGATION_IBRS_ENTRY
        ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
        movl    $MSR_IA32_SPEC_CTRL, %ecx

.ifnb \save_reg
        mov     \save_reg, %rdx
.else
        movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
        andl    $(~SPEC_CTRL_IBRS), %edx
.endif

        movl    %edx, %eax
        shr     $32, %rdx
        wrmsr
.Lend_\@:
#endif
.endm
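
/*
 * Sketch of a paired use on a paranoid path (the save_reg choice is
 * illustrative; it must be a callee-saved register that survives the
 * C handler):
 *
 *      IBRS_ENTER save_reg=%r15
 *      ... C handler runs with IBRS on ...
 *      IBRS_EXIT save_reg=%r15
 */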

/*
 * Mitigate Spectre v1 for conditional swapgs code paths.
 *
 * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
 * prevent a speculative swapgs when coming from kernel space.
 *
 * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
 * to prevent the swapgs from getting speculatively skipped when coming from
 * user space.
 */
.macro FENCE_SWAPGS_USER_ENTRY
        ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
.endm
.macro FENCE_SWAPGS_KERNEL_ENTRY
        ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
.endm

.macro STACKLEAK_ERASE_NOCLOBBER
#ifdef CONFIG_KSTACK_ERASE
        PUSH_AND_CLEAR_REGS
        call stackleak_erase
        POP_REGS
#endif
.endm

.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
        rdgsbase \save_reg
        GET_PERCPU_BASE \scratch_reg
        wrgsbase \scratch_reg
.endm
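
/*
 * There is no matching RESTORE macro: the caller is expected to undo this
 * on exit with a plain wrgsbase of the saved value, e.g. "wrgsbase %rbx"
 * if save_reg=%rbx was used (illustrative).
 */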

#else /* CONFIG_X86_64 */
# undef         UNWIND_HINT_IRET_REGS
# define        UNWIND_HINT_IRET_REGS
#endif /* !CONFIG_X86_64 */

.macro STACKLEAK_ERASE
#ifdef CONFIG_KSTACK_ERASE
        call stackleak_erase
#endif
.endm

#ifdef CONFIG_SMP

/*
 * CPU/node NR is loaded from the limit (size) field of a special segment
 * descriptor entry in the GDT.
 */
.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req
        movq    $__CPUNODE_SEG, \reg
        lsl     \reg, \reg
.endm

/*
 * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
 * We normally use %gs for accessing per-CPU data, but we are setting up
 * %gs here and obviously cannot use %gs itself to access per-CPU data.
 *
 * Do not use RDPID, because KVM loads the guest's TSC_AUX on vm-entry and
 * may not restore the host's value until the CPU returns to userspace.
 * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
 * while running KVM's run loop.
 */
.macro GET_PERCPU_BASE reg:req
        LOAD_CPU_AND_NODE_SEG_LIMIT \reg
        andq    $VDSO_CPUNODE_MASK, \reg                /* extract the CPU number */
        movq    __per_cpu_offset(, \reg, 8), \reg       /* reg = __per_cpu_offset[cpu] */
.endm

#else

.macro GET_PERCPU_BASE reg:req
        movq    pcpu_unit_offsets(%rip), \reg
.endm

#endif /* CONFIG_SMP */

#ifdef CONFIG_X86_64

/* rdi: arg1 ... normal C conventions. All caller-saved regs (rax included) are saved/restored. */
.macro THUNK name, func
SYM_FUNC_START(\name)
        ANNOTATE_NOENDBR
        pushq %rbp
        movq %rsp, %rbp

        pushq %rdi
        pushq %rsi
        pushq %rdx
        pushq %rcx
        pushq %rax
        pushq %r8
        pushq %r9
        pushq %r10
        pushq %r11

        call \func

        popq %r11
        popq %r10
        popq %r9
        popq %r8
        popq %rax
        popq %rcx
        popq %rdx
        popq %rsi
        popq %rdi
        popq %rbp
        RET
SYM_FUNC_END(\name)
        _ASM_NOKPROBE(\name)
.endm
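
/*
 * Example instantiation, for illustration (the real users live in the
 * entry thunk code):
 *
 *      THUNK preempt_schedule_thunk, preempt_schedule
 */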

#else /* CONFIG_X86_32 */

/* Optionally put the return address in eax (arg1). */
.macro THUNK name, func, put_ret_addr_in_eax=0
SYM_CODE_START_NOALIGN(\name)
        pushl %eax
        pushl %ecx
        pushl %edx

        .if \put_ret_addr_in_eax
        /* Place EIP in arg1: */
        movl 3*4(%esp), %eax
        .endif

        call \func
        popl %edx
        popl %ecx
        popl %eax
        RET
        _ASM_NOKPROBE(\name)
SYM_CODE_END(\name)
.endm
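
/*
 * Example instantiation with the return address passed as arg1 (the names
 * here are purely hypothetical):
 *
 *      THUNK my_trace_thunk, my_trace_func, put_ret_addr_in_eax=1
 */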

#endif