root/sys/i386/i386/exception.S
/*-
 * Copyright (c) 1989, 1990 William F. Jolitz.
 * Copyright (c) 1990 The Regents of the University of California.
 * Copyright (c) 2007, 2018 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 * Portions of this software were developed by Konstantin Belousov
 * <kib@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_apic.h"
#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"
#include "opt_hyperv.h"

#include "assym.inc"

#include <machine/psl.h>
#include <machine/asmacros.h>
#include <machine/trap.h>

#ifdef KDTRACE_HOOKS
/*
 * Hook words used by the DTrace path of the invalid-opcode vector,
 * IDTVEC(ill).  Both are 4-byte, 4-byte-aligned, zero-filled objects
 * in .bss:
 *
 *   dtrace_invop_jump_addr     - compared with zero and jumped through by
 *                                IDTVEC(ill); zero means no hook is set.
 *   dtrace_invop_calltrap_addr - IDTVEC(ill) stores the address of
 *                                norm_ill here (with a 32-bit movl)
 *                                before jumping to the hook.
 *
 * The storage reserved for each object matches its declared .size.
 */
        .bss
        .globl  dtrace_invop_jump_addr
        .align  4
        .type   dtrace_invop_jump_addr, @object
        .size   dtrace_invop_jump_addr, 4
dtrace_invop_jump_addr:
        .zero   4
        .globl  dtrace_invop_calltrap_addr
        .align  4
        .type   dtrace_invop_calltrap_addr, @object
        .size   dtrace_invop_calltrap_addr, 4
dtrace_invop_calltrap_addr:
        .zero   4
#endif
        /*
         * start_exceptions opens the exception code; the matching
         * end_exceptions symbol is emitted at the end of this file.
         *
         * tramp_idleptd is a 32-bit word, zero as assembled, placed in
         * .text right after the start_exceptions label.  Handlers in this
         * file load its value into %cr3; nothing in this file writes it.
         * NOTE(review): presumably filled in by startup code - confirm.
         */
        .text
ENTRY(start_exceptions)
        .globl  tramp_idleptd
tramp_idleptd:  .long   0

/*****************************************************************************/
/* Trap handling                                                             */
/*****************************************************************************/
/*
 * Trap and fault vector routines.
 *
 * All traps are 'interrupt gates', SDT_SYS386IGT.  The hardware therefore
 * disables interrupts on entry, so that none is taken until the code has
 * switched to the kernel address space and the kernel thread stack.
 *
 * The cpu will push a certain amount of state onto the kernel stack for
 * the current process.  The amount of state depends on the type of trap
 * and whether the trap crossed rings or not.  See i386/include/frame.h.
 * At the very least the current EFLAGS (status register, which includes
 * the interrupt disable state prior to the trap), the code segment register,
 * and the return instruction pointer are pushed by the cpu.  The cpu
 * will also push an 'error' code for certain traps.  We push a dummy
 * error code for those traps where the cpu doesn't in order to maintain
 * a consistent frame.  We also push a contrived 'trap number'.
 *
 * The cpu does not push the general registers, we must do that, and we
 * must restore them prior to calling 'iret'.  The cpu adjusts the %cs and
 * %ss segment registers, but does not mess with %ds, %es, or %fs.  Thus we
 * must load them with appropriate values for supervisor mode operation.
 *
 * This code is not executed at its linked address; it is copied to the
 * trampoline area.  As a consequence, all code here and in the included
 * files must be position-independent (PIC).
 */

#define TRAP(a)         pushl $(a) ; jmp alltraps

/*
 * Simple vectors.  Where the CPU supplies no error code, a zero is pushed
 * in its place so that every frame has the same layout; TRAP() then pushes
 * the trap number and jumps to alltraps.
 */
IDTVEC(div)
        pushl   $0                      /* placeholder error code */
        TRAP(T_DIVIDE)
IDTVEC(bpt)
        pushl   $0                      /* placeholder error code */
        TRAP(T_BPTFLT)
IDTVEC(dtrace_ret)
        pushl   $0                      /* placeholder error code */
        TRAP(T_DTRACE_RET)
IDTVEC(ofl)
        pushl   $0                      /* placeholder error code */
        TRAP(T_OFLOW)
IDTVEC(bnd)
        pushl   $0                      /* placeholder error code */
        TRAP(T_BOUND)
#ifndef KDTRACE_HOOKS
/* With KDTRACE_HOOKS the ill vector is provided further down instead. */
IDTVEC(ill)
        pushl   $0                      /* placeholder error code */
        TRAP(T_PRIVINFLT)
#endif
IDTVEC(dna)
        pushl   $0                      /* placeholder error code */
        TRAP(T_DNA)
IDTVEC(fpusegm)
        pushl   $0                      /* placeholder error code */
        TRAP(T_FPOPFLT)

/* No placeholder here: only the trap number is pushed. */
IDTVEC(tss)
        TRAP(T_TSSFLT)

/*
 * The next three vectors push only the trap number and go through
 * irettraps rather than alltraps.
 */
IDTVEC(missing)
        pushl   $T_SEGNPFLT
        jmp     irettraps
IDTVEC(stk)
        pushl   $T_STKFLT
        jmp     irettraps
IDTVEC(prot)
        pushl   $T_PROTFLT
        jmp     irettraps
/*
 * Page fault vector.  No placeholder error code is pushed here; on entry
 * the saved %eip, %cs and %eflags are addressed relative to TF_ERR, i.e.
 * %esp points at the tf_err slot.
 *
 * A fault taken in vm86 mode, with a non-zero RPL in the saved %cs, or
 * with the faulting %eip below PMAP_TRM_MIN_ADDRESS goes straight to the
 * generic path at upf.  Otherwise the faulting %eip is matched against
 * the pf_x1..pf_x8 and pf_y1..pf_y2 labels as described below.
 */
IDTVEC(page)
        testl   $PSL_VM, TF_EFLAGS-TF_ERR(%esp)
        jnz     upf
        testb   $SEL_RPL_MASK, TF_CS-TF_ERR(%esp)
        jnz     upf
        cmpl    $PMAP_TRM_MIN_ADDRESS, TF_EIP-TF_ERR(%esp)
        jb      upf

        /*
         * This is a handshake between copyout_fast.s and page fault
         * handler.  We check for page fault occurring at the special
         * places in the copyout fast path, where page fault can
         * legitimately happen while accessing either user space or
         * kernel pageable memory, and return control to *%edx.
         * We switch to the idleptd page table from a user page table,
         * if needed.
         */
        pushl   %eax
        /* The extra +4 skips the %eax that was just pushed. */
        movl    TF_EIP-TF_ERR+4(%esp), %eax
        /*
         * Translate the faulting %eip from its run-time address to the
         * corresponding link-time address, so that it can be compared
         * with the link-time pf_* constants: add the link-time address
         * of 1f here, then helper 5f subtracts the run-time address of
         * 1f, which is the return address pushed by the call.
         */
        addl    $1f, %eax
        call    5f
1:      cmpl    $pf_x1, %eax
        je      2f
        cmpl    $pf_x2, %eax
        je      2f
        cmpl    $pf_x3, %eax
        je      2f
        cmpl    $pf_x4, %eax
        je      2f
        cmpl    $pf_x5, %eax
        je      2f
        cmpl    $pf_x6, %eax
        je      2f
        cmpl    $pf_x7, %eax
        je      2f
        cmpl    $pf_x8, %eax
        je      2f
        /* pf_y* matches skip the %cr3 reload done at 2: below. */
        cmpl    $pf_y1, %eax
        je      4f
        cmpl    $pf_y2, %eax
        je      4f
        /* No special location matched: ordinary page fault. */
        jmp     upf_eax
        /*
         * pf_x* match.  Form the run-time address of tramp_idleptd
         * (its link-time offset from 3f, plus the run-time address of
         * 3f added by helper 6f) and load the word stored there into
         * %cr3.
         */
2:      movl    $tramp_idleptd, %eax
        subl    $3f, %eax
        call    6f
3:      movl    (%eax), %eax
        movl    %eax, %cr3
        /*
         * Resume at the address held in %edx: restore %eax, overwrite
         * the saved %eip, drop the error code and return.
         */
4:      popl    %eax
        movl    %edx, TF_EIP-TF_ERR(%esp)
        addl    $4, %esp
        iret
        /* Helper: %eax -= return address of the caller. */
5:      subl    (%esp), %eax
        retl
        /* Helper: %eax += return address of the caller. */
6:      addl    (%esp), %eax
        retl

        /*
         * Generic page fault path.  upf_eax first restores the %eax
         * saved above; upf pushes the trap number and joins alltraps.
         */
upf_eax:popl    %eax
upf:    pushl   $T_PAGEFLT
        jmp     alltraps
/*
 * rsvd_pti and rsvd are two names for the same entry point.  As with the
 * other simple vectors, a zero is pushed where no error code is supplied,
 * and TRAP() pushes the trap number and jumps to alltraps.
 */
IDTVEC(rsvd_pti)
IDTVEC(rsvd)
        pushl   $0                      /* placeholder error code */
        TRAP(T_RESERVED)
IDTVEC(fpu)
        pushl   $0                      /* placeholder error code */
        TRAP(T_ARITHTRAP)

/* No placeholder here: only the trap number is pushed. */
IDTVEC(align)
        TRAP(T_ALIGNFLT)

IDTVEC(xmm)
        pushl   $0                      /* placeholder error code */
        TRAP(T_XMMFLT)

        /*
         * All traps except ones for syscalls or invalid segment,
         * jump to alltraps.  If
         * interrupts were enabled when the trap occurred, then interrupts
         * are enabled now if the trap was through a trap gate, else
         * disabled if the trap was through an interrupt gate.  Note that
         * int0x80_syscall is a trap gate.   Interrupt gates are used by
         * page faults, non-maskable interrupts, debug and breakpoint
         * exceptions.
         *
         * On entry the vector has already pushed the error code (or a
         * zero placeholder) and the trap number.  PUSH_FRAME2,
         * SET_KERNEL_SREGS and KENTER are macros defined outside this
         * file.
         *
         * calltrap is also entered directly by other handlers in this
         * file once they have built the frame themselves.  It passes
         * %esp, the address of the frame, as the single argument of
         * trap().  The call goes through a register holding the absolute
         * address of trap(), which remains valid when this code runs from
         * its relocated copy.
         */
        SUPERALIGN_TEXT
        .globl  alltraps
        .type   alltraps,@function
alltraps:
        PUSH_FRAME2
alltraps_with_regs_pushed:
        SET_KERNEL_SREGS
        cld
        KENTER
calltrap:
        pushl   %esp
        movl    $trap,%eax
        call    *%eax
        /* Drop the frame-pointer argument. */
        add     $4, %esp

        /*
         * Return via doreti to handle ASTs.
         */
        jmp     doreti

        /*
         * irettraps: common entry for the missing, stk and prot vectors.
         * On entry the trap number is on top of the stack, so the saved
         * %cs and %eflags are addressed relative to TF_TRAPNO.
         *
         * Faults from vm86 mode or with a non-zero RPL in the saved %cs
         * are handed to alltraps unchanged.
         */
        .globl  irettraps
        .type   irettraps,@function
irettraps:
        testl   $PSL_VM, TF_EFLAGS-TF_TRAPNO(%esp)
        jnz     alltraps
        testb   $SEL_RPL_MASK, TF_CS-TF_TRAPNO(%esp)
        jnz     alltraps

        /*
         * Kernel mode.
         * The special case there is the kernel mode with user %cr3 and
         * trampoline stack. We need to copy both current frame and the
         * hardware portion of the frame we tried to return to, to the
         * normal stack.  This logic must follow the stack unwind order
         * in doreti.
         */
        PUSH_FRAME2
        SET_KERNEL_SREGS
        cld
        /*
         * call/pop leaves the run-time address of label 1 in %ebx; all
         * "(sym - 1b)(%ebx)" operands below are run-time addresses.
         */
        call    1f
1:      popl    %ebx
        /*
         * Compare the faulting %eip with each instruction of the doreti
         * exit sequence (iret, popl %ds, popl %es, popl %fs).  On a match
         * %ecx is set to the number of bytes to copy; with no match the
         * fault is passed to calltrap as is.
         */
        leal    (doreti_iret - 1b)(%ebx), %edx
        cmpl    %edx, TF_EIP(%esp)
        jne     2f
        /* -8 because exception did not switch ring */
        movl    $(2 * TF_SZ - TF_EIP - 8), %ecx
        jmp     5f
2:      leal    (doreti_popl_ds - 1b)(%ebx), %edx
        cmpl    %edx, TF_EIP(%esp)
        jne     3f
        movl    $(2 * TF_SZ - TF_DS - 8), %ecx
        jmp     5f
3:      leal    (doreti_popl_es - 1b)(%ebx), %edx
        cmpl    %edx, TF_EIP(%esp)
        jne     4f
        movl    $(2 * TF_SZ - TF_ES - 8), %ecx
        jmp     5f
4:      leal    (doreti_popl_fs - 1b)(%ebx), %edx
        cmpl    %edx, TF_EIP(%esp)
        jne     calltrap
        movl    $(2 * TF_SZ - TF_FS - 8), %ecx
5:      cmpl    $PMAP_TRM_MIN_ADDRESS, %esp     /* trampoline stack ? */
        jb      calltrap          /* if not, no need to change stacks */
        /* Load the value stored at tramp_idleptd into %cr3. */
        movl    (tramp_idleptd - 1b)(%ebx), %eax
        movl    %eax, %cr3
        /*
         * Copy %ecx bytes from the current stack to the area ending at
         * PCPU(KESP0), then continue on the copy.
         */
        movl    PCPU(KESP0), %edx
        subl    %ecx, %edx
        movl    %edx, %edi
        movl    %esp, %esi
        rep; movsb
        movl    %edx, %esp
        /* kernel mode, normal */
        jmp     calltrap

/*
 * Privileged instruction fault.
 *
 * KDTRACE_HOOKS variant of the ill vector.  No error code or trap number
 * has been pushed yet when the checks below run, so the saved %cs is at
 * 4(%esp) and the saved %eflags at 8(%esp).
 */
#ifdef KDTRACE_HOOKS
        SUPERALIGN_TEXT
IDTVEC(ill)
        /*
         * Check if this is a user fault.  If so, just handle it as a normal
         * trap.
         */
        testl   $PSL_VM, 8(%esp)        /* and vm86 mode. */
        jnz     norm_ill
        /*
         * NOTE(review): this compares the whole 32-bit stack slot with
         * GSEL_KPL, not just the low 16 bits that hold the selector -
         * confirm that the upper half of the slot is always zero here.
         */
        cmpl    $GSEL_KPL, 4(%esp)      /* Check the code segment */
        jne     norm_ill

        /*
         * Check if a DTrace hook is registered.  The trampoline cannot
         * be instrumented.
         */
        cmpl    $0, dtrace_invop_jump_addr
        je      norm_ill

        /*
         * This is a kernel instruction fault that might have been caused
         * by a DTrace provider.
         */
        pushal
        cld

        /*
         * Set our jump address for the jump back in the event that
         * the exception wasn't caused by DTrace at all.
         */
        movl    $norm_ill, dtrace_invop_calltrap_addr

        /*
         * Jump to the code hooked in by DTrace.  The registers saved by
         * pushal above are still on the stack at this point.
         * NOTE(review): the hook is presumably expected to remove them
         * before it jumps back through dtrace_invop_calltrap_addr -
         * confirm in the hook implementation.
         */
        jmpl    *dtrace_invop_jump_addr

        /*
         * Process the instruction fault in the normal way.
         */
norm_ill:
        pushl   $0
        pushl   $T_PRIVINFLT
        jmp     alltraps
#endif

/*
 * See comment in the handler for the kernel case T_TRCTRAP in trap.c.
 * The exception handler must be ready to execute with wrong %cr3.
 * We save original %cr3 in frame->tf_err, similarly to NMI and MCE
 * handlers.
 *
 * Two exits:
 *  - kernel mode (not vm86, zero RPL in the saved %cs): joins calltrap
 *    with tf_trapno still T_TRCTRAP, so doreti_exit takes the
 *    doreti_iret_nmi path and reloads %cr3 from tf_err.
 *  - vm86 or non-zero RPL (dbg_user): calls trap() here and then
 *    rewrites tf_trapno to T_RESERVED before entering doreti, so that
 *    doreti_exit does not take the doreti_iret_nmi path.
 */
IDTVEC(dbg)
        pushl   $0
        pushl   $T_TRCTRAP
        PUSH_FRAME2
        SET_KERNEL_SREGS
        cld
        /* Stash the %cr3 in effect at entry in the tf_err slot. */
        movl    %cr3, %eax
        movl    %eax, TF_ERR(%esp)
        /*
         * call/pop yields the run-time address of label 1; use it to
         * read tramp_idleptd and load that value into %cr3.
         */
        call    1f
1:      popl    %eax
        movl    (tramp_idleptd - 1b)(%eax), %eax
        movl    %eax, %cr3
        testl   $PSL_VM, TF_EFLAGS(%esp)
        jnz     dbg_user
        testb   $SEL_RPL_MASK,TF_CS(%esp)
        jz      calltrap
dbg_user:
        NMOVE_STACKS
        movl    $handle_ibrs_entry,%eax
        call    *%eax
        /* trap(frame): %esp is the address of the frame. */
        pushl   %esp
        movl    $trap,%eax
        call    *%eax
        add     $4, %esp
        movl    $T_RESERVED, TF_TRAPNO(%esp)
        jmp     doreti

/*
 * Machine check and NMI vectors.  Each pushes a zero error-code
 * placeholder and its own trap number; both then share nmi_mchk_common,
 * which builds the frame, saves the entry %cr3 in tf_err, loads the
 * value stored at tramp_idleptd into %cr3 and joins calltrap on the
 * current stack.
 */
IDTVEC(mchk)
        pushl   $0
        pushl   $T_MCHK
        jmp     nmi_mchk_common

IDTVEC(nmi)
        pushl   $0
        pushl   $T_NMI
nmi_mchk_common:
        PUSH_FRAME2
        SET_KERNEL_SREGS
        cld
        /*
         * Save %cr3 into tf_err.  There is no good place to put it.
         * Always reload %cr3, since we might have interrupted the
         * kernel entry or exit.
         * Do not switch to the thread kernel stack, otherwise we might
         * obliterate the previous context partially copied from the
         * trampoline stack.
         * Do not re-enable IBRS, there is no good place to store
         * previous state if we come from the kernel.
         */
        movl    %cr3, %eax
        movl    %eax, TF_ERR(%esp)
        /* call/pop yields the run-time address of label 1. */
        call    1f
1:      popl    %eax
        movl    (tramp_idleptd - 1b)(%eax), %eax
        movl    %eax, %cr3
        jmp     calltrap

/*
 * Trap gate entry for syscalls (int 0x80).
 * This is used by FreeBSD ELF executables, "new" a.out executables, and all
 * Linux executables.
 *
 * Even though the name says 'int0x80', this is actually a trap gate, not an
 * interrupt gate.  Thus interrupts are enabled on entry just as they are for
 * a normal syscall.
 *
 * NOTE(review): the paragraph above is at odds with the comment at the top
 * of this file, which says all traps are interrupt gates, and with the
 * explicit sti below, which is executed only after MOVE_STACKS and
 * handle_ibrs_entry.  Confirm the gate type against the IDT setup.
 *
 * The value pushed into the tf_err slot is 2, the size of the "int 0x80"
 * instruction; the tf_trapno slot is set to 0.  syscall() receives the
 * address of the frame as its single argument.
 */
        SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
        pushl   $2                      /* sizeof "int 0x80" */
        pushl   $0                      /* tf_trapno */
        PUSH_FRAME2
        SET_KERNEL_SREGS
        cld
        MOVE_STACKS
        movl    $handle_ibrs_entry,%eax
        call    *%eax
        sti
        pushl   %esp
        movl    $syscall, %eax
        call    *%eax
        /* Drop the frame-pointer argument. */
        add     $4, %esp
        jmp     doreti

/*
 * fork_trampoline: calls fork_exit() with three stack arguments, pushed
 * in reverse order: the function in %esi, its argument in %ebx, and the
 * trapframe pointer (the value of %esp on entry).  When fork_exit()
 * returns, the arguments are dropped and control goes to doreti.
 * NOTE(review): %esi and %ebx are presumably set up by the code that
 * prepares a new thread's context - confirm against the caller.
 */
ENTRY(fork_trampoline)
        pushl   %esp                    /* trapframe pointer */
        pushl   %ebx                    /* arg1 */
        pushl   %esi                    /* function */
        movl    $fork_exit, %eax
        call    *%eax
        addl    $12,%esp
        /* cut from syscall */

        /*
         * Return via doreti to handle ASTs.
         */
        jmp     doreti


        /*
         * The interrupt vector and helper sources below are included
         * textually, so their code is assembled between start_exceptions
         * and end_exceptions.  Before most of the includes, .data is
         * aligned to a 16-byte boundary (.p2align 4) and .text is
         * realigned with SUPERALIGN_TEXT.
         */
        .data
        .p2align 4
        .text
        SUPERALIGN_TEXT

#ifdef DEV_ATPIC
#include <i386/i386/atpic_vector.S>
#endif

        /* Realign only when both interrupt controller files are included. */
#if defined(DEV_APIC) && defined(DEV_ATPIC)
        .data
        .p2align 4
        .text
        SUPERALIGN_TEXT
#endif

#ifdef DEV_APIC
#include <i386/i386/apic_vector.S>
#endif

#ifdef DEV_HYPERV
        .data
        .p2align 4
        .text
        SUPERALIGN_TEXT
#include <dev/hyperv/vmbus/i386/vmbus_vector.S>
#endif

        .data
        .p2align 4
        .text
        SUPERALIGN_TEXT
#include <i386/i386/vm86bios.S>

        .text

        /*
         * The page fault vector's comment names copyout_fast.s as the
         * other side of its pf_x and pf_y handshake.
         */
#include <i386/i386/copyout_fast.S>

/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.
 *
 * Entered by jump with %esp pointing at the trap frame.  Decides whether
 * ASTs may be processed before the frame is unwound:
 *  - tf_trapno == T_NMI: never (goes to doreti_nmi with HWPMC_HOOKS,
 *    otherwise straight to doreti_exit);
 *  - vm86 frame: only if PCB_VM86CALL is clear in the current pcb's
 *    flags; if it is set, the frame is unwound at doreti_popl_fs;
 *  - otherwise: only if the saved %cs has a non-zero RPL.
 */
        .text
        SUPERALIGN_TEXT
        .type   doreti,@function
        .globl  doreti
doreti:
doreti_next:
        /*
         * Check if ASTs can be handled now.  ASTs cannot be safely
         * processed when returning from an NMI.
         */
        cmpb    $T_NMI,TF_TRAPNO(%esp)
#ifdef HWPMC_HOOKS
        je      doreti_nmi
#else
        je      doreti_exit
#endif
        /*
         * PSL_VM must be checked first since segment registers only
         * have an RPL in non-VM86 mode.
         * ASTs can not be handled now if we are in a vm86 call.
         */
        testl   $PSL_VM,TF_EFLAGS(%esp)
        jz      doreti_notvm86
        movl    PCPU(CURPCB),%ecx
        testl   $PCB_VM86CALL,PCB_FLAGS(%ecx)
        jz      doreti_ast
        jmp     doreti_popl_fs

doreti_notvm86:
        testb   $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
        jz      doreti_exit             /* can't handle ASTs now if not */

doreti_ast:
        /*
         * Check for ASTs atomically with returning.  Disabling CPU
         * interrupts provides sufficient locking even in the SMP case,
         * since we will be informed of any new ASTs by an IPI.
         */
        cli
        movl    PCPU(CURTHREAD),%eax
        cmpl    $0,TD_AST(%eax)
        je      doreti_exit
        /*
         * The thread's TD_AST field is non-zero: re-enable interrupts,
         * call ast(frame) and then check again.
         */
        sti
        pushl   %esp                    /* pass a pointer to the trapframe */
        movl    $ast, %eax
        call    *%eax
        add     $4,%esp
        jmp     doreti_ast

        /*
         * doreti_exit: pop registers, iret.
         *
         *      The segment register pop is a special case, since it may
         *      fault if (for example) a sigreturn specifies bad segment
         *      registers.  The fault is handled in trap.c.
         *
         *      Three ways out:
         *      - tf_trapno is T_NMI, T_MCHK or T_TRCTRAP: reload %cr3
         *        from tf_err (doreti_iret_nmi), then unwind in place.
         *      - not vm86 and zero RPL in the saved %cs: unwind in place
         *        at doreti_popl_fs.
         *      - vm86 or non-zero RPL: call handle_ibrs_exit and the
         *        function stored in mds_handler, copy the frame below
         *        PCPU(TRAMPSTK), switch %esp to the copy, load %cr3 from
         *        PCB_CR3 of the current pcb, then unwind.
         */
doreti_exit:
        cmpl    $T_NMI, TF_TRAPNO(%esp)
        je      doreti_iret_nmi
        cmpl    $T_MCHK, TF_TRAPNO(%esp)
        je      doreti_iret_nmi
        cmpl    $T_TRCTRAP, TF_TRAPNO(%esp)
        je      doreti_iret_nmi
        testl   $PSL_VM,TF_EFLAGS(%esp)
        jnz     1f                      /* PCB_VM86CALL is not set */
        testl   $SEL_RPL_MASK, TF_CS(%esp)
        jz      doreti_popl_fs
1:      movl    $handle_ibrs_exit,%eax
        call    *%eax
        movl    mds_handler,%eax
        call    *%eax
        /*
         * Copy TF_SZ bytes of frame, plus VM86_STACK_SPACE for a vm86
         * frame, from the current stack to the area that ends at
         * PCPU(TRAMPSTK).
         */
        movl    %esp, %esi
        movl    PCPU(TRAMPSTK), %edx
        movl    $TF_SZ, %ecx
        testl   $PSL_VM,TF_EFLAGS(%esp)
        jz      2f                      /* PCB_VM86CALL is not set */
        addl    $VM86_STACK_SPACE, %ecx
2:      subl    %ecx, %edx
        movl    %edx, %edi
        rep; movsb
        movl    %edx, %esp
        movl    PCPU(CURPCB),%eax
        movl    PCB_CR3(%eax), %eax
        movl    %eax, %cr3

        /*
         * Frame unwind.  Each instruction that may fault has a global
         * label; irettraps in this file compares faulting addresses
         * against these labels.
         */
        .globl  doreti_popl_fs
doreti_popl_fs:
        popl    %fs
        .globl  doreti_popl_es
doreti_popl_es:
        popl    %es
        .globl  doreti_popl_ds
doreti_popl_ds:
        popl    %ds
        popal
        /* Skip the tf_trapno and tf_err slots. */
        addl    $8,%esp
        .globl  doreti_iret
doreti_iret:
        iret

        /* Reload %cr3 with the value held in tf_err, then unwind. */
doreti_iret_nmi:
        movl    TF_ERR(%esp), %eax
        movl    %eax, %cr3
        jmp     doreti_popl_fs

        /*
         * doreti_iret_fault and friends.  Alternative return code for
         * the case where we get a fault in the doreti_exit code
         * above.  trap() (i386/i386/trap.c) catches this specific
         * case, and continues in the corresponding place in the code
         * below.
         *
         * If the fault occurred during return to usermode, we recreate
         * the trap frame and call trap() to send a signal.  Otherwise
         * the kernel was tricked into fault by attempt to restore invalid
         * usermode segment selectors on return from nested fault or
         * interrupt, where interrupted kernel entry code not yet loaded
         * kernel selectors.  In the latter case, emulate iret and zero
         * the invalid selector.
         *
         * The entry points are laid out so that entering at a later one
         * re-pushes fewer frame slots.  At each *_fault label the
         * selector slot is on top of the stack, which is why the saved
         * %cs is addressed relative to TF_DS, TF_ES or TF_FS.
         */
        ALIGN_TEXT
        .globl  doreti_iret_fault
doreti_iret_fault:
        pushl   $0      /* tf_err */
        pushl   $0      /* tf_trapno XXXKIB: provide more useful value ? */
        pushal
        /* Push a zeroed slot, then store the 16-bit selector into it. */
        pushl   $0
        movw    %ds,(%esp)
        .globl  doreti_popl_ds_fault
doreti_popl_ds_fault:
        testb   $SEL_RPL_MASK,TF_CS-TF_DS(%esp)
        jz      doreti_popl_ds_kfault
        pushl   $0
        movw    %es,(%esp)
        .globl  doreti_popl_es_fault
doreti_popl_es_fault:
        testb   $SEL_RPL_MASK,TF_CS-TF_ES(%esp)
        jz      doreti_popl_es_kfault
        pushl   $0
        movw    %fs,(%esp)
        .globl  doreti_popl_fs_fault
doreti_popl_fs_fault:
        testb   $SEL_RPL_MASK,TF_CS-TF_FS(%esp)
        jz      doreti_popl_fs_kfault
        /* Full frame rebuilt: report the fault as T_PROTFLT via trap(). */
        movl    $0,TF_ERR(%esp) /* XXX should be the error code */
        movl    $T_PROTFLT,TF_TRAPNO(%esp)
        SET_KERNEL_SREGS
        jmp     calltrap

        /*
         * Zero RPL in the saved %cs: zero the selector slot on top of
         * the stack and retry the pop that uses it.
         */
doreti_popl_ds_kfault:
        movl    $0,(%esp)
        jmp     doreti_popl_ds
doreti_popl_es_kfault:
        movl    $0,(%esp)
        jmp     doreti_popl_es
doreti_popl_fs_kfault:
        movl    $0,(%esp)
        jmp     doreti_popl_fs

#ifdef HWPMC_HOOKS
/*
 * doreti_nmi: reached from doreti when tf_trapno is T_NMI.  Falls back
 * to doreti_exit unless the frame is a non-vm86 frame with a non-zero
 * RPL in the saved %cs, curthread is non-NULL and the thread has
 * TDP_CALLCHAIN set in its TD_PFLAGS field.
 */
doreti_nmi:
        /*
         * Since we are returning from an NMI, check if the current trap
         * was from user mode and if so whether the current thread
         * needs a user call chain capture.
         */
        testl   $PSL_VM, TF_EFLAGS(%esp)
        jnz     doreti_exit
        testb   $SEL_RPL_MASK,TF_CS(%esp)
        jz      doreti_exit
        movl    PCPU(CURTHREAD),%eax    /* curthread present? */
        orl     %eax,%eax
        jz      doreti_exit
        testl   $TDP_CALLCHAIN,TD_PFLAGS(%eax) /* flagged for capture? */
        jz      doreti_exit
        /*
         * Switch to thread stack.  Reset tf_trapno to not indicate NMI,
         * to cause normal userspace exit.
         */
        movl    $T_RESERVED, TF_TRAPNO(%esp)
        NMOVE_STACKS
        /*
         * Take the processor out of NMI mode by executing a fake "iret".
         * The iret frame is built by hand: current %eflags, current %cs,
         * and the run-time address of outofnmi computed with call/pop.
         */
        pushfl
        pushl   %cs
        call    1f
1:      popl    %eax
        leal    (outofnmi-1b)(%eax),%eax
        pushl   %eax
        iret
outofnmi:
        /*
         * Call the callchain capture hook after turning interrupts back on.
         * If pmc_hook is zero there is nothing to call.  Otherwise it is
         * called with three stack arguments: curthread,
         * PMC_FN_USER_CALLCHAIN and the address of the frame.
         */
        movl    pmc_hook,%ecx
        orl     %ecx,%ecx
        jz      doreti_exit
        pushl   %esp                    /* frame pointer */
        pushl   $PMC_FN_USER_CALLCHAIN  /* command */
        movl    PCPU(CURTHREAD),%eax
        pushl   %eax                    /* curthread */
        sti
        call    *%ecx
        addl    $12,%esp
        jmp     doreti_ast
#endif

/* End marker for the exception code; counterpart of start_exceptions. */
ENTRY(end_exceptions)