root/usr/src/uts/i86pc/ml/mpcore.S
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 *
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2023 Oxide Computer Co.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

#include <sys/segments.h>
#include "assym.h"

/*
 *      Our assumptions:
 *              - We are running in real mode.
 *              - Interrupts are disabled.
 *              - Selectors are equal (cs == ds == ss) for all real mode code
 *              - The GDT, IDT, ktss and page directory has been built for us
 *
 *      Our actions:
 *      Start CPU:
 *              - We start using our GDT by loading correct values in the
 *                selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
 *                gs=KGS_SEL).
 *              - We change over to using our IDT.
 *              - We load the default LDT into the hardware LDT register.
 *              - We load the default TSS into the hardware task register.
 *              - call mp_startup(void) indirectly through the T_PC
 *      Stop CPU:
 *              - Put CPU into halted state with interrupts disabled
 *
 */

        ENTRY_NP(real_mode_start_cpu)

        /*
         * NOTE:  The GNU assembler automatically does the right thing to
         *        generate data size operand prefixes based on the code size
         *        generation mode (e.g. .code16, .code32, .code64) and as such
         *        prefixes need not be used on instructions EXCEPT in the case
         *        of address prefixes for code for which the reference is not
         *        automatically of the default operand size.
         */
        .code16
        cli
        movw            %cs, %ax
        movw            %ax, %ds        /* load cs into ds */
        movw            %ax, %ss        /* and into ss */

        /*
         * Helps in debugging by giving us the fault address.
         *
         * Remember to patch a hlt (0xf4) at cmntrap to get a good stack.
         */
        movl            $0xffc, %esp
        movl            %cr0, %eax

        /*
         * Enable protected-mode, write protect, and alignment mask
         */
        orl             $(CR0_PE|CR0_WP|CR0_AM), %eax
        movl            %eax, %cr0

        /*
         * Do a jmp immediately after writing to cr0 when enabling protected
         * mode to clear the real mode prefetch queue (per Intel's docs)
         */
        jmp             pestart

pestart:
        /*
         * 16-bit protected mode is now active, so prepare to turn on long
         * mode.
         */

        /*
         * Add any initial cr4 bits
         */
        movl            %cr4, %eax
        addr32 orl      CR4OFF, %eax

        /*
         * Enable PAE mode (CR4.PAE)
         */
        orl             $CR4_PAE, %eax
        movl            %eax, %cr4

        /*
         * Point cr3 to the 64-bit long mode page tables.
         *
         * Note that these MUST exist in 32-bit space, as we don't have
         * a way to load %cr3 with a 64-bit base address for the page tables
         * until the CPU is actually executing in 64-bit long mode.
         */
        addr32 movl     CR3OFF, %eax
        movl            %eax, %cr3

        /*
         * Set long mode enable in EFER (EFER.LME = 1)
         */
        movl    $MSR_AMD_EFER, %ecx
        rdmsr
        orl     $AMD_EFER_LME, %eax
        wrmsr

        /*
         * Finally, turn on paging (CR0.PG = 1) to activate long mode.
         */
        movl    %cr0, %eax
        orl     $CR0_PG, %eax
        movl    %eax, %cr0

        /*
         * The instruction after enabling paging in CR0 MUST be a branch.
         */
        jmp     long_mode_active

long_mode_active:
        /*
         * Long mode is now active but since we're still running with the
         * original 16-bit CS we're actually in 16-bit compatability mode.
         *
         * We have to load an intermediate GDT and IDT here that we know are
         * in 32-bit space before we can use the kernel's GDT and IDT, which
         * may be in the 64-bit address space, and since we're in compatability
         * mode, we only have access to 16 and 32-bit instructions at the
         * moment.
         */
        addr32 lgdtl    TEMPGDTOFF      /* load temporary GDT */
        addr32 lidtl    TEMPIDTOFF      /* load temporary IDT */

        /*
         * Do a far transfer to 64-bit mode.  Set the CS selector to a 64-bit
         * long mode selector (CS.L=1) in the temporary 32-bit GDT and jump
         * to the real mode platter address of long_mode 64 as until the 64-bit
         * CS is in place we don't have access to 64-bit instructions and thus
         * can't reference a 64-bit %rip.
         */
        pushl           $TEMP_CS64_SEL
        addr32 pushl    LM64OFF
        lretl

        .globl  long_mode_64
long_mode_64:
        .code64
        /*
         * We are now running in long mode with a 64-bit CS (EFER.LMA=1,
         * CS.L=1) so we now have access to 64-bit instructions.
         *
         * First, set the 64-bit GDT base.
         */
        .globl  rm_platter_pa
        movl    rm_platter_pa, %eax
        lgdtq   GDTROFF(%rax)           /* load 64-bit GDT */

        /*
         * Save the CPU number in %r11; get the value here since it's saved in
         * the real mode platter.
         */
        movl    CPUNOFF(%rax), %r11d

        /*
         * Add rm_platter_pa to %rsp to point it to the same location as seen
         * from 64-bit mode.
         */
        addq    %rax, %rsp

        /*
         * Now do an lretq to load CS with the appropriate selector for the
         * kernel's 64-bit GDT and to start executing 64-bit setup code at the
         * virtual address where boot originally loaded this code rather than
         * the copy in the real mode platter's rm_code array as we've been
         * doing so far.
         */
        pushq   $KCS_SEL
        pushq   $kernel_cs_code
        lretq
        .globl real_mode_start_cpu_end
real_mode_start_cpu_end:
        nop

kernel_cs_code:
        /*
         * Complete the balance of the setup we need to before executing
         * 64-bit kernel code (namely init rsp, TSS, LGDT, FS and GS).
         */
        .globl  rm_platter_va
        movq    rm_platter_va, %rax
        lidtq   IDTROFF(%rax)

        movw    $KDS_SEL, %ax
        movw    %ax, %ds
        movw    %ax, %es
        movw    %ax, %ss

        movw    $KTSS_SEL, %ax          /* setup kernel TSS */
        ltr     %ax

        xorw    %ax, %ax                /* clear LDTR */
        lldt    %ax

        /*
         * Set GS to the address of the per-cpu structure as contained in
         * cpu[cpu_number].
         *
         * Unfortunately there's no way to set the 64-bit gsbase with a mov,
         * so we have to stuff the low 32 bits in %eax and the high 32 bits in
         * %edx, then call wrmsr.
         */
        leaq    cpu(%rip), %rdi
        movl    (%rdi, %r11, 8), %eax
        movl    4(%rdi, %r11, 8), %edx
        movl    $MSR_AMD_GSBASE, %ecx
        wrmsr

        /*
         * Init FS and KernelGSBase.
         *
         * Based on code in mlsetup(), set them both to 8G (which shouldn't be
         * valid until some 64-bit processes run); this will then cause an
         * exception in any code that tries to index off them before they are
         * properly setup.
         */
        xorl    %eax, %eax              /* low 32 bits = 0 */
        movl    $2, %edx                /* high 32 bits = 2 */
        movl    $MSR_AMD_FSBASE, %ecx
        wrmsr

        movl    $MSR_AMD_KGSBASE, %ecx
        wrmsr

        /*
         * Init %rsp to the exception stack set in tss_ist1 and create a legal
         * AMD64 ABI stack frame
         */
        movq    %gs:CPU_TSS, %rax
        movq    TSS_IST1(%rax), %rsp
        pushq   $0              /* null return address */
        pushq   $0              /* null frame pointer terminates stack trace */
        movq    %rsp, %rbp      /* stack aligned on 16-byte boundary */

        /*
         * Get %cr0 into the state we (mostly) want, including turning on the
         * caches.
         */
        movq    %cr0, %rax
        andq    $~(CR0_CD|CR0_NW|CR0_TS|CR0_EM), %rax
        orq     $(CR0_MP|CR0_NE), %rax
        movq    %rax, %cr0              /* set machine status word */

        /*
         * Before going any further, enable usage of page table NX bit if
         * that's how our page tables are set up.
         */
        btl     $X86FSET_NX, x86_featureset(%rip)
        jnc     1f
        movl    $MSR_AMD_EFER, %ecx
        rdmsr
        orl     $AMD_EFER_NXE, %eax
        wrmsr
1:

        /*
         * Complete the rest of the setup and call mp_startup().
         */
        movq    %gs:CPU_THREAD, %rax    /* get thread ptr */
        movq    T_PC(%rax), %rax
        INDIRECT_CALL_REG(rax)          /* call mp_startup_boot */
        /* not reached */
        int     $20                     /* whoops, returned somehow! */

        SET_SIZE(real_mode_start_cpu)

        ENTRY_NP(real_mode_stop_cpu_stage1)

        /*
         * NOTE:  The GNU assembler automatically does the right thing to
         *        generate data size operand prefixes based on the code size
         *        generation mode (e.g. .code16, .code32, .code64) and as such
         *        prefixes need not be used on instructions EXCEPT in the case
         *        of address prefixes for code for which the reference is not
         *        automatically of the default operand size.
         */
        .code16
        cli
        movw            %cs, %ax
        movw            %ax, %ds        /* load cs into ds */
        movw            %ax, %ss        /* and into ss */

        /*
         * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
         */
        movw            $CPUHALTCODEOFF, %ax
        jmp             *%ax

        .globl real_mode_stop_cpu_stage1_end
real_mode_stop_cpu_stage1_end:
        nop

        SET_SIZE(real_mode_stop_cpu_stage1)

        ENTRY_NP(real_mode_stop_cpu_stage2)

        movw            $0xdead, %ax
        movw            %ax, CPUHALTEDOFF

real_mode_stop_cpu_loop:
        /*
         * Put CPU into halted state.
         * Only INIT, SMI, NMI could break the loop.
         */
        hlt
        jmp             real_mode_stop_cpu_loop

        .globl real_mode_stop_cpu_stage2_end
real_mode_stop_cpu_stage2_end:
        nop

        SET_SIZE(real_mode_stop_cpu_stage2)