root/sys/i386/i386/support.S
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.inc"

#define IDXSHIFT        10

        .text

ENTRY(sse2_pagezero)
        pushl   %ebx
        movl    8(%esp),%ecx
        movl    %ecx,%eax
        addl    $4096,%eax
        xor     %ebx,%ebx
        jmp     1f
        /*
         * The loop takes 14 bytes.  Ensure that it doesn't cross a 16-byte
         * cache line.
         */
        .p2align 4,0x90
1:
        movnti  %ebx,(%ecx)
        movnti  %ebx,4(%ecx)
        addl    $8,%ecx
        cmpl    %ecx,%eax
        jne     1b
        sfence
        popl    %ebx
        ret
END(sse2_pagezero)

ENTRY(i686_pagezero)
        pushl   %edi
        pushl   %ebx

        movl    12(%esp),%edi
        movl    $1024,%ecx

        ALIGN_TEXT
1:
        xorl    %eax,%eax
        repe
        scasl
        jnz     2f

        popl    %ebx
        popl    %edi
        ret

        ALIGN_TEXT

2:
        incl    %ecx
        subl    $4,%edi

        movl    %ecx,%edx
        cmpl    $16,%ecx

        jge     3f

        movl    %edi,%ebx
        andl    $0x3f,%ebx
        shrl    %ebx
        shrl    %ebx
        movl    $16,%ecx
        subl    %ebx,%ecx

3:
        subl    %ecx,%edx
        rep
        stosl

        movl    %edx,%ecx
        testl   %edx,%edx
        jnz     1b

        popl    %ebx
        popl    %edi
        ret
END(i686_pagezero)

/* fillw(pat, base, cnt) */
ENTRY(fillw)
        pushl   %edi
        movl    8(%esp),%eax
        movl    12(%esp),%edi
        movl    16(%esp),%ecx
        rep
        stosw
        popl    %edi
        ret
END(fillw)

/*
 * memmove(dst, src, cnt) (return dst)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(memmove)
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %esi
        pushl   %edi
        movl    8(%ebp),%edi
        movl    12(%ebp),%esi
1:
        movl    16(%ebp),%ecx

        movl    %edi,%eax
        subl    %esi,%eax
        cmpl    %ecx,%eax                       /* overlapping && src < dst? */
        jb      1f

        shrl    $2,%ecx                         /* copy by 32-bit words */
        rep
        movsl
        movl    16(%ebp),%ecx
        andl    $3,%ecx                         /* any bytes left? */
        rep
        movsb
        popl    %edi
        popl    %esi
        movl    8(%ebp),%eax                    /* return dst for memmove */
        popl    %ebp
        ret

        ALIGN_TEXT
1:
        addl    %ecx,%edi                       /* copy backwards */
        addl    %ecx,%esi
        decl    %edi
        decl    %esi
        andl    $3,%ecx                         /* any fractional bytes? */
        std
        rep
        movsb
        movl    16(%ebp),%ecx                   /* copy remainder by 32-bit words */
        shrl    $2,%ecx
        subl    $3,%esi
        subl    $3,%edi
        rep
        movsl
        popl    %edi
        popl    %esi
        cld
        movl    8(%ebp),%eax                    /* return dst for memmove */
        popl    %ebp
        ret
END(memmove)

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
        pushl   %edi
        pushl   %esi
        movl    12(%esp),%edi
        movl    16(%esp),%esi
        movl    20(%esp),%ecx
        movl    %edi,%eax
        shrl    $2,%ecx                         /* copy by 32-bit words */
        rep
        movsl
        movl    20(%esp),%ecx
        andl    $3,%ecx                         /* any bytes left? */
        rep
        movsb
        popl    %esi
        popl    %edi
        ret
END(memcpy)

/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
        /* reload the descriptor table */
        movl    4(%esp),%eax
        lgdt    (%eax)

        /* flush the prefetch q */
        jmp     1f
        nop
1:
        /* reload "stale" selectors */
        movl    $KDSEL,%eax
        movl    %eax,%ds
        movl    %eax,%es
        movl    %eax,%gs
        movl    %eax,%ss
        movl    $KPSEL,%eax
        movl    %eax,%fs

        /* reload code selector by turning return into intersegmental return */
        movl    (%esp),%eax
        pushl   %eax
        movl    $KCSEL,4(%esp)
        lret
END(lgdt)

/* ssdtosd(*ssdp,*sdp) */
ENTRY(ssdtosd)
        pushl   %ebx
        movl    8(%esp),%ecx
        movl    8(%ecx),%ebx
        shll    $16,%ebx
        movl    (%ecx),%edx
        roll    $16,%edx
        movb    %dh,%bl
        movb    %dl,%bh
        rorl    $8,%ebx
        movl    4(%ecx),%eax
        movw    %ax,%dx
        andl    $0xf0000,%eax
        orl     %eax,%ebx
        movl    12(%esp),%ecx
        movl    %edx,(%ecx)
        movl    %ebx,4(%ecx)
        popl    %ebx
        ret
END(ssdtosd)

/* void reset_dbregs() */
ENTRY(reset_dbregs)
        movl    $0,%eax
        movl    %eax,%dr7       /* disable all breakpoints first */
        movl    %eax,%dr0
        movl    %eax,%dr1
        movl    %eax,%dr2
        movl    %eax,%dr3
        movl    %eax,%dr6
        ret
END(reset_dbregs)

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
        movl    4(%esp),%eax
        movl    %ebx,(%eax)                     /* save ebx */
        movl    %esp,4(%eax)                    /* save esp */
        movl    %ebp,8(%eax)                    /* save ebp */
        movl    %esi,12(%eax)                   /* save esi */
        movl    %edi,16(%eax)                   /* save edi */
        movl    (%esp),%edx                     /* get rta */
        movl    %edx,20(%eax)                   /* save eip */
        xorl    %eax,%eax                       /* return(0); */
        ret
END(setjmp)

ENTRY(longjmp)
        movl    4(%esp),%eax
        movl    (%eax),%ebx                     /* restore ebx */
        movl    4(%eax),%esp                    /* restore esp */
        movl    8(%eax),%ebp                    /* restore ebp */
        movl    12(%eax),%esi                   /* restore esi */
        movl    16(%eax),%edi                   /* restore edi */
        movl    20(%eax),%edx                   /* get rta */
        movl    %edx,(%esp)                     /* put in return frame */
        xorl    %eax,%eax                       /* return(1); */
        incl    %eax
        ret
END(longjmp)

/*
 * Support for reading MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
        movl    PCPU(CURPCB),%ecx
        movl    $msr_onfault,PCB_ONFAULT(%ecx)

        movl    4(%esp),%ecx
        rdmsr
        movl    8(%esp),%ecx
        movl    %eax,(%ecx)
        movl    %edx,4(%ecx)
        xorl    %eax,%eax

        movl    PCPU(CURPCB),%ecx
        movl    %eax,PCB_ONFAULT(%ecx)

        ret

/*
 * Support for writing MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
        movl    PCPU(CURPCB),%ecx
        movl    $msr_onfault,PCB_ONFAULT(%ecx)

        movl    4(%esp),%ecx
        movl    8(%esp),%eax
        movl    12(%esp),%edx
        wrmsr
        xorl    %eax,%eax

        movl    PCPU(CURPCB),%ecx
        movl    %eax,PCB_ONFAULT(%ecx)

        ret

/*
 * MSR operations fault handler
 */
        ALIGN_TEXT
msr_onfault:
        movl    PCPU(CURPCB),%ecx
        movl    $0,PCB_ONFAULT(%ecx)
        movl    $EFAULT,%eax
        ret

        .altmacro
        .macro  rsb_seq_label l
rsb_seq_\l:
        .endm
        .macro  rsb_call_label l
        call    rsb_seq_\l
        .endm
        .macro  rsb_seq count
        ll=1
        .rept   \count
        rsb_call_label  %(ll)
        nop
        rsb_seq_label %(ll)
        addl    $4,%esp
        ll=ll+1
        .endr
        .endm

ENTRY(rsb_flush)
        rsb_seq 32
        ret

ENTRY(handle_ibrs_entry)
        cmpb    $0,hw_ibrs_ibpb_active
        je      1f
        movl    $MSR_IA32_SPEC_CTRL,%ecx
        rdmsr
        orl     $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
        orl     $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
        wrmsr
        movb    $1,PCPU(IBPB_SET)
        /*
         * i386 does not implement SMEP.
         */
1:      jmp     rsb_flush
END(handle_ibrs_entry)

ENTRY(handle_ibrs_exit)
        cmpb    $0,PCPU(IBPB_SET)
        je      1f
        movl    $MSR_IA32_SPEC_CTRL,%ecx
        rdmsr
        andl    $~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
        andl    $~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
        wrmsr
        movb    $0,PCPU(IBPB_SET)
1:      ret
END(handle_ibrs_exit)

ENTRY(mds_handler_void)
        ret
END(mds_handler_void)

ENTRY(mds_handler_verw)
        subl    $4, %esp
        movw    %ds, (%esp)
        verw    (%esp)
        addl    $4, %esp
        ret
END(mds_handler_verw)

ENTRY(mds_handler_ivb)
        movl    %cr0, %eax
        testb   $CR0_TS, %al
        je      1f
        clts
1:      movl    PCPU(MDS_BUF), %edx
        movdqa  %xmm0, PCPU(MDS_TMP)
        pxor    %xmm0, %xmm0

        lfence
        orpd    (%edx), %xmm0
        orpd    (%edx), %xmm0
        mfence
        movl    $40, %ecx
        addl    $16, %edx
2:      movntdq %xmm0, (%edx)
        addl    $16, %edx
        decl    %ecx
        jnz     2b
        mfence

        movdqa  PCPU(MDS_TMP),%xmm0
        testb   $CR0_TS, %al
        je      3f
        movl    %eax, %cr0
3:      ret
END(mds_handler_ivb)

ENTRY(mds_handler_bdw)
        movl    %cr0, %eax
        testb   $CR0_TS, %al
        je      1f
        clts
1:      movl    PCPU(MDS_BUF), %ebx
        movdqa  %xmm0, PCPU(MDS_TMP)
        pxor    %xmm0, %xmm0

        movl    %ebx, %edi
        movl    %ebx, %esi
        movl    $40, %ecx
2:      movntdq %xmm0, (%ebx)
        addl    $16, %ebx
        decl    %ecx
        jnz     2b
        mfence
        movl    $1536, %ecx
        rep; movsb
        lfence

        movdqa  PCPU(MDS_TMP),%xmm0
        testb   $CR0_TS, %al
        je      3f
        movl    %eax, %cr0
3:      ret
END(mds_handler_bdw)

ENTRY(mds_handler_skl_sse)
        movl    %cr0, %eax
        testb   $CR0_TS, %al
        je      1f
        clts
1:      movl    PCPU(MDS_BUF), %edi
        movl    PCPU(MDS_BUF64), %edx
        movdqa  %xmm0, PCPU(MDS_TMP)
        pxor    %xmm0, %xmm0

        lfence
        orpd    (%edx), %xmm0
        orpd    (%edx), %xmm0
        xorl    %eax, %eax
2:      clflushopt      5376(%edi, %eax, 8)
        addl    $8, %eax
        cmpl    $8 * 12, %eax
        jb      2b
        sfence
        movl    $6144, %ecx
        xorl    %eax, %eax
        rep; stosb
        mfence

        movdqa  PCPU(MDS_TMP), %xmm0
        testb   $CR0_TS, %al
        je      3f
        movl    %eax, %cr0
3:      ret
END(mds_handler_skl_sse)

ENTRY(mds_handler_skl_avx)
        movl    %cr0, %eax
        testb   $CR0_TS, %al
        je      1f
        clts
1:      movl    PCPU(MDS_BUF), %edi
        movl    PCPU(MDS_BUF64), %edx
        vmovdqa %ymm0, PCPU(MDS_TMP)
        vpxor   %ymm0, %ymm0, %ymm0

        lfence
        vorpd   (%edx), %ymm0, %ymm0
        vorpd   (%edx), %ymm0, %ymm0
        xorl    %eax, %eax
2:      clflushopt      5376(%edi, %eax, 8)
        addl    $8, %eax
        cmpl    $8 * 12, %eax
        jb      2b
        sfence
        movl    $6144, %ecx
        xorl    %eax, %eax
        rep; stosb
        mfence

        vmovdqa PCPU(MDS_TMP), %ymm0
        testb   $CR0_TS, %al
        je      3f
        movl    %eax, %cr0
3:      ret
END(mds_handler_skl_avx)

ENTRY(mds_handler_skl_avx512)
        movl    %cr0, %eax
        testb   $CR0_TS, %al
        je      1f
        clts
1:      movl    PCPU(MDS_BUF), %edi
        movl    PCPU(MDS_BUF64), %edx
        vmovdqa64       %zmm0, PCPU(MDS_TMP)
        vpxord  %zmm0, %zmm0, %zmm0

        lfence
        vorpd   (%edx), %zmm0, %zmm0
        vorpd   (%edx), %zmm0, %zmm0
        xorl    %eax, %eax
2:      clflushopt      5376(%edi, %eax, 8)
        addl    $8, %eax
        cmpl    $8 * 12, %eax
        jb      2b
        sfence
        movl    $6144, %ecx
        xorl    %eax, %eax
        rep; stosb
        mfence

        vmovdqa64       PCPU(MDS_TMP), %zmm0
        testb   $CR0_TS, %al
        je      3f
        movl    %eax, %cr0
3:      ret
END(mds_handler_skl_avx512)

ENTRY(mds_handler_silvermont)
        movl    %cr0, %eax
        testb   $CR0_TS, %al
        je      1f
        clts
1:      movl    PCPU(MDS_BUF), %edx
        movdqa  %xmm0, PCPU(MDS_TMP)
        pxor    %xmm0, %xmm0

        movl    $16, %ecx
2:      movntdq %xmm0, (%edx)
        addl    $16, %edx
        decl    %ecx
        jnz     2b
        mfence

        movdqa  PCPU(MDS_TMP),%xmm0
        testb   $CR0_TS, %al
        je      3f
        movl    %eax, %cr0
3:      ret
END(mds_handler_silvermont)

ENTRY(cpu_sync_core)
        popl    %eax
        pushfl
        pushl   %cs
        pushl   %eax
        iretl
END(cpu_sync_core)