root/tools/test/xregs_sig/c2x2c_amd64.S
/*
 * This file is in public domain.
 * Written by Dmitry Chagin <dchagin@FreeBSD.org>
 */

#if defined(__FreeBSD__)
/* On FreeBSD the feature-bit constants used below are taken from this header. */
#include <machine/specialreg.h>
#else
/*
 * Fallback definitions for builds where the FreeBSD header is unavailable.
 */
/* Feature bits tested in %ecx after executing cpuid with %eax = 1. */
#define CPUID2_OSXSAVE                  0x08000000
#define CPUID2_AVX                      0x10000000
/* State-component bits tested in %eax after xgetbv with %ecx = 0. */
#define XFEATURE_ENABLED_X87            0x00000001
#define XFEATURE_ENABLED_SSE            0x00000002
#define XFEATURE_ENABLED_AVX            0x00000004
/* Every state component that must be enabled before AVX is reported. */
#define XFEATURE_AVX                                    \
    (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
#endif

        .text

        .globl xregs_banks_max
        .type xregs_banks_max, @function
/*
 * xregs_banks_max: report whether the AVX register state is usable.
 *
 * In:    nothing
 * Out:   %eax = 1 when cpuid (leaf 1) reports both AVX and OSXSAVE and
 *               xgetbv (index 0) shows the x87, SSE and AVX state
 *               components all enabled;
 *        %eax = 0 otherwise.
 * Saved: %rbx (overwritten by cpuid, restored before returning)
 * Clobb: %rcx, %rdx, flags
 *
 * NOTE(review): the name suggests the result is the index of the highest
 * register bank (0 = XMM only, 1 = XMM and YMM) -- confirm against caller.
 */
xregs_banks_max:
        pushq   %rbx                    /* cpuid overwrites %rbx */

        movl    $1, %eax                /* feature information leaf */
        cpuid
        xorl    %ebx, %ebx              /* result so far: 0 */

        /* Both the CPU feature and OS XSAVE support must be present. */
        andl    $(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
        cmpl    $(CPUID2_AVX|CPUID2_OSXSAVE), %ecx
        jne     .Lxregs_banks_done

        /* Read extended control register 0 into %edx:%eax. */
        xorl    %ecx, %ecx
        xgetbv
        andl    $XFEATURE_AVX, %eax
        cmpl    $XFEATURE_AVX, %eax
        sete    %bl                     /* 1 only if all bits are set */

.Lxregs_banks_done:
        movl    %ebx, %eax              /* hand the result back in %eax */
        popq    %rbx
        retq

        .size xregs_banks_max, . - xregs_banks_max


        .globl cpu_to_xmm
        .type cpu_to_xmm, @function
/*
 * cpu_to_xmm: copy the current contents of %xmm0..%xmm15 to memory.
 *
 * In:    %rdi = destination buffer; register N is written at byte offset
 *               N * 16, so 16 * 16 = 256 bytes are stored in total.
 *               Unaligned stores are used, so no alignment is required.
 * Out:   nothing
 * Clobb: nothing (no register is modified)
 */
cpu_to_xmm:
        movdqu  %xmm0, (%rdi)
        /* Remaining registers, one 16-byte slot each, in ascending order. */
        .irp    idx, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
        movdqu  %xmm\idx, \idx * 16(%rdi)
        .endr
        retq

        .size cpu_to_xmm, . - cpu_to_xmm


        .globl xmm_to_cpu
        .type xmm_to_cpu, @function
/*
 * xmm_to_cpu: load %xmm0..%xmm15 from memory.
 *
 * In:    %rdi = source buffer; register N is read from byte offset
 *               N * 16, so 16 * 16 = 256 bytes are consumed in total.
 *               Unaligned loads are used, so no alignment is required.
 * Out:   %xmm0..%xmm15 hold the buffer contents on return.
 * Clobb: %xmm0..%xmm15 (by design)
 */
xmm_to_cpu:
        movdqu  (%rdi), %xmm0
        /* Remaining registers, one 16-byte slot each, in ascending order. */
        .irp    idx, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
        movdqu  \idx * 16(%rdi), %xmm\idx
        .endr
        retq

        .size xmm_to_cpu, . - xmm_to_cpu


        .globl cpu_to_avx
        .type cpu_to_avx, @function
/*
 * cpu_to_avx: copy the current contents of %ymm0..%ymm15 to memory.
 *
 * In:    %rdi = destination buffer; register N is written at byte offset
 *               N * 32, so 16 * 32 = 512 bytes are stored in total.
 *               Unaligned stores are used, so no alignment is required.
 * Out:   nothing
 * Clobb: nothing (no register is modified)
 *
 * Uses AVX instructions; the caller is expected to have checked that AVX
 * is available before calling.
 */
cpu_to_avx:
        vmovdqu %ymm0, (%rdi)
        /* Remaining registers, one 32-byte slot each, in ascending order. */
        .irp    idx, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
        vmovdqu %ymm\idx, \idx * 32(%rdi)
        .endr
        retq

        .size cpu_to_avx, . - cpu_to_avx


        .globl avx_to_cpu
        .type avx_to_cpu, @function
/*
 * avx_to_cpu: load %ymm0..%ymm15 from memory.
 *
 * In:    %rdi = source buffer; register N is read from byte offset
 *               N * 32, so 16 * 32 = 512 bytes are consumed in total.
 *               Unaligned loads are used, so no alignment is required.
 * Out:   %ymm0..%ymm15 hold the buffer contents on return.
 * Clobb: %ymm0..%ymm15 (by design; no vzeroupper is issued, so the upper
 *        halves stay populated after return)
 *
 * Uses AVX instructions; the caller is expected to have checked that AVX
 * is available before calling.
 */
avx_to_cpu:
        vmovdqu (%rdi), %ymm0
        /* Remaining registers, one 32-byte slot each, in ascending order. */
        .irp    idx, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
        vmovdqu \idx * 32(%rdi), %ymm\idx
        .endr
        retq

        .size avx_to_cpu, . - avx_to_cpu

        /* Empty marker section: tells the linker this object does not need an executable stack. */
        .section        .note.GNU-stack,"",@progbits