root/arch/loongarch/include/asm/asmmacro.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
#ifndef _ASM_ASMMACRO_H
#define _ASM_ASMMACRO_H

#include <linux/sizes.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>
#include <asm/fpregdef.h>
#include <asm/loongarch.h>

#ifdef CONFIG_64BIT
#define TASK_STRUCT_OFFSET 0
#else
#define TASK_STRUCT_OFFSET 2000
#endif

        .macro  cpu_save_nonscratch thread
        LONG_SPTR       s0, \thread, (THREAD_REG23 - TASK_STRUCT_OFFSET)
        LONG_SPTR       s1, \thread, (THREAD_REG24 - TASK_STRUCT_OFFSET)
        LONG_SPTR       s2, \thread, (THREAD_REG25 - TASK_STRUCT_OFFSET)
        LONG_SPTR       s3, \thread, (THREAD_REG26 - TASK_STRUCT_OFFSET)
        LONG_SPTR       s4, \thread, (THREAD_REG27 - TASK_STRUCT_OFFSET)
        LONG_SPTR       s5, \thread, (THREAD_REG28 - TASK_STRUCT_OFFSET)
        LONG_SPTR       s6, \thread, (THREAD_REG29 - TASK_STRUCT_OFFSET)
        LONG_SPTR       s7, \thread, (THREAD_REG30 - TASK_STRUCT_OFFSET)
        LONG_SPTR       s8, \thread, (THREAD_REG31 - TASK_STRUCT_OFFSET)
        LONG_SPTR       ra, \thread, (THREAD_REG01 - TASK_STRUCT_OFFSET)
        LONG_SPTR       sp, \thread, (THREAD_REG03 - TASK_STRUCT_OFFSET)
        LONG_SPTR       fp, \thread, (THREAD_REG22 - TASK_STRUCT_OFFSET)
        .endm

        .macro  cpu_restore_nonscratch thread
        LONG_LPTR       s0, \thread, (THREAD_REG23 - TASK_STRUCT_OFFSET)
        LONG_LPTR       s1, \thread, (THREAD_REG24 - TASK_STRUCT_OFFSET)
        LONG_LPTR       s2, \thread, (THREAD_REG25 - TASK_STRUCT_OFFSET)
        LONG_LPTR       s3, \thread, (THREAD_REG26 - TASK_STRUCT_OFFSET)
        LONG_LPTR       s4, \thread, (THREAD_REG27 - TASK_STRUCT_OFFSET)
        LONG_LPTR       s5, \thread, (THREAD_REG28 - TASK_STRUCT_OFFSET)
        LONG_LPTR       s6, \thread, (THREAD_REG29 - TASK_STRUCT_OFFSET)
        LONG_LPTR       s7, \thread, (THREAD_REG30 - TASK_STRUCT_OFFSET)
        LONG_LPTR       s8, \thread, (THREAD_REG31 - TASK_STRUCT_OFFSET)
        LONG_LPTR       ra, \thread, (THREAD_REG01 - TASK_STRUCT_OFFSET)
        LONG_LPTR       sp, \thread, (THREAD_REG03 - TASK_STRUCT_OFFSET)
        LONG_LPTR       fp, \thread, (THREAD_REG22 - TASK_STRUCT_OFFSET)
        .endm

        .macro fpu_save_csr thread tmp
        movfcsr2gr      \tmp, fcsr0
#ifdef CONFIG_32BIT
        st.w            \tmp, \thread, THREAD_FCSR
#else
        stptr.w         \tmp, \thread, THREAD_FCSR
#endif
#ifdef CONFIG_CPU_HAS_LBT
        /* TM bit is always 0 if LBT not supported */
        andi            \tmp, \tmp, FPU_CSR_TM
        beqz            \tmp, 1f
        /* Save FTOP */
        x86mftop        \tmp
        stptr.w         \tmp, \thread, THREAD_FTOP
        /* Turn off TM to ensure the order of FPR in memory independent of TM */
        x86clrtm
1:
#endif
        .endm

        .macro fpu_restore_csr thread tmp0 tmp1
#ifdef CONFIG_32BIT
        ld.w            \tmp0, \thread, THREAD_FCSR
#else
        ldptr.w         \tmp0, \thread, THREAD_FCSR
#endif
        movgr2fcsr      fcsr0, \tmp0
#ifdef CONFIG_CPU_HAS_LBT
        /* TM bit is always 0 if LBT not supported */
        andi            \tmp0, \tmp0, FPU_CSR_TM
        beqz            \tmp0, 2f
        /* Restore FTOP */
        ldptr.w         \tmp0, \thread, THREAD_FTOP
        andi            \tmp0, \tmp0, 0x7
        la.pcrel        \tmp1, 1f
        alsl.d          \tmp1, \tmp0, \tmp1, 3
        jr              \tmp1
1:
        x86mttop        0
        b       2f
        x86mttop        1
        b       2f
        x86mttop        2
        b       2f
        x86mttop        3
        b       2f
        x86mttop        4
        b       2f
        x86mttop        5
        b       2f
        x86mttop        6
        b       2f
        x86mttop        7
2:
#endif
        .endm

#ifdef CONFIG_32BIT
        .macro fpu_save_cc thread tmp0 tmp1
        movcf2gr        \tmp0, $fcc0
        move            \tmp1, \tmp0
        movcf2gr        \tmp0, $fcc1
        bstrins.w       \tmp1, \tmp0, 15, 8
        movcf2gr        \tmp0, $fcc2
        bstrins.w       \tmp1, \tmp0, 23, 16
        movcf2gr        \tmp0, $fcc3
        bstrins.w       \tmp1, \tmp0, 31, 24
        st.w            \tmp1, \thread, THREAD_FCC
        movcf2gr        \tmp0, $fcc4
        move            \tmp1, \tmp0
        movcf2gr        \tmp0, $fcc5
        bstrins.w       \tmp1, \tmp0, 15, 8
        movcf2gr        \tmp0, $fcc6
        bstrins.w       \tmp1, \tmp0, 23, 16
        movcf2gr        \tmp0, $fcc7
        bstrins.w       \tmp1, \tmp0, 31, 24
        st.w            \tmp1, \thread, (THREAD_FCC + 4)
        .endm

        .macro fpu_restore_cc thread tmp0 tmp1
        ld.w            \tmp0, \thread, THREAD_FCC
        bstrpick.w      \tmp1, \tmp0, 7, 0
        movgr2cf        $fcc0, \tmp1
        bstrpick.w      \tmp1, \tmp0, 15, 8
        movgr2cf        $fcc1, \tmp1
        bstrpick.w      \tmp1, \tmp0, 23, 16
        movgr2cf        $fcc2, \tmp1
        bstrpick.w      \tmp1, \tmp0, 31, 24
        movgr2cf        $fcc3, \tmp1
        ld.w            \tmp0, \thread, (THREAD_FCC + 4)
        bstrpick.w      \tmp1, \tmp0, 7, 0
        movgr2cf        $fcc4, \tmp1
        bstrpick.w      \tmp1, \tmp0, 15, 8
        movgr2cf        $fcc5, \tmp1
        bstrpick.w      \tmp1, \tmp0, 23, 16
        movgr2cf        $fcc6, \tmp1
        bstrpick.w      \tmp1, \tmp0, 31, 24
        movgr2cf        $fcc7, \tmp1
        .endm
#else
        .macro fpu_save_cc thread tmp0 tmp1
        movcf2gr        \tmp0, $fcc0
        move            \tmp1, \tmp0
        movcf2gr        \tmp0, $fcc1
        bstrins.d       \tmp1, \tmp0, 15, 8
        movcf2gr        \tmp0, $fcc2
        bstrins.d       \tmp1, \tmp0, 23, 16
        movcf2gr        \tmp0, $fcc3
        bstrins.d       \tmp1, \tmp0, 31, 24
        movcf2gr        \tmp0, $fcc4
        bstrins.d       \tmp1, \tmp0, 39, 32
        movcf2gr        \tmp0, $fcc5
        bstrins.d       \tmp1, \tmp0, 47, 40
        movcf2gr        \tmp0, $fcc6
        bstrins.d       \tmp1, \tmp0, 55, 48
        movcf2gr        \tmp0, $fcc7
        bstrins.d       \tmp1, \tmp0, 63, 56
        stptr.d         \tmp1, \thread, THREAD_FCC
        .endm

        .macro fpu_restore_cc thread tmp0 tmp1
        ldptr.d         \tmp0, \thread, THREAD_FCC
        bstrpick.d      \tmp1, \tmp0, 7, 0
        movgr2cf        $fcc0, \tmp1
        bstrpick.d      \tmp1, \tmp0, 15, 8
        movgr2cf        $fcc1, \tmp1
        bstrpick.d      \tmp1, \tmp0, 23, 16
        movgr2cf        $fcc2, \tmp1
        bstrpick.d      \tmp1, \tmp0, 31, 24
        movgr2cf        $fcc3, \tmp1
        bstrpick.d      \tmp1, \tmp0, 39, 32
        movgr2cf        $fcc4, \tmp1
        bstrpick.d      \tmp1, \tmp0, 47, 40
        movgr2cf        $fcc5, \tmp1
        bstrpick.d      \tmp1, \tmp0, 55, 48
        movgr2cf        $fcc6, \tmp1
        bstrpick.d      \tmp1, \tmp0, 63, 56
        movgr2cf        $fcc7, \tmp1
        .endm
#endif

        .macro  fpu_save_double thread tmp
        li.w    \tmp, THREAD_FPR0
        PTR_ADD \tmp, \tmp, \thread
        fst.d   $f0, \tmp, THREAD_FPR0  - THREAD_FPR0
        fst.d   $f1, \tmp, THREAD_FPR1  - THREAD_FPR0
        fst.d   $f2, \tmp, THREAD_FPR2  - THREAD_FPR0
        fst.d   $f3, \tmp, THREAD_FPR3  - THREAD_FPR0
        fst.d   $f4, \tmp, THREAD_FPR4  - THREAD_FPR0
        fst.d   $f5, \tmp, THREAD_FPR5  - THREAD_FPR0
        fst.d   $f6, \tmp, THREAD_FPR6  - THREAD_FPR0
        fst.d   $f7, \tmp, THREAD_FPR7  - THREAD_FPR0
        fst.d   $f8, \tmp, THREAD_FPR8  - THREAD_FPR0
        fst.d   $f9, \tmp, THREAD_FPR9  - THREAD_FPR0
        fst.d   $f10, \tmp, THREAD_FPR10 - THREAD_FPR0
        fst.d   $f11, \tmp, THREAD_FPR11 - THREAD_FPR0
        fst.d   $f12, \tmp, THREAD_FPR12 - THREAD_FPR0
        fst.d   $f13, \tmp, THREAD_FPR13 - THREAD_FPR0
        fst.d   $f14, \tmp, THREAD_FPR14 - THREAD_FPR0
        fst.d   $f15, \tmp, THREAD_FPR15 - THREAD_FPR0
        fst.d   $f16, \tmp, THREAD_FPR16 - THREAD_FPR0
        fst.d   $f17, \tmp, THREAD_FPR17 - THREAD_FPR0
        fst.d   $f18, \tmp, THREAD_FPR18 - THREAD_FPR0
        fst.d   $f19, \tmp, THREAD_FPR19 - THREAD_FPR0
        fst.d   $f20, \tmp, THREAD_FPR20 - THREAD_FPR0
        fst.d   $f21, \tmp, THREAD_FPR21 - THREAD_FPR0
        fst.d   $f22, \tmp, THREAD_FPR22 - THREAD_FPR0
        fst.d   $f23, \tmp, THREAD_FPR23 - THREAD_FPR0
        fst.d   $f24, \tmp, THREAD_FPR24 - THREAD_FPR0
        fst.d   $f25, \tmp, THREAD_FPR25 - THREAD_FPR0
        fst.d   $f26, \tmp, THREAD_FPR26 - THREAD_FPR0
        fst.d   $f27, \tmp, THREAD_FPR27 - THREAD_FPR0
        fst.d   $f28, \tmp, THREAD_FPR28 - THREAD_FPR0
        fst.d   $f29, \tmp, THREAD_FPR29 - THREAD_FPR0
        fst.d   $f30, \tmp, THREAD_FPR30 - THREAD_FPR0
        fst.d   $f31, \tmp, THREAD_FPR31 - THREAD_FPR0
        .endm

        .macro  fpu_restore_double thread tmp
        li.w    \tmp, THREAD_FPR0
        PTR_ADD \tmp, \tmp, \thread
        fld.d   $f0, \tmp, THREAD_FPR0  - THREAD_FPR0
        fld.d   $f1, \tmp, THREAD_FPR1  - THREAD_FPR0
        fld.d   $f2, \tmp, THREAD_FPR2  - THREAD_FPR0
        fld.d   $f3, \tmp, THREAD_FPR3  - THREAD_FPR0
        fld.d   $f4, \tmp, THREAD_FPR4  - THREAD_FPR0
        fld.d   $f5, \tmp, THREAD_FPR5  - THREAD_FPR0
        fld.d   $f6, \tmp, THREAD_FPR6  - THREAD_FPR0
        fld.d   $f7, \tmp, THREAD_FPR7  - THREAD_FPR0
        fld.d   $f8, \tmp, THREAD_FPR8  - THREAD_FPR0
        fld.d   $f9, \tmp, THREAD_FPR9  - THREAD_FPR0
        fld.d   $f10, \tmp, THREAD_FPR10 - THREAD_FPR0
        fld.d   $f11, \tmp, THREAD_FPR11 - THREAD_FPR0
        fld.d   $f12, \tmp, THREAD_FPR12 - THREAD_FPR0
        fld.d   $f13, \tmp, THREAD_FPR13 - THREAD_FPR0
        fld.d   $f14, \tmp, THREAD_FPR14 - THREAD_FPR0
        fld.d   $f15, \tmp, THREAD_FPR15 - THREAD_FPR0
        fld.d   $f16, \tmp, THREAD_FPR16 - THREAD_FPR0
        fld.d   $f17, \tmp, THREAD_FPR17 - THREAD_FPR0
        fld.d   $f18, \tmp, THREAD_FPR18 - THREAD_FPR0
        fld.d   $f19, \tmp, THREAD_FPR19 - THREAD_FPR0
        fld.d   $f20, \tmp, THREAD_FPR20 - THREAD_FPR0
        fld.d   $f21, \tmp, THREAD_FPR21 - THREAD_FPR0
        fld.d   $f22, \tmp, THREAD_FPR22 - THREAD_FPR0
        fld.d   $f23, \tmp, THREAD_FPR23 - THREAD_FPR0
        fld.d   $f24, \tmp, THREAD_FPR24 - THREAD_FPR0
        fld.d   $f25, \tmp, THREAD_FPR25 - THREAD_FPR0
        fld.d   $f26, \tmp, THREAD_FPR26 - THREAD_FPR0
        fld.d   $f27, \tmp, THREAD_FPR27 - THREAD_FPR0
        fld.d   $f28, \tmp, THREAD_FPR28 - THREAD_FPR0
        fld.d   $f29, \tmp, THREAD_FPR29 - THREAD_FPR0
        fld.d   $f30, \tmp, THREAD_FPR30 - THREAD_FPR0
        fld.d   $f31, \tmp, THREAD_FPR31 - THREAD_FPR0
        .endm

        .macro  lsx_save_data thread tmp
        li.w    \tmp, THREAD_FPR0
        PTR_ADD \tmp, \thread, \tmp
        vst     $vr0, \tmp, THREAD_FPR0  - THREAD_FPR0
        vst     $vr1, \tmp, THREAD_FPR1  - THREAD_FPR0
        vst     $vr2, \tmp, THREAD_FPR2  - THREAD_FPR0
        vst     $vr3, \tmp, THREAD_FPR3  - THREAD_FPR0
        vst     $vr4, \tmp, THREAD_FPR4  - THREAD_FPR0
        vst     $vr5, \tmp, THREAD_FPR5  - THREAD_FPR0
        vst     $vr6, \tmp, THREAD_FPR6  - THREAD_FPR0
        vst     $vr7, \tmp, THREAD_FPR7  - THREAD_FPR0
        vst     $vr8, \tmp, THREAD_FPR8  - THREAD_FPR0
        vst     $vr9, \tmp, THREAD_FPR9  - THREAD_FPR0
        vst     $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0
        vst     $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0
        vst     $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0
        vst     $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0
        vst     $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0
        vst     $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0
        vst     $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0
        vst     $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0
        vst     $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0
        vst     $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0
        vst     $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0
        vst     $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0
        vst     $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0
        vst     $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0
        vst     $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0
        vst     $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0
        vst     $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0
        vst     $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0
        vst     $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0
        vst     $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0
        vst     $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0
        vst     $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0
        .endm

        .macro  lsx_restore_data thread tmp
        li.w    \tmp, THREAD_FPR0
        PTR_ADD \tmp, \thread, \tmp
        vld     $vr0, \tmp, THREAD_FPR0  - THREAD_FPR0
        vld     $vr1, \tmp, THREAD_FPR1  - THREAD_FPR0
        vld     $vr2, \tmp, THREAD_FPR2  - THREAD_FPR0
        vld     $vr3, \tmp, THREAD_FPR3  - THREAD_FPR0
        vld     $vr4, \tmp, THREAD_FPR4  - THREAD_FPR0
        vld     $vr5, \tmp, THREAD_FPR5  - THREAD_FPR0
        vld     $vr6, \tmp, THREAD_FPR6  - THREAD_FPR0
        vld     $vr7, \tmp, THREAD_FPR7  - THREAD_FPR0
        vld     $vr8, \tmp, THREAD_FPR8  - THREAD_FPR0
        vld     $vr9, \tmp, THREAD_FPR9  - THREAD_FPR0
        vld     $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0
        vld     $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0
        vld     $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0
        vld     $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0
        vld     $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0
        vld     $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0
        vld     $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0
        vld     $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0
        vld     $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0
        vld     $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0
        vld     $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0
        vld     $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0
        vld     $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0
        vld     $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0
        vld     $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0
        vld     $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0
        vld     $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0
        vld     $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0
        vld     $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0
        vld     $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0
        vld     $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0
        vld     $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0
        .endm

        .macro  lsx_save_all    thread tmp0 tmp1
        fpu_save_cc             \thread, \tmp0, \tmp1
        fpu_save_csr            \thread, \tmp0
        lsx_save_data           \thread, \tmp0
        .endm

        .macro  lsx_restore_all thread tmp0 tmp1
        lsx_restore_data        \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
        fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm

        .macro  lsx_save_upper vd base tmp off
        vpickve2gr.d    \tmp, \vd, 1
        st.d            \tmp, \base, (\off+8)
        .endm

        .macro  lsx_save_all_upper thread base tmp
        li.w            \tmp, THREAD_FPR0
        PTR_ADD         \base, \thread, \tmp
        lsx_save_upper  $vr0,  \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
        lsx_save_upper  $vr1,  \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
        lsx_save_upper  $vr2,  \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
        lsx_save_upper  $vr3,  \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
        lsx_save_upper  $vr4,  \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
        lsx_save_upper  $vr5,  \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
        lsx_save_upper  $vr6,  \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
        lsx_save_upper  $vr7,  \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
        lsx_save_upper  $vr8,  \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
        lsx_save_upper  $vr9,  \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
        lsx_save_upper  $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
        lsx_save_upper  $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
        lsx_save_upper  $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
        lsx_save_upper  $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
        lsx_save_upper  $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
        lsx_save_upper  $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
        lsx_save_upper  $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
        lsx_save_upper  $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
        lsx_save_upper  $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
        lsx_save_upper  $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
        lsx_save_upper  $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
        lsx_save_upper  $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
        lsx_save_upper  $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
        lsx_save_upper  $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
        lsx_save_upper  $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
        lsx_save_upper  $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
        lsx_save_upper  $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
        lsx_save_upper  $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
        lsx_save_upper  $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
        lsx_save_upper  $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
        lsx_save_upper  $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
        lsx_save_upper  $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
        .endm

        .macro  lsx_restore_upper vd base tmp off
        ld.d            \tmp, \base, (\off+8)
        vinsgr2vr.d     \vd,  \tmp, 1
        .endm

        .macro  lsx_restore_all_upper thread base tmp
        li.w              \tmp, THREAD_FPR0
        PTR_ADD           \base, \thread, \tmp
        lsx_restore_upper $vr0,  \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
        lsx_restore_upper $vr1,  \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
        lsx_restore_upper $vr2,  \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
        lsx_restore_upper $vr3,  \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
        lsx_restore_upper $vr4,  \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
        lsx_restore_upper $vr5,  \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
        lsx_restore_upper $vr6,  \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
        lsx_restore_upper $vr7,  \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
        lsx_restore_upper $vr8,  \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
        lsx_restore_upper $vr9,  \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
        lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
        lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
        lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
        lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
        lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
        lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
        lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
        lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
        lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
        lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
        lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
        lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
        lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
        lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
        lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
        lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
        lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
        lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
        lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
        lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
        lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
        lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
        .endm

        .macro  lsx_init_upper vd tmp
        vinsgr2vr.d     \vd, \tmp, 1
        .endm

        .macro  lsx_init_all_upper tmp
        not             \tmp, zero
        lsx_init_upper  $vr0 \tmp
        lsx_init_upper  $vr1 \tmp
        lsx_init_upper  $vr2 \tmp
        lsx_init_upper  $vr3 \tmp
        lsx_init_upper  $vr4 \tmp
        lsx_init_upper  $vr5 \tmp
        lsx_init_upper  $vr6 \tmp
        lsx_init_upper  $vr7 \tmp
        lsx_init_upper  $vr8 \tmp
        lsx_init_upper  $vr9 \tmp
        lsx_init_upper  $vr10 \tmp
        lsx_init_upper  $vr11 \tmp
        lsx_init_upper  $vr12 \tmp
        lsx_init_upper  $vr13 \tmp
        lsx_init_upper  $vr14 \tmp
        lsx_init_upper  $vr15 \tmp
        lsx_init_upper  $vr16 \tmp
        lsx_init_upper  $vr17 \tmp
        lsx_init_upper  $vr18 \tmp
        lsx_init_upper  $vr19 \tmp
        lsx_init_upper  $vr20 \tmp
        lsx_init_upper  $vr21 \tmp
        lsx_init_upper  $vr22 \tmp
        lsx_init_upper  $vr23 \tmp
        lsx_init_upper  $vr24 \tmp
        lsx_init_upper  $vr25 \tmp
        lsx_init_upper  $vr26 \tmp
        lsx_init_upper  $vr27 \tmp
        lsx_init_upper  $vr28 \tmp
        lsx_init_upper  $vr29 \tmp
        lsx_init_upper  $vr30 \tmp
        lsx_init_upper  $vr31 \tmp
        .endm

        .macro  lasx_save_data thread tmp
        li.w    \tmp, THREAD_FPR0
        PTR_ADD \tmp, \thread, \tmp
        xvst    $xr0, \tmp, THREAD_FPR0  - THREAD_FPR0
        xvst    $xr1, \tmp, THREAD_FPR1  - THREAD_FPR0
        xvst    $xr2, \tmp, THREAD_FPR2  - THREAD_FPR0
        xvst    $xr3, \tmp, THREAD_FPR3  - THREAD_FPR0
        xvst    $xr4, \tmp, THREAD_FPR4  - THREAD_FPR0
        xvst    $xr5, \tmp, THREAD_FPR5  - THREAD_FPR0
        xvst    $xr6, \tmp, THREAD_FPR6  - THREAD_FPR0
        xvst    $xr7, \tmp, THREAD_FPR7  - THREAD_FPR0
        xvst    $xr8, \tmp, THREAD_FPR8  - THREAD_FPR0
        xvst    $xr9, \tmp, THREAD_FPR9  - THREAD_FPR0
        xvst    $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0
        xvst    $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0
        xvst    $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0
        xvst    $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0
        xvst    $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0
        xvst    $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0
        xvst    $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0
        xvst    $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0
        xvst    $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0
        xvst    $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0
        xvst    $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0
        xvst    $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0
        xvst    $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0
        xvst    $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0
        xvst    $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0
        xvst    $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0
        xvst    $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0
        xvst    $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0
        xvst    $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0
        xvst    $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0
        xvst    $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0
        xvst    $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0
        .endm

        .macro  lasx_restore_data thread tmp
        li.w    \tmp, THREAD_FPR0
        PTR_ADD \tmp, \thread, \tmp
        xvld    $xr0, \tmp, THREAD_FPR0  - THREAD_FPR0
        xvld    $xr1, \tmp, THREAD_FPR1  - THREAD_FPR0
        xvld    $xr2, \tmp, THREAD_FPR2  - THREAD_FPR0
        xvld    $xr3, \tmp, THREAD_FPR3  - THREAD_FPR0
        xvld    $xr4, \tmp, THREAD_FPR4  - THREAD_FPR0
        xvld    $xr5, \tmp, THREAD_FPR5  - THREAD_FPR0
        xvld    $xr6, \tmp, THREAD_FPR6  - THREAD_FPR0
        xvld    $xr7, \tmp, THREAD_FPR7  - THREAD_FPR0
        xvld    $xr8, \tmp, THREAD_FPR8  - THREAD_FPR0
        xvld    $xr9, \tmp, THREAD_FPR9  - THREAD_FPR0
        xvld    $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0
        xvld    $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0
        xvld    $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0
        xvld    $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0
        xvld    $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0
        xvld    $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0
        xvld    $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0
        xvld    $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0
        xvld    $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0
        xvld    $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0
        xvld    $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0
        xvld    $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0
        xvld    $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0
        xvld    $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0
        xvld    $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0
        xvld    $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0
        xvld    $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0
        xvld    $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0
        xvld    $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0
        xvld    $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0
        xvld    $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0
        xvld    $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0
        .endm

        .macro  lasx_save_all   thread tmp0 tmp1
        fpu_save_cc             \thread, \tmp0, \tmp1
        fpu_save_csr            \thread, \tmp0
        lasx_save_data          \thread, \tmp0
        .endm

        .macro  lasx_restore_all thread tmp0 tmp1
        lasx_restore_data       \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
        fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm

        .macro  lasx_save_upper xd base tmp off
        /* Nothing */
        .endm

        .macro  lasx_save_all_upper thread base tmp
        /* Nothing */
        .endm

        .macro  lasx_restore_upper xd base tmp0 tmp1 off
        vld             \tmp0, \base, (\off+16)
        xvpermi.q       \xd,   \tmp1, 0x2
        .endm

        .macro  lasx_restore_all_upper thread base tmp
        li.w            \tmp, THREAD_FPR0
        PTR_ADD         \base, \thread, \tmp
        /* Save $vr31 ($xr31 lower bits) with xvpickve2gr */
        xvpickve2gr.d   $r17, $xr31, 0
        xvpickve2gr.d   $r18, $xr31, 1
        lasx_restore_upper $xr0, \base, $vr31, $xr31, (THREAD_FPR0-THREAD_FPR0)
        lasx_restore_upper $xr1, \base, $vr31, $xr31, (THREAD_FPR1-THREAD_FPR0)
        lasx_restore_upper $xr2, \base, $vr31, $xr31, (THREAD_FPR2-THREAD_FPR0)
        lasx_restore_upper $xr3, \base, $vr31, $xr31, (THREAD_FPR3-THREAD_FPR0)
        lasx_restore_upper $xr4, \base, $vr31, $xr31, (THREAD_FPR4-THREAD_FPR0)
        lasx_restore_upper $xr5, \base, $vr31, $xr31, (THREAD_FPR5-THREAD_FPR0)
        lasx_restore_upper $xr6, \base, $vr31, $xr31, (THREAD_FPR6-THREAD_FPR0)
        lasx_restore_upper $xr7, \base, $vr31, $xr31, (THREAD_FPR7-THREAD_FPR0)
        lasx_restore_upper $xr8, \base, $vr31, $xr31, (THREAD_FPR8-THREAD_FPR0)
        lasx_restore_upper $xr9, \base, $vr31, $xr31, (THREAD_FPR9-THREAD_FPR0)
        lasx_restore_upper $xr10, \base, $vr31, $xr31, (THREAD_FPR10-THREAD_FPR0)
        lasx_restore_upper $xr11, \base, $vr31, $xr31, (THREAD_FPR11-THREAD_FPR0)
        lasx_restore_upper $xr12, \base, $vr31, $xr31, (THREAD_FPR12-THREAD_FPR0)
        lasx_restore_upper $xr13, \base, $vr31, $xr31, (THREAD_FPR13-THREAD_FPR0)
        lasx_restore_upper $xr14, \base, $vr31, $xr31, (THREAD_FPR14-THREAD_FPR0)
        lasx_restore_upper $xr15, \base, $vr31, $xr31, (THREAD_FPR15-THREAD_FPR0)
        lasx_restore_upper $xr16, \base, $vr31, $xr31, (THREAD_FPR16-THREAD_FPR0)
        lasx_restore_upper $xr17, \base, $vr31, $xr31, (THREAD_FPR17-THREAD_FPR0)
        lasx_restore_upper $xr18, \base, $vr31, $xr31, (THREAD_FPR18-THREAD_FPR0)
        lasx_restore_upper $xr19, \base, $vr31, $xr31, (THREAD_FPR19-THREAD_FPR0)
        lasx_restore_upper $xr20, \base, $vr31, $xr31, (THREAD_FPR20-THREAD_FPR0)
        lasx_restore_upper $xr21, \base, $vr31, $xr31, (THREAD_FPR21-THREAD_FPR0)
        lasx_restore_upper $xr22, \base, $vr31, $xr31, (THREAD_FPR22-THREAD_FPR0)
        lasx_restore_upper $xr23, \base, $vr31, $xr31, (THREAD_FPR23-THREAD_FPR0)
        lasx_restore_upper $xr24, \base, $vr31, $xr31, (THREAD_FPR24-THREAD_FPR0)
        lasx_restore_upper $xr25, \base, $vr31, $xr31, (THREAD_FPR25-THREAD_FPR0)
        lasx_restore_upper $xr26, \base, $vr31, $xr31, (THREAD_FPR26-THREAD_FPR0)
        lasx_restore_upper $xr27, \base, $vr31, $xr31, (THREAD_FPR27-THREAD_FPR0)
        lasx_restore_upper $xr28, \base, $vr31, $xr31, (THREAD_FPR28-THREAD_FPR0)
        lasx_restore_upper $xr29, \base, $vr31, $xr31, (THREAD_FPR29-THREAD_FPR0)
        lasx_restore_upper $xr30, \base, $vr31, $xr31, (THREAD_FPR30-THREAD_FPR0)
        lasx_restore_upper $xr31, \base, $vr31, $xr31, (THREAD_FPR31-THREAD_FPR0)
        /* Restore $vr31 ($xr31 lower bits) with xvinsgr2vr */
        xvinsgr2vr.d    $xr31, $r17, 0
        xvinsgr2vr.d    $xr31, $r18, 1
        .endm

        .macro  lasx_init_upper xd tmp
        xvinsgr2vr.d    \xd, \tmp, 2
        xvinsgr2vr.d    \xd, \tmp, 3
        .endm

        .macro  lasx_init_all_upper tmp
        not             \tmp, zero
        lasx_init_upper $xr0 \tmp
        lasx_init_upper $xr1 \tmp
        lasx_init_upper $xr2 \tmp
        lasx_init_upper $xr3 \tmp
        lasx_init_upper $xr4 \tmp
        lasx_init_upper $xr5 \tmp
        lasx_init_upper $xr6 \tmp
        lasx_init_upper $xr7 \tmp
        lasx_init_upper $xr8 \tmp
        lasx_init_upper $xr9 \tmp
        lasx_init_upper $xr10 \tmp
        lasx_init_upper $xr11 \tmp
        lasx_init_upper $xr12 \tmp
        lasx_init_upper $xr13 \tmp
        lasx_init_upper $xr14 \tmp
        lasx_init_upper $xr15 \tmp
        lasx_init_upper $xr16 \tmp
        lasx_init_upper $xr17 \tmp
        lasx_init_upper $xr18 \tmp
        lasx_init_upper $xr19 \tmp
        lasx_init_upper $xr20 \tmp
        lasx_init_upper $xr21 \tmp
        lasx_init_upper $xr22 \tmp
        lasx_init_upper $xr23 \tmp
        lasx_init_upper $xr24 \tmp
        lasx_init_upper $xr25 \tmp
        lasx_init_upper $xr26 \tmp
        lasx_init_upper $xr27 \tmp
        lasx_init_upper $xr28 \tmp
        lasx_init_upper $xr29 \tmp
        lasx_init_upper $xr30 \tmp
        lasx_init_upper $xr31 \tmp
        .endm

.macro not dst src
        nor     \dst, \src, zero
.endm

.macro la_abs reg, sym
#ifndef CONFIG_RELOCATABLE
        la.abs  \reg, \sym
#else
        766:
        lu12i.w \reg, 0
        ori     \reg, \reg, 0
#ifdef CONFIG_64BIT
        lu32i.d \reg, 0
        lu52i.d \reg, \reg, 0
#endif
        .pushsection ".la_abs", "aw", %progbits
        .p2align PTRLOG
        PTR     766b
        PTR     \sym
        .popsection
#endif
.endm

#endif /* _ASM_ASMMACRO_H */