#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bottom_half.h>
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/preempt.h>
#include <linux/ptrace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/swab.h>
#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/neon.h>
#include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
#include <asm/virt.h>
#define FPEXC_IOF (1 << 0)
#define FPEXC_DZF (1 << 1)
#define FPEXC_OFF (1 << 2)
#define FPEXC_UFF (1 << 3)
#define FPEXC_IXF (1 << 4)
#define FPEXC_IDF (1 << 7)
DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
[ARM64_VEC_SVE] = {
.type = ARM64_VEC_SVE,
.name = "SVE",
.min_vl = SVE_VL_MIN,
.max_vl = SVE_VL_MIN,
.max_virtualisable_vl = SVE_VL_MIN,
},
#endif
#ifdef CONFIG_ARM64_SME
[ARM64_VEC_SME] = {
.type = ARM64_VEC_SME,
.name = "SME",
},
#endif
};
static unsigned int vec_vl_inherit_flag(enum vec_type type)
{
switch (type) {
case ARM64_VEC_SVE:
return TIF_SVE_VL_INHERIT;
case ARM64_VEC_SME:
return TIF_SME_VL_INHERIT;
default:
WARN_ON_ONCE(1);
return 0;
}
}
struct vl_config {
int __default_vl;
};
static struct vl_config vl_config[ARM64_VEC_MAX];
static inline int get_default_vl(enum vec_type type)
{
return READ_ONCE(vl_config[type].__default_vl);
}
#ifdef CONFIG_ARM64_SVE
static inline int get_sve_default_vl(void)
{
return get_default_vl(ARM64_VEC_SVE);
}
static inline void set_default_vl(enum vec_type type, int val)
{
WRITE_ONCE(vl_config[type].__default_vl, val);
}
static inline void set_sve_default_vl(int val)
{
set_default_vl(ARM64_VEC_SVE, val);
}
#endif
#ifdef CONFIG_ARM64_SME
static int get_sme_default_vl(void)
{
return get_default_vl(ARM64_VEC_SME);
}
static void set_sme_default_vl(int val)
{
set_default_vl(ARM64_VEC_SME, val);
}
static void sme_free(struct task_struct *);
#else
static inline void sme_free(struct task_struct *t) { }
#endif
static void fpsimd_bind_task_to_cpu(void);
static void get_cpu_fpsimd_context(void)
{
if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
if (!irqs_disabled())
local_bh_disable();
} else {
preempt_disable();
}
}
static void put_cpu_fpsimd_context(void)
{
if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
if (!irqs_disabled())
local_bh_enable();
} else {
preempt_enable();
}
}
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
return task->thread.vl[type];
}
void task_set_vl(struct task_struct *task, enum vec_type type,
unsigned long vl)
{
task->thread.vl[type] = vl;
}
unsigned int task_get_vl_onexec(const struct task_struct *task,
enum vec_type type)
{
return task->thread.vl_onexec[type];
}
void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
unsigned long vl)
{
task->thread.vl_onexec[type] = vl;
}
static void task_fpsimd_load(void)
{
bool restore_sve_regs = false;
bool restore_ffr;
WARN_ON(!system_supports_fpsimd());
WARN_ON(preemptible());
WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
clear_thread_flag(TIF_SVE);
break;
case FP_STATE_SVE:
if (!thread_sm_enabled(¤t->thread))
WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));
if (test_thread_flag(TIF_SVE))
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
restore_sve_regs = true;
restore_ffr = true;
break;
default:
WARN_ON_ONCE(1);
clear_thread_flag(TIF_SVE);
break;
}
}
if (system_supports_sme()) {
unsigned long sme_vl = task_get_sme_vl(current);
if (test_thread_flag(TIF_SME))
sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
write_sysreg_s(current->thread.svcr, SYS_SVCR);
if (thread_za_enabled(¤t->thread))
sme_load_state(current->thread.sme_state,
system_supports_sme2());
if (thread_sm_enabled(¤t->thread))
restore_ffr = system_supports_fa64();
}
if (system_supports_fpmr())
write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
if (restore_sve_regs) {
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
sve_load_state(sve_pffr(¤t->thread),
¤t->thread.uw.fpsimd_state.fpsr,
restore_ffr);
} else {
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
fpsimd_load_state(¤t->thread.uw.fpsimd_state);
}
}
static void fpsimd_save_user_state(void)
{
struct cpu_fp_state const *last =
this_cpu_ptr(&fpsimd_last_state);
bool save_sve_regs = false;
bool save_ffr;
unsigned int vl;
WARN_ON(!system_supports_fpsimd());
WARN_ON(preemptible());
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
if (system_supports_fpmr())
*(last->fpmr) = read_sysreg_s(SYS_FPMR);
if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
last->to_save == FP_STATE_SVE) {
save_sve_regs = true;
save_ffr = true;
vl = last->sve_vl;
}
if (system_supports_sme()) {
u64 *svcr = last->svcr;
*svcr = read_sysreg_s(SYS_SVCR);
if (*svcr & SVCR_ZA_MASK)
sme_save_state(last->sme_state,
system_supports_sme2());
if (*svcr & SVCR_SM_MASK) {
save_sve_regs = true;
save_ffr = system_supports_fa64();
vl = last->sme_vl;
}
}
if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
if (WARN_ON(sve_get_vl() != vl)) {
force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
return;
}
sve_save_state((char *)last->sve_state +
sve_ffr_offset(vl),
&last->st->fpsr, save_ffr);
*last->fp_type = FP_STATE_SVE;
} else {
fpsimd_save_state(last->st);
*last->fp_type = FP_STATE_FPSIMD;
}
}
static unsigned int find_supported_vector_length(enum vec_type type,
unsigned int vl)
{
struct vl_info *info = &vl_info[type];
int bit;
int max_vl = info->max_vl;
if (WARN_ON(!sve_vl_valid(vl)))
vl = info->min_vl;
if (WARN_ON(!sve_vl_valid(max_vl)))
max_vl = info->min_vl;
if (vl > max_vl)
vl = max_vl;
if (vl < info->min_vl)
vl = info->min_vl;
bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
__vq_to_bit(sve_vq_from_vl(vl)));
return sve_vl_from_vq(__bit_to_vq(bit));
}
#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
static int vec_proc_do_default_vl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct vl_info *info = table->extra1;
enum vec_type type = info->type;
int ret;
int vl = get_default_vl(type);
struct ctl_table tmp_table = {
.data = &vl,
.maxlen = sizeof(vl),
};
ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
if (ret || !write)
return ret;
if (vl == -1)
vl = info->max_vl;
if (!sve_vl_valid(vl))
return -EINVAL;
set_default_vl(type, find_supported_vector_length(type, vl));
return 0;
}
static const struct ctl_table sve_default_vl_table[] = {
{
.procname = "sve_default_vector_length",
.mode = 0644,
.proc_handler = vec_proc_do_default_vl,
.extra1 = &vl_info[ARM64_VEC_SVE],
},
};
static int __init sve_sysctl_init(void)
{
if (system_supports_sve())
if (!register_sysctl("abi", sve_default_vl_table))
return -EINVAL;
return 0;
}
#else
static int __init sve_sysctl_init(void) { return 0; }
#endif
#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
static const struct ctl_table sme_default_vl_table[] = {
{
.procname = "sme_default_vector_length",
.mode = 0644,
.proc_handler = vec_proc_do_default_vl,
.extra1 = &vl_info[ARM64_VEC_SME],
},
};
static int __init sme_sysctl_init(void)
{
if (system_supports_sme())
if (!register_sysctl("abi", sme_default_vl_table))
return -EINVAL;
return 0;
}
#else
static int __init sme_sysctl_init(void) { return 0; }
#endif
#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
#ifdef CONFIG_CPU_BIG_ENDIAN
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
u64 a = swab64(x);
u64 b = swab64(x >> 64);
return ((__uint128_t)a << 64) | b;
}
#else
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
return x;
}
#endif
#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
unsigned int vq)
{
unsigned int i;
__uint128_t *p;
for (i = 0; i < SVE_NUM_ZREGS; ++i) {
p = (__uint128_t *)ZREG(sst, vq, i);
*p = arm64_cpu_to_le128(fst->vregs[i]);
}
}
static inline void fpsimd_to_sve(struct task_struct *task)
{
unsigned int vq;
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
if (!system_supports_sve() && !system_supports_sme())
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
__fpsimd_to_sve(sst, fst, vq);
}
static inline void sve_to_fpsimd(struct task_struct *task)
{
unsigned int vq, vl;
void const *sst = task->thread.sve_state;
struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
unsigned int i;
__uint128_t const *p;
if (!system_supports_sve() && !system_supports_sme())
return;
vl = thread_get_cur_vl(&task->thread);
vq = sve_vq_from_vl(vl);
for (i = 0; i < SVE_NUM_ZREGS; ++i) {
p = (__uint128_t const *)ZREG(sst, vq, i);
fst->vregs[i] = arm64_le128_to_cpu(*p);
}
}
static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd)
{
memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs));
}
void task_smstop_sm(struct task_struct *task)
{
if (!thread_sm_enabled(&task->thread))
return;
__fpsimd_zero_vregs(&task->thread.uw.fpsimd_state);
task->thread.uw.fpsimd_state.fpsr = 0x0800009f;
if (system_supports_fpmr())
task->thread.uw.fpmr = 0;
task->thread.svcr &= ~SVCR_SM_MASK;
task->thread.fp_type = FP_STATE_FPSIMD;
}
void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
SYS_SCTLR_EL1);
}
#ifdef CONFIG_ARM64_SVE
static void sve_free(struct task_struct *task)
{
kfree(task->thread.sve_state);
task->thread.sve_state = NULL;
}
void sve_alloc(struct task_struct *task, bool flush)
{
if (task->thread.sve_state) {
if (flush)
memset(task->thread.sve_state, 0,
sve_state_size(task));
return;
}
task->thread.sve_state =
kzalloc(sve_state_size(task), GFP_KERNEL);
}
void fpsimd_sync_from_effective_state(struct task_struct *task)
{
if (task->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(task);
}
void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
{
unsigned int vq;
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
if (task->thread.fp_type != FP_STATE_SVE)
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
__fpsimd_to_sve(sst, fst, vq);
}
static int change_live_vector_length(struct task_struct *task,
enum vec_type type,
unsigned long vl)
{
unsigned int sve_vl = task_get_sve_vl(task);
unsigned int sme_vl = task_get_sme_vl(task);
void *sve_state = NULL, *sme_state = NULL;
if (type == ARM64_VEC_SME)
sme_vl = vl;
else
sve_vl = vl;
sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL);
if (!sve_state)
goto out_mem;
if (type == ARM64_VEC_SME) {
sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL);
if (!sme_state)
goto out_mem;
}
if (task == current)
fpsimd_save_and_flush_current_state();
else
fpsimd_flush_task_state(task);
fpsimd_sync_from_effective_state(task);
task_set_vl(task, type, vl);
kfree(task->thread.sve_state);
task->thread.sve_state = sve_state;
fpsimd_sync_to_effective_state_zeropad(task);
if (type == ARM64_VEC_SME) {
task->thread.svcr &= ~SVCR_ZA_MASK;
kfree(task->thread.sme_state);
task->thread.sme_state = sme_state;
}
return 0;
out_mem:
kfree(sve_state);
kfree(sme_state);
return -ENOMEM;
}
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags)
{
bool onexec = flags & PR_SVE_SET_VL_ONEXEC;
bool inherit = flags & PR_SVE_VL_INHERIT;
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
return -EINVAL;
if (!sve_vl_valid(vl))
return -EINVAL;
if (vl > VL_ARCH_MAX)
vl = VL_ARCH_MAX;
vl = find_supported_vector_length(type, vl);
if (!onexec && vl != task_get_vl(task, type)) {
if (change_live_vector_length(task, type, vl))
return -ENOMEM;
}
if (onexec || inherit)
task_set_vl_onexec(task, type, vl);
else
task_set_vl_onexec(task, type, 0);
update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
flags & PR_SVE_VL_INHERIT);
return 0;
}
static int vec_prctl_status(enum vec_type type, unsigned long flags)
{
int ret;
if (flags & PR_SVE_SET_VL_ONEXEC)
ret = task_get_vl_onexec(current, type);
else
ret = task_get_vl(current, type);
if (test_thread_flag(vec_vl_inherit_flag(type)))
ret |= PR_SVE_VL_INHERIT;
return ret;
}
int sve_set_current_vl(unsigned long arg)
{
unsigned long vl, flags;
int ret;
vl = arg & PR_SVE_VL_LEN_MASK;
flags = arg & ~vl;
if (!system_supports_sve() || is_compat_task())
return -EINVAL;
ret = vec_set_vector_length(current, ARM64_VEC_SVE, vl, flags);
if (ret)
return ret;
return vec_prctl_status(ARM64_VEC_SVE, flags);
}
int sve_get_current_vl(void)
{
if (!system_supports_sve() || is_compat_task())
return -EINVAL;
return vec_prctl_status(ARM64_VEC_SVE, 0);
}
#ifdef CONFIG_ARM64_SME
int sme_set_current_vl(unsigned long arg)
{
unsigned long vl, flags;
int ret;
vl = arg & PR_SME_VL_LEN_MASK;
flags = arg & ~vl;
if (!system_supports_sme() || is_compat_task())
return -EINVAL;
ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
if (ret)
return ret;
return vec_prctl_status(ARM64_VEC_SME, flags);
}
int sme_get_current_vl(void)
{
if (!system_supports_sme() || is_compat_task())
return -EINVAL;
return vec_prctl_status(ARM64_VEC_SME, 0);
}
#endif
static void vec_probe_vqs(struct vl_info *info,
DECLARE_BITMAP(map, SVE_VQ_MAX))
{
unsigned int vq, vl;
bitmap_zero(map, SVE_VQ_MAX);
for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
write_vl(info->type, vq - 1);
switch (info->type) {
case ARM64_VEC_SVE:
vl = sve_get_vl();
break;
case ARM64_VEC_SME:
vl = sme_get_vl();
break;
default:
vl = 0;
break;
}
if (sve_vq_from_vl(vl) > vq)
break;
vq = sve_vq_from_vl(vl);
set_bit(__vq_to_bit(vq), map);
}
}
void __init vec_init_vq_map(enum vec_type type)
{
struct vl_info *info = &vl_info[type];
vec_probe_vqs(info, info->vq_map);
bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
}
void vec_update_vq_map(enum vec_type type)
{
struct vl_info *info = &vl_info[type];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
vec_probe_vqs(info, tmp_map);
bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
SVE_VQ_MAX);
}
int vec_verify_vq_map(enum vec_type type)
{
struct vl_info *info = &vl_info[type];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
vec_probe_vqs(info, tmp_map);
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
pr_warn("%s: cpu%d: Required vector length(s) missing\n",
info->name, smp_processor_id());
return -EINVAL;
}
if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
return 0;
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX);
b = find_last_bit(tmp_map, SVE_VQ_MAX);
if (b >= SVE_VQ_MAX)
return 0;
if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) {
pr_warn("%s: cpu%d: Unsupported vector length(s) present\n",
info->name, smp_processor_id());
return -EINVAL;
}
return 0;
}
void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
isb();
write_sysreg_s(0, SYS_ZCR_EL1);
}
void __init sve_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SVE];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
int max_bit;
if (!system_supports_sve())
return;
if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64));
bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map,
SVE_VQ_MAX);
b = find_last_bit(tmp_map, SVE_VQ_MAX);
if (b >= SVE_VQ_MAX)
info->max_virtualisable_vl = SVE_VQ_MAX;
else if (WARN_ON(b == SVE_VQ_MAX - 1))
info->max_virtualisable_vl = SVE_VQ_MIN;
else
info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
if (info->max_virtualisable_vl > info->max_vl)
info->max_virtualisable_vl = info->max_vl;
pr_info("%s: maximum available vector length %u bytes per vector\n",
info->name, info->max_vl);
pr_info("%s: default vector length %u bytes per vector\n",
info->name, get_sve_default_vl());
if (sve_max_virtualisable_vl() < sve_max_vl())
pr_warn("%s: unvirtualisable vector lengths present\n",
info->name);
}
void fpsimd_release_task(struct task_struct *dead_task)
{
sve_free(dead_task);
sme_free(dead_task);
}
#endif
#ifdef CONFIG_ARM64_SME
void sme_alloc(struct task_struct *task, bool flush)
{
if (task->thread.sme_state) {
if (flush)
memset(task->thread.sme_state, 0,
sme_state_size(task));
return;
}
task->thread.sme_state =
kzalloc(sme_state_size(task), GFP_KERNEL);
}
static void sme_free(struct task_struct *task)
{
kfree(task->thread.sme_state);
task->thread.sme_state = NULL;
}
void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
SYS_SMPRI_EL1);
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
isb();
write_sysreg_s(0, SYS_SMCR_EL1);
write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
isb();
}
void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p)
{
BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME);
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
SYS_SMCR_EL1);
}
void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p)
{
BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME);
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
SYS_SMCR_EL1);
}
void __init sme_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SME];
int min_bit, max_bit;
if (!system_supports_sme())
return;
min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
WARN_ON(min_bit >= SVE_VQ_MAX);
info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
WARN_ON(info->min_vl > info->max_vl);
set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));
pr_info("SME: minimum available vector length %u bytes per vector\n",
info->min_vl);
pr_info("SME: maximum available vector length %u bytes per vector\n",
info->max_vl);
pr_info("SME: default vector length %u bytes per vector\n",
get_sme_default_vl());
}
void sme_suspend_exit(void)
{
u64 smcr = 0;
if (!system_supports_sme())
return;
if (system_supports_fa64())
smcr |= SMCR_ELx_FA64;
if (system_supports_sme2())
smcr |= SMCR_ELx_EZT0;
write_sysreg_s(smcr, SYS_SMCR_EL1);
write_sysreg_s(0, SYS_SMPRI_EL1);
}
#endif
static void sve_init_regs(void)
{
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
unsigned long vq_minus_one =
sve_vq_from_vl(task_get_sve_vl(current)) - 1;
sve_set_vq(vq_minus_one);
sve_flush_live(true, vq_minus_one);
fpsimd_bind_task_to_cpu();
} else {
fpsimd_to_sve(current);
current->thread.fp_type = FP_STATE_SVE;
fpsimd_flush_task_state(current);
}
}
void do_sve_acc(unsigned long esr, struct pt_regs *regs)
{
if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
sve_alloc(current, true);
if (!current->thread.sve_state) {
force_sig(SIGKILL);
return;
}
get_cpu_fpsimd_context();
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1);
sve_init_regs();
put_cpu_fpsimd_context();
}
void do_sme_acc(unsigned long esr, struct pt_regs *regs)
{
if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
sve_alloc(current, false);
sme_alloc(current, true);
if (!current->thread.sve_state || !current->thread.sme_state) {
force_sig(SIGKILL);
return;
}
get_cpu_fpsimd_context();
if (test_and_set_thread_flag(TIF_SME))
WARN_ON(1);
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
unsigned long vq_minus_one =
sve_vq_from_vl(task_get_sme_vl(current)) - 1;
sme_set_vq(vq_minus_one);
fpsimd_bind_task_to_cpu();
} else {
fpsimd_flush_task_state(current);
}
put_cpu_fpsimd_context();
}
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
if (!system_supports_fpsimd()) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
BUG();
}
void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
{
unsigned int si_code = FPE_FLTUNK;
if (esr & ESR_ELx_FP_EXC_TFV) {
if (esr & FPEXC_IOF)
si_code = FPE_FLTINV;
else if (esr & FPEXC_DZF)
si_code = FPE_FLTDIV;
else if (esr & FPEXC_OFF)
si_code = FPE_FLTOVF;
else if (esr & FPEXC_UFF)
si_code = FPE_FLTUND;
else if (esr & FPEXC_IXF)
si_code = FPE_FLTRES;
}
send_sig_fault(SIGFPE, si_code,
(void __user *)instruction_pointer(regs),
current);
}
static void fpsimd_load_kernel_state(struct task_struct *task)
{
struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
.st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
BUG_ON(!cpu_fp_state.st);
fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
}
static void fpsimd_flush_cpu_state(void)
{
WARN_ON(!system_supports_fpsimd());
__this_cpu_write(fpsimd_last_state.st, NULL);
if (system_supports_sme())
sme_smstop();
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
void fpsimd_thread_switch(struct task_struct *next)
{
bool wrong_task, wrong_cpu;
if (!system_supports_fpsimd())
return;
WARN_ON_ONCE(!irqs_disabled());
if (test_thread_flag(TIF_KERNEL_FPSTATE))
fpsimd_save_kernel_state(current);
else
fpsimd_save_user_state();
if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
fpsimd_flush_cpu_state();
fpsimd_load_kernel_state(next);
} else {
wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
&next->thread.uw.fpsimd_state;
wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
wrong_task || wrong_cpu);
}
}
static void fpsimd_flush_thread_vl(enum vec_type type)
{
int vl, supported_vl;
vl = task_get_vl_onexec(current, type);
if (!vl)
vl = get_default_vl(type);
if (WARN_ON(!sve_vl_valid(vl)))
vl = vl_info[type].min_vl;
supported_vl = find_supported_vector_length(type, vl);
if (WARN_ON(supported_vl != vl))
vl = supported_vl;
task_set_vl(current, type, vl);
if (!test_thread_flag(vec_vl_inherit_flag(type)))
task_set_vl_onexec(current, type, 0);
}
void fpsimd_flush_thread(void)
{
void *sve_state = NULL;
void *sme_state = NULL;
if (!system_supports_fpsimd())
return;
get_cpu_fpsimd_context();
fpsimd_flush_task_state(current);
memset(¤t->thread.uw.fpsimd_state, 0,
sizeof(current->thread.uw.fpsimd_state));
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
sve_state = current->thread.sve_state;
current->thread.sve_state = NULL;
fpsimd_flush_thread_vl(ARM64_VEC_SVE);
}
if (system_supports_sme()) {
clear_thread_flag(TIF_SME);
sme_state = current->thread.sme_state;
current->thread.sme_state = NULL;
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
}
if (system_supports_fpmr())
current->thread.uw.fpmr = 0;
current->thread.fp_type = FP_STATE_FPSIMD;
put_cpu_fpsimd_context();
kfree(sve_state);
kfree(sme_state);
}
void fpsimd_preserve_current_state(void)
{
if (!system_supports_fpsimd())
return;
get_cpu_fpsimd_context();
fpsimd_save_user_state();
put_cpu_fpsimd_context();
}
static void fpsimd_bind_task_to_cpu(void)
{
struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
WARN_ON(!system_supports_fpsimd());
last->st = ¤t->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
last->sme_state = current->thread.sme_state;
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = ¤t->thread.svcr;
last->fpmr = ¤t->thread.uw.fpmr;
last->fp_type = ¤t->thread.fp_type;
last->to_save = FP_STATE_CURRENT;
current->thread.fpsimd_cpu = smp_processor_id();
if (system_supports_sme()) {
if (test_thread_flag(TIF_SME))
sme_user_enable();
else
sme_user_disable();
}
if (system_supports_sve()) {
if (test_thread_flag(TIF_SVE))
sve_user_enable();
else
sve_user_disable();
}
}
void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
{
struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
WARN_ON(!system_supports_fpsimd());
WARN_ON(!in_softirq() && !irqs_disabled());
*last = *state;
}
void fpsimd_restore_current_state(void)
{
if (!system_supports_fpsimd()) {
clear_thread_flag(TIF_FOREIGN_FPSTATE);
return;
}
get_cpu_fpsimd_context();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
task_fpsimd_load();
fpsimd_bind_task_to_cpu();
}
put_cpu_fpsimd_context();
}
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
current->thread.uw.fpsimd_state = *state;
if (current->thread.fp_type == FP_STATE_SVE)
fpsimd_to_sve(current);
}
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
t->thread.kernel_fpsimd_state = NULL;
if (!system_supports_fpsimd())
return;
barrier();
set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
barrier();
}
void fpsimd_save_and_flush_current_state(void)
{
if (!system_supports_fpsimd())
return;
get_cpu_fpsimd_context();
fpsimd_save_user_state();
fpsimd_flush_task_state(current);
put_cpu_fpsimd_context();
}
void fpsimd_save_and_flush_cpu_state(void)
{
unsigned long flags;
if (!system_supports_fpsimd())
return;
WARN_ON(preemptible());
local_irq_save(flags);
fpsimd_save_user_state();
fpsimd_flush_cpu_state();
local_irq_restore(flags);
}
#ifdef CONFIG_KERNEL_MODE_NEON
void kernel_neon_begin(struct user_fpsimd_state *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
WARN_ON((preemptible() || in_serving_softirq()) && !state);
BUG_ON(!may_use_simd());
get_cpu_fpsimd_context();
if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
fpsimd_save_state(state);
} else {
fpsimd_save_user_state();
if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
WARN_ON(current->thread.kernel_fpsimd_state != NULL);
current->thread.kernel_fpsimd_state = state;
set_thread_flag(TIF_KERNEL_FPSTATE);
}
}
fpsimd_flush_cpu_state();
put_cpu_fpsimd_context();
}
EXPORT_SYMBOL_GPL(kernel_neon_begin);
void kernel_neon_end(struct user_fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
if (!test_thread_flag(TIF_KERNEL_FPSTATE))
return;
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
fpsimd_load_state(state);
} else {
clear_thread_flag(TIF_KERNEL_FPSTATE);
WARN_ON(current->thread.kernel_fpsimd_state != state);
current->thread.kernel_fpsimd_state = NULL;
}
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
#ifdef CONFIG_EFI
static struct user_fpsimd_state efi_fpsimd_state;
void __efi_fpsimd_begin(void)
{
if (!system_supports_fpsimd())
return;
if (may_use_simd()) {
kernel_neon_begin(&efi_fpsimd_state);
} else {
pr_warn_ratelimited("Calling EFI runtime from %s context\n",
in_nmi() ? "NMI" : "hardirq");
force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
fpsimd_save_state(&efi_fpsimd_state);
}
}
void __efi_fpsimd_end(void)
{
if (!system_supports_fpsimd())
return;
if (may_use_simd()) {
kernel_neon_end(&efi_fpsimd_state);
} else {
fpsimd_load_state(&efi_fpsimd_state);
}
}
#endif
#endif
#ifdef CONFIG_CPU_PM
static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
unsigned long cmd, void *v)
{
switch (cmd) {
case CPU_PM_ENTER:
fpsimd_save_and_flush_cpu_state();
break;
case CPU_PM_EXIT:
break;
case CPU_PM_ENTER_FAILED:
default:
return NOTIFY_DONE;
}
return NOTIFY_OK;
}
static struct notifier_block fpsimd_cpu_pm_notifier_block = {
.notifier_call = fpsimd_cpu_pm_notifier,
};
static void __init fpsimd_pm_init(void)
{
cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
}
#else
static inline void fpsimd_pm_init(void) { }
#endif
#ifdef CONFIG_HOTPLUG_CPU
static int fpsimd_cpu_dead(unsigned int cpu)
{
per_cpu(fpsimd_last_state.st, cpu) = NULL;
return 0;
}
static inline void fpsimd_hotplug_init(void)
{
cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
NULL, fpsimd_cpu_dead);
}
#else
static inline void fpsimd_hotplug_init(void) { }
#endif
void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
{
unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;
write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);
isb();
}
static int __init fpsimd_init(void)
{
if (cpu_have_named_feature(FP)) {
fpsimd_pm_init();
fpsimd_hotplug_init();
} else {
pr_notice("Floating-point is not implemented\n");
}
if (!cpu_have_named_feature(ASIMD))
pr_notice("Advanced SIMD is not implemented\n");
sve_sysctl_init();
sme_sysctl_init();
return 0;
}
core_initcall(fpsimd_init);