#include <sys/archsystm.h>
#include <sys/disp.h>
#include <sys/cmt.h>
#include <sys/systm.h>
#include <sys/cpu.h>
#include <sys/var.h>
#include <sys/xc_levels.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/x86_archext.h>
#include <sys/esunddi.h>
#include <sys/promif.h>
#include <sys/policy.h>
#include <sys/smt.h>
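
/*
 * SMT exclusion: keep a virtual-CPU (VCPU) thread from running guest code
 * while its hardware (hyper-thread) sibling is doing anything incompatible,
 * so that guest code cannot use cross-sibling speculative side channels
 * (L1TF-style leaks) against other zones or the kernel.
 *
 * Each CPU keeps a state word, cs_state, packing a cs_mark_t in the low
 * CS_SHIFT bits and the owning zone ID above them; cs_sibstate mirrors the
 * sibling's poisoned/VCPU state and is written by the sibling itself, so
 * that poison checks stay cheap.  A VCPU thread must smt_acquire() the core
 * before entering the guest, which succeeds only if the sibling is idle or
 * running something from the same zone; the CPU is then marked CM_POISONED
 * until smt_release().  Whenever the sibling becomes incompatible (an
 * interrupt arrives, a different zone's thread is dispatched, or a thread
 * marks itself unsafe), smt_kick() pokes the poisoned CPU to force it out
 * of the guest.
 */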
#define CS_SHIFT (8)
#define CS_MASK ((1 << CS_SHIFT) - 1)
#define CS_MARK(s) ((s) & CS_MASK)
#define CS_ZONE(s) ((s) >> CS_SHIFT)
#define CS_MK(s, z) ((s) | (z << CS_SHIFT))
typedef enum cs_mark {
	CM_IDLE = 0,	/* running the CPU idle thread */
	CM_THREAD,	/* running a general (non-VCPU) thread */
	CM_UNSAFE,	/* running a thread marked ->t_unsafe */
	CM_VCPU,	/* running a VCPU thread */
	CM_POISONED	/* running in guest context */
} cs_mark_t;
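
/*
 * Verify the cpu_smt_t false-sharing padding (cs_sib starting a new cache
 * line) and the mark ordering that comparisons such as "< CM_VCPU" rely on.
 */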
CTASSERT(offsetof(cpu_smt_t, cs_sib) == 64);
CTASSERT(CM_IDLE == 0);
CTASSERT(CM_POISONED < (1 << CS_SHIFT));
CTASSERT(CM_POISONED > CM_VCPU);
CTASSERT(CM_VCPU > CM_UNSAFE);
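
/*
 * The poke sent by smt_kick() arrives at this PIL and does no work of its
 * own, so an interrupt at exactly this level need not kick the sibling.
 * smt_intr_alloc_pil() withdraws the exemption if the level is ever used
 * for a real interrupt source.
 */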
static uint_t empty_pil = XC_CPUPOKE_PIL;

/*
 * If zero, no SMT exclusion is performed and siblings may run guest and
 * non-guest work concurrently.
 */
int smt_exclusion = 1;

/*
 * Spin budget for smt_acquire(): decremented by 10 for each drv_usecwait(10)
 * spent waiting for the sibling to become compatible.
 */
clock_t smt_acquire_wait_time = 64;

/* Set when SMT is to be disabled from boot; applied in smt_late_init(). */
int smt_boot_disable;

/* Non-zero while SMT siblings are in use (published via set_smt_prop()). */
int smt_enabled = 1;
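
/*
 * Called when an interrupt is allocated at the given PIL: if it collides
 * with the poke PIL we treat as "empty", stop exempting that level from
 * kicking the sibling (see pil_needs_kick()).
 */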
void
smt_intr_alloc_pil(uint_t pil)
{
ASSERT(pil <= PIL_MAX);
if (empty_pil == pil)
empty_pil = PIL_MAX + 1;
}
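
/*
 * Decide whether the acquiring VCPU thread should give up this core in favor
 * of the sibling: return B_TRUE if the sibling is running a VCPU from a
 * different zone at an equal or higher dispatch priority (priority ties
 * broken by CPU ID), and the sibling isn't merely handling an interrupt.
 */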
static boolean_t
yield_to_vcpu(cpu_t *sib, zoneid_t zoneid)
{
cpu_smt_t *sibsmt = &sib->cpu_m.mcpu_smt;
uint64_t sibstate = sibsmt->cs_state;
if (sibsmt->cs_intr_depth != 0)
return (B_FALSE);
if (CS_MARK(sibstate) < CM_VCPU || CS_ZONE(sibstate) == zoneid)
return (B_FALSE);
if (curthread->t_pri < sib->cpu_dispatch_pri)
return (B_TRUE);
if (curthread->t_pri == sib->cpu_dispatch_pri &&
CPU->cpu_id < sib->cpu_id)
return (B_TRUE);
return (B_FALSE);
}
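
/*
 * Return B_TRUE if the sibling's current state allows us to enter the guest:
 * it must not be handling an interrupt, and must be idle or running a
 * (non-unsafe) thread belonging to the same zone.
 */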
static inline boolean_t
sibling_compatible(cpu_smt_t *sibsmt, zoneid_t zoneid)
{
uint64_t sibstate = sibsmt->cs_state;
if (sibsmt->cs_intr_depth != 0)
return (B_FALSE);
if (CS_MARK(sibstate) == CM_UNSAFE)
return (B_FALSE);
if (CS_MARK(sibstate) == CM_IDLE)
return (B_TRUE);
return (CS_ZONE(sibstate) == zoneid);
}
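
/*
 * Attempt to acquire the core on behalf of the current VCPU thread before it
 * enters the guest.  Returns 1 on success (this CPU is marked CM_POISONED in
 * its own cs_state and in the sibling's mirrored copy, and the
 * micro-architectural state is flushed), 0 if we ran out of spin budget
 * waiting for the sibling to become compatible, and -1 if we should instead
 * yield to a competing VCPU on the sibling.  Must be called with interrupts
 * disabled and preemption blocked.
 */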
int
smt_acquire(void)
{
clock_t wait = smt_acquire_wait_time;
cpu_smt_t *smt = &CPU->cpu_m.mcpu_smt;
zoneid_t zoneid = getzoneid();
cpu_smt_t *sibsmt;
int ret = 0;
ASSERT(!interrupts_enabled());
if (smt->cs_sib == NULL) {
spec_uarch_flush();
return (1);
}
sibsmt = &smt->cs_sib->cpu_m.mcpu_smt;
ASSERT3U(CS_ZONE(smt->cs_state), ==, zoneid);
ASSERT3U(CS_MARK(smt->cs_state), ==, CM_VCPU);
ASSERT3U(curthread->t_preempt, >=, 1);
ASSERT(curthread->t_schedflag & TS_VCPU);
while (ret == 0 && wait > 0) {
if (yield_to_vcpu(smt->cs_sib, zoneid)) {
ret = -1;
break;
}
if (sibling_compatible(sibsmt, zoneid)) {
lock_set(&sibsmt->cs_lock);
if (sibling_compatible(sibsmt, zoneid)) {
smt->cs_state = CS_MK(CM_POISONED, zoneid);
sibsmt->cs_sibstate = CS_MK(CM_POISONED,
zoneid);
membar_enter();
ret = 1;
}
lock_clear(&sibsmt->cs_lock);
} else {
drv_usecwait(10);
wait -= 10;
}
}
DTRACE_PROBE4(smt__acquire, int, ret, uint64_t, sibsmt->cs_state,
uint64_t, sibsmt->cs_intr_depth, clock_t, wait);
if (ret == 1)
spec_uarch_flush();
return (ret);
}
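
/*
 * Called after the VCPU thread exits the guest: downgrade our CM_POISONED
 * marking (and the sibling's mirrored copy of it) back to CM_VCPU.
 */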
void
smt_release(void)
{
cpu_smt_t *smt = &CPU->cpu_m.mcpu_smt;
zoneid_t zoneid = getzoneid();
cpu_smt_t *sibsmt;
ASSERT(!interrupts_enabled());
if (smt->cs_sib == NULL)
return;
ASSERT3U(CS_ZONE(smt->cs_state), ==, zoneid);
ASSERT3U(CS_MARK(smt->cs_state), ==, CM_POISONED);
ASSERT3U(curthread->t_preempt, >=, 1);
sibsmt = &smt->cs_sib->cpu_m.mcpu_smt;
lock_set(&sibsmt->cs_lock);
smt->cs_state = CS_MK(CM_VCPU, zoneid);
sibsmt->cs_sibstate = CS_MK(CM_VCPU, zoneid);
membar_producer();
lock_clear(&sibsmt->cs_lock);
}
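
/*
 * Poke the sibling CPU and, if it is in the guest ("poisoned") on behalf of
 * a different zone, wait for it to exit.  The caller holds cs_lock; we drop
 * it while spinning so the sibling's smt_release() can take it to update the
 * mirrored state.
 */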
static void
smt_kick(cpu_smt_t *smt, zoneid_t zoneid)
{
uint64_t sibstate;
ASSERT(LOCK_HELD(&smt->cs_lock));
ASSERT(!interrupts_enabled());
poke_cpu(smt->cs_sib->cpu_id);
membar_consumer();
sibstate = smt->cs_sibstate;
if (CS_MARK(sibstate) != CM_POISONED || CS_ZONE(sibstate) == zoneid)
return;
lock_clear(&smt->cs_lock);
for (;;) {
membar_consumer();
sibstate = smt->cs_sibstate;
if (CS_MARK(sibstate) != CM_POISONED ||
CS_ZONE(sibstate) == zoneid)
break;
SMT_PAUSE();
}
lock_set(&smt->cs_lock);
}
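
/*
 * Every interrupt except the bare poke at empty_pil requires kicking a
 * poisoned sibling out of the guest.
 */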
static boolean_t
pil_needs_kick(uint_t pil)
{
return (pil != empty_pil);
}
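
/*
 * Called at the start of interrupt handling.  On the outermost interrupt,
 * if the PIL is one that does real work, kick a poisoned sibling out of the
 * guest before the handler runs.
 */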
void
smt_begin_intr(uint_t pil)
{
ulong_t flags;
cpu_smt_t *smt;
ASSERT(pil <= PIL_MAX);
flags = intr_clear();
smt = &CPU->cpu_m.mcpu_smt;
if (smt->cs_sib == NULL) {
intr_restore(flags);
return;
}
if (atomic_inc_64_nv(&smt->cs_intr_depth) == 1 && pil_needs_kick(pil)) {
lock_set(&smt->cs_lock);
membar_consumer();
if (CS_MARK(smt->cs_sibstate) == CM_POISONED)
smt_kick(smt, GLOBAL_ZONEID);
lock_clear(&smt->cs_lock);
}
intr_restore(flags);
}
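
/* Called at the end of interrupt handling: drop the interrupt depth again. */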
void
smt_end_intr(void)
{
ulong_t flags;
cpu_smt_t *smt;
flags = intr_clear();
smt = &CPU->cpu_m.mcpu_smt;
if (smt->cs_sib == NULL) {
intr_restore(flags);
return;
}
ASSERT3U(smt->cs_intr_depth, >, 0);
atomic_dec_64(&smt->cs_intr_depth);
intr_restore(flags);
}
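
/*
 * The sibling needs a kick if it is in the guest and either we are now
 * unsafe, or it is running on behalf of a different zone.
 */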
static inline boolean_t
smt_need_kick(cpu_smt_t *smt, zoneid_t zoneid)
{
membar_consumer();
if (CS_MARK(smt->cs_sibstate) != CM_POISONED)
return (B_FALSE);
if (CS_MARK(smt->cs_state) == CM_UNSAFE)
return (B_TRUE);
return (CS_ZONE(smt->cs_sibstate) != zoneid);
}
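
/*
 * Record, in our cs_state, what the current thread is (idle, normal thread,
 * unsafe thread, or VCPU) and which zone it belongs to, and kick a poisoned
 * sibling if the new state is incompatible with it.  Called whenever that
 * identity changes: on context switch, and from the smt_begin_unsafe(),
 * smt_end_unsafe() and smt_mark_as_vcpu() wrappers below.
 */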
void
smt_mark(void)
{
zoneid_t zoneid = getzoneid();
kthread_t *t = curthread;
ulong_t flags;
cpu_smt_t *smt;
cpu_t *cp;
flags = intr_clear();
cp = CPU;
smt = &cp->cpu_m.mcpu_smt;
if (smt->cs_sib == NULL) {
intr_restore(flags);
return;
}
lock_set(&smt->cs_lock);
if (smt->cs_intr_depth > 0) {
ASSERT3P(t->t_intr, !=, NULL);
if (smt_need_kick(smt, zoneid))
smt_kick(smt, zoneid);
goto out;
}
if (t == t->t_cpu->cpu_idle_thread) {
ASSERT3U(zoneid, ==, GLOBAL_ZONEID);
smt->cs_state = CS_MK(CM_IDLE, zoneid);
} else {
uint64_t state = CM_THREAD;
if (t->t_unsafe)
state = CM_UNSAFE;
else if (t->t_schedflag & TS_VCPU)
state = CM_VCPU;
smt->cs_state = CS_MK(state, zoneid);
if (smt_need_kick(smt, zoneid))
smt_kick(smt, zoneid);
}
out:
membar_producer();
lock_clear(&smt->cs_lock);
intr_restore(flags);
}
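
/*
 * Bracket a region in which the current thread must not share the core with
 * guest code on the sibling; it is marked CM_UNSAFE for the duration.
 */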
void
smt_begin_unsafe(void)
{
curthread->t_unsafe++;
smt_mark();
}
void
smt_end_unsafe(void)
{
ASSERT3U(curthread->t_unsafe, >, 0);
curthread->t_unsafe--;
smt_mark();
}
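
/* Flag the current thread as a VCPU thread for SMT-exclusion purposes. */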
void
smt_mark_as_vcpu(void)
{
thread_lock(curthread);
curthread->t_schedflag |= TS_VCPU;
smt_mark();
thread_unlock(curthread);
}
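
/*
 * Decide whether thread t may run on CPU cp given what cp's sibling is
 * currently doing: a VCPU thread requires an idle or same-zone sibling,
 * while an ordinary thread only needs to avoid a sibling that is a VCPU
 * (or in the guest) for another zone.
 */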
boolean_t
smt_should_run(kthread_t *t, cpu_t *cp)
{
uint64_t sibstate;
cpu_t *sib;
if (t == t->t_cpu->cpu_idle_thread)
return (B_TRUE);
if ((sib = cp->cpu_m.mcpu_smt.cs_sib) == NULL)
return (B_TRUE);
sibstate = sib->cpu_m.mcpu_smt.cs_state;
if ((t->t_schedflag & TS_VCPU)) {
if (CS_MARK(sibstate) == CM_IDLE)
return (B_TRUE);
if (CS_MARK(sibstate) == CM_UNSAFE)
return (B_FALSE);
return (CS_ZONE(sibstate) == ttozone(t)->zone_id);
}
if (CS_MARK(sibstate) < CM_VCPU)
return (B_TRUE);
return (CS_ZONE(sibstate) == ttozone(t)->zone_id);
}
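
/*
 * Used by the dispatcher when scoring candidate CPUs: worsen the score if t
 * shouldn't run on cp.  A VCPU thread re-scoring its own CPU (most likely
 * because it just failed smt_acquire()) gets the worst possible score so it
 * migrates elsewhere.
 */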
pri_t
smt_adjust_cpu_score(kthread_t *t, struct cpu *cp, pri_t score)
{
if (smt_should_run(t, cp))
return (score);
if ((t->t_schedflag & TS_VCPU) && cp == t->t_cpu && score < 0)
return ((v.v_maxsyspri + 1) * 2);
return (score + 1);
}
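
/* Publish the current SMT state as the root node's "smt_enabled" property. */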
static void
set_smt_prop(void)
{
(void) e_ddi_prop_update_string(DDI_DEV_T_NONE, ddi_root_node(),
"smt_enabled", smt_enabled ? "true" : "false");
}
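
/*
 * Find cp's SMT sibling via its shared-instruction-pipeline (PGHW_IPIPE)
 * processor group, if any.  Only two hardware threads per core are
 * supported.
 */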
static cpu_t *
smt_find_sibling(cpu_t *cp)
{
for (uint_t i = 0; i < GROUP_SIZE(&cp->cpu_pg->cmt_pgs); i++) {
pg_cmt_t *pg = GROUP_ACCESS(&cp->cpu_pg->cmt_pgs, i);
group_t *cg = &pg->cmt_pg.pghw_pg.pg_cpus;
if (pg->cmt_pg.pghw_hw != PGHW_IPIPE)
continue;
if (GROUP_SIZE(cg) == 1)
break;
if (GROUP_SIZE(cg) != 2) {
panic("%u SMT threads unsupported", GROUP_SIZE(cg));
}
if (GROUP_ACCESS(cg, 0) != cp)
return (GROUP_ACCESS(cg, 0));
VERIFY3P(GROUP_ACCESS(cg, 1), !=, cp);
return (GROUP_ACCESS(cg, 1));
}
return (NULL);
}
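
/*
 * Take the higher-numbered CPU of every SMT pair offline and mark it
 * CPU_DISABLED.  Requires cpu_lock and the privilege to offline CPUs.
 */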
int
smt_disable(void)
{
int error = 0;
ASSERT(MUTEX_HELD(&cpu_lock));
if (secpolicy_ponline(CRED()) != 0)
return (EPERM);
if (!smt_enabled)
return (0);
for (size_t i = 0; i < NCPU; i++) {
cpu_t *sib;
cpu_t *cp;
if ((cp = cpu_get(i)) == NULL)
continue;
if ((sib = smt_find_sibling(cp)) == NULL)
continue;
if (cp->cpu_id < sib->cpu_id)
continue;
if (cp->cpu_flags & CPU_DISABLED) {
VERIFY(cp->cpu_flags & CPU_OFFLINE);
continue;
}
if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) {
error = EINVAL;
break;
}
if ((cp->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY) {
cp->cpu_flags |= CPU_DISABLED;
continue;
}
if ((error = cpu_offline(cp, CPU_FORCED)) != 0)
break;
cp->cpu_flags |= CPU_DISABLED;
cpu_set_state(cp);
}
if (error != 0)
return (error);
smt_enabled = 0;
set_smt_prop();
cmn_err(CE_NOTE, "!SMT / hyper-threading explicitly disabled.");
return (0);
}
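
/*
 * A disabled SMT sibling may only be brought back online if SMT wasn't
 * disabled at boot and the caller explicitly forces it.
 */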
boolean_t
smt_can_enable(cpu_t *cp, int flags)
{
VERIFY(cp->cpu_flags & CPU_DISABLED);
return (!smt_boot_disable && (flags & CPU_FORCED));
}
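
/* Called once a disabled sibling has been forced back online. */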
void
smt_force_enabled(void)
{
VERIFY(!smt_boot_disable);
if (!smt_enabled)
cmn_err(CE_NOTE, "!Disabled SMT sibling forced on-line.");
smt_enabled = 1;
set_smt_prop();
}
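
/*
 * Walk every online CPU (binding to it so its per-CPU state can be written
 * safely with interrupts disabled), initialize its cpu_smt_t, and discover
 * its sibling.  If no CPU has a sibling, SMT isn't present.
 */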
void
smt_init(void)
{
boolean_t found_sibling = B_FALSE;
cpu_t *scp = CPU;
cpu_t *cp = scp;
ulong_t flags;
if (!smt_exclusion || smt_boot_disable)
return;
mutex_enter(&cpu_lock);
do {
thread_affinity_set(curthread, cp->cpu_id);
flags = intr_clear();
cp->cpu_m.mcpu_smt.cs_intr_depth = 0;
cp->cpu_m.mcpu_smt.cs_state = CS_MK(CM_THREAD, GLOBAL_ZONEID);
cp->cpu_m.mcpu_smt.cs_sibstate = CS_MK(CM_THREAD,
GLOBAL_ZONEID);
ASSERT3P(cp->cpu_m.mcpu_smt.cs_sib, ==, NULL);
cp->cpu_m.mcpu_smt.cs_sib = smt_find_sibling(cp);
if (cp->cpu_m.mcpu_smt.cs_sib != NULL)
found_sibling = B_TRUE;
intr_restore(flags);
thread_affinity_clear(curthread);
} while ((cp = cp->cpu_next_onln) != scp);
mutex_exit(&cpu_lock);
if (!found_sibling)
smt_enabled = 0;
}
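
/*
 * Finish SMT setup once CPUs are online: apply a boot-time disable request,
 * report the final state, and publish the "smt_enabled" property.
 */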
void
smt_late_init(void)
{
if (smt_boot_disable) {
int err;
mutex_enter(&cpu_lock);
err = smt_disable();
if (err) {
cmn_err(CE_PANIC, "smt_disable() failed with %d", err);
}
mutex_exit(&cpu_lock);
}
if (smt_enabled)
cmn_err(CE_NOTE, "!SMT enabled\n");
set_smt_prop();
}