#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/stat.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/cpu_acpi.h>
#include <sys/cpu_idle.h>
#include <sys/cpupm.h>
#include <sys/cpu_event.h>
#include <sys/hpet.h>
#include <sys/archsystm.h>
#include <vm/hat_i86.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/callb.h>
/*
 * Tokens for cstate_use_timer(): select the HPET as the wakeup timer
 * proxy before a deep C-state, or switch back to the local APIC timer.
 */
#define CSTATE_USING_HPET 1
#define CSTATE_USING_LAT 2
/* Number of 10us waits between "idle stop timeout" warnings (cpu_idle_stop). */
#define CPU_IDLE_STOP_TIMEOUT 1000

extern void cpu_idle_adaptive(void);
extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);

static int cpu_idle_init(cpu_t *);
static void cpu_idle_fini(cpu_t *);
static void cpu_idle_stop(cpu_t *);
static boolean_t cpu_deep_idle_callb(void *arg, int code);
static boolean_t cpu_idle_cpr_callb(void *arg, int code);
static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);
static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer);

/*
 * Set once at cpu_deep_cstates_supported() time:
 * cpu_cstate_arat  - LAPIC timer keeps running in deep C-states (ARAT),
 *                    so no timer proxy is needed.
 * cpu_cstate_hpet  - no ARAT, but the HPET can proxy timer wakeups.
 */
static boolean_t cpu_cstate_arat = B_FALSE;
static boolean_t cpu_cstate_hpet = B_FALSE;

/* cpupm state ops vector for generic ACPI C-state management. */
cpupm_state_ops_t cpu_idle_ops = {
	"Generic ACPI C-state Support",
	cpu_idle_init,
	cpu_idle_fini,
	NULL,
	cpu_idle_stop
};

static kmutex_t cpu_idle_callb_mutex;
static callb_id_t cpu_deep_idle_callb_id;
static callb_id_t cpu_idle_cpr_callb_id;
static uint_t cpu_idle_cfg_state;

/* Protects the shared cpu_idle_kstat buffer below (ks_lock). */
static kmutex_t cpu_idle_mutex;

/* Single shared (virtual) kstat template for every C-state kstat. */
cpu_idle_kstat_t cpu_idle_kstat = {
	{ "address_space_id", KSTAT_DATA_STRING },
	{ "latency", KSTAT_DATA_UINT32 },
	{ "power", KSTAT_DATA_UINT32 },
};
/*
 * kstat update routine for the per-C-state kstats.  Read-only: rejects
 * KSTAT_WRITE with EACCES.  Publishes the address-space type name,
 * wakeup latency and power of the C-state hung off ks_private into the
 * shared cpu_idle_kstat buffer (caller holds ks_lock).
 */
static int
cpu_idle_kstat_update(kstat_t *ksp, int flag)
{
	cpu_acpi_cstate_t *cs = ksp->ks_private;
	char *spc_name;

	if (flag == KSTAT_WRITE)
		return (EACCES);

	switch (cs->cs_addrspace_id) {
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		spc_name = "FFixedHW";
		break;
	case ACPI_ADR_SPACE_SYSTEM_IO:
		spc_name = "SystemIO";
		break;
	default:
		spc_name = "Unsupported";
		break;
	}
	kstat_named_setstr(&cpu_idle_kstat.addr_space_id, spc_name);

	cpu_idle_kstat.cs_latency.value.ui32 = cs->cs_latency;
	cpu_idle_kstat.cs_power.value.ui32 = cs->cs_power;
	return (0);
}
/*
 * Forward a C-state configuration event to whichever wakeup-timer
 * mechanism is in use.  With ARAT the LAPIC timer keeps ticking in deep
 * C-states, so there is nothing to do and the event always succeeds.
 * With an HPET proxy, let the HPET code decide.  Otherwise deep
 * C-states have no timer support and the event fails.
 */
boolean_t
cstate_timer_callback(int code)
{
	if (cpu_cstate_arat)
		return (B_TRUE);

	if (cpu_cstate_hpet)
		return (hpet.callback(code));

	return (B_FALSE);
}
/*
 * Switch the wakeup timer before entering / after leaving a deep
 * C-state.  CSTATE_USING_HPET hands the pending LAPIC expiration to the
 * HPET proxy (returns B_FALSE if the HPET cannot take it);
 * CSTATE_USING_LAT restores the saved expiration to the LAPIC timer.
 * A no-op success under ARAT, and an outright failure when neither ARAT
 * nor an HPET proxy is available.
 */
static boolean_t
cstate_use_timer(hrtime_t *lapic_expire, int timer)
{
	if (cpu_cstate_arat)
		return (B_TRUE);

	if (!cpu_cstate_hpet)
		return (B_FALSE);

	if (timer == CSTATE_USING_HPET)
		return (hpet.use_hpet_timer(lapic_expire));

	if (timer == CSTATE_USING_LAT) {
		hpet.use_lapic_timer(*lapic_expire);
		return (B_TRUE);
	}

	return (B_FALSE);
}
/*
 * Wake a CPU idling in a C-state so a newly runnable thread can be
 * dispatched.  If the target CPU is in its partition's halt set, pull
 * it out and wake it (store to its monitored mwait flag if it is
 * mwait-halted, cross-CPU poke otherwise).  If the target is busy and
 * the thread is not bound, instead wake some other halted CPU in the
 * partition so it can steal the work.
 */
void
cstate_wakeup(cpu_t *cp, int bound)
{
	struct machcpu *mcpu = &(cp->cpu_m);
	volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
	cpupart_t *cpu_part;
	uint_t cpu_found;
	processorid_t cpu_sid;

	cpu_part = cp->cpu_part;
	cpu_sid = cp->cpu_seqid;

	if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
		/* Target is halted: remove from halt set and wake it. */
		bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);
		if (cp != CPU) {
			if ((mcpu_mwait != NULL) &&
			    (*mcpu_mwait == MWAIT_HALTED))
				MWAIT_WAKEUP(cp);
			else
				poke_cpu(cp->cpu_id);
		}
		return;
	} else {
		/*
		 * Target is not halted.  If it is about to go idle, or
		 * its dispatcher forbids stealing, leave the work where
		 * it is.
		 */
		if (cp->cpu_thread == cp->cpu_idle_thread ||
		    cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/* A bound thread can only run on cp; nobody else can help. */
	if (bound)
		return;

	/*
	 * Find another halted CPU in the partition; loop if someone
	 * races us and removes it from the halt set first.
	 */
	do {
		cpu_found = bitset_find(&cpu_part->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;
	} while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
	    cpu_found) < 0);

	/* Wake the chosen CPU (unless it is ourselves, already awake). */
	if (cpu_found != CPU->cpu_seqid) {
		mcpu_mwait = cpu_seq[cpu_found]->cpu_m.mcpu_mwait;
		if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
			MWAIT_WAKEUP(cpu_seq[cpu_found]);
		else
			poke_cpu(cpu_seq[cpu_found]->cpu_id);
	}
}
/*
 * cpu_idle_enter() wakeup-check callback for mwait-based C-states.
 * arg is the CPU's monitored mwait flag.  While the flag still reads
 * MWAIT_HALTED, open a brief interrupt window (sti/pause/cli) so a
 * pending interrupt can be delivered; once the flag has been changed by
 * a waker, exit the idle state.  Called with interrupts disabled.
 */
static void
acpi_cpu_mwait_check_wakeup(void *arg)
{
	volatile uint32_t *mwait_flag = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mwait_flag == MWAIT_HALTED) {
		/* Still armed: let any pending interrupt in, then re-mask. */
		sti();
		SMT_PAUSE();
		cli();
	} else {
		/* A waker cleared the flag: leave the idle state. */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	}
}
/*
 * cpu_idle_enter() wakeup-check callback for C-states entered via
 * SYSTEM_IO while still using monitor/mwait arming: the wakeup token is
 * MWAIT_WAKEUP_IPI rather than MWAIT_HALTED.  While the flag is
 * unchanged, open a brief interrupt window; otherwise exit the idle
 * state.  Called with interrupts disabled.
 */
static void
acpi_cpu_mwait_ipi_check_wakeup(void *arg)
{
	volatile uint32_t *mwait_flag = (volatile uint32_t *)arg;

	ASSERT(arg != NULL);
	if (*mwait_flag == MWAIT_WAKEUP_IPI) {
		/* Still armed: let any pending interrupt in, then re-mask. */
		sti();
		SMT_PAUSE();
		cli();
	} else {
		/* A waker cleared the flag: leave the idle state. */
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	}
}
/*
 * Default cpu_idle_enter() wakeup-check callback, used when no
 * monitored mwait flag is available: simply open a brief interrupt
 * window (sti/pause/cli) so a pending interrupt - the only wakeup
 * source in this mode - can be taken.  arg is unused.
 */
static void
acpi_cpu_check_wakeup(void *arg)
{
	sti();
	SMT_PAUSE();
	cli();
}
/*
 * Enter an I/O-space ACPI C-state: the 8-bit read of the C-state's
 * port address triggers the hardware transition.  On Intel parts a
 * follow-up read of the ACPI PM timer is issued as a STPCLK#
 * workaround (see need-stpclk naming in the original; the timer value
 * itself is discarded).
 */
static void
acpi_io_idle(uint32_t address)
{
	uint32_t scratch;
	ACPI_TABLE_FADT *fadt;
	boolean_t stpclk_workaround;

	stpclk_workaround = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);

	/* Clear microarchitectural buffers before halting. */
	x86_md_clear();

	/* This port read is what actually enters the C-state. */
	(void) cpu_acpi_read_port(address, &scratch, 8);

	if (stpclk_workaround) {
		/* Dummy 32-bit PM-timer read; result intentionally unused. */
		acpica_get_global_FADT(&fadt);
		(void) cpu_acpi_read_port(fadt->XPmTimerBlock.Address,
		    &scratch, 32);
	}
}
/*
 * Enter the ACPI C-state described by cstate on the current CPU.
 *
 * Protocol:
 *  1. Arm the monitored mwait flag (if mwait is available) and pick the
 *     matching wakeup-check callback.  SYSTEM_IO entry is woken by IPI
 *     (MWAIT_WAKEUP_IPI); FIXED_HARDWARE (native mwait) entry is woken
 *     by a store to the flag (MWAIT_HALTED).
 *  2. Join the partition halt set so cstate_wakeup() can find us, then
 *     recheck for runnable work and bail if any appeared.
 *  3. With interrupts masked, hand the wakeup timer to the HPET proxy
 *     (no-op under ARAT).  If that fails, fall back to a plain C1 idle.
 *  4. Otherwise enter the deep C-state via mwait, hlt, or an I/O port
 *     read, then restore the LAPIC timer and halt-set state on wakeup.
 */
static void
acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
{
	volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
	uint32_t mwait_idle_state;
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	hrtime_t lapic_expire;
	uint8_t type = cstate->cs_addrspace_id;
	uint32_t cs_type = cstate->cs_type;
	int hset_update = 1;
	boolean_t using_timer;
	cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup;

	/* Step 1: arm the mwait flag and choose the wakeup checker. */
	if (mcpu_mwait != NULL) {
		if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
			mwait_idle_state = MWAIT_WAKEUP_IPI;
			check_func = &acpi_cpu_mwait_ipi_check_wakeup;
		} else {
			mwait_idle_state = MWAIT_HALTED;
			check_func = &acpi_cpu_mwait_check_wakeup;
		}
		*mcpu_mwait = mwait_idle_state;
	} else {
		mwait_idle_state = MWAIT_RUNNING;
	}

	/* Offline or sole CPU: stay out of the partition halt set. */
	if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
		hset_update = 0;

	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/* Step 2: last chance to pick up work before committing to idle. */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	cli();

	/* Step 3: move timer wakeups to the HPET proxy (if needed). */
	using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);

	/*
	 * A waker raced us and already pulled us out of the halt set:
	 * restore the LAPIC timer and return without idling.
	 */
	if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		return;
	}

	/* Runnable work appeared after the disp_anywork() check: undo. */
	if (cpup->cpu_disp->disp_nrunnable != 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * No usable deep-sleep wakeup timer: fall back to a shallow C1
	 * idle (mwait on the flag, or plain mach_cpu_idle()).
	 */
	if (using_timer == B_FALSE) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		if (mcpu_mwait != NULL) {
			i86_monitor(mcpu_mwait, 0, 0);
			if (*mcpu_mwait == MWAIT_HALTED) {
				if (cpu_idle_enter(IDLE_STATE_C1, 0,
				    check_func, (void *)mcpu_mwait) == 0) {
					if (*mcpu_mwait == MWAIT_HALTED) {
						i86_mwait(0, 0);
					}
					cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
				}
			}
		} else {
			if (cpu_idle_enter(cs_type, 0, check_func, NULL) == 0) {
				mach_cpu_idle();
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/* Step 4: enter the requested deep C-state. */
	boolean_t idle_ok = cpu_idle_enter(cs_type, 0, check_func,
	    (void *)mcpu_mwait) == 0;
	if (idle_ok) {
		if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
			/* Native mwait entry with the C-state hint. */
			if (mcpu_mwait != NULL) {
				i86_monitor(mcpu_mwait, 0, 0);
				if (*mcpu_mwait == mwait_idle_state) {
					i86_mwait(cstate->cs_address, 1);
				}
			} else {
				mach_cpu_idle();
			}
		} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
			/* Skip entry if a waker already cleared the flag. */
			if (!mcpu_mwait || *mcpu_mwait == mwait_idle_state) {
				acpi_io_idle(cstate->cs_address);
			}
		}
		cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
	}

	/* Woken up: restore the LAPIC timer and halt-set state. */
	(void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
	sti();
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}
void
cpu_acpi_idle(void)
{
cpu_t *cp = CPU;
cpu_acpi_handle_t handle;
cma_c_state_t *cs_data;
cpu_acpi_cstate_t *cstates;
hrtime_t start, end;
int cpu_max_cstates;
uint32_t cs_indx;
uint16_t cs_type;
cpupm_mach_state_t *mach_state =
(cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
handle = mach_state->ms_acpi_handle;
ASSERT(CPU_ACPI_CSTATES(handle) != NULL);
cs_data = mach_state->ms_cstate.cma_state.cstate;
cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
ASSERT(cstates != NULL);
cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
if (cpu_max_cstates > CPU_MAX_CSTATES)
cpu_max_cstates = CPU_MAX_CSTATES;
if (cpu_max_cstates == 1) {
(*non_deep_idle_cpu)();
return;
}
start = gethrtime_unscaled();
cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);
cs_type = cstates[cs_indx].cs_type;
switch (cs_type) {
default:
case CPU_ACPI_C1:
(*non_deep_idle_cpu)();
break;
case CPU_ACPI_C2:
acpi_cpu_cstate(&cstates[cs_indx]);
break;
case CPU_ACPI_C3:
acpi_cpu_cstate(&cstates[cs_indx]);
break;
}
end = gethrtime_unscaled();
cpupm_wakeup_cstate_data(cs_data, end);
}
/*
 * Decide whether deep C-states can be used on this system, and record
 * which timer strategy backs them.  Deep C-states require either ARAT
 * (LAPIC timer keeps running) or, on Intel, a fully supported HPET
 * that can proxy timer wakeups.  idle_cpu_no_deep_c is an admin
 * override that disables them unconditionally.
 */
boolean_t
cpu_deep_cstates_supported(void)
{
	extern int idle_cpu_no_deep_c;

	if (idle_cpu_no_deep_c || !cpuid_deep_cstates_supported())
		return (B_FALSE);

	if (cpuid_arat_supported()) {
		/* LAPIC timer survives deep C-states; no proxy needed. */
		cpu_cstate_arat = B_TRUE;
		return (B_TRUE);
	}

	if (cpuid_getvendor(CPU) == X86_VENDOR_Intel &&
	    hpet.supported == HPET_FULL_SUPPORT && hpet.install_proxy()) {
		/* Fall back to the HPET as the deep-idle wakeup timer. */
		cpu_cstate_hpet = B_TRUE;
		return (B_TRUE);
	}

	return (B_FALSE);
}
/*
 * cpupm init entry point: cache the CPU's ACPI C-state data, create one
 * "cstate" kstat per C-state, allocate the cpupm domain/state
 * structures, and - if deep C-states are usable - register the
 * deep-idle and CPR callbacks and clear BM_RLD.  Returns 0 on success,
 * -1 (after cleaning up via cpu_idle_fini()) if the ACPI C-state
 * objects cannot be parsed.
 */
static int
cpu_idle_init(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	char name[KSTAT_STRLEN];
	int cpu_max_cstates, i;
	int ret;

	/* Pull the _CST data from ACPI; a negative return means bad BIOS data. */
	if ((ret = cpu_acpi_cache_cstate_data(handle)) != 0) {
		if (ret < 0)
			cmn_err(CE_NOTE,
			    "!Support for CPU deep idle states is being "
			    "disabled due to errors parsing ACPI C-state "
			    "objects exported by BIOS.");
		cpu_idle_fini(cp);
		return (-1);
	}

	/* One named kstat per C-state, all sharing cpu_idle_kstat's buffer. */
	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
	for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
		(void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type);
		cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
		    name, "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);
		if (cstate->cs_ksp == NULL) {
			cmn_err(CE_NOTE, "kstat_create(c_state) fail");
		} else {
			cstate->cs_ksp->ks_data = &cpu_idle_kstat;
			cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
			cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
			/* Extra room for the address_space_id string. */
			cstate->cs_ksp->ks_data_size += MAXNAMELEN;
			cstate->cs_ksp->ks_private = cstate;
			kstat_install(cstate->cs_ksp);
		}
		cstate++;
	}

	cpupm_alloc_domains(cp, CPUPM_C_STATES);
	cpupm_alloc_ms_cstate(cp);

	if (cpu_deep_cstates_supported()) {
		uint32_t value;

		/* Register the deep-idle and CPR callbacks exactly once. */
		mutex_enter(&cpu_idle_callb_mutex);
		if (cpu_deep_idle_callb_id == (callb_id_t)0)
			cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
			    (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
		if (cpu_idle_cpr_callb_id == (callb_id_t)0)
			cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
			    (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
		mutex_exit(&cpu_idle_callb_mutex);

		/* Clear BM_RLD if set; see ACPI bus-master reload semantics. */
		cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &value);
		if (value & 1)
			cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
	}

	return (0);
}
/*
 * cpupm fini entry point: revert to the non-deep idle loop, delete the
 * per-C-state kstats, free the cpupm C-state structures and cached ACPI
 * data, and unregister the deep-idle/CPR callbacks.
 */
static void
cpu_idle_fini(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i;

	/* Point idle routines back at the non-deep implementations first. */
	idle_cpu = cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
	disp_enq_thread = non_deep_idle_disp_enq_thread;

	/* Tear down the per-C-state kstats, if any were created. */
	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}

	cpupm_free_ms_cstate(cp);
	cpupm_free_domains(&cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);

	/* Unregister callbacks registered by cpu_idle_init(). */
	mutex_enter(&cpu_idle_callb_mutex);
	if (cpu_deep_idle_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_deep_idle_callb_id);
		cpu_deep_idle_callb_id = (callb_id_t)0;
	}
	if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_idle_cpr_callb_id);
		cpu_idle_cpr_callb_id = (callb_id_t)0;
	}
	mutex_exit(&cpu_idle_callb_mutex);
}
/*
 * Temporary idle routine installed by cpu_idle_stop(): when the target
 * CPU next idles it runs this, which replaces itself with
 * non_deep_idle_cpu.  cpu_idle_stop() spins until it observes that
 * assignment, proving the CPU has left the deep-idle path.
 */
static void
cpu_idle_stop_sync(void)
{
	CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
}
/*
 * cpupm stop entry point: synchronously force cp off the deep-idle
 * path (via the cpu_idle_stop_sync() handshake), then tear down its
 * kstats, cpupm C-state structures and cached ACPI data.  Warns every
 * CPU_IDLE_STOP_TIMEOUT * 10us if the target CPU fails to acknowledge.
 */
static void
cpu_idle_stop(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i = 0;

	mutex_enter(&cpu_idle_callb_mutex);
	if (idle_cpu == cpu_idle_adaptive) {
		/*
		 * Install the sync trampoline, poke the CPU out of any
		 * current idle, and wait for it to swap in the non-deep
		 * idle routine.
		 */
		cp->cpu_m.mcpu_idle_cpu = cpu_idle_stop_sync;
		poke_cpu(cp->cpu_id);
		while (cp->cpu_m.mcpu_idle_cpu != non_deep_idle_cpu) {
			drv_usecwait(10);
			if ((++i % CPU_IDLE_STOP_TIMEOUT) == 0)
				cmn_err(CE_NOTE, "!cpu_idle_stop: the slave"
				    " idle stop timeout");
		}
	}
	mutex_exit(&cpu_idle_callb_mutex);

	/* Delete the per-C-state kstats, mirroring cpu_idle_fini(). */
	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}
	cpupm_free_ms_cstate(cp);
	cpupm_remove_domains(cp, CPUPM_C_STATES, &cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);
}
/*
 * Callback (CB_CL_CPU_DEEP_IDLE class) toggling deep C-state usage.
 * cpu_idle_cfg_state's CPU_IDLE_DEEP_CFG bit records "deep idle
 * currently disabled": enable requests are ignored unless the bit is
 * set, disable requests unless it is clear.  Returns B_FALSE only when
 * the timer layer refuses an enable.
 */
static boolean_t
cpu_deep_idle_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
	case PM_ENABLE_CPU_DEEP_IDLE:
		/* Already enabled: nothing to do. */
		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
			break;
		if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
			cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
		} else {
			/* Timer layer cannot support deep idle right now. */
			rslt = B_FALSE;
		}
		break;
	case PM_DISABLE_CPU_DEEP_IDLE:
		/* Already disabled: nothing to do. */
		if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
			break;
		idle_cpu = non_deep_idle_cpu;
		if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
		}
		break;
	default:
		cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
		    code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}
/*
 * CPR (suspend/resume) callback.  On checkpoint, drop to the non-deep
 * idle loop and notify the timer layer.  On resume, re-enable the
 * adaptive deep-idle path - unless deep idle is administratively
 * disabled (CPU_IDLE_DEEP_CFG set) or the timer layer refuses, in
 * which case B_FALSE is returned for the latter.
 */
static boolean_t
cpu_idle_cpr_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case CB_CODE_CPR_RESUME:
		if (cstate_timer_callback(CB_CODE_CPR_RESUME)) {
			/* Stay shallow if deep idle is disabled by config. */
			if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
				break;
			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
		} else {
			rslt = B_FALSE;
		}
		break;
	case CB_CODE_CPR_CHKPT:
		idle_cpu = non_deep_idle_cpu;
		disp_enq_thread = non_deep_idle_disp_enq_thread;
		(void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
		break;
	default:
		cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}
/*
 * Re-evaluate ACPI C-state data for every CPU in cp's C-state power
 * domain (e.g. after a _CST change notification) and install the
 * appropriate idle routine on each: the adaptive cpu_acpi_idle when
 * multiple C-states are available, the non-deep loop when only C1 is.
 * Not built under Xen (__xpv).
 */
void
cpuidle_cstate_instance(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle;
	struct machcpu *mcpu;
	cpuset_t dom_cpu_set;
	kmutex_t *pm_lock;
	int result = 0;
	processorid_t cpu_id;

	if (mach_state == NULL) {
		return;
	}
	ASSERT(mach_state->ms_cstate.cma_domain != NULL);
	dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
	pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;

	/*
	 * Walk the domain's CPU set under the domain lock, removing each
	 * CPU from our local copy as it is processed (XDEL retries with
	 * result < 0 on a race).
	 */
	mutex_enter(pm_lock);
	do {
		CPUSET_FIND(dom_cpu_set, cpu_id);
		if (cpu_id == CPUSET_NOTINSET)
			break;
		ASSERT(cpu_id >= 0 && cpu_id < NCPU);
		cp = cpu[cpu_id];
		mach_state = (cpupm_mach_state_t *)
		    cp->cpu_m.mcpu_pm_mach_state;
		if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
			mutex_exit(pm_lock);
			return;
		}
		handle = mach_state->ms_acpi_handle;
		ASSERT(handle != NULL);

		/* Refresh the cached _CST data for this CPU. */
		if (cpu_acpi_cache_cstate_data(handle) != 0) {
			cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
			    " object Instance: %d", cpu_id);
		}
		mcpu = &(cp->cpu_m);
		mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
		if (mcpu->max_cstates > CPU_ACPI_C1) {
			/* Deep states available: use the adaptive idle path. */
			(void) cstate_timer_callback(
			    CST_EVENT_MULTIPLE_CSTATES);
			disp_enq_thread = cstate_wakeup;
			cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
		} else if (mcpu->max_cstates == CPU_ACPI_C1) {
			/* C1 only: fall back to the non-deep idle loop. */
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
			(void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
		}
		CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
	} while (result < 0);
	mutex_exit(pm_lock);
#endif
}
/*
 * Deferred-work wrapper around cpuidle_cstate_instance(): ctx is the
 * cpu_t whose C-state domain should be re-evaluated.  Bails out unless
 * the CPU has cpupm machine state, is CPU_READY, and its C-state
 * management is ready.
 */
void
cpuidle_manage_cstates(void *ctx)
{
	cpu_t *target = ctx;
	cpupm_mach_state_t *pm_state =
	    (cpupm_mach_state_t *)target->cpu_m.mcpu_pm_mach_state;

	if (pm_state == NULL)
		return;

	if (!(target->cpu_flags & CPU_READY) || !cpupm_cstate_ready(target))
		return;

	cpuidle_cstate_instance(target);
}