#include <sys/lgrp.h>
#include <sys/lgrp_user.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/var.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_spt.h>
#include <vm/seg_vn.h>
#include <vm/as.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/sysmacros.h>
#include <sys/pg.h>
#include <sys/promif.h>
#include <sys/sdt.h>
#include <sys/smt.h>
/*
 * Generation number; lgrp_config() bumps it atomically on most
 * (re)configuration events.
 */
lgrp_gen_t lgrp_gen = 0;
/* Table of lgroups, indexed by lgroup ID. */
lgrp_t *lgrp_table[NLGRPS_MAX];
/* Number of lgroups currently in use (see lgrp_create()/lgrp_destroy()). */
int nlgrps;
/*
 * Slot at which lgrp_create() starts looking for a free table entry.
 * Stays -1 until lgrp_destroy() has freed a slot.
 */
int lgrp_alloc_hint = -1;
/* Highest lgroup ID handed out so far; bounds most table scans. */
int lgrp_alloc_max = 0;
/* Per-lgroup statistics storage, defined elsewhere. */
extern struct lgrp_stats lgrp_stats[];
/* NOTE(review): presumably expands to the lgrp_kstat_names[] table. */
LGRP_KSTAT_NAMES;
static void lgrp_kstat_init(void);
static int lgrp_kstat_extract(kstat_t *, int);
static void lgrp_kstat_reset(lgrp_id_t);
/*
 * One named-kstat buffer shared by every lgroup kstat; lgrp_kstat_mutex is
 * installed as the ks_lock of each kstat that uses it.
 */
static struct kstat_named lgrp_kstat_data[LGRP_NUM_STATS];
static kmutex_t lgrp_kstat_mutex;
/* Platform limit on lgroups, set in lgrp_init() stage 1. */
int nlgrpsmax = 0;
/* The root lgroup; points at lroot once lgrp_root_init() has run. */
lgrp_t *lgrp_root = NULL;
/*
 * Statically allocated bootstrap lpls (with their rset and id2rset arrays).
 * lgrp_root_init() installs them as cp_default's lpl array and t0's lpl.
 */
#define LPL_BOOTSTRAP_SIZE 2
static lpl_t lpl_bootstrap_list[LPL_BOOTSTRAP_SIZE];
lpl_t *lpl_bootstrap;
static lpl_t *lpl_bootstrap_rset[LPL_BOOTSTRAP_SIZE];
static int lpl_bootstrap_id2rset[LPL_BOOTSTRAP_SIZE];
/* True while a CPU still points at the bootstrap lpl. */
#define LGRP_CPU_HAS_NO_LGRP(cp) ((cp)->cpu_lpl == lpl_bootstrap)
/* Storage backing lgrp_root. */
static lgrp_t lroot;
/*
 * Tunables below are not consumed by the code visible in this part of the
 * file; NOTE(review): confirm their users before changing defaults.
 */
size_t lgrp_privm_random_thresh = (size_t)(-1);
/* Divides lgrp_loadavg_max_effect evenly among ncpu CPUs. */
#define LGRP_LOADAVG_MAX_EFFECT(ncpu) \
((lgrp_loadavg_max_effect) / (ncpu))
/* Also exported through the lgroup kstat as the load average scale. */
uint32_t lgrp_loadavg_max_effect = LGRP_LOADAVG_THREAD_MAX;
size_t lgrp_shm_random_thresh = 8*1024*1024;
int lgrp_mem_pset_aware = 0;
lgrp_mem_policy_t lgrp_mem_policy_root = LGRP_MEM_POLICY_RANDOM;
/* Validated (and reset to NEXT if out of range) by lgrp_main_init(). */
lgrp_mem_policy_t lgrp_mem_default_policy = LGRP_MEM_POLICY_NEXT;
/*
 * Forward declarations for file-local helpers: lgroup CPU/memory
 * bookkeeping, CPU-partition membership, staged initialization, and the
 * per-partition lpl (lgroup load) maintenance routines.
 */
static void lgrp_cpu_init(struct cpu *);
static void lgrp_cpu_fini(struct cpu *, lgrp_id_t);
static lgrp_t *lgrp_cpu_to_lgrp(struct cpu *);
static void lgrp_mem_init(int, lgrp_handle_t, boolean_t);
static void lgrp_mem_fini(int, lgrp_handle_t, boolean_t);
static void lgrp_mem_rename(int, lgrp_handle_t, lgrp_handle_t);
static void lgrp_part_add_cpu(struct cpu *, lgrp_id_t);
static void lgrp_part_del_cpu(struct cpu *);
static void lgrp_main_init(void);
static void lgrp_main_mp_init(void);
static void lgrp_root_init(void);
static void lgrp_setup(void);
static void lpl_init(lpl_t *, lpl_t *, lgrp_t *);
static void lpl_clear(lpl_t *);
static void lpl_leaf_insert(lpl_t *, struct cpupart *);
static void lpl_leaf_remove(lpl_t *, struct cpupart *);
static void lpl_rset_add(lpl_t *, lpl_t *);
static void lpl_rset_del(lpl_t *, lpl_t *);
static int lpl_rset_contains(lpl_t *, lpl_t *);
static void lpl_cpu_adjcnt(lpl_act_t, struct cpu *);
static void lpl_child_update(lpl_t *, struct cpupart *);
static int lpl_pick(lpl_t *, lpl_t *);
static void lpl_verify_wrapper(struct cpupart *);
/*
 * Result codes returned by lpl_topo_verify().  Zero means the lpl topology
 * of the partition is consistent; each negative value identifies the first
 * inconsistency found.
 */
#define LPL_TOPO_CORRECT 0
#define LPL_TOPO_PART_HAS_NO_LPL -1
#define LPL_TOPO_CPUS_NOT_EMPTY -2
#define LPL_TOPO_LGRP_MISMATCH -3
#define LPL_TOPO_MISSING_PARENT -4
#define LPL_TOPO_PARENT_MISMATCH -5
#define LPL_TOPO_BAD_CPUCNT -6
#define LPL_TOPO_RSET_MISMATCH -7
#define LPL_TOPO_LPL_ORPHANED -8
#define LPL_TOPO_LPL_BAD_NCPU -9
#define LPL_TOPO_RSET_MSSNG_LF -10
#define LPL_TOPO_CPU_HAS_BAD_LPL -11
#define LPL_TOPO_NONLEAF_HAS_CPUS -12
#define LPL_TOPO_LGRP_NOT_LEAF -13
#define LPL_TOPO_BAD_RSETCNT -14
/*
 * Report whether lgroup optimizations should be used.
 *
 * Returns 1 when more than two lgroups currently exist, 0 otherwise.
 */
int
lgrp_optimizations(void)
{
	return ((nlgrps > 2) ? 1 : 0);
}
/*
 * Create the root lgroup and the bootstrap lpl list.
 *
 * The root is built in the static lroot, given the next free lgroup ID
 * (expected to be 0, since no lgroup may exist yet) and entered into
 * lgrp_table[].  The bootstrap lpls are then wired up as t0's lpl and as
 * cp_default's lpl array.
 */
static void
lgrp_root_init(void)
{
	lgrp_handle_t	root_hand;
	lgrp_id_t	root_id;
	int		n;

	/* The root has to be the first lgroup ever created. */
	ASSERT(nlgrps == 0);
	root_id = nlgrps;
	nlgrps++;

	lgrp_root = &lroot;
	lgrp_root->lgrp_cpu = NULL;
	lgrp_root->lgrp_mnodes = 0;
	lgrp_root->lgrp_nmnodes = 0;

	root_hand = lgrp_plat_root_hand();
	lgrp_root->lgrp_plathand = root_hand;
	lgrp_root->lgrp_id = root_id;
	lgrp_root->lgrp_parent = NULL;
	lgrp_root->lgrp_cpucnt = 0;
	lgrp_root->lgrp_childcnt = 0;
	klgrpset_clear(lgrp_root->lgrp_children);
	klgrpset_clear(lgrp_root->lgrp_leaves);
	lgrp_root->lgrp_latency = lgrp_plat_latency(root_hand, root_hand);

	/* Start with every resource set empty. */
	for (n = 0; n < LGRP_RSRC_COUNT; n++)
		klgrpset_clear(lgrp_root->lgrp_set[n]);

	lgrp_root->lgrp_kstat = NULL;
	lgrp_table[root_id] = lgrp_root;

	/*
	 * Bootstrap lpls: entry 0 and entry 1 (lgroup ID 1) share a single
	 * one-element rset that names entry 1, plus a shared id2rset array.
	 */
	lpl_bootstrap = lpl_bootstrap_list;
	t0.t_lpl = lpl_bootstrap;
	lpl_bootstrap_list[1].lpl_lgrpid = 1;
	lpl_bootstrap_rset[0] = &lpl_bootstrap_list[1];
	for (n = 0; n < LPL_BOOTSTRAP_SIZE; n++) {
		lpl_bootstrap_list[n].lpl_rset_sz = 1;
		lpl_bootstrap_list[n].lpl_rset = lpl_bootstrap_rset;
		lpl_bootstrap_list[n].lpl_id2rset = lpl_bootstrap_id2rset;
	}

	cp_default.cp_nlgrploads = LPL_BOOTSTRAP_SIZE;
	cp_default.cp_lgrploads = lpl_bootstrap;
}
/*
 * Staged lgroup initialization entry point.
 *
 * The platform layer is always told about the stage first; the common code
 * then performs the work belonging to that stage.  Stages without common
 * work are ignored.
 */
void
lgrp_init(lgrp_init_stages_t stage)
{
	lgrp_plat_init(stage);

	if (stage == LGRP_INIT_STAGE1) {
		/* Learn the platform's lgroup limit; it must fit the table. */
		nlgrpsmax = lgrp_plat_max_lgrps();
		ASSERT(nlgrpsmax <= NLGRPS_MAX);
	} else if (stage == LGRP_INIT_STAGE2) {
		lgrp_setup();
	} else if (stage == LGRP_INIT_STAGE4) {
		lgrp_main_init();
	} else if (stage == LGRP_INIT_STAGE5) {
		lgrp_main_mp_init();
	}
}
/*
 * Stage-2 setup: create the root lgroup, then add the current CPU to the
 * lgroup framework and bring it online there.
 */
static void
lgrp_setup(void)
{
	uintptr_t	boot_cpu;

	lgrp_root_init();

	boot_cpu = (uintptr_t)CPU;
	lgrp_config(LGRP_CONFIG_CPU_ADD, boot_cpu, 0);
	lgrp_config(LGRP_CONFIG_CPU_ONLINE, boot_cpu, 0);
}
/* Set to 1 at the end of lgrp_main_init(). */
int lgrp_initialized = 0;
/* Set to 1 at the end of lgrp_main_mp_init(), after the topology update. */
int lgrp_topo_initialized = 0;
/*
 * Stage-4 initialization.
 *
 * Sanitizes lgrp_mem_default_policy, then checks whether the platform now
 * reports a different handle for the current CPU than the lgroup it was
 * placed in.  If so, the CPU is moved (it must land in the root), all
 * non-root lgroups are destroyed and the root is made its own only leaf
 * and memory resource.  Finally the kstat framework and this CPU's lgroup
 * kstat are created and lgrp_initialized is set.
 */
static void
lgrp_main_init(void)
{
	extern void	pg_cpu0_reinit();
	cpu_t		*boot_cp = CPU;
	lgrp_id_t	old_id;
	int		slot;

	/* Fall back to NEXT unless the configured policy is a usable one. */
	if (!((lgrp_mem_default_policy > LGRP_MEM_POLICY_DEFAULT) &&
	    (lgrp_mem_default_policy < LGRP_NUM_MEM_POLICIES) &&
	    (lgrp_mem_default_policy != LGRP_MEM_POLICY_NEXT_SEG)))
		lgrp_mem_default_policy = LGRP_MEM_POLICY_NEXT;

	old_id = boot_cp->cpu_lpl->lpl_lgrpid;
	if (lgrp_table[old_id]->lgrp_plathand !=
	    lgrp_plat_cpu_to_hand(boot_cp->cpu_id)) {
		/* Pull the CPU out of its old lgroup ... */
		lgrp_part_del_cpu(boot_cp);
		lgrp_cpu_fini(boot_cp, old_id);

		/* ... and re-home it according to the new handle. */
		lgrp_cpu_init(boot_cp);
		lgrp_part_add_cpu(boot_cp, boot_cp->cpu_lpl->lpl_lgrpid);
		ASSERT(boot_cp->cpu_lpl->lpl_lgrpid == LGRP_ROOTID);

		/* Let the PG subsystem know the CPU changed lgroups. */
		pg_cpu0_reinit();

		/* Only the root survives. */
		for (slot = 0; slot <= lgrp_alloc_max; slot++) {
			if (lgrp_table[slot] != lgrp_root &&
			    LGRP_EXISTS(lgrp_table[slot]))
				lgrp_destroy(lgrp_table[slot]);
		}

		/* Root: no children, and it is its own leaf / memory rsrc. */
		lgrp_root->lgrp_childcnt = 0;
		klgrpset_clear(lgrp_root->lgrp_children);
		klgrpset_clear(lgrp_root->lgrp_leaves);
		klgrpset_add(lgrp_root->lgrp_leaves, LGRP_ROOTID);
		klgrpset_clear(lgrp_root->lgrp_set[LGRP_RSRC_MEM]);
		klgrpset_add(lgrp_root->lgrp_set[LGRP_RSRC_MEM], LGRP_ROOTID);
	}

	lgrp_kstat_init();

	/* lgrp_kstat_create() requires cpu_lock. */
	mutex_enter(&cpu_lock);
	lgrp_kstat_create(boot_cp);
	mutex_exit(&cpu_lock);

	lgrp_initialized = 1;
}
/*
 * Stage-5 initialization: initialize SMT handling, run a topology update
 * over the whole lgroup table and mark the topology as initialized.
 */
static void
lgrp_main_mp_init(void)
{
	klgrpset_t	updated;

	smt_init();

	klgrpset_clear(updated);
	(void) lgrp_topo_update(lgrp_table, lgrp_alloc_max + 1, &updated);

	lgrp_topo_initialized = 1;
}
/*
 * Update the latency recorded in existing lgroups.
 *
 * With hand == LGRP_NULL_HANDLE, every lgroup whose latency equals oldtime
 * is changed to newtime.  Otherwise only lgroups whose platform handle is
 * hand are changed (oldtime is ignored).  newtime is truncated to int.
 */
void
lgrp_latency_change(lgrp_handle_t hand, u_longlong_t oldtime,
    u_longlong_t newtime)
{
	int	slot;

	for (slot = 0; slot <= lgrp_alloc_max; slot++) {
		lgrp_t	*cur = lgrp_table[slot];
		int	match;

		if (!LGRP_EXISTS(cur))
			continue;

		if (hand == LGRP_NULL_HANDLE)
			match = (cur->lgrp_latency == oldtime);
		else
			match = (cur->lgrp_plathand == hand);

		if (match)
			cur->lgrp_latency = (int)newtime;
	}
}
void
lgrp_config(lgrp_config_flag_t event, uintptr_t resource, uintptr_t where)
{
klgrpset_t changed;
cpu_t *cp;
lgrp_id_t id;
int rc;
switch (event) {
case LGRP_CONFIG_CPU_ADD:
cp = (cpu_t *)resource;
cp->cpu_next_lpl = cp;
cp->cpu_prev_lpl = cp;
cp->cpu_next_lgrp = cp;
cp->cpu_prev_lgrp = cp;
cp->cpu_lpl = lpl_bootstrap;
lgrp_plat_config(event, resource);
atomic_inc_32(&lgrp_gen);
break;
case LGRP_CONFIG_CPU_DEL:
lgrp_plat_config(event, resource);
atomic_inc_32(&lgrp_gen);
break;
case LGRP_CONFIG_CPU_ONLINE:
cp = (cpu_t *)resource;
lgrp_cpu_init(cp);
lgrp_part_add_cpu(cp, cp->cpu_lpl->lpl_lgrpid);
rc = lpl_topo_verify(cp->cpu_part);
if (rc != LPL_TOPO_CORRECT) {
panic("lpl_topo_verify failed: %d", rc);
}
lgrp_plat_config(event, resource);
atomic_inc_32(&lgrp_gen);
break;
case LGRP_CONFIG_CPU_OFFLINE:
cp = (cpu_t *)resource;
id = cp->cpu_lpl->lpl_lgrpid;
lgrp_part_del_cpu(cp);
lgrp_cpu_fini(cp, id);
rc = lpl_topo_verify(cp->cpu_part);
if (rc != LPL_TOPO_CORRECT) {
panic("lpl_topo_verify failed: %d", rc);
}
lgrp_plat_config(event, resource);
atomic_inc_32(&lgrp_gen);
break;
case LGRP_CONFIG_CPUPART_ADD:
cp = (cpu_t *)resource;
lgrp_part_add_cpu((cpu_t *)resource, (lgrp_id_t)where);
rc = lpl_topo_verify(cp->cpu_part);
if (rc != LPL_TOPO_CORRECT) {
panic("lpl_topo_verify failed: %d", rc);
}
lgrp_plat_config(event, resource);
break;
case LGRP_CONFIG_CPUPART_DEL:
cp = (cpu_t *)resource;
lgrp_part_del_cpu((cpu_t *)resource);
rc = lpl_topo_verify(cp->cpu_part);
if (rc != LPL_TOPO_CORRECT) {
panic("lpl_topo_verify failed: %d", rc);
}
lgrp_plat_config(event, resource);
break;
case LGRP_CONFIG_MEM_ADD:
lgrp_mem_init((int)resource, where, B_FALSE);
atomic_inc_32(&lgrp_gen);
break;
case LGRP_CONFIG_MEM_DEL:
lgrp_mem_fini((int)resource, where, B_FALSE);
atomic_inc_32(&lgrp_gen);
break;
case LGRP_CONFIG_MEM_RENAME: {
lgrp_config_mem_rename_t *ren_arg =
(lgrp_config_mem_rename_t *)where;
lgrp_mem_rename((int)resource,
ren_arg->lmem_rename_from,
ren_arg->lmem_rename_to);
atomic_inc_32(&lgrp_gen);
break;
}
case LGRP_CONFIG_GEN_UPDATE:
atomic_inc_32(&lgrp_gen);
break;
case LGRP_CONFIG_FLATTEN:
if (where == 0)
lgrp_topo_levels = (int)resource;
else
(void) lgrp_topo_flatten(resource,
lgrp_table, lgrp_alloc_max, &changed);
break;
case LGRP_CONFIG_LAT_CHANGE_ALL:
lgrp_latency_change(LGRP_NULL_HANDLE, (u_longlong_t)resource,
(u_longlong_t)where);
break;
case LGRP_CONFIG_LAT_CHANGE:
lgrp_latency_change((lgrp_handle_t)resource, 0,
(u_longlong_t)where);
break;
case LGRP_CONFIG_NOP:
break;
default:
break;
}
}
/*
 * Place a CPU into the lgroup matching its platform handle.
 *
 * The leaf lgroup is created if it does not exist yet, completed if it was
 * created before its latency was available, or simply marked as a CPU
 * resource in itself and in every lgroup that already lists it as a
 * resource.  Afterwards cp->cpu_lpl is pointed at the matching lpl of the
 * CPU's partition and the CPU is appended to the lgroup's circular CPU list.
 */
static void
lgrp_cpu_init(struct cpu *cp)
{
	klgrpset_t	changed;
	lgrp_handle_t	hand;
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	int		first_cpu;

	/* An empty root CPU resource set means no CPU has been added yet. */
	first_cpu = klgrpset_isempty(lgrp_root->lgrp_set[LGRP_RSRC_CPU]) ?
	    1 : 0;
	if (!first_cpu) {
		ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);
		ASSERT(cp->cpu_part != NULL);
	}

	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
	my_lgrp = lgrp_hand_to_lgrp(hand);

	if (my_lgrp == NULL) {
		/* Brand new leaf lgroup: create it and add it to the tree. */
		my_lgrp = lgrp_create();
		my_lgrp->lgrp_plathand = hand;
		my_lgrp->lgrp_latency = lgrp_plat_latency(hand, hand);
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_leaves, lgrpid);
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);

		klgrpset_clear(changed);
		(void) lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);

		/* Refresh memnodes of whatever lgroups were touched. */
		(void) lgrp_mnode_update(changed, NULL);
	} else if (my_lgrp->lgrp_latency == 0 &&
	    lgrp_plat_latency(hand, hand) > 0) {
		/*
		 * The leaf exists with a zero latency while the platform now
		 * reports one: (re)add it to the topology.
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_clear(changed);
		if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_CPU],
		    lgrpid))
			klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		(void) lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);

		(void) lgrp_mnode_update(changed, NULL);
	} else if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_CPU],
	    my_lgrp->lgrp_id)) {
		int	slot;

		/*
		 * Existing lgroup gains its first CPU: record it as a CPU
		 * resource here and in every lgroup naming it as a resource.
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		for (slot = 0; slot <= lgrp_alloc_max; slot++) {
			lgrp_t	*other = lgrp_table[slot];

			if (LGRP_EXISTS(other) &&
			    lgrp_rsets_member(other->lgrp_set, lgrpid))
				klgrpset_add(other->lgrp_set[LGRP_RSRC_CPU],
				    lgrpid);
		}
	}

	lgrpid = my_lgrp->lgrp_id;
	cp->cpu_lpl = &cp->cpu_part->cp_lgrploads[lgrpid];

	/*
	 * For the very first CPU on a multi-lgroup platform the lpl may not
	 * carry its lgroup ID yet; stamp it.
	 */
	if (first_cpu && nlgrpsmax > 1 && lgrpid != cp->cpu_lpl->lpl_lgrpid)
		cp->cpu_lpl->lpl_lgrpid = lgrpid;

	/* Append the CPU to the lgroup's circular, doubly linked CPU list. */
	if (my_lgrp->lgrp_cpucnt == 0) {
		my_lgrp->lgrp_cpu = cp;
		cp->cpu_next_lgrp = cp->cpu_prev_lgrp = cp;
	} else {
		struct cpu	*head = my_lgrp->lgrp_cpu;
		struct cpu	*tail = head->cpu_prev_lgrp;

		cp->cpu_next_lgrp = head;
		cp->cpu_prev_lgrp = tail;
		tail->cpu_next_lgrp = cp;
		head->cpu_prev_lgrp = cp;
	}
	my_lgrp->lgrp_cpucnt++;
}
/*
 * Allocate (or recycle) an lgroup, initialize it to an empty state and
 * enter it into lgrp_table[].
 *
 * While no lgroup has ever been destroyed (lgrp_alloc_hint == -1) IDs are
 * handed out sequentially.  Afterwards the table is searched from the hint
 * for the first slot that is empty or holds a destroyed lgroup; a destroyed
 * lgroup structure found there is reused.
 *
 * Panics when the platform's lgroup limit would be exceeded or the platform
 * cannot supply an lgroup structure.
 *
 * Caller must hold cpu_lock once lgrp_initialized is set.
 */
lgrp_t *
lgrp_create(void)
{
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	int		i;

	ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));

	lgrpid = 0;
	my_lgrp = NULL;

	if (lgrp_alloc_hint == -1) {
		/* Nothing has been freed yet: take the next ID at the end. */
		lgrpid = nlgrps++;
	} else {
		/* Look for the first reusable slot at or after the hint. */
		for (i = lgrp_alloc_hint; i < nlgrpsmax; i++) {
			my_lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(my_lgrp)) {
				lgrpid = i;
				nlgrps++;
				break;
			}
		}

		/*
		 * The scan ran off the end without finding a free slot.
		 * Falling through here would leave lgrpid at 0 and my_lgrp
		 * pointing at a live lgroup, which would then be wiped and
		 * installed as lgroup 0.  Treat it as the table being full.
		 */
		if (i >= nlgrpsmax)
			panic("Too many lgrps for platform (%d)", nlgrps + 1);

		lgrp_alloc_hint = lgrpid;
	}

	/* Track the highest ID in use to bound table scans. */
	if (lgrpid > lgrp_alloc_max)
		lgrp_alloc_max = lgrpid;

	/* No structure to recycle in that slot: ask the platform for one. */
	if (my_lgrp == NULL)
		my_lgrp = lgrp_plat_alloc(lgrpid);

	if (nlgrps > nlgrpsmax || my_lgrp == NULL)
		panic("Too many lgrps for platform (%d)", nlgrps);

	my_lgrp->lgrp_id = lgrpid;
	my_lgrp->lgrp_latency = 0;
	my_lgrp->lgrp_plathand = LGRP_NULL_HANDLE;
	my_lgrp->lgrp_parent = NULL;
	my_lgrp->lgrp_childcnt = 0;
	my_lgrp->lgrp_mnodes = (mnodeset_t)0;
	my_lgrp->lgrp_nmnodes = 0;
	klgrpset_clear(my_lgrp->lgrp_children);
	klgrpset_clear(my_lgrp->lgrp_leaves);
	for (i = 0; i < LGRP_RSRC_COUNT; i++)
		klgrpset_clear(my_lgrp->lgrp_set[i]);

	my_lgrp->lgrp_cpu = NULL;
	my_lgrp->lgrp_cpucnt = 0;

	/* A recycled lgroup keeps its kstat; start its counters afresh. */
	if (my_lgrp->lgrp_kstat != NULL)
		lgrp_kstat_reset(lgrpid);

	lgrp_table[my_lgrp->lgrp_id] = my_lgrp;

	return (my_lgrp);
}
/*
 * Retire an lgroup so that its table slot and structure can be recycled.
 *
 * The lgroup is marked free by setting its ID to LGRP_NONE and all of its
 * topology, resource, memnode and CPU state is reset.  Its kstat pointer is
 * left alone.  Destroying a non-existent lgroup is a no-op; attempting to
 * destroy the only lgroup panics.
 *
 * Caller must hold cpu_lock once lgrp_initialized is set.
 */
void
lgrp_destroy(lgrp_t *lgrp)
{
	int	rsrc;

	ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));

	if (nlgrps == 1)
		cmn_err(CE_PANIC, "Can't destroy only lgroup!");

	if (!LGRP_EXISTS(lgrp))
		return;

	/* Keep the allocation hint at the lowest freed slot. */
	if (lgrp_alloc_hint == -1 || lgrp->lgrp_id < lgrp_alloc_hint)
		lgrp_alloc_hint = lgrp->lgrp_id;

	/* LGRP_NONE is what marks the structure as recyclable. */
	lgrp->lgrp_id = LGRP_NONE;
	lgrp->lgrp_plathand = LGRP_NULL_HANDLE;
	lgrp->lgrp_latency = 0;

	lgrp->lgrp_parent = NULL;
	lgrp->lgrp_childcnt = 0;
	klgrpset_clear(lgrp->lgrp_children);
	klgrpset_clear(lgrp->lgrp_leaves);

	for (rsrc = 0; rsrc < LGRP_RSRC_COUNT; rsrc++)
		klgrpset_clear(lgrp->lgrp_set[rsrc]);

	lgrp->lgrp_mnodes = (mnodeset_t)0;
	lgrp->lgrp_nmnodes = 0;
	lgrp->lgrp_cpu = NULL;
	lgrp->lgrp_cpucnt = 0;

	nlgrps--;
}
/*
 * Prepare the shared kstat machinery: initialize the kstat mutex and give
 * every entry of lgrp_kstat_data[] its name and 64-bit integer type.
 */
static void
lgrp_kstat_init(void)
{
	lgrp_stat_t	idx;

	mutex_init(&lgrp_kstat_mutex, NULL, MUTEX_DEFAULT, NULL);

	idx = 0;
	while (idx < LGRP_NUM_STATS) {
		kstat_named_init(&lgrp_kstat_data[idx],
		    lgrp_kstat_names[idx], KSTAT_DATA_INT64);
		idx++;
	}
}
/*
 * Create and install the kstat for the lgroup that cp belongs to, unless
 * that lgroup already has one.  The kstat is virtual and writable, shares
 * lgrp_kstat_data[] as its data buffer, and is refreshed through
 * lgrp_kstat_extract().  A failed kstat_create() is silently tolerated.
 *
 * Caller must hold cpu_lock.
 */
void
lgrp_kstat_create(cpu_t *cp)
{
	lgrp_id_t	lgrpid;
	lgrp_t		*owner;
	kstat_t		*ksp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	lgrpid = cp->cpu_lpl->lpl_lgrpid;
	owner = lgrp_table[lgrpid];

	/* One kstat per lgroup is enough. */
	if (owner->lgrp_kstat != NULL)
		return;

	ksp = kstat_create("lgrp", lgrpid, NULL, "misc",
	    KSTAT_TYPE_NAMED, LGRP_NUM_STATS,
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
	if (ksp == NULL)
		return;

	ksp->ks_lock = &lgrp_kstat_mutex;
	ksp->ks_private = owner;
	ksp->ks_data = &lgrp_kstat_data;
	ksp->ks_update = lgrp_kstat_extract;
	owner->lgrp_kstat = ksp;
	kstat_install(ksp);
}
/*
 * Counterpart of lgrp_kstat_create().  Currently a stub: it only asserts
 * that cpu_lock is held and never tears the kstat down; cp is unused.
 */
void
lgrp_kstat_destroy(cpu_t *cp)
{
ASSERT(MUTEX_HELD(&cpu_lock));
}
/*
 * Remove a CPU from lgroup lgrpid.
 *
 * The CPU is unlinked from the lgroup's circular CPU list and the CPU count
 * is decremented.  If it was the last CPU, the lgroup stops being a CPU
 * resource: when it has no resources left at all it is deleted from the
 * topology, otherwise it is removed from the CPU resource set of every
 * lgroup that lists it.
 *
 * Caller must hold cpu_lock once lgrp_initialized is set.
 */
static void
lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid)
{
	lgrp_t		*my_lgrp;
	struct cpu	*before;
	struct cpu	*after;
	klgrpset_t	changed;
	int		slot;

	ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

	/* Unlink from the ring and poison the CPU's own links. */
	before = cp->cpu_prev_lgrp;
	after = cp->cpu_next_lgrp;
	before->cpu_next_lgrp = after;
	after->cpu_prev_lgrp = before;
	cp->cpu_next_lgrp = NULL;
	cp->cpu_prev_lgrp = NULL;

	my_lgrp = lgrp_table[lgrpid];
	my_lgrp->lgrp_cpucnt--;

	if (my_lgrp->lgrp_cpucnt != 0) {
		/* Other CPUs remain; just make sure the head is valid. */
		if (my_lgrp->lgrp_cpu == cp)
			my_lgrp->lgrp_cpu = after;
		return;
	}

	/* That was the last CPU of this lgroup. */
	my_lgrp->lgrp_cpu = NULL;
	klgrpset_del(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);

	if (lgrp_rsets_empty(my_lgrp->lgrp_set)) {
		/* No resources of any kind left: drop it from the topology. */
		klgrpset_clear(changed);
		(void) lgrp_leaf_delete(my_lgrp, lgrp_table,
		    lgrp_alloc_max + 1, &changed);
		return;
	}

	/* Still has other resources: only forget it as a CPU resource. */
	for (slot = 0; slot <= lgrp_alloc_max; slot++) {
		lgrp_t	*other = lgrp_table[slot];

		if (LGRP_EXISTS(other) &&
		    klgrpset_ismember(other->lgrp_set[LGRP_RSRC_CPU], lgrpid))
			klgrpset_del(other->lgrp_set[LGRP_RSRC_CPU], lgrpid);
	}
}
/*
 * Recompute the memory node sets of the lgroups in "target".
 *
 * For each existing target lgroup, the memnodes of every lgroup in its
 * memory resource set are merged in.  Non-root lgroups that have children
 * are rebuilt from scratch; others only accumulate.
 *
 * target	set of lgroup IDs to refresh
 * changed	optional; cleared on entry, then receives the ID of every
 *		target lgroup that had at least one memory resource merged
 *
 * Returns the number of (target, memory resource) pairs processed, or 0
 * when target is empty.
 */
int
lgrp_mnode_update(klgrpset_t target, klgrpset_t *changed)
{
	int	nmerged = 0;
	int	t;
	int	r;

	if (changed)
		klgrpset_clear(*changed);

	if (klgrpset_isempty(target))
		return (0);

	for (t = 0; t <= lgrp_alloc_max; t++) {
		lgrp_t	*dst = lgrp_table[t];

		if (!klgrpset_ismember(target, t) || !LGRP_EXISTS(dst))
			continue;

		/* Intermediate lgroups are rebuilt from an empty set. */
		if (dst->lgrp_childcnt && dst != lgrp_root) {
			dst->lgrp_mnodes = (mnodeset_t)0;
			dst->lgrp_nmnodes = 0;
		}

		for (r = 0; r <= lgrp_alloc_max; r++) {
			lgrp_t	*src = lgrp_table[r];
			int	bit;

			if (!LGRP_EXISTS(src) ||
			    !klgrpset_ismember(dst->lgrp_set[LGRP_RSRC_MEM],
			    r))
				continue;

			/* Add each memnode of src that dst lacks. */
			for (bit = 0; bit < sizeof (mnodeset_t) * NBBY;
			    bit++) {
				mnodeset_t	mask = (mnodeset_t)1 << bit;

				if ((src->lgrp_mnodes & mask) == 0 ||
				    (dst->lgrp_mnodes & mask) != 0)
					continue;

				dst->lgrp_mnodes |= mask;
				dst->lgrp_nmnodes++;
			}

			nmerged++;
			if (changed)
				klgrpset_add(*changed, dst->lgrp_id);
		}
	}

	return (nmerged);
}
/*
 * Move memory node "mnode" from the lgroup with platform handle "from" to
 * the one with handle "to".  Implemented as a removal followed by an add,
 * both flagged as copy-rename (B_TRUE) so that they apply their special
 * handling of the case where the root's only memnode is the one moved.
 */
void
lgrp_mem_rename(int mnode, lgrp_handle_t from, lgrp_handle_t to)
{
lgrp_mem_fini(mnode, from, B_TRUE);
lgrp_mem_init(mnode, to, B_TRUE);
}
/*
 * Add memory node "mnode" to the lgroup identified by platform handle
 * "hand", creating or completing that lgroup as needed and propagating the
 * memnode to lgroups that use it as a memory resource.
 *
 * mnode		memory node number (bit index into mnodeset_t)
 * hand			platform handle of the owning lgroup
 * is_copy_rename	B_TRUE when called from lgrp_mem_rename(); allows
 *			the add to proceed when mnode is the only memnode
 *			the root has
 *
 * Takes cpu_lock if the caller does not already hold it, and pauses the
 * other CPUs around topology changes unless they are already paused.
 * Returns without changes if the root already contains the memnode (other
 * than in the copy-rename special case) or the lgroup is already a memory
 * resource of itself.
 */
void
lgrp_mem_init(int mnode, lgrp_handle_t hand, boolean_t is_copy_rename)
{
	klgrpset_t	changed;
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	mnodeset_t	mnodes_mask = ((mnodeset_t)1 << mnode);
	boolean_t	drop_lock = B_FALSE;
	boolean_t	need_synch;
	int		add_leaf = 0;
	int		count = 0;
	int		i;

	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		drop_lock = B_TRUE;
	}

	/* Only pause CPUs ourselves if nobody has done so already. */
	need_synch = cpus_paused() ? B_FALSE : B_TRUE;

	/* Already configured (and not the sole-memnode rename case)? */
	if ((lgrp_root->lgrp_mnodes & mnodes_mask) &&
	    !(is_copy_rename && (lgrp_root->lgrp_mnodes == mnodes_mask)))
		goto done;

	klgrpset_clear(changed);
	my_lgrp = lgrp_hand_to_lgrp(hand);

	if (my_lgrp == NULL) {
		/* No lgroup for this handle yet: build a new leaf. */
		my_lgrp = lgrp_create();
		lgrpid = my_lgrp->lgrp_id;
		my_lgrp->lgrp_plathand = hand;
		my_lgrp->lgrp_latency = lgrp_plat_latency(hand, hand);
		klgrpset_add(my_lgrp->lgrp_leaves, lgrpid);
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
		add_leaf = 1;
	} else if (my_lgrp->lgrp_latency == 0 &&
	    lgrp_plat_latency(hand, hand) > 0) {
		/*
		 * Leaf exists with zero latency while the platform now
		 * reports one: (re)add it to the topology.
		 */
		klgrpset_clear(changed);
		lgrpid = my_lgrp->lgrp_id;
		if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_MEM],
		    lgrpid))
			klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
		add_leaf = 1;
	} else if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_MEM],
	    my_lgrp->lgrp_id)) {
		/*
		 * Existing lgroup gains memory: mark it as a memory resource
		 * in itself and in every lgroup naming it as a resource.
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
		klgrpset_add(changed, lgrpid);
		count++;
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t	*lgrp = lgrp_table[i];

			if (!LGRP_EXISTS(lgrp) ||
			    !lgrp_rsets_member(lgrp->lgrp_set, lgrpid))
				continue;

			klgrpset_add(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
			klgrpset_add(changed, lgrp->lgrp_id);
			count++;
		}
	} else {
		/* Already a memory resource of itself; nothing to do. */
		goto done;
	}

	if (add_leaf) {
		/* Topology changes happen with the other CPUs paused. */
		if (need_synch)
			pause_cpus(NULL, NULL);
		count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);
		if (need_synch)
			start_cpus();
	}

	/* Record the memnode in the leaf itself ... */
	if (!(my_lgrp->lgrp_mnodes & mnodes_mask)) {
		my_lgrp->lgrp_mnodes |= mnodes_mask;
		my_lgrp->lgrp_nmnodes++;
	}

	/* ... so the leaf needs no further refresh, but the others do. */
	klgrpset_del(changed, lgrpid);
	if (count)
		(void) lgrp_mnode_update(changed, NULL);

done:
	if (drop_lock)
		mutex_exit(&cpu_lock);
}
/*
 * Remove memory node "mnode" from the lgroup identified by platform handle
 * "hand" and from every other lgroup that contains it.
 *
 * mnode		memory node number (bit index into mnodeset_t)
 * hand			platform handle of the owning lgroup; the lgroup is
 *			expected to exist
 * is_copy_rename	B_TRUE when called from lgrp_mem_rename(); the root
 *			then keeps the memnode if it is the only one it has
 *
 * If the lgroup ends up without memory it stops being a memory resource;
 * if it has no resources at all it is deleted from the topology.
 *
 * Takes cpu_lock if the caller does not already hold it, and pauses the
 * other CPUs around a topology deletion unless they are already paused.
 */
void
lgrp_mem_fini(int mnode, lgrp_handle_t hand, boolean_t is_copy_rename)
{
	klgrpset_t	changed;
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	mnodeset_t	mnodes_mask = ((mnodeset_t)1 << mnode);
	boolean_t	drop_lock = B_FALSE;
	boolean_t	need_synch;
	int		count;
	int		i;

	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		drop_lock = B_TRUE;
	}

	/* Only pause CPUs ourselves if nobody has done so already. */
	need_synch = cpus_paused() ? B_FALSE : B_TRUE;

	my_lgrp = lgrp_hand_to_lgrp(hand);
	ASSERT(my_lgrp != NULL);

	/* Strip the memnode from every lgroup that has it. */
	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp_t	*lgrp = lgrp_table[i];

		if (!LGRP_EXISTS(lgrp) || !(lgrp->lgrp_mnodes & mnodes_mask))
			continue;

		/* Copy-rename must not take the root's last memnode. */
		if (is_copy_rename && (lgrp == lgrp_root) &&
		    (lgrp->lgrp_mnodes == mnodes_mask))
			continue;

		lgrp->lgrp_mnodes &= ~mnodes_mask;
		ASSERT(lgrp->lgrp_nmnodes > 0);
		lgrp->lgrp_nmnodes--;
	}
	ASSERT(lgrp_root->lgrp_nmnodes > 0);

	/*
	 * Done if the lgroup still has memory -- unless it is the root in
	 * the copy-rename case that was deliberately skipped above.
	 */
	if ((my_lgrp->lgrp_nmnodes > 0) &&
	    !(is_copy_rename && (my_lgrp == lgrp_root) &&
	    (my_lgrp->lgrp_mnodes == mnodes_mask)))
		goto done;

	/* The lgroup no longer provides any memory. */
	klgrpset_clear(my_lgrp->lgrp_set[LGRP_RSRC_MEM]);

	lgrpid = my_lgrp->lgrp_id;
	count = 0;
	klgrpset_clear(changed);

	if (lgrp_rsets_empty(my_lgrp->lgrp_set)) {
		/* Nothing left in it at all: delete it from the topology. */
		if (need_synch)
			pause_cpus(NULL, NULL);
		count = lgrp_leaf_delete(my_lgrp, lgrp_table,
		    lgrp_alloc_max + 1, &changed);
		ASSERT(count > 0);
		if (need_synch)
			start_cpus();
	} else {
		/* Still has other resources: forget it as a memory rsrc. */
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t	*lgrp = lgrp_table[i];

			if (LGRP_EXISTS(lgrp) &&
			    klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM],
			    lgrpid))
				klgrpset_del(lgrp->lgrp_set[LGRP_RSRC_MEM],
				    lgrpid);
		}
	}

done:
	if (drop_lock)
		mutex_exit(&cpu_lock);
}
/*
 * Look up the existing lgroup whose platform handle is "hand".
 *
 * Returns the first match in lgrp_table[], or NULL if hand is
 * LGRP_NULL_HANDLE or no existing lgroup carries it.
 */
lgrp_t *
lgrp_hand_to_lgrp(lgrp_handle_t hand)
{
	int	slot;

	if (hand != LGRP_NULL_HANDLE) {
		for (slot = 0; slot <= lgrp_alloc_max; slot++) {
			lgrp_t	*cand = lgrp_table[slot];

			if (LGRP_EXISTS(cand) && cand->lgrp_plathand == hand)
				return (cand);
		}
	}

	return (NULL);
}
/*
 * Return the lgroup the current thread is homed in, i.e. the lgroup named
 * by curthread's lpl.  Preemption is disabled while the lpl is examined.
 */
lgrp_t *
lgrp_home_lgrp(void)
{
	lpl_t	*home_lpl;
	lgrp_t	*home;

	kpreempt_disable();

	home_lpl = curthread->t_lpl;
	ASSERT(home_lpl != NULL);
	ASSERT(home_lpl->lpl_lgrpid >= 0 &&
	    home_lpl->lpl_lgrpid <= lgrp_alloc_max);
	ASSERT(LGRP_EXISTS(lgrp_table[home_lpl->lpl_lgrpid]));
	home = lgrp_table[home_lpl->lpl_lgrpid];

	kpreempt_enable();

	return (home);
}
/*
 * Return the ID of the lgroup that thread t is homed in, taken from the
 * thread's lpl.  Preemption is disabled while the lpl is examined.
 */
lgrp_id_t
lgrp_home_id(kthread_t *t)
{
	lpl_t		*home_lpl;
	lgrp_id_t	home_id;

	ASSERT(t != NULL);

	kpreempt_disable();

	home_lpl = t->t_lpl;
	ASSERT(home_lpl != NULL);
	ASSERT(home_lpl->lpl_lgrpid >= 0 &&
	    home_lpl->lpl_lgrpid <= lgrp_alloc_max);
	home_id = home_lpl->lpl_lgrpid;

	kpreempt_enable();

	return (home_id);
}
/*
 * Map a page frame number to its lgroup: ask the platform for the handle
 * that owns the pfn, then return the existing lgroup with that handle.
 * Returns NULL when the platform gives no handle or no lgroup matches.
 */
lgrp_t *
lgrp_pfn_to_lgrp(pfn_t pfn)
{
	lgrp_handle_t	hand;
	int		slot;

	hand = lgrp_plat_pfn_to_hand(pfn);
	if (hand == LGRP_NULL_HANDLE)
		return (NULL);

	for (slot = 0; slot <= lgrp_alloc_max; slot++) {
		lgrp_t	*cand = lgrp_table[slot];

		if (LGRP_EXISTS(cand) && cand->lgrp_plathand == hand)
			return (cand);
	}

	return (NULL);
}
/*
 * Map a physical address to its lgroup: convert the address to a page
 * frame number, ask the platform for the owning handle, then return the
 * existing lgroup with that handle.  Returns NULL when the platform gives
 * no handle or no lgroup matches.
 */
lgrp_t *
lgrp_phys_to_lgrp(u_longlong_t physaddr)
{
	lgrp_handle_t	hand;
	pfn_t		pfn;
	int		slot;

	pfn = btop(physaddr);
	hand = lgrp_plat_pfn_to_hand(pfn);
	if (hand == LGRP_NULL_HANDLE)
		return (NULL);

	for (slot = 0; slot <= lgrp_alloc_max; slot++) {
		lgrp_t	*cand = lgrp_table[slot];

		if (LGRP_EXISTS(cand) && cand->lgrp_plathand == hand)
			return (cand);
	}

	return (NULL);
}
/*
 * Return the lgroup recorded in the given CPU's lpl.  No locking or NULL
 * checking is done here; the caller must pass a CPU with a valid cpu_lpl.
 */
static lgrp_t *
lgrp_cpu_to_lgrp(cpu_t *cpu)
{
return (cpu->cpu_lpl->lpl_lgrp);
}
/*
 * Compute the average lpl load over the CPUs of an lgroup.
 *
 * Walks the lgroup's circular CPU list under cpu_lock, summing each CPU's
 * lpl_loadavg, and returns the sum divided by the lgroup's CPU count.
 * Returns 0 for an lgroup without CPUs.
 */
static uint64_t
lgrp_sum_loadavgs(lgrp_t *lgrp)
{
	uint64_t	total = 0;
	uint64_t	avg = 0;
	cpu_t		*cur;
	int		ncpu;

	mutex_enter(&cpu_lock);

	cur = lgrp->lgrp_cpu;
	ncpu = lgrp->lgrp_cpucnt;

	if (cur != NULL && ncpu != 0) {
		do {
			total += cur->cpu_lpl->lpl_loadavg;
			cur = cur->cpu_next_lgrp;
		} while (cur != lgrp->lgrp_cpu);

		avg = total / ncpu;
	}

	mutex_exit(&cpu_lock);

	return (avg);
}
/*
 * Atomically add val to statistic "stat" of lgroup "lgrpid".
 * Requests for an ID outside [0, lgrp_alloc_max] are ignored.
 */
void
lgrp_stat_add(lgrp_id_t lgrpid, lgrp_stat_t stat, int64_t val)
{
	struct lgrp_stats	*pstats;

	if (lgrpid >= 0 && lgrpid <= lgrp_alloc_max) {
		pstats = &lgrp_stats[lgrpid];
		atomic_add_64((uint64_t *)LGRP_STAT_WRITE_PTR(pstats, stat),
		    val);
	}
}
/*
 * Read statistic "stat" of lgroup "lgrpid".
 * Returns 0 for an ID outside [0, lgrp_alloc_max].
 */
int64_t
lgrp_stat_read(lgrp_id_t lgrpid, lgrp_stat_t stat)
{
	struct lgrp_stats	*pstats;
	uint64_t		val;

	if (!(lgrpid >= 0 && lgrpid <= lgrp_alloc_max))
		return ((int64_t)0);

	pstats = &lgrp_stats[lgrpid];
	LGRP_STAT_READ(pstats, stat, val);

	return (val);
}
/*
 * Reset every counter statistic of lgroup "lgrpid".
 * An ID outside [0, lgrp_alloc_max] is ignored.
 */
static void
lgrp_kstat_reset(lgrp_id_t lgrpid)
{
	lgrp_stat_t	idx;

	if (lgrpid < 0 || lgrpid > lgrp_alloc_max)
		return;

	idx = 0;
	while (idx < LGRP_NUM_COUNTER_STATS) {
		LGRP_STAT_RESET(&lgrp_stats[lgrpid], idx);
		idx++;
	}
}
/*
 * ks_update callback for lgroup kstats.
 *
 * ksp	kstat whose ks_private is the lgroup and whose ks_data is the shared
 *	lgrp_kstat_data[] buffer
 * rw	KSTAT_WRITE requests a reset of the lgroup's counters; anything else
 *	fills the buffer with a snapshot
 *
 * A freed lgroup (ID LGRP_NONE) always reports zeroes, whatever rw is.  The
 * snapshot values are stored after the LGRP_NUM_COUNTER_STATS counter
 * entries.  Always returns 0.
 */
static int
lgrp_kstat_extract(kstat_t *ksp, int rw)
{
	lgrp_t			*lgrp = (lgrp_t *)ksp->ks_private;
	struct kstat_named	*ksd = (struct kstat_named *)ksp->ks_data;
	struct kstat_named	*snap;
	lgrp_id_t		lgrpid;
	lgrp_stat_t		stat;

	ASSERT(ksd == (struct kstat_named *)&lgrp_kstat_data);

	lgrpid = lgrp->lgrp_id;

	/* Snapshot entries follow directly after the counter entries. */
	snap = &ksd[LGRP_NUM_COUNTER_STATS];

	if (lgrpid != LGRP_NONE && rw == KSTAT_WRITE) {
		lgrp_kstat_reset(lgrpid);
		return (0);
	}

	if (lgrpid == LGRP_NONE) {
		/* Freed lgroup: report zeroes. */
		for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) {
			ksd[stat].value.i64 = 0;
		}
		snap[LGRP_NUM_CPUS].value.i64 = 0;
		snap[LGRP_NUM_PG_INSTALL].value.i64 = 0;
		snap[LGRP_NUM_PG_AVAIL].value.i64 = 0;
		snap[LGRP_NUM_PG_FREE].value.i64 = 0;
		snap[LGRP_LOADAVG].value.i64 = 0;
		return (0);
	}

	/* Live lgroup: counters first, then the kernel data snapshot. */
	for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) {
		ksd[stat].value.i64 = lgrp_stat_read(lgrpid, stat);
	}
	snap[LGRP_NUM_CPUS].value.i64 = lgrp->lgrp_cpucnt;
	snap[LGRP_NUM_PG_INSTALL].value.i64 =
	    lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_INSTALL);
	snap[LGRP_NUM_PG_AVAIL].value.i64 =
	    lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_AVAIL);
	snap[LGRP_NUM_PG_FREE].value.i64 =
	    lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_FREE);
	snap[LGRP_LOADAVG].value.i64 = lgrp_sum_loadavgs(lgrp);
	snap[LGRP_LOADAVG_SCALE].value.i64 = lgrp_loadavg_max_effect;

	return (0);
}
/*
 * Look up the lgroup ID of the CPU with processor ID "id".
 *
 * On success stores the ID in *lp and returns 0.  Returns EINVAL, leaving
 * *lp untouched, if no such CPU exists or it is offline or powered off.
 * The lookup is done under cpu_lock.
 */
int
lgrp_query_cpu(processorid_t id, lgrp_id_t *lp)
{
	cpu_t	*cp;
	int	err = EINVAL;

	mutex_enter(&cpu_lock);

	cp = cpu_get(id);
	if (cp != NULL && !cpu_is_offline(cp) && !cpu_is_poweredoff(cp)) {
		ASSERT(cp->cpu_lpl != NULL);
		*lp = cp->cpu_lpl->lpl_lgrpid;
		err = 0;
	}

	mutex_exit(&cpu_lock);

	return (err);
}
/*
 * Look up the load average of the lpl of the CPU with processor ID "id".
 *
 * On success stores the load in *lp and returns 0.  Returns EINVAL, leaving
 * *lp untouched, if no such CPU exists.  The lookup is done under cpu_lock.
 */
int
lgrp_query_load(processorid_t id, lgrp_load_t *lp)
{
	cpu_t	*cp;
	int	err = EINVAL;

	mutex_enter(&cpu_lock);

	cp = cpu_get(id);
	if (cp != NULL) {
		ASSERT(cp->cpu_lpl != NULL);
		*lp = cp->cpu_lpl->lpl_loadavg;
		err = 0;
	}

	mutex_exit(&cpu_lock);

	return (err);
}
/*
 * Insert lpl_leaf into lpl_target's resource set, keeping the set ordered
 * by ascending lgroup ID and the lgroup-ID to rset-index map (lpl_id2rset)
 * up to date.  The target's CPU count grows by the leaf's CPU count.
 * Nothing happens if the leaf is already in the set.
 */
void
lpl_rset_add(lpl_t *lpl_target, lpl_t *lpl_leaf)
{
	int	slot;
	int	pos;

	/* Find the insertion point; bail out on a duplicate. */
	for (slot = 0; slot < lpl_target->lpl_nrset; slot++) {
		lpl_t	*member = lpl_target->lpl_rset[slot];

		if (member == lpl_leaf)
			return;
		if (member->lpl_lgrpid > lpl_leaf->lpl_lgrpid)
			break;
	}

	/*
	 * Open a gap at "slot" by moving the tail up one entry, working
	 * from the end so that nothing is overwritten.
	 */
	pos = lpl_target->lpl_nrset++;
	for (; pos > slot; pos--) {
		lpl_t	*moved = lpl_target->lpl_rset[pos - 1];

		lpl_target->lpl_rset[pos] = moved;
		lpl_target->lpl_id2rset[moved->lpl_lgrpid] = pos;
	}

	lpl_target->lpl_rset[slot] = lpl_leaf;
	lpl_target->lpl_id2rset[lpl_leaf->lpl_lgrpid] = slot;
	lpl_target->lpl_ncpu += lpl_leaf->lpl_ncpu;
}
/*
 * Point the lpl of every child of lpl_parent's lgroup, within partition
 * cp, at lpl_parent.  Does nothing when the lgroup has no children.
 */
static void
lpl_child_update(lpl_t *lpl_parent, struct cpupart *cp)
{
	klgrpset_t	kids;
	int		id;

	kids = lgrp_table[lpl_parent->lpl_lgrpid]->lgrp_children;
	if (klgrpset_isempty(kids))
		return;

	for (id = 0; id <= lgrp_alloc_max; id++) {
		if (!klgrpset_ismember(kids, id))
			continue;

		cp->cp_lgrploads[id].lpl_parent = lpl_parent;
	}
}
/*
 * Remove lpl_leaf from lpl_target's resource set (lpl_rset), closing the
 * gap so the remaining entries stay contiguous and keeping the lgroup-ID to
 * rset-index map (lpl_id2rset) in step.  Does nothing if the set is empty
 * or the leaf is not in it.
 *
 * Note that lpl_ncpu is decremented by exactly one, not by the leaf's
 * lpl_ncpu.
 *
 * NOTE(review): the "not found" test, the NULL store and the compaction
 * loop all touch lpl_rset[] at index lpl_nrset, i.e. one slot past the last
 * valid entry, so this relies on the array having a spare slot there --
 * confirm against the code that sizes lpl_rset.
 */
void
lpl_rset_del(lpl_t *lpl_target, lpl_t *lpl_leaf)
{
int i;
lpl_t *leaf;
if (lpl_target->lpl_nrset == 0)
return;
/* Locate the leaf; i ends up equal to lpl_nrset if it is absent. */
for (i = 0; i < lpl_target->lpl_nrset; i++) {
if (lpl_target->lpl_rset[i] == lpl_leaf)
break;
}
/* Not found (this reads slot lpl_nrset when the loop ran to the end). */
if (lpl_target->lpl_rset[i] != lpl_leaf)
return;
/*
 * NULL the slot just past the old end, then shrink the count.  That NULL
 * is what the loop below shifts into the vacated last position.
 */
lpl_target->lpl_rset[lpl_target->lpl_nrset--] = NULL;
/* -1 marks the leaf's lgroup ID as no longer present in the rset. */
lpl_target->lpl_id2rset[lpl_leaf->lpl_lgrpid] = -1;
lpl_target->lpl_ncpu--;
/* Shift every later entry down by one, re-indexing each as it moves. */
do {
lpl_target->lpl_rset[i] = lpl_target->lpl_rset[i + 1];
if ((leaf = lpl_target->lpl_rset[i]) != NULL) {
lpl_target->lpl_id2rset[leaf->lpl_lgrpid] = i;
}
} while (i++ < lpl_target->lpl_nrset);
}
/*
 * Return 1 if lpl_leaf is one of the first lpl_nrset entries of
 * lpl_target's resource set, 0 otherwise.
 */
int
lpl_rset_contains(lpl_t *lpl_target, lpl_t *lpl_leaf)
{
	int	idx = 0;

	while (idx < lpl_target->lpl_nrset) {
		if (lpl_target->lpl_rset[idx] == lpl_leaf)
			return (1);
		idx++;
	}

	return (0);
}
/*
 * Adjust, by one, the CPU count of every non-leaf lpl in cp's partition
 * whose lgroup shares CPU resources with the lgroup of cp's own (leaf) lpl.
 * The leaf lpl itself is not touched, and lpls with an empty resource set
 * are skipped.
 *
 * act	LPL_INCREMENT or LPL_DECREMENT
 * cp	CPU being added to or removed from its leaf lpl
 */
void
lpl_cpu_adjcnt(lpl_act_t act, cpu_t *cp)
{
	cpupart_t	*cpupart;
	lpl_t		*lpl_leaf;
	lgrp_t		*lgrp_leaf;
	int		slot;

	ASSERT(act == LPL_DECREMENT || act == LPL_INCREMENT);

	cpupart = cp->cpu_part;
	lpl_leaf = cp->cpu_lpl;
	lgrp_leaf = lgrp_table[lpl_leaf->lpl_lgrpid];

	for (slot = 0; slot <= lgrp_alloc_max; slot++) {
		lgrp_t	*lgrp_cur = lgrp_table[slot];
		lpl_t	*lpl_cur;

		/* Skip holes in the table and the leaf's own lgroup. */
		if (!LGRP_EXISTS(lgrp_cur) || (lgrp_cur == lgrp_leaf))
			continue;

		/* Skip lgroups sharing no CPU resources with the leaf. */
		if (!klgrpset_intersects(lgrp_leaf->lgrp_set[LGRP_RSRC_CPU],
		    lgrp_cur->lgrp_set[LGRP_RSRC_CPU]))
			continue;

		lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];
		if (lpl_cur->lpl_nrset > 0) {
			switch (act) {
			case LPL_INCREMENT:
				lpl_cur->lpl_ncpu++;
				break;
			case LPL_DECREMENT:
				lpl_cur->lpl_ncpu--;
				break;
			default:
				break;
			}
		}
	}
}
/*
 * Initialize an lpl for lgroup "lgrp" with a resource set containing just
 * lpl_leaf.
 *
 * When the lpl is the leaf itself its CPU count starts at 1; otherwise it
 * inherits the leaf's count.  The parent link and CPU list are left NULL
 * for the caller to fill in.
 */
void
lpl_init(lpl_t *lpl, lpl_t *lpl_leaf, lgrp_t *lgrp)
{
	lpl->lpl_lgrp = lgrp;
	/* Must precede the id2rset store below when lpl == lpl_leaf. */
	lpl->lpl_lgrpid = lgrp->lgrp_id;
	lpl->lpl_loadavg = 0;
	lpl->lpl_parent = NULL;
	lpl->lpl_cpus = NULL;

	lpl->lpl_ncpu = (lpl == lpl_leaf) ? 1 : lpl_leaf->lpl_ncpu;

	/* Resource set: exactly one entry, the leaf, at index 0. */
	lpl->lpl_nrset = 1;
	lpl->lpl_rset[0] = lpl_leaf;
	lpl->lpl_id2rset[lpl_leaf->lpl_lgrpid] = 0;
}
/*
 * Reset an lpl to the empty state: no load, CPUs, lgroup, parent or
 * resources, and zero-filled rset / id2rset arrays (lpl_rset_sz entries
 * each).  The lpl's lgroup ID and array pointers are left as they are.
 */
void
lpl_clear(lpl_t *lpl)
{
	/* Links to the rest of the topology. */
	lpl->lpl_lgrp = NULL;
	lpl->lpl_parent = NULL;
	lpl->lpl_cpus = NULL;

	/* Counters. */
	lpl->lpl_ncpu = 0;
	lpl->lpl_nrset = 0;
	lpl->lpl_loadavg = 0;
	lpl->lpl_homed_time = 0;

	/* Resource set and its reverse map. */
	bzero(lpl->lpl_rset, sizeof (lpl->lpl_rset[0]) * lpl->lpl_rset_sz);
	bzero(lpl->lpl_id2rset,
	    sizeof (lpl->lpl_id2rset[0]) * lpl->lpl_rset_sz);
}
/*
 * Check that the lpl topology of a CPU partition is consistent with the
 * lgroup topology.
 *
 * Returns LPL_TOPO_CORRECT when everything matches (or when the lgroup
 * framework is not fully initialized yet), otherwise the LPL_TOPO_* code of
 * the first inconsistency found.  On DEBUG kernels the ASSERTs fire before
 * the error code would be returned.
 */
int
lpl_topo_verify(cpupart_t *cpupart)
{
	lgrp_t		*lgrp;
	lpl_t		*lpl;
	klgrpset_t	rset;
	klgrpset_t	cset;
	cpu_t		*cpu;
	cpu_t		*cp_start;
	int		i;
	int		j;
	int		sum;

	/* Nothing to verify until the topology has been fully built. */
	if (!lgrp_topo_initialized || !lgrp_initialized)
		return (LPL_TOPO_CORRECT);

	ASSERT(cpupart != NULL);

	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp = lgrp_table[i];
		lpl = NULL;

		/* The partition must have an lpl array ... */
		ASSERT(cpupart->cp_lgrploads);
		if (!cpupart->cp_lgrploads)
			return (LPL_TOPO_PART_HAS_NO_LPL);

		/* ... that is large enough for this lgroup ID. */
		lpl = &cpupart->cp_lgrploads[i];
		ASSERT(i < cpupart->cp_nlgrploads);

		/* An lpl without an lgroup must not account for any CPUs. */
		if (!LGRP_EXISTS(lgrp)) {
			ASSERT(lpl->lpl_ncpu == 0);
			if (lpl->lpl_ncpu > 0) {
				return (LPL_TOPO_CPUS_NOT_EMPTY);
			} else {
				continue;
			}
		}

		ASSERT(lgrp->lgrp_id == lpl->lpl_lgrpid);

		/*
		 * lgroup has no leaves in this partition: its lpl must be
		 * empty and must not appear in any rset of the partition.
		 */
		if (!klgrpset_intersects(lgrp->lgrp_leaves,
		    cpupart->cp_lgrpset)) {
			ASSERT(lpl->lpl_ncpu == 0);
			if (lpl->lpl_ncpu > 0) {
				return (LPL_TOPO_CPUS_NOT_EMPTY);
			}
			for (j = 0; j < cpupart->cp_nlgrploads; j++) {
				lpl_t *i_lpl;

				i_lpl = &cpupart->cp_lgrploads[j];
				ASSERT(!lpl_rset_contains(i_lpl, lpl));
				if (lpl_rset_contains(i_lpl, lpl)) {
					return (LPL_TOPO_LPL_ORPHANED);
				}
			}
			continue;
		}

		/* From here on the lgroup is part of this partition. */
		ASSERT(lgrp == lpl->lpl_lgrp);
		if (lgrp != lpl->lpl_lgrp) {
			return (LPL_TOPO_LGRP_MISMATCH);
		}

		/* Parent links must mirror the lgroup hierarchy. */
		if (lgrp->lgrp_parent) {
			ASSERT(lpl->lpl_parent != NULL &&
			    lgrp->lgrp_parent->lgrp_id ==
			    lpl->lpl_parent->lpl_lgrpid);
			if (!lpl->lpl_parent) {
				return (LPL_TOPO_MISSING_PARENT);
			} else if (lgrp->lgrp_parent->lgrp_id !=
			    lpl->lpl_parent->lpl_lgrpid) {
				return (LPL_TOPO_PARENT_MISMATCH);
			}
		}

		if ((lpl->lpl_nrset == 1) && (lpl == lpl->lpl_rset[0])) {
			/* Leaf lpl: its lgroup must be a leaf as well. */
			ASSERT((lgrp->lgrp_childcnt == 0) &&
			    (klgrpset_ismember(lgrp->lgrp_leaves,
			    lpl->lpl_lgrpid)));
			if ((lgrp->lgrp_childcnt > 0) ||
			    (!klgrpset_ismember(lgrp->lgrp_leaves,
			    lpl->lpl_lgrpid))) {
				return (LPL_TOPO_LGRP_NOT_LEAF);
			}

			ASSERT((lgrp->lgrp_cpucnt >= lpl->lpl_ncpu) &&
			    (lpl->lpl_ncpu > 0));
			if ((lgrp->lgrp_cpucnt < lpl->lpl_ncpu) ||
			    (lpl->lpl_ncpu <= 0)) {
				return (LPL_TOPO_BAD_CPUCNT);
			}

			/*
			 * Walk the lpl's circular CPU list once: every CPU
			 * must point back at this lpl, and the number of
			 * CPUs must equal lpl_ncpu.
			 */
			j = 0;
			cpu = cp_start = lpl->lpl_cpus;
			while (cpu != NULL) {
				j++;
				ASSERT(cpu->cpu_lpl == lpl);
				if (cpu->cpu_lpl != lpl) {
					return (LPL_TOPO_CPU_HAS_BAD_LPL);
				}
				if ((cpu = cpu->cpu_next_lpl) != cp_start) {
					continue;
				} else {
					cpu = NULL;
				}
			}
			ASSERT(j == lpl->lpl_ncpu);
			if (j != lpl->lpl_ncpu) {
				return (LPL_TOPO_LPL_BAD_NCPU);
			}

			/*
			 * Every lpl in this partition whose lgroup lists this
			 * leaf as a CPU resource must have it in its rset.
			 */
			for (j = 0; j <= lgrp_alloc_max; j++) {
				klgrpset_t intersect;
				lgrp_t *lgrp_cand;
				lpl_t *lpl_cand;

				/*
				 * Check for an empty table slot before
				 * looking inside the candidate; its resource
				 * set must not be read through a NULL
				 * pointer.
				 */
				lgrp_cand = lgrp_table[j];
				if (!LGRP_EXISTS(lgrp_cand))
					continue;

				intersect = klgrpset_intersects(
				    lgrp_cand->lgrp_set[LGRP_RSRC_CPU],
				    cpupart->cp_lgrpset);
				if (!klgrpset_intersects(lgrp_cand->lgrp_leaves,
				    cpupart->cp_lgrpset) ||
				    (intersect == 0))
					continue;

				lpl_cand =
				    &cpupart->cp_lgrploads[lgrp_cand->lgrp_id];
				if (klgrpset_ismember(intersect,
				    lgrp->lgrp_id)) {
					ASSERT(lpl_rset_contains(lpl_cand,
					    lpl));
					if (!lpl_rset_contains(lpl_cand, lpl)) {
						return (LPL_TOPO_RSET_MSSNG_LF);
					}
				}
			}
		} else {
			/* Non-leaf lpl: it never owns CPUs directly ... */
			ASSERT(lpl->lpl_cpus == NULL);
			if (lpl->lpl_cpus != NULL) {
				return (LPL_TOPO_NONLEAF_HAS_CPUS);
			}

			/* ... and its count is the sum over its rset. */
			for (j = sum = 0; j < lpl->lpl_nrset; j++) {
				sum += lpl->lpl_rset[j]->lpl_ncpu;
			}
			ASSERT(sum == lpl->lpl_ncpu);
			if (sum != lpl->lpl_ncpu) {
				return (LPL_TOPO_LPL_BAD_NCPU);
			}
		}

		/*
		 * The rset may only name lgroups that are both CPU resources
		 * of this lgroup and part of this partition.
		 */
		klgrpset_clear(rset);
		for (j = 0; j < lpl->lpl_nrset; j++) {
			klgrpset_add(rset, lpl->lpl_rset[j]->lpl_lgrpid);
		}
		klgrpset_copy(cset, rset);
		klgrpset_diff(rset, lgrp->lgrp_set[LGRP_RSRC_CPU]);
		klgrpset_diff(cset, cpupart->cp_lgrpset);
		ASSERT(klgrpset_isempty(rset) && klgrpset_isempty(cset));
		if (!klgrpset_isempty(rset) || !klgrpset_isempty(cset)) {
			return (LPL_TOPO_RSET_MISMATCH);
		}

		/* The first lpl_nrset rset entries must all be filled in. */
		for (j = 0; j < lpl->lpl_nrset; j++) {
			if (lpl->lpl_rset[j] == NULL)
				break;
		}
		ASSERT(j == lpl->lpl_nrset);
		if (j != lpl->lpl_nrset) {
			return (LPL_TOPO_BAD_RSETCNT);
		}
	}

	return (LPL_TOPO_CORRECT);
}
/*
 * Flatten the lpl topology of every CPU partition to two levels.
 *
 * Only levels == 2 is supported; any other value returns 0 without doing
 * anything.  For each partition, lpls whose lgroup no longer exists are
 * cleared, and leaf lpls whose parent is empty or has lost its lgroup are
 * re-parented to the root lpl and added to its rset.  The root lpl's CPU
 * count is then recomputed from its rset and its children's parent links
 * are refreshed.  Returns levels on success.
 */
int
lpl_topo_flatten(int levels)
{
	cpupart_t	*part;
	lpl_t		*lpl_root;
	uint_t		total;
	int		slot;

	if (levels != 2)
		return (0);

	ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
	    !lgrp_initialized);

	part = cp_list_head;
	do {
		lpl_root = &part->cp_lgrploads[lgrp_root->lgrp_id];
		ASSERT(LGRP_EXISTS(lgrp_root) && (lpl_root->lpl_ncpu > 0));

		for (slot = 0; slot <= lgrp_alloc_max; slot++) {
			lgrp_t	*lgrp_cur = lgrp_table[slot];
			lpl_t	*lpl_cur = &part->cp_lgrploads[slot];
			int	exists = LGRP_EXISTS(lgrp_cur) ? 1 : 0;

			/* Root, and empty lpls of dead lgroups, stay as is. */
			if ((lgrp_cur == lgrp_root) ||
			    (!exists && (lpl_cur->lpl_ncpu == 0)))
				continue;

			/* Leftover lpl of a deleted lgroup: wipe it. */
			if (!exists && (lpl_cur->lpl_ncpu > 0)) {
				lpl_clear(lpl_cur);
				continue;
			}

			/*
			 * Leaf whose parent is empty or has no lgroup any
			 * more: hang it directly off the root.
			 */
			if ((lpl_cur->lpl_nrset == 1) &&
			    (lpl_cur->lpl_rset[0] == lpl_cur) &&
			    ((lpl_cur->lpl_parent->lpl_ncpu == 0) ||
			    (!LGRP_EXISTS(lpl_cur->lpl_parent->lpl_lgrp)))) {
				lpl_cur->lpl_parent = lpl_root;
				lpl_rset_add(lpl_root, lpl_cur);
			}
		}

		/* Recompute the root's CPU count from scratch. */
		total = 0;
		for (slot = 0; slot < lpl_root->lpl_nrset; slot++) {
			total += lpl_root->lpl_rset[slot]->lpl_ncpu;
		}
		lpl_root->lpl_ncpu = total;
		lpl_child_update(lpl_root, part);

		part = part->cp_next;
	} while (part != cp_list_head);

	return (levels);
}
/*
 * Insert a leaf lpl into the lpl hierarchy of a CPU partition.
 *
 * Every lgroup that lists the leaf as a CPU resource and has leaves in the
 * partition gets its lpl (re)built: the leaf itself only has its parent
 * link set, while each other lpl is cleared, re-initialized around the
 * leaf, given its parent, populated with any additional CPU resources the
 * lgroup shares with the partition, and has its children's parent links
 * refreshed.
 */
void
lpl_leaf_insert(lpl_t *lpl_leaf, cpupart_t *cpupart)
{
	klgrpset_t	rset_intersect;
	int		rset_num_intersect;
	int		slot;
	int		cand;

	for (slot = 0; slot <= lgrp_alloc_max; slot++) {
		lgrp_t	*lgrp_cur = lgrp_table[slot];
		lpl_t	*lpl_cur;
		lpl_t	*lpl_parent;

		if (!LGRP_EXISTS(lgrp_cur))
			continue;

		/* The lgroup must count the leaf among its CPU resources. */
		if (!klgrpset_ismember(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
		    lpl_leaf->lpl_lgrpid))
			continue;

		/* The lgroup must have leaves in this partition. */
		if (!klgrpset_intersects(lgrp_cur->lgrp_leaves,
		    cpupart->cp_lgrpset))
			continue;

		lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];
		lpl_parent = (lgrp_cur->lgrp_parent != NULL) ?
		    &cpupart->cp_lgrploads[lgrp_cur->lgrp_parent->lgrp_id] :
		    NULL;

		/* The leaf itself only needs its parent link. */
		if (lpl_cur == lpl_leaf) {
			lpl_cur->lpl_parent = lpl_parent;
			continue;
		}

		/* Rebuild this lpl around the leaf. */
		lpl_clear(lpl_cur);
		lpl_init(lpl_cur, lpl_leaf, lgrp_cur);
		lpl_cur->lpl_parent = lpl_parent;

		/* More CPU resources than just the leaf in this partition? */
		rset_intersect =
		    klgrpset_intersects(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
		    cpupart->cp_lgrpset);
		klgrpset_nlgrps(rset_intersect, rset_num_intersect);

		if (rset_num_intersect > 1) {
			for (cand = 0; cand <= lgrp_alloc_max; cand++) {
				lgrp_t	*lgrp_cand = lgrp_table[cand];

				if (!LGRP_EXISTS(lgrp_cand) ||
				    !klgrpset_ismember(rset_intersect,
				    lgrp_cand->lgrp_id))
					continue;

				lpl_rset_add(lpl_cur,
				    &cpupart->cp_lgrploads[lgrp_cand->lgrp_id]);
			}
		}

		/* The rebuilt lpl may be new to its children. */
		lpl_child_update(lpl_cur, cpupart);
	}
}
/*
 * Remove a leaf lpl from every other lpl in `cpupart` whose lgroup lists
 * the leaf among its CPU resources, then clear the leaf itself.
 *
 * An lpl is cleared outright when its rset becomes empty or when its
 * lgroup had no leaves in common with the partition (sampled before the
 * deletion); otherwise its children are updated.
 */
void
lpl_leaf_remove(lpl_t *lpl_leaf, cpupart_t *cpupart)
{
	int	idx;

	for (idx = 0; idx <= lgrp_alloc_max; idx++) {
		lgrp_t		*lgrp = lgrp_table[idx];
		lpl_t		*lpl;
		klgrpset_t	still_shared;

		if (!LGRP_EXISTS(lgrp))
			continue;

		lpl = &cpupart->cp_lgrploads[lgrp->lgrp_id];

		/* The leaf itself is handled after the loop. */
		if (lpl == lpl_leaf)
			continue;

		if (!klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_CPU],
		    lpl_leaf->lpl_lgrpid))
			continue;

		/* Sample the intersection before changing the rset. */
		still_shared = klgrpset_intersects(lgrp->lgrp_leaves,
		    cpupart->cp_lgrpset);

		lpl_rset_del(lpl, lpl_leaf);

		if (lpl->lpl_nrset != 0 && still_shared)
			lpl_child_update(lpl, cpupart);
		else
			lpl_clear(lpl);
	}

	lpl_clear(lpl_leaf);
}
/*
 * Attach cpu `cp` to the leaf lpl for lgroup `lgrpid` in the cpu's
 * partition.
 *
 * The first cpu added to a leaf initialises the lpl, marks the lgroup in
 * the partition's lgroup set and inserts the leaf into the lpl topology;
 * later cpus only adjust the cpu counts.  The cpu is then linked at the
 * tail of the leaf's circular cpu list.
 */
void
lgrp_part_add_cpu(cpu_t *cp, lgrp_id_t lgrpid)
{
	cpupart_t	*part;
	lgrp_t		*leaf_lgrp;
	lpl_t		*leaf;
	cpu_t		*head;
	cpu_t		*tail;
	int		was_empty;

	ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

	part = cp->cpu_part;
	leaf_lgrp = lgrp_table[lgrpid];
	ASSERT(LGRP_EXISTS(leaf_lgrp));

	leaf = &part->cp_lgrploads[lgrpid];
	cp->cpu_lpl = leaf;

	/* The count is bumped before any initialisation work is done. */
	was_empty = (leaf->lpl_ncpu == 0);
	leaf->lpl_ncpu++;

	if (!was_empty) {
		lpl_cpu_adjcnt(LPL_INCREMENT, cp);
	} else {
		lpl_init(leaf, leaf, leaf_lgrp);
		klgrpset_add(part->cp_lgrpset, lgrpid);
		lpl_leaf_insert(leaf, part);
	}

	head = leaf->lpl_cpus;
	if (head == NULL) {
		/* First cpu: a one-element ring pointing at itself. */
		ASSERT(leaf->lpl_ncpu == 1);
		cp->cpu_prev_lpl = cp;
		cp->cpu_next_lpl = cp;
		leaf->lpl_cpus = cp;
		return;
	}

	/* Splice in just before the head, i.e. at the tail of the ring. */
	tail = head->cpu_prev_lpl;
	cp->cpu_next_lpl = head;
	cp->cpu_prev_lpl = tail;
	tail->cpu_next_lpl = cp;
	head->cpu_prev_lpl = cp;
}
/*
 * Detach cpu `cp` from its leaf lpl.
 *
 * When the cpu was the last one in the leaf, the lgroup is removed from
 * the partition's lgroup set, the list pointers are reset and the leaf is
 * removed from the lpl topology.  Otherwise the cpu is unlinked from the
 * leaf's circular list and the cpu counts are adjusted.  In both cases the
 * cpu's lpl pointer is cleared on return.
 */
void
lgrp_part_del_cpu(cpu_t *cp)
{
	lpl_t	*leaf;
	lgrp_t	*leaf_lgrp;
	cpu_t	*before;
	cpu_t	*after;

	ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

	leaf = cp->cpu_lpl;
	leaf_lgrp = leaf->lpl_lgrp;
	ASSERT(LGRP_EXISTS(leaf_lgrp));
	ASSERT(leaf->lpl_ncpu);

	leaf->lpl_ncpu--;
	if (leaf->lpl_ncpu != 0) {
		/* Other cpus remain: unlink this one from the ring. */
		before = cp->cpu_prev_lpl;
		after = cp->cpu_next_lpl;
		before->cpu_next_lpl = after;
		after->cpu_prev_lpl = before;
		if (leaf->lpl_cpus == cp)
			leaf->lpl_cpus = cp->cpu_next_lpl;

		lpl_cpu_adjcnt(LPL_DECREMENT, cp);
	} else {
		/* Last cpu: retire the leaf from this partition. */
		klgrpset_del(cp->cpu_part->cp_lgrpset, leaf->lpl_lgrpid);
		cp->cpu_prev_lpl = NULL;
		cp->cpu_next_lpl = NULL;
		leaf->lpl_cpus = NULL;
		lpl_leaf_remove(leaf, cp->cpu_part);
	}

	cp->cpu_lpl = NULL;
}
/*
 * Update the load average of `lpl` and of every ancestor reached through
 * lpl_parent.
 *
 * lpl     - lpl to start from; NULL (or an lpl with no cpus) is a no-op.
 * nrcpus  - load sample folded into the average when aging.
 * ageflag - non-zero: age the existing average towards `nrcpus`;
 *           zero: only add one increment of load without aging.
 *
 * All updates are done with a compare-and-swap loop so that concurrent
 * modifications of lpl_loadavg are not lost.  The result is always kept
 * within [0, LGRP_LOADAVG_MAX].
 */
void
lgrp_loadavg(lpl_t *lpl, uint_t nrcpus, int ageflag)
{
	uint_t		ncpu;
	int64_t		old, new, f;

	/*
	 * Per-cpu-count weighting factors, indexed by the number of cpus
	 * in the lpl.
	 * NOTE(review): the values look like precomputed decay
	 * coefficients; confirm their derivation before changing them.
	 */
	static short expval[] = {
		0, 3196, 1618, 1083,
		814, 652, 543, 466,
		408, 363, 326, 297,
		272, 251, 233, 218,
		204, 192, 181, 172,
		163, 155, 148, 142,
		136, 130, 125, 121,
		116, 112, 109, 105
	};

	if ((lpl == NULL) ||
	    ((ncpu = lpl->lpl_ncpu) == 0)) {
		return;
	}

	for (;;) {
		/* Beyond the table, scale the single-cpu factor instead. */
		if (ncpu >= sizeof (expval) / sizeof (expval[0]))
			f = expval[1]/ncpu;
		else
			f = expval[ncpu];

		if (ageflag) {
			int64_t		q, r;

			do {
				old = new = lpl->lpl_loadavg;
				/*
				 * Split the old value into high and low
				 * 16-bit halves so the intermediate
				 * products are computed separately.
				 */
				q = (old >> 16) << 7;
				r = (old & 0xffff) << 7;
				new += ((long long)(nrcpus - q) * f -
				    ((r * f) >> 16)) >> 7;

				/* Clamp to the representable range. */
				if (new > LGRP_LOADAVG_MAX)
					new = LGRP_LOADAVG_MAX;
				else if (new < 0)
					new = 0;
			} while (atomic_cas_32((lgrp_load_t *)&lpl->lpl_loadavg,
			    old, new) != old);
		} else {
			do {
				old = new = lpl->lpl_loadavg;
				new += f;

				/*
				 * `new` is 64 bits wide, so the sum never
				 * wraps and a "new < old" test can never
				 * fire.  Compare against the maximum
				 * explicitly so the value handed to the
				 * 32-bit compare-and-swap is never silently
				 * truncated.
				 */
				if (new > LGRP_LOADAVG_MAX)
					new = LGRP_LOADAVG_MAX;
			} while (atomic_cas_32((lgrp_load_t *)&lpl->lpl_loadavg,
			    old, new) != old);
		}

		/* Repeat for the parent, using the parent's cpu count. */
		if ((lpl = lpl->lpl_parent) == NULL)
			break;
		ncpu = lpl->lpl_ncpu;
	}
}
/*
 * Copy the bootstrap lpls into `target` (the default partition's lpl
 * array, `size` entries long) and rebase every pointer that referred into
 * the bootstrap array so that it refers to the same slot of `target`.
 *
 * Each target lpl keeps its own rset storage, id-to-rset storage and rset
 * size.  Afterwards the bootstrap storage is zeroed and the first
 * bootstrap lpl is pointed back at the (now empty) bootstrap rset arrays.
 */
void
lpl_topo_bootstrap(lpl_t *target, int size)
{
	const uintptr_t	src_base = (uintptr_t)lpl_bootstrap;
	const uintptr_t	dst_base = (uintptr_t)target;
	int		ncopy;
	int		n;

	ASSERT(target == cp_default.cp_lgrploads);
	ASSERT(size == cp_default.cp_nlgrploads);
	ASSERT(!lgrp_topo_initialized);
	ASSERT(ncpus == 1);

	ncopy = MIN(LPL_BOOTSTRAP_SIZE, size);
	for (n = 0; n < ncopy; n++) {
		lpl_t	*src = &lpl_bootstrap[n];
		lpl_t	*dst = &target[n];
		lpl_t	**keep_rset = dst->lpl_rset;
		int	*keep_id2rset = dst->lpl_id2rset;
		int	keep_sz = dst->lpl_rset_sz;
		int	slot;

		/* Bulk copy, then restore the target's own storage. */
		*dst = *src;
		dst->lpl_rset_sz = keep_sz;
		dst->lpl_rset = keep_rset;
		dst->lpl_id2rset = keep_id2rset;

		/* Move the running cpu over to the target lpl. */
		if (src->lpl_cpus == CPU) {
			ASSERT(CPU->cpu_lpl == src);
			CPU->cpu_lpl = dst;
		}

		/* Rebase the parent pointer onto the target array. */
		if (src->lpl_parent != NULL) {
			dst->lpl_parent = (lpl_t *)
			    (((uintptr_t)src->lpl_parent - src_base) +
			    dst_base);
		}

		/* Rebase rset entries and copy the id-to-rset mapping. */
		ASSERT(src->lpl_nrset <= 1);
		for (slot = 0; slot < src->lpl_nrset; slot++) {
			if (src->lpl_rset[slot] != NULL) {
				dst->lpl_rset[slot] = (lpl_t *)
				    (((uintptr_t)src->lpl_rset[slot] -
				    src_base) + dst_base);
			}
			dst->lpl_id2rset[slot] = src->lpl_id2rset[slot];
		}
	}

	/* Reset the bootstrap storage to an empty, self-consistent state. */
	bzero(lpl_bootstrap_list, sizeof (lpl_bootstrap_list));
	bzero(lpl_bootstrap_id2rset, sizeof (lpl_bootstrap_id2rset));
	bzero(lpl_bootstrap_rset, sizeof (lpl_bootstrap_rset));

	lpl_bootstrap_list[0].lpl_rset = lpl_bootstrap_rset;
	lpl_bootstrap_list[0].lpl_id2rset = lpl_bootstrap_id2rset;
}
/*
 * Tunables consulted by lgrp_choose() and lpl_pick().
 *
 * lgrp_expand_proc_thresh: per-cpu-scaled load above which lgrp_choose()
 * considers an lgroup the process does not already occupy.
 */
#define	LGRP_EXPAND_PROC_THRESH_DEFAULT	62250
lgrp_load_t lgrp_expand_proc_thresh = LGRP_EXPAND_PROC_THRESH_DEFAULT;
#define	LGRP_EXPAND_PROC_THRESH(ncpu) \
	((lgrp_expand_proc_thresh) / (ncpu))

/*
 * lgrp_expand_proc_diff: per-cpu-scaled margin by which the other lgroup
 * must be less loaded before lgrp_choose() switches to it.
 */
#define	LGRP_EXPAND_PROC_DIFF_DEFAULT	60000
lgrp_load_t lgrp_expand_proc_diff = LGRP_EXPAND_PROC_DIFF_DEFAULT;
#define	LGRP_EXPAND_PROC_DIFF(ncpu) \
	((lgrp_expand_proc_diff) / (ncpu))

/*
 * lgrp_loadavg_tolerance: per-cpu-scaled slack used by lpl_pick() when
 * comparing two load averages.  The macro argument is parenthesized so
 * that expressions such as "a + b" divide as a whole.
 */
uint32_t lgrp_loadavg_tolerance = LGRP_LOADAVG_THREAD_MAX;
#define	LGRP_LOADAVG_TOLERANCE(ncpu) \
	((lgrp_loadavg_tolerance) / (ncpu))

/*
 * lgrp_load_thresh: when not UINT32_MAX and both best loads found by
 * lgrp_choose() reach it, the root lgroup's lpl is returned instead.
 */
uint32_t lgrp_load_thresh = UINT32_MAX;

/*
 * lgrp_mem_free_thresh: when non-zero, lgrp_choose() skips lgroups with
 * fewer free pages than this.
 */
pgcnt_t lgrp_mem_free_thresh = 0;

/*
 * How lgrp_choose() picks the lgroup at which its search starts.
 */
#define	LGRP_CHOOSE_RANDOM	1
#define	LGRP_CHOOSE_RR		2
#define	LGRP_CHOOSE_TIME	3
int lgrp_choose_policy = LGRP_CHOOSE_TIME;
/*
 * Choose a home lpl for thread `t` within cpu partition `cpupart`.
 *
 * A starting lgroup is selected according to lgrp_choose_policy.  If the
 * thread has lgroup affinities and lgrp_affinity_best() finds an lpl, that
 * lpl is returned.  Otherwise every leaf lgroup in the partition is
 * examined once, starting from the chosen lgroup and wrapping at
 * lgrp_alloc_max, tracking:
 *   - bestlpl:  best lpl among lgroups the process already occupies (or
 *               any lgroup when the process occupies none), and
 *   - bestrlpl: best lpl among the remaining lgroups.
 * The remaining-lgroup candidate is used when there is no preferred one,
 * or when the preferred one is loaded beyond the expansion threshold and
 * the other is lighter by more than the expansion difference.
 *
 * Returns the chosen lpl; the partition's lgroup hint and the lpl's
 * lpl_homed_time are updated.  When lgrp_load_thresh is set and both best
 * loads reach it, the root lgroup's lpl is returned without those updates.
 */
lpl_t *
lgrp_choose(kthread_t *t, cpupart_t *cpupart)
{
	lgrp_load_t	bestload, bestrload;
	int		lgrpid_offset, lgrp_count;
	lgrp_id_t	lgrpid, lgrpid_start;
	lpl_t		*lpl, *bestlpl, *bestrlpl;
	klgrpset_t	lgrpset;
	proc_t		*p;

	ASSERT(t != NULL);
	ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
	    THREAD_LOCK_HELD(t));
	ASSERT(cpupart != NULL);

	p = t->t_procp;

	ASSERT(!klgrpset_isempty(cpupart->cp_lgrpset));

	bestlpl = bestrlpl = NULL;
	bestload = bestrload = LGRP_LOADAVG_MAX;
	lgrpset = cpupart->cp_lgrpset;

	switch (lgrp_choose_policy) {
	case LGRP_CHOOSE_RR:
		/* Next member of the partition's set after the last hint. */
		lgrpid = cpupart->cp_lgrp_hint;
		do {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;
		} while (!klgrpset_ismember(lgrpset, lgrpid));
		break;
	default:
	case LGRP_CHOOSE_TIME:
	case LGRP_CHOOSE_RANDOM:
		/* Use the clock to pick the n-th member of the set. */
		klgrpset_nlgrps(lgrpset, lgrp_count);
		lgrpid_offset =
		    (((ushort_t)(gethrtime() >> 4)) % lgrp_count) + 1;
		for (lgrpid = 0; ; lgrpid++) {
			if (klgrpset_ismember(lgrpset, lgrpid)) {
				if (--lgrpid_offset == 0)
					break;
			}
		}
		break;
	}

	lgrpid_start = lgrpid;

	DTRACE_PROBE2(lgrp_choose_start, lgrp_id_t, lgrpid_start,
	    lgrp_id_t, cpupart->cp_lgrp_hint);

	/* Thread affinities, when present and satisfiable, take priority. */
	if (t->t_lgrp_affinity) {
		lpl = lgrp_affinity_best(t, cpupart, lgrpid_start, B_FALSE);
		if (lpl != NULL)
			return (lpl);
	}

	ASSERT(klgrpset_ismember(lgrpset, lgrpid_start));

	do {
		pgcnt_t	npgs;

		/* Skip lgroups that are not part of this partition. */
		if (!klgrpset_ismember(lgrpset, lgrpid)) {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;
			continue;
		}

		/*
		 * Skip non-leaf lgroups.  The search position must be
		 * advanced here as well: a bare "continue" in a do-while
		 * re-tests the loop condition with lgrpid unchanged, which
		 * would either end the search at once or never terminate.
		 */
		if (lgrp_table[lgrpid]->lgrp_childcnt != 0) {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;
			continue;
		}

		/* Skip lgroups short on free memory, when so configured. */
		if (lgrp_mem_free_thresh > 0) {
			npgs = lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_FREE);
			if (npgs < lgrp_mem_free_thresh) {
				if (++lgrpid > lgrp_alloc_max)
					lgrpid = 0;
				continue;
			}
		}

		lpl = &cpupart->cp_lgrploads[lgrpid];
		if (klgrpset_isempty(p->p_lgrpset) ||
		    klgrpset_ismember(p->p_lgrpset, lgrpid)) {
			/* Preferred: the process is (or may be) here. */
			if (bestlpl == NULL ||
			    lpl_pick(lpl, bestlpl)) {
				bestload = lpl->lpl_loadavg;
				bestlpl = lpl;
			}
		} else {
			/* Remaining lgroups: the process is not here yet. */
			if (bestrlpl == NULL ||
			    lpl_pick(lpl, bestrlpl)) {
				bestrload = lpl->lpl_loadavg;
				bestrlpl = lpl;
			}
		}

		if (++lgrpid > lgrp_alloc_max)
			lgrpid = 0;
	} while (lgrpid != lgrpid_start);

	/* Everything is too loaded: fall back to the root lgroup's lpl. */
	if (lgrp_load_thresh != UINT32_MAX &&
	    bestload >= lgrp_load_thresh && bestrload >= lgrp_load_thresh)
		return (&cpupart->cp_lgrploads[lgrp_root->lgrp_id]);

	/*
	 * Expand to another lgroup when there is no preferred candidate,
	 * or when one exists and is sufficiently lighter.  bestrlpl is
	 * checked before it is dereferenced.
	 */
	if ((bestlpl == NULL) ||
	    ((bestrlpl != NULL) &&
	    (bestload > LGRP_EXPAND_PROC_THRESH(bestlpl->lpl_ncpu)) &&
	    (bestrload < bestload) &&
	    (bestrload + LGRP_EXPAND_PROC_DIFF(bestrlpl->lpl_ncpu) <
	    bestload))) {
		bestlpl = bestrlpl;
	}

	/* Nothing qualified at all: use the starting lgroup. */
	if (bestlpl == NULL) {
		bestlpl = &cpupart->cp_lgrploads[lgrpid_start];
	}

	cpupart->cp_lgrp_hint = bestlpl->lpl_lgrpid;
	bestlpl->lpl_homed_time = gethrtime_unscaled();

	ASSERT(bestlpl->lpl_ncpu > 0);
	return (bestlpl);
}
/*
 * Decide whether lpl1 is a better choice than lpl2.
 *
 * Returns 1 when lpl1's load is lower than lpl2's by more than the
 * tolerance, or, under the LGRP_CHOOSE_TIME policy, when lpl1's load is
 * lower but within the tolerance and lpl1 has the smaller lpl_homed_time.
 * Returns 0 otherwise.
 */
static int
lpl_pick(lpl_t *lpl1, lpl_t *lpl2)
{
	lgrp_load_t	slack = LGRP_LOADAVG_TOLERANCE(lpl1->lpl_ncpu);
	lgrp_load_t	load1 = lpl1->lpl_loadavg;
	lgrp_load_t	load2 = lpl2->lpl_loadavg;

	/* lpl1 must be strictly less loaded in every winning case. */
	if (load1 >= load2)
		return (0);

	/* Clearly lighter, beyond the tolerance. */
	if (load1 + slack < load2)
		return (1);

	/* Within the tolerance: let the homing time break the tie. */
	if (lgrp_choose_policy == LGRP_CHOOSE_TIME &&
	    lpl1->lpl_homed_time < lpl2->lpl_homed_time)
		return (1);

	return (0);
}
/*
 * Running count maintained by lgrp_update_trthr_migrations().
 */
static uint64_t lgrp_trthr_moves = 0;

/*
 * Return the current value of the lgrp_trthr_moves counter.
 */
uint64_t
lgrp_get_trthr_migrations(void)
{
	uint64_t	snapshot;

	snapshot = lgrp_trthr_moves;
	return (snapshot);
}
/*
 * Atomically add `incr` to the lgrp_trthr_moves counter.
 */
void
lgrp_update_trthr_migrations(uint64_t incr)
{
	uint64_t	*counter = &lgrp_trthr_moves;

	atomic_add_64(counter, incr);
}
/*
 * Window (in nanoseconds) after a thread was homed during which
 * lgrp_move_thread() takes back the load it added for that thread.
 */
#define LGRP_MIN_NSEC (NANOSEC / 10)
hrtime_t lgrp_min_nsec = LGRP_MIN_NSEC;
/*
 * Move thread `t` from its current lpl to `newlpl`.
 *
 * t                 - thread being moved.
 * newlpl            - destination lpl; NULL means the thread gets no new
 *                     lpl (only the bookkeeping for the old one is done).
 * do_lgrpset_delete - when non-zero, the old lgroup is removed from the
 *                     process' p_lgrpset if no other thread of the
 *                     process is assigned to it.
 *
 * Load averages of the old and new lpls (and their ancestors) are
 * adjusted with compare-and-swap loops.
 */
void
lgrp_move_thread(kthread_t *t, lpl_t *newlpl, int do_lgrpset_delete)
{
proc_t *p;
lpl_t *lpl, *oldlpl;
lgrp_id_t oldid;
kthread_t *tp;
uint_t ncpu;
lgrp_load_t old, new;
ASSERT(t);
ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
THREAD_LOCK_HELD(t));
/* Nothing to do when the thread is already on the requested lpl. */
if ((oldlpl = t->t_lpl) == newlpl)
return;
/*
 * A thread flagged TP_LWPEXIT only has its lpl pointer updated; no
 * process or load bookkeeping is done for it.
 */
if (t->t_proc_flag & TP_LWPEXIT) {
if (newlpl != NULL) {
t->t_lpl = newlpl;
}
return;
}
p = ttoproc(t);
/* Account for leaving the old lpl, if there was one. */
if ((oldlpl != NULL) &&
(p->p_tlist != NULL)) {
oldid = oldlpl->lpl_lgrpid;
/* Only an actual move to another lpl counts as a migration. */
if (newlpl != NULL)
lgrp_stat_add(oldid, LGRP_NUM_MIGR, 1);
if ((do_lgrpset_delete) &&
(klgrpset_ismember(p->p_lgrpset, oldid))) {
/*
 * Walk the process' circular thread list.  Stop at the first other
 * thread assigned to the same lgroup id; if the walk reaches the
 * list head without finding one, drop the lgroup from p_lgrpset.
 */
for (tp = p->p_tlist->t_forw; ; tp = tp->t_forw) {
ASSERT(tp->t_lpl != NULL);
if ((tp != t) &&
(tp->t_lpl->lpl_lgrpid == oldid)) {
break;
} else if (tp == p->p_tlist) {
klgrpset_del(p->p_lgrpset, oldid);
break;
}
}
}
/*
 * If the thread was homed less than lgrp_min_nsec ago, subtract the
 * load that was added on its behalf from the old lpl and from each
 * of its ancestors.
 */
if ((t->t_anttime + lgrp_min_nsec > gethrtime()) &&
((ncpu = oldlpl->lpl_ncpu) > 0)) {
lpl = oldlpl;
for (;;) {
do {
old = new = lpl->lpl_loadavg;
new -= LGRP_LOADAVG_MAX_EFFECT(ncpu);
/* Unsigned wrap-around means underflow: clamp at zero. */
if (new > old) {
new = 0;
}
} while (atomic_cas_32(
(lgrp_load_t *)&lpl->lpl_loadavg, old,
new) != old);
lpl = lpl->lpl_parent;
if (lpl == NULL)
break;
ncpu = lpl->lpl_ncpu;
ASSERT(ncpu > 0);
}
}
}
/* Account for arriving on the new lpl, if there is one. */
if (newlpl != NULL) {
t->t_lpl = newlpl;
/*
 * For the thread with t_tid 1, record the new lgroup id in the
 * process and count a migration when it now differs from a set
 * p_tr_lgrpid.
 */
if (t->t_tid == 1 && p->p_t1_lgrpid != newlpl->lpl_lgrpid) {
p->p_t1_lgrpid = newlpl->lpl_lgrpid;
membar_producer();
if (p->p_tr_lgrpid != LGRP_NONE &&
p->p_tr_lgrpid != p->p_t1_lgrpid) {
lgrp_update_trthr_migrations(1);
}
}
/* No p_lgrpset or load updates when moving to the root lgroup. */
if (lgrp_table[newlpl->lpl_lgrpid] == lgrp_root)
return;
if (!klgrpset_ismember(p->p_lgrpset, newlpl->lpl_lgrpid)) {
klgrpset_add(p->p_lgrpset, newlpl->lpl_lgrpid);
}
/*
 * Add this thread's load to the new lpl and to each of its
 * ancestors, scaled by each lpl's cpu count.
 */
lpl = newlpl;
for (;;) {
ncpu = lpl->lpl_ncpu;
ASSERT(ncpu > 0);
do {
old = new = lpl->lpl_loadavg;
new += LGRP_LOADAVG_MAX_EFFECT(ncpu);
/* Unsigned wrap-around means overflow: saturate. */
if (new < old)
new = UINT32_MAX;
} while (atomic_cas_32((lgrp_load_t *)&lpl->lpl_loadavg,
old, new) != old);
lpl = lpl->lpl_parent;
if (lpl == NULL)
break;
}
/* Remember when the load was added (see the check above). */
t->t_anttime = gethrtime();
}
}
/*
 * Translate madvise-style `advice` into an lgroup memory allocation
 * policy.  MADV_ACCESS_LWP maps to LGRP_MEM_POLICY_NEXT, MADV_ACCESS_MANY
 * to LGRP_MEM_POLICY_RANDOM; anything else yields the default policy for
 * the given `size` and mapping `type`.
 */
lgrp_mem_policy_t
lgrp_madv_to_policy(uchar_t advice, size_t size, int type)
{
	if (advice == MADV_ACCESS_LWP)
		return (LGRP_MEM_POLICY_NEXT);

	if (advice == MADV_ACCESS_MANY)
		return (LGRP_MEM_POLICY_RANDOM);

	return (lgrp_mem_policy_default(size, type));
}
/*
 * Return the default memory allocation policy for a mapping of `size`
 * bytes and mapping `type`.
 *
 * Mappings no larger than the applicable threshold
 * (lgrp_shm_random_thresh for MAP_SHARED, lgrp_privm_random_thresh
 * otherwise) get lgrp_mem_default_policy.  Larger ones get a random
 * policy: restricted to the current thread's partition when
 * lgrp_mem_pset_aware is set and the mapping is smaller than the
 * partition's total memory size, machine-wide otherwise.
 */
lgrp_mem_policy_t
lgrp_mem_policy_default(size_t size, int type)
{
	cpupart_t	*cp;
	size_t		pset_mem_size;
	size_t		thresh;

	if (type == MAP_SHARED)
		thresh = lgrp_shm_random_thresh;
	else
		thresh = lgrp_privm_random_thresh;

	if (size <= thresh)
		return (lgrp_mem_default_policy);

	/* Total memory size of the current thread's partition. */
	kpreempt_disable();
	cp = curthread->t_cpupart;
	klgrpset_totalsize(cp->cp_lgrpset, pset_mem_size);
	kpreempt_enable();

	if (lgrp_mem_pset_aware && size < pset_mem_size)
		return (LGRP_MEM_POLICY_RANDOM_PSET);

	return (LGRP_MEM_POLICY_RANDOM);
}
/*
 * Return the memory policy info for address `vaddr` in segment `seg`.
 *
 * Only segments driven by segvn_ops, segspt_ops or segspt_shmops are
 * considered; for any other segment, or when the segment driver has no
 * getpolicy operation, NULL is returned.
 */
lgrp_mem_policy_info_t *
lgrp_mem_policy_get(struct seg *seg, caddr_t vaddr)
{
	extern struct seg_ops segspt_ops;
	extern struct seg_ops segspt_shmops;

	if (seg->s_ops != &segvn_ops && seg->s_ops != &segspt_ops &&
	    seg->s_ops != &segspt_shmops)
		return (NULL);

	if (seg->s_ops->getpolicy == NULL)
		return (NULL);

	return (SEGOP_GETPOLICY(seg, vaddr));
}
/*
 * Record `policy` in `policy_info` for private memory of `size` bytes.
 * LGRP_MEM_POLICY_DEFAULT is first resolved through
 * lgrp_mem_policy_default() for a MAP_PRIVATE mapping.
 *
 * Returns 1 when the resolved policy was already in place (nothing is
 * modified) and 0 when it was newly stored, in which case mem_lgrpid is
 * reset to LGRP_NONE.
 */
int
lgrp_privm_policy_set(lgrp_mem_policy_t policy,
    lgrp_mem_policy_info_t *policy_info, size_t size)
{
	lgrp_mem_policy_t	wanted = policy;

	ASSERT(policy_info != NULL);

	if (wanted == LGRP_MEM_POLICY_DEFAULT)
		wanted = lgrp_mem_policy_default(size, MAP_PRIVATE);

	if (policy_info->mem_policy != wanted) {
		policy_info->mem_policy = wanted;
		policy_info->mem_lgrpid = LGRP_NONE;
		return (0);
	}

	return (1);
}
/*
 * Look up the shared memory policy that covers a given position of a
 * shared object.
 *
 * The object is identified by `amp` (position is `anon_index`, converted
 * to a byte offset) or, when `amp` is NULL, by `vp` (position is
 * `vn_off`).  Returns a pointer to the policy info of the policy segment
 * containing that offset, or NULL when the object has no locality info,
 * no policy tree, or no segment covering the offset.
 */
lgrp_mem_policy_info_t *
lgrp_shm_policy_get(struct anon_map *amp, ulong_t anon_index, vnode_t *vp,
    u_offset_t vn_off)
{
	lgrp_shm_locality_t	*loc;
	lgrp_shm_policy_seg_t	*hit;
	lgrp_mem_policy_info_t	*result = NULL;
	avl_tree_t		*tree;
	avl_index_t		where;
	u_offset_t		key;

	if (amp != NULL) {
		ASSERT(amp->refcnt != 0);
		loc = amp->locality;
		key = ptob(anon_index);
	} else if (vp != NULL) {
		loc = vp->v_locality;
		key = vn_off;
	} else {
		return (NULL);
	}

	if (loc == NULL)
		return (NULL);

	/* The reference count is only checked for vnodes. */
	ASSERT(amp != NULL || loc->loc_count != 0);

	tree = loc->loc_tree;
	if (tree == NULL)
		return (NULL);

	rw_enter(&loc->loc_lock, RW_READER);
	hit = avl_find(tree, &key, &where);
	if (hit != NULL)
		result = &hit->shm_policy;
	rw_exit(&loc->loc_lock);

	return (result);
}
/*
 * Policy applied by lgrp_mem_choose() to the segkmap segment.
 */
lgrp_mem_policy_t lgrp_segmap_default_policy = LGRP_MEM_POLICY_RANDOM;

/*
 * Choose the lgroup from which memory should be allocated.
 *
 * seg   - segment being allocated for (may be NULL).
 * vaddr - address within the segment.
 * pgsz  - page size, used by the round-robin policy.
 *
 * Returns lgrp_root when there is a single lgroup or the framework is not
 * initialized.  Otherwise the policy is taken from the segment (kernel
 * segments use the default or segkmap policy, with the per-process and
 * per-pset random policies mapped to plain random), overridden by
 * lgrp_mem_policy_root when the home lgroup is the root, and then
 * implemented by the switch below.
 */
lgrp_t *
lgrp_mem_choose(struct seg *seg, caddr_t vaddr, size_t pgsz)
{
	int			i;
	lgrp_t			*lgrp;
	klgrpset_t		lgrpset;
	int			lgrps_spanned;
	unsigned long		off;
	lgrp_mem_policy_t	policy;
	lgrp_mem_policy_info_t	*policy_info;
	ushort_t		random;
	int			stat = 0;
	extern struct seg	*segkmap;

	if (nlgrps == 1 || !lgrp_initialized)
		return (lgrp_root);

	/* Determine the policy that applies to this segment. */
	policy = lgrp_mem_default_policy;
	if (seg != NULL) {
		if (seg->s_as == &kas) {
			if (seg == segkmap)
				policy = lgrp_segmap_default_policy;
			if (policy == LGRP_MEM_POLICY_RANDOM_PROC ||
			    policy == LGRP_MEM_POLICY_RANDOM_PSET)
				policy = LGRP_MEM_POLICY_RANDOM;
		} else {
			policy_info = lgrp_mem_policy_get(seg, vaddr);
			if (policy_info != NULL) {
				policy = policy_info->mem_policy;
				if (policy == LGRP_MEM_POLICY_NEXT_SEG) {
					lgrp_id_t id = policy_info->mem_lgrpid;

					ASSERT(id != LGRP_NONE);
					ASSERT(id < NLGRPS_MAX);
					lgrp = lgrp_table[id];
					if (!LGRP_EXISTS(lgrp)) {
						/* Recorded lgroup is gone. */
						policy = LGRP_MEM_POLICY_NEXT;
					} else {
						lgrp_stat_add(id,
						    LGRP_NUM_NEXT_SEG, 1);
						return (lgrp);
					}
				}
			}
		}
	}
	lgrpset = 0;

	/* Start from the home lgroup. */
	lgrp = lgrp_home_lgrp();

	/* Threads homed on the root use the root policy instead. */
	if (lgrp == lgrp_root)
		policy = lgrp_mem_policy_root;

	switch (policy) {
	case LGRP_MEM_POLICY_NEXT_CPU:
		/* Lgroup of the current cpu, or the root if it has none. */
		if (LGRP_CPU_HAS_NO_LGRP(CPU)) {
			lgrp = lgrp_root;
		} else {
			kpreempt_disable();
			lgrp = lgrp_cpu_to_lgrp(CPU);
			kpreempt_enable();
		}
		break;

	case LGRP_MEM_POLICY_NEXT:
	case LGRP_MEM_POLICY_DEFAULT:
	default:
		/* Home lgroup, unless that is the root. */
		if (lgrp != lgrp_root) {
			if (policy == LGRP_MEM_POLICY_NEXT)
				lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_NEXT, 1);
			else
				lgrp_stat_add(lgrp->lgrp_id,
				    LGRP_NUM_DEFAULT, 1);
			break;
		}
		/* FALLTHROUGH */

	case LGRP_MEM_POLICY_RANDOM:
		/* Random member of the root's memory resource set. */
		lgrpset = lgrp_root->lgrp_set[LGRP_RSRC_MEM];
		klgrpset_nlgrps(lgrpset, lgrps_spanned);

		if (lgrps_spanned == 0) {
			lgrp = lgrp_root;
			break;
		}

		random = (ushort_t)gethrtime() >> 4;
		off = random % lgrps_spanned;
		ASSERT(off <= lgrp_alloc_max);

		/* Walk to the off-th member of the set. */
		for (i = 0; i <= lgrp_alloc_max; i++) {
			if (!klgrpset_ismember(lgrpset, i))
				continue;
			if (off)
				off--;
			else {
				lgrp = lgrp_table[i];
				lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_RANDOM,
				    1);
				break;
			}
		}
		break;

	case LGRP_MEM_POLICY_RANDOM_PROC:
		/* Lgroups spanned by the current process. */
		klgrpset_copy(lgrpset, curproc->p_lgrpset);
		stat = LGRP_NUM_RANDOM_PROC;
		/* FALLTHROUGH */

	case LGRP_MEM_POLICY_RANDOM_PSET:
		if (!stat)
			stat = LGRP_NUM_RANDOM_PSET;

		/* Empty so far: use the current thread's partition. */
		if (klgrpset_isempty(lgrpset)) {
			kpreempt_disable();
			klgrpset_copy(lgrpset,
			    curthread->t_cpupart->cp_lgrpset);
			kpreempt_enable();
		}

		klgrpset_nlgrps(lgrpset, lgrps_spanned);
		ASSERT(lgrps_spanned <= nlgrps);

		if (lgrps_spanned == 0) {
			lgrp = lgrp_root;
			break;
		}

		random = (ushort_t)gethrtime() >> 4;
		off = random % lgrps_spanned;
		ASSERT(off <= lgrp_alloc_max);

		for (i = 0; i <= lgrp_alloc_max; i++) {
			if (!klgrpset_ismember(lgrpset, i))
				continue;
			if (off)
				off--;
			else {
				lgrp = lgrp_table[i];
				/*
				 * Count against the statistic selected
				 * above for this policy rather than the
				 * plain-random counter.
				 */
				lgrp_stat_add(lgrp->lgrp_id, stat, 1);
				break;
			}
		}
		break;

	case LGRP_MEM_POLICY_ROUNDROBIN:
		/*
		 * The page index within the segment selects how many
		 * memory-bearing lgroups to step past the home lgroup.
		 */
		off = ((unsigned long)(vaddr - seg->s_base) / pgsz) %
		    (lgrp_alloc_max + 1);

		kpreempt_disable();
		lgrpset = lgrp_root->lgrp_set[LGRP_RSRC_MEM];
		i = lgrp->lgrp_id;
		kpreempt_enable();

		while (off > 0) {
			i = (i + 1) % (lgrp_alloc_max + 1);
			lgrp = lgrp_table[i];
			if (klgrpset_ismember(lgrpset, i))
				off--;
		}
		lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ROUNDROBIN, 1);
		break;
	}

	ASSERT(lgrp != NULL);
	return (lgrp);
}
/*
 * Return the platform-reported memory size of lgroup `lgrpid` for the
 * given `query`.  Returns 0 when the lgroup does not exist or does not
 * list itself among its own memory resources.
 */
pgcnt_t
lgrp_mem_size(lgrp_id_t lgrpid, lgrp_mem_query_t query)
{
	lgrp_t	*lgrp = lgrp_table[lgrpid];

	if (!LGRP_EXISTS(lgrp))
		return (0);

	if (klgrpset_isempty(lgrp->lgrp_set[LGRP_RSRC_MEM]))
		return (0);

	if (!klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid))
		return (0);

	return (lgrp_plat_mem_size(lgrp->lgrp_plathand, query));
}
/*
 * Set up shared memory locality info for an anon_map or a vnode.
 *
 * amp - anon_map to initialize; when non-NULL, `vp` is ignored.
 * vp  - vnode to initialize when `amp` is NULL.
 *
 * For an anon_map, locality info is allocated once and attached under the
 * anon_map's a_rwlock.  For a vnode, locality info is allocated on first
 * use (V_LOCALITY is set) and its loc_count is incremented on every later
 * call.  In both cases the lock is dropped around the sleeping allocation
 * and the state is re-checked afterwards; a freshly allocated structure
 * is freed if it turns out not to be needed.
 */
void
lgrp_shm_policy_init(struct anon_map *amp, vnode_t *vp)
{
	lgrp_shm_locality_t	*shm_locality;

	if (amp) {
		ANON_LOCK_ENTER(&(amp->a_rwlock), RW_WRITER);
		if (amp->locality == NULL) {
			/* Do not hold the lock across a KM_SLEEP alloc. */
			ANON_LOCK_EXIT(&(amp->a_rwlock));
			shm_locality = kmem_alloc(sizeof (*shm_locality),
			    KM_SLEEP);
			rw_init(&shm_locality->loc_lock, NULL, RW_DEFAULT,
			    NULL);
			shm_locality->loc_count = 1;
			shm_locality->loc_tree = NULL;

			/* Re-check: the field may have been set meanwhile. */
			ANON_LOCK_ENTER(&(amp->a_rwlock), RW_WRITER);
			if (amp->locality != NULL) {
				rw_destroy(&shm_locality->loc_lock);
				kmem_free(shm_locality,
				    sizeof (*shm_locality));
			} else
				amp->locality = shm_locality;
		}
		ANON_LOCK_EXIT(&(amp->a_rwlock));
		return;
	}

	mutex_enter(&vp->v_lock);
	if ((vp->v_flag & V_LOCALITY) == 0) {
		/* Do not hold the mutex across a KM_SLEEP alloc. */
		mutex_exit(&vp->v_lock);
		shm_locality = kmem_alloc(sizeof (*shm_locality), KM_SLEEP);
		rw_init(&shm_locality->loc_lock, NULL, RW_DEFAULT, NULL);
		shm_locality->loc_count = 1;
		shm_locality->loc_tree = NULL;

		mutex_enter(&vp->v_lock);
		if ((vp->v_flag & V_LOCALITY) == 0) {
			vp->v_locality = shm_locality;
			vp->v_flag |= V_LOCALITY;
		} else {
			/* Flag got set meanwhile: use the existing info. */
			rw_destroy(&shm_locality->loc_lock);
			kmem_free(shm_locality, sizeof (*shm_locality));
			shm_locality = vp->v_locality;
			shm_locality->loc_count++;
		}
		mutex_exit(&vp->v_lock);
		return;
	}

	/* Already locality aware: just take another reference. */
	shm_locality = vp->v_locality;
	shm_locality->loc_count++;
	mutex_exit(&vp->v_lock);
}
/*
 * Free every policy segment in `tree` and then the tree itself.
 * A NULL tree is ignored.
 */
void
lgrp_shm_policy_tree_destroy(avl_tree_t *tree)
{
	lgrp_shm_policy_seg_t	*node;
	lgrp_shm_policy_seg_t	*following;

	if (tree == NULL)
		return;

	for (node = (lgrp_shm_policy_seg_t *)avl_first(tree); node != NULL;
	    node = following) {
		/* Fetch the successor before the node is removed. */
		following = AVL_NEXT(tree, node);
		avl_remove(tree, node);
		kmem_free(node, sizeof (*node));
	}

	kmem_free(tree, sizeof (avl_tree_t));
}
/*
 * Tear down shared memory locality info.
 *
 * For an anon_map (`amp` non-NULL) the locality info, if any, is destroyed
 * unconditionally.  For a vnode the reference count is dropped under
 * v_lock and the info is destroyed, and V_LOCALITY cleared, when the count
 * reaches zero.
 */
void
lgrp_shm_policy_fini(struct anon_map *amp, vnode_t *vp)
{
	lgrp_shm_locality_t	*loc;

	if (amp != NULL) {
		loc = amp->locality;
		if (loc != NULL) {
			loc->loc_count = 0;
			rw_destroy(&loc->loc_lock);
			lgrp_shm_policy_tree_destroy(loc->loc_tree);
			kmem_free(loc, sizeof (*loc));
			amp->locality = NULL;
		}
		return;
	}

	mutex_enter(&vp->v_lock);
	loc = vp->v_locality;
	loc->loc_count--;
	if (loc->loc_count == 0) {
		rw_destroy(&loc->loc_lock);
		lgrp_shm_policy_tree_destroy(loc->loc_tree);
		kmem_free(loc, sizeof (*loc));
		vp->v_locality = NULL;
		vp->v_flag &= ~V_LOCALITY;
	}
	mutex_exit(&vp->v_lock);
}
int
lgrp_shm_policy_compar(const void *x, const void *y)
{
lgrp_shm_policy_seg_t *a = (lgrp_shm_policy_seg_t *)x;
lgrp_shm_policy_seg_t *b = (lgrp_shm_policy_seg_t *)y;
if (a->shm_off < b->shm_off)
return (-1);
if (a->shm_off >= b->shm_off + b->shm_size)
return (1);
return (0);
}
/*
 * Merge `seg2` into `seg1` when seg2 starts exactly where seg1 ends and
 * both carry the same memory policy.  On success seg2 is removed from
 * `tree` and freed, and 0 is returned; otherwise nothing changes and -1
 * is returned.  Either segment may be NULL.
 */
static int
lgrp_shm_policy_concat(avl_tree_t *tree, lgrp_shm_policy_seg_t *seg1,
    lgrp_shm_policy_seg_t *seg2)
{
	if (seg1 == NULL || seg2 == NULL)
		return (-1);

	/* The two segments must be exactly adjacent. */
	if (seg1->shm_off + seg1->shm_size != seg2->shm_off)
		return (-1);

	if (seg1->shm_policy.mem_policy != seg2->shm_policy.mem_policy)
		return (-1);

	seg1->shm_size += seg2->shm_size;
	avl_remove(tree, seg2);
	kmem_free(seg2, sizeof (*seg2));

	return (0);
}
/*
 * Split policy segment `seg` at offset `off`.
 *
 * Returns NULL when `seg` is NULL or `off` lies outside the segment,
 * `seg` itself when `off` is at either end (no split needed), and
 * otherwise a newly allocated segment covering [off, end of seg) that
 * has been inserted into `tree` with a copy of seg's policy; `seg` is
 * shortened to end at `off`.
 */
static lgrp_shm_policy_seg_t *
lgrp_shm_policy_split(avl_tree_t *tree, lgrp_shm_policy_seg_t *seg,
    u_offset_t off)
{
	lgrp_shm_policy_seg_t	*tail;
	avl_index_t		where;
	u_offset_t		start;
	u_offset_t		end;

	ASSERT(seg != NULL && (off >= seg->shm_off &&
	    off <= seg->shm_off + seg->shm_size));

	if (seg == NULL)
		return (NULL);

	start = seg->shm_off;
	end = seg->shm_off + seg->shm_size;

	if (off < start || off > end)
		return (NULL);

	if (off == start || off == end)
		return (seg);

	tail = kmem_alloc(sizeof (lgrp_shm_policy_seg_t), KM_SLEEP);
	tail->shm_policy = seg->shm_policy;
	tail->shm_off = off;
	tail->shm_size = seg->shm_size - (off - seg->shm_off);

	/* Shrink the original before looking up the insertion point. */
	seg->shm_size = off - seg->shm_off;

	(void) avl_find(tree, &off, &where);
	avl_insert(tree, tail, where);

	return (tail);
}
/*
 * Set the memory allocation policy for a range of a shared object.
 *
 * policy     - policy to apply; LGRP_MEM_POLICY_DEFAULT is resolved via
 *              lgrp_mem_policy_default() for a MAP_SHARED mapping.
 * amp        - anon_map identifying the object, or NULL.
 * anon_index - starting anon index (used with `amp`).
 * vp         - vnode identifying the object when `amp` is NULL.
 * vn_off     - starting vnode offset (used with `vp`).
 * len        - length of the range in bytes (page aligned).
 *
 * Returns -1 when `len` is 0 or neither object is given, 1 when the walk
 * ends inside a segment that already has the requested policy, and 0
 * otherwise.
 *
 * The object's policy tree is created on demand.  The range is walked
 * from `off`: gaps get new segments, existing segments with a different
 * policy are split as needed and updated, and neighbours with equal
 * policy are concatenated.
 */
int
lgrp_shm_policy_set(lgrp_mem_policy_t policy, struct anon_map *amp,
    ulong_t anon_index, vnode_t *vp, u_offset_t vn_off, size_t len)
{
	u_offset_t		eoff;
	lgrp_shm_policy_seg_t	*next;
	lgrp_shm_policy_seg_t	*newseg;
	u_offset_t		off;
	u_offset_t		oldeoff;
	lgrp_shm_policy_seg_t	*prev;
	int			retval;
	lgrp_shm_policy_seg_t	*seg;
	lgrp_shm_locality_t	*shm_locality;
	avl_tree_t		*tree;
	avl_index_t		where;

	ASSERT(amp || vp);
	ASSERT((len & PAGEOFFSET) == 0);

	if (len == 0)
		return (-1);

	retval = 0;

	/* Locate (creating if necessary) the object's locality info. */
	if (amp) {
		ASSERT(amp->refcnt != 0);
		if (amp->locality == NULL)
			lgrp_shm_policy_init(amp, NULL);
		shm_locality = amp->locality;
		off = ptob(anon_index);
	} else if (vp) {
		if ((vp->v_flag & V_LOCALITY) == 0 || vp->v_locality == NULL)
			lgrp_shm_policy_init(NULL, vp);
		shm_locality = vp->v_locality;
		ASSERT(shm_locality->loc_count != 0);
		off = vn_off;
	} else
		return (-1);

	ASSERT((off & PAGEOFFSET) == 0);

	if (policy == LGRP_MEM_POLICY_DEFAULT)
		policy = lgrp_mem_policy_default(len, MAP_SHARED);

	/*
	 * Create the policy tree if there is none yet.  The lock is
	 * dropped around the sleeping allocation, so the tree pointer is
	 * re-checked after reacquiring it.
	 */
	rw_enter(&shm_locality->loc_lock, RW_WRITER);
	tree = shm_locality->loc_tree;
	if (!tree) {
		rw_exit(&shm_locality->loc_lock);

		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);

		rw_enter(&shm_locality->loc_lock, RW_WRITER);
		if (shm_locality->loc_tree == NULL) {
			avl_create(tree, lgrp_shm_policy_compar,
			    sizeof (lgrp_shm_policy_seg_t),
			    offsetof(lgrp_shm_policy_seg_t, shm_tree));
			shm_locality->loc_tree = tree;
		} else {
			kmem_free(tree, sizeof (*tree));
			tree = shm_locality->loc_tree;
		}
	}

	while (len != 0) {
		seg = avl_find(tree, &off, &where);

		/*
		 * No segment covers `off`: insert a new one that extends
		 * to the end of the request or to the next segment,
		 * whichever comes first.
		 */
		if (seg == NULL) {
			newseg = kmem_alloc(sizeof (lgrp_shm_policy_seg_t),
			    KM_SLEEP);
			newseg->shm_policy.mem_policy = policy;
			newseg->shm_policy.mem_lgrpid = LGRP_NONE;
			newseg->shm_off = off;
			avl_insert(tree, newseg, where);

			seg = AVL_NEXT(tree, newseg);
			if (seg == NULL || off + len <= seg->shm_off) {
				newseg->shm_size = len;
				len = 0;
			} else {
				newseg->shm_size = seg->shm_off - off;
				off = seg->shm_off;
				len -= newseg->shm_size;
			}

			/*
			 * Fetch both neighbours before concatenating,
			 * since concatenation changes the tree.
			 */
			prev = AVL_PREV(tree, newseg);
			next = AVL_NEXT(tree, newseg);
			(void) lgrp_shm_policy_concat(tree, newseg, next);
			(void) lgrp_shm_policy_concat(tree, prev, newseg);

			continue;
		}

		eoff = off + len;
		oldeoff = seg->shm_off + seg->shm_size;

		/* Segment already has this policy: skip past it. */
		if (policy == seg->shm_policy.mem_policy) {
			if (eoff <= oldeoff) {
				retval = 1;
				break;
			} else {
				len = eoff - oldeoff;
				off = oldeoff;
				continue;
			}
		}

		if (off == seg->shm_off && len == seg->shm_size) {
			/* Request matches the segment exactly. */
			seg->shm_policy.mem_policy = policy;
			seg->shm_policy.mem_lgrpid = LGRP_NONE;
			len = 0;

			prev = AVL_PREV(tree, seg);
			next = AVL_NEXT(tree, seg);
			(void) lgrp_shm_policy_concat(tree, seg, next);
			(void) lgrp_shm_policy_concat(tree, prev, seg);
		} else {
			/* Request covers only part of the segment. */
			newseg = NULL;
			if (off > seg->shm_off) {
				/* Split off the piece starting at `off`. */
				newseg = lgrp_shm_policy_split(tree, seg, off);

				/* Request ends where the segment did. */
				if (eoff == oldeoff) {
					newseg->shm_policy.mem_policy = policy;
					newseg->shm_policy.mem_lgrpid =
					    LGRP_NONE;
					(void) lgrp_shm_policy_concat(tree,
					    newseg, AVL_NEXT(tree, newseg));
					break;
				}
			}

			/* Request ends before the segment does. */
			if (eoff < oldeoff) {
				if (newseg) {
					(void) lgrp_shm_policy_split(tree,
					    newseg, eoff);
					newseg->shm_policy.mem_policy = policy;
					newseg->shm_policy.mem_lgrpid =
					    LGRP_NONE;
				} else {
					(void) lgrp_shm_policy_split(tree, seg,
					    eoff);
					seg->shm_policy.mem_policy = policy;
					seg->shm_policy.mem_lgrpid = LGRP_NONE;
				}

				if (off == seg->shm_off)
					(void) lgrp_shm_policy_concat(tree,
					    AVL_PREV(tree, seg), seg);
				break;
			}

			/*
			 * Request runs past the end of this segment.
			 * Apply the policy to the overlapping piece
			 * [off, oldeoff) before advancing; otherwise that
			 * piece would keep its old policy.  Neither `seg`
			 * nor `newseg` is used again after this point, so
			 * concatenating into the previous segment is safe.
			 */
			if (newseg != NULL) {
				newseg->shm_policy.mem_policy = policy;
				newseg->shm_policy.mem_lgrpid = LGRP_NONE;
			} else {
				seg->shm_policy.mem_policy = policy;
				seg->shm_policy.mem_lgrpid = LGRP_NONE;
				(void) lgrp_shm_policy_concat(tree,
				    AVL_PREV(tree, seg), seg);
			}

			/* Continue with what lies beyond this segment. */
			len = eoff - oldeoff;
			off = oldeoff;
		}
	}

	rw_exit(&shm_locality->loc_lock);
	return (retval);
}
/*
 * Pick the next memory node for the search described by cookie `c`.
 *
 * While the candidate set is empty (or its count is not positive) the
 * search moves to the parent lgroup, excluding nodes already tried;
 * -1 is returned when the scope is LGRP_SRCH_LOCAL or there is no parent
 * left.  A node is then selected at offset (lmc_rand % count) within the
 * set, wrapping around when the offset exceeds the number of set bits.
 * The cookie is updated to record the choice and the chosen node number
 * is returned.
 */
int
lgrp_memnode_choose(lgrp_mnode_cookie_t *c)
{
	extern int	max_mem_nodes;
	lgrp_t		*cur = c->lmc_lgrp;
	mnodeset_t	avail = c->lmc_nodes;
	mnodeset_t	bit;
	int		navail = c->lmc_cnt;
	int		skip;
	int		mnode;

	while (avail == (mnodeset_t)0 || navail <= 0) {
		if (c->lmc_scope == LGRP_SRCH_LOCAL)
			return (-1);

		cur = cur->lgrp_parent;
		if (cur == NULL)
			return (-1);

		avail = cur->lgrp_mnodes & ~(c->lmc_tried);
		navail = cur->lgrp_nmnodes - c->lmc_ntried;
	}

	ASSERT(avail != (mnodeset_t)0 && navail > 0);

	/*
	 * Count down through the set bits; if a full pass ends without
	 * the countdown reaching zero, start another pass with whatever
	 * remains of the countdown.
	 */
	skip = c->lmc_rand % navail;
	for (;;) {
		for (mnode = 0; mnode < max_mem_nodes; mnode++) {
			if ((avail & ((mnodeset_t)1 << mnode)) == 0)
				continue;
			if (skip-- == 0)
				break;
		}
		if (mnode < max_mem_nodes)
			break;
	}

	bit = (mnodeset_t)1 << mnode;

	c->lmc_lgrp = cur;
	c->lmc_nodes = (avail & ~bit);
	c->lmc_cnt = navail - 1;
	c->lmc_tried = (c->lmc_tried | bit);
	c->lmc_ntried++;

	return (mnode);
}