#include <sys/priv_impl.h>
#include <sys/cred.h>
#include <c2/audit.h>
#include <sys/debug.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/mutex.h>
#include <sys/note.h>
#include <sys/pathname.h>
#include <sys/proc.h>
#include <sys/project.h>
#include <sys/sysevent.h>
#include <sys/task.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/systeminfo.h>
#include <sys/policy.h>
#include <sys/cred_impl.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
#include <sys/class.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/pset.h>
#include <sys/strlog.h>
#include <sys/sysmacros.h>
#include <sys/callb.h>
#include <sys/vmparam.h>
#include <sys/corectl.h>
#include <sys/ipc_impl.h>
#include <sys/klpd.h>
#include <sys/door.h>
#include <sys/cpuvar.h>
#include <sys/sdt.h>
#include <sys/uadmin.h>
#include <sys/session.h>
#include <sys/cmn_err.h>
#include <sys/modhash.h>
#include <sys/sunddi.h>
#include <sys/nvpair.h>
#include <sys/rctl.h>
#include <sys/fss.h>
#include <sys/brand.h>
#include <sys/zone.h>
#include <net/if.h>
#include <sys/cpucaps.h>
#include <vm/seg.h>
#include <sys/mac.h>
/*
 * Timeout, in seconds, named for zone destruction.  The code that consumes
 * it is not part of this section of the file.
 */
#define ZONE_DESTROY_TIMEOUT_SECS 60
/*
 * One datalink record kept on a list (zdl_linkage): the datalink id plus an
 * nvlist.  NOTE(review): zdl_net presumably carries the link's network
 * properties -- confirm against zone_set_network()/zone_get_network().
 */
typedef struct zone_dl {
datalink_id_t zdl_id;
nvlist_t *zdl_net;
list_node_t zdl_linkage;
} zone_dl_t;
/*
 * Condition variable and lock whose names tie them to zone destruction and
 * zone status; their users are outside this section of the file.
 */
static kcondvar_t zone_destroy_cv;
static kmutex_t zone_status_lock;
/*
 * zsd_key_lock is held by zone_key_create()/zone_key_delete()/
 * zone_zsd_configure() around accesses to zsd_keyval and
 * zsd_registered_keys.
 */
static kmutex_t zsd_key_lock;
/* Last key handed out; zone_key_create() pre-increments it, so 0 is never issued. */
static zone_key_t zsd_keyval = 0;
/* One template zsd_entry per registered key. */
static list_t zsd_registered_keys;
/* NOTE(review): presumably the bucket count for the zone hashes below -- confirm. */
int zone_hash_size = 256;
static mod_hash_t *zonehashbyname, *zonehashbyid, *zonehashbylabel;
/* Held while walking zone_active (see zone_key_create(), zsd_apply_all_zones()). */
static kmutex_t zonehash_lock;
static uint_t zonecount;
static id_space_t *zoneid_space;
/* Statically allocated zone structure; zone_zsd_init() sets it up as the global zone. */
zone_t zone0;
zone_t *global_zone = NULL;
/* List of zones walked by the ZSD code, linked through zone_linkage. */
static list_t zone_active;
/* Second zone list linked through zone_linkage, with its own lock. */
static list_t zone_deathrow;
static kmutex_t zone_deathrow_lock;
/* NOTE(review): looks like the upper bound on the number of zones -- confirm. */
uint_t maxzones = 8192;
evchan_t *zone_event_chan;
/*
 * Table of ZONE_EVENT_* strings.  NOTE(review): presumably indexed by
 * zone_status_t, so that several consecutive statuses report the same event
 * string (two READY entries, four SHUTTING_DOWN entries) -- confirm against
 * the zone_status_t definition and the code that indexes this table.
 */
const char *zone_status_table[] = {
ZONE_EVENT_UNINITIALIZED,
ZONE_EVENT_INITIALIZED,
ZONE_EVENT_READY,
ZONE_EVENT_READY,
ZONE_EVENT_RUNNING,
ZONE_EVENT_SHUTTING_DOWN,
ZONE_EVENT_SHUTTING_DOWN,
ZONE_EVENT_SHUTTING_DOWN,
ZONE_EVENT_SHUTTING_DOWN,
ZONE_EVENT_UNINITIALIZED,
};
/*
 * Human-readable subsystem names.  NOTE(review): presumably indexed by the
 * zone reference subsystem enumeration, in which case the order here must
 * match that enumeration -- confirm against its definition.
 */
static char *zone_ref_subsys_names[] = {
"NFS",
"NFSv4",
"SMBFS",
"MNTFS",
"LOFI",
"VFS",
"IPC"
};
/*
 * Handles for the zone-level resource controls.  They are only declared
 * here; the code that registers the controls and fills these in is outside
 * this section of the file.
 */
rctl_hndl_t rc_zone_cpu_shares;
rctl_hndl_t rc_zone_locked_mem;
rctl_hndl_t rc_zone_max_swap;
rctl_hndl_t rc_zone_max_lofi;
rctl_hndl_t rc_zone_cpu_cap;
rctl_hndl_t rc_zone_nlwps;
rctl_hndl_t rc_zone_nprocs;
rctl_hndl_t rc_zone_shmmax;
rctl_hndl_t rc_zone_shmmni;
rctl_hndl_t rc_zone_semmni;
rctl_hndl_t rc_zone_msgmni;
/* Default path of a zone's init program. */
const char * const zone_default_initname = "/sbin/init";
/* Fixed "/zone/" path prefix string; its users are outside this section. */
static char * const zone_prefix = "/zone/";
/* Forward declarations for functions defined later in the file. */
static int zone_shutdown(zoneid_t zoneid);
static int zone_add_datalink(zoneid_t, datalink_id_t);
static int zone_remove_datalink(zoneid_t, datalink_id_t);
static int zone_list_datalink(zoneid_t, int *, datalink_id_t *);
static int zone_set_network(zoneid_t, zone_net_data_t *);
static int zone_get_network(zoneid_t, zone_net_data_t *);
/*
 * Signature shared by zsd_apply_create/shutdown/destroy.  Arguments, as used
 * by those functions: an outer lock the caller holds (or NULL), whether the
 * caller already holds zone->zone_lock, the zone, and the ZSD key.  The
 * return value is B_TRUE when locks were dropped along the way, which the
 * zsd_apply_all_*() walkers use to restart their list traversal.
 */
typedef boolean_t zsd_applyfn_t(kmutex_t *, boolean_t, zone_t *, zone_key_t);
static void zsd_apply_all_zones(zsd_applyfn_t *, zone_key_t);
static void zsd_apply_all_keys(zsd_applyfn_t *, zone_t *);
static boolean_t zsd_apply_create(kmutex_t *, boolean_t, zone_t *, zone_key_t);
static boolean_t zsd_apply_shutdown(kmutex_t *, boolean_t, zone_t *,
zone_key_t);
static boolean_t zsd_apply_destroy(kmutex_t *, boolean_t, zone_t *, zone_key_t);
static boolean_t zsd_wait_for_creator(zone_t *, struct zsd_entry *,
kmutex_t *);
static boolean_t zsd_wait_for_inprogress(zone_t *, struct zsd_entry *,
kmutex_t *);
/* Version number of the zone system call interface. */
static const int ZONE_SYSCALL_API_VERSION = 6;
/*
 * Block new mounts in the zone.  Waits (interruptibly) until the zone's
 * in-progress mount count is no longer positive, then decrements the count
 * so that it becomes (more) negative; mount_in_progress() waits while the
 * count is negative.
 *
 * Must not be called with zonehash_lock held.
 *
 * Returns 1 on success, or 0 if the wait was interrupted by a signal, in
 * which case the count is left untouched.
 */
static int
block_mounts(zone_t *zp)
{
	boolean_t interrupted = B_FALSE;

	ASSERT(MUTEX_NOT_HELD(&zonehash_lock));

	mutex_enter(&zp->zone_mount_lock);
	while (!interrupted && zp->zone_mounts_in_progress > 0) {
		if (cv_wait_sig(&zp->zone_mount_cv, &zp->zone_mount_lock) == 0)
			interrupted = B_TRUE;
	}
	if (!interrupted)
		zp->zone_mounts_in_progress--;
	mutex_exit(&zp->zone_mount_lock);

	return (interrupted ? 0 : 1);
}
/*
 * Undo one block_mounts() call: bump the zone's mount counter and, once it
 * gets back to zero, wake every thread sleeping on zone_mount_cv.
 */
static void
resume_mounts(zone_t *zp)
{
	mutex_enter(&zp->zone_mount_lock);
	zp->zone_mounts_in_progress++;
	if (zp->zone_mounts_in_progress == 0)
		cv_broadcast(&zp->zone_mount_cv);
	mutex_exit(&zp->zone_mount_lock);
}
/*
 * Record that a mount is starting in the zone.  Sleeps on zone_mount_cv for
 * as long as the counter is negative (i.e. mounts are blocked), then
 * increments the counter.
 */
void
mount_in_progress(zone_t *zp)
{
	mutex_enter(&zp->zone_mount_lock);
	for (;;) {
		if (zp->zone_mounts_in_progress >= 0)
			break;
		cv_wait(&zp->zone_mount_cv, &zp->zone_mount_lock);
	}
	zp->zone_mounts_in_progress++;
	mutex_exit(&zp->zone_mount_lock);
}
/*
 * Record that a mount has finished in the zone: drop the counter and, when
 * it reaches zero, wake every thread sleeping on zone_mount_cv.
 */
void
mount_completed(zone_t *zp)
{
	mutex_enter(&zp->zone_mount_lock);
	zp->zone_mounts_in_progress--;
	if (zp->zone_mounts_in_progress == 0)
		cv_broadcast(&zp->zone_mount_cv);
	mutex_exit(&zp->zone_mount_lock);
}
/*
 * Linear search of list l for the entry whose zsd_key equals key.
 * Returns the entry, or NULL if there is none.  The list is not modified.
 */
static struct zsd_entry *
zsd_find(list_t *l, zone_key_t key)
{
	struct zsd_entry *ent = list_head(l);

	while (ent != NULL && ent->zsd_key != key)
		ent = list_next(l, ent);

	return (ent);
}
/*
 * Like zsd_find(), but a matching entry that is not already first is moved
 * to the head of the list, keeping the list in most-recently-used order.
 * Returns the entry, or NULL if there is none.
 */
static struct zsd_entry *
zsd_find_mru(list_t *l, zone_key_t key)
{
	struct zsd_entry *ent = list_head(l);

	while (ent != NULL && ent->zsd_key != key)
		ent = list_next(l, ent);

	if (ent != NULL && ent != list_head(l)) {
		list_remove(l, ent);
		list_insert_head(l, ent);
	}

	return (ent);
}
/*
 * Register a new zone-specific-data (ZSD) key together with its optional
 * create, shutdown and destroy callbacks.
 *
 * A template entry is appended to zsd_registered_keys, a per-zone entry is
 * added to every eligible zone on zone_active, and, if a create callback
 * was supplied, it is run for every zone through zsd_apply_all_zones().
 * The new key is stored through keyp only after all of that has finished.
 *
 * All allocations use KM_SLEEP.
 */
void
zone_key_create(zone_key_t *keyp, void *(*create)(zoneid_t),
void (*shutdown)(zoneid_t, void *), void (*destroy)(zoneid_t, void *))
{
struct zsd_entry *zsdp;
struct zsd_entry *t;
struct zone *zone;
zone_key_t key;
zsdp = kmem_zalloc(sizeof (*zsdp), KM_SLEEP);
zsdp->zsd_data = NULL;
zsdp->zsd_create = create;
zsdp->zsd_shutdown = shutdown;
zsdp->zsd_destroy = destroy;
/* Allocate the key value and publish the template on the global list. */
mutex_enter(&zsd_key_lock);
key = zsdp->zsd_key = ++zsd_keyval;
/* A wrap to 0 would mean the key space is exhausted. */
ASSERT(zsd_keyval != 0);
list_insert_tail(&zsd_registered_keys, zsdp);
mutex_exit(&zsd_key_lock);
/* Add an entry for the key to every existing zone. */
mutex_enter(&zonehash_lock);
for (zone = list_head(&zone_active); zone != NULL;
zone = list_next(&zone_active, zone)) {
zone_status_t status;
mutex_enter(&zone->zone_lock);
/* Skip zones that are uninitialized or at/after ZONE_IS_DOWN. */
status = zone_status_get(zone);
if (status >= ZONE_IS_DOWN ||
status == ZONE_IS_UNINITIALIZED) {
mutex_exit(&zone->zone_lock);
continue;
}
/*
 * The zone may already have an entry for this key: zsd_key_lock was
 * dropped above, so zone_zsd_configure() could have copied it from
 * zsd_registered_keys in the meantime.
 */
t = zsd_find_mru(&zone->zone_zsd, key);
if (t != NULL) {
mutex_exit(&zone->zone_lock);
continue;
}
t = kmem_zalloc(sizeof (*t), KM_SLEEP);
t->zsd_key = key;
t->zsd_create = create;
t->zsd_shutdown = shutdown;
t->zsd_destroy = destroy;
/* Only flag a pending create when there is a create callback to run. */
if (create != NULL) {
t->zsd_flags = ZSD_CREATE_NEEDED;
DTRACE_PROBE2(zsd__create__needed,
zone_t *, zone, zone_key_t, key);
}
list_insert_tail(&zone->zone_zsd, t);
mutex_exit(&zone->zone_lock);
}
mutex_exit(&zonehash_lock);
/* Run the create callback for every zone that was flagged above. */
if (create != NULL) {
zsd_apply_all_zones(zsd_apply_create, key);
}
/* Hand the key to the caller only now that the creates have run. */
*keyp = key;
}
/*
 * Unregister a ZSD key.
 *
 * The template is removed from zsd_registered_keys; every zone on
 * zone_active that has an entry for the key is flagged for shutdown and
 * destroy (where those callbacks exist and have not already been
 * requested, started or completed); the callbacks are run via
 * zsd_apply_all_zones(); finally the per-zone entries are unlinked and
 * freed.
 *
 * Returns 0 on success, or -1 if the key is not registered.
 */
int
zone_key_delete(zone_key_t key)
{
struct zsd_entry *zsdp = NULL;
zone_t *zone;
/* Take the template off the global list so no new zone picks it up. */
mutex_enter(&zsd_key_lock);
zsdp = zsd_find_mru(&zsd_registered_keys, key);
if (zsdp == NULL) {
mutex_exit(&zsd_key_lock);
return (-1);
}
list_remove(&zsd_registered_keys, zsdp);
mutex_exit(&zsd_key_lock);
/* Pass 1: mark each zone's entry as needing shutdown and destroy. */
mutex_enter(&zonehash_lock);
for (zone = list_head(&zone_active); zone != NULL;
zone = list_next(&zone_active, zone)) {
struct zsd_entry *del;
mutex_enter(&zone->zone_lock);
del = zsd_find_mru(&zone->zone_zsd, key);
/* This zone has no entry for the key; nothing to mark. */
if (del == NULL) {
mutex_exit(&zone->zone_lock);
continue;
}
ASSERT(del->zsd_shutdown == zsdp->zsd_shutdown);
ASSERT(del->zsd_destroy == zsdp->zsd_destroy);
/* Do not re-request a shutdown that is needed, running or done. */
if (del->zsd_shutdown != NULL &&
(del->zsd_flags & ZSD_SHUTDOWN_ALL) == 0) {
del->zsd_flags |= ZSD_SHUTDOWN_NEEDED;
DTRACE_PROBE2(zsd__shutdown__needed,
zone_t *, zone, zone_key_t, key);
}
/* Likewise for destroy. */
if (del->zsd_destroy != NULL &&
(del->zsd_flags & ZSD_DESTROY_ALL) == 0) {
del->zsd_flags |= ZSD_DESTROY_NEEDED;
DTRACE_PROBE2(zsd__destroy__needed,
zone_t *, zone, zone_key_t, key);
}
mutex_exit(&zone->zone_lock);
}
mutex_exit(&zonehash_lock);
/*
 * The template is no longer needed: the per-zone entries carry their own
 * copies of the callback pointers.
 */
kmem_free(zsdp, sizeof (*zsdp));
/* Run the shutdown callbacks, then the destroy callbacks, for the key. */
zsd_apply_all_zones(zsd_apply_shutdown, key);
zsd_apply_all_zones(zsd_apply_destroy, key);
/* Pass 2: unlink and free the per-zone entries. */
mutex_enter(&zonehash_lock);
for (zone = list_head(&zone_active); zone != NULL;
zone = list_next(&zone_active, zone)) {
struct zsd_entry *del;
mutex_enter(&zone->zone_lock);
del = zsd_find(&zone->zone_zsd, key);
if (del != NULL) {
list_remove(&zone->zone_zsd, del);
/* No callback may still be running on an entry being freed. */
ASSERT(!(del->zsd_flags & ZSD_ALL_INPROGRESS));
kmem_free(del, sizeof (*del));
}
mutex_exit(&zone->zone_lock);
}
mutex_exit(&zonehash_lock);
return (0);
}
/*
 * Store data as the zone's value for the given ZSD key, replacing any
 * previous value.
 *
 * Returns 0 on success, or -1 if the zone has no entry for the key.
 */
int
zone_setspecific(zone_key_t key, zone_t *zone, const void *data)
{
	struct zsd_entry *ent;
	int rv = -1;

	mutex_enter(&zone->zone_lock);
	ent = zsd_find_mru(&zone->zone_zsd, key);
	if (ent != NULL) {
		ent->zsd_data = (void *)data;
		rv = 0;
	}
	mutex_exit(&zone->zone_lock);

	return (rv);
}
/*
 * Fetch the zone's value for the given ZSD key.
 *
 * Returns the stored pointer, or NULL if the zone has no entry for the key.
 */
void *
zone_getspecific(zone_key_t key, zone_t *zone)
{
	struct zsd_entry *ent;
	void *value = NULL;

	mutex_enter(&zone->zone_lock);
	ent = zsd_find_mru(&zone->zone_zsd, key);
	if (ent != NULL)
		value = ent->zsd_data;
	mutex_exit(&zone->zone_lock);

	return (value);
}
/*
 * Populate a zone's (empty) ZSD list with one entry per currently
 * registered key, copying the key and callbacks from the global template.
 * Entries whose key has a create callback are flagged ZSD_CREATE_NEEDED;
 * the callback itself is not invoked here.
 *
 * The caller must hold zonehash_lock.  zone_lock is taken before
 * zsd_key_lock.
 */
static void
zone_zsd_configure(zone_t *zone)
{
	struct zsd_entry *reg;

	ASSERT(MUTEX_HELD(&zonehash_lock));
	ASSERT(list_head(&zone->zone_zsd) == NULL);

	mutex_enter(&zone->zone_lock);
	mutex_enter(&zsd_key_lock);

	reg = list_head(&zsd_registered_keys);
	while (reg != NULL) {
		struct zsd_entry *copy;

		ASSERT(zsd_find(&zone->zone_zsd, reg->zsd_key) == NULL);

		copy = kmem_zalloc(sizeof (*copy), KM_SLEEP);
		copy->zsd_key = reg->zsd_key;
		copy->zsd_create = reg->zsd_create;
		copy->zsd_shutdown = reg->zsd_shutdown;
		copy->zsd_destroy = reg->zsd_destroy;
		if (reg->zsd_create != NULL) {
			copy->zsd_flags = ZSD_CREATE_NEEDED;
			DTRACE_PROBE2(zsd__create__needed,
			    zone_t *, zone, zone_key_t, reg->zsd_key);
		}
		list_insert_tail(&zone->zone_zsd, copy);

		reg = list_next(&zsd_registered_keys, reg);
	}

	mutex_exit(&zsd_key_lock);
	mutex_exit(&zone->zone_lock);
}
/* Which class of ZSD callback is being requested. */
enum zsd_callback_type { ZSD_CREATE, ZSD_SHUTDOWN, ZSD_DESTROY };

/*
 * Request and run the shutdown or destroy callbacks for every ZSD entry of
 * a zone.  Only ZSD_SHUTDOWN and ZSD_DESTROY are valid for ct; the zone
 * must be at least ZONE_IS_EMPTY for shutdown and at least ZONE_IS_DOWN for
 * destroy.
 *
 * First every entry that has the relevant callback, and for which that
 * callback has not already been requested, started or completed, is
 * flagged as needing it.  Then both the shutdown and the destroy apply
 * passes are run over all keys; each pass only acts on entries carrying
 * the corresponding NEEDED flag.
 */
static void
zone_zsd_callbacks(zone_t *zone, enum zsd_callback_type ct)
{
	struct zsd_entry *ent;

	ASSERT(ct == ZSD_SHUTDOWN || ct == ZSD_DESTROY);
	ASSERT(ct != ZSD_SHUTDOWN || zone_status_get(zone) >= ZONE_IS_EMPTY);
	ASSERT(ct != ZSD_DESTROY || zone_status_get(zone) >= ZONE_IS_DOWN);

	mutex_enter(&zone->zone_lock);
	ent = list_head(&zone->zone_zsd);
	while (ent != NULL) {
		zone_key_t key = ent->zsd_key;

		if (ct != ZSD_SHUTDOWN) {
			if (ent->zsd_destroy != NULL &&
			    !(ent->zsd_flags & ZSD_DESTROY_ALL)) {
				ent->zsd_flags |= ZSD_DESTROY_NEEDED;
				DTRACE_PROBE2(zsd__destroy__needed,
				    zone_t *, zone, zone_key_t, key);
			}
		} else if (ent->zsd_shutdown != NULL &&
		    !(ent->zsd_flags & ZSD_SHUTDOWN_ALL)) {
			ent->zsd_flags |= ZSD_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(zsd__shutdown__needed,
			    zone_t *, zone, zone_key_t, key);
		}

		ent = list_next(&zone->zone_zsd, ent);
	}
	mutex_exit(&zone->zone_lock);

	zsd_apply_all_keys(zsd_apply_shutdown, zone);
	zsd_apply_all_keys(zsd_apply_destroy, zone);
}
/*
 * Release every ZSD entry of a zone and destroy the list itself.  No
 * callback may be in progress on any entry (asserted).  The callbacks are
 * not invoked here.
 */
static void
zone_free_zsd(zone_t *zone)
{
	struct zsd_entry *ent;

	mutex_enter(&zone->zone_lock);
	while ((ent = list_head(&zone->zone_zsd)) != NULL) {
		list_remove(&zone->zone_zsd, ent);
		ASSERT(!(ent->zsd_flags & ZSD_ALL_INPROGRESS));
		kmem_free(ent, sizeof (*ent));
	}
	list_destroy(&zone->zone_zsd);
	mutex_exit(&zone->zone_lock);
}
/*
 * Invoke applyfn for the given key on every zone on zone_active, with
 * zonehash_lock held on entry to each call and zone_lock not held.
 *
 * When applyfn reports that it dropped the lock (returns B_TRUE) the list
 * may have changed, so the walk restarts from the head; otherwise it moves
 * on to the next zone.
 */
static void
zsd_apply_all_zones(zsd_applyfn_t *applyfn, zone_key_t key)
{
	zone_t *zone;

	mutex_enter(&zonehash_lock);
	for (zone = list_head(&zone_active); zone != NULL; ) {
		boolean_t restart;

		restart = (applyfn)(&zonehash_lock, B_FALSE, zone, key);
		zone = restart ? list_head(&zone_active) :
		    list_next(&zone_active, zone);
	}
	mutex_exit(&zonehash_lock);
}
/*
 * Invoke applyfn on every ZSD entry of one zone, with zone_lock held on
 * entry to each call and no outer lock (NULL is passed).
 *
 * When applyfn reports that it dropped the lock (returns B_TRUE) the list
 * may have changed, so the walk restarts from the head; otherwise it moves
 * on to the next entry.
 */
static void
zsd_apply_all_keys(zsd_applyfn_t *applyfn, zone_t *zone)
{
	struct zsd_entry *ent;

	mutex_enter(&zone->zone_lock);
	for (ent = list_head(&zone->zone_zsd); ent != NULL; ) {
		boolean_t restart;

		restart = (applyfn)(NULL, B_TRUE, zone, ent->zsd_key);
		ent = restart ? list_head(&zone->zone_zsd) :
		    list_next(&zone->zone_zsd, ent);
	}
	mutex_exit(&zone->zone_lock);
}
/*
 * Run the create callback for one (zone, key) pair if it is still pending.
 *
 * lockp          - outer lock held by the caller, or NULL if there is none.
 * zone_lock_held - B_TRUE if the caller already holds zone->zone_lock;
 *                  otherwise it is taken and released here.
 *
 * After waiting for any callback already in progress on the entry, an
 * entry flagged ZSD_CREATE_NEEDED is moved to ZSD_CREATE_INPROGRESS, both
 * zone_lock and lockp are dropped, the callback is called, the locks are
 * retaken (lockp first, then zone_lock), the result is stored in zsd_data,
 * the entry is marked ZSD_CREATE_COMPLETED and waiters on zsd_cv are woken.
 *
 * Returns B_TRUE if any lock was dropped (the caller must then assume the
 * list it is walking may have changed), B_FALSE otherwise -- including
 * when the zone has no entry for the key.
 */
static boolean_t
zsd_apply_create(kmutex_t *lockp, boolean_t zone_lock_held,
    zone_t *zone, zone_key_t key)
{
	void *result;
	struct zsd_entry *t;
	boolean_t dropped;

	if (lockp != NULL) {
		ASSERT(MUTEX_HELD(lockp));
	}
	if (zone_lock_held) {
		ASSERT(MUTEX_HELD(&zone->zone_lock));
	} else {
		mutex_enter(&zone->zone_lock);
	}

	t = zsd_find(&zone->zone_zsd, key);
	if (t == NULL) {
		/* The zone has no entry for this key; nothing to do. */
		if (!zone_lock_held)
			mutex_exit(&zone->zone_lock);
		return (B_FALSE);
	}

	dropped = B_FALSE;
	if (zsd_wait_for_inprogress(zone, t, lockp))
		dropped = B_TRUE;

	if (t->zsd_flags & ZSD_CREATE_NEEDED) {
		t->zsd_flags &= ~ZSD_CREATE_NEEDED;
		t->zsd_flags |= ZSD_CREATE_INPROGRESS;
		DTRACE_PROBE2(zsd__create__inprogress,
		    zone_t *, zone, zone_key_t, key);

		/* Call the callback with no locks held. */
		mutex_exit(&zone->zone_lock);
		if (lockp != NULL)
			mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(t->zsd_create != NULL);
		DTRACE_PROBE2(zsd__create__start,
		    zone_t *, zone, zone_key_t, key);

		result = (*t->zsd_create)(zone->zone_id);

		/* The second probe argument is the callback's return value. */
		DTRACE_PROBE2(zsd__create__end,
		    zone_t *, zone, void *, result);

		ASSERT(result != NULL);

		/* Retake the outer lock before zone_lock. */
		if (lockp != NULL)
			mutex_enter(lockp);
		mutex_enter(&zone->zone_lock);

		t->zsd_data = result;
		t->zsd_flags &= ~ZSD_CREATE_INPROGRESS;
		t->zsd_flags |= ZSD_CREATE_COMPLETED;
		cv_broadcast(&t->zsd_cv);
		DTRACE_PROBE2(zsd__create__completed,
		    zone_t *, zone, zone_key_t, key);
	}

	if (!zone_lock_held)
		mutex_exit(&zone->zone_lock);
	return (dropped);
}
/*
 * Run the shutdown callback for one (zone, key) pair if it is pending.
 *
 * lockp          - outer lock held by the caller, or NULL if there is none.
 * zone_lock_held - B_TRUE if the caller already holds zone->zone_lock;
 *                  otherwise it is taken and released here.
 *
 * Returns B_TRUE if any lock was dropped along the way, B_FALSE otherwise
 * (including when the zone has no entry for the key).
 */
static boolean_t
zsd_apply_shutdown(kmutex_t *lockp, boolean_t zone_lock_held,
zone_t *zone, zone_key_t key)
{
struct zsd_entry *t;
void *data;
boolean_t dropped;
if (lockp != NULL) {
ASSERT(MUTEX_HELD(lockp));
}
if (zone_lock_held) {
ASSERT(MUTEX_HELD(&zone->zone_lock));
} else {
mutex_enter(&zone->zone_lock);
}
t = zsd_find(&zone->zone_zsd, key);
if (t == NULL) {
/* The zone has no entry for this key; nothing to do. */
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (B_FALSE);
}
dropped = B_FALSE;
/* Wait until a pending create is no longer flagged as needed. */
if (zsd_wait_for_creator(zone, t, lockp))
dropped = B_TRUE;
/* Wait for any callback currently running on this entry. */
if (zsd_wait_for_inprogress(zone, t, lockp))
dropped = B_TRUE;
if (t->zsd_flags & ZSD_SHUTDOWN_NEEDED) {
t->zsd_flags &= ~ZSD_SHUTDOWN_NEEDED;
t->zsd_flags |= ZSD_SHUTDOWN_INPROGRESS;
DTRACE_PROBE2(zsd__shutdown__inprogress,
zone_t *, zone, zone_key_t, key);
/* Call the callback with no locks held. */
mutex_exit(&zone->zone_lock);
if (lockp != NULL)
mutex_exit(lockp);
dropped = B_TRUE;
/*
 * NOTE(review): t is dereferenced while unlocked.  The code that frees
 * entries in this file asserts that no INPROGRESS flag is set, so the
 * INPROGRESS flag appears to be what keeps t alive here -- confirm.
 */
ASSERT(t->zsd_shutdown != NULL);
data = t->zsd_data;
DTRACE_PROBE2(zsd__shutdown__start,
zone_t *, zone, zone_key_t, key);
(t->zsd_shutdown)(zone->zone_id, data);
DTRACE_PROBE2(zsd__shutdown__end,
zone_t *, zone, zone_key_t, key);
/* Retake the outer lock before zone_lock. */
if (lockp != NULL)
mutex_enter(lockp);
mutex_enter(&zone->zone_lock);
t->zsd_flags &= ~ZSD_SHUTDOWN_INPROGRESS;
t->zsd_flags |= ZSD_SHUTDOWN_COMPLETED;
/* Wake anyone waiting on this entry. */
cv_broadcast(&t->zsd_cv);
DTRACE_PROBE2(zsd__shutdown__completed,
zone_t *, zone, zone_key_t, key);
}
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (dropped);
}
/*
 * Run the destroy callback for one (zone, key) pair if it is pending, and
 * clear the entry's data pointer afterwards.
 *
 * lockp          - outer lock held by the caller, or NULL if there is none.
 * zone_lock_held - B_TRUE if the caller already holds zone->zone_lock;
 *                  otherwise it is taken and released here.
 *
 * Returns B_TRUE if any lock was dropped along the way, B_FALSE otherwise
 * (including when the zone has no entry for the key).
 */
static boolean_t
zsd_apply_destroy(kmutex_t *lockp, boolean_t zone_lock_held,
zone_t *zone, zone_key_t key)
{
struct zsd_entry *t;
void *data;
boolean_t dropped;
if (lockp != NULL) {
ASSERT(MUTEX_HELD(lockp));
}
if (zone_lock_held) {
ASSERT(MUTEX_HELD(&zone->zone_lock));
} else {
mutex_enter(&zone->zone_lock);
}
t = zsd_find(&zone->zone_zsd, key);
if (t == NULL) {
/* The zone has no entry for this key; nothing to do. */
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (B_FALSE);
}
dropped = B_FALSE;
/* Wait until a pending create is no longer flagged as needed. */
if (zsd_wait_for_creator(zone, t, lockp))
dropped = B_TRUE;
/* Wait for any callback currently running on this entry. */
if (zsd_wait_for_inprogress(zone, t, lockp))
dropped = B_TRUE;
if (t->zsd_flags & ZSD_DESTROY_NEEDED) {
t->zsd_flags &= ~ZSD_DESTROY_NEEDED;
t->zsd_flags |= ZSD_DESTROY_INPROGRESS;
DTRACE_PROBE2(zsd__destroy__inprogress,
zone_t *, zone, zone_key_t, key);
/* Call the callback with no locks held. */
mutex_exit(&zone->zone_lock);
if (lockp != NULL)
mutex_exit(lockp);
dropped = B_TRUE;
/*
 * NOTE(review): t is dereferenced while unlocked.  The code that frees
 * entries in this file asserts that no INPROGRESS flag is set, so the
 * INPROGRESS flag appears to be what keeps t alive here -- confirm.
 */
ASSERT(t->zsd_destroy != NULL);
data = t->zsd_data;
DTRACE_PROBE2(zsd__destroy__start,
zone_t *, zone, zone_key_t, key);
(t->zsd_destroy)(zone->zone_id, data);
DTRACE_PROBE2(zsd__destroy__end,
zone_t *, zone, zone_key_t, key);
/* Retake the outer lock before zone_lock. */
if (lockp != NULL)
mutex_enter(lockp);
mutex_enter(&zone->zone_lock);
/* The data has been handed to the destroy callback; forget the pointer. */
t->zsd_data = NULL;
t->zsd_flags &= ~ZSD_DESTROY_INPROGRESS;
t->zsd_flags |= ZSD_DESTROY_COMPLETED;
/* Wake anyone waiting on this entry. */
cv_broadcast(&t->zsd_cv);
DTRACE_PROBE2(zsd__destroy__completed,
zone_t *, zone, zone_key_t, key);
}
if (!zone_lock_held)
mutex_exit(&zone->zone_lock);
return (dropped);
}
/*
 * Sleep on the entry's condition variable for as long as the entry is
 * flagged ZSD_CREATE_NEEDED.
 *
 * Called with zone->zone_lock held and, if lockp is non-NULL, with lockp
 * held as well; both are held again on return.
 *
 * Returns B_TRUE if lockp was dropped while waiting.  Note that when lockp
 * is NULL the return value stays B_FALSE even though cv_wait() was called
 * with zone_lock.
 */
static boolean_t
zsd_wait_for_creator(zone_t *zone, struct zsd_entry *t, kmutex_t *lockp)
{
boolean_t dropped = B_FALSE;
while (t->zsd_flags & ZSD_CREATE_NEEDED) {
DTRACE_PROBE2(zsd__wait__for__creator,
zone_t *, zone, struct zsd_entry *, t);
/* Do not sleep while holding the outer lock. */
if (lockp != NULL) {
dropped = B_TRUE;
mutex_exit(lockp);
}
cv_wait(&t->zsd_cv, &zone->zone_lock);
if (lockp != NULL) {
/* Drop zone_lock first so lockp is again taken before zone_lock. */
mutex_exit(&zone->zone_lock);
mutex_enter(lockp);
mutex_enter(&zone->zone_lock);
}
}
return (dropped);
}
/*
 * Sleep on the entry's condition variable for as long as any of the
 * ZSD_ALL_INPROGRESS flags is set on the entry, i.e. while a callback is
 * running for it.
 *
 * Called with zone->zone_lock held and, if lockp is non-NULL, with lockp
 * held as well; both are held again on return.
 *
 * Returns B_TRUE if lockp was dropped while waiting.  Note that when lockp
 * is NULL the return value stays B_FALSE even though cv_wait() was called
 * with zone_lock.
 */
static boolean_t
zsd_wait_for_inprogress(zone_t *zone, struct zsd_entry *t, kmutex_t *lockp)
{
boolean_t dropped = B_FALSE;
while (t->zsd_flags & ZSD_ALL_INPROGRESS) {
DTRACE_PROBE2(zsd__wait__for__inprogress,
zone_t *, zone, struct zsd_entry *, t);
/* Do not sleep while holding the outer lock. */
if (lockp != NULL) {
dropped = B_TRUE;
mutex_exit(lockp);
}
cv_wait(&t->zsd_cv, &zone->zone_lock);
if (lockp != NULL) {
/* Drop zone_lock first so lockp is again taken before zone_lock. */
mutex_exit(&zone->zone_lock);
mutex_enter(lockp);
mutex_enter(&zone->zone_lock);
}
}
return (dropped);
}
/*
 * Free every entry on the zone's dataset list, including each entry's
 * dataset name string (sized as strlen + 1), then destroy the list.
 */
static void
zone_free_datasets(zone_t *zone)
{
	zone_dataset_t *zd;

	while ((zd = list_head(&zone->zone_datasets)) != NULL) {
		list_remove(&zone->zone_datasets, zd);
		kmem_free(zd->zd_dataset, strlen(zd->zd_dataset) + 1);
		kmem_free(zd, sizeof (*zd));
	}
	list_destroy(&zone->zone_datasets);
}
/*
 * Usage callback for the zone CPU-shares control: reports the zone_shares
 * value of the process's zone.  The caller must hold p->p_lock.
 */
static rctl_qty_t
zone_cpu_shares_usage(rctl_t *rctl, struct proc *p)
{
	rctl_qty_t shares;

	ASSERT(MUTEX_HELD(&p->p_lock));

	shares = p->p_zone->zone_shares;
	return (shares);
}
/*
 * Set callback for the zone CPU-shares control: stores nv in the zone's
 * zone_shares.  Does nothing when the entity carries no zone.  The caller
 * must hold p->p_lock.  Always returns 0.
 */
static int
zone_cpu_shares_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
    rctl_qty_t nv)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	zone = e->rcep_p.zone;
	if (zone != NULL)
		zone->zone_shares = nv;

	return (0);
}
/*
 * Operations vector for the zone CPU-shares control.  Judging by the
 * handlers plugged in, the slots are (action, usage, set, test); this
 * control supplies usage and set only.  NOTE(review): confirm the slot
 * order against the rctl_ops_t definition.
 */
static rctl_ops_t zone_cpu_shares_ops = {
rcop_no_action,
zone_cpu_shares_usage,
zone_cpu_shares_set,
rcop_no_test
};
/*
 * Usage callback for the zone CPU-cap control: returns whatever
 * cpucaps_zone_get() reports for the process's zone.  The caller must hold
 * p->p_lock.
 */
static rctl_qty_t
zone_cpu_cap_get(rctl_t *rctl, struct proc *p)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));

	zone = p->p_zone;
	return (cpucaps_zone_get(zone));
}
/*
 * Set callback for the zone CPU-cap control: forwards nv to
 * cpucaps_zone_set() and returns its result.  Returns 0 without doing
 * anything when the entity carries no zone.  The caller must hold
 * p->p_lock.
 */
static int
zone_cpu_cap_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
    rctl_qty_t nv)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	zone = e->rcep_p.zone;
	return (zone == NULL ? 0 : cpucaps_zone_set(zone, nv));
}
/*
 * Operations vector for the zone CPU-cap control.  Judging by the handlers
 * plugged in, the slots are (action, usage, set, test); this control
 * supplies usage and set only.  NOTE(review): confirm the slot order
 * against the rctl_ops_t definition.
 */
static rctl_ops_t zone_cpu_cap_ops = {
rcop_no_action,
zone_cpu_cap_get,
zone_cpu_cap_set,
rcop_no_test
};
/*
 * Usage callback for the zone LWP-count control: returns zone_nlwps of the
 * process's zone, read under zone_nlwps_lock.  The caller must hold
 * p->p_lock.
 */
static rctl_qty_t
zone_lwps_usage(rctl_t *r, proc_t *p)
{
	zone_t *zone;
	rctl_qty_t count;

	ASSERT(MUTEX_HELD(&p->p_lock));

	zone = p->p_zone;
	mutex_enter(&zone->zone_nlwps_lock);
	count = zone->zone_nlwps;
	mutex_exit(&zone->zone_nlwps_lock);

	return (count);
}
/*
 * Test callback for the zone LWP-count control.
 *
 * Returns 1 when the zone's current LWP count plus incr exceeds
 * rcntl->rcv_value, otherwise 0.  Returns 0 when the entity carries no
 * zone.  The caller must hold p->p_lock and the zone's zone_nlwps_lock.
 */
static int
zone_lwps_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
    rctl_qty_t incr, uint_t flags)
{
	zone_t *zone;
	rctl_qty_t current;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	zone = e->rcep_p.zone;
	if (zone == NULL)
		return (0);

	ASSERT(MUTEX_HELD(&(zone->zone_nlwps_lock)));
	current = zone->zone_nlwps;

	return ((current + incr > rcntl->rcv_value) ? 1 : 0);
}
/*
 * Set callback for the zone LWP-count control: stores nv in the zone's
 * zone_nlwps_ctl.  Does nothing when the entity carries no zone.  The
 * caller must hold p->p_lock.  Always returns 0.
 */
static int
zone_lwps_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	zone = e->rcep_p.zone;
	if (zone != NULL)
		zone->zone_nlwps_ctl = nv;

	return (0);
}
/*
 * Operations vector for the zone LWP-count control.  Judging by the
 * handlers plugged in, the slots are (action, usage, set, test); this
 * control supplies usage, set and test.  NOTE(review): confirm the slot
 * order against the rctl_ops_t definition.
 */
static rctl_ops_t zone_lwps_ops = {
rcop_no_action,
zone_lwps_usage,
zone_lwps_set,
zone_lwps_test,
};
/*
 * Usage callback for the zone process-count control: returns zone_nprocs
 * of the process's zone, read under zone_nlwps_lock.  The caller must hold
 * p->p_lock.
 */
static rctl_qty_t
zone_procs_usage(rctl_t *r, proc_t *p)
{
	zone_t *zone;
	rctl_qty_t count;

	ASSERT(MUTEX_HELD(&p->p_lock));

	zone = p->p_zone;
	mutex_enter(&zone->zone_nlwps_lock);
	count = zone->zone_nprocs;
	mutex_exit(&zone->zone_nlwps_lock);

	return (count);
}
/*
 * Test callback for the zone process-count control.
 *
 * Returns 1 when the zone's current process count plus incr exceeds
 * rcntl->rcv_value, otherwise 0.  Returns 0 when the entity carries no
 * zone.  The caller must hold p->p_lock and the zone's zone_nlwps_lock.
 */
static int
zone_procs_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rcntl,
    rctl_qty_t incr, uint_t flags)
{
	zone_t *zone;
	rctl_qty_t current;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	zone = e->rcep_p.zone;
	if (zone == NULL)
		return (0);

	ASSERT(MUTEX_HELD(&(zone->zone_nlwps_lock)));
	current = zone->zone_nprocs;

	return ((current + incr > rcntl->rcv_value) ? 1 : 0);
}
/*
 * Set callback for the zone process-count control: stores nv in the zone's
 * zone_nprocs_ctl.  Does nothing when the entity carries no zone.  The
 * caller must hold p->p_lock.  Always returns 0.
 */
static int
zone_procs_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, rctl_qty_t nv)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	zone = e->rcep_p.zone;
	if (zone != NULL)
		zone->zone_nprocs_ctl = nv;

	return (0);
}
/*
 * Operations vector for the zone process-count control.  Judging by the
 * handlers plugged in, the slots are (action, usage, set, test); this
 * control supplies usage, set and test.  NOTE(review): confirm the slot
 * order against the rctl_ops_t definition.
 */
static rctl_ops_t zone_procs_ops = {
rcop_no_action,
zone_procs_usage,
zone_procs_set,
zone_procs_test,
};
/*
 * Usage callback for the zone shmmax control: reports zone_shmmax of the
 * process's zone.  The caller must hold p->p_lock.
 */
static rctl_qty_t
zone_shmmax_usage(rctl_t *rctl, struct proc *p)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));

	zone = p->p_zone;
	return (zone->zone_shmmax);
}
/*
 * Test callback for the zone shmmax control.
 *
 * Returns 1 when the zone's zone_shmmax plus incr exceeds rval->rcv_value,
 * otherwise 0.  The entity's zone pointer is dereferenced without a NULL
 * check.  The caller must hold p->p_lock.
 */
static int
zone_shmmax_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
    rctl_qty_t incr, uint_t flags)
{
	rctl_qty_t total;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	total = e->rcep_p.zone->zone_shmmax + incr;

	return ((total > rval->rcv_value) ? 1 : 0);
}
/*
 * Operations vector for the zone shmmax control.  Judging by the handlers
 * plugged in, the slots are (action, usage, set, test); this control
 * supplies usage and test only.  NOTE(review): confirm the slot order
 * against the rctl_ops_t definition.
 */
static rctl_ops_t zone_shmmax_ops = {
rcop_no_action,
zone_shmmax_usage,
rcop_no_set,
zone_shmmax_test
};
/*
 * Usage callback for the zone shmmni control: reports zone_ipc.ipcq_shmmni
 * of the process's zone.  The caller must hold p->p_lock.
 */
static rctl_qty_t
zone_shmmni_usage(rctl_t *rctl, struct proc *p)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));

	zone = p->p_zone;
	return (zone->zone_ipc.ipcq_shmmni);
}
/*
 * Test callback for the zone shmmni control.
 *
 * Returns 1 when the zone's ipcq_shmmni plus incr exceeds rval->rcv_value,
 * otherwise 0.  The entity's zone pointer is dereferenced without a NULL
 * check.  The caller must hold p->p_lock.
 */
static int
zone_shmmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
    rctl_qty_t incr, uint_t flags)
{
	rctl_qty_t total;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	total = e->rcep_p.zone->zone_ipc.ipcq_shmmni + incr;

	return ((total > rval->rcv_value) ? 1 : 0);
}
/*
 * Operations vector for the zone shmmni control.  Judging by the handlers
 * plugged in, the slots are (action, usage, set, test); this control
 * supplies usage and test only.  NOTE(review): confirm the slot order
 * against the rctl_ops_t definition.
 */
static rctl_ops_t zone_shmmni_ops = {
rcop_no_action,
zone_shmmni_usage,
rcop_no_set,
zone_shmmni_test
};
/*
 * Usage callback for the zone semmni control: reports zone_ipc.ipcq_semmni
 * of the process's zone.  The caller must hold p->p_lock.
 */
static rctl_qty_t
zone_semmni_usage(rctl_t *rctl, struct proc *p)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));

	zone = p->p_zone;
	return (zone->zone_ipc.ipcq_semmni);
}
/*
 * Test callback for the zone semmni control.
 *
 * Returns 1 when the zone's ipcq_semmni plus incr exceeds rval->rcv_value,
 * otherwise 0.  The entity's zone pointer is dereferenced without a NULL
 * check.  The caller must hold p->p_lock.
 */
static int
zone_semmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
    rctl_qty_t incr, uint_t flags)
{
	rctl_qty_t total;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	total = e->rcep_p.zone->zone_ipc.ipcq_semmni + incr;

	return ((total > rval->rcv_value) ? 1 : 0);
}
/*
 * Operations vector for the zone semmni control.  Judging by the handlers
 * plugged in, the slots are (action, usage, set, test); this control
 * supplies usage and test only.  NOTE(review): confirm the slot order
 * against the rctl_ops_t definition.
 */
static rctl_ops_t zone_semmni_ops = {
rcop_no_action,
zone_semmni_usage,
rcop_no_set,
zone_semmni_test
};
/*
 * Usage callback for the zone msgmni control: reports zone_ipc.ipcq_msgmni
 * of the process's zone.  The caller must hold p->p_lock.
 */
static rctl_qty_t
zone_msgmni_usage(rctl_t *rctl, struct proc *p)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));

	zone = p->p_zone;
	return (zone->zone_ipc.ipcq_msgmni);
}
/*
 * Test callback for the zone msgmni control.
 *
 * Returns 1 when the zone's ipcq_msgmni plus incr exceeds rval->rcv_value,
 * otherwise 0.  The entity's zone pointer is dereferenced without a NULL
 * check.  The caller must hold p->p_lock.
 */
static int
zone_msgmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
    rctl_qty_t incr, uint_t flags)
{
	rctl_qty_t total;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	total = e->rcep_p.zone->zone_ipc.ipcq_msgmni + incr;

	return ((total > rval->rcv_value) ? 1 : 0);
}
/*
 * Operations vector for the zone msgmni control.  Judging by the handlers
 * plugged in, the slots are (action, usage, set, test); this control
 * supplies usage and test only.  NOTE(review): confirm the slot order
 * against the rctl_ops_t definition.
 */
static rctl_ops_t zone_msgmni_ops = {
rcop_no_action,
zone_msgmni_usage,
rcop_no_set,
zone_msgmni_test
};
/*
 * Usage callback for the zone locked-memory control: returns
 * zone_locked_mem of the process's zone, read under zone_mem_lock.  The
 * caller must hold p->p_lock.
 */
static rctl_qty_t
zone_locked_mem_usage(rctl_t *rctl, struct proc *p)
{
	rctl_qty_t locked;

	ASSERT(MUTEX_HELD(&p->p_lock));

	mutex_enter(&p->p_zone->zone_mem_lock);
	locked = p->p_zone->zone_locked_mem;
	mutex_exit(&p->p_zone->zone_mem_lock);

	return (locked);
}
/*
 * Test callback for the zone locked-memory control.
 *
 * Returns 1 when the zone's zone_locked_mem plus incr exceeds
 * rcntl->rcv_value, otherwise 0.  The caller must hold p->p_lock and the
 * zone's zone_mem_lock.  The entity's zone pointer is used without a NULL
 * check.
 */
static int
zone_locked_mem_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
    rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
{
	zone_t *zone = e->rcep_p.zone;
	rctl_qty_t locked;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(MUTEX_HELD(&zone->zone_mem_lock));

	locked = zone->zone_locked_mem;

	return ((locked + incr > rcntl->rcv_value) ? 1 : 0);
}
/*
 * Set callback for the zone locked-memory control: stores nv in the zone's
 * zone_locked_mem_ctl.  Does nothing when the entity carries no zone.  The
 * caller must hold p->p_lock.  Always returns 0.
 */
static int
zone_locked_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
    rctl_qty_t nv)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	zone = e->rcep_p.zone;
	if (zone != NULL)
		zone->zone_locked_mem_ctl = nv;

	return (0);
}
/*
 * Operations vector for the zone locked-memory control.  Judging by the
 * handlers plugged in, the slots are (action, usage, set, test); this
 * control supplies usage, set and test.  NOTE(review): confirm the slot
 * order against the rctl_ops_t definition.
 */
static rctl_ops_t zone_locked_mem_ops = {
rcop_no_action,
zone_locked_mem_usage,
zone_locked_mem_set,
zone_locked_mem_test
};
/*
 * Usage callback for the zone max-swap control: returns zone_max_swap of
 * the process's zone, read under zone_mem_lock.  The caller must hold
 * p->p_lock.
 */
static rctl_qty_t
zone_max_swap_usage(rctl_t *rctl, struct proc *p)
{
	zone_t *zone = p->p_zone;
	rctl_qty_t swap;

	ASSERT(MUTEX_HELD(&p->p_lock));

	mutex_enter(&zone->zone_mem_lock);
	swap = zone->zone_max_swap;
	mutex_exit(&zone->zone_mem_lock);

	return (swap);
}
/*
 * Test callback for the zone max-swap control.
 *
 * Returns 1 when the zone's zone_max_swap plus incr exceeds
 * rcntl->rcv_value, otherwise 0.  The caller must hold p->p_lock and the
 * zone's zone_mem_lock.  The entity's zone pointer is used without a NULL
 * check.
 */
static int
zone_max_swap_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
    rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
{
	zone_t *zone = e->rcep_p.zone;
	rctl_qty_t swap;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(MUTEX_HELD(&zone->zone_mem_lock));

	swap = zone->zone_max_swap;

	return ((swap + incr > rcntl->rcv_value) ? 1 : 0);
}
/*
 * Set callback for the zone max-swap control: stores nv in the zone's
 * zone_max_swap_ctl.  Does nothing when the entity carries no zone.  The
 * caller must hold p->p_lock.  Always returns 0.
 */
static int
zone_max_swap_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
    rctl_qty_t nv)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	zone = e->rcep_p.zone;
	if (zone != NULL)
		zone->zone_max_swap_ctl = nv;

	return (0);
}
/*
 * Operations vector for the zone max-swap control.  Judging by the
 * handlers plugged in, the slots are (action, usage, set, test); this
 * control supplies usage, set and test.  NOTE(review): confirm the slot
 * order against the rctl_ops_t definition.
 */
static rctl_ops_t zone_max_swap_ops = {
rcop_no_action,
zone_max_swap_usage,
zone_max_swap_set,
zone_max_swap_test
};
/*
 * Usage callback for the zone max-lofi control: returns zone_max_lofi of
 * the process's zone, read under zone_rctl_lock.  The caller must hold
 * p->p_lock.
 */
static rctl_qty_t
zone_max_lofi_usage(rctl_t *rctl, struct proc *p)
{
	zone_t *zone = p->p_zone;
	rctl_qty_t lofi;

	ASSERT(MUTEX_HELD(&p->p_lock));

	mutex_enter(&zone->zone_rctl_lock);
	lofi = zone->zone_max_lofi;
	mutex_exit(&zone->zone_rctl_lock);

	return (lofi);
}
/*
 * Test callback for the zone max-lofi control.
 *
 * Returns 1 when the zone's zone_max_lofi plus incr exceeds
 * rcntl->rcv_value, otherwise 0.  The caller must hold p->p_lock and the
 * zone's zone_rctl_lock.  The entity's zone pointer is used without a NULL
 * check.
 */
static int
zone_max_lofi_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
    rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
{
	zone_t *zone = e->rcep_p.zone;
	rctl_qty_t lofi;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(MUTEX_HELD(&zone->zone_rctl_lock));

	lofi = zone->zone_max_lofi;

	return ((lofi + incr > rcntl->rcv_value) ? 1 : 0);
}
/*
 * Set callback for the zone max-lofi control: stores nv in the zone's
 * zone_max_lofi_ctl.  Does nothing when the entity carries no zone.  The
 * caller must hold p->p_lock.  Always returns 0.
 */
static int
zone_max_lofi_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
    rctl_qty_t nv)
{
	zone_t *zone;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(e->rcep_t == RCENTITY_ZONE);

	zone = e->rcep_p.zone;
	if (zone != NULL)
		zone->zone_max_lofi_ctl = nv;

	return (0);
}
/*
 * Operations vector for the zone max-lofi control.  Judging by the
 * handlers plugged in, the slots are (action, usage, set, test); this
 * control supplies usage, set and test.  NOTE(review): confirm the slot
 * order against the rctl_ops_t definition.
 */
static rctl_ops_t zone_max_lofi_ops = {
rcop_no_action,
zone_max_lofi_usage,
zone_max_lofi_set,
zone_max_lofi_test
};
/*
 * Assign the zone the next value of a function-local, monotonically
 * increasing 64-bit counter (starting at 0).  The counter is protected by
 * zonehash_lock, which the caller must hold.
 */
static void
zone_uniqid(zone_t *zone)
{
	static uint64_t uniqid = 0;

	ASSERT(MUTEX_HELD(&zonehash_lock));

	zone->zone_uniqid = uniqid;
	uniqid++;
}
/*
 * Look up a zone by id and return its zone_kcred with an extra hold taken
 * (crhold).  The zone reference obtained by the lookup is released before
 * returning.
 *
 * Returns NULL if no zone with that id is found.
 */
struct cred *
zone_get_kcred(zoneid_t zoneid)
{
	zone_t *zone = zone_find_by_id(zoneid);
	cred_t *cr = NULL;

	if (zone != NULL) {
		cr = zone->zone_kcred;
		crhold(cr);
		zone_rele(zone);
	}

	return (cr);
}
/*
 * kstat update routine for the "lockedmem" kstat: copies the zone's
 * zone_locked_mem into "usage" and zone_locked_mem_ctl into "value".
 *
 * Returns EACCES for write requests, 0 otherwise.
 */
static int
zone_lockedmem_kstat_update(kstat_t *ksp, int rw)
{
	zone_t *zone;
	zone_kstat_t *stats;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	zone = ksp->ks_private;
	stats = ksp->ks_data;

	stats->zk_usage.value.ui64 = zone->zone_locked_mem;
	stats->zk_value.value.ui64 = zone->zone_locked_mem_ctl;

	return (0);
}
/*
 * kstat update routine for the "nprocs" kstat: copies the zone's
 * zone_nprocs into "usage" and zone_nprocs_ctl into "value".
 *
 * Returns EACCES for write requests, 0 otherwise.
 */
static int
zone_nprocs_kstat_update(kstat_t *ksp, int rw)
{
	zone_t *zone;
	zone_kstat_t *stats;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	zone = ksp->ks_private;
	stats = ksp->ks_data;

	stats->zk_usage.value.ui64 = zone->zone_nprocs;
	stats->zk_value.value.ui64 = zone->zone_nprocs_ctl;

	return (0);
}
/*
 * kstat update routine for the "swapresv" kstat: copies the zone's
 * zone_max_swap into "usage" and zone_max_swap_ctl into "value".
 *
 * Returns EACCES for write requests, 0 otherwise.
 */
static int
zone_swapresv_kstat_update(kstat_t *ksp, int rw)
{
	zone_t *zone;
	zone_kstat_t *stats;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	zone = ksp->ks_private;
	stats = ksp->ks_data;

	stats->zk_usage.value.ui64 = zone->zone_max_swap;
	stats->zk_value.value.ui64 = zone->zone_max_swap_ctl;

	return (0);
}
/*
 * Create and install one of the per-zone "usage/value" kstats.
 *
 * zone       - zone the kstat describes; stored in ks_private.
 * name       - kstat name passed to rctl_kstat_create_zone().
 * updatefunc - installed as ks_update to refresh the data.
 *
 * The kstat is virtual, so its data area is allocated here and must be
 * freed by whoever deletes the kstat (see zone_kstat_delete_common()).
 * ks_data_size is increased by the length of the zone name, which the
 * "zonename" string statistic refers to.
 *
 * Returns the installed kstat, or NULL if it could not be created.
 */
static kstat_t *
zone_kstat_create_common(zone_t *zone, char *name,
    int (*updatefunc) (kstat_t *, int))
{
	kstat_t *ksp;
	zone_kstat_t *zk;

	ksp = rctl_kstat_create_zone(zone, name, KSTAT_TYPE_NAMED,
	    sizeof (zone_kstat_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (ksp == NULL)
		return (NULL);

	/*
	 * Zero the data area, as the mcap and misc kstat creators in this
	 * file do, so that bytes of the structure this code never writes
	 * (e.g. the unused part of each value field) do not hold stale
	 * heap contents.
	 */
	zk = ksp->ks_data = kmem_zalloc(sizeof (zone_kstat_t), KM_SLEEP);
	ksp->ks_data_size += strlen(zone->zone_name) + 1;

	kstat_named_init(&zk->zk_zonename, "zonename", KSTAT_DATA_STRING);
	kstat_named_setstr(&zk->zk_zonename, zone->zone_name);
	kstat_named_init(&zk->zk_usage, "usage", KSTAT_DATA_UINT64);
	kstat_named_init(&zk->zk_value, "value", KSTAT_DATA_UINT64);

	ksp->ks_update = updatefunc;
	ksp->ks_private = zone;
	kstat_install(ksp);

	return (ksp);
}
/*
 * kstat update routine for the "memory_cap" kstat: copies the zone's
 * page-in counters and anon allocation failure count into the named
 * statistics.
 *
 * Returns EACCES for write requests, 0 otherwise.
 */
static int
zone_mcap_kstat_update(kstat_t *ksp, int rw)
{
	zone_t *zone;
	zone_mcap_kstat_t *stats;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	zone = ksp->ks_private;
	stats = ksp->ks_data;

	stats->zm_pgpgin.value.ui64 = zone->zone_pgpgin;
	stats->zm_anonpgin.value.ui64 = zone->zone_anonpgin;
	stats->zm_execpgin.value.ui64 = zone->zone_execpgin;
	stats->zm_fspgin.value.ui64 = zone->zone_fspgin;
	stats->zm_anon_alloc_fail.value.ui64 = zone->zone_anon_alloc_fail;

	return (0);
}
/*
 * Create and install the zone's "memory_cap" kstat (class
 * "zone_memory_cap", instance = zone id, name = zone name).  For a
 * non-global zone the kstat is also added to the global zone.
 *
 * The zeroed data area is allocated here, recorded in
 * zone->zone_mcap_stats, and the kstat uses zone_mcap_lock as its lock.
 *
 * Returns the installed kstat, or NULL if it could not be created (in
 * which case zone->zone_mcap_stats is not touched).
 */
static kstat_t *
zone_mcap_kstat_create(zone_t *zone)
{
	kstat_t *ksp;
	zone_mcap_kstat_t *stats;

	ksp = kstat_create_zone("memory_cap", zone->zone_id,
	    zone->zone_name, "zone_memory_cap", KSTAT_TYPE_NAMED,
	    sizeof (zone_mcap_kstat_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL, zone->zone_id);
	if (ksp == NULL)
		return (NULL);

	if (zone->zone_id != GLOBAL_ZONEID)
		kstat_zone_add(ksp, GLOBAL_ZONEID);

	stats = kmem_zalloc(sizeof (zone_mcap_kstat_t), KM_SLEEP);
	ksp->ks_data = stats;
	ksp->ks_data_size += strlen(zone->zone_name) + 1;
	ksp->ks_lock = &zone->zone_mcap_lock;
	zone->zone_mcap_stats = stats;

	kstat_named_init(&stats->zm_zonename, "zonename", KSTAT_DATA_STRING);
	kstat_named_setstr(&stats->zm_zonename, zone->zone_name);
	kstat_named_init(&stats->zm_pgpgin, "pgpgin", KSTAT_DATA_UINT64);
	kstat_named_init(&stats->zm_anonpgin, "anonpgin", KSTAT_DATA_UINT64);
	kstat_named_init(&stats->zm_execpgin, "execpgin", KSTAT_DATA_UINT64);
	kstat_named_init(&stats->zm_fspgin, "fspgin", KSTAT_DATA_UINT64);
	kstat_named_init(&stats->zm_anon_alloc_fail, "anon_alloc_fail",
	    KSTAT_DATA_UINT64);

	ksp->ks_update = zone_mcap_kstat_update;
	ksp->ks_private = zone;
	kstat_install(ksp);

	return (ksp);
}
/*
 * kstat update routine for the "zone_misc" kstat.
 *
 * The three CPU time statistics are each obtained by summing the zone's
 * per-CPU counters with cpu_uarray_sum(), converting the sum with
 * UINT64_OVERFLOW_TO_INT64() and passing it through scalehrtime().  The
 * remaining statistics are copied directly from fields of the zone.
 *
 * Returns EACCES for write requests, 0 otherwise.
 */
static int
zone_misc_kstat_update(kstat_t *ksp, int rw)
{
	zone_t *zone;
	zone_misc_kstat_t *stats;
	uint64_t raw;
	hrtime_t scaled;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	zone = ksp->ks_private;
	stats = ksp->ks_data;

	/* System time. */
	raw = cpu_uarray_sum(zone->zone_ustate, ZONE_USTATE_STIME);
	scaled = UINT64_OVERFLOW_TO_INT64(raw);
	scalehrtime(&scaled);
	stats->zm_stime.value.ui64 = scaled;

	/* User time. */
	raw = cpu_uarray_sum(zone->zone_ustate, ZONE_USTATE_UTIME);
	scaled = UINT64_OVERFLOW_TO_INT64(raw);
	scalehrtime(&scaled);
	stats->zm_utime.value.ui64 = scaled;

	/* Wait time. */
	raw = cpu_uarray_sum(zone->zone_ustate, ZONE_USTATE_WTIME);
	scaled = UINT64_OVERFLOW_TO_INT64(raw);
	scalehrtime(&scaled);
	stats->zm_wtime.value.ui64 = scaled;

	/* Load averages. */
	stats->zm_avenrun1.value.ui32 = zone->zone_avenrun[0];
	stats->zm_avenrun5.value.ui32 = zone->zone_avenrun[1];
	stats->zm_avenrun15.value.ui32 = zone->zone_avenrun[2];

	/* Fork failure counters. */
	stats->zm_ffcap.value.ui32 = zone->zone_ffcap;
	stats->zm_ffnoproc.value.ui32 = zone->zone_ffnoproc;
	stats->zm_ffnomem.value.ui32 = zone->zone_ffnomem;
	stats->zm_ffmisc.value.ui32 = zone->zone_ffmisc;

	stats->zm_nested_intp.value.ui32 = zone->zone_nested_intp;
	stats->zm_init_pid.value.ui32 = zone->zone_proc_initpid;
	stats->zm_boot_time.value.ui64 = (uint64_t)zone->zone_boot_time;

	return (0);
}
/*
 * Create and install the zone's "zone_misc" kstat (module "zones",
 * instance = zone id, name = zone name).  For a non-global zone the kstat
 * is also added to the global zone.
 *
 * The zeroed data area is allocated here, recorded in
 * zone->zone_misc_stats, and the kstat uses zone_misc_lock as its lock.
 *
 * Returns the installed kstat, or NULL if it could not be created (in
 * which case zone->zone_misc_stats is not touched).
 */
static kstat_t *
zone_misc_kstat_create(zone_t *zone)
{
	kstat_t *ksp;
	zone_misc_kstat_t *stats;

	ksp = kstat_create_zone("zones", zone->zone_id,
	    zone->zone_name, "zone_misc", KSTAT_TYPE_NAMED,
	    sizeof (zone_misc_kstat_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL, zone->zone_id);
	if (ksp == NULL)
		return (NULL);

	if (zone->zone_id != GLOBAL_ZONEID)
		kstat_zone_add(ksp, GLOBAL_ZONEID);

	stats = kmem_zalloc(sizeof (zone_misc_kstat_t), KM_SLEEP);
	ksp->ks_data = stats;
	ksp->ks_data_size += strlen(zone->zone_name) + 1;
	ksp->ks_lock = &zone->zone_misc_lock;
	zone->zone_misc_stats = stats;

	kstat_named_init(&stats->zm_zonename, "zonename", KSTAT_DATA_STRING);
	kstat_named_setstr(&stats->zm_zonename, zone->zone_name);
	kstat_named_init(&stats->zm_utime, "nsec_user", KSTAT_DATA_UINT64);
	kstat_named_init(&stats->zm_stime, "nsec_sys", KSTAT_DATA_UINT64);
	kstat_named_init(&stats->zm_wtime, "nsec_waitrq", KSTAT_DATA_UINT64);
	kstat_named_init(&stats->zm_avenrun1, "avenrun_1min",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stats->zm_avenrun5, "avenrun_5min",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stats->zm_avenrun15, "avenrun_15min",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stats->zm_ffcap, "forkfail_cap", KSTAT_DATA_UINT32);
	kstat_named_init(&stats->zm_ffnoproc, "forkfail_noproc",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stats->zm_ffnomem, "forkfail_nomem",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stats->zm_ffmisc, "forkfail_misc",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stats->zm_nested_intp, "nested_interp",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stats->zm_init_pid, "init_pid", KSTAT_DATA_UINT32);
	kstat_named_init(&stats->zm_boot_time, "boot_time",
	    KSTAT_DATA_UINT64);

	ksp->ks_update = zone_misc_kstat_update;
	ksp->ks_private = zone;
	kstat_install(ksp);

	return (ksp);
}
/*
 * Create all per-zone kstats: the three "usage/value" kstats (lockedmem,
 * swapresv, nprocs), the memory_cap kstat and the misc kstat.
 *
 * If the memory_cap or misc kstat cannot be created, a zeroed statistics
 * structure is still allocated and hung off the zone, so
 * zone_mcap_stats/zone_misc_stats are always set on return.
 */
static void
zone_kstat_create(zone_t *zone)
{
	zone->zone_lockedmem_kstat = zone_kstat_create_common(zone,
	    "lockedmem", zone_lockedmem_kstat_update);
	zone->zone_swapresv_kstat = zone_kstat_create_common(zone,
	    "swapresv", zone_swapresv_kstat_update);
	zone->zone_nprocs_kstat = zone_kstat_create_common(zone,
	    "nprocs", zone_nprocs_kstat_update);

	zone->zone_mcap_ksp = zone_mcap_kstat_create(zone);
	if (zone->zone_mcap_ksp == NULL) {
		zone->zone_mcap_stats = kmem_zalloc(
		    sizeof (zone_mcap_kstat_t), KM_SLEEP);
	}

	zone->zone_misc_ksp = zone_misc_kstat_create(zone);
	if (zone->zone_misc_ksp == NULL) {
		zone->zone_misc_stats = kmem_zalloc(
		    sizeof (zone_misc_kstat_t), KM_SLEEP);
	}
}
/*
 * Delete the kstat referenced by *pkstat (if any), free its data buffer of
 * `datasz` bytes, and clear the caller's pointer.  A NULL *pkstat is a
 * no-op.
 */
static void
zone_kstat_delete_common(kstat_t **pkstat, size_t datasz)
{
	kstat_t *ksp = *pkstat;
	void *payload;

	if (ksp == NULL)
		return;

	/* Grab the data pointer first; the kstat is gone after the delete. */
	payload = ksp->ks_data;
	kstat_delete(ksp);
	kmem_free(payload, datasz);
	*pkstat = NULL;
}
/*
 * Tear down every kstat attached to `zone`, freeing the data buffer that
 * backs each one.
 */
static void
zone_kstat_delete(zone_t *zone)
{
	const size_t rctl_sz = sizeof (zone_kstat_t);
	const size_t mcap_sz = sizeof (zone_mcap_kstat_t);
	const size_t misc_sz = sizeof (zone_misc_kstat_t);

	zone_kstat_delete_common(&zone->zone_lockedmem_kstat, rctl_sz);
	zone_kstat_delete_common(&zone->zone_swapresv_kstat, rctl_sz);
	zone_kstat_delete_common(&zone->zone_nprocs_kstat, rctl_sz);
	zone_kstat_delete_common(&zone->zone_mcap_ksp, mcap_sz);
	zone_kstat_delete_common(&zone->zone_misc_ksp, misc_sz);
}
/*
 * Initialize the locks and lists used by the zone subsystem and fill in
 * the statically allocated zone0 structure, then attach p0 to it.
 * Hash tables, rctls and kstats for zone0 are set up separately by
 * zone_init().
 */
void
zone_zsd_init(void)
{
/* Subsystem-wide locks and lists. */
mutex_init(&zonehash_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zsd_key_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zsd_registered_keys, sizeof (struct zsd_entry),
offsetof(struct zsd_entry, zsd_linkage));
list_create(&zone_active, sizeof (zone_t),
offsetof(zone_t, zone_linkage));
list_create(&zone_deathrow, sizeof (zone_t),
offsetof(zone_t, zone_linkage));
/* Per-zone locks of zone0. */
mutex_init(&zone0.zone_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zone0.zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zone0.zone_mem_lock, NULL, MUTEX_DEFAULT, NULL);
/* Usage counters start at zero; the matching *_ctl limits at their maximum. */
zone0.zone_shares = 1;
zone0.zone_nlwps = 0;
zone0.zone_nlwps_ctl = INT_MAX;
zone0.zone_nprocs = 0;
zone0.zone_nprocs_ctl = INT_MAX;
zone0.zone_locked_mem = 0;
zone0.zone_locked_mem_ctl = UINT64_MAX;
/* zone0 is a static object, so zone_max_swap is expected to be zero already. */
ASSERT(zone0.zone_max_swap == 0);
zone0.zone_max_swap_ctl = UINT64_MAX;
zone0.zone_max_lofi = 0;
zone0.zone_max_lofi_ctl = UINT64_MAX;
zone0.zone_shmmax = 0;
zone0.zone_ipc.ipcq_shmmni = 0;
zone0.zone_ipc.ipcq_semmni = 0;
zone0.zone_ipc.ipcq_msgmni = 0;
/* Identity: these point at existing global strings, nothing is allocated. */
zone0.zone_name = GLOBAL_ZONENAME;
zone0.zone_nodename = utsname.nodename;
zone0.zone_domain = srpc_domain;
zone0.zone_hostid = HW_INVALID_HOSTID;
zone0.zone_fs_allowed = NULL;
psecflags_default(&zone0.zone_secflags);
zone0.zone_ref = 1;
zone0.zone_id = GLOBAL_ZONEID;
zone0.zone_status = ZONE_IS_RUNNING;
zone0.zone_rootpath = "/";
/* Length of "/" including its terminating NUL. */
zone0.zone_rootpathlen = 2;
zone0.zone_psetid = ZONE_PS_INVAL;
zone0.zone_ncpus = 0;
zone0.zone_ncpus_online = 0;
zone0.zone_proc_initpid = 1;
zone0.zone_initname = initname;
/* The kstats are created later, by zone_kstat_create() from zone_init(). */
zone0.zone_lockedmem_kstat = NULL;
zone0.zone_swapresv_kstat = NULL;
zone0.zone_nprocs_kstat = NULL;
list_create(&zone0.zone_ref_list, sizeof (zone_ref_t),
offsetof(zone_ref_t, zref_linkage));
list_create(&zone0.zone_zsd, sizeof (struct zsd_entry),
offsetof(struct zsd_entry, zsd_linkage));
/* zone0 becomes the first entry on the active list. */
list_insert_head(&zone_active, &zone0);
zone0.zone_rootvp = NULL;
zone0.zone_vfslist = NULL;
zone0.zone_bootargs = initargs;
/* zone0's privilege set is filled completely. */
zone0.zone_privset = kmem_alloc(sizeof (priv_set_t), KM_SLEEP);
priv_fillset(zone0.zone_privset);
/* p0 acts as zone0's zsched process and belongs to zone0. */
zone0.zone_zsched = &p0;
p0.p_zone = &zone0;
}
/*
 * mod_hash hashing callback for the zone-by-label table.  The key is a
 * ts_label_t; the hash is seeded from the label's DOI and then folds in
 * the label body one 32-bit word at a time, shifting each word by a
 * position-dependent amount in the range 1..16.
 */
static uint_t
hash_bylabel(void *hdata, mod_hash_key_t key)
{
	const ts_label_t *lab = (ts_label_t *)key;
	const uint32_t *word, *end;
	uint_t hash;
	int pos;

	_NOTE(ARGUNUSED(hdata));

	hash = lab->tsl_doi + (lab->tsl_doi << 1);

	/* The label body is read as an array of 32-bit words. */
	word = (const uint32_t *)&lab->tsl_label;
	end = word + sizeof (lab->tsl_label) / sizeof (*word);
	for (pos = 1; word < end; word++, pos++)
		hash += *word + (*word << ((pos % 16) + 1));

	return (hash);
}
/*
 * mod_hash key comparison callback for the zone-by-label table.
 * Returns 0 when the two labels are equal and 1 otherwise.
 */
static int
hash_labelkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	ts_label_t *left = (ts_label_t *)key1;
	ts_label_t *right = (ts_label_t *)key2;

	if (label_equal(left, right))
		return (0);
	return (1);
}
/*
 * Second stage of zone subsystem initialization; must run in the context
 * of p0.  Creates the zone ID space, registers the zone.* resource
 * controls, finishes setting up zone0 (rctl set, kstats, label, usage
 * array), creates the lookup hash tables and inserts zone0 into them, and
 * binds the zone event channel.  Panics if the event channel cannot be
 * bound.
 */
void
zone_init(void)
{
rctl_dict_entry_t *rde;
rctl_val_t *dval;
rctl_set_t *set;
rctl_alloc_gp_t *gp;
rctl_entity_p_t e;
int res;
ASSERT(curproc == &p0);
/* IDs are allocated starting at 1. */
zoneid_space = id_space_create("zoneid_space", 1, MAX_ZONEID);
/* Register the zone-wide resource controls. */
rc_zone_cpu_shares = rctl_register("zone.cpu-shares",
RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
FSS_MAXSHARES, FSS_MAXSHARES, &zone_cpu_shares_ops);
rc_zone_cpu_cap = rctl_register("zone.cpu-cap",
RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_ALWAYS |
RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT |RCTL_GLOBAL_SYSLOG_NEVER |
RCTL_GLOBAL_INFINITE,
MAXCAP, MAXCAP, &zone_cpu_cap_ops);
rc_zone_nlwps = rctl_register("zone.max-lwps", RCENTITY_ZONE,
RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT,
INT_MAX, INT_MAX, &zone_lwps_ops);
rc_zone_nprocs = rctl_register("zone.max-processes", RCENTITY_ZONE,
RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT,
INT_MAX, INT_MAX, &zone_procs_ops);
rc_zone_msgmni = rctl_register("zone.max-msg-ids",
RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
RCTL_GLOBAL_COUNT, IPC_IDS_MAX, IPC_IDS_MAX, &zone_msgmni_ops);
rc_zone_semmni = rctl_register("zone.max-sem-ids",
RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
RCTL_GLOBAL_COUNT, IPC_IDS_MAX, IPC_IDS_MAX, &zone_semmni_ops);
rc_zone_shmmni = rctl_register("zone.max-shm-ids",
RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
RCTL_GLOBAL_COUNT, IPC_IDS_MAX, IPC_IDS_MAX, &zone_shmmni_ops);
rc_zone_shmmax = rctl_register("zone.max-shm-memory",
RCENTITY_ZONE, RCTL_GLOBAL_DENY_ALWAYS | RCTL_GLOBAL_NOBASIC |
RCTL_GLOBAL_BYTES, UINT64_MAX, UINT64_MAX, &zone_shmmax_ops);
/*
 * Add a default value to zone.cpu-shares: a privileged, no-action
 * value of 1.
 */
dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
bzero(dval, sizeof (rctl_val_t));
dval->rcv_value = 1;
dval->rcv_privilege = RCPRIV_PRIVILEGED;
dval->rcv_flagaction = RCTL_LOCAL_NOACTION;
dval->rcv_action_recip_pid = -1;
rde = rctl_dict_lookup("zone.cpu-shares");
(void) rctl_val_list_insert(&rde->rcd_default_value, dval);
rc_zone_locked_mem = rctl_register("zone.max-locked-memory",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
&zone_locked_mem_ops);
rc_zone_max_swap = rctl_register("zone.max-swap",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
&zone_max_swap_ops);
rc_zone_max_lofi = rctl_register("zone.max-lofi",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
&zone_max_lofi_ops);
/*
 * Build zone0's rctl set.  The set and the preallocation group are
 * obtained before p0.p_lock is taken; the set is initialized and zone0's
 * lwp/process/task counts are seeded while the lock is held.
 */
set = rctl_set_create();
gp = rctl_set_init_prealloc(RCENTITY_ZONE);
mutex_enter(&p0.p_lock);
e.rcep_p.zone = &zone0;
e.rcep_t = RCENTITY_ZONE;
zone0.zone_rctls = rctl_set_init(RCENTITY_ZONE, &p0, &e, set,
gp);
zone0.zone_nlwps = p0.p_lwpcnt;
zone0.zone_nprocs = 1;
zone0.zone_ntasks = 1;
mutex_exit(&p0.p_lock);
zone0.zone_restart_init = B_TRUE;
zone0.zone_reboot_on_init_exit = B_FALSE;
zone0.zone_restart_init_0 = B_FALSE;
zone0.zone_brand = &native_brand;
rctl_prealloc_destroy(gp);
zone_kstat_create(&zone0);
/* zone0 uses the l_admin_low label and takes a hold on it. */
zone0.zone_slabel = l_admin_low;
rw_init(&zone0.zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL);
label_hold(l_admin_low);
rw_init(&zone0.zone_mntfs_db_lock, NULL, RW_DEFAULT, NULL);
zone0.zone_ustate = cpu_uarray_zalloc(ZONE_USTATE_MAX, KM_SLEEP);
/* Create the lookup hashes and insert zone0, all under zonehash_lock. */
mutex_enter(&zonehash_lock);
zone_uniqid(&zone0);
ASSERT(zone0.zone_uniqid == GLOBAL_ZONEUNIQID);
zonehashbyid = mod_hash_create_idhash("zone_by_id", zone_hash_size,
mod_hash_null_valdtor);
zonehashbyname = mod_hash_create_strhash("zone_by_name",
zone_hash_size, mod_hash_null_valdtor);
/* The by-label hash exists only on labeled systems. */
if (is_system_labeled())
zonehashbylabel = mod_hash_create_extended("zone_by_label",
zone_hash_size, mod_hash_null_keydtor,
mod_hash_null_valdtor, hash_bylabel, NULL,
hash_labelkey_cmp, KM_SLEEP);
zonecount = 1;
(void) mod_hash_insert(zonehashbyid, (mod_hash_key_t)GLOBAL_ZONEID,
(mod_hash_val_t)&zone0);
(void) mod_hash_insert(zonehashbyname, (mod_hash_key_t)zone0.zone_name,
(mod_hash_val_t)&zone0);
if (is_system_labeled()) {
zone0.zone_flags |= ZF_HASHED_LABEL;
(void) mod_hash_insert(zonehashbylabel,
(mod_hash_key_t)zone0.zone_slabel, (mod_hash_val_t)&zone0);
}
mutex_exit(&zonehash_lock);
zone0.zone_kcred = kcred;
/* From here on global_zone refers to zone0. */
global_zone = &zone0;
/* Bind (creating if needed) the channel used to publish zone state changes. */
res = sysevent_evc_bind(ZONE_EVENT_CHANNEL, &zone_event_chan,
EVCH_CREAT);
if (res)
panic("Sysevent_evc_bind failed during zone setup.\n");
}
/*
 * Release every resource held by a non-global zone and free the zone_t
 * itself.  The zone must have no tasks, lwps, processes, credential
 * references or tracked references left, and must be either DEAD or
 * still UNINITIALIZED.
 */
static void
zone_free(zone_t *zone)
{
zone_dl_t *zdl;
ASSERT(zone != global_zone);
ASSERT(zone->zone_ntasks == 0);
ASSERT(zone->zone_nlwps == 0);
ASSERT(zone->zone_nprocs == 0);
ASSERT(zone->zone_cred_ref == 0);
ASSERT(zone->zone_kcred == NULL);
ASSERT(zone_status_get(zone) == ZONE_IS_DEAD ||
zone_status_get(zone) == ZONE_IS_UNINITIALIZED);
ASSERT(list_is_empty(&zone->zone_ref_list));
/* Drop any CPU cap; afterwards zone_cpucap must be NULL. */
cpucaps_zone_remove(zone);
ASSERT(zone->zone_cpucap == NULL);
/* A DEAD zone is removed from the deathrow list; an UNINITIALIZED one is not. */
if (zone_status_get(zone) == ZONE_IS_DEAD) {
ASSERT(zone->zone_ref == 0);
mutex_enter(&zone_deathrow_lock);
list_remove(&zone_deathrow, zone);
mutex_exit(&zone_deathrow_lock);
}
list_destroy(&zone->zone_ref_list);
zone_free_zsd(zone);
zone_free_datasets(zone);
/* Free each datalink entry together with its optional nvlist. */
while ((zdl = list_remove_head(&zone->zone_dl_list)) != NULL) {
if (zdl->zdl_net != NULL)
nvlist_free(zdl->zdl_net);
kmem_free(zdl, sizeof (zone_dl_t));
}
list_destroy(&zone->zone_dl_list);
cpu_uarray_free(zone->zone_ustate);
/* Release the optional per-zone objects; each is skipped when NULL. */
if (zone->zone_rootvp != NULL)
VN_RELE(zone->zone_rootvp);
if (zone->zone_rootpath)
kmem_free(zone->zone_rootpath, zone->zone_rootpathlen);
/* The name buffer is freed as a fixed ZONENAME_MAX-byte allocation. */
if (zone->zone_name != NULL)
kmem_free(zone->zone_name, ZONENAME_MAX);
if (zone->zone_slabel != NULL)
label_rele(zone->zone_slabel);
if (zone->zone_nodename != NULL)
kmem_free(zone->zone_nodename, _SYS_NMLN);
if (zone->zone_domain != NULL)
kmem_free(zone->zone_domain, _SYS_NMLN);
if (zone->zone_privset != NULL)
kmem_free(zone->zone_privset, sizeof (priv_set_t));
if (zone->zone_rctls != NULL)
rctl_set_free(zone->zone_rctls);
if (zone->zone_bootargs != NULL)
strfree(zone->zone_bootargs);
if (zone->zone_initname != NULL)
strfree(zone->zone_initname);
if (zone->zone_fs_allowed != NULL)
strfree(zone->zone_fs_allowed);
if (zone->zone_pfexecd != NULL)
klpd_freelist(&zone->zone_pfexecd);
/* Return the zone ID to the ID space, then destroy locks and the zone_t. */
id_free(zoneid_space, zone->zone_id);
mutex_destroy(&zone->zone_lock);
cv_destroy(&zone->zone_cv);
rw_destroy(&zone->zone_mlps.mlpl_rwlock);
rw_destroy(&zone->zone_mntfs_db_lock);
kmem_free(zone, sizeof (zone_t));
}
/*
 * Move `zone` to `status`, publish a state-change event on the zone event
 * channel, and wake every thread waiting on the zone's condition variable.
 * The caller must hold zone_status_lock, and the status may not move
 * backwards.
 */
static void
zone_status_set(zone_t *zone, zone_status_t status)
{
nvlist_t *nvl = NULL;
ASSERT(MUTEX_HELD(&zone_status_lock));
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE &&
status >= zone_status_get(zone));
/*
 * Build and publish the event.  The steps are chained with || so the
 * first one that returns non-zero stops the rest.
 */
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) ||
nvlist_add_string(nvl, ZONE_CB_NAME, zone->zone_name) ||
nvlist_add_string(nvl, ZONE_CB_NEWSTATE,
zone_status_table[status]) ||
nvlist_add_string(nvl, ZONE_CB_OLDSTATE,
zone_status_table[zone->zone_status]) ||
nvlist_add_int32(nvl, ZONE_CB_ZONEID, zone->zone_id) ||
nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, (uint64_t)gethrtime()) ||
sysevent_evc_publish(zone_event_chan, ZONE_EVENT_STATUS_CLASS,
ZONE_EVENT_STATUS_SUBCLASS, "sun.com", "kernel", nvl, EVCH_SLEEP)) {
/* A failure is only reported on DEBUG kernels; it is otherwise ignored. */
#ifdef DEBUG
(void) printf(
"Failed to allocate and send zone state change event.\n");
#endif
}
nvlist_free(nvl);
/* The status changes even when the event could not be delivered. */
zone->zone_status = status;
cv_broadcast(&zone->zone_cv);
}
/*
 * Return the current status of `zone`.  No lock is taken here.
 */
zone_status_t
zone_status_get(zone_t *zone)
{
	zone_status_t current = zone->zone_status;

	return (current);
}
/*
 * Replace the zone's boot arguments with the NUL-terminated string at the
 * user address `zone_bootargs` (at most BOOTARGS_MAX bytes).  Returns 0 on
 * success or the copyinstr() error, in which case the old value is kept.
 */
static int
zone_set_bootargs(zone_t *zone, const char *zone_bootargs)
{
	char *kbuf = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
	int err;

	ASSERT(zone != global_zone);

	err = copyinstr(zone_bootargs, kbuf, BOOTARGS_MAX, NULL);
	if (err == 0) {
		if (zone->zone_bootargs != NULL)
			strfree(zone->zone_bootargs);
		zone->zone_bootargs = strdup(kbuf);
	}

	/* The staging buffer is released on both paths. */
	kmem_free(kbuf, BOOTARGS_MAX);
	return (err);
}
/*
 * Assign a brand to a zone.  `brand` is the user address of a
 * struct brand_attr.  Returns EFAULT if it cannot be copied in, EINVAL if
 * the brand cannot be registered, the zone already has a brand, or the
 * zone has reached ZONE_IS_BOOTING; 0 on success.
 */
static int
zone_set_brand(zone_t *zone, const char *brand)
{
	const size_t attrsz = sizeof (struct brand_attr);
	struct brand_attr *kattr;
	brand_t *newbrand;

	kattr = kmem_alloc(attrsz, KM_SLEEP);
	if (copyin(brand, kattr, attrsz) != 0) {
		kmem_free(kattr, attrsz);
		return (EFAULT);
	}

	newbrand = brand_register_zone(kattr);
	kmem_free(kattr, attrsz);
	if (newbrand == NULL)
		return (EINVAL);

	/*
	 * The status check and the brand assignment both happen under
	 * zone_status_lock.
	 */
	mutex_enter(&zone_status_lock);
	if (!(ZONE_IS_BRANDED(zone)) &&
	    zone_status_get(zone) < ZONE_IS_BOOTING) {
		zone->zone_brand = newbrand;
		ZBROP(zone)->b_init_brand_data(zone);
		mutex_exit(&zone_status_lock);
		return (0);
	}
	mutex_exit(&zone_status_lock);

	/* Rejected: undo the registration made above. */
	brand_unregister_zone(newbrand);
	return (EINVAL);
}
/*
 * Copy a psecflags_t in from the user address `zone_secflags`, install it
 * as the zone's security flags, and copy the result to the zone's zsched
 * process.
 *
 * Returns 0 on success, EFAULT if the user buffer cannot be read, or
 * EINVAL if the zone is already past ZONE_IS_READY or the flags fail
 * psecflags_validate().
 */
static int
zone_set_secflags(zone_t *zone, const psecflags_t *zone_secflags)
{
	psecflags_t psf;

	ASSERT(zone != global_zone);

	/*
	 * copyin() signals failure with a non-zero value that is not an
	 * errno, so it must not be handed back to the caller directly.
	 * Report EFAULT, as zone_set_brand() does.
	 */
	if (copyin(zone_secflags, &psf, sizeof (psf)) != 0)
		return (EFAULT);

	if (zone_status_get(zone) > ZONE_IS_READY)
		return (EINVAL);

	if (!psecflags_validate(&psf))
		return (EINVAL);

	(void) memcpy(&zone->zone_secflags, &psf, sizeof (psf));

	/* Keep zsched's flags identical to the zone's. */
	(void) memcpy(&zone->zone_zsched->p_secflags, &zone->zone_secflags,
	    sizeof (zone->zone_zsched->p_secflags));

	return (0);
}
/*
 * Replace the zone's fs_allowed string with the NUL-terminated string at
 * the user address `zone_fs_allowed` (at most ZONE_FS_ALLOWED_MAX bytes).
 * Returns 0 on success or the copyinstr() error, in which case the old
 * value is kept.
 */
static int
zone_set_fs_allowed(zone_t *zone, const char *zone_fs_allowed)
{
	char *kbuf = kmem_zalloc(ZONE_FS_ALLOWED_MAX, KM_SLEEP);
	int err;

	ASSERT(zone != global_zone);

	err = copyinstr(zone_fs_allowed, kbuf, ZONE_FS_ALLOWED_MAX, NULL);
	if (err == 0) {
		if (zone->zone_fs_allowed != NULL)
			strfree(zone->zone_fs_allowed);
		zone->zone_fs_allowed = strdup(kbuf);
	}

	/* The staging buffer is released on both paths. */
	kmem_free(kbuf, ZONE_FS_ALLOWED_MAX);
	return (err);
}
/*
 * Replace the zone's init program name with the NUL-terminated string at
 * the user address `zone_initname` (at most INITNAME_SZ bytes).  Returns 0
 * on success or the copyinstr() error, in which case the old name is kept.
 */
static int
zone_set_initname(zone_t *zone, const char *zone_initname)
{
	char kname[INITNAME_SZ];
	size_t copied;
	size_t bufsz;
	int err;

	ASSERT(zone != global_zone);

	err = copyinstr(zone_initname, kname, INITNAME_SZ, &copied);
	if (err != 0)
		return (err);

	if (zone->zone_initname != NULL)
		strfree(zone->zone_initname);

	/* Allocate exactly strlen + 1 bytes for the stored copy. */
	bufsz = strlen(kname) + 1;
	zone->zone_initname = kmem_alloc(bufsz, KM_SLEEP);
	(void) strcpy(zone->zone_initname, kname);

	return (0);
}
/*
 * Set the zone's physical memory cap from the uint64_t at the user address
 * `zone_mcap`.  Returns 0 on success or EFAULT if the value cannot be
 * read, in which case the cap is left unchanged.
 */
static int
zone_set_phys_mcap(zone_t *zone, const uint64_t *zone_mcap)
{
	uint64_t mcap;

	/*
	 * copyin() signals failure with a non-zero value that is not an
	 * errno, so translate it to EFAULT as zone_set_brand() does.
	 */
	if (copyin(zone_mcap, &mcap, sizeof (uint64_t)) != 0)
		return (EFAULT);

	zone->zone_phys_mcap = mcap;
	return (0);
}
/*
 * Set the zone's default scheduling class from the class name at the user
 * address `new_class` (at most PC_CLNMSZ bytes).
 *
 * Returns 0 on success, the copyinstr() error if the name cannot be read,
 * or EINVAL if the class is unknown or is a kernel class.
 */
static int
zone_set_sched_class(zone_t *zone, const char *new_class)
{
	char sched_class[PC_CLNMSZ];
	id_t classid;
	int err;

	ASSERT(zone != global_zone);
	if ((err = copyinstr(new_class, sched_class, PC_CLNMSZ, NULL)) != 0)
		return (err);

	/*
	 * Return a plain errno like every other path in the zone_set_*()
	 * helpers.  set_errno() belongs on the system call return path and
	 * hands back the syscall failure value rather than the errno it was
	 * given, which a caller expecting an errno would misinterpret.
	 */
	if (getcid(sched_class, &classid) != 0 || CLASS_KERNEL(classid))
		return (EINVAL);

	zone->zone_defaultcid = classid;
	ASSERT(zone->zone_defaultcid > 0 &&
	    zone->zone_defaultcid < loaded_classes);

	return (0);
}
/*
 * Block, uninterruptibly, until `zone` has reached at least `status`.
 */
void
zone_status_wait(zone_t *zone, zone_status_t status)
{
	ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);

	mutex_enter(&zone_status_lock);
	for (;;) {
		if (zone->zone_status >= status)
			break;
		cv_wait(&zone->zone_cv, &zone_status_lock);
	}
	mutex_exit(&zone_status_lock);
}
/*
 * Like zone_status_wait(), but the wait is bracketed by the CALLB_CPR
 * macros.  `str` is the name handed to CALLB_CPR_INIT().
 */
static void
zone_status_wait_cpr(zone_t *zone, zone_status_t status, char *str)
{
callb_cpr_t cprinfo;
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);
CALLB_CPR_INIT(&cprinfo, &zone_status_lock, callb_generic_cpr,
str);
mutex_enter(&zone_status_lock);
while (zone->zone_status < status) {
/* The thread is marked CPR-safe only while it is blocked in cv_wait(). */
CALLB_CPR_SAFE_BEGIN(&cprinfo);
cv_wait(&zone->zone_cv, &zone_status_lock);
CALLB_CPR_SAFE_END(&cprinfo, &zone_status_lock);
}
/*
 * NOTE(review): there is no explicit mutex_exit() here; CALLB_CPR_EXIT()
 * is expected to drop zone_status_lock -- confirm against sys/callb.h.
 */
CALLB_CPR_EXIT(&cprinfo);
}
/*
 * Block until `zone` has reached at least `status`, using cv_wait_sig().
 * Returns 1 once the status has been reached, or 0 if cv_wait_sig()
 * returned zero before that happened.
 */
int
zone_status_wait_sig(zone_t *zone, zone_status_t status)
{
	int reached = 1;

	ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);

	mutex_enter(&zone_status_lock);
	while (zone->zone_status < status) {
		if (cv_wait_sig(&zone->zone_cv, &zone_status_lock) == 0) {
			reached = 0;
			break;
		}
	}
	mutex_exit(&zone_status_lock);

	return (reached);
}
/*
 * Block until `zone` has reached at least `status` or cv_timedwait()
 * returns -1.  `tim` is passed unchanged to cv_timedwait().  Returns the
 * last value returned by cv_timedwait(), or 0 if no wait was needed.
 */
clock_t
zone_status_timedwait(zone_t *zone, clock_t tim, zone_status_t status)
{
	clock_t remaining = 0;

	ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);

	mutex_enter(&zone_status_lock);
	for (;;) {
		if (zone->zone_status >= status || remaining == -1)
			break;
		remaining = cv_timedwait(&zone->zone_cv, &zone_status_lock,
		    tim);
	}
	mutex_exit(&zone_status_lock);

	return (remaining);
}
/*
 * Block until `zone` has reached at least `status`, or cv_timedwait_sig()
 * returns a value <= 0.  `tim` is passed unchanged to cv_timedwait_sig().
 * Returns the last value returned by cv_timedwait_sig(); if no wait was
 * needed, returns `tim` minus the current lbolt sampled on entry.
 */
clock_t
zone_status_timedwait_sig(zone_t *zone, clock_t tim, zone_status_t status)
{
	clock_t remaining = tim - ddi_get_lbolt();

	ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE);

	mutex_enter(&zone_status_lock);
	for (;;) {
		if (zone->zone_status >= status)
			break;
		remaining = cv_timedwait_sig(&zone->zone_cv,
		    &zone_status_lock, tim);
		if (remaining <= 0)
			break;
	}
	mutex_exit(&zone_status_lock);

	return (remaining);
}
/*
 * Tunable.  When non-zero, ZONE_IS_UNREF() additionally requires the
 * zone's credential reference count to be zero, and zone_cred_rele() may
 * wake threads waiting on zone_destroy_cv.
 */
int zone_wait_for_cred = 0;
/*
 * Take a reference on zone `z`.  The caller must already hold
 * z->zone_lock.
 */
static void
zone_hold_locked(zone_t *z)
{
	ASSERT(MUTEX_HELD(&z->zone_lock));

	z->zone_ref += 1;

	/* The counter must not have wrapped around to zero. */
	ASSERT(z->zone_ref != 0);
}
/*
 * Take a reference on zone `z`, acquiring and dropping z->zone_lock
 * around the increment.
 */
void
zone_hold(zone_t *z)
{
	kmutex_t *lock = &z->zone_lock;

	mutex_enter(lock);
	zone_hold_locked(z);
	mutex_exit(lock);
}
/*
 * True when exactly one reference to `zone` remains and, if
 * zone_wait_for_cred is set, no credential references remain either.
 */
#define	ZONE_IS_UNREF(zone)						\
	((zone)->zone_ref == 1 &&					\
	(zone_wait_for_cred == 0 || (zone)->zone_cred_ref == 0))
/*
 * Drop one reference on zone `z`.  When `subsys` names a real subsystem
 * (anything other than ZONE_REF_NUM_SUBSYS), the per-subsystem count is
 * dropped as well and `ref` is unlinked from the zone's reference list.
 *
 * If both the reference count and the credential reference count reach
 * zero the zone is freed.  Otherwise, if the zone is now unreferenced in
 * the ZONE_IS_UNREF() sense and is at least DEAD, waiters on
 * zone_destroy_cv are woken.
 */
static void
zone_rele_common(zone_t *z, zone_ref_t *ref, zone_ref_subsys_t subsys)
{
	boolean_t notify;

	mutex_enter(&z->zone_lock);

	ASSERT(z->zone_ref != 0);
	z->zone_ref--;

	if (subsys != ZONE_REF_NUM_SUBSYS) {
		ASSERT(z->zone_subsys_ref[subsys] != 0);
		z->zone_subsys_ref[subsys]--;
		list_remove(&z->zone_ref_list, ref);
	}

	if (z->zone_ref == 0 && z->zone_cred_ref == 0) {
		/* Last reference of either kind: the zone goes away. */
		mutex_exit(&z->zone_lock);
		zone_free(z);
		return;
	}

	if (ZONE_IS_UNREF(z) && zone_status_get(z) >= ZONE_IS_DEAD)
		notify = B_TRUE;
	else
		notify = B_FALSE;
	mutex_exit(&z->zone_lock);

	if (!notify)
		return;

	/* The broadcast is issued with zonehash_lock held. */
	mutex_enter(&zonehash_lock);
	cv_broadcast(&zone_destroy_cv);
	mutex_exit(&zonehash_lock);
}
/*
 * Drop a reference taken with zone_hold().  No zone_ref_t is involved, so
 * ZONE_REF_NUM_SUBSYS is passed to skip the per-subsystem accounting.
 */
void
zone_rele(zone_t *z)
{
	zone_ref_t *no_ref = NULL;

	zone_rele_common(z, no_ref, ZONE_REF_NUM_SUBSYS);
}
/*
 * Put a zone_ref_t into its initial state: list linkage initialized and
 * no zone attached.
 */
void
zone_init_ref(zone_ref_t *ref)
{
	list_link_init(&ref->zref_linkage);
	ref->zref_zone = NULL;
}
/*
 * Take a tracked reference on zone `z` on behalf of `subsys`.  `ref` must
 * not currently be attached to a zone (this is VERIFY'd); it is attached
 * to `z` and linked onto the zone's reference list.
 */
void
zone_hold_ref(zone_t *z, zone_ref_t *ref, zone_ref_subsys_t subsys)
{
	kmutex_t *lock = &z->zone_lock;

	ASSERT(subsys >= 0 && subsys < ZONE_REF_NUM_SUBSYS);
	VERIFY(ref->zref_zone == NULL);

	ref->zref_zone = z;

	mutex_enter(lock);
	zone_hold_locked(z);
	z->zone_subsys_ref[subsys] += 1;
	ASSERT(z->zone_subsys_ref[subsys] != 0);
	list_insert_head(&z->zone_ref_list, ref);
	mutex_exit(lock);
}
/*
 * Drop a tracked reference taken with zone_hold_ref() and detach `ref`
 * from its zone so that it can be used again.
 */
void
zone_rele_ref(zone_ref_t *ref, zone_ref_subsys_t subsys)
{
	zone_t *held = ref->zref_zone;

	zone_rele_common(held, ref, subsys);
	ref->zref_zone = NULL;
}
/*
 * Take a credential reference on zone `z`.
 */
void
zone_cred_hold(zone_t *z)
{
	kmutex_t *lock = &z->zone_lock;

	mutex_enter(lock);
	z->zone_cred_ref += 1;
	ASSERT(z->zone_cred_ref != 0);
	mutex_exit(lock);
}
/*
 * Drop a credential reference on zone `z`.  If both the reference count
 * and the credential reference count are now zero, the zone is freed.
 * Otherwise, when zone_wait_for_cred is set and the zone is unreferenced
 * and at least DEAD, waiters on zone_destroy_cv are woken.
 */
void
zone_cred_rele(zone_t *z)
{
	boolean_t notify;

	mutex_enter(&z->zone_lock);

	ASSERT(z->zone_cred_ref != 0);
	z->zone_cred_ref--;

	if (z->zone_ref == 0 && z->zone_cred_ref == 0) {
		mutex_exit(&z->zone_lock);
		zone_free(z);
		return;
	}

	if (zone_wait_for_cred && ZONE_IS_UNREF(z) &&
	    zone_status_get(z) >= ZONE_IS_DEAD)
		notify = B_TRUE;
	else
		notify = B_FALSE;
	mutex_exit(&z->zone_lock);

	if (!notify)
		return;

	/* The broadcast is issued with zonehash_lock held. */
	mutex_enter(&zonehash_lock);
	cv_broadcast(&zone_destroy_cv);
	mutex_exit(&zonehash_lock);
}
/*
 * Account for one more task in zone `z`.
 */
void
zone_task_hold(zone_t *z)
{
	kmutex_t *lock = &z->zone_lock;

	mutex_enter(lock);
	z->zone_ntasks += 1;
	ASSERT(z->zone_ntasks != 0);
	mutex_exit(lock);
}
/*
 * Account for one task leaving `zone` and drive the zone state machine
 * when the task count gets low:
 *   - more than one task left: nothing else to do;
 *   - exactly one task left and the zone is SHUTTING_DOWN: the zone
 *     becomes EMPTY;
 *   - no tasks left: the zone becomes DEAD.
 */
void
zone_task_rele(zone_t *zone)
{
uint_t refcnt;
mutex_enter(&zone->zone_lock);
ASSERT(zone->zone_ntasks != 0);
refcnt = --zone->zone_ntasks;
/* Common case: other tasks remain. */
if (refcnt > 1) {
mutex_exit(&zone->zone_lock);
return;
}
/* Hold the zone so it stays valid after zone_lock is dropped. */
zone_hold_locked(zone);
mutex_exit(&zone->zone_lock);
if (refcnt == 1) {
mutex_enter(&zone_status_lock);
if (zone_status_get(zone) != ZONE_IS_SHUTTING_DOWN) {
goto out;
}
/*
 * zone_lock was dropped above, so re-check that the task count is
 * still the value we observed.
 */
mutex_enter(&zone->zone_lock);
if (refcnt != zone->zone_ntasks) {
mutex_exit(&zone->zone_lock);
goto out;
}
mutex_exit(&zone->zone_lock);
zone_status_set(zone, ZONE_IS_EMPTY);
goto out;
}
/* No tasks remain: clear the zsched pointer and mark the zone DEAD. */
ASSERT(refcnt == 0);
zone->zone_zsched = NULL;
mutex_enter(&zone_status_lock);
zone_status_set(zone, ZONE_IS_DEAD);
out:
/* Every path reaching here holds zone_status_lock and the hold taken above. */
mutex_exit(&zone_status_lock);
zone_rele(zone);
}
zoneid_t
getzoneid(void)
{
return (curproc->p_zone->zone_id);
}
/*
 * Look a zone up by ID in the by-ID hash, with no filtering on zone
 * status.  The caller must hold zonehash_lock.  Returns NULL when there is
 * no entry; no reference is taken.
 */
static zone_t *
zone_find_all_by_id(zoneid_t zoneid)
{
	mod_hash_key_t key = (mod_hash_key_t)(uintptr_t)zoneid;
	mod_hash_val_t found;

	ASSERT(MUTEX_HELD(&zonehash_lock));

	if (mod_hash_find(zonehashbyid, key, &found) != 0)
		return (NULL);
	return ((zone_t *)found);
}
static zone_t *
zone_find_all_by_label(const ts_label_t *label)
{
mod_hash_val_t hv;
zone_t *zone = NULL;
ASSERT(MUTEX_HELD(&zonehash_lock));
if (!is_system_labeled())
return (NULL);
if (mod_hash_find(zonehashbylabel, (mod_hash_key_t)label, &hv) == 0)
zone = (zone_t *)hv;
return (zone);
}
static zone_t *
zone_find_all_by_name(char *name)
{
mod_hash_val_t hv;
zone_t *zone = NULL;
ASSERT(MUTEX_HELD(&zonehash_lock));
if (mod_hash_find(zonehashbyname, (mod_hash_key_t)name, &hv) == 0)
zone = (zone_t *)hv;
return (zone);
}
/*
 * Find a zone by ID and return it with a reference held.  Returns NULL if
 * there is no such zone or its status lies outside the range
 * ZONE_IS_READY..ZONE_IS_DOWN.
 */
zone_t *
zone_find_by_id(zoneid_t zoneid)
{
	zone_t *zone;

	mutex_enter(&zonehash_lock);
	zone = zone_find_all_by_id(zoneid);
	if (zone != NULL) {
		zone_status_t status = zone_status_get(zone);

		if (status >= ZONE_IS_READY && status <= ZONE_IS_DOWN)
			zone_hold(zone);
		else
			zone = NULL;
	}
	mutex_exit(&zonehash_lock);

	return (zone);
}
/*
 * Find a zone by label and return it with a reference held.  Returns NULL
 * if there is no such zone or its status is beyond ZONE_IS_DOWN.  Unlike
 * the by-ID and by-name lookups, no lower bound is applied to the status.
 */
zone_t *
zone_find_by_label(const ts_label_t *label)
{
	zone_t *zone;

	mutex_enter(&zonehash_lock);
	zone = zone_find_all_by_label(label);
	if (zone != NULL) {
		zone_status_t status = zone_status_get(zone);

		if (status <= ZONE_IS_DOWN)
			zone_hold(zone);
		else
			zone = NULL;
	}
	mutex_exit(&zonehash_lock);

	return (zone);
}
/*
 * Find a zone by name and return it with a reference held.  Returns NULL
 * if there is no such zone or its status lies outside the range
 * ZONE_IS_READY..ZONE_IS_DOWN.
 */
zone_t *
zone_find_by_name(char *name)
{
	zone_t *zone;

	mutex_enter(&zonehash_lock);
	zone = zone_find_all_by_name(name);
	if (zone != NULL) {
		zone_status_t status = zone_status_get(zone);

		if (status >= ZONE_IS_READY && status <= ZONE_IS_DOWN)
			zone_hold(zone);
		else
			zone = NULL;
	}
	mutex_exit(&zonehash_lock);

	return (zone);
}
/*
 * Find the zone to which `path` is visible and return it with a reference
 * held.  A NULL path yields the global zone.  The active list is scanned
 * in full and the last zone for which ZONE_PATH_VISIBLE() holds wins; if
 * that zone's status lies outside ZONE_IS_READY..ZONE_IS_DOWN, the global
 * zone is returned instead.
 */
zone_t *
zone_find_by_path(const char *path)
{
	zone_t *zp;
	zone_t *match = NULL;
	zone_status_t status;

	if (path == NULL) {
		zone_hold(global_zone);
		return (global_zone);
	}

	ASSERT(*path == '/');

	mutex_enter(&zonehash_lock);
	zp = list_head(&zone_active);
	while (zp != NULL) {
		if (ZONE_PATH_VISIBLE(path, zp))
			match = zp;
		zp = list_next(&zone_active, zp);
	}
	ASSERT(match != NULL);

	status = zone_status_get(match);
	if (!(status >= ZONE_IS_READY && status <= ZONE_IS_DOWN))
		match = global_zone;

	zone_hold(match);
	mutex_exit(&zonehash_lock);

	return (match);
}
/*
 * Update the load-average data of every active zone whose status is at
 * least ZONE_IS_READY and below ZONE_IS_DOWN.  Each call records one
 * sample in the zone's zone_loadavg ring; once the ring is full, the three
 * zone_hp_avenrun[] / zone_avenrun[] values are recomputed.
 */
void
zone_loadavg_update(void)
{
zone_t *zp;
zone_status_t status;
struct loadavg_s *lavg;
hrtime_t zone_total;
uint64_t tmp;
int i;
hrtime_t hr_avg;
int nrun;
/*
 * Per-average weighting factors.  NOTE(review): presumably these
 * correspond to the 1, 5 and 15 minute averages exported as
 * avenrun_1min/5min/15min -- confirm.
 */
static int64_t f[3] = { 135, 27, 9 };
int64_t q, r;
mutex_enter(&zonehash_lock);
for (zp = list_head(&zone_active); zp != NULL;
zp = list_next(&zone_active, zp)) {
mutex_enter(&zp->zone_lock);
/* Skip zones that are not yet ready or are already down. */
status = zone_status_get(zp);
if (status < ZONE_IS_READY || status >= ZONE_IS_DOWN) {
mutex_exit(&zp->zone_lock);
continue;
}
lavg = &zp->zone_loadavg;
/* Sum the zone's usage array and scale the total with scalehrtime(). */
tmp = cpu_uarray_sum_all(zp->zone_ustate);
zone_total = UINT64_OVERFLOW_TO_INT64(tmp);
scalehrtime(&zone_total);
/*
 * Record the increase since the previous sample, clamped to 0 if the
 * total did not grow, then advance the ring cursor.
 */
lavg->lg_loads[lavg->lg_cur] = (zone_total > lavg->lg_total) ?
zone_total - lavg->lg_total : 0;
lavg->lg_cur = (lavg->lg_cur + 1) % S_LOADAVG_SZ;
lavg->lg_total = zone_total;
/* No averages are computed until the sample ring has filled. */
if ((lavg->lg_len + 1) < S_LOADAVG_SZ) {
lavg->lg_len++;
mutex_exit(&zp->zone_lock);
continue;
}
/* Mean of the samples in the ring, then converted to a run count. */
hr_avg = 0;
for (i = 0; i < S_LOADAVG_SZ; i++)
hr_avg += lavg->lg_loads[i];
hr_avg = hr_avg / S_LOADAVG_SZ;
nrun = hr_avg / (NANOSEC / LGRP_LOADAVG_IN_THREAD_MAX);
for (i = 0; i < 3; i++) {
/* Split the stored value into its upper part and its low 16 bits. */
q = (zp->zone_hp_avenrun[i] >> 16) << 7;
r = (zp->zone_hp_avenrun[i] & 0xffff) << 7;
zp->zone_hp_avenrun[i] +=
((nrun - q) * f[i] - ((r * f[i]) >> 16)) >> 4;
/* zone_avenrun[] is 32-bit; saturate at 0x7fffffff when it would not fit. */
if (zp->zone_hp_avenrun[i] <
((uint64_t)1<<(31+16-FSHIFT)))
zp->zone_avenrun[i] = (int32_t)
(zp->zone_hp_avenrun[i] >> (16 - FSHIFT));
else
zp->zone_avenrun[i] = 0x7fffffff;
}
mutex_exit(&zp->zone_lock);
}
mutex_exit(&zonehash_lock);
}
/*
 * Return the number of CPUs recorded for `zone`.  If `zone` is NULL or
 * its count is zero, the global ncpus value is returned instead.
 */
int
zone_ncpus_get(zone_t *zone)
{
	int count = 0;

	if (zone != NULL)
		count = zone->zone_ncpus;
	if (count == 0)
		count = ncpus;

	return (count);
}
/*
 * Return the number of online CPUs recorded for `zone`.  If `zone` is
 * NULL or its count is zero, the global ncpus_online value is returned
 * instead.
 */
int
zone_ncpus_online_get(zone_t *zone)
{
	int count = 0;

	if (zone != NULL)
		count = zone->zone_ncpus_online;
	if (count == 0)
		count = ncpus_online;

	return (count);
}
/*
 * Return the pool that `zone` is bound to.  The pool lock must be held.
 */
pool_t *
zone_pool_get(zone_t *zone)
{
	pool_t *bound;

	ASSERT(pool_lock_held());

	bound = zone->zone_pool;
	return (bound);
}
/*
 * Bind `zone` to `pool` and switch the zone to that pool's processor set.
 * Both the pool lock and cpu_lock must be held.
 */
void
zone_pool_set(zone_t *zone, pool_t *pool)
{
	psetid_t psetid;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	zone->zone_pool = pool;
	psetid = pool->pool_pset->pset_id;
	zone_pset_set(zone, psetid);
}
/*
 * Return the processor set ID recorded for `zone`.  cpu_lock must be held.
 */
psetid_t
zone_pset_get(zone_t *zone)
{
	psetid_t current;

	ASSERT(MUTEX_HELD(&cpu_lock));

	current = zone->zone_psetid;
	return (current);
}
/*
 * Change the processor set recorded for `zone`.  cpu_lock must be held.
 * Nothing happens when the ID is unchanged.  For non-global zones the new
 * ID is stored and pset visibility is added for the new set and removed
 * for the old one (ZONE_PS_INVAL is skipped on either side).  When the new
 * ID is ZONE_PS_INVAL the zone's CPU counts are reset to zero.
 */
void
zone_pset_set(zone_t *zone, psetid_t newpsetid)
{
	psetid_t prev;

	ASSERT(MUTEX_HELD(&cpu_lock));

	prev = zone_pset_get(zone);
	if (prev == newpsetid)
		return;

	if (zone != global_zone) {
		zone->zone_psetid = newpsetid;
		if (newpsetid != ZONE_PS_INVAL)
			pool_pset_visibility_add(newpsetid, zone);
		if (prev != ZONE_PS_INVAL)
			pool_pset_visibility_remove(prev, zone);
	}

	if (newpsetid != ZONE_PS_INVAL)
		return;

	/* With zero counts, zone_ncpus_get() and friends fall back to the globals. */
	zone->zone_ncpus = 0;
	zone->zone_ncpus_online = 0;
}
/*
 * Invoke `cb(zone, data)` for every active zone whose status lies in the
 * range ZONE_IS_READY..ZONE_IS_DOWN.  The walk runs with zonehash_lock
 * held and stops at the first non-zero callback return, which becomes the
 * return value; otherwise 0 is returned.
 */
int
zone_walk(int (*cb)(zone_t *, void *), void *data)
{
	zone_t *zp;
	int rv = 0;

	mutex_enter(&zonehash_lock);
	zp = list_head(&zone_active);
	while (zp != NULL) {
		zone_status_t st = zone_status_get(zp);

		if (st >= ZONE_IS_READY && st <= ZONE_IS_DOWN) {
			rv = (*cb)(zp, data);
			if (rv != 0)
				break;
		}
		zp = list_next(&zone_active, zp);
	}
	mutex_exit(&zonehash_lock);

	return (rv);
}
/*
 * Resolve the user-supplied path `upath` and record it as the zone's root:
 * the vnode is stored in zone_rootvp and the resolved path, with a '/'
 * appended, in zone_rootpath.  Returns 0 on success or an errno; the
 * lookup is retried a bounded number of times while it fails with ESTALE.
 */
static int
zone_set_root(zone_t *zone, const char *upath)
{
vnode_t *vp;
int trycount;
int error = 0;
char *path;
struct pathname upn, pn;
size_t pathlen;
if ((error = pn_get((char *)upath, UIO_USERSPACE, &upn)) != 0)
return (error);
pn_alloc(&pn);
/* Bound the number of ESTALE retries. */
trycount = 10;
for (;;) {
if (--trycount <= 0) {
error = ESTALE;
goto out;
}
if ((error = lookuppn(&upn, &pn, FOLLOW, NULLVPP, &vp)) == 0) {
/*
 * The caller needs VEXEC access; if the vnode is a mount point,
 * traverse() is used to move to what is mounted on it.
 */
if ((error =
VOP_ACCESS(vp, VEXEC, 0, CRED(), NULL)) == 0 &&
(!vn_ismntpt(vp) ||
(error = traverse(&vp)) == 0)) {
/* Two extra bytes: the appended '/' and the terminating NUL. */
pathlen = pn.pn_pathlen + 2;
path = kmem_alloc(pathlen, KM_SLEEP);
(void) strncpy(path, pn.pn_path,
pn.pn_pathlen + 1);
path[pathlen - 2] = '/';
path[pathlen - 1] = '\0';
pn_free(&pn);
pn_free(&upn);
break;
}
/* Checks failed: drop the reference obtained by lookuppn(). */
VN_RELE(vp);
}
/* Only ESTALE is retried; any other error is returned. */
if (error != ESTALE)
goto out;
}
ASSERT(error == 0);
/* The reference on vp from the lookup is kept in zone_rootvp. */
zone->zone_rootvp = vp;
zone->zone_rootpath = path;
zone->zone_rootpathlen = pathlen;
/* pathlen counts the NUL, so this compares the last four characters. */
if (pathlen > 5 && strcmp(path + pathlen - 5, "/lu/") == 0)
zone->zone_flags |= ZF_IS_SCRATCH;
return (0);
out:
pn_free(&pn);
pn_free(&upn);
return (error);
}
/*
 * True for the ASCII digits and the upper- and lower-case ASCII letters.
 * The argument is evaluated more than once, so it must have no side
 * effects.
 */
#define	isalnum(c)							\
	(('0' <= (c) && (c) <= '9') ||					\
	('a' <= (c) && (c) <= 'z') ||					\
	('A' <= (c) && (c) <= 'Z'))
/*
 * Copy the zone name in from the user address `uname`, validate it, and
 * store it in the zone.  The first character must be alphanumeric and the
 * remaining ones alphanumeric or one of '-', '_', '.'.
 *
 * Returns 0 on success (the zone takes ownership of a ZONENAME_MAX-byte
 * buffer), the copyinstr() error if the copy fails, or EINVAL for an
 * invalid name.
 */
static int
zone_set_name(zone_t *zone, const char *uname)
{
	char *kname = kmem_zalloc(ZONENAME_MAX, KM_SLEEP);
	size_t len;
	int i, err;

	err = copyinstr(uname, kname, ZONENAME_MAX, &len);
	if (err != 0)
		goto fail;

	/* Every remaining failure is a validation error. */
	err = EINVAL;

	if (len == ZONENAME_MAX && kname[ZONENAME_MAX - 1] != '\0')
		goto fail;

	if (!isalnum(kname[0]))
		goto fail;

	/* len includes the terminating NUL, hence the "- 1". */
	for (i = 1; i < len - 1; i++) {
		if (isalnum(kname[i]))
			continue;
		if (kname[i] != '-' && kname[i] != '_' && kname[i] != '.')
			goto fail;
	}

	zone->zone_name = kname;
	return (0);

fail:
	kmem_free(kname, ZONENAME_MAX);
	return (err);
}
/*
 * Return the host ID for `zonep`.  If `zonep` is NULL or has no host ID of
 * its own (HW_INVALID_HOSTID), the machine's host ID is parsed from
 * hw_serial as a decimal number; HW_INVALID_HOSTID is returned if that
 * parse fails.
 */
uint32_t
zone_get_hostid(zone_t *zonep)
{
	unsigned long machine_hostid;

	if (zonep != NULL && zonep->zone_hostid != HW_INVALID_HOSTID)
		return (zonep->zone_hostid);

	if (ddi_strtoul(hw_serial, NULL, 10, &machine_hostid) == 0)
		return ((uint32_t)machine_hostid);

	return (HW_INVALID_HOSTID);
}
/*
 * Create a kernel thread belonging to the current process's zone.  The
 * thread is created stopped in the zone's zsched process, linked onto the
 * zone's circular list of kernel threads, given zsched's project, and
 * then made runnable.  The arguments are passed through to
 * thread_create().  Returns the new thread.
 */
kthread_t *
zthread_create(
caddr_t stk,
size_t stksize,
void (*proc)(),
void *arg,
size_t len,
pri_t pri)
{
kthread_t *t;
zone_t *zone = curproc->p_zone;
proc_t *pp = zone->zone_zsched;
/* This hold is dropped by zone_rele() in zthread_exit(). */
zone_hold(zone);
/*
 * Creating the first kernel thread of a zone that is already EMPTY or
 * later is not expected.
 */
ASSERT(!(zone->zone_kthreads == NULL &&
zone_status_get(zone) >= ZONE_IS_EMPTY));
/* Created TS_STOPPED so it cannot run before the setup below is done. */
t = thread_create(stk, stksize, proc, arg, len, pp, TS_STOPPED, pri);
ASSERT(t->t_forw == NULL);
/* Link the thread into the zone's circular thread list. */
mutex_enter(&zone_status_lock);
if (zone->zone_kthreads == NULL) {
t->t_forw = t->t_back = t;
} else {
kthread_t *tx = zone->zone_kthreads;
t->t_forw = tx;
t->t_back = tx->t_back;
tx->t_back->t_forw = t;
tx->t_back = t;
}
zone->zone_kthreads = t;
mutex_exit(&zone_status_lock);
mutex_enter(&pp->p_lock);
t->t_proc_flag |= TP_ZTHREAD;
/* Swap the thread's project for the project of zsched's task. */
project_rele(t->t_proj);
t->t_proj = project_hold(pp->p_task->tk_proj);
/* Setup is complete: make the thread runnable. */
thread_lock(t);
t->t_schedflag |= TS_ALLSTART;
setrun_locked(t);
thread_unlock(t);
mutex_exit(&pp->p_lock);
return (t);
}
/*
 * Exit routine for threads created with zthread_create().  The thread is
 * reassigned to p0, unlinked from its zone's kernel thread list, and the
 * zone hold taken at creation is dropped.  If it was the last kernel
 * thread of a zone that is EMPTY, the zone moves to DOWN.
 */
void
zthread_exit(void)
{
kthread_t *t = curthread;
proc_t *pp = curproc;
zone_t *zone = pp->p_zone;
mutex_enter(&zone_status_lock);
/* Hand the thread over to p0, with preemption disabled. */
kpreempt_disable();
mutex_enter(&pp->p_lock);
t->t_proc_flag &= ~TP_ZTHREAD;
t->t_procp = &p0;
hat_thread_exit(t);
mutex_exit(&pp->p_lock);
kpreempt_enable();
if (t->t_back == t) {
/* This was the only thread on the zone's list. */
ASSERT(t->t_forw == t);
zone->zone_kthreads = NULL;
if (zone_status_get(zone) == ZONE_IS_EMPTY) {
zone_status_set(zone, ZONE_IS_DOWN);
cpucaps_zone_remove(zone);
}
} else {
/* Unlink from the circular list, moving the list head if needed. */
t->t_forw->t_back = t->t_back;
t->t_back->t_forw = t->t_forw;
if (zone->zone_kthreads == t)
zone->zone_kthreads = t->t_forw;
}
mutex_exit(&zone_status_lock);
/* Drop the hold taken in zthread_create(). */
zone_rele(zone);
thread_exit();
}
/*
 * Point the directory vnode slot `*vpp` of process `pp` at `vp`.  A hold
 * is taken on `vp`, the slot is swapped under pp->p_lock, and the previous
 * vnode, if any, is released afterwards.
 */
static void
zone_chdir(vnode_t *vp, vnode_t **vpp, proc_t *pp)
{
	vnode_t *prev;

	VN_HOLD(vp);

	if (AU_AUDITING())
		audit_chdirec(vp, vpp);

	mutex_enter(&pp->p_lock);
	prev = *vpp;
	*vpp = vp;
	mutex_exit(&pp->p_lock);

	if (prev == NULL)
		return;
	VN_RELE(prev);
}
/*
 * Convert an nvlist describing a resource control value into `rv`.
 *
 * Every pair must be a uint64 named "privilege", "limit" or "action", and
 * all three names must be present.  "privilege" must equal
 * RCPRIV_PRIVILEGED and "action" must be RCTL_LOCAL_NOACTION or
 * RCTL_LOCAL_DENY.  Returns 0 on success and EINVAL otherwise; on failure
 * `rv` may have been partially written.
 */
static int
nvlist2rctlval(nvlist_t *nvl, rctl_val_t *rv)
{
	nvpair_t *nvp;
	boolean_t have_priv = B_FALSE;
	boolean_t have_limit = B_FALSE;
	boolean_t have_action = B_FALSE;

	for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL;
	    nvp = nvlist_next_nvpair(nvl, nvp)) {
		const char *name = nvpair_name(nvp);
		uint64_t val;

		if (nvpair_type(nvp) != DATA_TYPE_UINT64)
			return (EINVAL);
		(void) nvpair_value_uint64(nvp, &val);

		if (strcmp(name, "privilege") == 0) {
			if (val != RCPRIV_PRIVILEGED)
				return (EINVAL);
			rv->rcv_privilege = val;
			have_priv = B_TRUE;
			continue;
		}
		if (strcmp(name, "limit") == 0) {
			rv->rcv_value = val;
			have_limit = B_TRUE;
			continue;
		}
		if (strcmp(name, "action") == 0) {
			if (val != RCTL_LOCAL_NOACTION &&
			    val != RCTL_LOCAL_DENY)
				return (EINVAL);
			rv->rcv_flagaction = val;
			have_action = B_TRUE;
			continue;
		}

		/* Unknown pair name. */
		return (EINVAL);
	}

	if (!have_priv || !have_limit || !have_action)
		return (EINVAL);

	/* The remaining fields get fixed values. */
	rv->rcv_action_signal = 0;
	rv->rcv_action_recipient = NULL;
	rv->rcv_action_recip_pid = -1;
	rv->rcv_firing_time = 0;

	return (0);
}
/*
 * Entry point of a non-global zone's init process.  Records the init pid,
 * runs start_init_common(), and then either moves the zone from BOOTING
 * to RUNNING and returns to user level, or, on failure or when the global
 * zone is shutting down, moves it to SHUTTING_DOWN and exits the process.
 */
void
zone_start_init(void)
{
proc_t *p = ttoproc(curthread);
zone_t *z = p->p_zone;
ASSERT(!INGLOBALZONE(curproc));
z->zone_proc_initpid = p->p_pid;
/* The result is kept in the zone's zone_boot_err field. */
p->p_zone->zone_boot_err = start_init_common();
mutex_enter(&zone_status_lock);
if (z->zone_boot_err != 0 || zone_status_get(global_zone) >=
ZONE_IS_SHUTTING_DOWN) {
/* Only change the state if the zone is still BOOTING. */
if (zone_status_get(z) == ZONE_IS_BOOTING) {
zone_status_set(z, ZONE_IS_SHUTTING_DOWN);
}
mutex_exit(&zone_status_lock);
/* Dispose of the process; if proc_exit() returns non-zero, exit this lwp. */
if (proc_exit(CLD_EXITED, z->zone_boot_err) != 0) {
mutex_enter(&p->p_lock);
ASSERT(p->p_flag & SEXITLWPS);
lwp_exit();
}
} else {
if (zone_status_get(z) == ZONE_IS_BOOTING)
zone_status_set(z, ZONE_IS_RUNNING);
mutex_exit(&zone_status_lock);
/* NOTE(review): lwp_rtt() presumably returns to user level -- confirm. */
lwp_rtt();
}
}
/*
 * Argument block handed to zsched().
 */
struct zsched_arg {
/* Zone that the zsched process is being started for. */
zone_t *zone;
/* Resource controls to apply; zsched() looks each pair name up as an rctl. */
nvlist_t *nvlist;
};
static void
zsched(void *arg)
{
struct zsched_arg *za = arg;
proc_t *pp = curproc;
proc_t *initp = proc_init;
zone_t *zone = za->zone;
cred_t *cr, *oldcred;
rctl_set_t *set;
rctl_alloc_gp_t *gp;
contract_t *ct = NULL;
task_t *tk, *oldtk;
rctl_entity_p_t e;
kproject_t *pj;
nvlist_t *nvl = za->nvlist;
nvpair_t *nvp = NULL;
bcopy("zsched", PTOU(pp)->u_psargs, sizeof ("zsched"));
bcopy("zsched", PTOU(pp)->u_comm, sizeof ("zsched"));
PTOU(pp)->u_argc = 0;
PTOU(pp)->u_argv = 0;
PTOU(pp)->u_envp = 0;
PTOU(pp)->u_commpagep = 0;
closeall(P_FINFO(pp));
zone_hold(zone);
zone->zone_zsched = pp;
mutex_enter(&pp->p_lock);
pp->p_zone = zone;
mutex_exit(&pp->p_lock);
sess_create();
mutex_enter(&pidlock);
proc_detach(pp);
pp->p_ppid = 1;
pp->p_flag |= SZONETOP;
pp->p_ancpid = 1;
pp->p_parent = initp;
pp->p_psibling = NULL;
if (initp->p_child)
initp->p_child->p_psibling = pp;
pp->p_sibling = initp->p_child;
initp->p_child = pp;
upcount_dec(crgetruid(CRED()), GLOBAL_ZONEID);
upcount_inc(crgetruid(kcred), zone->zone_id);
mutex_exit(&pidlock);
pj = pp->p_task->tk_proj;
mutex_enter(&global_zone->zone_nlwps_lock);
pj->kpj_nlwps -= pp->p_lwpcnt;
global_zone->zone_nlwps -= pp->p_lwpcnt;
pj->kpj_nprocs--;
global_zone->zone_nprocs--;
mutex_exit(&global_zone->zone_nlwps_lock);
mutex_enter(&global_zone->zone_mem_lock);
global_zone->zone_locked_mem -= pp->p_locked_mem;
pj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
mutex_exit(&global_zone->zone_mem_lock);
tk = task_create(0, zone);
mutex_enter(&cpu_lock);
oldtk = task_join(tk, 0);
pj = pp->p_task->tk_proj;
mutex_enter(&zone->zone_mem_lock);
zone->zone_locked_mem += pp->p_locked_mem;
pj->kpj_data.kpd_locked_mem += pp->p_locked_mem;
mutex_exit(&zone->zone_mem_lock);
mutex_enter(&zone->zone_nlwps_lock);
pj->kpj_nlwps += pp->p_lwpcnt;
pj->kpj_ntasks += 1;
zone->zone_nlwps += pp->p_lwpcnt;
pj->kpj_nprocs++;
zone->zone_nprocs++;
mutex_exit(&zone->zone_nlwps_lock);
mutex_exit(&curproc->p_lock);
mutex_exit(&cpu_lock);
task_rele(oldtk);
cr = zone->zone_kcred;
crhold(cr);
mutex_enter(&pp->p_crlock);
oldcred = pp->p_cred;
pp->p_cred = cr;
mutex_exit(&pp->p_crlock);
crfree(oldcred);
crhold(cr);
crset(pp, cr);
zone_chdir(zone->zone_rootvp, &PTOU(pp)->u_cdir, pp);
zone_chdir(zone->zone_rootvp, &PTOU(pp)->u_rdir, pp);
set = rctl_set_create();
gp = rctl_set_init_prealloc(RCENTITY_ZONE);
mutex_enter(&pp->p_lock);
e.rcep_p.zone = zone;
e.rcep_t = RCENTITY_ZONE;
zone->zone_rctls = rctl_set_init(RCENTITY_ZONE, pp, &e, set, gp);
mutex_exit(&pp->p_lock);
rctl_prealloc_destroy(gp);
while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
rctl_dict_entry_t *rde;
rctl_hndl_t hndl;
char *name;
nvlist_t **nvlarray;
uint_t i, nelem;
int error;
name = nvpair_name(nvp);
hndl = rctl_hndl_lookup(name);
ASSERT(hndl != -1);
rde = rctl_dict_lookup_hndl(hndl);
ASSERT(rde != NULL);
for (; ; ) {
rctl_val_t oval;
mutex_enter(&pp->p_lock);
error = rctl_local_get(hndl, NULL, &oval, pp);
mutex_exit(&pp->p_lock);
ASSERT(error == 0);
ASSERT(oval.rcv_privilege != RCPRIV_BASIC);
if (oval.rcv_privilege == RCPRIV_SYSTEM)
break;
mutex_enter(&pp->p_lock);
error = rctl_local_delete(hndl, &oval, pp);
mutex_exit(&pp->p_lock);
ASSERT(error == 0);
}
error = nvpair_value_nvlist_array(nvp, &nvlarray, &nelem);
ASSERT(error == 0);
for (i = 0; i < nelem; i++) {
rctl_val_t *nvalp;
nvalp = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
error = nvlist2rctlval(nvlarray[i], nvalp);
ASSERT(error == 0);
mutex_enter(&pp->p_lock);
if (rctl_local_insert(hndl, nvalp, pp) != 0)
kmem_cache_free(rctl_val_cache, nvalp);
mutex_exit(&pp->p_lock);
}
}
pool_lock();
mutex_enter(&cpu_lock);
mutex_enter(&zonehash_lock);
zone_uniqid(zone);
zone_zsd_configure(zone);
if (pool_state == POOL_ENABLED)
zone_pset_set(zone, pool_default->pool_pset->pset_id);
mutex_enter(&zone_status_lock);
ASSERT(zone_status_get(zone) == ZONE_IS_UNINITIALIZED);
zone_status_set(zone, ZONE_IS_INITIALIZED);
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
mutex_exit(&cpu_lock);
pool_unlock();
zsd_apply_all_keys(zsd_apply_create, zone);
mutex_enter(&zone_status_lock);
ASSERT(zone_status_get(zone) == ZONE_IS_INITIALIZED);
zone_status_set(zone, ZONE_IS_READY);
mutex_exit(&zone_status_lock);
zone_status_wait_cpr(zone, ZONE_IS_BOOTING, "zsched");
if (zone_status_get(zone) == ZONE_IS_BOOTING) {
id_t cid;
pool_lock();
if (zone->zone_defaultcid > 0)
cid = zone->zone_defaultcid;
else
cid = pool_get_class(zone->zone_pool);
if (cid == -1)
cid = defaultcid;
if ((zone->zone_boot_err = newproc(zone_start_init, NULL, cid,
minclsyspri - 1, &ct, 0)) != 0) {
mutex_enter(&zone_status_lock);
zone_status_set(zone, ZONE_IS_SHUTTING_DOWN);
mutex_exit(&zone_status_lock);
} else {
zone->zone_boot_time = gethrestime_sec();
}
pool_unlock();
}
zone_status_wait_cpr(zone, ZONE_IS_DYING, "zsched");
if (ct)
VERIFY(contract_abandon(ct, curproc, B_TRUE) == 0);
crfree(zone->zone_kcred);
zone->zone_kcred = NULL;
exit(CLD_EXITED, 0);
}
/*
 * Count the mounted filesystems whose mount point begins with 'rootpath'.
 *
 * The caller must hold zonehash_lock, and 'rootpath' must end in '/'.
 * Because the prefix carries that trailing slash, a filesystem mounted
 * exactly at the path without the slash does not match.
 *
 * The vfs list is walked once around, starting and ending at rootvfs,
 * under the vfs list read lock.
 */
static uint_t
zone_mount_count(const char *rootpath)
{
	const size_t prefixlen = strlen(rootpath);
	uint_t nmounts = 0;
	vfs_t *cur;

	ASSERT(MUTEX_HELD(&zonehash_lock));
	ASSERT(rootpath[prefixlen - 1] == '/');

	vfs_list_read_lock();
	cur = rootvfs;
	for (;;) {
		const char *mntpt = refstr_value(cur->vfs_mntpt);

		if (strncmp(rootpath, mntpt, prefixlen) == 0)
			nmounts++;

		cur = cur->vfs_next;
		if (cur == rootvfs)
			break;
	}
	vfs_list_unlock();

	return (nmounts);
}
/*
 * Decide whether 'rootpath' overlaps the root path of any active
 * non-global zone, i.e. one is a prefix of the other.
 *
 * A root path of exactly "//" is always reported as nested.
 * The caller must hold zonehash_lock.
 */
static boolean_t
zone_is_nested(const char *rootpath)
{
	const size_t newlen = strlen(rootpath);
	zone_t *zp;

	ASSERT(MUTEX_HELD(&zonehash_lock));

	if (newlen <= 3 && rootpath[0] == '/' && rootpath[1] == '/' &&
	    rootpath[2] == '\0') {
		return (B_TRUE);
	}

	zp = list_head(&zone_active);
	while (zp != NULL) {
		if (zp != global_zone) {
			size_t curlen = strlen(zp->zone_rootpath);

			/*
			 * Comparing only the shorter length catches both
			 * "new is inside existing" and the reverse.
			 */
			if (strncmp(rootpath, zp->zone_rootpath,
			    MIN(newlen, curlen)) == 0)
				return (B_TRUE);
		}
		zp = list_next(&zone_active, zp);
	}

	return (B_FALSE);
}
/*
 * Copy a privilege set in from user space and attach it to the zone.
 *
 * Returns ENOMEM if the caller-supplied size is smaller than a
 * priv_set_t, EFAULT if the copyin fails, and 0 on success (in which
 * case the zone owns the newly allocated set).
 */
static int
zone_set_privset(zone_t *zone, const priv_set_t *zone_privs,
    size_t zone_privssz)
{
	const size_t setsz = sizeof (priv_set_t);
	priv_set_t *kprivs;

	if (zone_privssz < setsz)
		return (ENOMEM);

	kprivs = kmem_alloc(setsz, KM_SLEEP);
	if (copyin(zone_privs, kprivs, setsz) != 0) {
		kmem_free(kprivs, setsz);
		return (EFAULT);
	}

	zone->zone_privset = kprivs;
	return (0);
}
/*
 * Copy in and validate a packed nvlist of zone resource controls.
 *
 *   ubuf    user address of the packed nvlist
 *   buflen  size of the packed nvlist; 0 means "no rctls"
 *   nvlp    on success receives the unpacked nvlist (NULL if buflen is 0);
 *           the caller is then responsible for freeing it
 *
 * Every pair must be named "zone.*", be an nvlist array, and name a known
 * rctl.  Each array element must convert to an rctl value and that value
 * must be acceptable for the rctl.
 *
 * Returns 0 on success, or ENOMEM / EFAULT / EINVAL (or the error from
 * nvlist2rctlval()) on failure; *nvlp is NULL on failure.
 */
static int
parse_rctls(caddr_t ubuf, size_t buflen, nvlist_t **nvlp)
{
	nvpair_t *nvp = NULL;
	nvlist_t *nvl = NULL;
	char *kbuf;
	int error;

	*nvlp = NULL;

	if (buflen == 0)
		return (0);

	if ((kbuf = kmem_alloc(buflen, KM_NOSLEEP)) == NULL)
		return (ENOMEM);
	if (copyin(ubuf, kbuf, buflen)) {
		error = EFAULT;
		goto out;
	}
	if (nvlist_unpack(kbuf, buflen, &nvl, KM_SLEEP) != 0) {
		/*
		 * Do not trust whatever the failed unpack left in nvl;
		 * reset it so the cleanup code below does not free it.
		 */
		nvl = NULL;
		error = EINVAL;
		goto out;
	}
	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		rctl_dict_entry_t *rde;
		rctl_hndl_t hndl;
		nvlist_t **nvlarray;
		uint_t i, nelem;
		char *name;

		error = EINVAL;
		name = nvpair_name(nvp);
		if (strncmp(name, "zone.", sizeof ("zone.") - 1) != 0 ||
		    nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) {
			goto out;
		}
		if ((hndl = rctl_hndl_lookup(name)) == -1) {
			goto out;
		}
		rde = rctl_dict_lookup_hndl(hndl);
		error = nvpair_value_nvlist_array(nvp, &nvlarray, &nelem);
		ASSERT(error == 0);
		for (i = 0; i < nelem; i++) {
			rctl_val_t rv;

			error = nvlist2rctlval(nvlarray[i], &rv);
			if (error != 0)
				goto out;
			/*
			 * Validate every element.  Checking after the loop
			 * would only cover the last one, and would read an
			 * uninitialized value when the array is empty.
			 */
			if (rctl_invalid_value(rde, &rv)) {
				error = EINVAL;
				goto out;
			}
		}
	}
	error = 0;
	*nvlp = nvl;
out:
	kmem_free(kbuf, buflen);
	if (error && nvl != NULL)
		nvlist_free(nvl);
	return (error);
}
/*
 * Report a zone creation failure.
 *
 * If 'er_out' is non-NULL, the extended error code 'er_ext' is copied
 * out to it first; a failed copyout is reported as EFAULT instead of
 * 'er_error'.  The return value is whatever set_errno() returns.
 */
int
zone_create_error(int er_error, int er_ext, int *er_out)
{
	if (er_out != NULL &&
	    copyout(&er_ext, er_out, sizeof (int)) != 0)
		return (set_errno(EFAULT));

	return (set_errno(er_error));
}
/*
 * Copy a sensitivity label in from user space, wrap it in a newly
 * allocated ts_label_t for the given DOI, and attach it to the zone.
 *
 * Returns EFAULT if the copyin fails, ENOMEM if the (non-sleeping)
 * label allocation fails, and 0 on success.
 */
static int
zone_set_label(zone_t *zone, const bslabel_t *lab, uint32_t doi)
{
	bslabel_t klabel;
	ts_label_t *newlabel;

	if (copyin(lab, &klabel, sizeof (klabel)) != 0)
		return (EFAULT);

	newlabel = labelalloc(&klabel, doi, KM_NOSLEEP);
	if (newlabel != NULL) {
		zone->zone_slabel = newlabel;
		return (0);
	}

	return (ENOMEM);
}
/*
 * Copy in a comma-separated, NUL-terminated list of dataset names and
 * record each one on the zone's zone_datasets list.
 *
 *   zone    zone whose dataset list is populated
 *   ubuf    user address of the list; NULL means "no datasets"
 *   buflen  size of the list including the terminating NUL; 0 means
 *           "no datasets"
 *
 * Returns 0 on success, ENOMEM if the staging buffer cannot be allocated
 * without sleeping, EFAULT if the copyin fails, and EINVAL if the buffer
 * is not NUL-terminated.
 */
static int
parse_zfs(zone_t *zone, caddr_t ubuf, size_t buflen)
{
	char *kbuf;
	char *dataset, *next;
	zone_dataset_t *zd;
	size_t len;

	if (ubuf == NULL || buflen == 0)
		return (0);

	if ((kbuf = kmem_alloc(buflen, KM_NOSLEEP)) == NULL)
		return (ENOMEM);

	if (copyin(ubuf, kbuf, buflen) != 0) {
		kmem_free(kbuf, buflen);
		return (EFAULT);
	}

	/*
	 * The parsing below relies on strchr()/strlen(), which would run
	 * off the end of the allocation if the caller did not terminate
	 * the string.
	 */
	if (kbuf[buflen - 1] != '\0') {
		kmem_free(kbuf, buflen);
		return (EINVAL);
	}

	dataset = kbuf;
	for (;;) {
		zd = kmem_alloc(sizeof (zone_dataset_t), KM_SLEEP);

		next = strchr(dataset, ',');
		if (next == NULL)
			len = strlen(dataset);
		else
			len = next - dataset;

		/* Each entry gets its own NUL-terminated copy of the name. */
		zd->zd_dataset = kmem_alloc(len + 1, KM_SLEEP);
		bcopy(dataset, zd->zd_dataset, len);
		zd->zd_dataset[len] = '\0';

		list_insert_head(&zone->zone_datasets, zd);

		if (next == NULL)
			break;

		dataset = next + 1;
	}

	kmem_free(kbuf, buflen);
	return (0);
}
/*
 * Create a new zone and start its zsched process.
 *
 *   zone_name, zone_root      user pointers to the zone's name and root path
 *   zone_privs, zone_privssz  user privilege set and its size
 *   rctlbuf, rctlbufsz        packed nvlist of zone rctls (may be empty)
 *   zfsbuf, zfsbufsz          comma-separated dataset list (may be empty)
 *   extended_error            optional user int receiving an extended code
 *   match, doi, label         trusted-system parameters
 *   flags                     ZCF_* creation flags
 *
 * Returns the new zone's ID on success.  On failure returns the result
 * of set_errno()/zone_create_error() for the error.
 *
 * Until zone_kcred is set up, failures release the zone with zone_free().
 * After that, failures go through 'errout', which drops the credential
 * (the ASSERTs there expect it to be the zone's only reference).
 */
static zoneid_t
zone_create(const char *zone_name, const char *zone_root,
    const priv_set_t *zone_privs, size_t zone_privssz,
    caddr_t rctlbuf, size_t rctlbufsz,
    caddr_t zfsbuf, size_t zfsbufsz, int *extended_error,
    int match, uint32_t doi, const bslabel_t *label,
    int flags)
{
	struct zsched_arg zarg;
	nvlist_t *rctls = NULL;
	proc_t *pp = curproc;
	zone_t *zone, *ztmp;
	zoneid_t zoneid, start = GLOBAL_ZONEID;
	int error;
	int error2 = 0;
	char *str;
	cred_t *zkcr;
	boolean_t insert_label_hash;

	if (secpolicy_zone_config(CRED()) != 0)
		return (set_errno(EPERM));

	/* Refuse to create a zone from within a chroot environment. */
	if (PTOU(pp)->u_rdir != NULL && PTOU(pp)->u_rdir != rootdir)
		return (zone_create_error(ENOTSUP, ZE_CHROOTED,
		    extended_error));

	/*
	 * Allocate a zone ID whose corresponding netstack is not still in
	 * use.  'start' remembers the first rejected ID so that we notice
	 * when the allocator has cycled through every ID.
	 */
	for (;;) {
		zoneid = id_alloc(zoneid_space);
		if (!netstack_inuse_by_stackid(zoneid_to_netstackid(zoneid)))
			break;
		id_free(zoneid_space, zoneid);
		if (start == GLOBAL_ZONEID) {
			start = zoneid;
		} else if (zoneid == start) {
			cmn_err(CE_WARN, "zone_create() failed: all available "
			    "zone IDs have netstacks still in use");
			return (set_errno(ENFILE));
		}
		cmn_err(CE_WARN, "unable to reuse zone ID %d; "
		    "netstack still in use", zoneid);
	}

	zone = kmem_zalloc(sizeof (zone_t), KM_SLEEP);
	zone->zone_id = zoneid;
	zone->zone_status = ZONE_IS_UNINITIALIZED;
	zone->zone_pool = pool_default;
	zone->zone_pool_mod = gethrtime();
	zone->zone_psetid = ZONE_PS_INVAL;
	zone->zone_ncpus = 0;
	zone->zone_ncpus_online = 0;
	zone->zone_restart_init = B_TRUE;
	zone->zone_reboot_on_init_exit = B_FALSE;
	zone->zone_restart_init_0 = B_FALSE;
	zone->zone_brand = &native_brand;
	zone->zone_initname = NULL;
	mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&zone->zone_mem_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL);
	list_create(&zone->zone_ref_list, sizeof (zone_ref_t),
	    offsetof(zone_ref_t, zref_linkage));
	list_create(&zone->zone_zsd, sizeof (struct zsd_entry),
	    offsetof(struct zsd_entry, zsd_linkage));
	list_create(&zone->zone_datasets, sizeof (zone_dataset_t),
	    offsetof(zone_dataset_t, zd_linkage));
	list_create(&zone->zone_dl_list, sizeof (zone_dl_t),
	    offsetof(zone_dl_t, zdl_linkage));
	rw_init(&zone->zone_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL);
	rw_init(&zone->zone_mntfs_db_lock, NULL, RW_DEFAULT, NULL);

	if (flags & ZCF_NET_EXCL) {
		zone->zone_flags |= ZF_NET_EXCL;
	}

	if ((error = zone_set_name(zone, zone_name)) != 0) {
		zone_free(zone);
		return (zone_create_error(error, 0, extended_error));
	}

	if ((error = zone_set_root(zone, zone_root)) != 0) {
		zone_free(zone);
		return (zone_create_error(error, 0, extended_error));
	}
	if ((error = zone_set_privset(zone, zone_privs, zone_privssz)) != 0) {
		zone_free(zone);
		return (zone_create_error(error, 0, extended_error));
	}

	/* The node name starts out equal to the (possibly truncated) name. */
	zone->zone_nodename = kmem_alloc(_SYS_NMLN, KM_SLEEP);
	(void) strncpy(zone->zone_nodename, zone->zone_name, _SYS_NMLN);
	zone->zone_nodename[_SYS_NMLN - 1] = '\0';

	zone->zone_domain = kmem_alloc(_SYS_NMLN, KM_SLEEP);
	zone->zone_domain[0] = '\0';
	zone->zone_hostid = HW_INVALID_HOSTID;
	zone->zone_shares = 1;
	zone->zone_shmmax = 0;
	zone->zone_ipc.ipcq_shmmni = 0;
	zone->zone_ipc.ipcq_semmni = 0;
	zone->zone_ipc.ipcq_msgmni = 0;
	zone->zone_bootargs = NULL;
	zone->zone_fs_allowed = NULL;

	psecflags_default(&zone->zone_secflags);

	zone->zone_initname =
	    kmem_alloc(strlen(zone_default_initname) + 1, KM_SLEEP);
	(void) strcpy(zone->zone_initname, zone_default_initname);
	zone->zone_nlwps = 0;
	zone->zone_nlwps_ctl = INT_MAX;
	zone->zone_nprocs = 0;
	zone->zone_nprocs_ctl = INT_MAX;
	zone->zone_locked_mem = 0;
	zone->zone_locked_mem_ctl = UINT64_MAX;
	zone->zone_max_swap = 0;
	zone->zone_max_swap_ctl = UINT64_MAX;
	zone->zone_max_lofi = 0;
	zone->zone_max_lofi_ctl = UINT64_MAX;
	/*
	 * Initialize the new zone's kstat pointers.  Writing these through
	 * zone0 would instead clear the global zone's pointers on every
	 * zone creation.
	 */
	zone->zone_lockedmem_kstat = NULL;
	zone->zone_swapresv_kstat = NULL;

	zone->zone_ustate = cpu_uarray_zalloc(ZONE_USTATE_MAX, KM_SLEEP);

	/* The rctl set itself is built later, by zsched. */
	zone->zone_rctls = NULL;

	if ((error = parse_rctls(rctlbuf, rctlbufsz, &rctls)) != 0) {
		zone_free(zone);
		return (zone_create_error(error, 0, extended_error));
	}

	/*
	 * From here on 'rctls' may hold an unpacked nvlist that this
	 * function owns; every exit path must free it.
	 */
	if ((error = parse_zfs(zone, zfsbuf, zfsbufsz)) != 0) {
		zone_free(zone);
		nvlist_free(rctls);
		return (set_errno(error));
	}

	/* Trusted-system parameters: match flag and sensitivity label. */
	zone->zone_match = match;
	if (is_system_labeled() && !(zone->zone_flags & ZF_IS_SCRATCH)) {
		/* Only the system's own DOI (or 0) may be requested. */
		if (doi != 0 && doi != default_doi) {
			zone_free(zone);
			nvlist_free(rctls);
			return (set_errno(EINVAL));
		}
		/* The system's DOI is always the one applied. */
		error = zone_set_label(zone, label, default_doi);
		if (error != 0) {
			zone_free(zone);
			nvlist_free(rctls);
			return (set_errno(error));
		}
		insert_label_hash = B_TRUE;
	} else {
		zone->zone_slabel = l_admin_low;
		label_hold(l_admin_low);
		insert_label_hash = B_FALSE;
	}

	/*
	 * Hold the process's other lwps before taking any locks.  'error'
	 * is 0 here (the last call succeeded), so report EINTR explicitly,
	 * matching how zone_enter() reports a failed holdlwps().
	 */
	if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK)) {
		zone_free(zone);
		nvlist_free(rctls);
		return (zone_create_error(EINTR, 0, extended_error));
	}

	/*
	 * As above, 'error' is 0 here; zone_shutdown() reports a failed
	 * block_mounts() as EINTR, so do the same.
	 */
	if (block_mounts(zone) == 0) {
		mutex_enter(&pp->p_lock);
		if (curthread != pp->p_agenttp)
			continuelwps(pp);
		mutex_exit(&pp->p_lock);
		zone_free(zone);
		nvlist_free(rctls);
		return (zone_create_error(EINTR, 0, extended_error));
	}

	/*
	 * Set up the zone's kernel credential, limited to the zone's
	 * privilege set.  From here on, failures must go through 'errout'
	 * instead of calling zone_free() directly.
	 */
	zone->zone_kcred = crdup(kcred);
	crsetzone(zone->zone_kcred, zone);
	priv_intersect(zone->zone_privset, &CR_PPRIV(zone->zone_kcred));
	priv_intersect(zone->zone_privset, &CR_EPRIV(zone->zone_kcred));
	priv_intersect(zone->zone_privset, &CR_IPRIV(zone->zone_kcred));
	priv_intersect(zone->zone_privset, &CR_LPRIV(zone->zone_kcred));

	mutex_enter(&zonehash_lock);
	/*
	 * Fail if a zone with this name exists or, when labels are hashed,
	 * if a zone with this label exists.
	 */
	if ((ztmp = zone_find_all_by_name(zone->zone_name)) != NULL ||
	    (insert_label_hash &&
	    (ztmp = zone_find_all_by_label(zone->zone_slabel)) != NULL)) {
		zone_status_t status;

		status = zone_status_get(ztmp);
		if (status == ZONE_IS_READY || status == ZONE_IS_RUNNING)
			error = EEXIST;
		else
			error = EBUSY;

		if (insert_label_hash)
			error2 = ZE_LABELINUSE;

		goto errout;
	}

	/* The new root may not overlap another non-global zone's root. */
	if (zone_is_nested(zone->zone_rootpath)) {
		error = EBUSY;
		goto errout;
	}

	ASSERT(zonecount != 0);
	if (zonecount + 1 > maxzones) {
		error = ENOMEM;
		goto errout;
	}

	if (zone_mount_count(zone->zone_rootpath) != 0) {
		error = EBUSY;
		error2 = ZE_AREMOUNTS;
		goto errout;
	}

	/*
	 * Publish the (still incomplete) zone in the hash tables and the
	 * active list before dropping zonehash_lock, so that a concurrent
	 * zone_create() of the same zone fails the lookups above.
	 */
	zonecount++;
	(void) mod_hash_insert(zonehashbyid,
	    (mod_hash_key_t)(uintptr_t)zone->zone_id,
	    (mod_hash_val_t)(uintptr_t)zone);
	str = kmem_alloc(strlen(zone->zone_name) + 1, KM_SLEEP);
	(void) strcpy(str, zone->zone_name);
	(void) mod_hash_insert(zonehashbyname, (mod_hash_key_t)str,
	    (mod_hash_val_t)(uintptr_t)zone);
	if (insert_label_hash) {
		(void) mod_hash_insert(zonehashbylabel,
		    (mod_hash_key_t)zone->zone_slabel, (mod_hash_val_t)zone);
		zone->zone_flags |= ZF_HASHED_LABEL;
	}

	list_insert_tail(&zone_active, zone);
	mutex_exit(&zonehash_lock);

	zarg.zone = zone;
	zarg.nvlist = rctls;
	error = newproc(zsched, (void *)&zarg, syscid, minclsyspri, NULL, 0);
	if (error != 0) {
		/* Undo everything that was made globally visible above. */
		mutex_enter(&zonehash_lock);
		list_remove(&zone_active, zone);
		if (zone->zone_flags & ZF_HASHED_LABEL) {
			ASSERT(zone->zone_slabel != NULL);
			(void) mod_hash_destroy(zonehashbylabel,
			    (mod_hash_key_t)zone->zone_slabel);
		}
		(void) mod_hash_destroy(zonehashbyname,
		    (mod_hash_key_t)(uintptr_t)zone->zone_name);
		(void) mod_hash_destroy(zonehashbyid,
		    (mod_hash_key_t)(uintptr_t)zone->zone_id);
		ASSERT(zonecount > 1);
		zonecount--;
		goto errout;
	}

	/* No failure paths remain past this point. */
	zone_kstat_create(zone);

	/* Let the process's other lwps run again. */
	mutex_enter(&pp->p_lock);
	if (curthread != pp->p_agenttp)
		continuelwps(pp);
	mutex_exit(&pp->p_lock);

	/* Wait until zsched has moved the zone to ZONE_IS_READY. */
	zone_status_wait(zone, ZONE_IS_READY);
	resume_mounts(zone);
	nvlist_free(rctls);

	return (zoneid);

errout:
	/* Every jump to errout arrives with zonehash_lock held. */
	mutex_exit(&zonehash_lock);
	mutex_enter(&pp->p_lock);
	if (curthread != pp->p_agenttp)
		continuelwps(pp);
	mutex_exit(&pp->p_lock);

	resume_mounts(zone);
	nvlist_free(rctls);
	/*
	 * The only reference left should be the credential reference held
	 * through zone_kcred; releasing the credential is what lets the
	 * zone go away.
	 */
	ASSERT(zone->zone_cred_ref == 1);
	ASSERT(zone->zone_kcred->cr_ref == 1);
	ASSERT(zone->zone_ref == 0);
	zkcr = zone->zone_kcred;
	zone->zone_kcred = NULL;
	crfree(zkcr);
	return (zone_create_error(error, error2, extended_error));
}
/*
 * Boot a zone: move it from ZONE_IS_READY to ZONE_IS_BOOTING and wait
 * (interruptibly) for it to reach ZONE_IS_RUNNING.
 *
 * Fails with EPERM without zone-config privilege, EINVAL for an ID
 * outside the user zone range, an unknown zone or a zone that is not
 * READY, and EINTR if the wait is interrupted.  Otherwise the result is
 * the zone's recorded zone_boot_err (0 meaning success).
 */
static int
zone_boot(zoneid_t zoneid)
{
	zone_t *zp;
	int boot_err;

	if (secpolicy_zone_config(CRED()) != 0)
		return (set_errno(EPERM));
	if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
		return (set_errno(EINVAL));

	/* Look the zone up and flip its state under zonehash_lock. */
	mutex_enter(&zonehash_lock);
	zp = zone_find_all_by_id(zoneid);
	if (zp == NULL) {
		mutex_exit(&zonehash_lock);
		return (set_errno(EINVAL));
	}

	mutex_enter(&zone_status_lock);
	if (zone_status_get(zp) != ZONE_IS_READY) {
		mutex_exit(&zone_status_lock);
		mutex_exit(&zonehash_lock);
		return (set_errno(EINVAL));
	}
	zone_status_set(zp, ZONE_IS_BOOTING);
	mutex_exit(&zone_status_lock);

	/* Hold the zone so it can still be used once the lock is dropped. */
	zone_hold(zp);
	mutex_exit(&zonehash_lock);

	if (zone_status_wait_sig(zp, ZONE_IS_RUNNING) == 0) {
		zone_rele(zp);
		return (set_errno(EINTR));
	}

	/* Read the boot result before giving up our hold. */
	boot_err = zp->zone_boot_err;
	zone_rele(zp);

	if (boot_err != 0)
		return (set_errno(boot_err));
	return (0);
}
/*
 * Wait for the zone to reach ZONE_IS_EMPTY, calling killall() on the
 * zone each time a wait of roughly one second (hz ticks) times out.
 *
 * Must be called without zonehash_lock held.  Returns EINTR if the wait
 * reports 0, and 0 otherwise.
 */
static int
zone_empty(zone_t *zone)
{
	int rv;

	ASSERT(MUTEX_NOT_HELD(&zonehash_lock));

	for (;;) {
		rv = zone_status_timedwait_sig(zone, ddi_get_lbolt() + hz,
		    ZONE_IS_EMPTY);
		if (rv != -1)
			break;
		/* Timed out: kill the zone's processes and wait again. */
		killall(zone->zone_id);
	}

	return (rv == 0 ? EINTR : 0);
}
/*
 * Decide whether the calling process may see the given zone.
 *
 * Access is granted when the caller is in the global zone or in the
 * zone itself.  Otherwise it is granted only on a labeled system, for a
 * non-scratch, non-global zone whose label is dominated by the label of
 * the caller's zone.
 */
static boolean_t
zone_list_access(zone_t *zone)
{
	bslabel_t *caller_label;
	bslabel_t *target_label;

	if (curproc->p_zone == global_zone || curproc->p_zone == zone)
		return (B_TRUE);

	if (!is_system_labeled() || (zone->zone_flags & ZF_IS_SCRATCH))
		return (B_FALSE);

	caller_label = label2bslabel(curproc->p_zone->zone_slabel);
	target_label = label2bslabel(zone->zone_slabel);

	if (zone->zone_id != GLOBAL_ZONEID &&
	    bldominates(caller_label, target_label))
		return (B_TRUE);

	return (B_FALSE);
}
/*
 * Shut a zone down: move it towards ZONE_IS_DOWN and wait for it to get
 * there.
 *
 * Fails with EPERM without zone-config privilege, EINVAL for an ID
 * outside the user zone range, an unknown zone, or a zone that has not
 * reached ZONE_IS_READY, and EINTR if any of the interruptible waits is
 * interrupted.  Returns 0 once the zone is at ZONE_IS_DOWN or beyond.
 */
static int
zone_shutdown(zoneid_t zoneid)
{
int error;
zone_t *zone;
zone_status_t status;
if (secpolicy_zone_config(CRED()) != 0)
return (set_errno(EPERM));
if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
return (set_errno(EINVAL));
mutex_enter(&zonehash_lock);
if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
mutex_exit(&zonehash_lock);
return (set_errno(EINVAL));
}
/*
 * zonehash_lock is dropped around block_mounts(); take a hold so the
 * zone_t stays usable in the meantime.
 */
zone_hold(zone);
mutex_exit(&zonehash_lock);
/* A zero return from block_mounts() is reported as EINTR. */
if (block_mounts(zone) == 0) {
zone_rele(zone);
return (set_errno(EINTR));
}
/* Lock order here: zonehash_lock, then zone_status_lock. */
mutex_enter(&zonehash_lock);
mutex_enter(&zone_status_lock);
status = zone_status_get(zone);
/* A zone that has not reached READY cannot be shut down. */
if (status < ZONE_IS_READY) {
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
resume_mounts(zone);
zone_rele(zone);
return (set_errno(EINVAL));
}
/* Already down (or further along): nothing to do, report success. */
if (status >= ZONE_IS_DOWN) {
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
resume_mounts(zone);
zone_rele(zone);
return (0);
}
/*
 * Only advance the state if the zone has not already reached
 * ZONE_IS_EMPTY; otherwise fall through to the waits below.
 */
if (status < ZONE_IS_EMPTY) {
uint_t ntasks;
mutex_enter(&zone->zone_lock);
/* More than the single remaining task: things are still running. */
if ((ntasks = zone->zone_ntasks) != 1) {
zone_status_set(zone, ZONE_IS_SHUTTING_DOWN);
}
mutex_exit(&zone->zone_lock);
if (ntasks == 1) {
/*
 * Exactly one task left: the zone is treated as empty.  With no
 * kernel threads either, skip straight to ZONE_IS_DOWN.
 */
if (zone->zone_kthreads == NULL) {
zone_status_set(zone, ZONE_IS_DOWN);
} else {
zone_status_set(zone, ZONE_IS_EMPTY);
}
}
}
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
resume_mounts(zone);
/* Wait for the zone to empty; zone_empty() requires zonehash_lock dropped. */
if (error = zone_empty(zone)) {
zone_rele(zone);
return (set_errno(error));
}
if (pool_lock_intr() != 0) {
zone_rele(zone);
return (set_errno(EINTR));
}
/*
 * With pools enabled, rebind the zone to the default pool and
 * invalidate its processor set, under cpu_lock.
 */
if (pool_state == POOL_ENABLED) {
mutex_enter(&cpu_lock);
zone_pool_set(zone, pool_default);
zone_pset_set(zone, ZONE_PS_INVAL);
mutex_exit(&cpu_lock);
}
pool_unlock();
/* Run the ZSD shutdown callbacks; no locks are held across this call. */
zone_zsd_callbacks(zone, ZSD_SHUTDOWN);
mutex_enter(&zone_status_lock);
/* No kernel threads to wait for: mark the zone down right away. */
if (zone->zone_kthreads == NULL && zone_status_get(zone) < ZONE_IS_DOWN)
zone_status_set(zone, ZONE_IS_DOWN);
mutex_exit(&zone_status_lock);
/* Otherwise wait (interruptibly) for the zone to reach ZONE_IS_DOWN. */
if (!zone_status_wait_sig(zone, ZONE_IS_DOWN)) {
zone_rele(zone);
return (set_errno(EINTR));
}
zone_rele(zone);
return (0);
}
/*
 * Log the zone's outstanding general-purpose and credential reference
 * counts, followed by a bracketed list of any nonzero per-subsystem
 * counts, and mark the zone ZF_REFCOUNTS_LOGGED.
 *
 * The counters are sampled under zone_lock.  The list buffer is sized
 * up front: two bytes for the brackets plus, for each nonzero
 * subsystem, the length of its name and 13 more bytes.
 */
static void
zone_log_refcounts(zone_t *zone)
{
	char *msg;
	char *cursor;
	uint32_t msg_size = 2;
	uint32_t i;
	uint_t zrefs;
	uint_t crefs;

	mutex_enter(&zone->zone_lock);
	zone->zone_flags |= ZF_REFCOUNTS_LOGGED;
	zrefs = zone->zone_ref;
	crefs = zone->zone_cred_ref;
	for (i = 0; i < ZONE_REF_NUM_SUBSYS; ++i) {
		if (zone->zone_subsys_ref[i] == 0)
			continue;
		msg_size += strlen(zone_ref_subsys_names[i]) + 13;
	}

	if (msg_size == 2) {
		/* No per-subsystem counts to list: log the totals only. */
		mutex_exit(&zone->zone_lock);
		(void) strlog(0, 0, 1, SL_CONSOLE | SL_NOTE,
		    "Zone '%s' (ID: %d) is shutting down, but %u zone "
		    "references and %u credential references are still extant",
		    zone->zone_name, zone->zone_id, zrefs, crefs);
		return;
	}

	/* Slot 0 is reserved for the opening bracket, filled in below. */
	msg = kmem_alloc(msg_size, KM_SLEEP);
	cursor = msg + 1;
	for (i = 0; i < ZONE_REF_NUM_SUBSYS; ++i) {
		if (zone->zone_subsys_ref[i] == 0)
			continue;
		/*
		 * snprintf()'s return value is used to advance the cursor;
		 * the INT_MAX limit relies on the sizing pass above.
		 */
		cursor += snprintf(cursor, INT_MAX, "%s: %u,",
		    zone_ref_subsys_names[i], zone->zone_subsys_ref[i]);
	}
	mutex_exit(&zone->zone_lock);

	msg[0] = '[';
	ASSERT((uintptr_t)(cursor - msg) < msg_size);
	ASSERT(cursor[0] == '\0' && cursor[-1] == ',');
	/* The final entry's trailing comma becomes the closing bracket. */
	cursor[-1] = ']';

	(void) strlog(0, 0, 1, SL_CONSOLE | SL_NOTE,
	    "Zone '%s' (ID: %d) is shutting down, but %u zone references and "
	    "%u credential references are still extant %s", zone->zone_name,
	    zone->zone_id, zrefs, crefs, msg);
	kmem_free(msg, msg_size);
}
/*
 * Destroy a zone that is already down.
 *
 * Fails with EPERM without zone-config privilege, EINVAL for an ID
 * outside the user zone range or an unknown zone, EBUSY if filesystems
 * are still mounted under the zone's root or the zone has not reached
 * ZONE_IS_DOWN, and EINTR if a wait for references to drain is
 * interrupted.  Returns 0 once the zone has been removed from the hash
 * tables and active list, or if it has disappeared in the meantime.
 */
static int
zone_destroy(zoneid_t zoneid)
{
uint64_t uniqid;
zone_t *zone;
zone_status_t status;
clock_t wait_time;
boolean_t log_refcounts;
if (secpolicy_zone_config(CRED()) != 0)
return (set_errno(EPERM));
if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
return (set_errno(EINVAL));
mutex_enter(&zonehash_lock);
if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
mutex_exit(&zonehash_lock);
return (set_errno(EINVAL));
}
/* Refuse while anything is still mounted beneath the zone's root. */
if (zone_mount_count(zone->zone_rootpath) != 0) {
mutex_exit(&zonehash_lock);
return (set_errno(EBUSY));
}
mutex_enter(&zone_status_lock);
status = zone_status_get(zone);
if (status < ZONE_IS_DOWN) {
mutex_exit(&zone_status_lock);
mutex_exit(&zonehash_lock);
return (set_errno(EBUSY));
} else if (status == ZONE_IS_DOWN) {
/* zsched waits for ZONE_IS_DYING before it exits. */
zone_status_set(zone, ZONE_IS_DYING);
}
mutex_exit(&zone_status_lock);
zone_hold(zone);
mutex_exit(&zonehash_lock);
/* Wait (uninterruptibly) for the zone to reach ZONE_IS_DEAD. */
zone_status_wait(zone, ZONE_IS_DEAD);
zone_zsd_callbacks(zone, ZSD_DESTROY);
zone->zone_netstack = NULL;
/*
 * Remember the unique ID so the loop below can tell this zone apart
 * from a new zone that reuses the same zone ID.
 */
uniqid = zone->zone_uniqid;
zone_rele(zone);
/* Our hold is gone, so the pointer must be looked up again before use. */
zone = NULL;
log_refcounts = B_FALSE;
wait_time = SEC_TO_TICK(ZONE_DESTROY_TIMEOUT_SECS);
mutex_enter(&zonehash_lock);
/*
 * Loop, holding zonehash_lock except while blocked on zone_destroy_cv,
 * until the zone is unreferenced (ZONE_IS_UNREF) or has gone away.
 */
for (; ; ) {
boolean_t unref;
boolean_t refs_have_been_logged;
/* Zone gone, or ID reused by a different zone: nothing left to do. */
if ((zone = zone_find_all_by_id(zoneid)) == NULL ||
zone->zone_uniqid != uniqid) {
mutex_exit(&zonehash_lock);
return (0);
}
mutex_enter(&zone->zone_lock);
unref = ZONE_IS_UNREF(zone);
refs_have_been_logged = (zone->zone_flags &
ZF_REFCOUNTS_LOGGED);
mutex_exit(&zone->zone_lock);
if (unref) {
break;
}
/*
 * The reference counts are logged at most once per zone (tracked by
 * ZF_REFCOUNTS_LOGGED), and only after this thread has first waited
 * out the ZONE_DESTROY_TIMEOUT_SECS budget.
 */
if (!refs_have_been_logged) {
if (!log_refcounts) {
ASSERT(wait_time > 0);
/*
 * The remaining time is fed back in, so repeated wakeups share a
 * single timeout budget.
 */
wait_time = cv_reltimedwait_sig(
&zone_destroy_cv, &zonehash_lock, wait_time,
TR_SEC);
if (wait_time > 0) {
/* Woken before the timeout: re-check the zone. */
continue;
} else if (wait_time == 0) {
/* A return of 0 is treated as a signal interruption. */
mutex_exit(&zonehash_lock);
return (set_errno(EINTR));
}
/*
 * Negative return: the wait timed out.  Re-check the zone once
 * more; if it is still referenced the next pass logs its counts.
 */
log_refcounts = B_TRUE;
continue;
}
zone_log_refcounts(zone);
}
/* From here on wait without a timeout, but still interruptibly. */
if (cv_wait_sig(&zone_destroy_cv, &zonehash_lock) == 0) {
mutex_exit(&zonehash_lock);
return (set_errno(EINTR));
}
}
/* The zone is unreferenced; tear it down (zonehash_lock is still held). */
cpucaps_zone_remove(zone);
zone_kstat_delete(zone);
if (zone->zone_pfexecd != NULL) {
klpd_freelist(&zone->zone_pfexecd);
zone->zone_pfexecd = NULL;
}
if (ZONE_IS_BRANDED(zone))
ZBROP(zone)->b_free_brand_data(zone);
brand_unregister_zone(zone->zone_brand);
/* The global zone is never destroyed, so the count stays above one. */
ASSERT(zonecount > 1);
zonecount--;
/* Remove the zone from the active list and all hash tables. */
list_remove(&zone_active, zone);
(void) mod_hash_destroy(zonehashbyname,
(mod_hash_key_t)zone->zone_name);
(void) mod_hash_destroy(zonehashbyid,
(mod_hash_key_t)(uintptr_t)zone->zone_id);
if (zone->zone_flags & ZF_HASHED_LABEL)
(void) mod_hash_destroy(zonehashbylabel,
(mod_hash_key_t)zone->zone_slabel);
mutex_exit(&zonehash_lock);
/* Release the zone's root vnode, if it has one. */
if (zone->zone_rootvp != NULL) {
VN_RELE(zone->zone_rootvp);
zone->zone_rootvp = NULL;
}
/* Park the zone on the deathrow list. */
mutex_enter(&zone_deathrow_lock);
list_insert_tail(&zone_deathrow, zone);
mutex_exit(&zone_deathrow_lock);
/*
 * Drop a reference.  NOTE(review): presumably the one taken when the
 * zone was set up by zsched -- confirm against zsched().
 */
zone_rele(zone);
return (0);
}
/*
 * Retrieve an attribute of a zone.
 *
 *   zoneid   zone to query
 *   attr     ZONE_ATTR_* selector (brand-specific values are passed to
 *            the brand's b_getattr hook)
 *   buf      user buffer for the value; for most attributes NULL means
 *            "just report the size"
 *   bufsize  size of the user buffer; at most this many bytes are
 *            copied out
 *
 * On success returns the full size of the attribute, which may exceed
 * the number of bytes copied.  Fails with EINVAL for an unknown or
 * not-yet-initialized zone, a zone the caller may not see, or an
 * unknown attribute; other errors are attribute specific.
 */
static ssize_t
zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
{
	size_t size;
	int error = 0, err;
	zone_t *zone;
	char *zonepath;
	char *outstr;
	zone_status_t zone_status;
	pid_t initpid;
	boolean_t global = (curzone == global_zone);
	boolean_t inzone = (curzone->zone_id == zoneid);
	ushort_t flags;
	zone_net_data_t *zbuf;

	mutex_enter(&zonehash_lock);
	if ((zone = zone_find_all_by_id(zoneid)) == NULL) {
		mutex_exit(&zonehash_lock);
		return (set_errno(EINVAL));
	}
	zone_status = zone_status_get(zone);
	if (zone_status < ZONE_IS_INITIALIZED) {
		mutex_exit(&zonehash_lock);
		return (set_errno(EINVAL));
	}
	zone_hold(zone);
	mutex_exit(&zonehash_lock);

	/* Hide zones the caller is not permitted to see. */
	if (!zone_list_access(zone)) {
		zone_rele(zone);
		return (set_errno(EINVAL));
	}

	switch (attr) {
	case ZONE_ATTR_ROOT:
		if (global) {
			/*
			 * Copy the path so that its last byte can be
			 * replaced by a terminator; for non-global zones
			 * the copy is one byte shorter than the stored
			 * path.
			 */
			if (zone != global_zone)
				size = zone->zone_rootpathlen - 1;
			else
				size = zone->zone_rootpathlen;
			zonepath = kmem_alloc(size, KM_SLEEP);
			bcopy(zone->zone_rootpath, zonepath, size);
			zonepath[size - 1] = '\0';
		} else {
			if (inzone || !is_system_labeled()) {
				/*
				 * A non-global caller asking about its own
				 * zone (or any zone on an unlabeled system)
				 * gets "/".
				 */
				zonepath = "/";
				size = 2;
			} else {
				/* Otherwise: zone_prefix + zone name. */
				int prefix_len = strlen(zone_prefix);
				int zname_len = strlen(zone->zone_name);

				size = prefix_len + zname_len + 1;
				zonepath = kmem_alloc(size, KM_SLEEP);
				bcopy(zone_prefix, zonepath, prefix_len);
				bcopy(zone->zone_name, zonepath +
				    prefix_len, zname_len);
				zonepath[size - 1] = '\0';
			}
		}
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL) {
			err = copyoutstr(zonepath, buf, bufsize, NULL);
			if (err != 0 && err != ENAMETOOLONG)
				error = EFAULT;
		}
		/* Free only in the cases that allocated zonepath above. */
		if (global || (is_system_labeled() && !inzone))
			kmem_free(zonepath, size);
		break;

	case ZONE_ATTR_NAME:
		size = strlen(zone->zone_name) + 1;
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL) {
			err = copyoutstr(zone->zone_name, buf, bufsize, NULL);
			if (err != 0 && err != ENAMETOOLONG)
				error = EFAULT;
		}
		break;

	case ZONE_ATTR_STATUS:
		size = sizeof (zone_status);
		if (bufsize > size)
			bufsize = size;
		/* Re-read the status; it may have changed since the lookup. */
		zone_status = zone_status_get(zone);
		if (buf != NULL &&
		    copyout(&zone_status, buf, bufsize) != 0)
			error = EFAULT;
		break;

	case ZONE_ATTR_FLAGS:
		size = sizeof (zone->zone_flags);
		if (bufsize > size)
			bufsize = size;
		flags = zone->zone_flags;
		if (buf != NULL &&
		    copyout(&flags, buf, bufsize) != 0)
			error = EFAULT;
		break;

	case ZONE_ATTR_PRIVSET:
		size = sizeof (priv_set_t);
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL &&
		    copyout(zone->zone_privset, buf, bufsize) != 0)
			error = EFAULT;
		break;

	case ZONE_ATTR_UNIQID:
		size = sizeof (zone->zone_uniqid);
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL &&
		    copyout(&zone->zone_uniqid, buf, bufsize) != 0)
			error = EFAULT;
		break;

	case ZONE_ATTR_POOLID:
		{
			pool_t *pool;
			poolid_t poolid;

			if (pool_lock_intr() != 0) {
				error = EINTR;
				break;
			}
			pool = zone_pool_get(zone);
			poolid = pool->pool_id;
			pool_unlock();
			size = sizeof (poolid);
			if (bufsize > size)
				bufsize = size;
			/*
			 * Copy out the clamped bufsize, not the full size,
			 * so a short user buffer is never overrun.
			 */
			if (buf != NULL && copyout(&poolid, buf, bufsize) != 0)
				error = EFAULT;
		}
		break;

	case ZONE_ATTR_SLBL:
		size = sizeof (bslabel_t);
		if (bufsize > size)
			bufsize = size;
		if (zone->zone_slabel == NULL)
			error = EINVAL;
		else if (buf != NULL &&
		    copyout(label2bslabel(zone->zone_slabel), buf,
		    bufsize) != 0)
			error = EFAULT;
		break;

	case ZONE_ATTR_INITPID:
		size = sizeof (initpid);
		if (bufsize > size)
			bufsize = size;
		initpid = zone->zone_proc_initpid;
		/* -1 is reported as "no such process". */
		if (initpid == -1) {
			error = ESRCH;
			break;
		}
		if (buf != NULL &&
		    copyout(&initpid, buf, bufsize) != 0)
			error = EFAULT;
		break;

	case ZONE_ATTR_BRAND:
		size = strlen(zone->zone_brand->b_name) + 1;
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL) {
			err = copyoutstr(zone->zone_brand->b_name, buf,
			    bufsize, NULL);
			if (err != 0 && err != ENAMETOOLONG)
				error = EFAULT;
		}
		break;

	case ZONE_ATTR_INITNAME:
		size = strlen(zone->zone_initname) + 1;
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL) {
			err = copyoutstr(zone->zone_initname, buf, bufsize,
			    NULL);
			if (err != 0 && err != ENAMETOOLONG)
				error = EFAULT;
		}
		break;

	case ZONE_ATTR_BOOTARGS:
		if (zone->zone_bootargs == NULL)
			outstr = "";
		else
			outstr = zone->zone_bootargs;
		size = strlen(outstr) + 1;
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL) {
			err = copyoutstr(outstr, buf, bufsize, NULL);
			if (err != 0 && err != ENAMETOOLONG)
				error = EFAULT;
		}
		break;

	case ZONE_ATTR_PHYS_MCAP:
		size = sizeof (zone->zone_phys_mcap);
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL &&
		    copyout(&zone->zone_phys_mcap, buf, bufsize) != 0)
			error = EFAULT;
		break;

	case ZONE_ATTR_SCHED_CLASS:
		/* class_lock is held while the class table is consulted. */
		mutex_enter(&class_lock);

		if (zone->zone_defaultcid >= loaded_classes)
			outstr = "";
		else
			outstr = sclass[zone->zone_defaultcid].cl_name;
		size = strlen(outstr) + 1;
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL) {
			err = copyoutstr(outstr, buf, bufsize, NULL);
			if (err != 0 && err != ENAMETOOLONG)
				error = EFAULT;
		}

		mutex_exit(&class_lock);
		break;

	case ZONE_ATTR_HOSTID:
		/* Requires a set hostid and an exactly-sized buffer. */
		if (zone->zone_hostid != HW_INVALID_HOSTID &&
		    bufsize == sizeof (zone->zone_hostid)) {
			size = sizeof (zone->zone_hostid);
			if (buf != NULL && copyout(&zone->zone_hostid, buf,
			    bufsize) != 0)
				error = EFAULT;
		} else {
			error = EINVAL;
		}
		break;

	case ZONE_ATTR_FS_ALLOWED:
		if (zone->zone_fs_allowed == NULL)
			outstr = "";
		else
			outstr = zone->zone_fs_allowed;
		size = strlen(outstr) + 1;
		if (bufsize > size)
			bufsize = size;
		if (buf != NULL) {
			err = copyoutstr(outstr, buf, bufsize, NULL);
			if (err != 0 && err != ENAMETOOLONG)
				error = EFAULT;
		}
		break;

	case ZONE_ATTR_SECFLAGS:
		size = sizeof (zone->zone_secflags);
		if (bufsize > size)
			bufsize = size;
		/*
		 * As with the other attributes, a NULL buffer is a size
		 * query rather than an error.
		 */
		if (buf != NULL &&
		    copyout(&zone->zone_secflags, buf, bufsize) != 0)
			error = EFAULT;
		break;

	case ZONE_ATTR_NETWORK:
		/*
		 * The request is read from the user buffer and the reply is
		 * written back to it.
		 * NOTE(review): a bufsize smaller than zone_net_data_t is
		 * not rejected here -- confirm zone_get_network() copes.
		 */
		bufsize = MIN(bufsize, PIPE_BUF + sizeof (zone_net_data_t));
		size = bufsize;
		zbuf = kmem_alloc(bufsize, KM_SLEEP);
		if (copyin(buf, zbuf, bufsize) != 0) {
			error = EFAULT;
		} else {
			error = zone_get_network(zoneid, zbuf);
			if (error == 0 && copyout(zbuf, buf, bufsize) != 0)
				error = EFAULT;
		}
		kmem_free(zbuf, bufsize);
		break;

	default:
		/* Brand-specific attributes are delegated to the brand. */
		if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) {
			size = bufsize;
			error = ZBROP(zone)->b_getattr(zone, attr, buf, &size);
		} else {
			error = EINVAL;
		}
	}
	zone_rele(zone);

	if (error)
		return (set_errno(error));
	return ((ssize_t)size);
}
/*
 * Set an attribute of a zone.
 *
 * Requires zone-config privilege (EPERM otherwise).  The global zone
 * only accepts ZONE_ATTR_PHYS_MCAP, and every other attribute can only
 * be set while the zone's status is at most ZONE_IS_READY; violations
 * and unknown zones or attributes yield EINVAL.  Attributes at or above
 * ZONE_ATTR_BRAND_ATTRS are passed to the brand's b_setattr hook when
 * the zone is branded.
 */
static int
zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
{
	zone_t *zp;
	zone_status_t cur_status;
	zone_net_data_t *netbuf;
	int err = -1;

	if (secpolicy_zone_config(CRED()) != 0)
		return (set_errno(EPERM));

	if (zoneid == GLOBAL_ZONEID && attr != ZONE_ATTR_PHYS_MCAP)
		return (set_errno(EINVAL));

	mutex_enter(&zonehash_lock);
	zp = zone_find_all_by_id(zoneid);
	if (zp == NULL) {
		mutex_exit(&zonehash_lock);
		return (set_errno(EINVAL));
	}
	zone_hold(zp);
	mutex_exit(&zonehash_lock);

	cur_status = zone_status_get(zp);
	if (attr != ZONE_ATTR_PHYS_MCAP && cur_status > ZONE_IS_READY) {
		err = EINVAL;
		goto done;
	}

	switch (attr) {
	case ZONE_ATTR_INITNAME:
		err = zone_set_initname(zp, (const char *)buf);
		break;
	case ZONE_ATTR_INITNORESTART:
		zp->zone_restart_init = B_FALSE;
		err = 0;
		break;
	case ZONE_ATTR_INITRESTART0:
		zp->zone_restart_init_0 = B_TRUE;
		err = 0;
		break;
	case ZONE_ATTR_INITREBOOT:
		zp->zone_reboot_on_init_exit = B_TRUE;
		err = 0;
		break;
	case ZONE_ATTR_BOOTARGS:
		err = zone_set_bootargs(zp, (const char *)buf);
		break;
	case ZONE_ATTR_BRAND:
		err = zone_set_brand(zp, (const char *)buf);
		break;
	case ZONE_ATTR_FS_ALLOWED:
		err = zone_set_fs_allowed(zp, (const char *)buf);
		break;
	case ZONE_ATTR_SECFLAGS:
		err = zone_set_secflags(zp, (psecflags_t *)buf);
		break;
	case ZONE_ATTR_PHYS_MCAP:
		err = zone_set_phys_mcap(zp, (const uint64_t *)buf);
		break;
	case ZONE_ATTR_SCHED_CLASS:
		err = zone_set_sched_class(zp, (const char *)buf);
		break;
	case ZONE_ATTR_HOSTID:
		/* The buffer must be exactly the size of the hostid. */
		if (bufsize != sizeof (zp->zone_hostid))
			err = EINVAL;
		else if (copyin(buf, &zp->zone_hostid, bufsize) != 0)
			err = EFAULT;
		else
			err = 0;
		break;
	case ZONE_ATTR_NETWORK:
		if (bufsize > (PIPE_BUF + sizeof (zone_net_data_t))) {
			err = EINVAL;
			break;
		}
		/* Stage the request in kernel memory for the duration. */
		netbuf = kmem_alloc(bufsize, KM_SLEEP);
		if (copyin(buf, netbuf, bufsize) != 0)
			err = EFAULT;
		else
			err = zone_set_network(zoneid, netbuf);
		kmem_free(netbuf, bufsize);
		break;
	default:
		if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zp))
			err = ZBROP(zp)->b_setattr(zp, attr, buf, bufsize);
		else
			err = EINVAL;
	}

done:
	zone_rele(zp);
	/* Every path above must have replaced the -1 placeholder. */
	ASSERT(err != -1);
	if (err != 0)
		return (set_errno(err));
	return (0);
}
/*
 * Check whether the current process's address space permits a change
 * of zone.
 *
 * Walks every segment with the address space lock held as reader.
 * Returns 0 as soon as a segment fails seg_can_change_zones(), or is
 * backed by a vnode that fails vn_can_change_zones(); returns 1 if no
 * segment objects.
 */
static int
as_can_change_zones(void)
{
	proc_t *self = curproc;
	struct as *as = self->p_as;
	struct seg *cur;
	int permitted = 1;

	ASSERT(self->p_as != &kas);

	AS_LOCK_ENTER(as, RW_READER);
	cur = AS_SEGFIRST(as);
	while (cur != NULL) {
		vnode_t *backing;

		if (seg_can_change_zones(cur) == B_FALSE) {
			permitted = 0;
			break;
		}

		/* Segments without a backing vnode need no vnode check. */
		backing = NULL;
		if (SEGOP_GETVP(cur, cur->s_base, &backing) == 0 &&
		    backing != NULL && !vn_can_change_zones(backing)) {
			permitted = 0;
			break;
		}

		cur = AS_SEGNEXT(as, cur);
	}
	AS_LOCK_EXIT(as);

	return (permitted);
}
/*
 * Sum seg_swresv() over every segment of the current process's address
 * space.  The caller must hold the address space lock as writer.
 */
static size_t
as_swresv(void)
{
	proc_t *self = curproc;
	struct as *as = self->p_as;
	struct seg *cur;
	size_t total = 0;

	ASSERT(self->p_as != &kas);
	ASSERT(AS_WRITE_HELD(as));

	cur = AS_SEGFIRST(as);
	while (cur != NULL) {
		total += seg_swresv(cur);
		cur = AS_SEGNEXT(as, cur);
	}

	return (total);
}
/*
 * Move the calling process from the global zone into the zone identified
 * by zoneid (dispatched from zone() for ZONE_ENTER).
 *
 * Returns 0 on success.  On failure the error is recorded with set_errno()
 * and that call's return value is returned.  Errors raised directly here
 * are EPERM, EINVAL, EINTR, EBADF and EFAULT; a failure from
 * pool_do_bind() is passed through unchanged.
 *
 * Unless the caller is the process's agent thread, the process's other
 * lwps are held with holdlwps(SHOLDFORK) for the duration of the call and
 * resumed at the "out" label on every exit path after that point.
 */
static int
zone_enter(zoneid_t zoneid)
{
zone_t *zone;
vnode_t *vp;
proc_t *pp = curproc;
contract_t *ct;
cont_process_t *ctp;
task_t *tk, *oldtk;
kproject_t *zone_proj0;
cred_t *cr, *newcr;
pool_t *oldpool, *newpool;
sess_t *sp;
uid_t uid;
zone_status_t status;
int err = 0;
rctl_entity_p_t e;
size_t swap;
kthread_id_t t;
/* The caller must pass the zone configuration policy check. */
if (secpolicy_zone_config(CRED()) != 0)
return (set_errno(EPERM));
/* Only ids in [MIN_USERZONEID, MAX_ZONEID] are valid targets. */
if (zoneid < MIN_USERZONEID || zoneid > MAX_ZONEID)
return (set_errno(EINVAL));
/*
 * Hold the other lwps (skipped for the agent thread).  From here on all
 * failures must leave through "out" so the lwps are continued.
 */
if (curthread != pp->p_agenttp && !holdlwps(SHOLDFORK))
return (set_errno(EINTR));
/*
 * Refuse the move if open files or address space mappings are not
 * allowed to change zones.
 */
if (!files_can_change_zones()) {
err = EBADF;
goto out;
}
if (!as_can_change_zones()) {
err = EFAULT;
goto out;
}
mutex_enter(&zonehash_lock);
/* Only a process currently in the global zone may enter a zone. */
if (pp->p_zone != global_zone) {
mutex_exit(&zonehash_lock);
err = EINVAL;
goto out;
}
zone = zone_find_all_by_id(zoneid);
if (zone == NULL) {
mutex_exit(&zonehash_lock);
err = EINVAL;
goto out;
}
/*
 * Contract restrictions: the process must hold no contracts and must be
 * the only member of its process contract.
 */
ctp = pp->p_ct_process;
ct = &ctp->conp_contract;
mutex_enter(&ct->ct_lock);
mutex_enter(&pp->p_lock);
if ((avl_numnodes(&pp->p_ct_held) != 0) || (ctp->conp_nmembers != 1)) {
mutex_exit(&pp->p_lock);
mutex_exit(&ct->ct_lock);
mutex_exit(&zonehash_lock);
err = EINVAL;
goto out;
}
/*
 * Any contracts inherited by the process contract must already carry the
 * target zone's unique id.
 */
if (ctp->conp_ninherited != 0) {
contract_t *next;
for (next = list_head(&ctp->conp_inherited); next;
next = list_next(&ctp->conp_inherited, next)) {
if (contract_getzuniqid(next) != zone->zone_uniqid) {
mutex_exit(&pp->p_lock);
mutex_exit(&ct->ct_lock);
mutex_exit(&zonehash_lock);
err = EINVAL;
goto out;
}
}
}
mutex_exit(&pp->p_lock);
mutex_exit(&ct->ct_lock);
/* The zone must be at least READY and not yet shutting down. */
status = zone_status_get(zone);
if (status < ZONE_IS_READY || status >= ZONE_IS_SHUTTING_DOWN) {
mutex_exit(&zonehash_lock);
err = EINVAL;
goto out;
}
/* The zone's privilege set must be a subset of the caller's CR_OPPRIV. */
if (!priv_issubset(zone->zone_privset, &CR_OPPRIV(CRED()))) {
mutex_exit(&zonehash_lock);
err = EPERM;
goto out;
}
/*
 * Take a hold on the zone before dropping zonehash_lock for the pool
 * binding below; the hold is released on every failure path and once the
 * process has joined the zone.
 */
zone_hold(zone);
mutex_exit(&zonehash_lock);
if (pool_lock_intr() != 0) {
zone_rele(zone);
err = EINTR;
goto out;
}
ASSERT(secpolicy_pool(CRED()) == 0);
/*
 * If pools are enabled and the zone's pool differs from the current one,
 * bind the whole process to the zone's pool now.
 */
oldpool = curproc->p_pool;
newpool = zone_pool_get(zone);
if (pool_state == POOL_ENABLED && newpool != oldpool &&
(err = pool_do_bind(newpool, P_PID, P_MYID,
POOL_BIND_ALL)) != 0) {
pool_unlock();
zone_rele(zone);
goto out;
}
/*
 * cpu_lock is taken before zonehash_lock and stays held until after
 * task_join() below.
 */
mutex_enter(&cpu_lock);
mutex_enter(&zonehash_lock);
/*
 * Re-check the status: zonehash_lock was dropped above, so the zone may
 * have begun shutting down.  If so, undo the pool binding and fail.
 */
if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) {
mutex_exit(&zonehash_lock);
mutex_exit(&cpu_lock);
if (pool_state == POOL_ENABLED &&
newpool != oldpool)
(void) pool_do_bind(oldpool, P_PID, P_MYID,
POOL_BIND_ALL);
pool_unlock();
zone_rele(zone);
err = EINVAL;
goto out;
}
/*
 * With the address space write-locked and p_lock held, transfer the
 * process's accounting to the target zone and to the project of the
 * zone's zsched task: lwp, task and process counts, locked memory, swap
 * reservation and crypto memory.
 */
AS_LOCK_ENTER(pp->p_as, RW_WRITER);
swap = as_swresv();
mutex_enter(&pp->p_lock);
zone_proj0 = zone->zone_zsched->p_task->tk_proj;
mutex_enter(&zone->zone_nlwps_lock);
zone_proj0->kpj_nlwps += pp->p_lwpcnt;
zone->zone_nlwps += pp->p_lwpcnt;
zone_proj0->kpj_ntasks += 1;
zone_proj0->kpj_nprocs++;
zone->zone_nprocs++;
mutex_exit(&zone->zone_nlwps_lock);
mutex_enter(&zone->zone_mem_lock);
zone->zone_locked_mem += pp->p_locked_mem;
zone_proj0->kpj_data.kpd_locked_mem += pp->p_locked_mem;
zone->zone_max_swap += swap;
mutex_exit(&zone->zone_mem_lock);
mutex_enter(&(zone_proj0->kpj_data.kpd_crypto_lock));
zone_proj0->kpj_data.kpd_crypto_mem += pp->p_crypto_mem;
mutex_exit(&(zone_proj0->kpj_data.kpd_crypto_lock));
/* Subtract the same amounts from the old zone and old project. */
mutex_enter(&pp->p_zone->zone_nlwps_lock);
pp->p_zone->zone_nlwps -= pp->p_lwpcnt;
pp->p_task->tk_proj->kpj_nlwps -= pp->p_lwpcnt;
pp->p_task->tk_proj->kpj_nprocs--;
pp->p_zone->zone_nprocs--;
mutex_exit(&pp->p_zone->zone_nlwps_lock);
mutex_enter(&pp->p_zone->zone_mem_lock);
pp->p_zone->zone_locked_mem -= pp->p_locked_mem;
pp->p_task->tk_proj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
pp->p_zone->zone_max_swap -= swap;
mutex_exit(&pp->p_zone->zone_mem_lock);
mutex_enter(&(pp->p_task->tk_proj->kpj_data.kpd_crypto_lock));
pp->p_task->tk_proj->kpj_data.kpd_crypto_mem -= pp->p_crypto_mem;
mutex_exit(&(pp->p_task->tk_proj->kpj_data.kpd_crypto_lock));
/*
 * Switch the process over to the new zone.  No error path exists past
 * this point: err stays 0 for the remainder of the function.
 */
pp->p_flag |= SZONETOP;
pp->p_zone = zone;
mutex_exit(&pp->p_lock);
AS_LOCK_EXIT(pp->p_as);
/*
 * When the contract's service contract id is not its own id, record the
 * contract id in conp_svc_zone_enter.
 */
if (ctp->conp_svc_ctid != ct->ct_id) {
mutex_enter(&ct->ct_lock);
ctp->conp_svc_zone_enter = ct->ct_id;
mutex_exit(&ct->ct_lock);
}
/* Re-home the process contract from the global zone to the new zone. */
ASSERT(ct->ct_mzuniqid == GLOBAL_ZONEUNIQID);
contract_setzuniqid(ct, zone->zone_uniqid);
/*
 * Create a task for project id 0 in the zone and join it.
 * NOTE(review): p_lock is released a few lines below without being
 * acquired here, so task_join() presumably returns with p_lock held --
 * confirm against task_join().
 */
tk = task_create(0, zone);
oldtk = task_join(tk, 0);
mutex_exit(&cpu_lock);
/* Apply the zone's resource controls to the process. */
e.rcep_p.zone = zone;
e.rcep_t = RCENTITY_ZONE;
(void) rctl_set_dup(NULL, NULL, pp, &e, zone->zone_rctls, NULL,
RCD_CALLBACK);
mutex_exit(&pp->p_lock);
/*
 * Under pidlock and p_lock: leave the current process group and session
 * and join the session and process group of the zone's zsched process.
 */
mutex_enter(&pidlock);
sp = zone->zone_zsched->p_sessp;
sess_hold(zone->zone_zsched);
mutex_enter(&pp->p_lock);
pgexit(pp);
sess_rele(pp->p_sessp, B_TRUE);
pp->p_sessp = sp;
pgjoin(pp, zone->zone_zsched->p_pidp);
/*
 * For each thread in the process: make it runnable if ISWAITING() and
 * clear any TS_ANYWAITQ scheduling flag.
 */
if ((t = pp->p_tlist) != NULL) {
do {
thread_lock(t);
if (ISWAITING(t))
setrun_locked(t);
if (t->t_schedflag & TS_ANYWAITQ)
t->t_schedflag &= ~ TS_ANYWAITQ;
thread_unlock(t);
} while ((t = t->t_forw) != pp->p_tlist);
}
/*
 * If the zone has a default scheduling class that differs from the
 * current thread's class, apply it to every thread.  parmsset() failures
 * are deliberately ignored.
 */
if (zone->zone_defaultcid > 0 &&
zone->zone_defaultcid != curthread->t_cid) {
pcparms_t pcparms;
pcparms.pc_cid = zone->zone_defaultcid;
pcparms.pc_clparms[0] = 0;
if ((t = pp->p_tlist) != NULL) {
do {
(void) parmsset(&pcparms, t);
} while ((t = t->t_forw) != pp->p_tlist);
}
}
mutex_exit(&pp->p_lock);
mutex_exit(&pidlock);
mutex_exit(&zonehash_lock);
pool_unlock();
task_rele(oldtk);
/*
 * NOTE(review): the hold taken above is dropped here although zone is
 * still dereferenced below; presumably the task created in the zone keeps
 * it alive -- confirm.
 */
zone_rele(zone);
/* Point both the current and root directories at the zone's root vnode. */
vp = zone->zone_rootvp;
zone_chdir(vp, &PTOU(pp)->u_cdir, pp);
zone_chdir(vp, &PTOU(pp)->u_rdir, pp);
/* Adopt the zone's lower, upper and inheritable security flags. */
secflags_copy(&pp->p_secflags.psf_lower,
&zone->zone_secflags.psf_lower);
secflags_copy(&pp->p_secflags.psf_upper,
&zone->zone_secflags.psf_upper);
secflags_copy(&pp->p_secflags.psf_inherit,
&zone->zone_secflags.psf_inherit);
/*
 * Install a new credential: a copy of the old one, re-zoned, with the
 * P, E, I and L privilege sets each intersected with the zone's
 * privilege set.
 */
newcr = cralloc();
mutex_enter(&pp->p_crlock);
cr = pp->p_cred;
crcopy_to(cr, newcr);
crsetzone(newcr, zone);
pp->p_cred = newcr;
priv_intersect(zone->zone_privset, &CR_PPRIV(newcr));
priv_intersect(zone->zone_privset, &CR_EPRIV(newcr));
priv_intersect(zone->zone_privset, &CR_IPRIV(newcr));
priv_intersect(zone->zone_privset, &CR_LPRIV(newcr));
mutex_exit(&pp->p_crlock);
crset(pp, newcr);
/* Move the real uid's upcount from the global zone to the new zone. */
uid = crgetruid(newcr);
mutex_enter(&pidlock);
upcount_dec(uid, GLOBAL_ZONEID);
upcount_inc(uid, zoneid);
mutex_exit(&pidlock);
set_core_defaults();
out:
/* Resume the lwps held at the top (not done for the agent thread). */
mutex_enter(&pp->p_lock);
if (curthread != pp->p_agenttp)
continuelwps(pp);
mutex_exit(&pp->p_lock);
return (err != 0 ? set_errno(err) : 0);
}
/*
 * Copy out the ids of the zones visible to the caller.
 *
 * numzones is read for the capacity of zoneidlist and is always written
 * back with the number of zones found, which may exceed that capacity.
 * At most the caller-supplied number of ids is copied to zoneidlist, and
 * nothing is copied when zoneidlist is NULL.
 *
 * Visibility:
 *   - global zone caller: every zone on the active list;
 *   - non-global caller, system not labeled: only the caller's own zone;
 *   - non-global caller, labeled system: non-global zones whose label is
 *     dominated by the caller's label, skipping scratch zones other than
 *     the caller's own.
 *
 * Returns 0 on success; on a failed copyin/copyout EFAULT is set via
 * set_errno().
 */
static int
zone_list(zoneid_t *zoneidlist, uint_t *numzones)
{
	zoneid_t *ids;
	zone_t *zp, *self;
	uint_t wanted;		/* capacity supplied by the caller */
	uint_t nalloc;		/* entries allocated in ids[] */
	uint_t nfound;		/* entries filled in ids[] */
	int rc = 0;

	if (copyin(numzones, &wanted, sizeof (uint_t)) != 0)
		return (set_errno(EFAULT));

	self = curproc->p_zone;
	ASSERT(zonecount > 0);

	if (self == global_zone) {
		mutex_enter(&zonehash_lock);
		nalloc = zonecount;
		nfound = 0;
		ids = kmem_alloc(nalloc * sizeof (zoneid_t), KM_SLEEP);
		zp = list_head(&zone_active);
		while (zp != NULL) {
			ids[nfound++] = zp->zone_id;
			zp = list_next(&zone_active, zp);
		}
		ASSERT(nfound == nalloc);
		mutex_exit(&zonehash_lock);
	} else if (!is_system_labeled()) {
		nalloc = nfound = 1;
		ids = kmem_alloc(sizeof (zoneid_t), KM_SLEEP);
		ids[0] = self->zone_id;
	} else {
		bslabel_t *mylabel;

		mutex_enter(&zonehash_lock);
		nalloc = zonecount;
		nfound = 0;
		ids = kmem_alloc(nalloc * sizeof (zoneid_t), KM_SLEEP);
		mylabel = label2bslabel(self->zone_slabel);
		for (zp = list_head(&zone_active); zp != NULL;
		    zp = list_next(&zone_active, zp)) {
			/*
			 * Keep a zone only if it is not the global zone, is
			 * either ourselves or not a scratch zone, and has a
			 * label we dominate.
			 */
			if (zp->zone_id != GLOBAL_ZONEID &&
			    (zp == self ||
			    !(zp->zone_flags & ZF_IS_SCRATCH)) &&
			    bldominates(mylabel,
			    label2bslabel(zp->zone_slabel))) {
				ids[nfound++] = zp->zone_id;
			}
		}
		mutex_exit(&zonehash_lock);
	}

	/* Never copy out more ids than were actually collected. */
	if (nfound < wanted)
		wanted = nfound;

	if (copyout(&nfound, numzones, sizeof (uint_t)) != 0) {
		rc = EFAULT;
	} else if (zoneidlist != NULL && wanted != 0 &&
	    copyout(ids, zoneidlist, wanted * sizeof (zoneid_t)) != 0) {
		rc = EFAULT;
	}

	kmem_free(ids, nalloc * sizeof (zoneid_t));

	return (rc != 0 ? set_errno(rc) : 0);
}
/*
 * Translate a zone name supplied from user space into a zone id.
 *
 * A NULL zone_name returns the caller's own zone id (getzoneid()).
 * Otherwise the name is copied in (at most ZONENAME_MAX bytes) and looked
 * up under zonehash_lock.  The lookup fails with EINVAL when no zone has
 * that name, the zone has not reached ZONE_IS_READY, or
 * zone_list_access() denies the caller.  A copyinstr() failure is
 * reported with the error it returned.
 *
 * Failures go through set_errno(), whose return value is returned.
 */
static zoneid_t
zone_lookup(const char *zone_name)
{
	char *namebuf;
	zone_t *zp;
	zoneid_t id;
	int rc;

	if (zone_name == NULL)
		return (getzoneid());

	namebuf = kmem_zalloc(ZONENAME_MAX, KM_SLEEP);
	rc = copyinstr(zone_name, namebuf, ZONENAME_MAX, NULL);
	if (rc != 0) {
		kmem_free(namebuf, ZONENAME_MAX);
		return (set_errno(rc));
	}

	mutex_enter(&zonehash_lock);
	zp = zone_find_all_by_name(namebuf);
	kmem_free(namebuf, ZONENAME_MAX);

	if (zp != NULL &&
	    zone_status_get(zp) >= ZONE_IS_READY &&
	    zone_list_access(zp)) {
		/* Read the id before dropping the lock. */
		id = zp->zone_id;
		mutex_exit(&zonehash_lock);
		return (id);
	}

	mutex_exit(&zonehash_lock);
	return (set_errno(EINVAL));
}
/*
 * Copy ZONE_SYSCALL_API_VERSION out to the user address version_arg.
 *
 * Returns 0 on success; if the copyout fails, EFAULT is set via
 * set_errno().
 */
static int
zone_version(int *version_arg)
{
	int api_version = ZONE_SYSCALL_API_VERSION;

	if (copyout(&api_version, version_arg, sizeof (api_version)) == 0)
		return (0);

	return (set_errno(EFAULT));
}
/*
 * zone() system call entry point.
 *
 * cmd selects the operation; arg1..arg4 are interpreted per command and
 * are either user addresses or integers passed in pointer-sized slots
 * (unpacked through uintptr_t).  Each case forwards to the matching
 * handler and returns its result.
 *
 * ZONE_CREATE takes a zone_def structure at arg1.  For a non-native data
 * model the 32-bit zone_def32 layout is copied in and widened field by
 * field; on kernels built without _SYSCALL32_IMPL a non-native caller is
 * treated as impossible and panics.
 *
 * ZONE_CHECK_DATALINK reads a zoneid from arg1, and writes the matching
 * zone id back to arg1 only when the caller passed ALL_ZONES.
 *
 * Unknown commands fail with EINVAL.  All failures detected here are
 * reported through set_errno().
 */
long
zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4)
{
	zone_def zs;
	int err;

	switch (cmd) {
	case ZONE_CREATE:
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (copyin(arg1, &zs, sizeof (zone_def))) {
				return (set_errno(EFAULT));
			}
		} else {
#ifdef _SYSCALL32_IMPL
			zone_def32 zs32;

			if (copyin(arg1, &zs32, sizeof (zone_def32))) {
				return (set_errno(EFAULT));
			}
			/* Widen every 32-bit user pointer/value. */
			zs.zone_name =
			    (const char *)(unsigned long)zs32.zone_name;
			zs.zone_root =
			    (const char *)(unsigned long)zs32.zone_root;
			zs.zone_privs =
			    (const struct priv_set *)
			    (unsigned long)zs32.zone_privs;
			zs.zone_privssz = zs32.zone_privssz;
			zs.rctlbuf = (caddr_t)(unsigned long)zs32.rctlbuf;
			zs.rctlbufsz = zs32.rctlbufsz;
			zs.zfsbuf = (caddr_t)(unsigned long)zs32.zfsbuf;
			zs.zfsbufsz = zs32.zfsbufsz;
			zs.extended_error =
			    (int *)(unsigned long)zs32.extended_error;
			zs.match = zs32.match;
			zs.doi = zs32.doi;
			zs.label = (const bslabel_t *)(uintptr_t)zs32.label;
			zs.flags = zs32.flags;
#else
			panic("get_udatamodel() returned bogus result\n");
#endif
		}

		return (zone_create(zs.zone_name, zs.zone_root,
		    zs.zone_privs, zs.zone_privssz,
		    (caddr_t)zs.rctlbuf, zs.rctlbufsz,
		    (caddr_t)zs.zfsbuf, zs.zfsbufsz,
		    zs.extended_error, zs.match, zs.doi,
		    zs.label, zs.flags));
	case ZONE_BOOT:
		return (zone_boot((zoneid_t)(uintptr_t)arg1));
	case ZONE_DESTROY:
		return (zone_destroy((zoneid_t)(uintptr_t)arg1));
	case ZONE_GETATTR:
		return (zone_getattr((zoneid_t)(uintptr_t)arg1,
		    (int)(uintptr_t)arg2, arg3, (size_t)arg4));
	case ZONE_SETATTR:
		return (zone_setattr((zoneid_t)(uintptr_t)arg1,
		    (int)(uintptr_t)arg2, arg3, (size_t)arg4));
	case ZONE_ENTER:
		return (zone_enter((zoneid_t)(uintptr_t)arg1));
	case ZONE_LIST:
		return (zone_list((zoneid_t *)arg1, (uint_t *)arg2));
	case ZONE_SHUTDOWN:
		return (zone_shutdown((zoneid_t)(uintptr_t)arg1));
	case ZONE_LOOKUP:
		return (zone_lookup((const char *)arg1));
	case ZONE_VERSION:
		return (zone_version((int *)arg1));
	case ZONE_ADD_DATALINK:
		return (zone_add_datalink((zoneid_t)(uintptr_t)arg1,
		    (datalink_id_t)(uintptr_t)arg2));
	case ZONE_DEL_DATALINK:
		return (zone_remove_datalink((zoneid_t)(uintptr_t)arg1,
		    (datalink_id_t)(uintptr_t)arg2));
	case ZONE_CHECK_DATALINK: {
		zoneid_t zoneid;
		boolean_t need_copyout;

		/*
		 * A bad user address must be reported through set_errno()
		 * like every other failure here; returning the bare EFAULT
		 * value would hand the caller a positive "success" result
		 * with errno untouched.
		 */
		if (copyin(arg1, &zoneid, sizeof (zoneid)) != 0)
			return (set_errno(EFAULT));
		/* Only an ALL_ZONES query has a result to hand back. */
		need_copyout = (zoneid == ALL_ZONES);
		err = zone_check_datalink(&zoneid,
		    (datalink_id_t)(uintptr_t)arg2);
		if (err == 0 && need_copyout) {
			if (copyout(&zoneid, arg1, sizeof (zoneid)) != 0)
				err = EFAULT;
		}
		return (err == 0 ? 0 : set_errno(err));
	}
	case ZONE_LIST_DATALINK:
		return (zone_list_datalink((zoneid_t)(uintptr_t)arg1,
		    (int *)arg2, (datalink_id_t *)(uintptr_t)arg3));
	default:
		return (set_errno(EINVAL));
	}
}
/*
 * Argument bundle passed from zone_kadmin() to the kernel thread it
 * creates to run zone_ki_call_zoneadmd().  The receiving thread copies
 * both members out and frees the structure.
 */
struct zarg {
/* Zone being halted/rebooted; passed along with a hold taken by the creator. */
zone_t *zone;
/* Command (cmd, uniqid, locale, bootbuf) sent to zoneadmd over its door. */
zone_cmd_arg_t arg;
};
/*
 * Open the door for the named zone.
 *
 * The door path is produced by formatting ZONE_DOOR_PATH with zone_name
 * into a temporary buffer of sizeof (ZONE_DOOR_PATH) + strlen(zone_name)
 * bytes, which is then handed to door_ki_open().
 *
 * Returns whatever door_ki_open() returns; the handle is stored through
 * doorp by that call.
 */
static int
zone_lookup_door(const char *zone_name, door_handle_t *doorp)
{
	size_t pathsz = sizeof (ZONE_DOOR_PATH) + strlen(zone_name);
	char *path = kmem_alloc(pathsz, KM_SLEEP);
	int rc;

	(void) snprintf(path, pathsz, ZONE_DOOR_PATH, zone_name);
	rc = door_ki_open(path, doorp);
	kmem_free(path, pathsz);

	return (rc);
}
/*
 * Release the door handle stored at *doorp with door_ki_rele() and reset
 * *doorp to NULL so the caller can tell that no door is open.
 */
static void
zone_release_door(door_handle_t *doorp)
{
	door_handle_t held = *doorp;

	door_ki_rele(held);
	*doorp = NULL;
}
/*
 * Kernel thread body, started by zone_kadmin(), that delivers a zone
 * command to zoneadmd through the zone's door.
 *
 * zargp is consumed: its contents are copied out and it is freed here.
 * The zone hold that accompanies it is released once the zone's name, id
 * and unique id have been saved locally.  The door upcall is retried
 * until it succeeds, fails with an unexpected error, or the zone id no
 * longer refers to the same zone instance.  The thread ends with
 * thread_exit() and never returns.
 */
static void
zone_ki_call_zoneadmd(struct zarg *zargp)
{
door_handle_t door = NULL;
door_arg_t darg, save_arg;
char *zone_name;
size_t zone_namelen;
zoneid_t zoneid;
zone_t *zone;
zone_cmd_arg_t arg;
uint64_t uniqid;
size_t size;
int error;
int retry;
/* Take over the argument contents and free the carrier structure. */
zone = zargp->zone;
arg = zargp->arg;
kmem_free(zargp, sizeof (*zargp));
/*
 * Save everything needed after the hold is dropped: a private copy of
 * the name (including its terminating NUL), the id and the unique id.
 */
zone_namelen = strlen(zone->zone_name) + 1;
zone_name = kmem_alloc(zone_namelen, KM_SLEEP);
bcopy(zone->zone_name, zone_name, zone_namelen);
zoneid = zone->zone_id;
uniqid = zone->zone_uniqid;
ASSERT(curproc == &p0);
/*
 * zone_empty()'s result is ignored; the assertion below expects the
 * zone to be at least ZONE_IS_EMPTY once it returns.
 */
(void) zone_empty(zone);
ASSERT(zone_status_get(zone) >= ZONE_IS_EMPTY);
zone_rele(zone);
/*
 * The same local 'arg' serves as both the request data and the reply
 * buffer; no descriptors are passed.
 */
size = sizeof (arg);
darg.rbuf = (char *)&arg;
darg.data_ptr = (char *)&arg;
darg.rsize = size;
darg.data_size = size;
darg.desc_ptr = NULL;
darg.desc_num = 0;
/*
 * Keep a pristine copy; darg is reset from it before every retry,
 * presumably because the upcall may modify darg -- confirm.
 */
save_arg = darg;
/* NOTE(review): 'retry' is only incremented, never read. */
for (retry = 0; ; retry++) {
/* (Re)open the door if none is open; on failure go to the retry check. */
if (door == NULL &&
(error = zone_lookup_door(zone_name, &door)) != 0) {
goto next;
}
ASSERT(door != NULL);
/* A successful upcall ends the loop. */
if ((error = door_ki_upcall_limited(door, &darg, NULL,
SIZE_MAX, 0)) == 0) {
break;
}
switch (error) {
case EINTR:
case EAGAIN:
/* Retry after the delay below. */
break;
case EBADF:
/*
 * Drop the handle and try to reopen it.  Success or failure, control
 * continues to the retry check; a failed reopen is attempted again at
 * the top of the loop provided door is still NULL.
 */
zone_release_door(&door);
if (zone_lookup_door(zone_name, &door) != 0) {
break;
}
break;
default:
/* Any other error is logged and ends the attempts. */
cmn_err(CE_WARN,
"zone_ki_call_zoneadmd: door_ki_upcall error %d\n",
error);
goto out;
}
next:
/*
 * Stop retrying if the zone id no longer exists or now names a
 * different zone instance (unique id mismatch).
 */
if ((zone = zone_find_by_id(zoneid)) == NULL) {
break;
}
if (zone->zone_uniqid != uniqid) {
zone_rele(zone);
break;
}
/* Same zone still present: wait hz ticks, reset darg and try again. */
zone_rele(zone);
delay(hz);
darg = save_arg;
}
out:
if (door != NULL) {
zone_release_door(&door);
}
kmem_free(zone_name, zone_namelen);
thread_exit();
}
/*
 * Handle an administrative halt/reboot request issued from inside a
 * non-global zone.
 *
 * cmd/fcn are translated to a zoneadmd command:
 *   A_SHUTDOWN + AD_HALT/AD_POWEROFF -> Z_HALT
 *   A_SHUTDOWN + AD_BOOT             -> Z_REBOOT
 *   A_REBOOT                         -> Z_REBOOT
 * Recognized but unsupported combinations return ENOTSUP, anything else
 * EINVAL.  The caller must then pass secpolicy_zone_admin() (EPERM
 * otherwise).
 *
 * If the zone is already past ZONE_IS_RUNNING, 0 is returned and nothing
 * else happens.  Otherwise the zone is marked ZONE_IS_SHUTTING_DOWN, its
 * processes are signalled with killall(), a thread in p0 is created to
 * run zone_ki_call_zoneadmd() with a held reference to the zone, and the
 * calling process exits.  The trailing return is only reached if exit()
 * returns.
 *
 * Errors are returned as plain errno values (set_errno() is not used).
 */
int
zone_kadmin(int cmd, int fcn, const char *mdep, cred_t *credp)
{
	zone_t *zp = curproc->p_zone;
	struct zarg *za;
	zone_cmd_t request;

	ASSERT(getzoneid() != GLOBAL_ZONEID);

	switch (cmd) {
	case A_REBOOT:
		request = Z_REBOOT;
		break;
	case A_SHUTDOWN:
		if (fcn == AD_HALT || fcn == AD_POWEROFF) {
			request = Z_HALT;
		} else if (fcn == AD_BOOT) {
			request = Z_REBOOT;
		} else if (fcn == AD_IBOOT || fcn == AD_SBOOT ||
		    fcn == AD_SIBOOT || fcn == AD_NOSYNC) {
			return (ENOTSUP);
		} else {
			return (EINVAL);
		}
		break;
	case A_FTRACE:
	case A_REMOUNT:
	case A_FREEZE:
	case A_DUMP:
	case A_CONFIG:
		return (ENOTSUP);
	default:
		ASSERT(cmd != A_SWAPCTL);
		return (EINVAL);
	}

	if (secpolicy_zone_admin(credp, B_FALSE))
		return (EPERM);

	mutex_enter(&zone_status_lock);
	ASSERT(zone_status_get(zp) < ZONE_IS_EMPTY);
	if (zone_status_get(zp) > ZONE_IS_RUNNING) {
		/* Already on its way down; nothing more to do. */
		mutex_exit(&zone_status_lock);
		return (0);
	}
	zone_status_set(zp, ZONE_IS_SHUTTING_DOWN);
	mutex_exit(&zone_status_lock);

	killall(zp->zone_id);

	/* Build the request that the helper thread will consume and free. */
	za = kmem_zalloc(sizeof (*za), KM_SLEEP);
	za->zone = zp;
	za->arg.cmd = request;
	za->arg.uniqid = zp->zone_uniqid;
	(void) strcpy(za->arg.locale, "C");
	if (mdep != NULL) {
		(void) strlcpy(za->arg.bootbuf, mdep,
		    sizeof (za->arg.bootbuf));
	}

	/* This hold is released by zone_ki_call_zoneadmd(). */
	zone_hold(zp);
	(void) thread_create(NULL, 0, zone_ki_call_zoneadmd, za, 0, &p0,
	    TS_RUN, minclsyspri);

	exit(CLD_EXITED, 0);

	return (EINVAL);
}
/*
 * Mark the global zone, and every zone on the active list that is
 * currently ZONE_IS_RUNNING, as ZONE_IS_SHUTTING_DOWN.
 *
 * Must be called from the global zone while the global zone is still
 * ZONE_IS_RUNNING (both asserted).  zonehash_lock and zone_status_lock
 * are held, in that order, for the whole update.  Zones in any state
 * other than ZONE_IS_RUNNING are left untouched.
 */
void
zone_shutdown_global(void)
{
	zone_t *zp;

	ASSERT(INGLOBALZONE(curproc));

	mutex_enter(&zonehash_lock);
	mutex_enter(&zone_status_lock);

	/* The global zone goes first. */
	ASSERT(zone_status_get(global_zone) == ZONE_IS_RUNNING);
	zone_status_set(global_zone, ZONE_IS_SHUTTING_DOWN);

	zp = list_head(&zone_active);
	while (zp != NULL) {
		if (zone_status_get(zp) == ZONE_IS_RUNNING)
			zone_status_set(zp, ZONE_IS_SHUTTING_DOWN);
		zp = list_next(&zone_active, zp);
	}

	mutex_exit(&zone_status_lock);
	mutex_exit(&zonehash_lock);
}
/*
 * Decide whether the named dataset is visible from the caller's zone and,
 * when write is non-NULL, report through it whether the match is
 * writable (1) or read-only (0).
 *
 * Checks, in order:
 *   1. dataset equals an entry of the zone's dataset list, or extends one
 *      with '/' or '@'                              -> visible, *write = 1
 *   2. dataset (ignoring one trailing '/') is a parent path of an entry
 *      of the zone's dataset list                   -> visible, *write = 0
 *   3. dataset equals, or (ignoring one trailing '/') is a parent path
 *      of, the resource name of a "zfs" vfs on the zone's vfs list
 *                                                   -> visible, *write = 0
 *
 * Returns 1 if visible, 0 otherwise (including for an empty name).
 * *write is left untouched when 0 is returned.
 */
int
zone_dataset_visible(const char *dataset, int *write)
{
	static int zfstype = -1;
	zone_t *zp = curproc->p_zone;
	zone_dataset_t *ent;
	vfs_t *vfs;
	size_t dslen;		/* full length of dataset */
	size_t parentlen;	/* length without one trailing '/' */

	if (dataset[0] == '\0')
		return (0);

	/* dataset is non-empty here, so dataset[dslen - 1] is in bounds. */
	dslen = strlen(dataset);
	parentlen = (dataset[dslen - 1] == '/') ? dslen - 1 : dslen;

	/* Pass 1: the entry itself or something beneath it. */
	for (ent = list_head(&zp->zone_datasets); ent != NULL;
	    ent = list_next(&zp->zone_datasets, ent)) {
		size_t entlen = strlen(ent->zd_dataset);

		if (dslen < entlen ||
		    bcmp(dataset, ent->zd_dataset, entlen) != 0)
			continue;
		if (dataset[entlen] == '\0' || dataset[entlen] == '/' ||
		    dataset[entlen] == '@') {
			if (write)
				*write = 1;
			return (1);
		}
	}

	/* Pass 2: parents of an entry. */
	for (ent = list_head(&zp->zone_datasets); ent != NULL;
	    ent = list_next(&zp->zone_datasets, ent)) {
		if (parentlen < strlen(ent->zd_dataset) &&
		    bcmp(dataset, ent->zd_dataset, parentlen) == 0 &&
		    ent->zd_dataset[parentlen] == '/') {
			if (write)
				*write = 0;
			return (1);
		}
	}

	/* Resolve the "zfs" filesystem type index once and cache it. */
	if (zfstype == -1) {
		struct vfssw *vswp = vfs_getvfssw("zfs");

		zfstype = vswp - vfssw;
		vfs_unrefvfssw(vswp);
	}

	/* Pass 3: zfs filesystems on the zone's circular vfs list. */
	vfs_list_read_lock();
	vfs = zp->zone_vfslist;
	do {
		ASSERT(vfs);
		if (vfs->vfs_fstype == zfstype) {
			const char *res = refstr_value(vfs->vfs_resource);

			if (strcmp(dataset, res) == 0 ||
			    (parentlen < strlen(res) &&
			    bcmp(dataset, res, parentlen) == 0 &&
			    res[parentlen] == '/')) {
				vfs_list_unlock();
				if (write)
					*write = 0;
				return (1);
			}
		}
		vfs = vfs->vfs_zone_next;
	} while (vfs != zp->zone_vfslist);
	vfs_list_unlock();

	return (0);
}
/*
 * Find the zone whose root path location matches the start of path and
 * return it with a hold taken.
 *
 * A NULL path yields the global zone.  A path that does not start with
 * '/' is only expected when treat_abs is set (asserted); it is then
 * compared against each zone root path with the root path's leading
 * character skipped.
 *
 * For every non-global zone on the active list, the root path is
 * truncated just after the '/' that precedes its final component, and
 * that prefix is compared with the start of path.  The first match wins;
 * when nothing matches, the global zone is returned.
 *
 * The caller is responsible for releasing the returned hold.
 */
zone_t *
zone_find_by_any_path(const char *path, boolean_t treat_abs)
{
	zone_t *zp;
	int skip = 0;

	if (path == NULL) {
		zone_hold(global_zone);
		return (global_zone);
	}

	if (*path != '/') {
		ASSERT(treat_abs);
		skip = 1;
	}

	mutex_enter(&zonehash_lock);
	zp = list_head(&zone_active);
	while (zp != NULL) {
		if (zp != global_zone) {
			char *slash;
			size_t cmplen;

			/*
			 * Scan backwards for the '/' that starts the last
			 * component.  The scan begins three bytes before
			 * zone_rootpath + zone_rootpathlen; presumably that
			 * skips the terminating NUL and a trailing '/' --
			 * confirm against where zone_rootpath is set.
			 */
			slash = zp->zone_rootpath + zp->zone_rootpathlen - 3;
			while (*slash != '/')
				slash--;

			cmplen = slash - zp->zone_rootpath + 1 - skip;
			if (strncmp(path, zp->zone_rootpath + skip,
			    cmplen) == 0)
				break;
		}
		zp = list_next(&zone_active, zp);
	}

	if (zp == NULL)
		zp = global_zone;
	zone_hold(zp);
	mutex_exit(&zonehash_lock);

	return (zp);
}
/*
 * Search the zone's datalink list for the entry with id linkid.
 *
 * The caller must hold zone->zone_lock (asserted).  Returns the matching
 * entry, or NULL if the zone has no such datalink.
 */
static zone_dl_t *
zone_find_dl(zone_t *zone, datalink_id_t linkid)
{
	zone_dl_t *ent;

	ASSERT(mutex_owned(&zone->zone_lock));

	ent = list_head(&zone->zone_dl_list);
	while (ent != NULL && ent->zdl_id != linkid)
		ent = list_next(&zone->zone_dl_list, ent);

	return (ent);
}
/*
 * Report whether the zone's datalink list contains linkid.
 *
 * Takes and releases zone->zone_lock around the lookup, so the answer
 * may be stale by the time the caller acts on it unless the caller
 * prevents concurrent changes by other means.
 */
static boolean_t
zone_dl_exists(zone_t *zone, datalink_id_t linkid)
{
	zone_dl_t *hit;

	mutex_enter(&zone->zone_lock);
	hit = zone_find_dl(zone, linkid);
	mutex_exit(&zone->zone_lock);

	return (hit != NULL);
}
/*
 * Assign datalink linkid to the zone identified by zoneid.
 *
 * With zonehash_lock held, every active zone is checked first: if the
 * link is already assigned, the call fails with EEXIST when the owner is
 * the target zone and EPERM when it is another zone.  Otherwise a new
 * entry with no network properties is inserted at the head of the target
 * zone's datalink list.
 *
 * Returns 0 on success.  Failures (ENXIO for an unknown zone, EEXIST,
 * EPERM) are reported through set_errno().
 */
static int
zone_add_datalink(zoneid_t zoneid, datalink_id_t linkid)
{
	zone_t *target;
	zone_t *owner;
	zone_dl_t *entry;

	target = zone_find_by_id(zoneid);
	if (target == NULL)
		return (set_errno(ENXIO));

	mutex_enter(&zonehash_lock);

	/* Look for any active zone that already owns this link. */
	owner = list_head(&zone_active);
	while (owner != NULL && !zone_dl_exists(owner, linkid))
		owner = list_next(&zone_active, owner);

	if (owner != NULL) {
		mutex_exit(&zonehash_lock);
		zone_rele(target);
		return (set_errno((owner == target) ? EEXIST : EPERM));
	}

	entry = kmem_zalloc(sizeof (*entry), KM_SLEEP);
	entry->zdl_id = linkid;
	entry->zdl_net = NULL;

	mutex_enter(&target->zone_lock);
	list_insert_head(&target->zone_dl_list, entry);
	mutex_exit(&target->zone_lock);

	mutex_exit(&zonehash_lock);
	zone_rele(target);

	return (0);
}
/*
 * Remove datalink linkid from the zone identified by zoneid, freeing the
 * list entry together with any network property list attached to it.
 *
 * Returns 0 on success.  Failures are reported through set_errno():
 * EINVAL if the zone does not exist, ENXIO if the zone has no such link.
 */
static int
zone_remove_datalink(zoneid_t zoneid, datalink_id_t linkid)
{
	zone_t *zp;
	zone_dl_t *entry;
	int rc = 0;

	zp = zone_find_by_id(zoneid);
	if (zp == NULL)
		return (set_errno(EINVAL));

	mutex_enter(&zp->zone_lock);
	entry = zone_find_dl(zp, linkid);
	if (entry != NULL) {
		list_remove(&zp->zone_dl_list, entry);
		nvlist_free(entry->zdl_net);
		kmem_free(entry, sizeof (zone_dl_t));
	} else {
		rc = ENXIO;
	}
	mutex_exit(&zp->zone_lock);
	zone_rele(zp);

	return (rc == 0 ? 0 : set_errno(rc));
}
/*
 * Check whether datalink linkid is assigned to a zone.
 *
 * If *zoneidp is a specific zone id, only that zone is checked.  If it is
 * ALL_ZONES, every active zone is searched under zonehash_lock and, on a
 * match, *zoneidp is overwritten with the id of the owning zone.
 *
 * Returns 0 if the link was found, ENXIO otherwise (including when the
 * specified zone does not exist).  The error is returned as a plain
 * value; set_errno() is not called here.
 */
int
zone_check_datalink(zoneid_t *zoneidp, datalink_id_t linkid)
{
	zone_t *zp;
	int rc = ENXIO;

	if (*zoneidp == ALL_ZONES) {
		mutex_enter(&zonehash_lock);
		zp = list_head(&zone_active);
		while (zp != NULL && !zone_dl_exists(zp, linkid))
			zp = list_next(&zone_active, zp);
		if (zp != NULL) {
			*zoneidp = zp->zone_id;
			rc = 0;
		}
		mutex_exit(&zonehash_lock);
		return (rc);
	}

	zp = zone_find_by_id(*zoneidp);
	if (zp != NULL) {
		if (zone_dl_exists(zp, linkid))
			rc = 0;
		zone_rele(zp);
	}

	return (rc);
}
/*
 * Copy out the datalink ids assigned to a zone.
 *
 * *nump is read for the number of ids idarray can hold.  Up to that many
 * ids are copied to idarray; the remaining links are counted but not
 * copied.  If the total differs from the supplied count, the total is
 * written back to *nump.
 *
 * Returns 0 on success.  Failures are reported through set_errno():
 * EFAULT for a failed copyin/copyout, ENXIO for an unknown zone, and
 * EOVERFLOW if the number of links exceeds INT_MAX.
 */
static int
zone_list_datalink(zoneid_t zoneid, int *nump, datalink_id_t *idarray)
{
	uint_t capacity;	/* slots available in idarray */
	uint_t found = 0;	/* links seen so far */
	zone_t *zp;
	zone_dl_t *ent;
	datalink_id_t *out = idarray;

	if (copyin(nump, &capacity, sizeof (capacity)) != 0)
		return (set_errno(EFAULT));

	zp = zone_find_by_id(zoneid);
	if (zp == NULL)
		return (set_errno(ENXIO));

	mutex_enter(&zp->zone_lock);
	for (ent = list_head(&zp->zone_dl_list); ent != NULL;
	    ent = list_next(&zp->zone_dl_list, ent)) {
		/* Past the caller's capacity we only keep counting. */
		if (++found <= capacity) {
			if (copyout(&ent->zdl_id, out, sizeof (*out)) != 0) {
				mutex_exit(&zp->zone_lock);
				zone_rele(zp);
				return (set_errno(EFAULT));
			}
			out++;
		}
	}
	mutex_exit(&zp->zone_lock);
	zone_rele(zp);

	if (found > INT_MAX)
		return (set_errno(EOVERFLOW));

	/* Tell the caller the real count whenever it differs. */
	if (found != capacity &&
	    copyout(&found, nump, sizeof (found)) != 0)
		return (set_errno(EFAULT));

	return (0);
}
/*
 * Look up a zone by id under zonehash_lock and return the pointer.
 *
 * While the by-id hash has not been created yet (zonehashbyid is NULL),
 * &zone0 is returned regardless of zoneid.  Otherwise the result of
 * zone_find_all_by_id() is returned, which may be NULL.
 *
 * No zone_hold() is taken in this function.
 */
zone_t *
zone_find_by_id_nolock(zoneid_t zoneid)
{
	zone_t *zp;

	mutex_enter(&zonehash_lock);
	zp = (zonehashbyid != NULL) ? zone_find_all_by_id(zoneid) : &zone0;
	mutex_exit(&zonehash_lock);

	return (zp);
}
/*
 * Invoke cb(linkid, data) for each datalink assigned to a zone.
 *
 * The link ids are first snapshotted into a temporary array while
 * zone_lock is held, so that the callbacks run with the lock dropped.
 * The walk stops at the first callback that returns non-zero.
 *
 * Returns ENOENT if the zone does not exist, ENOMEM if the snapshot array
 * cannot be allocated (KM_NOSLEEP), 0 if the zone has no links, and
 * otherwise the value of the last callback invoked.
 */
int
zone_datalink_walk(zoneid_t zoneid, int (*cb)(datalink_id_t, void *),
    void *data)
{
	zone_t *zp;
	zone_dl_t *ent;
	datalink_id_t *ids;
	size_t idsz;
	uint_t nlinks = 0;
	uint_t n;
	int rc = 0;

	zp = zone_find_by_id(zoneid);
	if (zp == NULL)
		return (ENOENT);

	mutex_enter(&zp->zone_lock);

	/* Count the links to size the snapshot. */
	ent = list_head(&zp->zone_dl_list);
	while (ent != NULL) {
		nlinks++;
		ent = list_next(&zp->zone_dl_list, ent);
	}

	if (nlinks == 0) {
		mutex_exit(&zp->zone_lock);
		zone_rele(zp);
		return (0);
	}

	idsz = sizeof (datalink_id_t) * nlinks;
	ids = kmem_alloc(idsz, KM_NOSLEEP);
	if (ids == NULL) {
		mutex_exit(&zp->zone_lock);
		zone_rele(zp);
		return (ENOMEM);
	}

	n = 0;
	for (ent = list_head(&zp->zone_dl_list); ent != NULL;
	    ent = list_next(&zp->zone_dl_list, ent))
		ids[n++] = ent->zdl_id;

	mutex_exit(&zp->zone_lock);

	/* Run the callbacks without zone_lock held. */
	for (n = 0; n < nlinks; n++) {
		rc = (*cb)(ids[n], data);
		if (rc != 0)
			break;
	}

	zone_rele(zp);
	kmem_free(ids, idsz);

	return (rc);
}
/*
 * Map a zone network property type to the name under which it is stored
 * in a datalink's property list.
 *
 * Returns ZONE_NET_ADDRNAME for ZONE_NETWORK_ADDRESS, ZONE_NET_RTRNAME
 * for ZONE_NETWORK_DEFROUTER, and NULL for any other type.
 */
static char *
zone_net_type2name(int type)
{
	if (type == ZONE_NETWORK_ADDRESS)
		return (ZONE_NET_ADDRNAME);
	if (type == ZONE_NETWORK_DEFROUTER)
		return (ZONE_NET_RTRNAME);

	return (NULL);
}
/*
 * Attach a network property (selected by znbuf->zn_type) to datalink
 * znbuf->zn_linkid of the zone identified by zoneid.
 *
 * The property value is the zn_len bytes at znbuf->zn_val; it is stored
 * as a uint8 array in the datalink's property list, which is created on
 * first use.  A property that is already present is not overwritten.
 *
 * Returns 0 on success or an errno value on failure:
 *   EPERM   caller fails secpolicy_zone_config()
 *   EINVAL  global zone, unknown property type, negative zn_len,
 *           unknown zone, or property already set
 *   ENXIO   the zone has no such datalink
 *   ENOMEM  the property list could not be allocated
 *
 * The error is returned as a plain errno value and set_errno() is NOT
 * called here: zone_setattr() stores the result in its own err variable,
 * asserts that it is not -1, and applies set_errno() itself.  Returning
 * set_errno()'s result from here would feed that value back into
 * set_errno() a second time.
 *
 * NOTE(review): zn_len is not checked against the size of the buffer the
 * caller copied in, because that size is not passed to this function --
 * the caller should validate it.
 */
static int
zone_set_network(zoneid_t zoneid, zone_net_data_t *znbuf)
{
	zone_t *zone;
	zone_dl_t *zdl;
	nvlist_t *nvl;
	int err = 0;
	uint8_t *new = NULL;
	char *nvname;
	int bufsize;
	datalink_id_t linkid = znbuf->zn_linkid;

	if (secpolicy_zone_config(CRED()) != 0)
		return (EPERM);

	if (zoneid == GLOBAL_ZONEID)
		return (EINVAL);

	nvname = zone_net_type2name(znbuf->zn_type);
	bufsize = znbuf->zn_len;
	new = znbuf->zn_val;
	if (nvname == NULL)
		return (EINVAL);

	/*
	 * A negative length would be converted to a huge unsigned element
	 * count when handed to nvlist_add_uint8_array().
	 */
	if (bufsize < 0)
		return (EINVAL);

	if ((zone = zone_find_by_id(zoneid)) == NULL)
		return (EINVAL);

	mutex_enter(&zone->zone_lock);
	if ((zdl = zone_find_dl(zone, linkid)) == NULL) {
		err = ENXIO;
		goto done;
	}

	/* Create the datalink's property list the first time it is needed. */
	if ((nvl = zdl->zdl_net) == NULL) {
		if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) {
			err = ENOMEM;
			goto done;
		} else {
			zdl->zdl_net = nvl;
		}
	}

	/* Existing properties are never replaced. */
	if (nvlist_exists(nvl, nvname)) {
		err = EINVAL;
		goto done;
	}

	err = nvlist_add_uint8_array(nvl, nvname, new, bufsize);
	ASSERT(err == 0);
done:
	mutex_exit(&zone->zone_lock);
	zone_rele(zone);
	return (err);
}
/*
 * Fetch a network property (selected by znbuf->zn_type) of datalink
 * znbuf->zn_linkid in the zone identified by zoneid.
 *
 * On entry znbuf->zn_len is the capacity of znbuf->zn_val.  On success
 * the stored value is copied into zn_val and zn_len is set to its
 * length.
 *
 * Returns 0 on success.  Failures are passed through set_errno():
 *   EINVAL   global zone, unknown property type, or unknown zone
 *   ENXIO    the zone has no such datalink
 *   ENOENT   the datalink has no such property
 *   ENOBUFS  the stored value is larger than zn_len
 *
 * NOTE(review): set_errno() is applied here already; check that the
 * caller does not apply set_errno() to this function's return value a
 * second time.
 */
static int
zone_get_network(zoneid_t zoneid, zone_net_data_t *znbuf)
{
	zone_t *zp;
	zone_dl_t *link;
	nvlist_t *props;
	uint8_t *val;
	uint_t vallen;
	char *nvname;
	void *buf;
	int bufsize;
	int rc = 0;
	datalink_id_t linkid = znbuf->zn_linkid;

	if (zoneid == GLOBAL_ZONEID)
		return (set_errno(EINVAL));

	nvname = zone_net_type2name(znbuf->zn_type);
	bufsize = znbuf->zn_len;
	buf = znbuf->zn_val;
	if (nvname == NULL)
		return (set_errno(EINVAL));

	zp = zone_find_by_id(zoneid);
	if (zp == NULL)
		return (set_errno(EINVAL));

	mutex_enter(&zp->zone_lock);
	link = zone_find_dl(zp, linkid);
	if (link == NULL) {
		rc = ENXIO;
	} else if ((props = link->zdl_net) == NULL ||
	    !nvlist_exists(props, nvname)) {
		rc = ENOENT;
	} else {
		rc = nvlist_lookup_uint8_array(props, nvname, &val, &vallen);
		ASSERT(rc == 0);
		if (vallen > bufsize) {
			rc = ENOBUFS;
		} else {
			znbuf->zn_len = vallen;
			bcopy(val, buf, vallen);
		}
	}
	mutex_exit(&zp->zone_lock);
	zone_rele(zp);

	return (rc != 0 ? set_errno(rc) : 0);
}