#include <sys/zfs_context.h>
#include <sys/dmu.h>
#include <sys/avl.h>
#include <sys/zap.h>
#include <sys/refcount.h>
#include <sys/nvpair.h>
#ifdef _KERNEL
#include <sys/kidmap.h>
#include <sys/sid.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
#endif
#include <sys/zfs_fuid.h>
/*
 * Names of the nvlist entries that make up the packed FUID table.
 * The top-level nvlist holds one array (FUID_NVP_ARRAY) of per-domain
 * nvlists; each of those carries an index, an offset and a domain name.
 */
/* uint64 entry: the domain's index in the table. */
#define FUID_IDX "fuid_idx"
/* string entry: the domain name. */
#define FUID_DOMAIN "fuid_domain"
/* uint64 entry: always written as 0 in this file and never read back here. */
#define FUID_OFFSET "fuid_offset"
/* nvlist-array entry in the top-level nvlist. */
#define FUID_NVP_ARRAY "fuid_nvlist"
/*
 * One entry of the in-memory domain table.  A single allocation is linked
 * into two AVL trees at the same time: one ordered by domain name (through
 * f_domnode) and one ordered by index (through f_idxnode).
 */
typedef struct fuid_domain {
/* linkage for the tree ordered by domain name */
avl_node_t f_domnode;
/* linkage for the tree ordered by index */
avl_node_t f_idxnode;
/* domain object returned by ksid_lookupdomain(); kd_name is the name */
ksiddomain_t *f_ksid;
/* index of this domain in the table */
uint64_t f_idx;
} fuid_domain_t;
/* Shared empty-string domain, handed out when no real domain applies. */
static char *nulldomain = "";
/*
 * AVL comparator for the index-ordered tree.
 *
 * Both arguments are fuid_domain_t entries; the ordering is decided
 * solely by their f_idx fields, via TREE_CMP().
 */
static int
idx_compare(const void *arg1, const void *arg2)
{
	const fuid_domain_t *lhs = arg1;
	const fuid_domain_t *rhs = arg2;

	return (TREE_CMP(lhs->f_idx, rhs->f_idx));
}
/*
 * AVL comparator for the name-ordered tree.
 *
 * Both arguments are fuid_domain_t entries; they are ordered by a
 * strcmp() of their domain names (f_ksid->kd_name).  The strcmp() result
 * is passed through TREE_ISIGN() before being returned (assumed to reduce
 * it to its sign -- confirm against the macro definition).
 */
static int
domain_compare(const void *arg1, const void *arg2)
{
	const char *name1 = ((const fuid_domain_t *)arg1)->f_ksid->kd_name;
	const char *name2 = ((const fuid_domain_t *)arg2)->f_ksid->kd_name;
	int order = strcmp(name1, name2);

	return (TREE_ISIGN(order));
}
/*
 * Initialize the two AVL trees that together form a domain table.
 *
 * idx_tree is ordered by idx_compare() and links entries through
 * f_idxnode; domain_tree is ordered by domain_compare() and links the
 * same entries through f_domnode.
 */
void
zfs_fuid_avl_tree_create(avl_tree_t *idx_tree, avl_tree_t *domain_tree)
{
	const size_t node_size = sizeof (fuid_domain_t);

	avl_create(idx_tree, idx_compare, node_size,
	    offsetof(fuid_domain_t, f_idxnode));
	avl_create(domain_tree, domain_compare, node_size,
	    offsetof(fuid_domain_t, f_domnode));
}
/*
 * Read the packed domain table stored in object fuid_obj of objset os
 * and insert one fuid_domain_t per entry into both idx_tree and
 * domain_tree.
 *
 * The size of the packed table is taken from the first uint64_t of the
 * object's bonus buffer.  When that size is zero nothing is read and the
 * trees are left untouched.
 *
 * Every step that can fail is wrapped in VERIFY().
 *
 * Returns the packed table size read from the bonus buffer.
 */
uint64_t
zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
    avl_tree_t *domain_tree)
{
	dmu_buf_t *bonus;
	uint64_t table_size;
	nvlist_t *table = NULL;
	nvlist_t **entries;
	uint_t nentries;
	char *buf;
	int n;

	ASSERT(fuid_obj != 0);

	/* The packed size lives in the bonus buffer. */
	VERIFY(0 == dmu_bonus_hold(os, fuid_obj, FTAG, &bonus));
	table_size = *(uint64_t *)bonus->db_data;
	dmu_buf_rele(bonus, FTAG);

	if (table_size == 0)
		return (table_size);

	/* Pull in the packed nvlist and unpack it. */
	buf = kmem_alloc(table_size, KM_SLEEP);
	VERIFY(dmu_read(os, fuid_obj, 0, table_size, buf,
	    DMU_READ_PREFETCH) == 0);
	VERIFY(nvlist_unpack(buf, table_size, &table, 0) == 0);
	VERIFY(nvlist_lookup_nvlist_array(table, FUID_NVP_ARRAY,
	    &entries, &nentries) == 0);

	for (n = 0; n != nentries; n++) {
		nvlist_t *entry = entries[n];
		fuid_domain_t *node;
		char *name;
		uint64_t index;

		VERIFY(nvlist_lookup_string(entry, FUID_DOMAIN, &name) == 0);
		VERIFY(nvlist_lookup_uint64(entry, FUID_IDX, &index) == 0);

		node = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		node->f_idx = index;
		/*
		 * The domain object stays attached to the node;
		 * zfs_fuid_table_destroy() hands it to ksiddomain_rele().
		 */
		node->f_ksid = ksid_lookupdomain(name);

		avl_add(idx_tree, node);
		avl_add(domain_tree, node);
	}

	nvlist_free(table);
	kmem_free(buf, table_size);

	return (table_size);
}
/*
 * Tear down a domain table built by zfs_fuid_table_load() or
 * zfs_fuid_find_by_domain().
 *
 * Each entry is linked into both trees, so the work is split in two
 * passes: emptying domain_tree releases every entry's domain object,
 * emptying idx_tree frees the entries themselves.
 */
void
zfs_fuid_table_destroy(avl_tree_t *idx_tree, avl_tree_t *domain_tree)
{
	fuid_domain_t *node;
	void *walk;

	/* Pass 1: drop the domain objects; the entries stay allocated. */
	walk = NULL;
	while ((node = avl_destroy_nodes(domain_tree, &walk)) != NULL)
		ksiddomain_rele(node->f_ksid);
	avl_destroy(domain_tree);

	/* Pass 2: free the entries. */
	walk = NULL;
	while ((node = avl_destroy_nodes(idx_tree, &walk)) != NULL)
		kmem_free(node, sizeof (fuid_domain_t));
	avl_destroy(idx_tree);
}
/*
 * Look up the domain name stored under index idx in idx_tree.
 *
 * Returns the entry's domain name, or the shared empty string when no
 * entry with that index exists.  No locking is done here.
 */
char *
zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx)
{
	fuid_domain_t key;
	fuid_domain_t *match;
	avl_index_t where;

	/* Only f_idx is consulted by the index tree's comparator. */
	key.f_idx = idx;
	match = avl_find(idx_tree, &key, &where);

	if (match == NULL)
		return (nulldomain);
	return (match->f_ksid->kd_name);
}
#ifdef _KERNEL
/*
 * Set up the per-filesystem domain table on first use.
 *
 * Under z_fuid_lock held as writer: if the table has not been loaded
 * yet, create the two AVL trees, look up the table object number in the
 * master node (a failed lookup is ignored and leaves z_fuid_obj as it
 * was), load the table when an object number is present, and mark the
 * table as loaded.  A second caller finds z_fuid_loaded set and does
 * nothing.
 */
static void
zfs_fuid_init(zfsvfs_t *zfsvfs)
{
	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	if (!zfsvfs->z_fuid_loaded) {
		zfs_fuid_avl_tree_create(&zfsvfs->z_fuid_idx,
		    &zfsvfs->z_fuid_domain);

		(void) zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
		    ZFS_FUID_TABLES, sizeof (uint64_t), 1,
		    &zfsvfs->z_fuid_obj);

		if (zfsvfs->z_fuid_obj != 0) {
			zfsvfs->z_fuid_size = zfs_fuid_table_load(
			    zfsvfs->z_os, zfsvfs->z_fuid_obj,
			    &zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
		}

		zfsvfs->z_fuid_loaded = B_TRUE;
	}

	rw_exit(&zfsvfs->z_fuid_lock);
}
/*
 * Write the in-memory domain table out to the FUID table object as part
 * of transaction tx.
 *
 * Returns immediately when z_fuid_dirty is not set (that check is made
 * before z_fuid_lock is taken).  Otherwise, with z_fuid_lock held as
 * writer:
 *   - allocate the table object and record its number in the master node
 *     if no object exists yet,
 *   - build an nvlist holding one nvlist per domain,
 *   - pack it with XDR encoding and write it at offset 0 of the object,
 *   - store the packed size in the object's bonus buffer,
 *   - clear z_fuid_dirty.
 *
 * Every step that can fail is wrapped in VERIFY().
 */
void
zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
nvlist_t *nvp;
nvlist_t **fuids;
size_t nvsize = 0;
char *packed;
dmu_buf_t *db;
fuid_domain_t *domnode;
int numnodes;
int i;
if (!zfsvfs->z_fuid_dirty) {
return;
}
rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
/* First sync ever: create the table object and register it. */
if (zfsvfs->z_fuid_obj == 0) {
zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
sizeof (uint64_t), tx);
VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
ZFS_FUID_TABLES, sizeof (uint64_t), 1,
&zfsvfs->z_fuid_obj, tx) == 0);
}
VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
/*
 * The array is sized from the index tree but filled by walking the
 * domain tree; every entry is added to both trees, so the counts match.
 */
numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
domnode->f_idx) == 0);
VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
domnode->f_ksid->kd_name) == 0);
}
VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
fuids, numnodes) == 0);
/*
 * The per-domain nvlists and the array are freed right after being
 * added; presumably nvlist_add_nvlist_array() stores its own copies --
 * confirm against the nvpair documentation.
 */
for (i = 0; i != numnodes; i++)
nvlist_free(fuids[i]);
kmem_free(fuids, numnodes * sizeof (void *));
/* Pack into a buffer sized by nvlist_size(). */
VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
packed = kmem_alloc(nvsize, KM_SLEEP);
VERIFY(nvlist_pack(nvp, &packed, &nvsize,
NV_ENCODE_XDR, KM_SLEEP) == 0);
nvlist_free(nvp);
zfsvfs->z_fuid_size = nvsize;
dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
zfsvfs->z_fuid_size, packed, tx);
kmem_free(packed, zfsvfs->z_fuid_size);
/* Record the packed size in the bonus buffer; the loader reads it back. */
VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
FTAG, &db));
dmu_buf_will_dirty(db, tx);
*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
dmu_buf_rele(db, FTAG);
zfsvfs->z_fuid_dirty = B_FALSE;
rw_exit(&zfsvfs->z_fuid_lock);
}
/*
 * Find the table index of a domain, optionally adding the domain to the
 * table when it is not there yet.
 *
 * zfsvfs	filesystem whose domain table is searched
 * domain	domain name to look for
 * retdomain	if not NULL, receives a pointer to the domain name string
 *		(the shared empty string for an empty domain, otherwise the
 *		kd_name of the domain object returned by
 *		ksid_lookupdomain())
 * addok	when B_TRUE, a missing domain is inserted and the table is
 *		marked dirty
 *
 * Returns:
 *	0	when domain is the empty string (no lookup is done)
 *	index	of the existing entry when the domain is found
 *	index	of the new entry (number of entries before the insert,
 *		plus one) when the domain was added
 *	-1	when the domain is not in the table and addok is B_FALSE
 *
 * The search runs with z_fuid_lock held as reader.  An insert needs the
 * writer lock: an in-place upgrade is tried first, and when that fails
 * the lock is dropped, retaken as writer and the search is repeated,
 * because the table may have changed while the lock was not held.
 */
int
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain,
    char **retdomain, boolean_t addok)
{
	fuid_domain_t searchnode, *findnode;
	avl_index_t loc;
	krw_t rw = RW_READER;

	if (domain[0] == '\0') {
		if (retdomain)
			*retdomain = nulldomain;
		return (0);
	}

	searchnode.f_ksid = ksid_lookupdomain(domain);
	if (retdomain)
		*retdomain = searchnode.f_ksid->kd_name;
	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs);

retry:
	rw_enter(&zfsvfs->z_fuid_lock, rw);
	findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);

	if (findnode) {
		/* Read the index while the table is still locked. */
		uint64_t foundidx = findnode->f_idx;

		rw_exit(&zfsvfs->z_fuid_lock);
		/*
		 * The table entry carries its own domain object, so the
		 * one obtained for the search is no longer needed.
		 * NOTE(review): *retdomain still points at this object's
		 * kd_name; that presumes the table entry shares the same
		 * domain object -- confirm against ksid_lookupdomain().
		 */
		ksiddomain_rele(searchnode.f_ksid);
		return (foundidx);
	} else if (addok) {
		fuid_domain_t *domnode;
		uint64_t retidx;

		if (rw == RW_READER && !rw_tryupgrade(&zfsvfs->z_fuid_lock)) {
			rw_exit(&zfsvfs->z_fuid_lock);
			rw = RW_WRITER;
			goto retry;
		}

		/* The new entry takes over the domain object. */
		domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		domnode->f_ksid = searchnode.f_ksid;

		retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;

		avl_add(&zfsvfs->z_fuid_domain, domnode);
		avl_add(&zfsvfs->z_fuid_idx, domnode);
		zfsvfs->z_fuid_dirty = B_TRUE;
		rw_exit(&zfsvfs->z_fuid_lock);
		return (retidx);
	} else {
		rw_exit(&zfsvfs->z_fuid_lock);
		/*
		 * Nothing was inserted, so the domain object obtained for
		 * the search has no owner.  Release it when the caller did
		 * not ask for the name; otherwise it must stay alive
		 * because *retdomain points into it.
		 */
		if (retdomain == NULL)
			ksiddomain_rele(searchnode.f_ksid);
		return (-1);
	}
}
/*
 * Return the domain name stored under table index idx.
 *
 * Returns NULL when idx is 0 or the filesystem does not use FUIDs.
 * Otherwise the table is initialized on first use and, under
 * z_fuid_lock held as reader, the index is looked up -- but only when a
 * table object exists or the table is dirty; in any other case the
 * shared empty string is returned.
 */
const char *
zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
{
	char *name = nulldomain;

	if (idx == 0 || !zfsvfs->z_use_fuids)
		return (NULL);

	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs);

	rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
	if (zfsvfs->z_fuid_obj || zfsvfs->z_fuid_dirty)
		name = zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx, idx);
	rw_exit(&zfsvfs->z_fuid_lock);

	ASSERT(name);
	return (name);
}
/*
 * Map both the owner and the group of znode zp through
 * zfs_fuid_map_id() and store the results in *uidp and *gidp.
 */
void
zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	*uidp = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER);
	*gidp = zfs_fuid_map_id(zfsvfs, zp->z_gid, cr, ZFS_GROUP);
}
/*
 * Map a FUID to a uid or gid.
 *
 * zfsvfs	filesystem whose domain table is consulted
 * fuid		id to map
 * cr		credential supplying the zone for the kidmap lookup
 * type		ZFS_OWNER and ZFS_ACE_USER are mapped as users, every
 *		other type as a group
 *
 * When the index part of fuid (FUID_INDEX()) is 0 the value is returned
 * unchanged.  Otherwise the domain for that index is looked up and the
 * domain plus FUID_RID(fuid) are handed to kidmap_getuidbysid() or
 * kidmap_getgidbysid().
 *
 * The kidmap status is deliberately not checked.  The result is
 * therefore seeded with UID_NOBODY or GID_NOBODY, so that a lookup which
 * fails without storing anything yields a well-defined id instead of
 * whatever was on the stack.
 */
uid_t
zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
    cred_t *cr, zfs_fuid_type_t type)
{
	uint32_t index = FUID_INDEX(fuid);
	const char *domain;
	uid_t id;

	if (index == 0)
		return (fuid);

	domain = zfs_fuid_find_by_idx(zfsvfs, index);
	ASSERT(domain != NULL);

	if (type == ZFS_OWNER || type == ZFS_ACE_USER) {
		id = UID_NOBODY;
		(void) kidmap_getuidbysid(crgetzone(cr), domain,
		    FUID_RID(fuid), &id);
	} else {
		id = GID_NOBODY;
		(void) kidmap_getgidbysid(crgetzone(cr), domain,
		    FUID_RID(fuid), &id);
	}
	return (id);
}
/*
 * Record a FUID in a zfs_fuid_info_t, allocating the info structure
 * when *fuidpp is still NULL.
 *
 * fuidpp	in/out pointer to the info structure
 * domain	domain name; the pointer itself is stored, not a copy
 * rid		rid to encode
 * idx		domain table index of the domain
 * id		id stored with ACE entries
 * type		decides where the encoded FUID is recorded
 *
 * The info structure keeps its own list of domains.  The FUID recorded
 * here is encoded with the 1-based position of the domain in that list,
 * not with idx.  A domain that is not yet on the list is appended, and
 * the domain count and the accumulated string size are updated.
 *
 * For ZFS_ACE_USER and ZFS_ACE_GROUP a new entry is appended to the
 * list of FUIDs; for ZFS_OWNER the encoded value goes to z_fuid_owner,
 * for any other type to z_fuid_group.
 */
void
zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
    uint64_t idx, uint64_t id, zfs_fuid_type_t type)
{
	zfs_fuid_info_t *info;
	zfs_fuid_domain_t *dom;
	uint64_t slot = 1;

	if (*fuidpp == NULL)
		*fuidpp = zfs_fuid_info_alloc();
	info = *fuidpp;

	/* Walk the domain list, counting positions until idx matches. */
	dom = list_head(&info->z_domains);
	while (dom != NULL && dom->z_domidx != idx) {
		dom = list_next(&info->z_domains, dom);
		slot++;
	}

	/* Not on the list: append it; slot already names the new position. */
	if (dom == NULL) {
		dom = kmem_alloc(sizeof (zfs_fuid_domain_t), KM_SLEEP);
		dom->z_domain = domain;
		dom->z_domidx = idx;
		list_insert_tail(&info->z_domains, dom);
		info->z_domain_str_sz += strlen(domain) + 1;
		info->z_domain_cnt++;
	}

	if (type == ZFS_ACE_USER || type == ZFS_ACE_GROUP) {
		zfs_fuid_t *entry;

		entry = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP);
		entry->z_id = id;
		entry->z_domidx = idx;
		entry->z_logfuid = FUID_ENCODE(slot, rid);

		list_insert_tail(&info->z_fuids, entry);
		info->z_fuid_cnt++;
	} else if (type == ZFS_OWNER) {
		info->z_fuid_owner = FUID_ENCODE(slot, rid);
	} else {
		info->z_fuid_group = FUID_ENCODE(slot, rid);
	}
}
/*
 * Produce the owner or group id to store for a new object, based on
 * credential cr.
 *
 * type must be ZFS_OWNER or ZFS_GROUP (enforced with VERIFY).
 *
 * Without FUID support on the filesystem, or without a SID of the
 * matching kind in the credential, the credential's uid/gid is used; an
 * ephemeral id is replaced by UID_NOBODY/GID_NOBODY.
 *
 * With both available, a non-ephemeral id (the SID's id for the owner,
 * the credential's gid for the group) is returned as is.  For an
 * ephemeral id the SID's domain is looked up in (or added to) the domain
 * table, the FUID is recorded in *fuidp through zfs_fuid_node_add(), and
 * the encoded FUID is returned.
 */
uint64_t
zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
    cred_t *cr, zfs_fuid_info_t **fuidp)
{
	boolean_t is_owner;
	ksid_t *ksid;
	const char *domain;
	char *kdomain;
	uint64_t idx;
	uint32_t rid;
	uid_t id;

	VERIFY(type == ZFS_OWNER || type == ZFS_GROUP);
	is_owner = (type == ZFS_OWNER) ? B_TRUE : B_FALSE;

	ksid = crgetsid(cr, is_owner ? KSID_OWNER : KSID_GROUP);

	/* Plain POSIX path: no FUIDs on this filesystem or no SID. */
	if (!zfsvfs->z_use_fuids || ksid == NULL) {
		id = is_owner ? crgetuid(cr) : crgetgid(cr);

		if (IS_EPHEMERAL(id))
			return (is_owner ? UID_NOBODY : GID_NOBODY);

		return ((uint64_t)id);
	}

	/* A SID is present and FUIDs are in use. */
	id = is_owner ? ksid_getid(ksid) : crgetgid(cr);
	if (!IS_EPHEMERAL(id))
		return ((uint64_t)id);

	/* For groups, the id recorded below comes from the SID. */
	if (!is_owner)
		id = ksid_getid(ksid);

	rid = ksid_getrid(ksid);
	domain = ksid_getdomain(ksid);

	idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
	zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);

	return (FUID_ENCODE(idx, rid));
}
/*
 * Turn an id into the value to store on disk, creating a FUID when
 * needed.
 *
 * zfsvfs	filesystem the id is for
 * id		id to convert
 * cr		credential supplying the zone for the kidmap lookup
 * type		ZFS_OWNER, ZFS_GROUP, ZFS_ACE_USER or ZFS_ACE_GROUP
 * fuidpp	in/out FUID info that receives the new FUID (not used
 *		during replay)
 *
 * The id is returned unchanged when the filesystem does not use FUIDs,
 * when the id is not ephemeral, or when its index part is already
 * nonzero.
 *
 * During replay (z_replay set) the domain and rid are taken from the
 * logged FUID info in z_fuid_replay; otherwise they are obtained from
 * kidmap.  In both cases the domain is then looked up in (or added to)
 * the domain table and the encoded FUID is returned.
 */
uint64_t
zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
zfs_fuid_type_t type, zfs_fuid_info_t **fuidpp)
{
const char *domain;
char *kdomain;
uint32_t fuid_idx = FUID_INDEX(id);
uint32_t rid;
idmap_stat status;
uint64_t idx = 0;
zfs_fuid_t *zfuid = NULL;
zfs_fuid_info_t *fuidp = NULL;
if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
return (id);
if (zfsvfs->z_replay) {
fuidp = zfsvfs->z_fuid_replay;
/* Ephemeral id but nothing was logged for it: fall back to nobody. */
if (fuidp == NULL)
return (UID_NOBODY);
VERIFY3U(type, >=, ZFS_OWNER);
VERIFY3U(type, <=, ZFS_ACE_GROUP);
switch (type) {
case ZFS_ACE_USER:
case ZFS_ACE_GROUP:
/*
 * ACE FUIDs are consumed from the head of the logged list; the entry is
 * removed and freed further down.
 * NOTE(review): list_head() is dereferenced without a NULL check --
 * confirm the list cannot be empty here.
 */
zfuid = list_head(&fuidp->z_fuids);
rid = FUID_RID(zfuid->z_logfuid);
idx = FUID_INDEX(zfuid->z_logfuid);
break;
case ZFS_OWNER:
rid = FUID_RID(fuidp->z_fuid_owner);
idx = FUID_INDEX(fuidp->z_fuid_owner);
break;
case ZFS_GROUP:
rid = FUID_RID(fuidp->z_fuid_group);
idx = FUID_INDEX(fuidp->z_fuid_group);
break;
};
/*
 * The logged index is 1-based, hence the "- 1".
 * NOTE(review): an index of 0 would read before the start of the table --
 * confirm logged FUIDs always carry a nonzero index.
 */
domain = fuidp->z_domain_table[idx - 1];
} else {
if (type == ZFS_OWNER || type == ZFS_ACE_USER)
status = kidmap_getsidbyuid(crgetzone(cr), id,
&domain, &rid);
else
status = kidmap_getsidbygid(crgetzone(cr), id,
&domain, &rid);
if (status != 0) {
/*
 * Mapping failed: use the empty domain, for which
 * zfs_fuid_find_by_domain() returns index 0, with a rid of UID_NOBODY.
 */
rid = UID_NOBODY;
domain = nulldomain;
}
}
idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
if (!zfsvfs->z_replay)
zfs_fuid_node_add(fuidpp, kdomain,
rid, idx, id, type);
else if (zfuid != NULL) {
/* Replay: the logged ACE entry has been used up. */
list_remove(&fuidp->z_fuids, zfuid);
kmem_free(zfuid, sizeof (zfs_fuid_t));
}
return (FUID_ENCODE(idx, rid));
}
/*
 * Free the filesystem's in-memory domain table.
 *
 * Runs under z_fuid_lock held as writer and does nothing when the table
 * was never loaded.  z_fuid_loaded is left as it is.
 */
void
zfs_fuid_destroy(zfsvfs_t *zfsvfs)
{
	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	if (zfsvfs->z_fuid_loaded) {
		zfs_fuid_table_destroy(&zfsvfs->z_fuid_idx,
		    &zfsvfs->z_fuid_domain);
	}

	rw_exit(&zfsvfs->z_fuid_lock);
}
/*
 * Allocate a zero-filled zfs_fuid_info_t and initialize its two lists
 * (domains and FUIDs).  Release it with zfs_fuid_info_free().
 */
zfs_fuid_info_t *
zfs_fuid_info_alloc(void)
{
	zfs_fuid_info_t *info = kmem_zalloc(sizeof (*info), KM_SLEEP);

	list_create(&info->z_domains, sizeof (zfs_fuid_domain_t),
	    offsetof(zfs_fuid_domain_t, z_next));
	list_create(&info->z_fuids, sizeof (zfs_fuid_t),
	    offsetof(zfs_fuid_t, z_next));

	return (info);
}
/*
 * Free a zfs_fuid_info_t together with everything hanging off it: all
 * entries on the FUID list, the domain table array (when present), all
 * entries on the domain list, and finally the structure itself.
 *
 * The domain name strings the entries point to are not freed here.
 */
void
zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
{
	/* Drain the FUID list. */
	for (;;) {
		zfs_fuid_t *entry = list_head(&fuidp->z_fuids);

		if (entry == NULL)
			break;
		list_remove(&fuidp->z_fuids, entry);
		kmem_free(entry, sizeof (zfs_fuid_t));
	}

	/* The table holds one pointer per counted domain. */
	if (fuidp->z_domain_table != NULL) {
		kmem_free(fuidp->z_domain_table,
		    (sizeof (char **)) * fuidp->z_domain_cnt);
	}

	/* Drain the domain list. */
	for (;;) {
		zfs_fuid_domain_t *dom = list_head(&fuidp->z_domains);

		if (dom == NULL)
			break;
		list_remove(&fuidp->z_domains, dom);
		kmem_free(dom, sizeof (zfs_fuid_domain_t));
	}

	kmem_free(fuidp, sizeof (zfs_fuid_info_t));
}
/*
 * Check whether fuid names the user of credential cr.
 *
 * - Index part 0: the rid part is compared with crgetuid(cr).
 * - Otherwise, when the credential carries a user SID: both the rid and
 *   the domain name must match that SID.
 * - Otherwise the FUID is mapped to a uid through kidmap.  The result
 *   counts only when the mapping stored a uid, that uid is not
 *   IDMAP_WK_CREATOR_OWNER_UID, and it equals crgetuid(cr).
 */
boolean_t
zfs_fuid_is_cruser(zfsvfs_t *zfsvfs, uint64_t fuid, cred_t *cr)
{
	uint32_t idx = FUID_INDEX(fuid);
	uint32_t rid = FUID_RID(fuid);
	const char *domain;
	ksid_t *ksid;
	uid_t mapped = (uid_t)-1;

	if (idx == 0)
		return (rid == crgetuid(cr));

	domain = zfs_fuid_find_by_idx(zfsvfs, idx);
	ASSERT(domain != NULL);

	ksid = crgetsid(cr, KSID_USER);
	if (ksid != NULL) {
		const char *cred_domain = ksid_getdomain(ksid);

		ASSERT(cred_domain != NULL);
		if (rid != ksid->ks_rid)
			return (B_FALSE);
		return (strcmp(domain, cred_domain) == 0);
	}

	/* (uid_t)-1 is still in place if kidmap stored nothing. */
	(void) kidmap_getuidbysid(crgetzone(cr), domain, rid, &mapped);
	if (mapped == (uid_t)-1 || mapped == IDMAP_WK_CREATOR_OWNER_UID)
		return (B_FALSE);

	return (mapped == crgetuid(cr));
}
/*
 * Check whether id names the user of credential cr or one of the SIDs
 * on the credential's SID list.
 *
 * A match through zfs_fuid_is_cruser() is sufficient.  The SID list is
 * consulted only when the credential has both a user SID and a SID
 * list, the id has a nonzero index part, and its domain is not
 * IDMAP_WK_CREATOR_SID_AUTHORITY.
 */
boolean_t
zfs_user_in_cred(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
{
	ksid_t *ksid = crgetsid(cr, KSID_USER);
	ksidlist_t *sids = crgetsidlist(cr);
	const char *domain;
	uint32_t idx;
	uint32_t rid;

	if (zfs_fuid_is_cruser(zfsvfs, id, cr))
		return (B_TRUE);

	if (ksid == NULL || sids == NULL)
		return (B_FALSE);

	idx = FUID_INDEX(id);
	rid = FUID_RID(id);

	/* Ids without a domain cannot be on the SID list. */
	if (idx == 0)
		return (B_FALSE);

	domain = zfs_fuid_find_by_idx(zfsvfs, idx);
	ASSERT(domain != NULL);

	if (strcmp(domain, IDMAP_WK_CREATOR_SID_AUTHORITY) == 0)
		return (B_FALSE);

	if (ksidlist_has_sid(sids, domain, rid))
		return (B_TRUE);

	return (B_FALSE);
}
/*
 * Check whether credential cr is a member of group id.
 *
 * When the credential carries a group SID and id is not
 * IDMAP_WK_CREATOR_GROUP_GID, the SID information is tried first:
 *   - id with a domain (nonzero index part): never a member when the
 *     domain is IDMAP_WK_CREATOR_SID_AUTHORITY; a member when domain and
 *     rid match the group SID or an entry on the SID list.
 *   - id without a domain: a member when the rid part equals the group
 *     SID's id or ksidlist_has_pid() finds it on the SID list.
 *   - if neither matched, and the group SID's id equals the credential's
 *     gid while the credential has no groups, or exactly one group that
 *     is that same gid, the answer is "not a member".
 *
 * In every remaining case the id is mapped to a gid and the decision is
 * left to groupmember().
 */
boolean_t
zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
{
	ksid_t *ksid = crgetsid(cr, KSID_GROUP);
	ksidlist_t *sids = crgetsidlist(cr);
	uint32_t idx = FUID_INDEX(id);
	uint32_t rid = FUID_RID(id);
	uid_t gid;

	if (ksid != NULL && id != IDMAP_WK_CREATOR_GROUP_GID) {
		int ngroups;

		if (idx == 0) {
			/* No domain: compare ids directly. */
			if (ksid_getid(ksid) == rid)
				return (B_TRUE);

			if (sids != NULL && ksidlist_has_pid(sids, rid))
				return (B_TRUE);
		} else {
			const char *domain;

			domain = zfs_fuid_find_by_idx(zfsvfs, idx);
			ASSERT(domain != NULL);

			if (strcmp(domain,
			    IDMAP_WK_CREATOR_SID_AUTHORITY) == 0)
				return (B_FALSE);

			if (strcmp(ksid_getdomain(ksid), domain) == 0 &&
			    rid == ksid_getrid(ksid))
				return (B_TRUE);

			if (sids != NULL &&
			    ksidlist_has_sid(sids, domain, rid))
				return (B_TRUE);
		}

		gid = crgetgid(cr);
		ngroups = crgetngroups(cr);

		if (ksid_getid(ksid) == gid) {
			if (ngroups == 0)
				return (B_FALSE);
			if (ngroups == 1 && crgetgroups(cr)[0] == gid)
				return (B_FALSE);
		}
	}

	gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
	return (groupmember(gid, cr));
}
/*
 * Add to transaction tx the holds needed for a later write of the FUID
 * table: a bonus hold and a write hold of FUID_SIZE_ESTIMATE(zfsvfs)
 * bytes at offset 0.
 *
 * When no table object exists yet the holds are placed on
 * DMU_NEW_OBJECT, and a ZAP hold on the master node is added as well.
 */
void
zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	boolean_t creating = (zfsvfs->z_fuid_obj == 0) ? B_TRUE : B_FALSE;
	uint64_t obj = creating ? DMU_NEW_OBJECT : zfsvfs->z_fuid_obj;

	dmu_tx_hold_bonus(tx, obj);
	dmu_tx_hold_write(tx, obj, 0, FUID_SIZE_ESTIMATE(zfsvfs));

	if (creating)
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
}
#endif