#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/t_lock.h>
#include <sys/debug.h>
#include <sys/atomic.h>
#include <sys/fs/lofs_node.h>
#include <sys/fs/lofs_info.h>
/*
 * Number of hash buckets lsetup() uses when its caller passes a size of 0.
 */
#define LOFS_DEFAULT_HTSIZE (1 << 6)
/*
 * Map a vnode pointer to a bucket index: drop the low 10 bits of the
 * address and mask with (tblsz - 1).  Because of the mask, every bucket is
 * reachable only when tblsz is a power of 2.
 */
#define ltablehash(vp, tblsz) ((((intptr_t)(vp))>>10) & ((tblsz)-1))
/*
 * The three accessors below re-read li_hashtable and li_htsize each time
 * they are expanded.  lgrow() replaces both fields while holding the bucket
 * locks of the old table, so these are presumably meant to be used only
 * while the relevant bucket lock is held.
 */
/*
 * Address of the lock of the bucket that vp hashes to.
 */
#define TABLE_LOCK(vp, li) \
(&(li)->li_hashtable[ltablehash((vp), (li)->li_htsize)].lh_lock)
/*
 * Head of the lnode chain of the bucket that vp hashes to (an lvalue).
 */
#define TABLE_BUCKET(vp, li) \
((li)->li_hashtable[ltablehash((vp), (li)->li_htsize)].lh_chain)
/*
 * Element counter of the bucket that vp hashes to (an lvalue).
 */
#define TABLE_COUNT(vp, li) \
((li)->li_hashtable[ltablehash((vp), (li)->li_htsize)].lh_count)
/*
 * Acquire / release the lock of the bucket that vp hashes to.  Acquisition
 * goes through table_lock_enter(), which copes with a concurrent resize;
 * release simply exits the lock of the bucket in the current table.
 */
#define TABLE_LOCK_ENTER(vp, li) table_lock_enter(vp, li)
#define TABLE_LOCK_EXIT(vp, li) \
mutex_exit(&(li)->li_hashtable[ltablehash((vp), \
(li)->li_htsize)].lh_lock)
/*
 * Forward declarations for helpers defined later in this file.
 */
static lnode_t *lfind(struct vnode *, struct loinfo *);
static void lsave(lnode_t *, struct loinfo *);
static struct vfs *makelfsnode(struct vfs *, struct loinfo *);
static struct lfsnode *lfsfind(struct vfs *, struct loinfo *);
/*
 * Resize tunables, both used as shift counts by lsave(): a resize is
 * attempted once li_refct exceeds (li_htsize << lo_resize_threshold), and
 * the requested new size is (li_htsize << lo_resize_factor).  With the
 * values below that is "more than 2 * htsize lnodes" and "4 * htsize".
 */
uint_t lo_resize_threshold = 1;
uint_t lo_resize_factor = 2;
/*
 * kmem cache that all lnode_t structures are allocated from; created in
 * lofs_subrinit() and destroyed in lofs_subrfini().
 */
static kmem_cache_t *lnode_cache;
/*
 * Acquire the lock of the hash bucket that vp hashes to, coping with a
 * concurrent lgrow().
 *
 * The table pointer and size are sampled without any lock, the bucket lock
 * in that sampled table is taken, and then the sample is re-validated
 * against the current li_hashtable / li_htsize.  If either changed in the
 * meantime the lock is dropped and the whole sequence is retried.
 *
 * Returns with the bucket lock held.
 */
static void
table_lock_enter(vnode_t *vp, struct loinfo *li)
{
struct lobucket *chain;
uint_t htsize;
uint_t hash;
for (;;) {
/*
 * Read the size first and the table second, with a consumer barrier
 * in between.  lgrow() publishes the table first and the size second
 * (separated by a producer barrier), so a new size is never paired
 * with an old table here.
 */
htsize = li->li_htsize;
membar_consumer();
chain = (struct lobucket *)li->li_hashtable;
hash = ltablehash(vp, htsize);
mutex_enter(&chain[hash].lh_lock);
/* Still the current table and size?  Then this is the right lock. */
if (li->li_hashtable == chain && li->li_htsize == htsize)
break;
/* The table was swapped or resized under us; try again. */
mutex_exit(&chain[hash].lh_lock);
}
}
/*
 * One-time initialization: create the kmem cache that lnode_t structures
 * are allocated from.  No constructor, destructor, reclaim callback or
 * private argument is supplied (all NULL), and alignment/flags are 0.
 */
void
lofs_subrinit(void)
{
lnode_cache = kmem_cache_create("lnode_cache", sizeof (lnode_t),
0, NULL, NULL, NULL, NULL, NULL, 0);
}
/*
 * Counterpart of lofs_subrinit(): destroy the lnode kmem cache.
 */
void
lofs_subrfini(void)
{
kmem_cache_destroy(lnode_cache);
}
/*
 * Initialize the per-mount bookkeeping in *li.
 *
 * li     - loinfo to initialize.
 * htsize - number of hash buckets to allocate; 0 selects
 *          LOFS_DEFAULT_HTSIZE.
 *
 * The reference count, the lfsnode list and the retired-table list start
 * out empty, the bucket array is allocated zero-filled (sleeping if
 * necessary), and the lfs and hashtable mutexes are initialized.
 */
void
lsetup(struct loinfo *li, uint_t htsize)
{
	uint_t nbuckets = (htsize != 0) ? htsize : LOFS_DEFAULT_HTSIZE;

	/* Nothing is referenced, cached or retired yet. */
	li->li_refct = 0;
	li->li_lfs = NULL;
	li->li_retired = NULL;

	/* Zero-filled bucket array of the chosen size. */
	li->li_htsize = nbuckets;
	li->li_hashtable = kmem_zalloc(nbuckets * sizeof (*li->li_hashtable),
	    KM_SLEEP);

	mutex_init(&li->li_lfslock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&li->li_htlock, NULL, MUTEX_DEFAULT, NULL);
}
/*
 * Tear down what lsetup() and later resizes built up in *li: destroy the
 * two loinfo mutexes, destroy every bucket lock of the current hashtable
 * and free that table, then free each retired hashtable together with its
 * list entry.
 */
void
ldestroy(struct loinfo *li)
{
	struct lobucket *buckets;
	struct lo_retired_ht *cur, *next;
	uint_t nbuckets;
	uint_t idx;

	mutex_destroy(&li->li_htlock);
	mutex_destroy(&li->li_lfslock);

	/* Current hashtable: per-bucket locks first, then the array. */
	nbuckets = li->li_htsize;
	buckets = li->li_hashtable;
	idx = 0;
	while (idx < nbuckets) {
		mutex_destroy(&buckets[idx].lh_lock);
		idx++;
	}
	kmem_free(buckets, nbuckets * sizeof (*li->li_hashtable));

	/* Tables retired by earlier resizes. */
	for (cur = li->li_retired; cur != NULL; cur = next) {
		next = cur->lrh_next;
		kmem_free(cur->lrh_table,
		    cur->lrh_size * sizeof (*li->li_hashtable));
		kmem_free(cur, sizeof (*cur));
	}
	li->li_retired = NULL;
}
/*
 * Return the lofs vnode that shadows the underlying vnode vp, creating a
 * new lnode/vnode pair when none is cached or when flag == LOF_FORCE.
 *
 * vp   - underlying vnode.
 * li   - per-mount lofs state holding the hash table.
 * flag - LOF_FORCE skips the cache lookup and always creates a new lnode.
 *
 * When an existing lnode is found, lfind() has already taken a hold on its
 * vnode and VN_RELE(vp) is done here.  When a new lnode is created, vp is
 * stored in lo_vp and no VN_RELE(vp) is done on that path.
 *
 * The bucket lock for vp is taken and dropped internally; the function is
 * entered and left without it.
 */
struct vnode *
makelonode(struct vnode *vp, struct loinfo *li, int flag)
{
lnode_t *lp, *tlp;
struct vfs *vfsp;
vnode_t *nvp;
lp = NULL;
TABLE_LOCK_ENTER(vp, li);
if (flag != LOF_FORCE)
lp = lfind(vp, li);
if ((flag == LOF_FORCE) || (lp == NULL)) {
/*
 * First try non-sleeping allocations so the bucket lock can stay
 * held across them.
 */
lp = kmem_cache_alloc(lnode_cache, KM_NOSLEEP);
nvp = vn_alloc(KM_NOSLEEP);
if (lp == NULL || nvp == NULL) {
/*
 * At least one allocation failed.  Drop the bucket lock, redo
 * only the failed allocation(s) with KM_SLEEP, then retake the
 * lock.
 */
TABLE_LOCK_EXIT(vp, li);
/* Keep an lnode that was successfully allocated above. */
tlp = lp;
if (tlp == NULL) {
tlp = kmem_cache_alloc(lnode_cache, KM_SLEEP);
}
if (nvp == NULL) {
nvp = vn_alloc(KM_SLEEP);
}
lp = NULL;
TABLE_LOCK_ENTER(vp, li);
/*
 * The lock was dropped, so an lnode for vp may have been inserted
 * in the meantime; look again unless creation is forced.
 */
if (flag != LOF_FORCE)
lp = lfind(vp, li);
if (lp != NULL) {
/* Found one after all: discard what was just allocated. */
kmem_cache_free(lnode_cache, tlp);
vn_free(nvp);
VN_RELE(vp);
goto found_lnode;
}
lp = tlp;
}
/*
 * li_refct is incremented before makelfsnode() is called;
 * makelfsnode() asserts li_refct > 0 on its slow path.
 */
atomic_inc_32(&li->li_refct);
vfsp = makelfsnode(vp->v_vfsp, li);
lp->lo_vnode = nvp;
VN_SET_VFS_TYPE_DEV(nvp, vfsp, vp->v_type, vp->v_rdev);
/* Only these three flags are copied from the underlying vnode. */
nvp->v_flag |= (vp->v_flag & (VNOMOUNT|VNOMAP|VDIROPEN));
vn_setops(nvp, lo_vnodeops);
nvp->v_data = (caddr_t)lp;
lp->lo_vp = vp;
lp->lo_looping = 0;
/* lsave() may drop and retake the bucket lock if it resizes the table. */
lsave(lp, li);
/*
 * NOTE(review): vn_exists() is applied to the underlying vnode vp here,
 * whereas freelonode() calls vn_invalid() on the lofs vnode -- confirm
 * that this asymmetry is intended.
 */
vn_exists(vp);
} else {
/* Cache hit on the first lookup. */
VN_RELE(vp);
}
found_lnode:
TABLE_LOCK_EXIT(vp, li);
return (ltov(lp));
}
/*
 * Return the vfs that a lofs vnode shadowing something on vfsp should
 * point to.
 *
 * If vfsp is the filesystem this lofs mount was made over (li_realvfs),
 * that is simply li_mountvfs and no lock is taken.  Otherwise an lfsnode
 * for vfsp is looked up on li->li_lfs, or created and linked in, and a
 * pointer to its embedded lfs_vfs is returned with an additional hold.
 */
static struct vfs *
makelfsnode(struct vfs *vfsp, struct loinfo *li)
{
struct lfsnode *lfs;
struct lfsnode *tlfs;
/* Fast path: no locks needed. */
if (vfsp == li->li_realvfs)
return (li->li_mountvfs);
ASSERT(li->li_refct > 0);
mutex_enter(&li->li_lfslock);
if ((lfs = lfsfind(vfsp, li)) == NULL) {
/*
 * Not cached.  Drop the lock around the sleeping allocation, then
 * look again in case a matching lfsnode was added in the meantime.
 */
mutex_exit(&li->li_lfslock);
lfs = kmem_zalloc(sizeof (*lfs), KM_SLEEP);
mutex_enter(&li->li_lfslock);
if ((tlfs = lfsfind(vfsp, li)) != NULL) {
kmem_free(lfs, sizeof (*lfs));
lfs = tlfs;
goto found_lfs;
}
lfs->lfs_realvfs = vfsp;
/*
 * Set up the embedded vfs: lofs operations, the fstype of the lofs
 * mount, and flags / block size / dev / fsid taken from the
 * underlying vfs.  The flags are the underlying flags plus li_mflag,
 * minus li_dflag, restricted to INHERIT_VFS_FLAG.
 */
VFS_INIT(&lfs->lfs_vfs, lo_vfsops, (caddr_t)li);
lfs->lfs_vfs.vfs_fstype = li->li_mountvfs->vfs_fstype;
lfs->lfs_vfs.vfs_flag =
((vfsp->vfs_flag | li->li_mflag) & ~li->li_dflag) &
INHERIT_VFS_FLAG;
lfs->lfs_vfs.vfs_bsize = vfsp->vfs_bsize;
lfs->lfs_vfs.vfs_dev = vfsp->vfs_dev;
lfs->lfs_vfs.vfs_fsid = vfsp->vfs_fsid;
if (vfsp->vfs_mntpt != NULL) {
/* The stored mountpoint is released again in freelfsnode(). */
lfs->lfs_vfs.vfs_mntpt = vfs_getmntpoint(vfsp);
}
/*
 * The result of VFS_ROOT() is deliberately ignored.  The lfsnode was
 * zero-allocated, so lfs_realrootvp is NULL unless VFS_ROOT() stored
 * a vnode; lfsfind() skips entries whose root vnode is NULL.
 */
(void) VFS_ROOT(vfsp, &lfs->lfs_realrootvp);
/*
 * Baseline hold: an idle lfs_vfs has vfs_count == 1, not 0 (see the
 * vfs_count == 1 checks in freelfsnode(), lfs_rele() and freelonode()).
 */
VFS_HOLD(&lfs->lfs_vfs);
lfs->lfs_next = li->li_lfs;
li->li_lfs = lfs;
vfs_propagate_features(vfsp, &lfs->lfs_vfs);
}
found_lfs:
/* Hold on behalf of the caller. */
VFS_HOLD(&lfs->lfs_vfs);
mutex_exit(&li->li_lfslock);
return (&lfs->lfs_vfs);
}
/*
 * Unlink lfs from li->li_lfs and release everything it owns: the hold on
 * the real root vnode (if any), the mountpoint string (if any), the
 * vfs_impl structure (if any), the vfs_reflock semaphore and finally the
 * lfsnode itself.
 *
 * The caller must hold li->li_lfslock, and lfs is expected to be idle
 * (vfs_count == 1).  Panics if lfs is not on the list.
 */
static void
freelfsnode(struct lfsnode *lfs, struct loinfo *li)
{
	struct lfsnode **linkp;

	ASSERT(MUTEX_HELD(&li->li_lfslock));
	ASSERT(li->li_refct > 0);

	/*
	 * Walk the list via the address of each link so that the list head
	 * and interior nodes are unlinked the same way.
	 */
	for (linkp = &li->li_lfs; *linkp != NULL;
	    linkp = &(*linkp)->lfs_next) {
		if (*linkp != lfs)
			continue;

		ASSERT(lfs->lfs_vfs.vfs_count == 1);
		*linkp = lfs->lfs_next;

		if (lfs->lfs_realrootvp != NULL) {
			VN_RELE(lfs->lfs_realrootvp);
		}
		if (lfs->lfs_vfs.vfs_mntpt != NULL)
			refstr_rele(lfs->lfs_vfs.vfs_mntpt);
		if (lfs->lfs_vfs.vfs_implp != NULL) {
			ASSERT(lfs->lfs_vfs.vfs_femhead == NULL);
			ASSERT(lfs->lfs_vfs.vfs_vskap == NULL);
			ASSERT(lfs->lfs_vfs.vfs_fstypevsp == NULL);
			kmem_free(lfs->lfs_vfs.vfs_implp,
			    sizeof (vfs_impl_t));
		}
		sema_destroy(&lfs->lfs_vfs.vfs_reflock);
		kmem_free(lfs, sizeof (struct lfsnode));
		return;
	}

	panic("freelfsnode");
}
/*
 * Look up the lfsnode cached for the underlying filesystem vfsp.
 *
 * An entry only counts as a hit when its real root vnode is usable: the
 * pointer is non-NULL, the vnode still has a vfs, and its type is not
 * VBAD.  Entries for vfsp that fail this test are skipped and the search
 * continues.
 *
 * The caller must hold li->li_lfslock.  Returns the lfsnode, or NULL when
 * there is no usable entry.
 */
static struct lfsnode *
lfsfind(struct vfs *vfsp, struct loinfo *li)
{
	struct lfsnode *cand;

	ASSERT(MUTEX_HELD(&li->li_lfslock));

	for (cand = li->li_lfs; cand != NULL; cand = cand->lfs_next) {
		struct vnode *rootvp;

		if (cand->lfs_realvfs != vfsp)
			continue;

		rootvp = cand->lfs_realrootvp;
		if (rootvp != NULL && rootvp->v_vfsp != NULL &&
		    rootvp->v_type != VBAD)
			return (cand);
	}
	return (NULL);
}
/*
 * Translate a lofs vfs into the real vfs it stands for.
 *
 * vfsp        - either the lofs mount's own vfs (li_mountvfs) or the
 *               lfs_vfs embedded in one of the mount's lfsnodes.
 * realrootvpp - if non-NULL, receives the matching real root vnode.
 *
 * Returns the real vfs.  Panics when vfsp is neither li_mountvfs nor any
 * lfsnode's vfs.
 */
struct vfs *
lo_realvfs(struct vfs *vfsp, struct vnode **realrootvpp)
{
	struct loinfo *li = vtoli(vfsp);
	struct lfsnode *node;

	ASSERT(li->li_refct > 0);

	/* The mount's own vfs needs no list walk and no lock. */
	if (vfsp == li->li_mountvfs) {
		if (realrootvpp != NULL)
			*realrootvpp = vtol(li->li_rootvp)->lo_vp;
		return (li->li_realvfs);
	}

	mutex_enter(&li->li_lfslock);
	node = li->li_lfs;
	while (node != NULL) {
		if (vfsp == &node->lfs_vfs) {
			if (realrootvpp != NULL)
				*realrootvpp = node->lfs_realrootvp;
			mutex_exit(&li->li_lfslock);
			return (node->lfs_realvfs);
		}
		node = node->lfs_next;
	}

	panic("lo_realvfs");
}
/*
 * Record a hashtable that is no longer current on li->li_retired so that
 * ldestroy() can free it later.
 *
 * li    - per-mount lofs state.
 * table - the retired bucket array.
 * size  - number of buckets in table.
 *
 * The list entry is allocated with KM_SLEEP before li_htlock is taken;
 * only the list insertion happens under the lock.
 */
static void
lretire(struct loinfo *li, struct lobucket *table, uint_t size)
{
	struct lo_retired_ht *entry = kmem_alloc(sizeof (*entry), KM_SLEEP);

	entry->lrh_size = size;
	entry->lrh_table = table;

	/* Push onto the head of the retired list. */
	mutex_enter(&li->li_htlock);
	entry->lrh_next = li->li_retired;
	li->li_retired = entry;
	mutex_exit(&li->li_htlock);
}
/*
 * Try to grow the lnode hashtable to newsize buckets.
 *
 * The new table is allocated without sleeping; on allocation failure the
 * function returns with the table unchanged.  If the table is already at
 * least newsize buckets by the time li_htlock is acquired, the new
 * allocation is freed and nothing changes.  Otherwise all lnodes are
 * rehashed into the new table, the new table is published, and the old
 * table is handed to lretire() rather than freed.
 *
 * Must be called without any bucket lock held: every bucket lock of the
 * old table is acquired here.
 */
static void
lgrow(struct loinfo *li, uint_t newsize)
{
uint_t oldsize;
uint_t i;
struct lobucket *oldtable, *newtable;
/* KM_NOSLEEP: failing to grow is tolerated. */
if ((newtable = kmem_zalloc(newsize * sizeof (*li->li_hashtable),
KM_NOSLEEP)) == NULL)
return;
mutex_enter(&li->li_htlock);
/* Already at least this large: nothing to do. */
if (newsize <= li->li_htsize) {
mutex_exit(&li->li_htlock);
kmem_free(newtable, newsize * sizeof (*li->li_hashtable));
return;
}
oldsize = li->li_htsize;
oldtable = li->li_hashtable;
/*
 * Take every bucket lock of the old table so that table_lock_enter()
 * callers block until the resize is finished.
 */
for (i = 0; i < oldsize; i++)
mutex_enter(&oldtable[i].lh_lock);
/*
 * Only the first oldsize locks of the new table are taken.  The new
 * table pointer is stored before the new size, so while those two stores
 * are in flight table_lock_enter() can only combine the new table with
 * the old size, i.e. hash to an index below oldsize.
 */
for (i = 0; i < oldsize; i++)
mutex_enter(&newtable[i].lh_lock);
/* Rehash: move every lnode to the head of its new chain. */
for (i = 0; i < oldsize; i++) {
lnode_t *tlp, *nlp;
for (tlp = oldtable[i].lh_chain; tlp != NULL; tlp = nlp) {
uint_t hash = ltablehash(tlp->lo_vp, newsize);
/* Save the successor before relinking tlp. */
nlp = tlp->lo_next;
tlp->lo_next = newtable[hash].lh_chain;
newtable[hash].lh_chain = tlp;
newtable[hash].lh_count++;
}
}
/*
 * Make the rehashed chains visible before the new table pointer is,
 * since lockers start using the new table as soon as they can see it.
 */
membar_producer();
li->li_hashtable = newtable;
/*
 * table_lock_enter() relies on the table pointer being updated before
 * the size.
 */
membar_producer();
li->li_htsize = newsize;
/* State is consistent again; release all bucket locks. */
for (i = 0; i < oldsize; i++) {
mutex_exit(&newtable[i].lh_lock);
mutex_exit(&oldtable[i].lh_lock);
}
mutex_exit(&li->li_htlock);
/*
 * The old table is retired, not freed: table_lock_enter() may have
 * sampled the old table pointer and still be about to lock or unlock
 * one of its buckets.
 */
lretire(li, oldtable, oldsize);
}
/*
 * Insert lp at the head of the hash chain for lp->lo_vp and bump that
 * bucket's element count.
 *
 * The caller must hold the bucket lock for lp->lo_vp, and the lock is held
 * again on return.  If the number of lnodes (li_refct) exceeds
 * (li_htsize << lo_resize_threshold), the bucket lock is dropped, a resize
 * to (li_htsize << lo_resize_factor) buckets is attempted via lgrow(), and
 * the bucket lock is then re-acquired, so the lock is not held
 * continuously across this call.
 */
static void
lsave(lnode_t *lp, struct loinfo *li)
{
	ASSERT(lp->lo_vp);
	ASSERT(MUTEX_HELD(TABLE_LOCK(lp->lo_vp, li)));

#ifdef LODEBUG
	/*
	 * ltablehash() takes the table size as its second argument; passing
	 * the loinfo pointer itself does not compile.
	 */
	lo_dprint(4, "lsave lp %p hash %d\n",
	    lp, ltablehash(lp->lo_vp, li->li_htsize));
#endif

	TABLE_COUNT(lp->lo_vp, li)++;
	lp->lo_next = TABLE_BUCKET(lp->lo_vp, li);
	TABLE_BUCKET(lp->lo_vp, li) = lp;

	if (li->li_refct > (li->li_htsize << lo_resize_threshold)) {
		/* lgrow() takes every bucket lock, so ours must be released. */
		TABLE_LOCK_EXIT(lp->lo_vp, li);
		lgrow(li, li->li_htsize << lo_resize_factor);
		TABLE_LOCK_ENTER(lp->lo_vp, li);
	}
}
/*
 * Drop one reference on the vfs embedded in lfs.  When that leaves only
 * the baseline reference (vfs_count == 1), the lfsnode is idle and is
 * freed via freelfsnode().
 *
 * The caller must hold li->li_lfslock, and the count must be above 1 on
 * entry.
 */
static void
lfs_rele(struct lfsnode *lfs, struct loinfo *li)
{
	ASSERT(MUTEX_HELD(&li->li_lfslock));
	ASSERT(lfs->lfs_vfs.vfs_count > 1);

	if (atomic_dec_32_nv(&lfs->lfs_vfs.vfs_count) != 1)
		return;

	freelfsnode(lfs, li);
}
/*
 * Release the lnode lp.
 *
 * If the lofs vnode has more than one reference once the bucket lock is
 * held, only one reference is dropped and the lnode stays in the table.
 * Otherwise the lnode is removed from its hash chain, li_refct is
 * decremented, the lfsnode reference (if the vnode belonged to an lfsnode
 * rather than to the mount's own vfs) is released, the lnode and its vnode
 * are freed, and the hold on the underlying vnode is dropped.
 *
 * Panics if lp is not found on the chain it hashes to.
 */
void
freelonode(lnode_t *lp)
{
	lnode_t *lt;
	lnode_t *ltprev = NULL;
	struct lfsnode *lfs, *nextlfs;
	struct vfs *vfsp;
	struct vnode *vp = ltov(lp);
	struct vnode *realvp = realvp(vp);
	struct loinfo *li = vtoli(vp->v_vfsp);

#ifdef LODEBUG
	/*
	 * ltablehash() takes the table size as its second argument; passing
	 * the loinfo pointer itself does not compile.
	 */
	lo_dprint(4, "freelonode lp %p hash %d\n",
	    lp, ltablehash(lp->lo_vp, li->li_htsize));
#endif
	TABLE_LOCK_ENTER(lp->lo_vp, li);

	/*
	 * lfind() takes new holds under the bucket lock, so with that lock
	 * held a count above 1 means the vnode is still in use: drop just
	 * this reference and leave the lnode alone.
	 */
	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		VN_RELE_LOCKED(vp);
		mutex_exit(&vp->v_lock);
		TABLE_LOCK_EXIT(lp->lo_vp, li);
		return;
	}
	mutex_exit(&vp->v_lock);

	for (lt = TABLE_BUCKET(lp->lo_vp, li); lt != NULL;
	    ltprev = lt, lt = lt->lo_next) {
		if (lt == lp) {
#ifdef LODEBUG
			lo_dprint(4, "freeing %p, vfsp %p\n",
			    vp, vp->v_vfsp);
#endif
			atomic_dec_32(&li->li_refct);
			vfsp = vp->v_vfsp;
			vn_invalid(vp);
			if (vfsp != li->li_mountvfs) {
				mutex_enter(&li->li_lfslock);
				/*
				 * Walk the lfsnode list up to the entry this
				 * vnode belonged to, freeing idle entries
				 * (vfs_count == 1) met on the way, then drop
				 * this vnode's reference on its own entry.
				 * The successor is saved first because the
				 * current entry may be freed.
				 */
				lfs = li->li_lfs;
				while (lfs != NULL) {
					nextlfs = lfs->lfs_next;
					if (vfsp == &lfs->lfs_vfs) {
						lfs_rele(lfs, li);
						break;
					}
					if (lfs->lfs_vfs.vfs_count == 1) {
						/* This lfsnode is idle. */
						freelfsnode(lfs, li);
					}
					lfs = nextlfs;
				}
				mutex_exit(&li->li_lfslock);
			}
			/* Unlink from the hash chain. */
			if (ltprev == NULL) {
				TABLE_BUCKET(lt->lo_vp, li) = lt->lo_next;
			} else {
				ltprev->lo_next = lt->lo_next;
			}
			TABLE_COUNT(lt->lo_vp, li)--;
			TABLE_LOCK_EXIT(lt->lo_vp, li);
			kmem_cache_free(lnode_cache, lt);
			vn_free(vp);
			/* realvp was captured on entry, before lt was freed. */
			VN_RELE(realvp);
			return;
		}
	}
	panic("freelonode");
}
/*
 * Search the hash chain that vp hashes to for an lnode whose underlying
 * vnode (lo_vp) is vp.
 *
 * The caller must hold the bucket lock for vp.  On a hit, a hold is taken
 * on the lnode's lofs vnode and the lnode is returned; otherwise NULL is
 * returned.
 */
static lnode_t *
lfind(struct vnode *vp, struct loinfo *li)
{
	lnode_t *node;

	ASSERT(MUTEX_HELD(TABLE_LOCK(vp, li)));

	for (node = TABLE_BUCKET(vp, li); node != NULL;
	    node = node->lo_next) {
		if (node->lo_vp != vp)
			continue;
		VN_HOLD(ltov(node));
		return (node);
	}
	return (NULL);
}
#ifdef LODEBUG
/* Debug level selector consulted by lo_dprint(). */
static int lofsdebug;
#endif
#ifdef LODEBUG
/*
 * Debug trace helper, compiled only under LODEBUG.
 *
 * The message is printed when lofsdebug equals level exactly, or when
 * lofsdebug is above 10 and (lofsdebug - 10) is at least level.
 *
 * level   - level of this message.
 * str     - printf-style format string.
 * a1..a9  - format arguments, passed through to printf() unchanged.
 *
 * The return type is spelled out as int (it used to be implicit, which is
 * not valid since C99) so the definition stays compatible with call sites
 * that rely on an implicit declaration.  The value returned is always 0.
 *
 * NOTE(review): callers in this file pass pointers for "%p" through the
 * int parameters; that presumably truncates on LP64 -- confirm before
 * relying on the printed values.
 */
int
lo_dprint(int level, char *str, int a1, int a2, int a3, int a4, int a5, int a6,
    int a7, int a8, int a9)
{
	if (lofsdebug == level || (lofsdebug > 10 && (lofsdebug - 10) >= level))
		printf(str, a1, a2, a3, a4, a5, a6, a7, a8, a9);
	return (0);
}
#endif