#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/stat.h>
#include <sys/debug.h>
#include <sys/policy.h>
#include <sys/fs/tmpnode.h>
#include <sys/fs/tmp.h>
#include <sys/vtrace.h>
/*
 * Forward declarations for the file-local helpers that are defined after
 * their first use further down in this file.
 */
static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *);
static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *,
char *, struct tmpnode *, struct tdirent *, struct cred *);
static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *);
static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *,
enum de_op, struct tmpnode **, struct cred *);
static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
enum de_op, struct tmpnode *);
/*
 * Number of buckets in the name hash table and number of mutexes guarding
 * it.  T_HASH_INDEX/T_MUTEX_INDEX select an element by masking with
 * (size - 1), so both sizes must be powers of two.
 */
#define T_HASH_SIZE 8192
#define T_MUTEX_SIZE 64
/*
 * Knobs and counter for the rw_tryenter() retry loop in tdirenter():
 *   tmpfs_rename_backoff_delay  value handed to delay() between attempts
 *   tmpfs_rename_backoff_tries  retry limit; 0 means retry without limit
 *   tmpfs_rename_loops          incremented once per failed attempt
 */
clock_t tmpfs_rename_backoff_delay = 1;
unsigned int tmpfs_rename_backoff_tries = 0;
unsigned long tmpfs_rename_loops = 0;
/*
 * One hash table shared by every directory entry handled in this file; the
 * chains are linked through td_link.  There are fewer mutexes than buckets,
 * so each mutex covers every bucket whose index has the same low bits.
 */
static struct tdirent *t_hashtable[T_HASH_SIZE];
static kmutex_t t_hashmutex[T_MUTEX_SIZE];
/* Map a hash value to its bucket index and to the mutex covering it. */
#define T_HASH_INDEX(a) ((a) & (T_HASH_SIZE-1))
#define T_MUTEX_INDEX(a) ((a) & (T_MUTEX_SIZE-1))
/*
 * Compute the hash of directory entry 'name' within parent 'tp' and store
 * it in the lvalue 'hash'.
 *
 * The seed is the parent pointer truncated to uint_t and shifted right by
 * eight bits; each byte of the NUL-terminated name is then folded in as
 * hash = hash * 17 + byte.  'name' must be a char pointer; 'hash' is
 * evaluated several times and so must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and can be used safely as the body of an if/else.
 */
#define	TMPFS_HASH(tp, name, hash)					\
	do {								\
		char Xc, *Xcp;						\
		(hash) = (uint_t)(uintptr_t)(tp) >> 8;			\
		for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)		\
			(hash) = ((hash) << 4) + (hash) + (uint_t)Xc;	\
	} while (0)
void
tmpfs_hash_init(void)
{
int ix;
for (ix = 0; ix < T_MUTEX_SIZE; ix++)
mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
}
/*
 * Insert directory entry 't' into the global name hash.
 *
 * The hash is derived from t->td_parent and t->td_name, both of which must
 * already be set, and is cached in t->td_hash.  The entry is pushed onto the
 * head of its bucket chain while the covering mutex is held.
 */
static void
tmpfs_hash_in(struct tdirent *t)
{
	uint_t hval;
	struct tdirent **bucket;
	kmutex_t *lock;

	TMPFS_HASH(t->td_parent, t->td_name, hval);
	t->td_hash = hval;

	bucket = &t_hashtable[T_HASH_INDEX(hval)];
	lock = &t_hashmutex[T_MUTEX_INDEX(hval)];

	mutex_enter(lock);
	t->td_link = *bucket;
	*bucket = t;
	mutex_exit(lock);
}
/*
 * Unlink directory entry 't' from the global name hash.
 *
 * The bucket is chosen from the hash cached in t->td_hash.  The chain is
 * walked under the covering mutex until the link that points at 't' is
 * found; 't' must be on the chain, since the walk has no end-of-list check.
 */
static void
tmpfs_hash_out(struct tdirent *t)
{
	uint_t hval = t->td_hash;
	kmutex_t *lock = &t_hashmutex[T_MUTEX_INDEX(hval)];
	struct tdirent **linkp;

	mutex_enter(lock);
	for (linkp = &t_hashtable[T_HASH_INDEX(hval)]; *linkp != t;
	    linkp = &(*linkp)->td_link)
		;
	*linkp = t->td_link;
	mutex_exit(lock);
}
/*
 * Repoint the existing directory entry 'tdp' at tmpnode 'fromtp'.
 *
 * The store is made while holding the hash mutex that covers the entry, the
 * same mutex tmpfs_hash_lookup() holds while it reads td_tmpnode.
 */
static void
tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp)
{
	kmutex_t *lock = &t_hashmutex[T_MUTEX_INDEX(tdp->td_hash)];

	mutex_enter(lock);
	tdp->td_tmpnode = fromtp;
	mutex_exit(lock);
}
/*
 * Find the directory entry called 'name' in directory 'parent'.
 *
 * An entry matches when its cached hash, its parent pointer and its name
 * all agree.  On a match:
 *   - if 'hold' is non-zero, tmpnode_hold() is applied to the entry's
 *     tmpnode before the hash mutex is dropped;
 *   - if 'found' is non-NULL, the entry's tmpnode is stored through it.
 *
 * Returns the matching tdirent, or NULL if there is none (in which case
 * '*found' is left untouched).
 */
static struct tdirent *
tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold,
    struct tmpnode **found)
{
	struct tdirent *ent;
	struct tmpnode *node;
	kmutex_t *lock;
	uint_t hval;

	TMPFS_HASH(parent, name, hval);
	lock = &t_hashmutex[T_MUTEX_INDEX(hval)];

	mutex_enter(lock);
	for (ent = t_hashtable[T_HASH_INDEX(hval)]; ent != NULL;
	    ent = ent->td_link) {
		if (ent->td_hash != hval || ent->td_parent != parent ||
		    strcmp(ent->td_name, name) != 0)
			continue;

		node = ent->td_tmpnode;
		if (hold) {
			ASSERT(node);
			tmpnode_hold(node);
		}
		if (found)
			*found = node;
		break;
	}
	mutex_exit(lock);

	/* ent is NULL here exactly when the chain was exhausted. */
	return (ent);
}
int
tdirlookup(
struct tmpnode *parent,
char *name,
struct tmpnode **foundtp,
struct cred *cred)
{
int error;
*foundtp = NULL;
if (parent->tn_type != VDIR)
return (ENOTDIR);
if ((error = tmp_taccess(parent, VEXEC, cred)))
return (error);
if (*name == '\0') {
tmpnode_hold(parent);
*foundtp = parent;
return (0);
}
if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) {
ASSERT(*foundtp);
return (0);
}
return (ENOENT);
}
/*
 * Enter 'name' into directory 'dir' on behalf of the operation 'op'
 * (DE_CREATE, DE_MKDIR, DE_LINK or DE_RENAME).
 *
 * The caller must hold dir->tn_rwlock as writer (asserted).  That lock may
 * be dropped and re-taken inside this function while waiting for
 * tp->tn_rwlock.
 *
 *   tm          passed to tdirmaketnode() when a new tmpnode is created
 *   tp          for DE_LINK/DE_RENAME, the existing tmpnode being entered
 *   fromparent  passed through to tdirrename()/tdiraddentry()
 *   va          attributes for a node created by DE_CREATE/DE_MKDIR
 *   tpp         if non-NULL, receives the resulting tmpnode; when a
 *               create/mkdir hits an existing entry it receives that entry's
 *               tmpnode, still carrying the hold taken by the lookup
 *   ctp         only handed to vnevent_rename_dest()
 *
 * Returns 0 or an errno value.  Set directly here: EACCES (name contains
 * '/'), EBUSY (retry limit reached), ENOENT, EMLINK, EINVAL and EEXIST.
 * Errors from tdircheckpath(), tdirrename(), tmp_taccess(), tdirmaketnode()
 * and tdiraddentry() are passed through.
 *
 * NOTE(review): when the entry already exists, op is DE_CREATE/DE_MKDIR and
 * tpp is NULL, the hold is dropped and 0 is returned without creating
 * anything -- confirm that callers always supply tpp for those operations.
 */
int
tdirenter(
struct tmount *tm,
struct tmpnode *dir,
char *name,
enum de_op op,
struct tmpnode *fromparent,
struct tmpnode *tp,
struct vattr *va,
struct tmpnode **tpp,
struct cred *cred,
caller_context_t *ctp)
{
struct tdirent *tdp;
struct tmpnode *found = NULL;
int error = 0;
char *s;
ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
ASSERT(dir->tn_type == VDIR);
/* A name component may not contain '/'. */
for (s = name; *s; s++)
if (*s == '/')
return (EACCES);
if (name[0] == '\0')
panic("tdirenter: NULL name");
/*
 * For link and rename, take the source's rwlock and bump its link count
 * up front; the bump is undone at 'out' if anything later fails.
 */
if (op == DE_LINK || op == DE_RENAME) {
if (tp != dir) {
unsigned int tries = 0;
/*
 * tp's lock is only ever try-locked while dir's lock is held.  On failure
 * dir's lock is dropped (with a vnode hold on dir across the gap), we
 * delay(), re-take dir's lock and try again.  Returning EBUSY here is safe
 * because the link count has not been bumped yet.
 */
while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) {
tmpfs_rename_loops++;
if (tmpfs_rename_backoff_tries != 0) {
if (tries > tmpfs_rename_backoff_tries)
return (EBUSY);
tries++;
}
VN_HOLD(TNTOV(dir));
rw_exit(&dir->tn_rwlock);
delay(tmpfs_rename_backoff_delay);
rw_enter(&dir->tn_rwlock, RW_WRITER);
VN_RELE(TNTOV(dir));
}
}
mutex_enter(&tp->tn_tlock);
/* A zero link count means the source no longer has any name. */
if (tp->tn_nlink == 0) {
mutex_exit(&tp->tn_tlock);
if (tp != dir)
rw_exit(&tp->tn_rwlock);
return (ENOENT);
}
if (tp->tn_nlink == MAXLINK) {
mutex_exit(&tp->tn_tlock);
if (tp != dir)
rw_exit(&tp->tn_rwlock);
return (EMLINK);
}
tp->tn_nlink++;
gethrestime(&tp->tn_ctime);
mutex_exit(&tp->tn_tlock);
if (tp != dir)
rw_exit(&tp->tn_rwlock);
}
/* The target directory itself has been removed. */
if (dir->tn_nlink == 0) {
error = ENOENT;
goto out;
}
/*
 * A rename into itself is invalid.  When a directory moves to a different
 * parent, tdircheckpath() must confirm that the source is not an ancestor
 * of the target directory.
 */
if (op == DE_RENAME) {
if (tp == dir) {
error = EINVAL;
goto out;
}
if (tp->tn_type == VDIR) {
if ((fromparent != dir) &&
(error = tdircheckpath(tp, dir, cred))) {
goto out;
}
}
}
/* Look for an existing entry; on a hit 'found' comes back held. */
tdp = tmpfs_hash_lookup(name, dir, 1, &found);
if (tdp) {
ASSERT(found);
switch (op) {
case DE_CREATE:
case DE_MKDIR:
/* With tpp, the existing node (and its hold) goes to the caller. */
if (tpp) {
*tpp = found;
error = EEXIST;
} else {
tmpnode_rele(found);
}
break;
case DE_RENAME:
/* Replace the existing target entry with the source tmpnode. */
error = tdirrename(fromparent, tp,
dir, name, found, tdp, cred);
if (error == 0) {
if (found != NULL) {
vnevent_rename_dest(TNTOV(found),
TNTOV(dir), name, ctp);
}
}
tmpnode_rele(found);
break;
case DE_LINK:
/* A link may not replace an existing name. */
error = EEXIST;
tmpnode_rele(found);
break;
}
} else {
/* No such entry yet: creating one needs write access to dir. */
if (error = tmp_taccess(dir, VWRITE, cred))
goto out;
if (op == DE_CREATE || op == DE_MKDIR) {
error = tdirmaketnode(dir, tm, va, op, &tp, cred);
if (error)
goto out;
}
if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
if (op == DE_CREATE || op == DE_MKDIR) {
/*
 * Adding the entry failed: tear down the tmpnode that was just made.  For
 * a directory, tdirtrunc() removes the entries that tdirinit() created.
 */
rw_enter(&tp->tn_rwlock, RW_WRITER);
if ((tp->tn_type) == VDIR) {
ASSERT(tdp == NULL);
tdirtrunc(tp);
}
mutex_enter(&tp->tn_tlock);
tp->tn_nlink = 0;
mutex_exit(&tp->tn_tlock);
gethrestime(&tp->tn_ctime);
rw_exit(&tp->tn_rwlock);
tmpnode_rele(tp);
tp = NULL;
}
} else if (tpp) {
*tpp = tp;
} else if (op == DE_CREATE || op == DE_MKDIR) {
/* The caller does not want the new node, so drop our reference. */
tmpnode_rele(tp);
}
}
out:
/* Undo the link-count bump made at the top for link/rename. */
if (error && (op == DE_LINK || op == DE_RENAME)) {
DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
gethrestime(&tp->tn_ctime);
}
return (error);
}
int
tdirdelete(
struct tmpnode *dir,
struct tmpnode *tp,
char *nm,
enum dr_op op,
struct cred *cred)
{
struct tdirent *tpdp;
int error;
size_t namelen;
struct tmpnode *tnp;
timestruc_t now;
ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
ASSERT(dir->tn_type == VDIR);
if (nm[0] == '\0')
panic("tdirdelete: NULL name for %p", (void *)tp);
if (nm[0] == '.') {
if (nm[1] == '\0')
return (EINVAL);
if (nm[1] == '.' && nm[2] == '\0')
return (EEXIST);
}
if (error = tmp_taccess(dir, VEXEC|VWRITE, cred))
return (error);
if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0)
return (error);
if (dir->tn_dir == NULL)
return (ENOENT);
tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp);
if (tpdp == NULL) {
return (ENOENT);
}
if (tp != tnp)
return (ENOENT);
tmpfs_hash_out(tpdp);
ASSERT(tpdp->td_next != tpdp);
ASSERT(tpdp->td_prev != tpdp);
if (tpdp->td_prev) {
tpdp->td_prev->td_next = tpdp->td_next;
}
if (tpdp->td_next) {
tpdp->td_next->td_prev = tpdp->td_prev;
}
if (dir->tn_dir->td_prev == tpdp) {
dir->tn_dir->td_prev = tpdp->td_prev;
}
ASSERT(tpdp->td_next != tpdp);
ASSERT(tpdp->td_prev != tpdp);
namelen = strlen(tpdp->td_name) + 1;
tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
dir->tn_size -= (sizeof (struct tdirent) + namelen);
dir->tn_dirents--;
gethrestime(&now);
dir->tn_mtime = now;
dir->tn_ctime = now;
tp->tn_ctime = now;
ASSERT(tp->tn_nlink > 0);
DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
if (op == DR_RMDIR && tp->tn_type == VDIR) {
tdirtrunc(tp);
ASSERT(tp->tn_nlink == 0);
}
return (0);
}
/*
 * Give the new directory 'dir' its initial "." and ".." entries; ".."
 * refers to 'parent'.
 *
 * parent->tn_rwlock must be write-held by the caller (asserted).  Both
 * entries are allocated with TMP_MUSTHAVE and their results are used
 * without a NULL check.  The name of each entry lives directly behind its
 * struct tdirent in the same allocation.
 *
 * The names are NUL-terminated explicitly before tmpfs_hash_in() runs,
 * because the hash walks td_name up to the terminator; termination does not
 * depend on whether tmp_memalloc() returns zeroed memory.
 */
void
tdirinit(
    struct tmpnode *parent,
    struct tmpnode *dir)
{
	const size_t dot_size = sizeof (struct tdirent) + 2;	/* "." */
	const size_t dotdot_size = sizeof (struct tdirent) + 3;	/* ".." */
	struct tdirent *dot, *dotdot;
	timestruc_t now;

	ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
	ASSERT(dir->tn_type == VDIR);

	dot = tmp_memalloc(dot_size, TMP_MUSTHAVE);
	dotdot = tmp_memalloc(dotdot_size, TMP_MUSTHAVE);

	/* "." refers to the directory itself. */
	dot->td_tmpnode = dir;
	dot->td_offset = 0;
	dot->td_name = (char *)dot + sizeof (struct tdirent);
	dot->td_name[0] = '.';
	dot->td_name[1] = '\0';
	dot->td_parent = dir;
	tmpfs_hash_in(dot);

	/* ".." refers to the parent directory. */
	dotdot->td_tmpnode = parent;
	dotdot->td_offset = 1;
	dotdot->td_name = (char *)dotdot + sizeof (struct tdirent);
	dotdot->td_name[0] = '.';
	dotdot->td_name[1] = '.';
	dotdot->td_name[2] = '\0';
	dotdot->td_parent = dir;
	tmpfs_hash_in(dotdot);

	/*
	 * Build the entry list.  The first entry's td_prev is not a back
	 * pointer: tdiraddentry() uses it as the place to start searching for
	 * a free offset slot, so it begins at the last entry.
	 */
	dot->td_next = dotdot;
	dot->td_prev = dotdot;
	dotdot->td_next = NULL;
	dotdot->td_prev = dot;

	gethrestime(&now);
	dir->tn_mtime = now;
	dir->tn_ctime = now;

	/*
	 * ".." counts as a link on the parent unless 'dir' is flagged
	 * V_XATTRDIR.  This comes before dir->tn_nlink is set below, so if
	 * 'parent' and 'dir' are the same node the final count is still 2.
	 */
	if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) {
		INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock);
		parent->tn_ctime = now;
	}

	dir->tn_dir = dot;
	/* Same accounting tdirtrunc() subtracts: header + name + NUL each. */
	dir->tn_size = dot_size + dotdot_size;
	dir->tn_dirents = 2;
	dir->tn_nlink = 2;
}
/*
 * Remove every entry, "." and ".." included, from directory 'dir'.
 *
 * dir->tn_rwlock must be write-held by the caller (asserted).  For each
 * entry the link count of the tmpnode it names is decremented, with one
 * exception: the ".." entry of a directory flagged V_XATTRDIR, matching
 * tdirinit(), which does not count that reference on the parent.  Each entry
 * is then taken out of the hash and freed.  Afterwards the directory has no
 * entries and a size of zero.
 */
void
tdirtrunc(struct tmpnode *dir)
{
	struct tdirent *ent;
	timestruc_t now;
	int xattrdir;

	ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
	ASSERT(dir->tn_type == VDIR);

	xattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;

	/* Keep popping the head of the list until nothing is left. */
	while ((ent = dir->tn_dir) != NULL) {
		struct tmpnode *target;
		size_t entsize;

		ASSERT(ent->td_next != ent);
		ASSERT(ent->td_prev != ent);
		ASSERT(ent->td_tmpnode);

		dir->tn_dir = ent->td_next;
		entsize = sizeof (struct tdirent) + strlen(ent->td_name) + 1;

		target = ent->td_tmpnode;
		if (!xattrdir || strcmp("..", ent->td_name) != 0) {
			ASSERT(target->tn_nlink > 0);
			DECR_COUNT(&target->tn_nlink, &target->tn_tlock);
		}

		tmpfs_hash_out(ent);
		tmp_memfree(ent, entsize);
		dir->tn_size -= entsize;
		dir->tn_dirents--;
	}

	gethrestime(&now);
	dir->tn_mtime = now;
	dir->tn_ctime = now;

	ASSERT(dir->tn_dir == NULL);
	ASSERT(dir->tn_size == 0);
	ASSERT(dir->tn_dirents == 0);
}
static int
tdircheckpath(
struct tmpnode *fromtp,
struct tmpnode *toparent,
struct cred *cred)
{
int error = 0;
struct tmpnode *dir, *dotdot;
struct tdirent *tdp;
ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot);
if (tdp == NULL)
return (ENOENT);
ASSERT(dotdot);
if (dotdot == toparent) {
tmpnode_rele(dotdot);
return (0);
}
for (;;) {
if (dotdot == fromtp) {
tmpnode_rele(dotdot);
error = EINVAL;
break;
}
dir = dotdot;
error = tdirlookup(dir, "..", &dotdot, cred);
if (error) {
tmpnode_rele(dir);
break;
}
if (dir == dotdot) {
tmpnode_rele(dir);
tmpnode_rele(dotdot);
break;
}
tmpnode_rele(dir);
}
return (error);
}
/*
 * Rename onto an existing name: make the existing entry 'where' in
 * 'toparent', which currently names tmpnode 'to', refer to 'fromtp' instead.
 *
 * toparent->tn_rwlock must be write-held by the caller (asserted).  This
 * function takes the rwlocks of 'fromtp' and 'to' itself and releases them
 * before returning.
 *
 * Returns 0 on success, or: ESAME if source and target are the same tmpnode,
 * EXDEV if the nodes are not all on the same vfs, EISDIR/ENOTDIR if exactly
 * one of source and target is a directory, EBUSY if the target directory
 * cannot be vfs-locked or has something mounted on it, EEXIST if the target
 * directory has more than two entries or links, or the error from
 * tmp_taccess()/tmp_sticky_remove_access().
 */
static int
tdirrename(
struct tmpnode *fromparent,
struct tmpnode *fromtp,
struct tmpnode *toparent,
char *nm,
struct tmpnode *to,
struct tdirent *where,
struct cred *cred)
{
int error = 0;
int doingdirectory;
timestruc_t now;
/* 'nm' is otherwise unused; the self-assignment is compiled only for lint. */
#if defined(lint)
nm = nm;
#endif
ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
/* Renaming something onto itself is reported with ESAME, before any lock is taken. */
if (fromtp == to)
return (ESAME);
rw_enter(&fromtp->tn_rwlock, RW_READER);
rw_enter(&to->tn_rwlock, RW_READER);
/* Source, target and target directory must share one vfs. */
if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp ||
to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) {
error = EXDEV;
goto out;
}
/* Need write access to the directory and must pass the sticky check. */
if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 ||
(error = tmp_sticky_remove_access(toparent, to, cred)) != 0)
goto out;
/*
 * Source and target must both be directories or both be non-directories.
 * A target directory must also be lockable, not mounted on, and hold
 * nothing beyond its initial two entries/links.
 */
doingdirectory = (fromtp->tn_type == VDIR);
if (to->tn_type == VDIR) {
if (!doingdirectory) {
error = EISDIR;
goto out;
}
/*
 * A non-zero return from vn_vfswlock() is treated as failure.  Once taken,
 * the vfs lock is released on every path below.
 */
if (vn_vfswlock(TNTOV(to))) {
error = EBUSY;
goto out;
}
if (vn_mountedvfs(TNTOV(to)) != NULL) {
vn_vfsunlock(TNTOV(to));
error = EBUSY;
goto out;
}
mutex_enter(&to->tn_tlock);
if (to->tn_dirents > 2 || to->tn_nlink > 2) {
mutex_exit(&to->tn_tlock);
vn_vfsunlock(TNTOV(to));
error = EEXIST;
/* The target's atime is updated on this failure path. */
gethrestime(&to->tn_atime);
goto out;
}
mutex_exit(&to->tn_tlock);
} else if (doingdirectory) {
error = ENOTDIR;
goto out;
}
/* All checks passed: repoint the existing entry at the source tmpnode. */
tmpfs_hash_change(where, fromtp);
gethrestime(&now);
toparent->tn_mtime = now;
toparent->tn_ctime = now;
/*
 * Switch to a write lock on the target.  This is a drop followed by a
 * re-acquire, not an atomic upgrade.
 */
rw_exit(&to->tn_rwlock);
rw_enter(&to->tn_rwlock, RW_WRITER);
/* The target has lost the name that was just taken over. */
DECR_COUNT(&to->tn_nlink, &to->tn_tlock);
to->tn_ctime = now;
if (doingdirectory) {
vn_vfsunlock(TNTOV(to));
/* Remove the replaced directory's own entries; its link count must reach 0. */
tdirtrunc(to);
ASSERT(to->tn_nlink == 0);
/* The moved directory's ".." must follow it to the new parent. */
if (fromparent != toparent)
tdirfixdotdot(fromtp, fromparent, toparent);
}
out:
/* to->tn_rwlock is held as reader or writer here, depending on the path. */
rw_exit(&to->tn_rwlock);
rw_exit(&fromtp->tn_rwlock);
return (error);
}
/*
 * Repoint the ".." entry of directory 'fromtp' from 'fromparent' to
 * 'toparent' and move the corresponding link from the old parent to the new
 * one.
 *
 * toparent->tn_rwlock must be held by the caller (asserted).  'fromtp' is
 * expected to have a ".." entry; the lookup result is used without a NULL
 * check.  'fromparent' may be NULL, in which case no old parent is adjusted.
 * The old parent's link count is only decremented if it is not already zero.
 */
static void
tdirfixdotdot(
    struct tmpnode *fromtp,
    struct tmpnode *fromparent,
    struct tmpnode *toparent)
{
	struct tdirent *parent_ent;

	ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock));

	/* The new parent gains the link represented by "..". */
	INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock);
	gethrestime(&toparent->tn_ctime);

	parent_ent = tmpfs_hash_lookup("..", fromtp, 0, NULL);
	ASSERT(parent_ent->td_tmpnode == fromparent);
	parent_ent->td_tmpnode = toparent;

	if (fromparent == NULL)
		return;

	/* The old parent loses that link. */
	mutex_enter(&fromparent->tn_tlock);
	if (fromparent->tn_nlink != 0) {
		fromparent->tn_nlink--;
		gethrestime(&fromparent->tn_ctime);
	}
	mutex_exit(&fromparent->tn_tlock);
}
/*
 * Allocate a directory entry called 'name' for tmpnode 'tp' and link it into
 * directory 'dir', in both the name hash and the directory's entry list.
 *
 * For a DE_RENAME of a directory, the moved directory's ".." is first
 * repointed from 'fromtp' (its old parent) to 'dir'.
 *
 * Returns 0 on success, ENOENT if 'dir' has no entries at all, EXDEV if
 * 'tp' and 'dir' are on different vfs's, or ENOSPC if the entry cannot be
 * allocated.
 */
static int
tdiraddentry(
struct tmpnode *dir,
struct tmpnode *tp,
char *name,
enum de_op op,
struct tmpnode *fromtp)
{
struct tdirent *tdp, *tpdp;
size_t namelen, alloc_size;
timestruc_t now;
/* An initialised directory always has entries; an empty list means it has been truncated. */
if (dir->tn_dir == NULL)
return (ENOENT);
if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp)
return (EXDEV);
/* The entry header and its name (with NUL) share one allocation. */
namelen = strlen(name) + 1;
alloc_size = namelen + sizeof (struct tdirent);
tdp = tmp_memalloc(alloc_size, 0);
if (tdp == NULL)
return (ENOSPC);
/* Nothing below can fail, so ".." can be rewritten now. */
if ((op == DE_RENAME) && (tp->tn_type == VDIR))
tdirfixdotdot(tp, fromtp, dir);
dir->tn_size += alloc_size;
dir->tn_dirents++;
tdp->td_tmpnode = tp;
tdp->td_parent = dir;
tdp->td_name = (char *)tdp + sizeof (struct tdirent);
(void) strcpy(tdp->td_name, name);
tmpfs_hash_in(tdp);
/*
 * Pick an offset for the new entry.  The first entry's td_prev is used as
 * the place to start searching.  From there, walk forward while the next
 * entry's offset is adjacent (differs by at most 1); stop at the first gap
 * or at the end of the list.
 */
tpdp = dir->tn_dir->td_prev;
while (tpdp->td_next != NULL && (tpdp->td_next->td_offset -
tpdp->td_offset) <= 1) {
ASSERT(tpdp->td_next != tpdp);
ASSERT(tpdp->td_prev != tpdp);
ASSERT(tpdp->td_next->td_offset > tpdp->td_offset);
tpdp = tpdp->td_next;
}
tdp->td_offset = tpdp->td_offset + 1;
/*
 * Choose where the next search starts.  If we stopped at the tail and its
 * offset is more than twice the entry count (tn_dirents already includes
 * the new entry), restart from the second entry so that earlier gaps get
 * reused; otherwise continue from the entry just added.
 */
if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents)
dir->tn_dir->td_prev = dir->tn_dir->td_next;
else
dir->tn_dir->td_prev = tdp;
ASSERT(tpdp->td_next != tpdp);
ASSERT(tpdp->td_prev != tpdp);
/* Splice the new entry into the list immediately after tpdp. */
tdp->td_next = tpdp->td_next;
if (tdp->td_next) {
tdp->td_next->td_prev = tdp;
}
tdp->td_prev = tpdp;
tpdp->td_next = tdp;
ASSERT(tdp->td_next != tdp);
ASSERT(tdp->td_prev != tdp);
ASSERT(tpdp->td_next != tpdp);
ASSERT(tpdp->td_prev != tpdp);
gethrestime(&now);
dir->tn_mtime = now;
dir->tn_ctime = now;
return (0);
}
/*
 * Allocate and initialise a new tmpnode for a DE_CREATE or DE_MKDIR in
 * directory 'dir', using the attributes in 'va'.
 *
 * The node is allocated with TMP_MUSTHAVE and used without a NULL check.
 * Ownership and mode rules applied here:
 *   - uid is the caller's uid from 'cred';
 *   - gid is va->va_gid when AT_GID is set and that gid equals the
 *     directory's gid, the caller is a member of it, or
 *     secpolicy_vnode_create_gid() allows it; otherwise it is the
 *     directory's gid if the directory has VSGID set, else the caller's gid;
 *   - a new directory under a VSGID directory gets VSGID; otherwise VSGID
 *     is cleared if secpolicy_vnode_setids_setgids() refuses it.
 * For DE_MKDIR the "." and ".." entries are created with tdirinit().
 *
 * Returns 0 with the new node in '*newnode', or EOVERFLOW if a requested
 * atime/mtime fails TIMESPEC_OVERFLOW().
 */
static int
tdirmaketnode(
    struct tmpnode *dir,
    struct tmount *tm,
    struct vattr *va,
    enum de_op op,
    struct tmpnode **newnode,
    struct cred *cred)
{
	struct tmpnode *node;
	enum vtype type;

	ASSERT(va != NULL);
	ASSERT(op == DE_CREATE || op == DE_MKDIR);

	/* Reject timestamps that do not fit before allocating anything. */
	if ((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime))
		return (EOVERFLOW);
	if ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime))
		return (EOVERFLOW);

	type = va->va_type;
	node = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
	tmpnode_init(tm, node, va, cred);

	/* The ISXATTR flag is inherited from the containing directory. */
	if (dir->tn_flags & ISXATTR)
		node->tn_flags |= ISXATTR;

	/* Only block and character devices carry a real device number. */
	if (type == VBLK || type == VCHR)
		node->tn_rdev = va->va_rdev;
	else
		node->tn_rdev = NODEV;
	node->tn_vnode->v_rdev = node->tn_rdev;

	node->tn_vnode->v_type = type;
	node->tn_uid = crgetuid(cred);

	if ((va->va_mask & AT_GID) &&
	    (va->va_gid == dir->tn_gid || groupmember(va->va_gid, cred) ||
	    secpolicy_vnode_create_gid(cred) == 0))
		node->tn_gid = va->va_gid;
	else if (dir->tn_mode & VSGID)
		node->tn_gid = dir->tn_gid;
	else
		node->tn_gid = crgetgid(cred);

	if ((dir->tn_mode & VSGID) && type == VDIR) {
		node->tn_mode |= VSGID;
	} else if ((node->tn_mode & VSGID) &&
	    secpolicy_vnode_setids_setgids(cred, node->tn_gid) != 0) {
		node->tn_mode &= ~VSGID;
	}

	if (va->va_mask & AT_ATIME)
		node->tn_atime = va->va_atime;
	if (va->va_mask & AT_MTIME)
		node->tn_mtime = va->va_mtime;

	if (op == DE_MKDIR)
		tdirinit(dir, node);

	*newnode = node;
	return (0);
}