#include <sys/flock_impl.h>
#include <sys/vfs.h>
#include <sys/t_lock.h>
#include <sys/callb.h>
#include <sys/clconf.h>
#include <sys/cladm.h>
#include <sys/nbmlock.h>
#include <sys/cred.h>
#include <sys/policy.h>
/*
 * Allocation/free counters for the lock-graph objects; useful for
 * spotting leaks of lock descriptors, edges and process-graph nodes.
 */
int flk_lock_allocs;
int flk_lock_frees;
int edge_allocs;
int edge_frees;
int flk_proc_vertex_allocs;
int flk_proc_edge_allocs;
int flk_proc_vertex_frees;
int flk_proc_edge_frees;

/* Serializes access to the lock-manager status (see flk_get_lockmgr_status). */
static kmutex_t flock_lock;
/*
 * Debug-only graph consistency checks.  In DEBUG kernels they run only
 * when check_debug is set non-zero; in non-DEBUG kernels they compile
 * to nothing.
 */
#ifdef DEBUG
int check_debug = 0;	/* set non-zero (e.g. via mdb) to enable checks */
#define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \
check_active_locks(gp);
#define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \
check_sleeping_locks(gp);
#define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \
if (check_debug) \
check_owner_locks(gp, pid, sysid, vp);
/* Panic if a lock attempts a state transition the state machine forbids. */
#define CHECK_LOCK_TRANSITION(old_state, new_state) \
{ \
if (check_lock_transition(old_state, new_state)) { \
cmn_err(CE_PANIC, "Illegal lock transition \
from %d to %d", old_state, new_state); \
} \
}
#else
#define CHECK_ACTIVE_LOCKS(gp)
#define CHECK_SLEEPING_LOCKS(gp)
#define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
#define CHECK_LOCK_TRANSITION(old_state, new_state)
#endif
/* kmem cache from which graph edges (edge_t) are allocated. */
struct kmem_cache *flk_edge_cache;

/* Hash table of lock graphs, one per hash bucket of vnodes. */
graph_t *lock_graph[HASH_SIZE];
/* Process-level graph used for deadlock detection across graphs. */
proc_graph_t pgraph;

/*
 * Per-NLM-node registry state, used only in clustered configurations
 * (nlm_status_size != 0); protected by nlm_reg_lock.
 */
static flk_nlm_status_t *nlm_reg_status = NULL;
static kmutex_t nlm_reg_lock;
static uint_t nlm_status_size;

/* Per-zone lock-manager status, attached via flock_zone_key. */
struct flock_globals {
	flk_lockmgr_status_t flk_lockmgr_status;
	flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
};

zone_key_t flock_zone_key;
/* Forward declarations for the file-scope helpers defined below. */
static void create_flock(lock_descriptor_t *, flock64_t *);
static lock_descriptor_t *flk_get_lock(void);
static void flk_free_lock(lock_descriptor_t *lock);
static void flk_get_first_blocking_lock(lock_descriptor_t *request);
static int flk_process_request(lock_descriptor_t *);
static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
static edge_t *flk_get_edge(void);
static int flk_wait_execute_request(lock_descriptor_t *);
static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
static void flk_insert_active_lock(lock_descriptor_t *);
static void flk_delete_active_lock(lock_descriptor_t *, int);
static void flk_insert_sleeping_lock(lock_descriptor_t *);
static void flk_graph_uncolor(graph_t *);
static void flk_wakeup(lock_descriptor_t *, int);
static void flk_free_edge(edge_t *);
static void flk_recompute_dependencies(lock_descriptor_t *,
    lock_descriptor_t **, int, int);
static int flk_find_barriers(lock_descriptor_t *);
static void flk_update_barriers(lock_descriptor_t *);
static int flk_color_reachables(lock_descriptor_t *);
static int flk_canceled(lock_descriptor_t *);
static void flk_delete_locks_by_sysid(lock_descriptor_t *);
static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
static void wait_for_lock(lock_descriptor_t *);
static void unlock_lockmgr_granted(struct flock_globals *);
static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);

/* Cluster (PXFS/NLM) helpers. */
static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
static void cl_flk_wakeup_sleeping_nlm_locks(int);
static void cl_flk_unlock_nlm_granted(int);

#ifdef DEBUG
/* DEBUG-only consistency-check helpers. */
static int check_lock_transition(int, int);
static void check_sleeping_locks(graph_t *);
static void check_active_locks(graph_t *);
static int no_path(lock_descriptor_t *, lock_descriptor_t *);
static void path(lock_descriptor_t *, lock_descriptor_t *);
static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
#endif

static int flk_check_deadlock(lock_descriptor_t *);
static void flk_proc_graph_uncolor(void);
static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
static proc_edge_t *flk_get_proc_edge(void);
static void flk_proc_release(proc_vertex_t *);
static void flk_free_proc_edge(proc_edge_t *);
static void flk_update_proc_graph(edge_t *, int);
static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
    u_offset_t);
/*
 * Return the calling zone's flock globals.  The zone key must already
 * have been created (i.e. flk_zone_init has been registered).
 */
static struct flock_globals *
flk_get_globals(void)
{
	struct flock_globals *fg;

	ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
	fg = zone_getspecific(flock_zone_key, curproc->p_zone);
	return (fg);
}
static flk_lockmgr_status_t
flk_get_lockmgr_status(void)
{
struct flock_globals *fg;
ASSERT(MUTEX_HELD(&flock_lock));
if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
return (FLK_LOCKMGR_DOWN);
}
fg = flk_get_globals();
return (fg->flk_lockmgr_status);
}
/*
 * Set (F_OFD_SETLK/F_OFD_SETLKW, F_FLOCK/F_FLOCKW) or query
 * (F_OFD_GETLK) an OFD-style lock on the vnode underlying fp.
 * lckdat describes the requested lock; on a query it receives the
 * first blocking lock found.  Returns 0 or an errno value.
 *
 * NOTE(review): callers are expected to have normalized lckdat so that
 * whence/start/len are all zero (whole-file lock) — the VERIFYs below
 * enforce this.
 */
int
ofdlock(file_t *fp, int fcmd, flock64_t *lckdat, int flag, u_offset_t offset)
{
	int cmd = 0;
	vnode_t *vp;
	lock_descriptor_t stack_lock_request;
	lock_descriptor_t *lock_request;
	int error = 0;
	graph_t *gp;
	int serialize = 0;

	/* Everything except a query is a "set" operation. */
	if (fcmd != F_OFD_GETLK)
		cmd = SETFLCK;

	/* The *W variants are willing to block. */
	if (fcmd == F_OFD_SETLKW || fcmd == F_FLOCKW)
		cmd |= SLPFLCK;

	/* OFD locks are always whole-file. */
	VERIFY(lckdat->l_whence == 0);
	VERIFY(lckdat->l_start == 0);
	VERIFY(lckdat->l_len == 0);

	vp = fp->f_vnode;

	/* OFD locks are identified by the file_t, not by pid/sysid. */
	lckdat->l_pid = 0;
	lckdat->l_sysid = 0;

	/*
	 * A read lock needs the file open for reading, a write lock
	 * needs it open for writing.
	 */
	if ((fcmd == F_OFD_SETLK || fcmd == F_OFD_SETLKW) &&
	    ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
	    (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
		return (EBADF);

	if (lckdat->l_type == F_UNLCK || !(cmd & SETFLCK)) {
		/*
		 * Unlocks and queries never persist in the graph, so a
		 * stack-allocated descriptor suffices; initialize its
		 * edge lists to empty (self-linked).
		 */
		lock_request = &stack_lock_request;
		(void) bzero((caddr_t)lock_request,
		    sizeof (lock_descriptor_t));
		lock_request->l_edge.edge_in_next = &lock_request->l_edge;
		lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
		lock_request->l_status = FLK_INITIAL_STATE;
	} else {
		/* A real lock: allocate and remember it on the file_t. */
		lock_request = flk_get_lock();
		fp->f_filock = (struct filock *)lock_request;
	}
	lock_request->l_state = 0;
	lock_request->l_vnode = vp;
	lock_request->l_zoneid = getzoneid();
	lock_request->l_ofd = fp;	/* marks this as an OFD lock */

	error = flk_convert_lock_data(vp, lckdat, &lock_request->l_start,
	    &lock_request->l_end, offset);
	if (error)
		goto done;

	error = flk_check_lock_data(lock_request->l_start, lock_request->l_end,
	    MAXEND);
	if (error)
		goto done;

	ASSERT(lock_request->l_end >= lock_request->l_start);

	lock_request->l_type = lckdat->l_type;
	if (cmd & SLPFLCK)
		lock_request->l_state |= WILLING_TO_SLEEP_LOCK;

	if (!(cmd & SETFLCK)) {
		if (lock_request->l_type == F_RDLCK ||
		    lock_request->l_type == F_WRLCK)
			lock_request->l_state |= QUERY_LOCK;
	}
	lock_request->l_flock = (*lckdat);

	/*
	 * For a real lock on a vnode with mandatory-locking semantics,
	 * enter the NBMAND critical region to serialize with I/O.
	 */
	if (fcmd != F_OFD_GETLK && lock_request->l_type != F_UNLCK &&
	    nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_WRITER);
		serialize = 1;
	}

	gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
	mutex_enter(&gp->gp_mutex);

	lock_request->l_state |= REFERENCED_LOCK;
	lock_request->l_graph = gp;

	switch (lock_request->l_type) {
	case F_RDLCK:
	case F_WRLCK:
		if (IS_QUERY_LOCK(lock_request)) {
			flk_get_first_blocking_lock(lock_request);
			/* OFD blockers have no meaningful pid. */
			if (lock_request->l_ofd != NULL)
				lock_request->l_flock.l_pid = -1;
			(*lckdat) = lock_request->l_flock;
		} else {
			error = flk_process_request(lock_request);
		}
		break;

	case F_UNLCK:
		error = flk_execute_request(lock_request);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (lock_request == &stack_lock_request) {
		flk_set_state(lock_request, FLK_DEAD_STATE);
	} else {
		/* Drop our reference; free if nothing else needs it. */
		lock_request->l_state &= ~REFERENCED_LOCK;
		if ((error != 0) || IS_DELETED(lock_request)) {
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
	}

	mutex_exit(&gp->gp_mutex);
	if (serialize)
		nbl_end_crit(vp);

	return (error);

done:
	/* Early failure: no graph mutex held yet, just discard. */
	flk_set_state(lock_request, FLK_DEAD_STATE);
	if (lock_request != &stack_lock_request)
		flk_free_lock(lock_request);
	return (error);
}
/*
 * Remove the (single) OFD lock associated with fp, if any: cancel it if
 * it is still sleeping, or delete it and wake its dependents if it is
 * active.  Called with fp->f_tlock held.
 */
void
ofdcleanlock(file_t *fp)
{
	lock_descriptor_t *fplock, *lock, *nlock;
	vnode_t *vp;
	graph_t *gp;

	ASSERT(MUTEX_HELD(&fp->f_tlock));

	if ((fplock = (lock_descriptor_t *)fp->f_filock) == NULL)
		return;

	fp->f_filock = NULL;
	vp = fp->f_vnode;

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
	if (gp == NULL)
		return;
	mutex_enter(&gp->gp_mutex);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	/* First look for it on the sleep queue and cancel it there. */
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			nlock = lock->l_next;	/* CANCEL may unlink lock */
			if (fplock == lock) {
				CANCEL_WAKEUP(lock);
				break;
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	/* Otherwise it may be active: delete it and wake dependents. */
	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			nlock = lock->l_next;	/* delete unlinks lock */
			if (fplock == lock) {
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
				break;
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	mutex_exit(&gp->gp_mutex);
}
/*
 * Main entry point for record locking (fcntl/lockf/NFS lock manager).
 * cmd is a bit mask of SETFLCK, INOFLCK, SLPFLCK, RCMDLCK, PCMDLCK,
 * NBMLCK; lckdat describes the lock.  Returns 0 or an errno value
 * (or PXFS_LOCK_BLOCKED for a blocked PXFS request).
 */
int
reclock(vnode_t *vp, flock64_t *lckdat, int cmd, int flag, u_offset_t offset,
    flk_callback_t *flk_cbp)
{
	lock_descriptor_t stack_lock_request;
	lock_descriptor_t *lock_request;
	int error = 0;
	graph_t *gp;
	int nlmid;

	/*
	 * A set of a read (write) lock requires the file be open for
	 * reading (writing).
	 */
	if ((cmd & SETFLCK) &&
	    ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
	    (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
		return (EBADF);

	/*
	 * Unlocks and pure queries never stay in the graph, so use a
	 * stack descriptor with empty (self-linked) edge lists.
	 */
	if ((lckdat->l_type == F_UNLCK) ||
	    !((cmd & INOFLCK) || (cmd & SETFLCK))) {
		lock_request = &stack_lock_request;
		(void) bzero((caddr_t)lock_request,
		    sizeof (lock_descriptor_t));
		lock_request->l_edge.edge_in_next = &lock_request->l_edge;
		lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
		lock_request->l_status = FLK_INITIAL_STATE;
	} else {
		lock_request = flk_get_lock();
	}
	lock_request->l_state = 0;
	lock_request->l_vnode = vp;
	lock_request->l_zoneid = getzoneid();

	/*
	 * Convert the caller's offsets to an absolute [l_start, l_end]
	 * byte range.  Remote (NLM/PXFS) requests arrive already
	 * normalized to whence == 0.
	 */
	if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
		ASSERT(lckdat->l_whence == 0);
		lock_request->l_start = lckdat->l_start;
		lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
		    lckdat->l_start + (lckdat->l_len - 1);
	} else {
		error = flk_convert_lock_data(vp, lckdat,
		    &lock_request->l_start, &lock_request->l_end,
		    offset);
		if (error) {
			goto done;
		}

		error = flk_check_lock_data(lock_request->l_start,
		    lock_request->l_end, MAXEND);
		if (error) {
			goto done;
		}
	}

	ASSERT(lock_request->l_end >= lock_request->l_start);

	/* Translate the command bits into lock state flags. */
	lock_request->l_type = lckdat->l_type;
	if (cmd & INOFLCK)
		lock_request->l_state |= IO_LOCK;
	if (cmd & SLPFLCK)
		lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
	if (cmd & RCMDLCK)
		lock_request->l_state |= LOCKMGR_LOCK;
	if (cmd & NBMLCK)
		lock_request->l_state |= NBMAND_LOCK;
	if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
		lock_request->l_state |= PXFS_LOCK;
	}
	if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
		if (lock_request->l_type == F_RDLCK ||
		    lock_request->l_type == F_WRLCK)
			lock_request->l_state |= QUERY_LOCK;
	}
	lock_request->l_flock = (*lckdat);
	lock_request->l_callbacks = flk_cbp;

	/*
	 * Lock-manager requests are refused unless the lock manager is
	 * up: zone-wide status in the non-cluster case, per-NLM-node
	 * registry in the cluster case.
	 */
	if (IS_LOCKMGR(lock_request)) {
		if (nlm_status_size == 0) {	/* not booted as a cluster */
			mutex_enter(&flock_lock);
			if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
				mutex_exit(&flock_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&flock_lock);
		} else {			/* booted as a cluster */
			nlmid = GETNLMID(lock_request->l_flock.l_sysid);
			ASSERT(nlmid <= nlm_status_size && nlmid >= 0);

			mutex_enter(&nlm_reg_lock);
			/*
			 * First lock request from this NLM node:
			 * register it as up.
			 */
			if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
			    nlmid)) {
				FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
			} else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
			    nlmid)) {
				/* NLM node is known to be down. */
				mutex_exit(&nlm_reg_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&nlm_reg_lock);
		}
	}

	gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);

	/*
	 * An I/O lock request arrives with the vnode rwlock held;
	 * drop it before potentially sleeping in the graph, and
	 * reacquire it afterwards (below).
	 */
	if (IS_IO_LOCK(lock_request)) {
		VOP_RWUNLOCK(vp,
		    (lock_request->l_type == F_RDLCK) ?
		    V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
	}
	mutex_enter(&gp->gp_mutex);

	lock_request->l_state |= REFERENCED_LOCK;
	lock_request->l_graph = gp;

	switch (lock_request->l_type) {
	case F_RDLCK:
	case F_WRLCK:
		if (IS_QUERY_LOCK(lock_request)) {
			flk_get_first_blocking_lock(lock_request);
			/* OFD blockers have no meaningful pid. */
			if (lock_request->l_ofd != NULL)
				lock_request->l_flock.l_pid = -1;
			(*lckdat) = lock_request->l_flock;
			break;
		}

		/* process the request now */
		error = flk_process_request(lock_request);
		break;

	case F_UNLCK:
		/* unlock request will not block so execute it immediately */
		if (IS_LOCKMGR(lock_request) &&
		    flk_canceled(lock_request)) {
			error = 0;
		} else {
			error = flk_execute_request(lock_request);
		}
		break;

	case F_UNLKSYS:
		/*
		 * Privileged request to drop every lock owned by a
		 * given remote sysid (NFS server recovery).
		 */
		if (lock_request->l_flock.l_sysid == 0) {
			mutex_exit(&gp->gp_mutex);
			return (EINVAL);
		}
		if (secpolicy_nfs(CRED()) != 0) {
			mutex_exit(&gp->gp_mutex);
			return (EPERM);
		}
		flk_delete_locks_by_sysid(lock_request);
		lock_request->l_state &= ~REFERENCED_LOCK;
		flk_set_state(lock_request, FLK_DEAD_STATE);
		flk_free_lock(lock_request);
		mutex_exit(&gp->gp_mutex);
		return (0);

	default:
		error = EINVAL;
		break;
	}

	/*
	 * A blocked PXFS request stays in the graph; the reference is
	 * dropped and the caller gets PXFS_LOCK_BLOCKED back.
	 */
	if (error == PXFS_LOCK_BLOCKED) {
		lock_request->l_state &= ~REFERENCED_LOCK;
		mutex_exit(&gp->gp_mutex);
		return (error);
	}

	/*
	 * Reacquire the vnode rwlock for I/O locks.  On success the
	 * caller only needed the "would it block" answer, so undo the
	 * lock and report F_UNLCK back.
	 */
	if (IS_IO_LOCK(lock_request)) {
		(void) VOP_RWLOCK(vp,
		    (lock_request->l_type == F_RDLCK) ?
		    V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
		if (!error) {
			lckdat->l_type = F_UNLCK;
			flk_wakeup(lock_request, 1);
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
	}

	if (lock_request == &stack_lock_request) {
		flk_set_state(lock_request, FLK_DEAD_STATE);
	} else {
		/* Drop our reference; free if nothing else needs it. */
		lock_request->l_state &= ~REFERENCED_LOCK;
		if ((error != 0) || IS_DELETED(lock_request)) {
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
	}

	mutex_exit(&gp->gp_mutex);
	return (error);

done:
	/* Early failure: no graph mutex held yet, just discard. */
	flk_set_state(lock_request, FLK_DEAD_STATE);
	if (lock_request != &stack_lock_request)
		flk_free_lock(lock_request);
	return (error);
}
/*
 * Invoke every callback on the circular list cblist for phase "when".
 * BEFORE_SLEEP callbacks run in list order, AFTER_SLEEP callbacks in
 * reverse order.  Returns the (at most one, for BEFORE_SLEEP) non-NULL
 * callb_cpr_t produced by a callback, or NULL.
 */
callb_cpr_t *
flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
{
	callb_cpr_t *ret = NULL;
	callb_cpr_t *res;
	flk_callback_t *cb, *first;

	if (cblist == NULL)
		return (NULL);

	if (when == FLK_BEFORE_SLEEP) {
		/* Forward traversal, starting at the list head. */
		first = cblist;
		cb = first;
		do {
			res = (*cb->cb_callback)(when, cb->cb_data);
			if (res != NULL) {
				/* Only one callback may supply CPR info. */
				ASSERT(ret == NULL);
				ret = res;
			}
			cb = cb->cb_next;
		} while (cb != first);
	} else {
		/* Reverse traversal, starting at the list tail. */
		first = cblist->cb_prev;
		cb = first;
		do {
			res = (*cb->cb_callback)(when, cb->cb_data);
			if (res != NULL)
				ret = res;
			cb = cb->cb_prev;
		} while (cb != first);
	}
	return (ret);
}
/*
 * Initialize flk_cb as a single-element circular callback list with the
 * given function and argument.
 */
void
flk_init_callback(flk_callback_t *flk_cb,
    callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
{
	flk_cb->cb_next = flk_cb->cb_prev = flk_cb;
	flk_cb->cb_callback = cb_fcn;
	flk_cb->cb_data = cbdata;
}
/*
 * Initialize newcb and append it to the tail of the circular callback
 * list cblist.  With a NULL cblist, newcb simply becomes a list of one.
 */
void
flk_add_callback(flk_callback_t *newcb,
    callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
    void *cbdata, flk_callback_t *cblist)
{
	flk_callback_t *tail;

	flk_init_callback(newcb, cb_fcn, cbdata);

	if (cblist == NULL)
		return;

	/* Splice newcb in just before the head, i.e. at the tail. */
	tail = cblist->cb_prev;
	newcb->cb_prev = tail;
	newcb->cb_next = cblist;
	tail->cb_next = newcb;
	cblist->cb_prev = newcb;
}
/*
 * Unlink flk_cb from whatever circular callback list it is on, leaving
 * it self-linked (a valid one-element list).
 */
void
flk_del_callback(flk_callback_t *flk_cb)
{
	flk_callback_t *nxt = flk_cb->cb_next;
	flk_callback_t *prv = flk_cb->cb_prev;

	nxt->cb_prev = prv;
	prv->cb_next = nxt;

	flk_cb->cb_next = flk_cb->cb_prev = flk_cb;
}
void
flk_init(void)
{
uint_t i;
flk_edge_cache = kmem_cache_create("flk_edges",
sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
if (flk_edge_cache == NULL) {
cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
}
if (cluster_bootflags & CLUSTER_BOOTED) {
nlm_status_size = clconf_maximum_nodeid() + 1;
} else {
nlm_status_size = 0;
}
if (nlm_status_size != 0) {
nlm_reg_status = (flk_nlm_status_t *)
kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
KM_SLEEP);
for (i = 0; i < nlm_status_size; i++) {
nlm_reg_status[i] = FLK_NLM_UNKNOWN;
}
}
}
/*
 * Zone-create callback: allocate this zone's flock globals with the
 * lock manager initially marked up everywhere.
 */
/* ARGSUSED */
void *
flk_zone_init(zoneid_t zoneid)
{
	struct flock_globals *fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
	uint_t bucket;

	fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
	for (bucket = 0; bucket < HASH_SIZE; bucket++)
		fg->lockmgr_status[bucket] = FLK_LOCKMGR_UP;

	return (fg);
}
/*
 * Zone-destroy callback: release the zone's flock globals.
 */
/* ARGSUSED */
void
flk_zone_fini(zoneid_t zoneid, void *data)
{
	kmem_free(data, sizeof (struct flock_globals));
}
/*
 * Allocate and initialize a fresh lock descriptor.  Both edge lists
 * start empty (self-linked) and the lock is in FLK_INITIAL_STATE with
 * no process-graph vertex assigned yet.
 */
static lock_descriptor_t *
flk_get_lock(void)
{
	lock_descriptor_t *ld;

	ld = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
	cv_init(&ld->l_cv, NULL, CV_DRIVER, NULL);

	ld->l_edge.edge_in_next = &ld->l_edge;
	ld->l_edge.edge_in_prev = &ld->l_edge;
	ld->l_edge.edge_adj_next = &ld->l_edge;
	ld->l_edge.edge_adj_prev = &ld->l_edge;

	ld->pvertex = -1;	/* no process-graph vertex yet */
	ld->l_status = FLK_INITIAL_STATE;
	flk_lock_allocs++;
	return (ld);
}
/*
 * Free a dead lock descriptor.  If it is still referenced (e.g. the
 * requesting thread has not dropped REFERENCED_LOCK yet) only mark it
 * DELETED; the last dereference will call back in here to free it.
 * For OFD locks, also clear the back-pointer on the owning file_t.
 */
void
flk_free_lock(lock_descriptor_t *lock)
{
	file_t *fp;

	ASSERT(IS_DEAD(lock));

	/* Detach from the file_t if it still points at this lock. */
	if ((fp = lock->l_ofd) != NULL && fp->f_filock == (struct filock *)lock)
		fp->f_filock = NULL;

	if (IS_REFERENCED(lock)) {
		/* Defer the free until the reference is dropped. */
		lock->l_state |= DELETED_LOCK;
		return;
	}
	flk_lock_frees++;
	kmem_free((void *)lock, sizeof (lock_descriptor_t));
}
/*
 * Move a lock to new_state.  Once a lock has been interrupted or
 * cancelled, certain later transitions (grant, re-cancel, re-interrupt)
 * are ignored so the interrupted/cancelled status is not lost.
 * PXFS locks notify the cluster layer of every transition.
 */
void
flk_set_state(lock_descriptor_t *lock, int new_state)
{
	if (IS_INTERRUPTED(lock) &&
	    (new_state == FLK_CANCELLED_STATE ||
	    new_state == FLK_GRANTED_STATE ||
	    new_state == FLK_INTERRUPTED_STATE))
		return;

	if (IS_CANCELLED(lock) &&
	    (new_state == FLK_GRANTED_STATE ||
	    new_state == FLK_CANCELLED_STATE))
		return;

	CHECK_LOCK_TRANSITION(lock->l_status, new_state);

	if (IS_PXFS(lock))
		cl_flk_state_transition_notify(lock, lock->l_status, new_state);

	lock->l_status = new_state;
}
/*
 * Try to grant a read/write lock request against the graph.  Returns 0
 * on success, EAGAIN for a non-blocking request that would block,
 * EDEADLK if granting would create a deadlock, or the result of
 * sleeping on the request.
 */
static int
flk_process_request(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	lock_descriptor_t *lock;
	int request_blocked_by_active = 0;
	int request_blocked_by_granted = 0;
	int request_blocked_by_sleeping = 0;
	vnode_t *vp = request->l_vnode;
	int error = 0;
	int request_will_wait = 0;
	int found_covering_lock = 0;
	lock_descriptor_t *covered_by = NULL;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	request_will_wait = IS_WILLING_TO_SLEEP(request);

	/*
	 * Pass 1: scan the active locks on this vnode.  A same-owner
	 * read request fully covered by an existing lock is granted
	 * immediately; a conflict either fails (EAGAIN) or forces the
	 * blocking path below.
	 */
	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				if (!request_will_wait)
					return (EAGAIN);
				request_blocked_by_active = 1;
				break;
			}
			/*
			 * Grant a covered same-owner read lock without
			 * further graph work.
			 */
			if (SAME_OWNER(lock, request) &&
			    COVERS(lock, request) &&
			    (request->l_type == F_RDLCK))
				return (flk_execute_request(request));
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	/*
	 * Not blocked by any active lock.  Check the sleep queue:
	 * granted-but-not-yet-active locks must still be honored, and
	 * for fairness a write request blocked only by sleepers has
	 * its dependencies recomputed as if it were granted.
	 */
	if (!request_blocked_by_active) {
		lock_descriptor_t *lk[1];
		lock_descriptor_t *first_glock = NULL;

		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

		if (lock) {
			do {
				if (BLOCKS(lock, request)) {
					if (IS_GRANTED(lock)) {
						request_blocked_by_granted = 1;
					} else {
						request_blocked_by_sleeping = 1;
					}
				}

				lock = lock->l_next;
			} while ((lock->l_vnode == vp));
			first_glock = lock->l_prev;
			ASSERT(first_glock->l_vnode == vp);
		}

		if (request_blocked_by_granted)
			goto block;

		if (!request_blocked_by_sleeping) {
			/* Nothing in the way: grant now. */
			ASSERT(!request_blocked_by_active);
			return (flk_execute_request(request));
		} else if (request->l_type == F_RDLCK) {
			/* Readers queue behind conflicting sleepers. */
			goto block;
		}

		/*
		 * Write request blocked only by sleepers: recompute the
		 * sleepers' dependencies against this request so they
		 * will wait behind it, then grant it unless that would
		 * deadlock.
		 */
		lk[0] = request;
		request->l_state |= RECOMPUTE_LOCK;
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		if (lock) {
			do {
				flk_recompute_dependencies(lock, lk, 1, 0);
				lock = lock->l_next;
			} while (lock->l_vnode == vp);
		}
		/* Walk granted sleepers backwards from the last one. */
		lock = first_glock;
		if (lock) {
			do {
				if (IS_GRANTED(lock)) {
					flk_recompute_dependencies(lock, lk, 1, 0);
				}
				lock = lock->l_prev;
			} while ((lock->l_vnode == vp));
		}
		request->l_state &= ~RECOMPUTE_LOCK;
		if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
			return (EDEADLK);
		return (flk_execute_request(request));
	}

block:
	/*
	 * The request must wait.  Add edges from the request to every
	 * blocker.  A same-owner F_WRLCK sleeper that fully covers the
	 * request lets us skip edges to other owners' blockers (they
	 * are transitively reachable via the covering lock); two such
	 * covering locks with different owners (found_covering_lock ==
	 * 2) disable that optimization.
	 */
	if (request_will_wait)
		flk_graph_uncolor(gp);

	/* check sleeping locks */

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				if (!request_will_wait)
					return (EAGAIN);
				if (COVERS(lock, request) &&
				    lock->l_type == F_WRLCK) {
					if (found_covering_lock &&
					    !SAME_OWNER(lock, covered_by)) {
						found_covering_lock++;
						break;
					}
					found_covering_lock = 1;
					covered_by = lock;
				}
				if (found_covering_lock &&
				    !SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
				    !found_covering_lock, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	/*
	 * found_covering_lock == 2 means we found two covering sleepers
	 * with different owners; fall back to edging every active
	 * blocker directly.
	 */
	if (request_blocked_by_active && found_covering_lock != 2) {
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		ASSERT(lock != NULL);
		do {
			if (BLOCKS(lock, request)) {
				if (found_covering_lock &&
				    !SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
				    CHECK_CYCLE, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (NOT_BLOCKED(request)) {
		/* All blockers vanished while we were adding edges. */
		return (flk_execute_request(request));
	} else {
		if (flk_check_deadlock(request))
			return (EDEADLK);
		return (flk_wait_execute_request(request));
	}
}
/*
 * Grant a request that is known not to be blocked: merge/split it
 * against the owner's existing active locks via flk_relation(), then
 * insert it into the active queue (unless it was an unlock).
 * Always returns 0.
 */
int
flk_execute_request(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t *lock, *lock1;
	int done_searching = 0;

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	flk_set_state(request, FLK_START_STATE);

	ASSERT(NOT_BLOCKED(request));

	/* IO_LOCK requests are only a "would it block" probe. */
	if (IS_IO_LOCK(request))
		return (0);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock == NULL && request->l_type == F_UNLCK)
		return (0);
	if (lock == NULL) {
		flk_insert_active_lock(request);
		return (0);
	}

	/*
	 * Resolve overlap with each same-owner active lock;
	 * flk_relation() may free `lock`, so grab l_next first.
	 */
	do {
		lock1 = lock->l_next;
		if (SAME_OWNER(request, lock)) {
			done_searching = flk_relation(lock, request);
		}
		lock = lock1;
	} while (lock->l_vnode == vp && !done_searching);

	/*
	 * insert in active queue
	 */
	if (request->l_type != F_UNLCK)
		flk_insert_active_lock(request);

	return (0);
}
/*
 * Put a blocked request on the sleep queue and wait until it is
 * granted, cancelled or interrupted.  Handles lock-manager shutdown
 * checks, PXFS non-blocking return, and the caller's BEFORE/AFTER
 * sleep callbacks (including CPR safe-suspend bracketing).
 */
static int
flk_wait_execute_request(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	callb_cpr_t *cprp;		/* CPR info from callback */
	struct flock_globals *fg;
	int index;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_WILLING_TO_SLEEP(request));

	flk_insert_sleeping_lock(request);

	index = 0;	/* quiesce compiler warning. */
	fg = NULL;
	if (IS_LOCKMGR(request)) {
		index = HASH_INDEX(request->l_vnode);
		fg = flk_get_globals();

		if (nlm_status_size == 0) {	/* not booted as a cluster */
			if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
				/* Lock manager went down: refuse to sleep. */
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		} else {			/* booted as a cluster */
			if (!IS_NLM_UP(request)) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		}
	}

	/* PXFS requests never block in-kernel; the cluster layer waits. */
	if (IS_PXFS(request)) {
		return (PXFS_LOCK_BLOCKED);
	}

	if (request->l_callbacks != NULL) {
		/*
		 * Run BEFORE_SLEEP callbacks without the graph mutex.
		 * If one returned CPR info, bracket the wait with
		 * CALLB_CPR_SAFE_BEGIN/END so suspend can proceed.
		 */
		mutex_exit(&gp->gp_mutex);
		cprp = flk_invoke_callbacks(request->l_callbacks,
		    FLK_BEFORE_SLEEP);
		mutex_enter(&gp->gp_mutex);
		if (cprp == NULL) {
			wait_for_lock(request);
		} else {
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_BEGIN(cprp);
			mutex_exit(cprp->cc_lockp);
			wait_for_lock(request);
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
			mutex_exit(cprp->cc_lockp);
		}
		mutex_exit(&gp->gp_mutex);
		(void) flk_invoke_callbacks(request->l_callbacks,
		    FLK_AFTER_SLEEP);
		mutex_enter(&gp->gp_mutex);
	} else {
		wait_for_lock(request);
	}

	/*
	 * Re-check the lock manager after waking: it may have gone
	 * down while we slept, in which case an ungranted request
	 * must be cancelled.  (fg was set above when IS_LOCKMGR.)
	 */
	if (IS_LOCKMGR(request)) {
		if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
		    !IS_GRANTED(request)) {
			flk_cancel_sleeping_lock(request, 1);
			return (ENOLCK);
		}
	}

	if (IS_INTERRUPTED(request)) {
		/* Interrupted by a signal. */
		flk_cancel_sleeping_lock(request, 1);
		return (EINTR);
	}

	if (IS_CANCELLED(request)) {
		/* Cancelled (e.g. file closed underneath us). */
		flk_cancel_sleeping_lock(request, 1);
		return (EBADF);
	}

	/* Granted: leave the sleep queue and install the lock. */
	request->l_state &= ~GRANTED_LOCK;
	REMOVE_SLEEP_QUEUE(request);
	return (flk_execute_request(request));
}
/*
 * Add a dependency edge from from_lock (the waiter) to to_lock (the
 * blocker).  A colored to_lock has already been edged in this pass, so
 * it is skipped.  With check_cycle set, a DFS from from_lock detects a
 * same-owner cycle; on detection all of from_lock's out-edges are torn
 * down and EDEADLK is returned.  With update_graph set, the process
 * graph is updated instead of cycle-checked.
 */
static int
flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
    int check_cycle, int update_graph)
{
	edge_t *edge;
	edge_t *ep;
	lock_descriptor_t *vertex;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	/* Already have an edge to this blocker in this pass. */
	if (COLORED(to_lock))
		return (0);

	edge = flk_get_edge();

	/*
	 * Splice the edge into from_lock's adjacency (out) list and
	 * to_lock's in-edge list.
	 */
	edge->from_vertex = from_lock;
	edge->to_vertex = to_lock;
	from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
	edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
	edge->edge_adj_prev = &from_lock->l_edge;
	from_lock->l_edge.edge_adj_next = edge;
	to_lock->l_edge.edge_in_next->edge_in_prev = edge;
	edge->edge_in_next = to_lock->l_edge.edge_in_next;
	to_lock->l_edge.edge_in_next = edge;
	edge->edge_in_prev = &to_lock->l_edge;

	if (update_graph) {
		flk_update_proc_graph(edge, 0);
		return (0);
	}
	if (!check_cycle) {
		return (0);
	}

	/*
	 * Iterative DFS from from_lock looking for a reachable vertex
	 * with the same owner — that would be a deadlock cycle.
	 */
	STACK_PUSH(vertex_stack, from_lock, l_stack);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {

		STACK_POP(vertex_stack, l_stack);

		for (ep = FIRST_ADJ(vertex);
		    ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (COLORED(ep->to_vertex))
				continue;
			COLOR(ep->to_vertex);
			if (SAME_OWNER(ep->to_vertex, from_lock))
				goto dead_lock;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
		}
	}
	return (0);

dead_lock:

	/*
	 * Remove all of from_lock's out-edges (including the one just
	 * added); the request will fail with EDEADLK.
	 */
	ep = FIRST_ADJ(from_lock);
	while (ep != HEAD(from_lock)) {
		IN_LIST_REMOVE(ep);
		from_lock->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = from_lock->l_sedge;
	}
	return (EDEADLK);
}
/*
 * Allocate a graph edge from the edge cache; sleeps until memory is
 * available.  Bumps edge_allocs for leak accounting.
 *
 * Fix: the definition used an old-style empty parameter list `()`;
 * declare it `(void)` to match the prototype above and give the
 * definition a proper prototype.
 */
static edge_t *
flk_get_edge(void)
{
	edge_t *ep;

	ASSERT(flk_edge_cache != NULL);

	ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
	edge_allocs++;
	return (ep);
}
/*
 * Return a graph edge to the edge cache and account for the free.
 */
static void
flk_free_edge(edge_t *ep)
{
	edge_frees++;
	kmem_cache_free(flk_edge_cache, ep);
}
/*
 * Resolve the overlap between an existing same-owner active `lock` and
 * an incoming `request` (which may be an unlock, upgrade, downgrade or
 * same-type change).  Builds `topology` — up to three lock descriptors
 * describing the resulting range layout — deletes `lock` from the
 * active queue, recomputes the dependencies of its waiters against the
 * new topology, and inserts the replacement pieces.
 *
 * Returns 1 when the caller (flk_execute_request) can stop scanning
 * the owner's other active locks, 0 to continue.
 */
static int
flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
{
	int lock_effect;
	lock_descriptor_t *lock1, *lock2;
	lock_descriptor_t *topology[3];
	int nvertex = 0;
	int i;
	edge_t *ep;
	graph_t *gp = (lock->l_graph);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	topology[0] = topology[1] = topology[2] = NULL;

	/* Classify the type change. */
	if (request->l_type == F_UNLCK)
		lock_effect = FLK_UNLOCK;
	else if (request->l_type == F_RDLCK &&
	    lock->l_type == F_WRLCK)
		lock_effect = FLK_DOWNGRADE;
	else if (request->l_type == F_WRLCK &&
	    lock->l_type == F_RDLCK)
		lock_effect = FLK_UPGRADE;
	else
		lock_effect = FLK_STAY_SAME;

	/*
	 * Case: lock entirely left of request.  Adjacent same-type
	 * ranges coalesce; otherwise there is nothing to do here.
	 */
	if (lock->l_end < request->l_start) {
		if (lock->l_end == request->l_start - 1 &&
		    lock_effect == FLK_STAY_SAME) {
			topology[0] = request;
			request->l_start = lock->l_start;
			nvertex = 1;
			goto recompute;
		} else {
			return (0);
		}
	}

	/*
	 * Case: lock entirely right of request.  Adjacent same-type
	 * ranges coalesce; otherwise scanning can stop (active locks
	 * are sorted by start offset).
	 */
	if (lock->l_start > request->l_end) {
		if (request->l_end == lock->l_start - 1 &&
		    lock_effect == FLK_STAY_SAME) {
			topology[0] = request;
			request->l_end = lock->l_end;
			nvertex = 1;
			goto recompute;
		} else {
			return (1);
		}
	}

	/*
	 * Overlapping cases.  The topology describes what replaces
	 * `lock`: surviving fragments (lock1/lock2) plus the request.
	 */
	if (request->l_end < lock->l_end) {
		if (request->l_start > lock->l_start) {
			/* Request strictly inside lock. */
			if (lock_effect == FLK_STAY_SAME) {
				/* Same type: absorb the whole range. */
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				/* Split: left piece, right piece, request. */
				lock1 = flk_get_lock();
				lock2 = flk_get_lock();
				COPY(lock1, lock);
				COPY(lock2, lock);
				lock1->l_start = lock->l_start;
				lock1->l_end = request->l_start - 1;
				lock2->l_start = request->l_end + 1;
				lock2->l_end = lock->l_end;
				topology[0] = lock1;
				topology[1] = lock2;
				topology[2] = request;
				nvertex = 3;
			}
		} else if (request->l_start < lock->l_start) {
			/* Request overlaps lock's left end. */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else {
			/* Same start, request ends earlier. */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		}
	} else if (request->l_end > lock->l_end) {
		if (request->l_start > lock->l_start)  {
			/* Request overlaps lock's right end. */
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start) {
			/* Request strictly contains lock. */
			topology[0] = request;
			nvertex = 1;
		} else {
			/* Same start, request ends later. */
			topology[0] = request;
			nvertex = 1;
		}
	} else {
		/* Same end. */
		if (request->l_start > lock->l_start) {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start) {
			topology[0] = request;
			nvertex = 1;
		} else {
			/* Exact match. */
			if (lock_effect !=  FLK_UNLOCK) {
				topology[0] = request;
				nvertex = 1;
			} else {
				/* Exact unlock: just remove the lock. */
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
				CHECK_SLEEPING_LOCKS(gp);
				CHECK_ACTIVE_LOCKS(gp);
				return (1);
			}
		}
	}

recompute:

	/*
	 * For an unlock the request itself (always last in topology)
	 * does not survive as an active lock.
	 */
	if (lock_effect == FLK_UNLOCK) {
		topology[nvertex-1] = NULL;
		nvertex--;
	}

	/* Mark the surviving pieces for dependency recomputation. */
	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state |= RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}

	ASSERT(FIRST_ADJ(lock) == HEAD(lock));

	/*
	 * Detach lock's in-edges from their waiters' adjacency lists;
	 * the in-edge list itself is consumed below.
	 */
	ep = FIRST_IN(lock);
	while (ep != HEAD(lock)) {
		ADJ_LIST_REMOVE(ep);
		ep = NEXT_IN(ep);
	}

	flk_delete_active_lock(lock, 0);

	/* Re-point lock's waiters at the new topology. */
	flk_recompute_dependencies(lock, topology, nvertex, 1);

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}

	/*
	 * For an unlock, restore nvertex so the loop below inserts all
	 * surviving fragments; the request slot was already NULLed.
	 */
	if (lock_effect == FLK_UNLOCK) {
		nvertex++;
	}
	for (i = 0; i < nvertex - 1; i++) {
		flk_insert_active_lock(topology[i]);
	}

	if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
		/* Waiters may now be grantable. */
		flk_wakeup(lock, 0);
	} else {
		/* Free any remaining in-edges of the dead lock. */
		ep = FIRST_IN(lock);
		while (ep != HEAD(lock)) {
			lock->l_sedge = NEXT_IN(ep);
			IN_LIST_REMOVE(ep);
			flk_update_proc_graph(ep, 1);
			flk_free_edge(ep);
			ep = lock->l_sedge;
		}
	}
	flk_free_lock(lock);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	return (0);
}
/*
 * Insert new_lock into the active queue for its vnode, keeping the
 * per-vnode list sorted by l_start, and point v_filocks at the
 * first (lowest-start) active lock.
 */
static void
flk_insert_active_lock(lock_descriptor_t *new_lock)
{
	graph_t *gp = new_lock->l_graph;
	vnode_t *vp = new_lock->l_vnode;
	lock_descriptor_t *first_lock, *lock;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
	first_lock = lock;

	if (first_lock != NULL) {
		/* Find the insertion point, sorted by start offset. */
		for (; (lock->l_vnode == vp &&
		    lock->l_start < new_lock->l_start); lock = lock->l_next)
			;
	} else {
		lock = ACTIVE_HEAD(gp);
	}

	/* Link new_lock just before `lock`. */
	lock->l_prev->l_next = new_lock;
	new_lock->l_next = lock;
	new_lock->l_prev = lock->l_prev;
	lock->l_prev = new_lock;

	/* Maintain the vnode's pointer to its first active lock. */
	if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
		vp->v_filocks = (struct filock *)new_lock;
	}
	flk_set_state(new_lock, FLK_ACTIVE_STATE);
	new_lock->l_state |= ACTIVE_LOCK;

	CHECK_ACTIVE_LOCKS(gp);
	CHECK_SLEEPING_LOCKS(gp);
}
/*
 * Unlink an active lock from the active queue, fixing up the vnode's
 * v_filocks pointer if this was its first lock.  If free_lock is set
 * the descriptor is also freed (it must have no dependents then).
 */
static void
flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
{
	vnode_t *vp = lock->l_vnode;
	graph_t *gp = lock->l_graph;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (free_lock)
		ASSERT(NO_DEPENDENTS(lock));
	ASSERT(NOT_BLOCKED(lock));
	ASSERT(IS_ACTIVE(lock));

	ASSERT((vp->v_filocks != NULL));

	/* Advance v_filocks to the next lock on this vnode, if any. */
	if (vp->v_filocks == (struct filock *)lock) {
		vp->v_filocks = (struct filock *)
		    ((lock->l_next->l_vnode == vp) ? lock->l_next :
		    NULL);
	}
	lock->l_next->l_prev = lock->l_prev;
	lock->l_prev->l_next = lock->l_next;
	lock->l_next = lock->l_prev = NULL;
	flk_set_state(lock, FLK_DEAD_STATE);
	lock->l_state &= ~ACTIVE_LOCK;

	if (free_lock)
		flk_free_lock(lock);
	CHECK_ACTIVE_LOCKS(gp);
	CHECK_SLEEPING_LOCKS(gp);
}
/*
 * Put a blocked request on the graph's sleep queue, which is kept
 * sorted by vnode address, and mark it SLEEPING.
 */
static void
flk_insert_sleeping_lock(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t *lock;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_INITIAL(request));

	/* Find the first entry whose vnode is >= vp. */
	for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
	    lock->l_vnode < vp); lock = lock->l_next)
		;

	/* Link request just before `lock`. */
	lock->l_prev->l_next = request;
	request->l_prev = lock->l_prev;
	lock->l_prev = request;
	request->l_next = lock;
	flk_set_state(request, FLK_SLEEPING_STATE);
	request->l_state |= SLEEPING_LOCK;
}
/*
 * Cancel a sleeping request: remove it from the graph (and, when
 * remove_from_queue is set, from the sleep queue), recompute the
 * dependencies of everything that was waiting behind it, wake any
 * dependents, and free the descriptor.
 */
void
flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
{
	graph_t *gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t **topology = NULL;
	edge_t *ep;
	lock_descriptor_t *vertex, *lock;
	int nvertex = 0;
	int i;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	/*
	 * Pass 1: DFS over everything reachable from the request via
	 * out-edges, marking each vertex RECOMPUTE and counting them.
	 */
	STACK_PUSH(vertex_stack, request, l_stack);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {

		STACK_POP(vertex_stack, l_stack);

		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (IS_RECOMPUTE(ep->to_vertex))
				continue;
			ep->to_vertex->l_state |= RECOMPUTE_LOCK;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
			nvertex++;
		}
	}

	/*
	 * Pass 2: collect the marked vertices (from both the sleep and
	 * active queues of this vnode) into the topology array, in
	 * queue order, and uncolor everything on the way.
	 */
	if (nvertex) {
		topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
		    KM_SLEEP);
	}

	nvertex = 0;

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	/*
	 * Detach the request's in-edges from the waiters' adjacency
	 * lists (the edges themselves are freed by flk_wakeup below).
	 */
	for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
		ADJ_LIST_REMOVE(ep);
	}

	if (remove_from_queue)
		REMOVE_SLEEP_QUEUE(request);

	/* Re-point former dependents at the collected topology. */
	flk_recompute_dependencies(request, topology, nvertex, 1);

	/* Tear down the request's own out-edges. */
	ep = FIRST_ADJ(request);
	while (ep != HEAD(request)) {
		IN_LIST_REMOVE(ep);
		request->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		flk_free_edge(ep);
		ep = request->l_sedge;
	}

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
	}

	if (nvertex)
		kmem_free((void *)topology,
		    (nvertex * sizeof (lock_descriptor_t *)));
	flk_wakeup(request, 0);
	flk_set_state(request, FLK_DEAD_STATE);
	flk_free_lock(request);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
}
/*
 * Make every lock in the graph appear uncolored by advancing the graph's
 * color mark.  When the mark would wrap we restart it at 1 and explicitly
 * clear the color of every active and sleeping lock.
 */
static void
flk_graph_uncolor(graph_t *gp)
{
	lock_descriptor_t *lp;

	if (gp->mark != UINT_MAX) {
		gp->mark++;
		return;
	}

	gp->mark = 1;
	for (lp = ACTIVE_HEAD(gp)->l_next; lp != ACTIVE_HEAD(gp);
	    lp = lp->l_next)
		lp->l_color = 0;
	for (lp = SLEEPING_HEAD(gp)->l_next; lp != SLEEPING_HEAD(gp);
	    lp = lp->l_next)
		lp->l_color = 0;
}
/*
 * Wake up the locks waiting on `lock'.  Each in-edge is unlinked and
 * freed; when adj_list_remove is set the edge is also removed from the
 * blocked lock's adjacency list.  A waiter is granted only once it has no
 * blockers left.  Caller holds the graph mutex.
 */
static void
flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
{
	edge_t *ep;
	graph_t *gp = lock->l_graph;
	lock_descriptor_t *lck;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (NO_DEPENDENTS(lock))
		return;
	ep = FIRST_IN(lock);
	do {
		lck = ep->from_vertex;
		if (adj_list_remove)
			ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		if (NOT_BLOCKED(lck)) {
			/* no remaining blockers: grant the waiter */
			GRANT_WAKEUP(lck);
		}
		/* save the next in-edge before ep is freed */
		lock->l_sedge = NEXT_IN(ep);
		IN_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = lock->l_sedge;
	} while (ep != HEAD(lock));
	ASSERT(NO_DEPENDENTS(lock));
}
/*
 * Rebuild the dependency edges for the locks that depended on `request'
 * (collected by the caller in topology[nvertex]).  A DFS is run from the
 * request along its in-edges; at each visited vertex, new edges are added
 * to every topology lock it blocks that is not already reachable from it.
 * Barrier vertices (found by flk_find_barriers) delay expansion until all
 * paths into them have arrived.  When update_graph is set the process
 * graph used for deadlock detection is updated as well.
 * Caller holds the graph mutex.
 */
static void
flk_recompute_dependencies(lock_descriptor_t *request,
    lock_descriptor_t **topology, int nvertex, int update_graph)
{
	lock_descriptor_t *vertex, *lock;
	graph_t *gp = request->l_graph;
	int i, count;
	int barrier_found = 0;
	edge_t *ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (nvertex == 0)
		return;
	flk_graph_uncolor(request->l_graph);
	barrier_found = flk_find_barriers(request);
	request->l_state |= RECOMPUTE_DONE;

	STACK_PUSH(vertex_stack, request, l_stack);
	request->l_sedge = FIRST_IN(request);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		if (vertex->l_state & RECOMPUTE_DONE) {
			/* already expanded: just advance its edge cursor */
			count = 0;
			goto next_in_edge;
		}
		if (IS_BARRIER(vertex)) {
			/* wait until every path into the barrier arrives */
			if (vertex->l_index) {
				vertex->l_index--;
				STACK_POP(vertex_stack, l_stack);
				if (vertex->l_index == 0) {
					vertex->l_state &= ~BARRIER_LOCK;
				}
				continue;
			}
		}
		vertex->l_state |= RECOMPUTE_DONE;
		flk_graph_uncolor(gp);
		count = flk_color_reachables(vertex);
		/*
		 * Add an edge to each topology lock this vertex blocks,
		 * unless that lock is already reachable (colored).
		 */
		for (i = 0; i < nvertex; i++) {
			lock = topology[i];
			if (COLORED(lock))
				continue;
			if (BLOCKS(lock, vertex)) {
				(void) flk_add_edge(vertex, lock,
				    NO_CHECK_CYCLE, update_graph);
				COLOR(lock);
				count++;
				count += flk_color_reachables(lock);
			}
		}

next_in_edge:
		if (count == nvertex ||
		    vertex->l_sedge == HEAD(vertex)) {
			/* vertex fully processed: pop it */
			STACK_POP(vertex_stack, l_stack);
			vertex->l_state &= ~RECOMPUTE_DONE;
			if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
				flk_graph_uncolor(gp);
				flk_update_barriers(vertex);
			}
			continue;
		}
		/* descend into the next blocked lock */
		ep = vertex->l_sedge;
		lock = ep->from_vertex;
		STACK_PUSH(vertex_stack, lock, l_stack);
		lock->l_sedge = FIRST_IN(lock);
		vertex->l_sedge = NEXT_IN(ep);
	}
}
/*
 * Color every lock reachable from `vertex' along out-edges and return the
 * number of newly-colored locks that carry the RECOMPUTE_LOCK tag.  Uses
 * the l_stack1 link for its DFS stack so it can run inside a traversal
 * that uses l_stack.
 */
static int
flk_color_reachables(lock_descriptor_t *vertex)
{
	lock_descriptor_t *ver, *lock;
	int count;
	edge_t *ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);
	STACK_PUSH(vertex_stack, vertex, l_stack1);
	count = 0;
	while ((ver = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack1);
		for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
		    ep = NEXT_ADJ(ep)) {
			lock = ep->to_vertex;
			if (COLORED(lock))
				continue;
			COLOR(lock);
			if (IS_RECOMPUTE(lock))
				count++;
			STACK_PUSH(vertex_stack, lock, l_stack1);
		}
	}
	return (count);
}
/*
 * Walk the blocked locks reachable from `lock' along in-edges and
 * decrement each barrier's path count; a barrier whose count reaches zero
 * ceases to be a barrier.  Each vertex is expanded only on its first
 * (uncolored) visit.
 */
static void
flk_update_barriers(lock_descriptor_t *lock)
{
	lock_descriptor_t *vertex, *blocked;
	edge_t *ep;
	int first_visit;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);
	STACK_PUSH(vertex_stack, lock, l_stack1);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack1);
		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
		    ep = NEXT_IN(ep)) {
			blocked = ep->from_vertex;
			first_visit = !COLORED(blocked);
			if (first_visit)
				COLOR(blocked);
			if (IS_BARRIER(blocked)) {
				ASSERT(blocked->l_index > 0);
				blocked->l_index--;
				if (blocked->l_index == 0)
					blocked->l_state &= ~BARRIER_LOCK;
			}
			if (first_visit)
				STACK_PUSH(vertex_stack, blocked, l_stack1);
		}
	}
}
/*
 * DFS along the in-edges of `lock', identifying "barrier" vertices: any
 * vertex reached a second time.  Each barrier's l_index counts the extra
 * paths arriving at it.  Returns nonzero iff at least one barrier exists.
 */
static int
flk_find_barriers(lock_descriptor_t *lock)
{
	lock_descriptor_t *vertex, *blocked;
	int found = 0;
	edge_t *ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);
	STACK_PUSH(vertex_stack, lock, l_stack1);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack1);
		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
		    ep = NEXT_IN(ep)) {
			blocked = ep->from_vertex;
			if (COLORED(blocked)) {
				/* second arrival: this vertex is a barrier */
				blocked->l_state |= BARRIER_LOCK;
				blocked->l_index++;
				found = 1;
				continue;
			}
			COLOR(blocked);
			blocked->l_index = 0;
			STACK_PUSH(vertex_stack, blocked, l_stack1);
		}
	}
	return (found);
}
/*
 * Find the first lock that blocks `request' and report it into the
 * request's flock structure; if nothing blocks it, set l_type to F_UNLCK.
 * Active locks are searched first; a read request is additionally checked
 * against the sleep queue.  Caller holds the graph mutex.
 */
static void
flk_get_first_blocking_lock(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t *lock, *blocker = NULL;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
	for (; lock != NULL && lock->l_vnode == vp; lock = lock->l_next) {
		if (BLOCKS(lock, request)) {
			blocker = lock;
			break;
		}
	}

	if (blocker == NULL && request->l_flock.l_type == F_RDLCK) {
		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
		for (; lock != NULL && lock->l_vnode == vp;
		    lock = lock->l_next) {
			if (BLOCKS(lock, request)) {
				blocker = lock;
				break;
			}
		}
	}

	if (blocker != NULL)
		report_blocker(blocker, request);
	else
		request->l_flock.l_type = F_UNLCK;
}
/*
 * Return the lock graph for the hash bucket of `vp'.  With FLK_USE_GRAPH
 * the existing graph (possibly NULL) is returned; with FLK_INIT_GRAPH a
 * graph is created if needed.  Initialization allocates and sets up the
 * candidate graph before taking flock_lock, then installs it only if no
 * other thread raced us to it; the loser's allocation is freed.
 */
graph_t *
flk_get_lock_graph(vnode_t *vp, int initialize)
{
	graph_t *gp;
	graph_t *gp_alloc = NULL;
	int index = HASH_INDEX(vp);

	if (initialize == FLK_USE_GRAPH) {
		mutex_enter(&flock_lock);
		gp = lock_graph[index];
		mutex_exit(&flock_lock);
		return (gp);
	}
	ASSERT(initialize == FLK_INIT_GRAPH);

	/*
	 * Unlocked read: may see a stale NULL, in which case we allocate
	 * speculatively and resolve the race under flock_lock below.
	 */
	if (lock_graph[index] == NULL) {
		gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
		/* both lock lists start out as empty circular lists */
		gp_alloc->active_locks.l_next =
		    gp_alloc->active_locks.l_prev =
		    (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
		gp_alloc->sleeping_locks.l_next =
		    gp_alloc->sleeping_locks.l_prev =
		    (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
		gp_alloc->index = index;
		mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
	}

	mutex_enter(&flock_lock);
	gp = lock_graph[index];
	if (gp == NULL) {
		struct flock_globals *fg;

		ASSERT(gp_alloc != NULL);
		lock_graph[index] = gp = gp_alloc;
		/* seed the new bucket with the zone's lockmgr status */
		if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
			fg = flk_get_globals();
			fg->lockmgr_status[index] = fg->flk_lockmgr_status;
		}
	}
	mutex_exit(&flock_lock);

	/* lost the race: discard our speculative allocation */
	if ((gp_alloc != NULL) && (gp != gp_alloc)) {
		mutex_destroy(&gp_alloc->gp_mutex);
		kmem_free(gp_alloc, sizeof (graph_t));
	}

	return (gp);
}
/*
 * Cluster-only query: does `vp' have any lock-manager lock (active or
 * sleeping) registered by NLM server `nlmid'?  Returns 1 if so, else 0.
 */
int
cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
{
	lock_descriptor_t *lock;
	int result = 0;
	graph_t *gp;

	if ((cluster_bootflags & CLUSTER_BOOTED) == 0)
		return (0);

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
	if (gp == NULL)
		return (0);

	mutex_enter(&gp->gp_mutex);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
	for (; lock != NULL && lock->l_vnode == vp; lock = lock->l_next) {
		if (IS_LOCKMGR(lock) &&
		    nlmid == GETNLMID(lock->l_flock.l_sysid)) {
			result = 1;
			goto done;
		}
	}

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
	for (; lock != NULL && lock->l_vnode == vp; lock = lock->l_next) {
		if (IS_LOCKMGR(lock) &&
		    nlmid == GETNLMID(lock->l_flock.l_sysid)) {
			result = 1;
			goto done;
		}
	}
done:
	mutex_exit(&gp->gp_mutex);
	return (result);
}
/*
 * Return 1 if `vp' has any remotely-owned lock, active or sleeping;
 * otherwise return 0.
 */
int
flk_has_remote_locks(vnode_t *vp)
{
	lock_descriptor_t *lock;
	int result = 0;
	graph_t *gp;

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
	if (gp == NULL)
		return (0);

	mutex_enter(&gp->gp_mutex);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
	for (; lock != NULL && lock->l_vnode == vp; lock = lock->l_next) {
		if (IS_REMOTE(lock)) {
			result = 1;
			goto done;
		}
	}

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
	for (; lock != NULL && lock->l_vnode == vp; lock = lock->l_next) {
		if (IS_REMOTE(lock)) {
			result = 1;
			goto done;
		}
	}
done:
	mutex_exit(&gp->gp_mutex);
	return (result);
}
/*
 * Return 1 if `vp' has any lock, active or sleeping, owned by `sysid';
 * a sysid of 0 (local) always reports no remote locks.
 */
int
flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid)
{
	lock_descriptor_t *lock;
	int result = 0;
	graph_t *gp;

	if (sysid == 0)
		return (0);

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
	if (gp == NULL)
		return (0);

	mutex_enter(&gp->gp_mutex);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
	for (; lock != NULL && lock->l_vnode == vp; lock = lock->l_next) {
		if (lock->l_flock.l_sysid == sysid) {
			result = 1;
			goto done;
		}
	}

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
	for (; lock != NULL && lock->l_vnode == vp; lock = lock->l_next) {
		if (lock->l_flock.l_sysid == sysid) {
			result = 1;
			goto done;
		}
	}
done:
	mutex_exit(&gp->gp_mutex);
	return (result);
}
/*
 * Return 1 if `sysid' owns any lock anywhere in the system.  lck_type
 * selects which lists are examined: FLK_QUERY_ACTIVE, FLK_QUERY_SLEEPING,
 * or both.
 */
int
flk_sysid_has_locks(int sysid, int lck_type)
{
	int has_locks = 0;
	lock_descriptor_t *lock;
	graph_t *gp;
	int i;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL)
			continue;

		mutex_enter(&gp->gp_mutex);

		if (lck_type & FLK_QUERY_ACTIVE) {
			for (lock = ACTIVE_HEAD(gp)->l_next;
			    lock != ACTIVE_HEAD(gp); lock = lock->l_next) {
				if (lock->l_flock.l_sysid == sysid) {
					has_locks = 1;
					break;
				}
			}
		}

		if (!has_locks && (lck_type & FLK_QUERY_SLEEPING)) {
			for (lock = SLEEPING_HEAD(gp)->l_next;
			    lock != SLEEPING_HEAD(gp); lock = lock->l_next) {
				if (lock->l_flock.l_sysid == sysid) {
					has_locks = 1;
					break;
				}
			}
		}

		mutex_exit(&gp->gp_mutex);
		if (has_locks)
			break;
	}
	return (has_locks);
}
/*
 * Cluster-only: discard every lock owned by `sysid'.  Sleeping requests
 * are interrupted; active locks are deleted, their dependents woken, and
 * the descriptors freed.
 */
void
cl_flk_remove_locks_by_sysid(int sysid)
{
	graph_t *gp;
	int i;
	lock_descriptor_t *lock, *nlock;

	if ((cluster_bootflags & CLUSTER_BOOTED) == 0)
		return;

	ASSERT(sysid != 0);

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL)
			continue;

		mutex_enter(&gp->gp_mutex);

		/* wake up sleeping requests so they can bail out */
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp); lock = nlock) {
			nlock = lock->l_next;	/* lock may go away */
			if (lock->l_flock.l_sysid == sysid)
				INTERRUPT_WAKEUP(lock);
		}

		/* release active locks and wake their dependents */
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp); lock = nlock) {
			nlock = lock->l_next;
			if (lock->l_flock.l_sysid == sysid) {
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
			}
		}

		mutex_exit(&gp->gp_mutex);
	}
}
/*
 * Discard every lock owned by the sysid of `request', across all graphs.
 * Sleeping requests are interrupted; active locks are deleted, their
 * dependents woken, and the descriptors freed.
 *
 * NOTE: the caller's graph mutex is dropped for the duration of the scan
 * (we must take each graph's mutex in turn) and reacquired before return;
 * the caller must be prepared for the graph to have changed meanwhile.
 */
static void
flk_delete_locks_by_sysid(lock_descriptor_t *request)
{
	int sysid  = request->l_flock.l_sysid;
	lock_descriptor_t *lock, *nlock;
	graph_t *gp;
	int i;

	ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
	ASSERT(sysid != 0);

	mutex_exit(&request->l_graph->gp_mutex);

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL)
			continue;

		mutex_enter(&gp->gp_mutex);

		/* signal sleeping requests so that they bail out */
		lock = SLEEPING_HEAD(gp)->l_next;
		while (lock != SLEEPING_HEAD(gp)) {
			nlock = lock->l_next;	/* lock may be freed */
			if (lock->l_flock.l_sysid == sysid) {
				INTERRUPT_WAKEUP(lock);
			}
			lock = nlock;
		}

		/* delete active locks */
		lock = ACTIVE_HEAD(gp)->l_next;
		while (lock != ACTIVE_HEAD(gp)) {
			nlock = lock->l_next;
			if (lock->l_flock.l_sysid == sysid) {
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
			}
			lock = nlock;
		}
		mutex_exit(&gp->gp_mutex);
	}

	mutex_enter(&request->l_graph->gp_mutex);
}
/*
 * Delete all locks in the system that belong to the PXFS filesystem
 * instance `pxfsid' on `vfsp'.  Sleeping requests are cancelled (with
 * their dependency edges recomputed); active locks are deleted, their
 * dependents woken, and the descriptors freed.
 */
void
cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
{
	lock_descriptor_t *lock, *nlock;
	graph_t *gp;
	int i;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL)
			continue;

		mutex_enter(&gp->gp_mutex);

		/* cancel sleeping requests for this pxfsid */
		lock = SLEEPING_HEAD(gp)->l_next;
		while (lock != SLEEPING_HEAD(gp)) {
			nlock = lock->l_next;	/* lock may be freed */
			if (lock->l_vnode->v_vfsp == vfsp) {
				ASSERT(IS_PXFS(lock));
				if (GETPXFSID(lock->l_flock.l_sysid) ==
				    pxfsid) {
					flk_set_state(lock,
					    FLK_CANCELLED_STATE);
					flk_cancel_sleeping_lock(lock, 1);
				}
			}
			lock = nlock;
		}

		/* delete active locks for this pxfsid */
		lock = ACTIVE_HEAD(gp)->l_next;
		while (lock != ACTIVE_HEAD(gp)) {
			nlock = lock->l_next;
			if (lock->l_vnode->v_vfsp == vfsp) {
				ASSERT(IS_PXFS(lock));
				if (GETPXFSID(lock->l_flock.l_sysid) ==
				    pxfsid) {
					flk_delete_active_lock(lock, 0);
					flk_wakeup(lock, 1);
					flk_free_lock(lock);
				}
			}
			lock = nlock;
		}
		mutex_exit(&gp->gp_mutex);
	}
}
/*
 * Handle a lock-manager cancel: look for a sleeping lock on the request's
 * vnode with the same owner and the same byte range.  If one exists it is
 * interrupted and 1 is returned; otherwise 0.  Caller holds the graph
 * mutex.
 */
static int
flk_canceled(lock_descriptor_t *request)
{
	lock_descriptor_t *lock, *nlock;
	graph_t *gp = request->l_graph;
	vnode_t *vp = request->l_vnode;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_LOCKMGR(request));

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
	for (; lock != NULL && lock->l_vnode == vp; lock = nlock) {
		nlock = lock->l_next;	/* the wakeup may free `lock' */
		if (SAME_OWNER(lock, request) &&
		    lock->l_start == request->l_start &&
		    lock->l_end == request->l_end) {
			INTERRUPT_WAKEUP(lock);
			return (1);
		}
	}
	return (0);
}
/*
 * Remove all locks on `vp' owned by (pid, sysid); a pid of IGN_PID
 * matches every process with that sysid.  Sleeping requests are
 * cancelled.  Active locks are first unlinked and parked on a private
 * stack, then their dependents are woken and the descriptors freed --
 * deferring the wakeups keeps the active list stable during the scan.
 */
void
cleanlocks(vnode_t *vp, pid_t pid, int sysid)
{
	graph_t *gp;
	lock_descriptor_t *lock, *nlock;
	lock_descriptor_t *link_stack;

	STACK_INIT(link_stack);

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);

	if (gp == NULL)
		return;
	mutex_enter(&gp->gp_mutex);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			nlock = lock->l_next;	/* lock may be freed */
			if ((lock->l_flock.l_pid == pid ||
			    pid == IGN_PID) &&
			    lock->l_flock.l_sysid == sysid) {
				CANCEL_WAKEUP(lock);
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			nlock = lock->l_next;
			if ((lock->l_flock.l_pid == pid ||
			    pid == IGN_PID) &&
			    lock->l_flock.l_sysid == sysid) {
				flk_delete_active_lock(lock, 0);
				/* defer wakeup until the scan completes */
				STACK_PUSH(link_stack, lock, l_stack);
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	while ((lock = STACK_TOP(link_stack)) != NULL) {
		STACK_POP(link_stack, l_stack);
		flk_wakeup(lock, 1);
		flk_free_lock(lock);
	}

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
	mutex_exit(&gp->gp_mutex);
}
/*
 * Check whether a mandatory lock forbids the I/O described by (iomode,
 * offset, len).  Returns 0 when the range is free, EAGAIN when a
 * conflicting lock is held and we may not sleep, or the error from
 * reclock().  The probe is a write lock for FWRITE I/O, else a read lock.
 */
int
chklock(struct vnode *vp, int iomode, u_offset_t offset, ssize_t len,
    int fmode, caller_context_t *ct)
{
	struct flock64 bf;
	int flags;
	int rc;

	bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
	bf.l_whence = 0;
	bf.l_start = offset;
	bf.l_len = len;
	if (ct == NULL) {
		bf.l_pid = curproc->p_pid;
		bf.l_sysid = 0;
	} else {
		bf.l_pid = ct->cc_pid;
		bf.l_sysid = ct->cc_sysid;
	}

	/* non-blocking I/O must not sleep waiting for the region */
	flags = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;

	rc = reclock(vp, &bf, flags, 0, offset, NULL);
	if (rc != 0)
		return (rc);
	if (bf.l_type != F_UNLCK)
		return (EAGAIN);
	return (0);
}
/*
 * Convert a flock64 from its current l_whence base to the base `whence':
 * first normalize l_start to an absolute file offset, then rebase it.
 * Whence values follow fcntl(2): 0 = start of file, 1 = current offset,
 * 2 = end of file (requires fetching the file size).  Returns 0, EINVAL
 * for a bad whence or a negative resulting offset, or the VOP_GETATTR
 * error.
 */
int
convoff(struct vnode *vp, struct flock64 *lckdat, int whence, offset_t offset)
{
	int error;
	struct vattr vattr;

	/* fetch the file size only if either base is end-of-file */
	if ((lckdat->l_whence == 2) || (whence == 2)) {
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
	}

	/* make l_start absolute */
	switch (lckdat->l_whence) {
	case 1:
		lckdat->l_start += offset;
		break;
	case 2:
		lckdat->l_start += vattr.va_size;
		/* FALLTHROUGH */
	case 0:
		break;
	default:
		return (EINVAL);
	}

	if (lckdat->l_start < 0)
		return (EINVAL);

	/* rebase l_start relative to the requested whence */
	switch (whence) {
	case 1:
		lckdat->l_start -= offset;
		break;
	case 2:
		lckdat->l_start -= vattr.va_size;
		/* FALLTHROUGH */
	case 0:
		break;
	default:
		return (EINVAL);
	}

	lckdat->l_whence = (short)whence;
	return (0);
}
/*
 * Determine whether blocking on `lock' would create a deadlock, using the
 * process graph (vertices are (pid, sysid) owners, edges are "waits-for"
 * relations with reference counts).  The lock's edges are first folded
 * into the process graph; a DFS then looks for a cycle through the
 * requesting owner.  If a cycle is found, all process-graph and lock-graph
 * edges just added are backed out and 1 is returned; otherwise the edges
 * remain and 0 is returned.  OFD (file-private) locks are not deadlock
 * checked.  Caller holds the lock's graph mutex; flock_lock protects the
 * process graph.
 */
static int
flk_check_deadlock(lock_descriptor_t *lock)
{
	proc_vertex_t	*start_vertex, *pvertex;
	proc_vertex_t *dvertex;
	proc_edge_t *pep, *ppep;
	edge_t	*ep, *nep;
	proc_vertex_t *process_stack;

	/* OFD locks are not owned by a (pid, sysid); skip the check. */
	if (lock->l_ofd != NULL)
		return (0);

	STACK_INIT(process_stack);

	mutex_enter(&flock_lock);
	start_vertex = flk_get_proc_vertex(lock);
	ASSERT(start_vertex != NULL);

	/*
	 * Fold this lock's out-edges (owners we wait on) into the process
	 * graph, either bumping an existing edge's refcount or adding a
	 * new edge.
	 */
	ep = FIRST_ADJ(lock);
	while (ep != HEAD(lock)) {
		proc_vertex_t *adj_proc;

		adj_proc = flk_get_proc_vertex(ep->to_vertex);
		for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
			if (pep->to_proc == adj_proc) {
				ASSERT(pep->refcount);
				pep->refcount++;
				break;
			}
		}
		if (pep == NULL) {
			pep = flk_get_proc_edge();
			pep->to_proc = adj_proc;
			pep->refcount = 1;
			adj_proc->incount++;
			pep->next = start_vertex->edge;
			start_vertex->edge = pep;
		}
		ep = NEXT_ADJ(ep);
	}

	/*
	 * Likewise fold the in-edges (owners waiting on us) into the
	 * process graph.
	 */
	ep = FIRST_IN(lock);

	while (ep != HEAD(lock)) {
		proc_vertex_t *in_proc;

		in_proc = flk_get_proc_vertex(ep->from_vertex);

		for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
			if (pep->to_proc == start_vertex) {
				ASSERT(pep->refcount);
				pep->refcount++;
				break;
			}
		}
		if (pep == NULL) {
			pep = flk_get_proc_edge();
			pep->to_proc = start_vertex;
			pep->refcount = 1;
			start_vertex->incount++;
			pep->next = in_proc->edge;
			in_proc->edge = pep;
		}
		ep = NEXT_IN(ep);
	}

	/* nobody waits on us: a cycle through us is impossible */
	if (start_vertex->incount == 0) {
		mutex_exit(&flock_lock);
		return (0);
	}

	/*
	 * DFS from start_vertex.  A vertex that has been arrived at but
	 * not yet departed is on the current path; reaching one again
	 * means a cycle, i.e. deadlock.
	 */
	flk_proc_graph_uncolor();

	start_vertex->p_sedge = start_vertex->edge;

	STACK_PUSH(process_stack, start_vertex, p_stack);

	while ((pvertex = STACK_TOP(process_stack)) != NULL) {
		for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
			dvertex = pep->to_proc;
			if (!PROC_ARRIVED(dvertex)) {
				STACK_PUSH(process_stack, dvertex, p_stack);
				dvertex->p_sedge = dvertex->edge;
				PROC_ARRIVE(pvertex);
				pvertex->p_sedge = pep->next;
				break;
			}
			if (!PROC_DEPARTED(dvertex))
				goto deadlock;
		}
		if (pep == NULL) {
			PROC_DEPART(pvertex);
			STACK_POP(process_stack, p_stack);
		}
	}
	mutex_exit(&flock_lock);
	return (0);

deadlock:
	/*
	 * Deadlock: back out everything we added above.  Remove the
	 * lock-graph edges and drop the matching process-graph edge
	 * refcounts, freeing edges that reach zero and releasing vertices
	 * that become unreferenced.
	 */
	ep = FIRST_ADJ(lock);
	while (ep != HEAD(lock)) {
		proc_vertex_t *adj_proc;

		adj_proc = flk_get_proc_vertex(ep->to_vertex);
		nep = NEXT_ADJ(ep);
		IN_LIST_REMOVE(ep);
		ADJ_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ppep = start_vertex->edge;
		for (pep = start_vertex->edge; pep != NULL; ppep = pep,
		    pep = ppep->next) {
			if (pep->to_proc == adj_proc) {
				pep->refcount--;
				if (pep->refcount == 0) {
					if (pep == ppep) {
						start_vertex->edge = pep->next;
					} else {
						ppep->next = pep->next;
					}
					adj_proc->incount--;
					flk_proc_release(adj_proc);
					flk_free_proc_edge(pep);
				}
				break;
			}
		}
		ep = nep;
	}

	/* and the same for the in-edges */
	ep = FIRST_IN(lock);
	while (ep != HEAD(lock)) {
		proc_vertex_t *in_proc;

		in_proc = flk_get_proc_vertex(ep->from_vertex);
		nep = NEXT_IN(ep);
		IN_LIST_REMOVE(ep);
		ADJ_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ppep = in_proc->edge;
		for (pep = in_proc->edge; pep != NULL; ppep = pep,
		    pep = ppep->next) {
			if (pep->to_proc == start_vertex) {
				pep->refcount--;
				if (pep->refcount == 0) {
					if (pep == ppep) {
						in_proc->edge = pep->next;
					} else {
						ppep->next = pep->next;
					}
					start_vertex->incount--;
					flk_proc_release(in_proc);
					flk_free_proc_edge(pep);
				}
				break;
			}
		}
		ep = nep;
	}
	flk_proc_release(start_vertex);
	mutex_exit(&flock_lock);
	return (1);
}
/*
 * Return the process-graph vertex for the owner of `lock', creating one
 * if needed.  The lock caches its vertex index in l_pvertex; a stale
 * cache (vertex reused by another owner) falls back to a linear search.
 * New vertices fill a free slot if one exists, otherwise the vertex array
 * is grown by PROC_CHUNK.  Caller holds flock_lock.
 */
static proc_vertex_t *
flk_get_proc_vertex(lock_descriptor_t *lock)
{
	int i;
	proc_vertex_t	*pv;
	proc_vertex_t **palloc;

	ASSERT(MUTEX_HELD(&flock_lock));
	/* fast path: the cached index still names our owner's vertex */
	if (lock->pvertex != -1) {
		ASSERT(lock->pvertex >= 0);
		pv = pgraph.proc[lock->pvertex];
		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
			return (pv);
		}
	}
	for (i = 0; i < pgraph.gcount; i++) {
		pv = pgraph.proc[i];
		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
			lock->pvertex = pv->index = i;
			return (pv);
		}
	}
	/* no vertex for this owner yet: create one */
	pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
	pv->pid = lock->l_flock.l_pid;
	pv->sysid = lock->l_flock.l_sysid;
	flk_proc_vertex_allocs++;
	if (pgraph.free != 0) {
		/* reuse a previously freed slot */
		for (i = 0; i < pgraph.gcount; i++) {
			if (pgraph.proc[i] == NULL) {
				pgraph.proc[i] = pv;
				lock->pvertex = pv->index = i;
				pgraph.free--;
				return (pv);
			}
		}
	}
	/* no free slot: grow the vertex array by PROC_CHUNK entries */
	palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
	    sizeof (proc_vertex_t *), KM_SLEEP);

	if (pgraph.proc) {
		bcopy(pgraph.proc, palloc,
		    pgraph.gcount * sizeof (proc_vertex_t *));

		kmem_free(pgraph.proc,
		    pgraph.gcount * sizeof (proc_vertex_t *));
	}
	pgraph.proc = palloc;
	pgraph.free += (PROC_CHUNK - 1);
	pv->index = lock->pvertex = pgraph.gcount;
	pgraph.gcount += PROC_CHUNK;
	pgraph.proc[pv->index] = pv;
	return (pv);
}
/*
 * Allocate and zero a new process-graph edge, bumping the allocation
 * statistic.  The caller links it into a proc_vertex's edge list.
 *
 * Note: declared with (void) rather than an old-style empty parameter
 * list, matching the prototypes at the top of the file.
 */
static proc_edge_t *
flk_get_proc_edge(void)
{
	proc_edge_t *pep;

	pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
	flk_proc_edge_allocs++;
	return (pep);
}
/*
 * Free a process-graph edge and bump the free statistic.  The edge must
 * already be fully dereferenced.
 */
static void
flk_free_proc_edge(proc_edge_t *pep)
{
	ASSERT(pep->refcount == 0);
	flk_proc_edge_frees++;
	kmem_free((void *)pep, sizeof (proc_edge_t));
}
/*
 * Make every process-graph vertex appear unvisited by advancing the
 * graph's mark.  On wraparound, reset each vertex's arrival/departure
 * times and restart the mark at 1.
 *
 * Note: declared with (void) rather than an old-style empty parameter
 * list, matching the prototypes at the top of the file; the loop body is
 * braced to make the nesting explicit.
 */
static void
flk_proc_graph_uncolor(void)
{
	int i;

	if (pgraph.mark == UINT_MAX) {
		for (i = 0; i < pgraph.gcount; i++) {
			if (pgraph.proc[i] != NULL) {
				pgraph.proc[i]->atime = 0;
				pgraph.proc[i]->dtime = 0;
			}
		}
		pgraph.mark = 1;
	} else {
		pgraph.mark++;
	}
}
/*
 * Destroy a process-graph vertex once it is fully unreferenced (no
 * outgoing edges, no incoming references); its slot in pgraph.proc is
 * made available for reuse.  Caller holds flock_lock.
 */
static void
flk_proc_release(proc_vertex_t *proc)
{
	ASSERT(MUTEX_HELD(&flock_lock));

	if (proc->edge != NULL || proc->incount != 0)
		return;

	pgraph.proc[proc->index] = NULL;
	pgraph.free++;
	kmem_free(proc, sizeof (proc_vertex_t));
	flk_proc_vertex_frees++;
}
/*
 * Mirror a lock-graph edge into the process graph: when `delete' is zero
 * the corresponding process edge gains a reference (or is created); when
 * nonzero it loses one, and an edge whose refcount hits zero is unlinked
 * and freed, releasing vertices that become unreferenced.  OFD locks have
 * no owner vertex and are skipped.
 */
static void
flk_update_proc_graph(edge_t *ep, int delete)
{
	proc_vertex_t *toproc, *fromproc;
	proc_edge_t *pep, *prevpep;

	mutex_enter(&flock_lock);

	/* OFD locks are not tracked in the process graph */
	if (ep->from_vertex->l_ofd != NULL) {
		mutex_exit(&flock_lock);
		return;
	}

	toproc = flk_get_proc_vertex(ep->to_vertex);
	fromproc = flk_get_proc_vertex(ep->from_vertex);

	if (!delete)
		goto add;
	pep = prevpep = fromproc->edge;

	ASSERT(pep != NULL);
	while (pep != NULL) {
		if (pep->to_proc == toproc) {
			ASSERT(pep->refcount > 0);
			pep->refcount--;
			if (pep->refcount == 0) {
				/* unlink the edge from fromproc's list */
				if (pep == prevpep) {
					fromproc->edge = pep->next;
				} else {
					prevpep->next = pep->next;
				}
				toproc->incount--;
				flk_proc_release(toproc);
				flk_free_proc_edge(pep);
			}
			break;
		}
		prevpep = pep;
		pep = pep->next;
	}
	flk_proc_release(fromproc);
	mutex_exit(&flock_lock);
	return;
add:

	/* reuse an existing edge to toproc if there is one */
	pep = fromproc->edge;

	while (pep != NULL) {
		if (pep->to_proc == toproc) {
			ASSERT(pep->refcount > 0);
			pep->refcount++;
			break;
		}
		pep = pep->next;
	}
	if (pep == NULL) {
		pep = flk_get_proc_edge();
		pep->to_proc = toproc;
		pep->refcount = 1;
		toproc->incount++;
		pep->next = fromproc->edge;
		fromproc->edge = pep;
	}
	mutex_exit(&flock_lock);
}
/*
 * Cluster-only: record the new state of NLM server `nlmid' in the NLM
 * registry and apply the transition to the server's locks.  A state
 * change requested while the registry still shows "unknown" is recorded
 * as shutting-down.  Coming up re-stamps the server's locks; shutting
 * down wakes its sleeping requests; down releases its granted locks.
 */
void
cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
{
	/*
	 * For locks with sysid in the migration range (blocked on a
	 * booted cluster), ensure the registry exists before proceeding.
	 */
	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
		return;
	}
	/* registry not set up yet: nothing to record */
	if (nlm_reg_status == NULL) {
		return;
	}
	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
	mutex_enter(&nlm_reg_lock);

	if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
		/*
		 * If the NLM server "nlmid" is unknown in the NLM
		 * registry, add it as shutting down -- the transition to
		 * the requested state happens once it is known.
		 */
		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
		    FLK_NLM_SHUTTING_DOWN);
	} else {
		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
		    nlm_state);
	}
	mutex_exit(&nlm_reg_lock);

	/* apply the transition to this server's locks */
	switch (nlm_state) {
	case FLK_NLM_UP:
		cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
		break;
	case FLK_NLM_SHUTTING_DOWN:
		cl_flk_wakeup_sleeping_nlm_locks(nlmid);
		break;
	case FLK_NLM_DOWN:
		cl_flk_unlock_nlm_granted(nlmid);
		break;
	default:
		panic("cl_set_nlm_status: bad status (%d)", nlm_state);
	}
}
/*
 * Set the zone-wide lock-manager status and propagate it: UP stamps each
 * existing graph's per-bucket status, WAKEUP_SLEEPERS interrupts the
 * zone's sleeping lockmgr requests, DOWN additionally releases its
 * granted lockmgr locks.
 */
void
flk_set_lockmgr_status(flk_lockmgr_status_t status)
{
	int i;
	graph_t *gp;
	struct flock_globals *fg;

	fg = flk_get_globals();
	ASSERT(fg != NULL);

	mutex_enter(&flock_lock);
	fg->flk_lockmgr_status = status;
	mutex_exit(&flock_lock);

	switch (status) {
	case FLK_LOCKMGR_UP:
		for (i = 0; i < HASH_SIZE; i++) {
			mutex_enter(&flock_lock);
			gp = lock_graph[i];
			mutex_exit(&flock_lock);
			if (gp == NULL)
				continue;
			mutex_enter(&gp->gp_mutex);
			fg->lockmgr_status[i] = status;
			mutex_exit(&gp->gp_mutex);
		}
		break;
	case FLK_WAKEUP_SLEEPERS:
		wakeup_sleeping_lockmgr_locks(fg);
		break;
	case FLK_LOCKMGR_DOWN:
		unlock_lockmgr_granted(fg);
		break;
	default:
		panic("flk_set_lockmgr_status: bad status (%d)", status);
		break;
	}
}
/*
 * Build a list of flock64 snapshots of the locks matching the filters:
 * list_type selects active vs. sleeping lists; lock_state (if nonzero)
 * must intersect the lock's l_state; sysid filters owners when use_sysid
 * is set; pid filters owners unless NOPID; vp restricts to one vnode (and
 * one hash bucket); zoneid restricts to one zone unless ALL_ZONES.  Each
 * entry holds a reference on its vnode; the caller frees the list with
 * flk_free_locklist().  Returns NULL when nothing matches.
 */
locklist_t *
get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
    pid_t pid, const vnode_t *vp, zoneid_t zoneid)
{
	lock_descriptor_t	*lock;
	lock_descriptor_t	*graph_head;
	locklist_t		listhead;
	locklist_t		*llheadp;
	locklist_t		*llp;
	locklist_t		*lltp;
	graph_t			*gp;
	int			i;
	int			first_index; /* graph index */
	int			num_indexes; /* graphs to scan */

	ASSERT((list_type == FLK_ACTIVE_STATE) ||
	    (list_type == FLK_SLEEPING_STATE));

	/*
	 * Get a pointer to something to use as a list head while building
	 * the rest of the list.
	 */
	llheadp = &listhead;
	lltp = llheadp;
	llheadp->ll_next = (locklist_t *)NULL;

	/* a specific vnode narrows the scan to its hash bucket */
	if (vp == NULL) {
		first_index = 0;
		num_indexes = HASH_SIZE;
	} else {
		first_index = HASH_INDEX(vp);
		num_indexes = 1;
	}

	for (i = first_index; i < first_index + num_indexes; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		graph_head = (list_type == FLK_ACTIVE_STATE) ?
		    ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
		for (lock = graph_head->l_next;
		    lock != graph_head;
		    lock = lock->l_next) {
			if (use_sysid && lock->l_flock.l_sysid != sysid)
				continue;
			if (pid != NOPID && lock->l_flock.l_pid != pid)
				continue;
			if (vp != NULL && lock->l_vnode != vp)
				continue;
			if (lock_state && !(lock_state & lock->l_state))
				continue;
			if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
				continue;
			/*
			 * A matching lock was found.  Allocate a new
			 * element, snapshot the lock into it and append
			 * it to the result list (hold the vnode so the
			 * pointer stays valid after we drop the mutex).
			 */
			llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
			lltp->ll_next = llp;
			VN_HOLD(lock->l_vnode);
			llp->ll_vp = lock->l_vnode;
			create_flock(lock, &(llp->ll_flock));
			llp->ll_next = (locklist_t *)NULL;
			lltp = llp;
		}
		mutex_exit(&gp->gp_mutex);
	}

	llp = llheadp->ll_next;
	return (llp);
}
/* Return snapshots of all sleeping locks owned by (sysid, pid). */
locklist_t *
flk_get_sleeping_locks(int sysid, pid_t pid)
{
	return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
	    ALL_ZONES));
}
/* Return snapshots of all active locks owned by (sysid, pid). */
locklist_t *
flk_get_active_locks(int sysid, pid_t pid)
{
	return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
	    ALL_ZONES));
}
/* Return snapshots of all active locks on `vp', regardless of owner. */
locklist_t *
flk_active_locks_for_vp(const vnode_t *vp)
{
	return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
	    ALL_ZONES));
}
/* Return snapshots of the active nbmand (non-blocking mandatory) locks on `vp'. */
locklist_t *
flk_active_nbmand_locks_for_vp(const vnode_t *vp)
{
	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
	    NOPID, vp, ALL_ZONES));
}
/* Return snapshots of all active nbmand locks held by process `pid'. */
locklist_t *
flk_active_nbmand_locks(pid_t pid)
{
	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
	    pid, NULL, ALL_ZONES));
}
/*
 * Free a lock list produced by get_lock_list(), dropping each entry's
 * vnode hold along the way.
 */
void
flk_free_locklist(locklist_t *llp)
{
	locklist_t *next;

	for (; llp != NULL; llp = next) {
		next = llp->ll_next;
		VN_RELE(llp->ll_vp);
		kmem_free(llp, sizeof (*llp));
	}
}
/*
 * Cluster-only: stamp every lock (sleeping and active, in every graph)
 * registered by NLM server `nlmid' with the new NLM state.
 */
static void
cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
{
	int i;
	graph_t *gp;
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock; list may change */
	int lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL) {
			continue;
		}

		/* set the NLM state of the sleeping locks first */
		mutex_enter(&gp->gp_mutex);
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			/* get NLM id of the lock */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				SET_NLM_STATE(lock, nlm_state);
			}
		}

		/* now the active locks */
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			/* get NLM id of the lock */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				ASSERT(IS_ACTIVE(lock));
				SET_NLM_STATE(lock, nlm_state);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
/*
 * Cluster-only: interrupt every sleeping lock-manager request registered
 * by NLM server `nlmid', marking each FLK_NLM_SHUTTING_DOWN first so the
 * woken thread knows why it was interrupted.
 */
static void
cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock; wakeup may free lock */
	int i;
	graph_t *gp;
	int	lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			if (IS_LOCKMGR(lock)) {
				/* get NLM id */
				lock_nlmid =
				    GETNLMID(lock->l_flock.l_sysid);
				if (nlmid == lock_nlmid) {
					SET_NLM_STATE(lock,
					    FLK_NLM_SHUTTING_DOWN);
					INTERRUPT_WAKEUP(lock);
				}
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
/*
 * Cluster-only: release every active lock granted through NLM server
 * `nlmid' -- the server is down, so its grants are no longer valid.
 * Dependents are woken and the descriptors freed.
 */
static void
cl_flk_unlock_nlm_granted(int nlmid)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock; lock is freed below */
	int i;
	graph_t *gp;
	int	lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			ASSERT(IS_ACTIVE(lock));

			/* skip locks not registered through this NLM */
			if (IS_LOCKMGR(lock)) {
				/* get NLM id */
				lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
				if (nlmid == lock_nlmid) {
					flk_delete_active_lock(lock, 0);
					flk_wakeup(lock, 1);
					flk_free_lock(lock);
				}
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
/*
 * Interrupt every sleeping lock-manager request belonging to the current
 * zone, marking each graph's status FLK_WAKEUP_SLEEPERS on the way.
 */
static void
wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock; wakeup may free lock */
	int i;
	graph_t *gp;
	zoneid_t zoneid = getzoneid();

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
				INTERRUPT_WAKEUP(lock);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
/*
 * Release every active lock-manager lock belonging to the current zone,
 * marking each graph's status FLK_LOCKMGR_DOWN on the way.  Dependents
 * are woken and the descriptors freed.
 */
static void
unlock_lockmgr_granted(struct flock_globals *fg)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL; /* next lock; lock is freed below */
	int i;
	graph_t *gp;
	zoneid_t zoneid = getzoneid();

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
				ASSERT(IS_ACTIVE(lock));
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
/*
 * Block until the sleeping `request' is granted, cancelled, or
 * interrupted.  A signal during the wait marks the request
 * FLK_INTERRUPTED_STATE, which also terminates the loop.  Caller holds
 * the graph mutex; cv_wait_sig drops and reacquires it.
 */
static void
wait_for_lock(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
	    !(IS_INTERRUPTED(request))) {
		if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
			/* interrupted by a signal */
			flk_set_state(request, FLK_INTERRUPTED_STATE);
			request->l_state |= INTERRUPTED_LOCK;
		}
	}
}
/*
 * Translate the internal [l_start, l_end] representation of `lp' into a
 * flock64: an end of MAX_U_OFFSET_T means "to end of file" and is encoded
 * as l_len == 0.
 */
static void
create_flock(lock_descriptor_t *lp, flock64_t *flp)
{
	u_offset_t hi = lp->l_end;

	ASSERT(hi == MAX_U_OFFSET_T || hi <= MAXEND);
	ASSERT(hi >= lp->l_start);

	flp->l_type = lp->l_type;
	flp->l_whence = 0;
	flp->l_start = lp->l_start;
	flp->l_len = (hi == MAX_U_OFFSET_T) ? 0 : (hi - lp->l_start + 1);
	flp->l_sysid = lp->l_flock.l_sysid;
	flp->l_pid = lp->l_flock.l_pid;
}
/*
 * Convert a flock64 into the internal absolute byte range [*start, *end].
 * l_whence selects the base for l_start: 0 = file start, 1 = current
 * offset, 2 = end of file (file size fetched via VOP_GETATTR).  l_len of
 * 0 means "to end of file" (MAX_U_OFFSET_T); a negative l_len describes
 * the range *ending* at the base.  Returns 0, EINVAL for a bad whence, or
 * the VOP_GETATTR error.
 */
int
flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
    u_offset_t *start, u_offset_t *end, offset_t offset)
{
	struct vattr vattr;
	int	error;

	/*
	 * Determine the starting point of the request
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		*start = (u_offset_t)flp->l_start;
		break;
	case 1:		/* SEEK_CUR */
		*start = (u_offset_t)(flp->l_start + offset);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		*start = (u_offset_t)(flp->l_start + vattr.va_size);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		*end = MAX_U_OFFSET_T;
	else if ((offset_t)flp->l_len > 0) {
		*end = (u_offset_t)(*start + (flp->l_len - 1));
	} else {
		/*
		 * Negative length; why do we even allow this ?
		 * Because this allows easy specification of
		 * the last n bytes of the file.
		 */
		*end = *start;
		*start += (u_offset_t)flp->l_len;
		(*start)++;
	}
	return (0);
}
/*
 * Validate an internal byte range against the file-size limit `max'.
 * The range must lie within [0, max] -- an end of MAX_U_OFFSET_T
 * ("to EOF") is always acceptable -- and must not be inverted.
 * Returns 0 or EINVAL.
 */
int
flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
{
	if (start > max)
		return (EINVAL);
	if (end > max && end != MAX_U_OFFSET_T)
		return (EINVAL);
	if (start > end)
		return (EINVAL);
	return (0);
}
/*
 * Fill the request's flock structure with a description of `blocker' (for
 * F_GETLK-style reporting).  Lock-manager requesters get the raw 64-bit
 * range; local requesters get a range clamped so a start beyond MAXEND is
 * reported as (MAXEND, to-EOF).  An end of MAX_U_OFFSET_T is encoded as
 * l_len == 0.
 */
static void
report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
{
	flock64_t *flrp;			/* l_flock portion of request */

	ASSERT(blocker != NULL);

	flrp = &request->l_flock;
	flrp->l_whence = 0;
	flrp->l_type = blocker->l_type;
	flrp->l_pid = blocker->l_flock.l_pid;
	flrp->l_sysid = blocker->l_flock.l_sysid;
	/* OFD locks: propagate the blocker's file handle for comparison */
	request->l_ofd = blocker->l_ofd;

	if (IS_LOCKMGR(request)) {
		flrp->l_start = blocker->l_start;
		if (blocker->l_end == MAX_U_OFFSET_T)
			flrp->l_len = 0;
		else
			flrp->l_len = blocker->l_end - blocker->l_start + 1;
	} else {
		if (blocker->l_start > MAXEND) {
			/* range lies entirely beyond the local limit */
			flrp->l_start = MAXEND;
			flrp->l_len = 0;
		} else {
			flrp->l_start = blocker->l_start;
			if (blocker->l_end == MAX_U_OFFSET_T)
				flrp->l_len = 0;
			else
				flrp->l_len = blocker->l_end -
				    blocker->l_start + 1;
		}
	}
}
/*
 * Cluster-only: reset NLM server `nlmid's entry in the NLM registry to
 * the "unknown" state.
 */
void
cl_flk_change_nlm_state_to_unknown(int nlmid)
{
	/* nothing to do unless clustering is up and the registry exists */
	if ((cluster_bootflags & CLUSTER_BOOTED) == 0 ||
	    nlm_reg_status == NULL)
		return;

	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);

	mutex_enter(&nlm_reg_lock);
	FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
	mutex_exit(&nlm_reg_lock);
}
/*
 * Return nonzero if an active lock on `vp' blocks the I/O described by
 * (op, offset, length) under non-blocking mandatory locking.  Only
 * nbmand-tagged locks count unless `svmand' (System V mandatory mode) is
 * set; locks owned by the caller (same pid and sysid, taken from `ct' or
 * the current process) never conflict.
 */
int
nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset,
    ssize_t length, int svmand, caller_context_t *ct)
{
	int conflict = 0;
	graph_t	*gp;
	lock_descriptor_t *lock;
	pid_t pid;
	int sysid;

	if (ct == NULL) {
		pid = curproc->p_pid;
		sysid = 0;
	} else {
		pid = ct->cc_pid;
		sysid = ct->cc_sysid;
	}

	mutex_enter(&flock_lock);
	gp = lock_graph[HASH_INDEX(vp)];
	mutex_exit(&flock_lock);
	if (gp == NULL)
		return (0);

	mutex_enter(&gp->gp_mutex);
	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
		if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
		    (lock->l_flock.l_sysid != sysid ||
		    lock->l_flock.l_pid != pid) &&
		    lock_blocks_io(op, offset, length,
		    lock->l_type, lock->l_start, lock->l_end)) {
			DTRACE_PROBE1(conflict_lock,
			    lock_descriptor_t *, lock);
			conflict = 1;
			break;
		}
	}
	mutex_exit(&gp->gp_mutex);

	return (conflict);
}
/*
 * Return nonzero if the I/O range [offset, offset+length) conflicts with
 * the lock range [lock_start, lock_end].  A read never conflicts with a
 * read lock; otherwise any overlap between the two ranges is a conflict.
 */
static int
lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
    int lock_type, u_offset_t lock_start, u_offset_t lock_end)
{
	ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
	ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);

	if (op == NBL_READ && lock_type == F_RDLCK)
		return (0);

	/* overlap: lock starts inside the I/O, or I/O starts inside lock */
	return ((offset <= lock_start && lock_start < offset + length) ||
	    (lock_start <= offset && offset <= lock_end));
}
#ifdef DEBUG
/*
 * DEBUG consistency check over the active-lock queue of graph 'gp'.
 * For each active lock it verifies that:
 *  - the lock is marked active, is not blocked, and is not a barrier;
 *  - every in-edge originates from a sleeping, blocked lock;
 *  - no other active lock on the same vnode blocks it (in either
 *    direction) -- two mutually conflicting locks must never both be
 *    on the active queue.  Panics on any violation.
 * NOTE(review): presumably called with gp->gp_mutex held (via the
 * CHECK_ACTIVE_LOCKS macro) -- confirm at the call sites.
 */
static void
check_active_locks(graph_t *gp)
{
lock_descriptor_t *lock, *lock1;
edge_t *ep;
/* Walk every lock currently on the active queue. */
for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
lock = lock->l_next) {
ASSERT(IS_ACTIVE(lock));
ASSERT(NOT_BLOCKED(lock));
ASSERT(!IS_BARRIER(lock));
/* All edges into an active lock must come from sleeping locks. */
ep = FIRST_IN(lock);
while (ep != HEAD(lock)) {
ASSERT(IS_SLEEPING(ep->from_vertex));
ASSERT(!NOT_BLOCKED(ep->from_vertex));
ep = NEXT_IN(ep);
}
/* Check every later active lock on the same vnode for conflicts. */
for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
lock1 = lock1->l_next) {
if (lock1->l_vnode == lock->l_vnode) {
if (BLOCKS(lock1, lock)) {
cmn_err(CE_PANIC,
"active lock %p blocks %p",
(void *)lock1, (void *)lock);
} else if (BLOCKS(lock, lock1)) {
cmn_err(CE_PANIC,
"active lock %p blocks %p",
(void *)lock, (void *)lock1);
}
}
}
}
}
/*
 * Validate a lock state-machine transition.  Returns 0 when moving
 * from old_state to new_state is legal, 1 when it is illegal
 * (including any unrecognized old_state).  Used by the
 * CHECK_LOCK_TRANSITION debug macro, which panics on a non-zero
 * return.
 */
static int
check_lock_transition(int old_state, int new_state)
{
	switch (old_state) {
	case FLK_INITIAL_STATE:
		/* A fresh request may start, sleep, go active, or die. */
		return (!(new_state == FLK_START_STATE ||
		    new_state == FLK_SLEEPING_STATE ||
		    new_state == FLK_ACTIVE_STATE ||
		    new_state == FLK_DEAD_STATE));
	case FLK_START_STATE:
		return (!(new_state == FLK_ACTIVE_STATE ||
		    new_state == FLK_DEAD_STATE));
	case FLK_ACTIVE_STATE:
		/* An active lock can only be released. */
		return (new_state != FLK_DEAD_STATE);
	case FLK_SLEEPING_STATE:
		return (!(new_state == FLK_GRANTED_STATE ||
		    new_state == FLK_INTERRUPTED_STATE ||
		    new_state == FLK_CANCELLED_STATE));
	case FLK_GRANTED_STATE:
		return (!(new_state == FLK_START_STATE ||
		    new_state == FLK_INTERRUPTED_STATE ||
		    new_state == FLK_CANCELLED_STATE));
	case FLK_CANCELLED_STATE:
		return (!(new_state == FLK_INTERRUPTED_STATE ||
		    new_state == FLK_DEAD_STATE));
	case FLK_INTERRUPTED_STATE:
		return (new_state != FLK_DEAD_STATE);
	case FLK_DEAD_STATE:
		/* Dead is terminal; only a self-transition is tolerated. */
		return (new_state != FLK_DEAD_STATE);
	default:
		/* Unknown starting state: always illegal. */
		return (1);
	}
}
/*
 * DEBUG consistency check over the sleeping-lock queue of graph 'gp'.
 * For every sleeping lock it verifies that:
 *  - the lock is not a barrier;
 *  - for any other sleeping or active lock on the same vnode that
 *    blocks it, the sleeping lock is neither granted nor marked
 *    unblocked, and a path to the blocker exists in the dependency
 *    graph (path() panics otherwise);
 *  - every out-edge of the sleeping lock points at a lock that really
 *    blocks it.
 * NOTE(review): presumably called with gp->gp_mutex held (via the
 * CHECK_SLEEPING_LOCKS macro) -- confirm at the call sites.
 */
static void
check_sleeping_locks(graph_t *gp)
{
lock_descriptor_t *lock1, *lock2;
edge_t *ep;
for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
lock1 = lock1->l_next) {
ASSERT(!IS_BARRIER(lock1));
/* Cross-check against later locks on the sleeping queue. */
for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
lock2 = lock2->l_next) {
if (lock1->l_vnode == lock2->l_vnode) {
if (BLOCKS(lock2, lock1)) {
ASSERT(!IS_GRANTED(lock1));
ASSERT(!NOT_BLOCKED(lock1));
path(lock1, lock2);
}
}
}
/* Cross-check against every lock on the active queue. */
for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
lock2 = lock2->l_next) {
ASSERT(!IS_BARRIER(lock1));
if (lock1->l_vnode == lock2->l_vnode) {
if (BLOCKS(lock2, lock1)) {
ASSERT(!IS_GRANTED(lock1));
ASSERT(!NOT_BLOCKED(lock1));
path(lock1, lock2);
}
}
}
/* Every out-edge must point at a lock that blocks lock1. */
ep = FIRST_ADJ(lock1);
while (ep != HEAD(lock1)) {
ASSERT(BLOCKS(ep->to_vertex, lock1));
ep = NEXT_ADJ(ep);
}
}
}
/*
 * Search the dependency graph for a path of length two or more edges
 * from lock1 to lock2, using an explicit-stack depth-first traversal.
 * Vertices are colored as they are visited so each is expanded at most
 * once.  The search is seeded with all of lock1's direct successors;
 * when 'no_path' is non-zero the caller asserts that no direct edge
 * from lock1 to lock2 exists.  Returns 1 if such a path is found,
 * 0 otherwise.
 * NOTE(review): if lock2 is itself a direct successor of lock1 it is
 * colored during seeding, so indirect paths ending at lock2 are then
 * skipped by the COLORED() test -- confirm this is the intended
 * behavior for the level_two_path(lock1, lock2, 0) call in path().
 */
static int
level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
{
edge_t *ep;
lock_descriptor_t *vertex;
lock_descriptor_t *vertex_stack;
STACK_INIT(vertex_stack);
/* Clear all visit marks in the graph before searching. */
flk_graph_uncolor(lock1->l_graph);
ep = FIRST_ADJ(lock1);
ASSERT(ep != HEAD(lock1));
/* Seed the stack with every direct successor of lock1. */
while (ep != HEAD(lock1)) {
if (no_path)
ASSERT(ep->to_vertex != lock2);
STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
COLOR(ep->to_vertex);
ep = NEXT_ADJ(ep);
}
/* Depth-first traversal; stop as soon as lock2 is reached. */
while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
STACK_POP(vertex_stack, l_dstack);
for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
ep = NEXT_ADJ(ep)) {
if (COLORED(ep->to_vertex))
continue;
COLOR(ep->to_vertex);
if (ep->to_vertex == lock2)
return (1);
STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
}
}
return (0);
}
/*
 * DEBUG check: panic if the owner identified by <pid, sysid> still
 * holds any lock for vnode 'vp' on either the active or the sleeping
 * queue of graph 'gp'.  A pid of 0 (no specific owner) is ignored.
 */
static void
check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
{
	lock_descriptor_t *lk;

	if (pid == 0)
		return;

	/* Scan this vnode's locks on the active queue. */
	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lk, vp);
	if (lk) {
		for (; lk != ACTIVE_HEAD(gp) && (lk->l_vnode == vp);
		    lk = lk->l_next) {
			if (lk->l_flock.l_pid == pid &&
			    lk->l_flock.l_sysid == sysid)
				cmn_err(CE_PANIC,
				    "owner pid %d's lock %p in active queue",
				    pid, (void *)lk);
		}
	}

	/* Scan this vnode's locks on the sleeping queue. */
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lk, vp);
	if (lk) {
		for (; lk != SLEEPING_HEAD(gp) && (lk->l_vnode == vp);
		    lk = lk->l_next) {
			if (lk->l_flock.l_pid == pid &&
			    lk->l_flock.l_sysid == sysid)
				cmn_err(CE_PANIC,
				    "owner pid %d's lock %p in sleep queue",
				    pid, (void *)lk);
		}
	}
}
/*
 * Return 1 if a direct edge from lock1 to lock2 exists in the
 * dependency graph (a path of length one), 0 otherwise.
 */
static int
level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	edge_t *ep;

	for (ep = FIRST_ADJ(lock1); ep != HEAD(lock1); ep = NEXT_ADJ(ep)) {
		if (ep->to_vertex == lock2)
			return (1);
	}
	return (0);
}
/*
 * Return 1 if lock2 is NOT reachable from lock1 through a multi-edge
 * path (asserting along the way that no direct edge exists), 0 if
 * such a path is found.
 */
static int
no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	int reachable = level_two_path(lock1, lock2, 1);

	return (reachable == 0);
}
/*
 * DEBUG check on the graph connectivity between two locks that are
 * expected to be related.  If a direct edge from lock1 to lock2
 * exists, warn when an additional multi-edge path is also found; if
 * no direct edge exists, panic when no path connects them at all.
 */
static void
path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	if (!level_one_path(lock1, lock2)) {
		/* No direct edge: some longer path must connect them. */
		if (no_path(lock1, lock2)) {
			cmn_err(CE_PANIC,
			    "No path from lock1 %p to lock2 %p",
			    (void *)lock1, (void *)lock2);
		}
		return;
	}
	/* Direct edge exists: there should be no second, longer path. */
	if (level_two_path(lock1, lock2, 0) != 0) {
		cmn_err(CE_WARN,
		    "one edge one path from lock1 %p lock2 %p",
		    (void *)lock1, (void *)lock2);
	}
}
#endif