#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>
#include <sys/sysctl.h>
#include <vm/uma.h>
static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
&rangelock_cheat, 0,
"");
#define RL_CHEAT_MASK 0x7
#define RL_CHEAT_CHEATING 0x1
#define RL_CHEAT_WLOCKED 0x2
#define RL_CHEAT_DRAINING 0x4
#define RL_CHEAT_READER 0x8
#define RL_RET_CHEAT_RLOCKED 0x1100
#define RL_RET_CHEAT_WLOCKED 0x2200
static void
rangelock_cheat_drain(struct rangelock *lock)
{
uintptr_t v;
DROP_GIANT();
for (;;) {
v = atomic_load_ptr(&lock->head);
if ((v & RL_CHEAT_DRAINING) == 0)
break;
sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
sleepq_wait(&lock->head, PRI_USER);
sleepq_lock(&lock->head);
}
sleepq_release(&lock->head);
PICKUP_GIANT();
}
static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
void **cookie)
{
uintptr_t v, x;
v = atomic_load_ptr(&lock->head);
if ((v & RL_CHEAT_CHEATING) == 0)
return (false);
if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
if (trylock) {
*cookie = NULL;
return (true);
}
sleepq_lock(&lock->head);
drain1:
rangelock_cheat_drain(lock);
return (false);
}
switch (locktype) {
case RL_LOCK_READ:
for (;;) {
if ((v & RL_CHEAT_WLOCKED) != 0) {
if (trylock) {
*cookie = NULL;
return (true);
}
x = v | RL_CHEAT_DRAINING;
sleepq_lock(&lock->head);
if (atomic_fcmpset_rel_ptr(&lock->head, &v,
x) != 0)
goto drain1;
sleepq_release(&lock->head);
} else {
x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
x |= RL_CHEAT_CHEATING;
if (atomic_fcmpset_acq_ptr(&lock->head, &v,
x) != 0)
break;
}
if ((v & RL_CHEAT_CHEATING) == 0)
return (false);
if ((v & RL_CHEAT_DRAINING) != 0)
goto drain;
}
*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
break;
case RL_LOCK_WRITE:
for (;;) {
if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
(v & RL_CHEAT_WLOCKED) != 0) {
if (trylock) {
*cookie = NULL;
return (true);
}
x = v | RL_CHEAT_DRAINING;
sleepq_lock(&lock->head);
if (atomic_fcmpset_rel_ptr(&lock->head, &v,
x) != 0)
goto drain1;
sleepq_release(&lock->head);
} else {
x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
if (atomic_fcmpset_acq_ptr(&lock->head, &v,
x) != 0)
break;
}
if ((v & RL_CHEAT_CHEATING) == 0)
return (false);
if ((v & RL_CHEAT_DRAINING) != 0)
goto drain;
}
*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
break;
default:
__assert_unreachable();
break;
}
return (true);
}
static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
uintptr_t v, x;
v = atomic_load_ptr(&lock->head);
if ((v & RL_CHEAT_CHEATING) == 0)
return (false);
MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
(uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);
switch ((uintptr_t)cookie) {
case RL_RET_CHEAT_RLOCKED:
for (;;) {
MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
MPASS((v & RL_CHEAT_WLOCKED) == 0);
x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
if ((v & RL_CHEAT_DRAINING) != 0) {
if (x != 0) {
x |= RL_CHEAT_DRAINING |
RL_CHEAT_CHEATING;
if (atomic_fcmpset_rel_ptr(&lock->head,
&v, x) != 0)
break;
} else {
sleepq_lock(&lock->head);
if (atomic_fcmpset_rel_ptr(&lock->head,
&v, x) != 0) {
sleepq_broadcast(
&lock->head,
SLEEPQ_SLEEP, 0, 0);
sleepq_release(&lock->head);
break;
}
sleepq_release(&lock->head);
}
} else {
x |= RL_CHEAT_CHEATING;
if (atomic_fcmpset_rel_ptr(&lock->head, &v,
x) != 0)
break;
}
}
break;
case RL_RET_CHEAT_WLOCKED:
for (;;) {
MPASS((v & RL_CHEAT_WLOCKED) != 0);
if ((v & RL_CHEAT_DRAINING) != 0) {
sleepq_lock(&lock->head);
atomic_store_ptr(&lock->head, 0);
sleepq_broadcast(&lock->head,
SLEEPQ_SLEEP, 0, 0);
sleepq_release(&lock->head);
break;
} else {
if (atomic_fcmpset_ptr(&lock->head, &v,
RL_CHEAT_CHEATING) != 0)
break;
}
}
break;
default:
__assert_unreachable();
break;
}
return (true);
}
static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
uintptr_t v;
v = atomic_load_ptr(&lock->head);
if ((v & RL_CHEAT_CHEATING) == 0)
return (false);
MPASS(v == RL_CHEAT_CHEATING);
return (true);
}
static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);
struct rl_q_entry {
struct rl_q_entry *rl_q_next;
struct rl_q_entry *rl_q_free;
off_t rl_q_start, rl_q_end;
int rl_q_flags;
#ifdef INVARIANTS
struct thread *rl_q_owner;
#endif
};
static uma_zone_t rl_entry_zone;
static smr_t rl_smr;
static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);
static void
rangelock_sys_init(void *dummy __unused)
{
rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
UMA_ZONE_SMR);
rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
struct rl_q_entry *e;
e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
e->rl_q_next = NULL;
e->rl_q_free = NULL;
e->rl_q_start = start;
e->rl_q_end = end;
e->rl_q_flags = flags;
#ifdef INVARIANTS
e->rl_q_owner = curthread;
#endif
return (e);
}
void
rangelock_init(struct rangelock *lock)
{
lock->sleepers = false;
atomic_store_ptr(&lock->head, rangelock_cheat ? RL_CHEAT_CHEATING : 0);
}
void
rangelock_destroy(struct rangelock *lock)
{
MPASS(!lock->sleepers);
if (!rangelock_cheat_destroy(lock))
rangelock_noncheating_destroy(lock);
DEBUG_POISON_POINTER(*(void **)&lock->head);
}
static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
return (((uintptr_t)e & 1) != 0);
}
static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}
static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
MPASS(rl_e_is_marked(e));
return (rl_e_unmark_unchecked(e));
}
static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS)
int r = atomic_testandset_ptr((uintptr_t *)&e->rl_q_next, 0);
MPASS(r == 0);
#else
atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}
static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}
static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}
static void
rangelock_free_free(struct rl_q_entry *free)
{
struct rl_q_entry *x, *xp;
for (x = free; x != NULL; x = xp) {
MPASS(!rl_e_is_marked(x));
xp = x->rl_q_free;
MPASS(!rl_e_is_marked(xp));
uma_zfree_smr(rl_entry_zone, x);
}
}
static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
bool sleepers;
MPASS(lock != NULL && e != NULL);
MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
MPASS(e->rl_q_owner == curthread);
rl_e_mark(e);
sleepers = lock->sleepers;
lock->sleepers = false;
if (sleepers)
sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}
void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
if (rangelock_cheat_unlock(lock, cookie))
return;
sleepq_lock(&lock->sleepers);
rangelock_unlock_int(lock, cookie);
sleepq_release(&lock->sleepers);
}
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
bool rds;
if (e1 == NULL)
return (1);
if (e2->rl_q_start >= e1->rl_q_end)
return (-1);
rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
if (e2->rl_q_start >= e1->rl_q_start && rds)
return (-1);
if (e1->rl_q_start >= e2->rl_q_end)
return (1);
if (e1->rl_q_start >= e2->rl_q_start && rds)
return (1);
return (0);
}
static void
rl_insert_sleep(struct rangelock *lock)
{
smr_exit(rl_smr);
DROP_GIANT();
lock->sleepers = true;
sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
sleepq_wait(&lock->sleepers, PRI_USER);
PICKUP_GIANT();
smr_enter(rl_smr);
}
static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
struct rl_q_entry *new)
{
MPASS(!rl_e_is_marked(old));
return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
(uintptr_t)new) != 0);
}
static void
rangelock_noncheating_destroy(struct rangelock *lock)
{
struct rl_q_entry *cur, *free, *next, **prev;
free = NULL;
again:
smr_enter(rl_smr);
prev = (struct rl_q_entry **)&lock->head;
cur = rl_q_load(prev);
MPASS(!rl_e_is_marked(cur));
for (;;) {
if (cur == NULL)
break;
if (rl_e_is_marked(cur))
goto again;
next = rl_q_load(&cur->rl_q_next);
if (rl_e_is_marked(next)) {
next = rl_e_unmark(next);
if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
cur->rl_q_owner = NULL;
#endif
cur->rl_q_free = free;
free = cur;
cur = next;
continue;
}
smr_exit(rl_smr);
goto again;
}
sleepq_lock(&lock->sleepers);
if (!rl_e_is_marked(cur)) {
rl_insert_sleep(lock);
goto again;
}
}
smr_exit(rl_smr);
rangelock_free_free(free);
}
enum RL_INSERT_RES {
RL_TRYLOCK_FAILED,
RL_TRYLOCK_FAILED_MARKED,
RL_LOCK_SUCCESS,
RL_LOCK_RETRY,
};
static enum RL_INSERT_RES
rl_conflict(struct rangelock *lock, struct rl_q_entry *cur, struct rl_q_entry *e,
bool trylock, bool inserted)
{
sleepq_lock(&lock->sleepers);
if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
sleepq_release(&lock->sleepers);
return (RL_LOCK_SUCCESS);
}
KASSERT(cur->rl_q_owner != curthread,
("%s: conflicting range is locked by the current thread",
__func__));
if (inserted)
rangelock_unlock_int(lock, e);
if (trylock) {
sleepq_release(&lock->sleepers);
return (inserted ? RL_TRYLOCK_FAILED_MARKED: RL_TRYLOCK_FAILED);
}
rl_insert_sleep(lock);
return (RL_LOCK_RETRY);
}
static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
struct rl_q_entry **free)
{
struct rl_q_entry *cur, *next, **prev;
enum RL_INSERT_RES res;
again:
prev = &e->rl_q_next;
cur = rl_q_load(prev);
MPASS(!rl_e_is_marked(cur));
for (;;) {
if (cur == NULL || cur->rl_q_start >= e->rl_q_end)
return (RL_LOCK_SUCCESS);
next = rl_q_load(&cur->rl_q_next);
if (rl_e_is_marked(next)) {
next = rl_e_unmark(next);
if (rl_q_cas(prev, cur, next)) {
cur->rl_q_free = *free;
*free = cur;
cur = next;
continue;
}
goto again;
}
if (rl_e_is_rlock(cur)) {
prev = &cur->rl_q_next;
cur = rl_e_unmark_unchecked(rl_q_load(prev));
continue;
}
res = rl_conflict(lock, cur, e, trylock, true);
if (res != RL_LOCK_SUCCESS)
return (res);
}
}
static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
bool trylock, struct rl_q_entry **free)
{
struct rl_q_entry *cur, *next, **prev;
enum RL_INSERT_RES res;
again:
prev = (struct rl_q_entry **)&lock->head;
cur = rl_q_load(prev);
MPASS(!rl_e_is_marked(cur));
for (;;) {
if (cur == e)
return (RL_LOCK_SUCCESS);
next = rl_q_load(&cur->rl_q_next);
if (rl_e_is_marked(next)) {
next = rl_e_unmark(next);
if (rl_q_cas(prev, cur, next)) {
cur->rl_q_free = *free;
*free = cur;
cur = next;
continue;
}
goto again;
}
if (cur->rl_q_end <= e->rl_q_start) {
prev = &cur->rl_q_next;
cur = rl_e_unmark_unchecked(rl_q_load(prev));
continue;
}
res = rl_conflict(lock, cur, e, trylock, true);
if (res != RL_LOCK_SUCCESS)
return (res);
}
}
static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
struct rl_q_entry **free)
{
struct rl_q_entry *cur, *next, **prev;
int r;
again:
prev = (struct rl_q_entry **)&lock->head;
cur = rl_q_load(prev);
if (cur == NULL && rl_q_cas(prev, NULL, e))
return (RL_LOCK_SUCCESS);
for (;;) {
if (cur != NULL) {
if (rl_e_is_marked(cur))
goto again;
next = rl_q_load(&cur->rl_q_next);
if (rl_e_is_marked(next)) {
next = rl_e_unmark(next);
if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
cur->rl_q_owner = NULL;
#endif
cur->rl_q_free = *free;
*free = cur;
cur = next;
continue;
}
goto again;
}
}
MPASS(!rl_e_is_marked(cur));
r = rl_e_compare(cur, e);
if (r == -1) {
prev = &cur->rl_q_next;
cur = rl_q_load(prev);
} else if (r == 0) {
enum RL_INSERT_RES res;
res = rl_conflict(lock, cur, e, trylock, false);
switch (res) {
case RL_LOCK_SUCCESS:
break;
case RL_LOCK_RETRY:
goto again;
default:
return (res);
}
} else {
e->rl_q_next = cur;
if (rl_q_cas(prev, cur, e)) {
atomic_thread_fence_acq();
return (rl_e_is_rlock(e) ?
rl_r_validate(lock, e, trylock, free) :
rl_w_validate(lock, e, trylock, free));
}
e->rl_q_next = NULL;
cur = rl_q_load(prev);
}
}
}
static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
vm_ooffset_t end, int locktype)
{
struct rl_q_entry *e, *free;
void *cookie;
enum RL_INSERT_RES res;
if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
return (cookie);
for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
free = NULL;
e = rlqentry_alloc(start, end, locktype);
smr_enter(rl_smr);
res = rl_insert(lock, e, trylock, &free);
smr_exit(rl_smr);
if (res == RL_TRYLOCK_FAILED || res == RL_TRYLOCK_FAILED_MARKED) {
MPASS(trylock);
if (res == RL_TRYLOCK_FAILED) {
e->rl_q_free = free;
free = e;
}
e = NULL;
}
rangelock_free_free(free);
}
return (e);
}
void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}
void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}
void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}
void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}
void
rangelock_may_recurse(struct rangelock *lock)
{
uintptr_t v, x;
v = atomic_load_ptr(&lock->head);
if ((v & RL_CHEAT_CHEATING) == 0)
return;
sleepq_lock(&lock->head);
for (;;) {
if ((v & RL_CHEAT_CHEATING) == 0) {
sleepq_release(&lock->head);
return;
}
if ((v & RL_CHEAT_WLOCKED) != 0 ||
(v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER) {
x = v | RL_CHEAT_DRAINING;
if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
rangelock_cheat_drain(lock);
return;
}
continue;
}
x = 0;
if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
sleepq_release(&lock->head);
return;
}
}
}
#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
struct rangelock *lock;
struct rl_q_entry *e, *x;
uintptr_t v;
if (!have_addr) {
db_printf("show rangelock addr\n");
return;
}
lock = (struct rangelock *)addr;
db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
v = lock->head;
if ((v & RL_CHEAT_CHEATING) != 0) {
db_printf(" cheating head %#jx\n", (uintmax_t)v);
return;
}
for (e = (struct rl_q_entry *)(lock->head);;) {
x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
if (x == NULL)
break;
db_printf(" entry %p marked %d %d start %#jx end %#jx "
"flags %x next %p",
e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
db_printf(" owner %p (%d)", x->rl_q_owner,
x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
db_printf("\n");
e = x->rl_q_next;
}
}
#endif