#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/inttypes.h>
#include <sys/atomic.h>
#include <sys/tuneable.h>
/* Priority handed to thread_create() when the roll thread is started. */
extern pri_t minclsyspri;
/* NOTE(review): not referenced in the visible part of this file. */
extern struct kmem_cache *lufs_bp;
/*
 * Quota push function.  Map entries whose me_func is this function get
 * special treatment in deltamap_add() and in HANDLE_DQUOT().
 */
extern int ufs_trans_push_quota(ufsvfs_t *, delta_t, struct dquot *);
/* kmem cache for mapentry_t; created by _init_map(). */
kmem_cache_t *mapentry_cache;
/* logmap_need_commit() is true once mtm_nmet exceeds this value. */
long logmap_maxnme_commit = 2048;
/* logmap_need_roll_async() is true once mtm_nme exceeds this value. */
long logmap_maxnme_async = 4096;
/* logmap_need_roll_sync() is true once mtm_nme exceeds this value. */
long logmap_maxnme_sync = 6144;
/* Copied into mtm_cfragmax of every map created by map_get(). */
long logmap_maxcfrag_commit = 4;
/*
 * Bytes currently held in cached roll buffers (crb).  Increased in
 * logmap_add_buf() and decreased in CRB_FREE(), both via atomic_add_64().
 */
uint64_t ufs_crb_size = 0;
/* Largest value of ufs_crb_size seen by logmap_add_buf(). */
uint64_t ufs_crb_max_size = 0;
/*
 * logmap_add_buf() only allocates a new crb while ufs_crb_size plus the
 * new buffer stays below this limit.
 * NOTE(review): the limit is assigned outside the visible code.
 */
size_t ufs_crb_limit;
/* Times logmap_add_buf() declined to allocate a crb (limit or kmem_avail). */
uint64_t ufs_crb_alloc_fails = 0;
/*
 * NOTE(review): neither the divisor macro nor the variable is used in the
 * visible code; presumably they are used elsewhere to derive ufs_crb_limit.
 */
#define UFS_MAX_CRB_DEFAULT_DIVISOR 10
int ufs_max_crb_divisor = UFS_MAX_CRB_DEFAULT_DIVISOR;
/* Forward declaration; defined at the end of this file. */
void handle_dquot(mapentry_t *);
/*
 * CRB_FREE(crb, me)
 *	Free the cached roll buffer `crb' (its data buffer, then the crb_t
 *	itself), subtract its size from ufs_crb_size and clear the me_crb
 *	pointer of map entry `me'.  The caller must already know that no
 *	other reference to the crb remains.
 *
 *	Expands to a single statement (do/while(0)) so it is safe in an
 *	unbraced if/else body, and parenthesises its arguments.  Arguments
 *	are evaluated more than once: pass side-effect free expressions.
 */
#define	CRB_FREE(crb, me) \
	do { \
		kmem_free((crb)->c_buf, (crb)->c_nb); \
		atomic_add_64(&ufs_crb_size, -(uint64_t)(crb)->c_nb); \
		kmem_free((crb), sizeof (crb_t)); \
		(me)->me_crb = NULL; \
	} while (0)

/*
 * CRB_RELE(me)
 *	Drop the reference that map entry `me' holds on its cached roll
 *	buffer, if it has one, and free the buffer via CRB_FREE() when the
 *	reference count reaches zero.  Like CRB_FREE() this is one statement
 *	and must be followed by a semicolon.
 */
#define	CRB_RELE(me) \
	do { \
		crb_t *rele_crb = (me)->me_crb; \
		if (rele_crb != NULL && (--rele_crb->c_refcnt == 0)) { \
			CRB_FREE(rele_crb, me); \
		} \
	} while (0)
/*
 * HANDLE_DQUOT(me, melist)
 *	If map entry `me' carries an argument and uses ufs_trans_push_quota
 *	as its push function, and the replacing entry `melist' does not have
 *	the same delta type, argument and push function, call handle_dquot()
 *	on `me' so the dquot it refers to is cleaned up.
 *
 *	Expands to a single statement (do/while(0)) with parenthesised
 *	arguments; arguments are evaluated more than once.
 */
#define	HANDLE_DQUOT(me, melist) \
	do { \
		if (((me)->me_arg) && \
		    ((me)->me_func == ufs_trans_push_quota)) { \
			if (!(((me)->me_dt == (melist)->me_dt) && \
			    ((me)->me_arg == (melist)->me_arg) && \
			    ((me)->me_func == (melist)->me_func))) { \
				handle_dquot(me); \
			} \
		} \
	} while (0)
/*
 * map_free_entries
 *	Release every map entry linked on the map's circular list, dropping
 *	each entry's roll buffer reference, then empty the hash table and
 *	zero the entry counters.  No lock is taken here.
 */
void
map_free_entries(mt_map_t *mtm)
{
	mapentry_t	*head = (mapentry_t *)mtm;
	mapentry_t	*victim;
	int		bucket;

	/* Keep unlinking the first entry until only the list head is left. */
	for (victim = mtm->mtm_next; victim != head; victim = mtm->mtm_next) {
		victim->me_next->me_prev = victim->me_prev;
		victim->me_prev->me_next = victim->me_next;
		CRB_RELE(victim);
		kmem_cache_free(mapentry_cache, victim);
	}

	/* All entries are gone, so every hash chain is now empty. */
	bucket = 0;
	while (bucket < mtm->mtm_nhash) {
		mtm->mtm_hash[bucket] = NULL;
		bucket++;
	}

	mtm->mtm_nme = 0;
	mtm->mtm_nmet = 0;
}
/*
 * map_put
 *	Tear down a map created by map_get(): free its entries, its hash
 *	table, all of its locks and condition variables, and finally the
 *	map structure itself.  Always returns NULL.
 */
mt_map_t *
map_put(mt_map_t *mtm)
{
	size_t	hashbytes;

	map_free_entries(mtm);
	ASSERT(map_put_debug(mtm));

	/* The table size must match what map_get() allocated. */
	hashbytes = (size_t)(sizeof (mapentry_t *) * mtm->mtm_nhash);
	kmem_free(mtm->mtm_hash, hashbytes);

	mutex_destroy(&mtm->mtm_mutex);
	mutex_destroy(&mtm->mtm_scan_mutex);
	cv_destroy(&mtm->mtm_to_roll_cv);
	cv_destroy(&mtm->mtm_from_roll_cv);
	rw_destroy(&mtm->mtm_rwlock);
	mutex_destroy(&mtm->mtm_lock);
	cv_destroy(&mtm->mtm_cv_commit);
	cv_destroy(&mtm->mtm_cv_next);
	cv_destroy(&mtm->mtm_cv_eot);
	cv_destroy(&mtm->mtm_cv);

	kmem_free(mtm, sizeof (mt_map_t));
	return (NULL);
}
/*
 * map_get
 *	Allocate and initialise a map of type `maptype' for log unit `ul'
 *	with `nh' hash buckets.  Allocations sleep (KM_SLEEP).  The new map
 *	starts with an empty circular entry list whose head is the map
 *	itself.  Returns the new map.
 */
mt_map_t *
map_get(ml_unit_t *ul, enum maptypes maptype, int nh)
{
	mt_map_t	*map;
	size_t		hashbytes;

	map = kmem_zalloc(sizeof (mt_map_t), KM_SLEEP);

	mutex_init(&map->mtm_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&map->mtm_scan_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&map->mtm_to_roll_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&map->mtm_from_roll_cv, NULL, CV_DEFAULT, NULL);
	rw_init(&map->mtm_rwlock, NULL, RW_DEFAULT, NULL);

	/* Empty list: the head points at itself in both directions. */
	map->mtm_next = (mapentry_t *)map;
	map->mtm_prev = (mapentry_t *)map;

	hashbytes = (size_t)(sizeof (mapentry_t *) * nh);
	map->mtm_hash = kmem_zalloc(hashbytes, KM_SLEEP);
	map->mtm_nhash = nh;

	map->mtm_debug = ul->un_debug;
	map->mtm_type = maptype;
	map->mtm_cfrags = 0;
	map->mtm_cfragmax = logmap_maxcfrag_commit;
	map->mtm_ul = ul;

	mutex_init(&map->mtm_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&map->mtm_cv_commit, NULL, CV_DEFAULT, NULL);
	cv_init(&map->mtm_cv_next, NULL, CV_DEFAULT, NULL);
	cv_init(&map->mtm_cv_eot, NULL, CV_DEFAULT, NULL);
	cv_init(&map->mtm_cv, NULL, CV_DEFAULT, NULL);

	ASSERT(map_get_debug(ul, map));
	return (map);
}
/* deltamap_need_commit() is true once mtm_nme exceeds this value. */
long deltamap_maxnme = 1024;

/*
 * deltamap_need_commit
 *	Returns 1 when the map holds more than deltamap_maxnme entries,
 *	0 otherwise.
 */
int
deltamap_need_commit(mt_map_t *mtm)
{
	if (mtm->mtm_nme > deltamap_maxnme) {
		return (1);
	}
	return (0);
}
/*
 * deltamap_add
 *	Record the byte range [mof, mof + nb) in the map as deltas of type
 *	`dtyp' with push function `func' and argument `arg'.  The range is
 *	split at MAPBLOCKSIZE boundaries; a piece that lies entirely inside
 *	an existing entry is skipped, unless that entry's push function is
 *	ufs_trans_push_quota, in which case the search continues.
 *	When `tp' is given and `mtm' is the unit's delta map, the size of
 *	each new delta is added to tp->deltas_size.
 */
void
deltamap_add(
	mt_map_t *mtm,
	offset_t mof,
	off_t nb,
	delta_t dtyp,
	int (*func)(),
	ulong_t arg,
	threadtrans_t *tp)
{
	int32_t		chunk;
	mapentry_t	*ent;
	mapentry_t	**bucket;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	mutex_enter(&mtm->mtm_mutex);

	for (chunk = 0; nb; nb -= chunk, mof += chunk) {
		/* Never let one piece cross a MAPBLOCKSIZE boundary. */
		chunk = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (chunk > nb)
			chunk = nb;

		/* Look for an entry that already covers this piece. */
		bucket = MAP_HASH(mof, mtm);
		for (ent = *bucket; ent != NULL; ent = ent->me_hash) {
			if (DATAwithinME(mof, chunk, ent)) {
				/* Quota entries never count as a match. */
				if (ent->me_func != ufs_trans_push_quota) {
					break;
				}
				continue;
			}
			ASSERT((dtyp == DT_CANCEL) ||
			    (!DATAoverlapME(mof, chunk, ent)) ||
			    MEwithinDATA(ent, mof, chunk));
		}

		if (ent != NULL) {
			/* already covered; nothing to add for this piece */
			continue;
		}

		/* Per-thread accounting of the delta sizes added. */
		if (tp && (mtm->mtm_ul->un_deltamap == mtm)) {
			ASSERT(dtyp != DT_CANCEL);
			if (dtyp == DT_ABZERO) {
				tp->deltas_size += sizeof (struct delta);
			} else {
				tp->deltas_size +=
				    (chunk + sizeof (struct delta));
			}
		}

		delta_stats[dtyp]++;

		/*
		 * Try a non-blocking allocation first; if that fails, drop
		 * the map mutex around a sleeping allocation.
		 * NOTE(review): the chain is not searched again after the
		 * mutex is re-taken.
		 */
		ent = kmem_cache_alloc(mapentry_cache, KM_NOSLEEP);
		if (ent == NULL) {
			mutex_exit(&mtm->mtm_mutex);
			ent = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
			mutex_enter(&mtm->mtm_mutex);
		}
		bzero(ent, sizeof (mapentry_t));

		ent->me_mof = mof;
		ent->me_nb = chunk;
		ent->me_func = func;
		ent->me_arg = arg;
		ent->me_dt = dtyp;
		ent->me_flags = ME_HASH;
		ent->me_tid = mtm->mtm_tid;

		/* Head of the hash chain, tail of the circular list. */
		ent->me_hash = *bucket;
		*bucket = ent;
		ent->me_next = (mapentry_t *)mtm;
		ent->me_prev = mtm->mtm_prev;
		mtm->mtm_prev->me_next = ent;
		mtm->mtm_prev = ent;
		mtm->mtm_nme++;
	}

	mutex_exit(&mtm->mtm_mutex);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
}
/*
 * deltamap_remove
 *	Unlink every entry that lies entirely inside [mof, mof + nb) from
 *	the map's hash chains and circular list.  The removed entries are
 *	returned as a singly linked list chained through me_hash, flagged
 *	ME_LIST instead of ME_HASH.  Returns NULL when `mtm' is NULL or no
 *	entry matched.
 */
mapentry_t *
deltamap_remove(mt_map_t *mtm, offset_t mof, off_t nb)
{
	off_t		chunk;
	mapentry_t	*ent;
	mapentry_t	**linkp;
	mapentry_t	*removed = NULL;

	if (mtm == NULL) {
		return (NULL);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	mutex_enter(&mtm->mtm_mutex);

	for (chunk = 0; nb; nb -= chunk, mof += chunk) {
		chunk = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (chunk > nb)
			chunk = nb;

		linkp = MAP_HASH(mof, mtm);
		while ((ent = *linkp) != NULL) {
			if (!MEwithinDATA(ent, mof, chunk)) {
				linkp = &ent->me_hash;
				continue;
			}
			/* Unlink, then push onto the result list. */
			*linkp = ent->me_hash;
			ent->me_next->me_prev = ent->me_prev;
			ent->me_prev->me_next = ent->me_next;
			ent->me_hash = removed;
			removed = ent;
			ent->me_flags |= ME_LIST;
			ent->me_flags &= ~ME_HASH;
			mtm->mtm_nme--;
		}
	}

	mutex_exit(&mtm->mtm_mutex);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	return (removed);
}
/*
 * deltamap_del
 *	Remove the entries lying inside [mof, mof + nb) from the map via
 *	deltamap_remove() and return each of them to the mapentry cache.
 */
void
deltamap_del(mt_map_t *mtm, offset_t mof, off_t nb)
{
	mapentry_t	*ent = deltamap_remove(mtm, mof, nb);

	while (ent != NULL) {
		/* Fetch the successor before the entry is freed. */
		mapentry_t	*following = ent->me_hash;

		kmem_cache_free(mapentry_cache, ent);
		ent = following;
	}
}
/*
 * deltamap_push
 *	Drain the unit's delta map.  For the first entry on the list the
 *	push function is called; when the log is in error, or the push
 *	function returns non-zero, the entry's range is deleted here.  The
 *	loop ends when the list is empty.
 *	NOTE(review): a push function that returns 0 is presumably expected
 *	to have removed the entry itself; otherwise the loop would not
 *	advance.
 */
void
deltamap_push(ml_unit_t *ul)
{
	mt_map_t	*mtm = ul->un_deltamap;
	mapentry_t	*head = (mapentry_t *)mtm;
	mapentry_t	*ent;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	for (;;) {
		int		(*push)();
		delta_t		dtyp;
		ulong_t		arg;
		offset_t	mof;
		off_t		nb;

		ent = mtm->mtm_next;
		if (ent == head) {
			break;
		}

		/* Copy what is needed before the push function runs. */
		ASSERT(ent->me_func);
		push = ent->me_func;
		dtyp = ent->me_dt;
		arg = ent->me_arg;
		mof = ent->me_mof;
		nb = ent->me_nb;

		if ((ul->un_flags & LDL_ERROR) ||
		    (*push)(ul->un_ufsvfs, dtyp, arg)) {
			deltamap_del(mtm, mof, nb);
		}
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
}
/*
 * logmap_need_commit
 *	Returns non-zero when the current transaction has added more than
 *	logmap_maxnme_commit entries, or the cancelled fragment count has
 *	reached the map's mtm_cfragmax.
 *	NOTE(review): the fragment test uses >= while the entry-count test
 *	uses >; confirm that the asymmetry is intended.
 */
int
logmap_need_commit(mt_map_t *mtm)
{
	if (mtm->mtm_nmet > logmap_maxnme_commit) {
		return (1);
	}
	return (mtm->mtm_cfrags >= mtm->mtm_cfragmax);
}
/*
 * logmap_need_roll_async
 *	Returns non-zero when the map holds more than logmap_maxnme_async
 *	entries.
 */
int
logmap_need_roll_async(mt_map_t *mtm)
{
	return (logmap_maxnme_async < mtm->mtm_nme);
}
/*
 * logmap_need_roll_sync
 *	Returns non-zero when the map holds more than logmap_maxnme_sync
 *	entries.
 */
int
logmap_need_roll_sync(mt_map_t *mtm)
{
	return (logmap_maxnme_sync < mtm->mtm_nme);
}
/*
 * logmap_start_roll
 *	Update the log tail, then create the roll thread (trans_roll) for
 *	this unit unless MTM_ROLL_RUNNING shows that one already exists.
 *	The flag is set, and any stale force/exit request cleared, under
 *	the map mutex before the thread is created.
 */
void
logmap_start_roll(ml_unit_t *ul)
{
	mt_map_t	*map = ul->un_logmap;

	logmap_settail(map, ul);
	ASSERT(!(ul->un_flags & LDL_NOROLL));

	mutex_enter(&map->mtm_mutex);
	if (!(map->mtm_flags & MTM_ROLL_RUNNING)) {
		map->mtm_flags = (map->mtm_flags | MTM_ROLL_RUNNING) &
		    ~(MTM_FORCE_ROLL | MTM_ROLL_EXIT);
		(void) thread_create(NULL, 0, trans_roll, ul, 0, &p0,
		    TS_RUN, minclsyspri);
	}
	mutex_exit(&map->mtm_mutex);
}
/*
 * logmap_kill_roll
 *	Ask the roll thread to exit and wait until MTM_ROLL_RUNNING is
 *	cleared.  Does nothing when the unit has no log map.
 */
void
logmap_kill_roll(ml_unit_t *ul)
{
	mt_map_t	*map = ul->un_logmap;

	if (map != NULL) {
		mutex_enter(&map->mtm_mutex);
		for (;;) {
			if (!(map->mtm_flags & MTM_ROLL_RUNNING)) {
				break;
			}
			/* Repost the exit request on every wakeup. */
			map->mtm_flags |= MTM_ROLL_EXIT;
			cv_signal(&map->mtm_to_roll_cv);
			cv_wait(&map->mtm_from_roll_cv, &map->mtm_mutex);
		}
		mutex_exit(&map->mtm_mutex);
	}
}
/*
 * logmap_forceroll_nowait
 *	Signal the roll thread's condition variable unless MTM_ROLLING is
 *	already set, and return without waiting.  The flag is read without
 *	taking the map mutex.
 */
void
logmap_forceroll_nowait(mt_map_t *logmap)
{
	ASSERT(!(logmap->mtm_ul->un_flags & LDL_NOROLL));

	if (logmap->mtm_flags & MTM_ROLLING) {
		return;
	}
	cv_signal(&logmap->mtm_to_roll_cv);
}
/*
 * logmap_forceroll
 *	Post a MTM_FORCE_ROLL request (signalling the roll thread if the
 *	request was not already pending) and wait on mtm_from_roll_cv until
 *	the flag is cleared.  If no roll thread is running, the flag is
 *	cleared here and the function returns instead of waiting.
 */
void
logmap_forceroll(mt_map_t *mtm)
{
	mutex_enter(&mtm->mtm_mutex);

	if (!(mtm->mtm_flags & MTM_FORCE_ROLL)) {
		mtm->mtm_flags |= MTM_FORCE_ROLL;
		cv_signal(&mtm->mtm_to_roll_cv);
	}

	for (;;) {
		if (!(mtm->mtm_flags & MTM_ROLL_RUNNING)) {
			/* Nobody will service the request; withdraw it. */
			mtm->mtm_flags &= ~MTM_FORCE_ROLL;
			break;
		}
		cv_wait(&mtm->mtm_from_roll_cv, &mtm->mtm_mutex);
		if (!(mtm->mtm_flags & MTM_FORCE_ROLL)) {
			break;
		}
	}

	mutex_exit(&mtm->mtm_mutex);
}
/*
 * logmap_remove_roll
 *	Remove from the map every entry flagged ME_ROLL that lies entirely
 *	inside [mof, mof + nb).  Entries that are not also flagged
 *	ME_CANCEL are freed here; cancelled ones are only unlinked.
 *
 *	The first pass runs with just the map mutex.  If an ME_ROLL entry
 *	is found that also has ME_AGE set, the pass is abandoned and the
 *	whole range is processed again with mtm_rwlock held as writer.
 */
void
logmap_remove_roll(mt_map_t *mtm, offset_t mof, off_t nb)
{
int dolock = 0;
off_t hnb;
mapentry_t *me;
mapentry_t **mep;
/* Original range, needed when the scan has to start over. */
offset_t savmof = mof;
off_t savnb = nb;
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
again:
/* Second attempt only: take the rwlock as writer before the mutex. */
if (dolock)
rw_enter(&mtm->mtm_rwlock, RW_WRITER);
mutex_enter(&mtm->mtm_mutex);
/* Walk the range one MAPBLOCKSIZE-bounded piece at a time. */
for (hnb = 0; nb; nb -= hnb, mof += hnb) {
hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
if (hnb > nb)
hnb = nb;
mep = MAP_HASH(mof, mtm);
while ((me = *mep) != 0) {
if ((me->me_flags & ME_ROLL) &&
(MEwithinDATA(me, mof, hnb))) {
/*
 * The entry is on somebody's age list.  This can only happen on
 * the first attempt (see ASSERT); restart with the rwlock held.
 */
if (me->me_flags & ME_AGE) {
ASSERT(dolock == 0);
dolock = 1;
mutex_exit(&mtm->mtm_mutex);
mof = savmof;
nb = savnb;
goto again;
}
/* Unlink from the hash chain and from the circular list. */
*mep = me->me_hash;
me->me_next->me_prev = me->me_prev;
me->me_prev->me_next = me->me_next;
me->me_flags &= ~(ME_HASH|ME_ROLL);
ASSERT(!(me->me_flags & ME_USER));
mtm->mtm_nme--;
/*
 * Entries still flagged ME_CANCEL are not freed here;
 * logmap_free_cancel() frees cancel-list entries whose ME_ROLL
 * flag is clear.
 */
if ((me->me_flags & ME_CANCEL) == 0) {
roll_stats[me->me_dt]++;
CRB_RELE(me);
kmem_cache_free(mapentry_cache, me);
}
} else
mep = &me->me_hash;
}
}
mutex_exit(&mtm->mtm_mutex);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
if (dolock)
rw_exit(&mtm->mtm_rwlock);
}
/*
 * logmap_next_roll
 *	Find the first entry on the map's list that is not already being
 *	rolled (ME_ROLL).  If that entry belongs to the current or the
 *	committing transaction the search stops and 0 is returned.
 *	Otherwise its master offset is stored in *mofp and 1 is returned.
 *	Returns 0 when there is no such entry.
 */
int
logmap_next_roll(mt_map_t *logmap, offset_t *mofp)
{
	mapentry_t	*head = (mapentry_t *)logmap;
	mapentry_t	*ent;
	int		found = 0;

	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(logmap));

	mutex_enter(&logmap->mtm_mutex);
	for (ent = logmap->mtm_next; ent != head; ent = ent->me_next) {
		/* skip entries that are already being rolled */
		if (ent->me_flags & ME_ROLL) {
			continue;
		}
		/* stop at the current or the committing transaction */
		if (ent->me_tid == logmap->mtm_tid ||
		    ent->me_tid == logmap->mtm_committid) {
			break;
		}
		ASSERT(ent->me_dt != DT_CANCEL);
		*mofp = ent->me_mof;
		found = 1;
		break;
	}
	mutex_exit(&logmap->mtm_mutex);

	return (found);
}
/*
 * logmap_list_age
 *	Insert `meadd' into the list headed by *age, which is chained
 *	through me_agenext and kept in ascending me_age order, and mark it
 *	ME_AGE.  The new entry goes after all entries whose age is less
 *	than or equal to its own.
 */
static void
logmap_list_age(mapentry_t **age, mapentry_t *meadd)
{
	mapentry_t	**linkp = age;
	mapentry_t	*cur;

	ASSERT(!(meadd->me_flags & (ME_AGE|ME_LIST)));

	/* Advance to the first link whose entry is strictly newer. */
	while ((cur = *linkp) != NULL && cur->me_age <= meadd->me_age) {
		linkp = &cur->me_agenext;
	}

	meadd->me_agenext = cur;
	meadd->me_flags |= ME_AGE;
	*linkp = meadd;
}
/*
 * logmap_list_get
 *	Build, in *age, an age-ordered list of all non-cancel map entries
 *	that overlap [mof, mof + nb).  Each listed entry is marked ME_AGE.
 *
 *	Returns with mtm_rwlock still held (reader on the first attempt,
 *	writer after a retry); the list is released with logmap_list_put(),
 *	which also drops the lock.
 *
 *	The return value is 1 when at least one listed entry (or its cached
 *	roll buffer, if it has one) covers the entire requested range, and
 *	0 otherwise.
 */
int
logmap_list_get(
mt_map_t *mtm,
offset_t mof,
off_t nb,
mapentry_t **age)
{
off_t hnb;
mapentry_t *me;
mapentry_t **mep;
int rwtype = RW_READER;
/* Original range: needed for restarts and for the "entire" test. */
offset_t savmof = mof;
off_t savnb = nb;
int entire = 0;
crb_t *crb;
mtm->mtm_ref = 1;
again:
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
rw_enter(&mtm->mtm_rwlock, rwtype);
*age = NULL;
mutex_enter(&mtm->mtm_mutex);
/* Walk the range one MAPBLOCKSIZE-bounded piece at a time. */
for (hnb = 0; nb; nb -= hnb, mof += hnb) {
hnb = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
if (hnb > nb)
hnb = nb;
mep = MAP_HASH(mof, mtm);
for (me = *mep; me; me = me->me_hash) {
if (me->me_dt == DT_CANCEL)
continue;
if (!DATAoverlapME(mof, hnb, me))
continue;
/*
 * The entry is already on an age list.  Undo the ME_AGE marks set
 * so far, drop both locks and start over as a writer.
 */
if (me->me_flags & ME_AGE) {
for (me = *age; me; me = *age) {
*age = me->me_agenext;
me->me_flags &= ~ME_AGE;
}
mutex_exit(&mtm->mtm_mutex);
rw_exit(&mtm->mtm_rwlock);
rwtype = RW_WRITER;
mof = savmof;
nb = savnb;
entire = 0;
goto again;
} else {
logmap_list_age(age, me);
/*
 * Coverage is tested against the whole request (savmof/savnb),
 * using the roll buffer's extent when the entry has one.
 */
crb = me->me_crb;
if (crb) {
if (DATAwithinCRB(savmof, savnb, crb)) {
entire = 1;
}
} else {
if (DATAwithinME(savmof, savnb, me)) {
entire = 1;
}
}
}
}
}
mutex_exit(&mtm->mtm_mutex);
/* The rwlock is deliberately left held for the caller. */
ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));
return (entire);
}
/*
 * logmap_list_get_roll
 *	Collect, for the map block starting at `mof', an age-ordered list
 *	of the entries that may be rolled: entries overlapping the block
 *	that are neither cancel records nor part of the current or the
 *	committing transaction.  The caller must hold mtm_rwlock.
 *
 *	On success (return 0) the listed entries are marked ME_AGE and
 *	ME_ROLL, rbp->rb_age is the list (possibly NULL), and rbp->rb_crb
 *	is the cached roll buffer shared by all listed entries when the
 *	first entry's buffer is valid and every entry uses it, else NULL.
 *
 *	Returns 1, with all ME_AGE marks set here undone, when an entry is
 *	found that is already on another age list.
 */
int
logmap_list_get_roll(mt_map_t *logmap, offset_t mof, rollbuf_t *rbp)
{
	mapentry_t	*ent;
	mapentry_t	**bucket;
	mapentry_t	*agelist = NULL;
	crb_t		*shared = NULL;

	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
	ASSERT(((logmap->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(logmap));
	ASSERT((mof & MAPBLOCKOFF) == 0);

	rbp->rb_crb = NULL;

	mutex_enter(&logmap->mtm_mutex);
	bucket = MAP_HASH(mof, logmap);
	for (ent = *bucket; ent != NULL; ent = ent->me_hash) {
		if (!DATAoverlapME(mof, MAPBLOCKSIZE, ent)) {
			continue;
		}
		if (ent->me_tid == logmap->mtm_tid ||
		    ent->me_tid == logmap->mtm_committid) {
			continue;
		}
		if (ent->me_dt == DT_CANCEL) {
			continue;
		}

		if (ent->me_flags & ME_AGE) {
			/* In use elsewhere: undo our marks and give up. */
			while (agelist != NULL) {
				ent = agelist;
				agelist = ent->me_agenext;
				ent->me_flags &= ~ME_AGE;
			}
			mutex_exit(&logmap->mtm_mutex);
			return (1);
		}
		logmap_list_age(&agelist, ent);
	}

	if (agelist != NULL) {
		/* Everything collected is now being rolled. */
		for (ent = agelist; ent != NULL; ent = ent->me_agenext) {
			ent->me_flags |= ME_ROLL;
		}

		/* A roll buffer is usable only if every entry shares it. */
		shared = agelist->me_crb;
		if (shared && !(shared->c_invalid)) {
			for (ent = agelist; ent != NULL;
			    ent = ent->me_agenext) {
				if (ent->me_crb != shared) {
					shared = NULL;
					break;
				}
			}
			rbp->rb_crb = shared;
		}
	}

	rbp->rb_age = agelist;
	mutex_exit(&logmap->mtm_mutex);

	ASSERT(((logmap->mtm_debug & MT_SCAN) == 0) ||
	    logmap_logscan_debug(logmap, agelist));
	ASSERT(RW_LOCK_HELD(&logmap->mtm_rwlock));
	return (0);
}
/*
 * logmap_list_put_roll
 *	Clear ME_AGE on every entry of the age list `age'.  The caller
 *	holds mtm_rwlock and keeps holding it on return.
 */
void
logmap_list_put_roll(mt_map_t *mtm, mapentry_t *age)
{
	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));

	mutex_enter(&mtm->mtm_mutex);
	while (age != NULL) {
		mapentry_t	*ent = age;

		age = ent->me_agenext;
		ent->me_flags &= ~ME_AGE;
	}
	mutex_exit(&mtm->mtm_mutex);
}
/*
 * logmap_list_put
 *	Clear ME_AGE on every entry of the age list `age' and then release
 *	mtm_rwlock, which the caller must hold on entry.
 */
void
logmap_list_put(mt_map_t *mtm, mapentry_t *age)
{
	ASSERT(RW_LOCK_HELD(&mtm->mtm_rwlock));

	mutex_enter(&mtm->mtm_mutex);
	while (age != NULL) {
		mapentry_t	*ent = age;

		age = ent->me_agenext;
		ent->me_flags &= ~ME_AGE;
	}
	mutex_exit(&mtm->mtm_mutex);

	rw_exit(&mtm->mtm_rwlock);
}
/*
 * Maximum number of gaps tolerated between the dirty sectors of an
 * all-inode block before logmap_setup_read() asks for the block to be read.
 */
#define	UFS_RW_BALANCE 2
int ufs_rw_balance = UFS_RW_BALANCE;

/*
 * logmap_setup_read
 *	Compute, for the deltas on the age list `age', the bitmap of
 *	DEV_BSIZE sectors inside the MAPBLOCKSIZE block that they touch
 *	and store it in rbp->rb_secmap.  A delta's extent is taken from
 *	its cached roll buffer when it has one.
 *
 *	Returns 1 when the block has to be read first: either some delta
 *	is not sector aligned, or all deltas are DT_INODE and the touched
 *	sectors are separated by more than ufs_rw_balance gaps.  When all
 *	deltas are inodes and a read is needed, every sector between the
 *	first and the last touched one is added to the bitmap.
 */
int
logmap_setup_read(mapentry_t *age, rollbuf_t *rbp)
{
	mapentry_t	*ent;
	rbsecmap_t	sectors = 0;
	int		need_read = 0;
	int		only_inodes = 1;
	int		lo_sec = INT_MAX;
	int		hi_sec = -1;
	int		sec;

	ASSERT((MAPBLOCKSIZE / DEV_BSIZE) == (sizeof (sectors) * NBBY));

	for (ent = age; ent != NULL; ent = ent->me_agenext) {
		crb_t		*crb = ent->me_crb;
		offset_t	mof;
		int32_t		nb;
		int		from, to;

		if (crb) {
			nb = crb->c_nb;
			mof = crb->c_mof;
		} else {
			nb = ent->me_nb;
			mof = ent->me_mof;
		}

		/* A partial sector cannot be written without reading it. */
		if ((nb & DEV_BMASK) || (mof & DEV_BMASK)) {
			need_read = 1;
		}

		/* Mark every sector of the block that this delta touches. */
		from = (mof & MAPBLOCKOFF) >> DEV_BSHIFT;
		to = from + ((nb - 1) >> DEV_BSHIFT);
		for (sec = from; sec <= to; sec++) {
			sectors |= UINT16_C(1) << sec;
		}

		if (ent->me_dt != DT_INODE) {
			only_inodes = 0;
		}
		if (from < lo_sec) {
			lo_sec = from;
		}
		if (to > hi_sec) {
			hi_sec = to;
		}
	}

	ASSERT(sectors);
	ASSERT(lo_sec != INT_MAX);
	ASSERT(hi_sec != -1);

	if (only_inodes) {
		if (!need_read) {
			int	ngaps = 0;
			int	in_gap = 0;

			/* Count the runs of untouched sectors in between. */
			for (sec = lo_sec + 1; sec < hi_sec; sec++) {
				if (sectors & (UINT16_C(1) << sec)) {
					in_gap = 0;
					continue;
				}
				if (in_gap) {
					continue;
				}
				in_gap = 1;
				ngaps++;
				if (ngaps > ufs_rw_balance) {
					need_read = 1;
					break;
				}
			}
		}

		/* Reading the block: cover the whole span in one piece. */
		if (need_read) {
			for (sec = lo_sec + 1; sec < hi_sec; sec++) {
				sectors |= (UINT16_C(1) << sec);
			}
		}
	}

	rbp->rb_secmap = sectors;
	return (need_read);
}
/*
 * logmap_abort
 *	Throw away the deltas of transaction `tid' and of the committing
 *	transaction.  With mtm_rwlock held as writer and the map mutex
 *	held, the cancel list is emptied (only the ME_CANCEL marks are
 *	cleared), and every hashed entry belonging to one of the two
 *	transactions is unlinked and freed.  Unless the unit is scanning
 *	(LDL_SCAN) the map is flagged MTM_CANCELED.  The dirty and
 *	per-transaction entry counters are reset.
 */
static void
logmap_abort(ml_unit_t *ul, uint32_t tid)
{
	struct mt_map	*mtm = ul->un_logmap;
	mapentry_t	*ent;
	mapentry_t	**linkp;
	int		bucket;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	rw_enter(&mtm->mtm_rwlock, RW_WRITER);
	mutex_enter(&mtm->mtm_mutex);

	/* Dissolve the cancel list without freeing anything. */
	for (ent = mtm->mtm_cancel; ent != NULL; ent = mtm->mtm_cancel) {
		mtm->mtm_cancel = ent->me_cancel;
		ent->me_flags &= ~ME_CANCEL;
		ent->me_cancel = NULL;
	}

	/* Sweep every hash chain for entries of the two transactions. */
	for (bucket = 0; bucket < mtm->mtm_nhash; bucket++) {
		linkp = &mtm->mtm_hash[bucket];
		while ((ent = *linkp) != NULL) {
			if (ent->me_tid != tid &&
			    ent->me_tid != mtm->mtm_committid) {
				linkp = &ent->me_hash;
				continue;
			}
			*linkp = ent->me_hash;
			ent->me_next->me_prev = ent->me_prev;
			ent->me_prev->me_next = ent->me_next;
			/* ME_USER entries are not counted in mtm_nme. */
			if (!(ent->me_flags & ME_USER)) {
				mtm->mtm_nme--;
			}
			CRB_RELE(ent);
			kmem_cache_free(mapentry_cache, ent);
		}
	}

	if (!(ul->un_flags & LDL_SCAN)) {
		mtm->mtm_flags |= MTM_CANCELED;
	}
	mutex_exit(&mtm->mtm_mutex);

	mtm->mtm_dirty = 0;
	mtm->mtm_nmet = 0;
	rw_exit(&mtm->mtm_rwlock);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
}
/*
 * logmap_wait_space
 *	Called with un_log_mutex held.  While the log has no room for the
 *	delta(s) described by `me', drop the log mutex, force a roll, and
 *	take the mutex again.  Gives up as soon as the unit is in error.
 *	Returns with un_log_mutex held.
 */
static void
logmap_wait_space(mt_map_t *mtm, ml_unit_t *ul, mapentry_t *me)
{
	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	for (;;) {
		if (ldl_has_space(ul, me)) {
			break;
		}
		ASSERT(!(ul->un_flags & LDL_NOROLL));

		/* The roll must run without the log mutex held. */
		mutex_exit(&ul->un_log_mutex);
		logmap_forceroll(mtm);
		mutex_enter(&ul->un_log_mutex);

		if (ul->un_flags & LDL_ERROR) {
			break;
		}
	}

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));
}
/*
 * logmap_add
 *	Move the deltas on the list `melist' (chained through me_hash) into
 *	the log map and, when `va' is not NULL, write each of them to the
 *	log with ldl_write().
 *
 *	ul	- log unit
 *	va	- address of the data being logged, or NULL to only update
 *		  the map
 *	vamof	- master offset that corresponds to `va'
 *	melist	- deltas to add; every entry is consumed
 *
 *	For every incoming delta, existing entries that lie entirely
 *	inside it and are neither rolling nor cancelled are superseded:
 *	an entry of the current transaction that is not on an age list is
 *	removed and freed, any other one is put on the cancel list.
 *
 *	If the unit is in error after the (optional) log write, the
 *	incoming delta is freed instead of being added.
 */
void
logmap_add(
	ml_unit_t *ul,
	char *va,
	offset_t vamof,
	mapentry_t *melist)
{
	offset_t	mof;
	off_t		nb;
	mapentry_t	*me;
	mapentry_t	**mep;
	mapentry_t	**savmep;
	uint32_t	tid;
	mt_map_t	*mtm = ul->un_logmap;

	mutex_enter(&ul->un_log_mutex);
	if (va)
		logmap_wait_space(mtm, ul, melist);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	mtm->mtm_ref = 1;
	mtm->mtm_dirty++;
	tid = mtm->mtm_tid;
	while (melist) {
		mof = melist->me_mof;
		nb = melist->me_nb;

		/* Supersede the existing entries covered by this delta. */
		savmep = mep = MAP_HASH(mof, mtm);
		mutex_enter(&mtm->mtm_mutex);
		while ((me = *mep) != 0) {
			if (MEwithinDATA(me, mof, nb) &&
			    ((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
				if (tid == me->me_tid &&
				    ((me->me_flags & ME_AGE) == 0)) {
					*mep = me->me_hash;
					me->me_next->me_prev = me->me_prev;
					me->me_prev->me_next = me->me_next;
					ASSERT(!(me->me_flags & ME_USER));
					mtm->mtm_nme--;
					/*
					 * A quota record may hold a dquot
					 * that its push function would have
					 * released; release it here.
					 */
					if (me->me_dt == DT_QR) {
						HANDLE_DQUOT(me, melist);
					}
					/*
					 * Drop the entry's roll buffer
					 * reference, as every other path
					 * that frees a log map entry does;
					 * otherwise the buffer and its
					 * ufs_crb_size accounting leak.
					 */
					CRB_RELE(me);
					kmem_cache_free(mapentry_cache, me);
					continue;
				}
				/* Other transaction, or in use: cancel it. */
				me->me_cancel = mtm->mtm_cancel;
				mtm->mtm_cancel = me;
				me->me_flags |= ME_CANCEL;
			}
			mep = &(*mep)->me_hash;
		}
		mutex_exit(&mtm->mtm_mutex);

		/* Detach the delta from the incoming list. */
		me = melist;
		melist = melist->me_hash;
		me->me_flags &= ~ME_LIST;

		if (va)
			ldl_write(ul, va, vamof, me);
		if (ul->un_flags & LDL_ERROR) {
			kmem_cache_free(mapentry_cache, me);
			continue;
		}
		ASSERT((va == NULL) ||
		    ((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
		    map_check_ldl_write(ul, va, vamof, me));

		/* Head of its hash chain, tail of the circular list. */
		mutex_enter(&mtm->mtm_mutex);
		me->me_hash = *savmep;
		*savmep = me;
		me->me_next = (mapentry_t *)mtm;
		me->me_prev = mtm->mtm_prev;
		mtm->mtm_prev->me_next = me;
		mtm->mtm_prev = me;
		me->me_flags |= ME_HASH;
		me->me_tid = tid;
		me->me_age = mtm->mtm_age++;
		mtm->mtm_nme++;
		mtm->mtm_nmet++;
		mutex_exit(&mtm->mtm_mutex);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
	mutex_exit(&ul->un_log_mutex);
}
/*
 * logmap_add_buf
 *	Like logmap_add(), but additionally tries to attach a cached roll
 *	buffer (crb) holding a copy of `buf' to each delta that is added.
 *
 *	ul	- log unit
 *	va	- address inside `buf' of the data being logged (must not
 *		  be NULL)
 *	bufmof	- master offset of the start of `buf'
 *	melist	- deltas to add, chained through me_hash; all are consumed
 *	buf	- buffer that contains the logged data
 *	bufsz	- size of `buf'; must be a multiple of DEV_BSIZE
 *
 *	A crb is obtained, in order of preference, by taking over the crb
 *	of a superseded entry whose last reference was just dropped and
 *	whose size matches, by sharing the crb of an existing DT_INODE
 *	entry of the same transaction that covers the delta, or by
 *	allocating a new one when memory limits allow.  DT_ABZERO deltas
 *	never get a newly allocated crb.
 */
void
logmap_add_buf(
ml_unit_t *ul,
char *va,
offset_t bufmof,
mapentry_t *melist,
caddr_t buf,
uint32_t bufsz)
{
offset_t mof;
/* Master offset of `va', derived from its position inside `buf'. */
offset_t vamof = bufmof + (va - buf);
off_t nb;
mapentry_t *me;
mapentry_t **mep;
mapentry_t **savmep;
uint32_t tid;
mt_map_t *mtm = ul->un_logmap;
crb_t *crb;
/* crb chosen for the delta currently being added, if any. */
crb_t *crbsav = NULL;
ASSERT((bufsz & DEV_BMASK) == 0);
mutex_enter(&ul->un_log_mutex);
logmap_wait_space(mtm, ul, melist);
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
mtm->mtm_ref = 1;
mtm->mtm_dirty++;
tid = mtm->mtm_tid;
while (melist) {
mof = melist->me_mof;
nb = melist->me_nb;
savmep = mep = MAP_HASH(mof, mtm);
mutex_enter(&mtm->mtm_mutex);
/* Supersede the existing entries covered by this delta. */
while ((me = *mep) != 0) {
crb = me->me_crb;
if (MEwithinDATA(me, mof, nb) &&
((me->me_flags & (ME_ROLL|ME_CANCEL)) == 0)) {
/* Same transaction and not on an age list: remove and free. */
if (tid == me->me_tid &&
((me->me_flags & ME_AGE) == 0)) {
*mep = me->me_hash;
me->me_next->me_prev = me->me_prev;
me->me_prev->me_next = me->me_next;
ASSERT(!(me->me_flags & ME_USER));
mtm->mtm_nme--;
if (me->me_dt == DT_QR)
HANDLE_DQUOT(me, melist);
/*
 * Last reference to the old crb: free it if a crb was already
 * chosen or its size differs, otherwise refill it from `buf' and
 * keep it for the new delta.
 */
if (crb && (--crb->c_refcnt == 0)) {
if (crbsav ||
(crb->c_nb != bufsz)) {
CRB_FREE(crb, me);
} else {
bcopy(buf, crb->c_buf,
bufsz);
crb->c_invalid = 0;
crb->c_mof = bufmof;
crbsav = crb;
me->me_crb = NULL;
}
}
kmem_cache_free(mapentry_cache, me);
continue;
}
/* Otherwise the old entry goes on the cancel list. */
me->me_cancel = mtm->mtm_cancel;
mtm->mtm_cancel = me;
me->me_flags |= ME_CANCEL;
}
/*
 * An inode delta of this transaction whose crb covers the new
 * delta: refresh that crb from `buf' and share it.
 */
if ((me->me_dt == DT_INODE) && (tid == me->me_tid) &&
!crbsav && crb &&
WITHIN(mof, nb, crb->c_mof, crb->c_nb)) {
ASSERT(crb->c_mof == bufmof);
ASSERT(crb->c_nb == bufsz);
bcopy(buf, crb->c_buf, bufsz);
crbsav = crb;
}
mep = &(*mep)->me_hash;
}
mutex_exit(&mtm->mtm_mutex);
/*
 * No crb found to reuse: allocate one without sleeping, but only while
 * the global crb budget and available kernel memory permit.
 */
if ((crbsav == NULL) && (melist->me_dt != DT_ABZERO)) {
if (((ufs_crb_size + bufsz) < ufs_crb_limit) &&
(kmem_avail() > bufsz)) {
crbsav = kmem_alloc(sizeof (crb_t), KM_NOSLEEP);
} else {
ufs_crb_alloc_fails++;
}
if (crbsav) {
crbsav->c_buf = kmem_alloc(bufsz, KM_NOSLEEP);
if (crbsav->c_buf) {
atomic_add_64(&ufs_crb_size,
(uint64_t)bufsz);
/* Track the high-water mark of crb memory. */
if (ufs_crb_size > ufs_crb_max_size) {
ufs_crb_max_size = ufs_crb_size;
}
bcopy(buf, crbsav->c_buf, bufsz);
crbsav->c_nb = bufsz;
crbsav->c_refcnt = 0;
crbsav->c_invalid = 0;
ASSERT((bufmof & DEV_BMASK) == 0);
crbsav->c_mof = bufmof;
} else {
/* Data buffer allocation failed: go without a crb. */
kmem_free(crbsav, sizeof (crb_t));
crbsav = NULL;
}
}
}
/* Detach the delta from the incoming list and attach the crb. */
me = melist;
melist = melist->me_hash;
me->me_flags &= ~ME_LIST;
me->me_crb = crbsav;
if (crbsav) {
crbsav->c_refcnt++;
}
/* The next delta starts its own search for a crb. */
crbsav = NULL;
ASSERT(va);
ldl_write(ul, va, vamof, me);
/* On a log error the delta and its crb reference are dropped. */
if (ul->un_flags & LDL_ERROR) {
CRB_RELE(me);
kmem_cache_free(mapentry_cache, me);
continue;
}
ASSERT(((mtm->mtm_debug & MT_LOG_WRITE_CHECK) == 0) ||
map_check_ldl_write(ul, va, vamof, me));
/* Head of its hash chain, tail of the circular list. */
mutex_enter(&mtm->mtm_mutex);
me->me_hash = *savmep;
*savmep = me;
me->me_next = (mapentry_t *)mtm;
me->me_prev = mtm->mtm_prev;
mtm->mtm_prev->me_next = me;
mtm->mtm_prev = me;
me->me_flags |= ME_HASH;
me->me_tid = tid;
me->me_age = mtm->mtm_age++;
mtm->mtm_nme++;
mtm->mtm_nmet++;
mutex_exit(&mtm->mtm_mutex);
}
ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
map_check_linkage(mtm));
mutex_exit(&ul->un_log_mutex);
}
/*
 * logmap_free_cancel
 *	Process the cancel list headed by *cancelhead.  Every entry is
 *	taken off the list and has ME_CANCEL cleared.  Entries flagged
 *	ME_ROLL are otherwise left alone; all others are unlinked from the
 *	hash and the circular list (when hashed) and freed.
 *
 *	If the map is flagged MTM_CANCELED the flag is cleared and nothing
 *	else is done.  If an entry on the list is on an age list (ME_AGE),
 *	processing restarts with mtm_rwlock held as writer.
 */
void
logmap_free_cancel(mt_map_t *mtm, mapentry_t **cancelhead)
{
	int		wlocked = 0;
	mapentry_t	*ent;
	mapentry_t	**linkp;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

retry:
	if (wlocked) {
		rw_enter(&mtm->mtm_rwlock, RW_WRITER);
	}

	mutex_enter(&mtm->mtm_mutex);
	if (mtm->mtm_flags & MTM_CANCELED) {
		mtm->mtm_flags &= ~MTM_CANCELED;
		ASSERT(wlocked == 0);
		mutex_exit(&mtm->mtm_mutex);
		return;
	}

	for (ent = *cancelhead; ent != NULL; ent = *cancelhead) {
		if (ent->me_flags & ME_AGE) {
			/* Entry is in use; come back holding the rwlock. */
			ASSERT(wlocked == 0);
			mutex_exit(&mtm->mtm_mutex);
			wlocked = 1;
			goto retry;
		}

		/* Take the entry off the cancel list. */
		*cancelhead = ent->me_cancel;
		ent->me_cancel = NULL;
		ent->me_flags &= ~(ME_CANCEL);

		/* Rolling entries stay in the map and are not freed here. */
		if (ent->me_flags & ME_ROLL) {
			continue;
		}

		if (ent->me_flags & ME_HASH) {
			linkp = MAP_HASH(ent->me_mof, mtm);
			for (; *linkp != NULL; linkp = &(*linkp)->me_hash) {
				if (*linkp != ent) {
					continue;
				}
				*linkp = ent->me_hash;
				ent->me_next->me_prev = ent->me_prev;
				ent->me_prev->me_next = ent->me_next;
				ent->me_flags &= ~(ME_HASH);
				/* ME_USER entries are not in mtm_nme. */
				if (!(ent->me_flags & ME_USER)) {
					mtm->mtm_nme--;
				}
				break;
			}
		}

		CRB_RELE(ent);
		kmem_cache_free(mapentry_cache, ent);
	}

	mutex_exit(&mtm->mtm_mutex);
	if (wlocked) {
		rw_exit(&mtm->mtm_rwlock);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
}
/*
 * logmap_commit
 *	Called with un_log_mutex held.  If the map is dirty, write a
 *	DT_COMMIT record carrying the current transaction id to the log.
 *	On a log error the transaction `tid' is aborted; otherwise the
 *	dirty, per-transaction entry and cancelled-fragment counters are
 *	reset.  The commit is pushed in both cases.
 */
void
logmap_commit(ml_unit_t *ul, uint32_t tid)
{
	mapentry_t	commit;
	mt_map_t	*mtm = ul->un_logmap;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	if (!mtm->mtm_dirty) {
		return;
	}

	/* Only the fields set here are initialised in the record. */
	commit.me_mof = mtm->mtm_tid;
	commit.me_dt = DT_COMMIT;
	commit.me_nb = 0;
	commit.me_hash = NULL;

	logmap_wait_space(mtm, ul, &commit);
	ldl_write(ul, NULL, (offset_t)0, &commit);
	ldl_round_commit(ul);

	if (ul->un_flags & LDL_ERROR) {
		logmap_abort(ul, tid);
	} else {
		mtm->mtm_dirty = 0;
		mtm->mtm_nmet = 0;
		mtm->mtm_cfrags = 0;
	}

	ldl_push_commit(ul);
}
/*
 * logmap_sethead
 *	Tell the log where its head is.  The head is the log offset and
 *	transaction id of the first entry on the map's list that has a
 *	non-zero me_lof.  When there is no such entry, -1 is passed as the
 *	offset and the map's age counter is reset to 0.
 *
 *	Takes un_log_mutex and, while searching, the map mutex.
 */
void
logmap_sethead(mt_map_t *mtm, ml_unit_t *ul)
{
	off_t		lof;
	/*
	 * Initialised so that a defined value is passed to ldl_sethead()
	 * even when no entry supplies a transaction id (lof == -1).
	 */
	uint32_t	tid = 0;
	mapentry_t	*me;

	mutex_enter(&ul->un_log_mutex);
	mutex_enter(&mtm->mtm_mutex);

	/* Skip entries that have no log offset. */
	me = mtm->mtm_next;
	while (me != (mapentry_t *)mtm && me->me_lof == 0) {
		me = me->me_next;
	}

	if (me == (mapentry_t *)mtm) {
		lof = -1;
	} else {
		lof = me->me_lof;
		tid = me->me_tid;
	}
	mutex_exit(&mtm->mtm_mutex);

	ldl_sethead(ul, lof, tid);
	if (lof == -1)
		mtm->mtm_age = 0;
	mutex_exit(&ul->un_log_mutex);
}
/*
 * logmap_settail
 *	Tell the log where its tail is.  When the map has entries, the
 *	saved tail offset and size (mtm_tail_lof, mtm_tail_nb) are passed
 *	on; when the map is empty, -1 is passed as the offset.
 *
 *	Takes un_log_mutex and, while reading the map, the map mutex.
 */
void
logmap_settail(mt_map_t *mtm, ml_unit_t *ul)
{
	off_t		lof;
	/*
	 * Initialised so that a defined value is passed to ldl_settail()
	 * even when the map is empty (lof == -1).
	 */
	size_t		nb = 0;

	mutex_enter(&ul->un_log_mutex);
	mutex_enter(&mtm->mtm_mutex);
	if (mtm->mtm_prev == (mapentry_t *)mtm) {
		lof = -1;
	} else {
		lof = mtm->mtm_tail_lof;
		nb = mtm->mtm_tail_nb;
	}
	mutex_exit(&mtm->mtm_mutex);

	ldl_settail(ul, lof, nb);
	mutex_exit(&ul->un_log_mutex);
}
/*
 * logmap_roll_dev
 *	Force rolls until the map's list holds nothing but un-rolled
 *	entries of the current or the committing transaction, then write
 *	the summary info.  Returns immediately, without writing the summary
 *	info, when the unit is in error or rolling is disabled.
 */
void
logmap_roll_dev(ml_unit_t *ul)
{
	mt_map_t	*mtm = ul->un_logmap;
	mapentry_t	*head = (mapentry_t *)mtm;
	ufsvfs_t	*ufsvfsp = ul->un_ufsvfs;
	mapentry_t	*ent;

	for (;;) {
		ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
		    map_check_linkage(mtm));

		if (ul->un_flags & (LDL_ERROR|LDL_NOROLL)) {
			return;
		}

		/* Stop at the first entry that is, or can be, rolled. */
		mutex_enter(&mtm->mtm_mutex);
		for (ent = mtm->mtm_next; ent != head; ent = ent->me_next) {
			if (ent->me_flags & ME_ROLL) {
				break;
			}
			if (ent->me_tid != mtm->mtm_tid &&
			    ent->me_tid != mtm->mtm_committid) {
				break;
			}
		}

		if (ent == head) {
			/* nothing left to roll; mutex is still held */
			break;
		}

		mutex_exit(&mtm->mtm_mutex);
		logmap_forceroll(mtm);
	}

	mutex_exit(&mtm->mtm_mutex);
	(void) ufs_putsummaryinfo(ul->un_dev, ufsvfsp, ufsvfsp->vfs_fs);

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
}
/*
 * logmap_cancel_delta
 *	Create a DT_CANCEL map entry for [mof, mof + nb) in the current
 *	transaction and put it into the hash, on the circular list and on
 *	the cancel list.
 *
 *	For metadata the cancel record is also written to the log; if the
 *	unit is in error afterwards the entry is freed and nothing is
 *	added.  For user data (metadata == 0) nothing is written, the
 *	entry is flagged ME_USER and not counted in mtm_nme/mtm_nmet, and
 *	the map's cancelled-fragment count is increased according to `nb'.
 */
static void
logmap_cancel_delta(ml_unit_t *ul, offset_t mof, int32_t nb, int metadata)
{
	mt_map_t	*mtm = ul->un_logmap;
	mapentry_t	*cancel;
	mapentry_t	**bucket;
	int		frags;

	mtm->mtm_ref = 1;
	mtm->mtm_dirty++;

	cancel = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
	bzero(cancel, sizeof (mapentry_t));

	cancel->me_mof = mof;
	cancel->me_nb = nb;
	cancel->me_dt = DT_CANCEL;
	cancel->me_tid = mtm->mtm_tid;
	cancel->me_hash = NULL;

	mutex_enter(&ul->un_log_mutex);
	if (metadata) {
		logmap_wait_space(mtm, ul, cancel);
		ldl_write(ul, NULL, (offset_t)0, cancel);
		if (ul->un_flags & LDL_ERROR) {
			kmem_cache_free(mapentry_cache, cancel);
			mutex_exit(&ul->un_log_mutex);
			return;
		}
	}

	bucket = MAP_HASH(mof, mtm);
	mutex_enter(&mtm->mtm_mutex);
	cancel->me_age = mtm->mtm_age++;

	/* Head of its hash chain, tail of the circular list. */
	cancel->me_hash = *bucket;
	*bucket = cancel;
	cancel->me_next = (mapentry_t *)mtm;
	cancel->me_prev = mtm->mtm_prev;
	mtm->mtm_prev->me_next = cancel;
	mtm->mtm_prev = cancel;

	/* And at the head of the cancel list. */
	cancel->me_cancel = mtm->mtm_cancel;
	mtm->mtm_cancel = cancel;

	if (metadata) {
		mtm->mtm_nme++;
		mtm->mtm_nmet++;
		cancel->me_flags |= (ME_HASH|ME_CANCEL);
	} else {
		cancel->me_flags = ME_USER;
		cancel->me_flags |= (ME_HASH|ME_CANCEL);
		frags = blkoff(ul->un_ufsvfs->vfs_fs, nb);
		if (frags) {
			mtm->mtm_cfrags +=
			    numfrags(ul->un_ufsvfs->vfs_fs, frags);
		}
	}
	mutex_exit(&mtm->mtm_mutex);

	mutex_exit(&ul->un_log_mutex);
}
/*
 * logmap_cancel
 *	Cancel the range [mof, mof + nb), one MAPBLOCKSIZE-bounded piece
 *	at a time.  For metadata, every existing entry that overlaps the
 *	piece and is not yet cancelled is put on the cancel list and its
 *	cached roll buffer, if any, is marked invalid.  For every piece a
 *	cancel delta is then created with logmap_cancel_delta().
 */
void
logmap_cancel(ml_unit_t *ul, offset_t mof, off_t nb, int metadata)
{
	mt_map_t	*mtm = ul->un_logmap;
	int32_t		chunk;
	mapentry_t	*ent;
	mapentry_t	**bucket;

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));

	for (chunk = 0; nb; nb -= chunk, mof += chunk) {
		chunk = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (chunk > nb)
			chunk = nb;

		/* The hash chain is only searched for metadata. */
		if (metadata) {
			bucket = MAP_HASH(mof, mtm);
			mutex_enter(&mtm->mtm_mutex);
			for (ent = *bucket; ent != NULL; ent = ent->me_hash) {
				if (!DATAoverlapME(mof, chunk, ent)) {
					continue;
				}
				ASSERT(MEwithinDATA(ent, mof, chunk));
				if (ent->me_flags & ME_CANCEL) {
					continue;
				}
				ent->me_cancel = mtm->mtm_cancel;
				mtm->mtm_cancel = ent;
				ent->me_flags |= ME_CANCEL;
				if (ent->me_crb != NULL) {
					ent->me_crb->c_invalid = 1;
				}
			}
			mutex_exit(&mtm->mtm_mutex);
		}

		logmap_cancel_delta(ul, mof, chunk, metadata);
	}

	ASSERT(((mtm->mtm_debug & MT_CHECK_MAP) == 0) ||
	    map_check_linkage(mtm));
}
/*
 * logmap_iscancel
 *	Returns 1 when some map entry that is either a DT_CANCEL record or
 *	is being rolled (ME_ROLL) overlaps [mof, mof + nb); returns 0
 *	otherwise.
 */
int
logmap_iscancel(mt_map_t *mtm, offset_t mof, off_t nb)
{
	off_t		chunk;
	mapentry_t	*ent;
	mapentry_t	**bucket;
	int		hit = 0;

	mutex_enter(&mtm->mtm_mutex);
	for (chunk = 0; nb && !hit; nb -= chunk, mof += chunk) {
		chunk = MAPBLOCKSIZE - (mof & MAPBLOCKOFF);
		if (chunk > nb)
			chunk = nb;

		bucket = MAP_HASH(mof, mtm);
		for (ent = *bucket; ent != NULL; ent = ent->me_hash) {
			if (!(ent->me_flags & ME_ROLL) &&
			    (ent->me_dt != DT_CANCEL)) {
				continue;
			}
			if (DATAoverlapME(mof, chunk, ent)) {
				hit = 1;
				break;
			}
		}
	}
	mutex_exit(&mtm->mtm_mutex);

	return (hit);
}
/*
 * logmap_logscan_add
 *	Process one delta header `dp' found at log offset `lof' during a
 *	log scan.
 *
 *	The header is validated first; an invalid header puts the unit
 *	into error and EINVAL is returned.
 *
 *	A commit record ends the current transaction: if the map is dirty
 *	the cancel list is processed and the transaction id advanced.  The
 *	tail position is recorded and *nbp is set to the number of bytes
 *	up to the next delta header.
 *
 *	Any other record is added to the log map through logmap_add();
 *	cancel records are additionally put on the cancel list.  *nbp is
 *	set to the number of data bytes that follow the header (0 for
 *	DT_CANCEL and DT_ABZERO).
 *
 *	Returns 0 on success.
 */
static int
logmap_logscan_add(ml_unit_t *ul, struct delta *dp, off_t lof, size_t *nbp)
{
	mapentry_t	*me;
	int		error;
	mt_map_t	*mtm = ul->un_logmap;

	/* Validate the delta header. */
	error = 0;
	if ((dp->d_typ <= DT_NONE) || (dp->d_typ >= DT_MAX))
		error = EINVAL;
	if (dp->d_typ == DT_COMMIT) {
		if (dp->d_nb != INT32_C(0) && dp->d_nb != INT32_C(-1))
			error = EINVAL;
	} else {
		if ((dp->d_nb < INT32_C(0)) ||
		    (dp->d_nb > INT32_C(MAPBLOCKSIZE)))
			error = EINVAL;
		if (dp->d_mof < INT64_C(0))
			error = EINVAL;
	}
	if (error) {
		ldl_seterror(ul, "Error processing ufs log data during scan");
		return (error);
	}

	/* Commit record. */
	if (dp->d_typ == DT_COMMIT) {
		if (mtm->mtm_dirty) {
			ASSERT(dp->d_nb == INT32_C(0));
			logmap_free_cancel(mtm, &mtm->mtm_cancel);
			mtm->mtm_dirty = 0;
			mtm->mtm_nmet = 0;
			mtm->mtm_tid++;
			mtm->mtm_committid = mtm->mtm_tid;
			ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
			    logmap_logscan_commit_debug(lof, mtm));
		}
		*nbp = ldl_logscan_nbcommit(lof);
		mtm->mtm_tail_lof = lof;
		mtm->mtm_tail_nb = *nbp;
		return (0);
	}

	/* Any other record becomes a log map entry. */
	me = kmem_cache_alloc(mapentry_cache, KM_SLEEP);
	bzero(me, sizeof (mapentry_t));
	me->me_lof = lof;
	me->me_mof = dp->d_mof;
	me->me_nb = dp->d_nb;
	me->me_tid = mtm->mtm_tid;
	me->me_dt = dp->d_typ;
	me->me_hash = NULL;
	me->me_flags = (ME_LIST | ME_SCAN);
	logmap_add(ul, NULL, 0, me);

	switch (dp->d_typ) {
	case DT_CANCEL:
		/*
		 * logmap_add() frees the entry instead of adding it when
		 * the unit is in error; in that case `me' must not be
		 * touched or linked onto the cancel list.
		 */
		if (!(ul->un_flags & LDL_ERROR)) {
			me->me_flags |= ME_CANCEL;
			me->me_cancel = mtm->mtm_cancel;
			mtm->mtm_cancel = me;
		}
		break;
	default:
		ASSERT(((mtm->mtm_debug & MT_SCAN) == 0) ||
		    logmap_logscan_add_debug(dp, mtm));
		break;
	}

	/* Cancel and abzero records have no data after the header. */
	if ((dp->d_typ == DT_CANCEL) || (dp->d_typ == DT_ABZERO))
		*nbp = 0;
	else
		*nbp = dp->d_nb;
	return (0);
}
/*
 * logmap_logscan
 *	Rebuild the log map from the on-disk log.  The map is emptied and
 *	its transaction state reset, then the log is walked from its head
 *	to its tail: each delta header is read, handed to
 *	logmap_logscan_add(), and the data that follows it is skipped.
 *	The walk stops early when any of these steps fails.  Finally the
 *	deltas of the last, uncommitted transaction are aborted.
 */
void
logmap_logscan(ml_unit_t *ul)
{
	mt_map_t	*logmap = ul->un_logmap;
	struct delta	hdr;
	size_t		hdrsz = sizeof (hdr);
	size_t		skip;
	off_t		lof;

	ASSERT(ul->un_deltamap->mtm_next == (mapentry_t *)ul->un_deltamap);

	ldl_logscan_begin(ul);

	/* Start from an empty map. */
	(void) map_free_entries(logmap);
	logmap->mtm_tid = 0;
	logmap->mtm_committid = UINT32_C(0);
	logmap->mtm_age = 0;
	logmap->mtm_dirty = 0;
	logmap->mtm_ref = 0;

	for (lof = ul->un_head_lof; lof != ul->un_tail_lof; ) {
		/* read header, add delta, seek past the delta's data */
		if (ldl_logscan_read(ul, &lof, hdrsz, (caddr_t)&hdr) ||
		    logmap_logscan_add(ul, &hdr, lof, &skip) ||
		    ldl_logscan_read(ul, &lof, skip, NULL)) {
			break;
		}
	}

	logmap_abort(ul, logmap->mtm_tid);
	ldl_logscan_end(ul);
}
void
_init_map(void)
{
mapentry_cache = kmem_cache_create("lufs_mapentry_cache",
sizeof (mapentry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
/*
 * handle_dquot
 *	Clean up the dquot that map entry `me' refers to through me_arg.
 *	If the dquot is still referenced (dq_cnt != 0) its DQ_MOD and
 *	DQ_TRANS flags are cleared and dqput() is called.
 *
 *	vfs_dqrwlock (as reader) and dq_lock are taken here only when they
 *	are not already held, and only locks taken here are released.
 */
void
handle_dquot(mapentry_t *me)
{
	struct dquot	*dqp = (struct dquot *)me->me_arg;
	int		took_rwlock;
	int		took_mutex;

	took_rwlock = !RW_LOCK_HELD(&dqp->dq_ufsvfsp->vfs_dqrwlock);
	if (took_rwlock) {
		rw_enter(&dqp->dq_ufsvfsp->vfs_dqrwlock, RW_READER);
	}

	took_mutex = !MUTEX_HELD(&dqp->dq_lock);
	if (took_mutex) {
		mutex_enter(&dqp->dq_lock);
	}

	/* Nothing to do for a dquot without references. */
	if (dqp->dq_cnt != 0) {
		dqp->dq_flags &= ~(DQ_MOD|DQ_TRANS);
		dqput(dqp);
	}

	if (took_mutex) {
		mutex_exit(&dqp->dq_lock);
	}
	if (took_rwlock) {
		rw_exit(&dqp->dq_ufsvfsp->vfs_dqrwlock);
	}
}