#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/buf.h>
#include <sys/var.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/atomic.h>
#include <vm/seg_kmem.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <sys/vtrace.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_bio.h>
#include <sys/fs/ufs_log.h>
#include <sys/systm.h>
#include <sys/vfs.h>
#include <sys/sdt.h>
/*
 * Buffer-cache global state and its locks:
 *	blist_lock - serializes list building in bflush()/bfinval()
 *	bhdr_lock  - protects the free buf-header list (bhdrlist, nbuf)
 *	bfree_lock - protects bfreelist accounting and its B_WANTED flag
 */
static kmutex_t	blist_lock;
static kmutex_t	bhdr_lock;
static kmutex_t	bfree_lock;
struct hbuf	*hbuf;		/* hash buckets of cached buffers */
struct dwbuf	*dwbuf;		/* per-bucket delayed-write (B_DELWRI) lists */
static struct buf *bhdrlist;	/* free list of buffer headers */
static int	nbuf;		/* total buffer headers allocated so far */
static int	lastindex;	/* bucket where the last recycle scan stopped */
/* Hash a (device, block number) pair into an hbuf/dwbuf bucket index. */
#define	bio_bhash(dev, bn) (hash2ints((dev), (int)(bn)) & v.v_hmask)
/* Terminator for the b_list chains built by bflush()/bfinval(). */
#define	EMPTY_LIST ((struct buf *)-1)
static kcondvar_t	bio_mem_cv;		/* waiters for buffer memory */
static kcondvar_t	bio_flushinval_cv;	/* flush/invalidate exclusion */
static int	bio_doingflush;		/* # threads in bflush() */
static int	bio_doinginval;		/* # threads in bfinval() */
static int	bio_flinv_cv_wanted;	/* someone waits on bio_flushinval_cv */
/*
 * Buffer-cache statistics, exported to the kstat framework through
 * biostats_ptr/biostats_ndata below.
 */
struct biostats biostats = {
	{ "buffer_cache_lookups",	KSTAT_DATA_UINT32 },
	{ "buffer_cache_hits",		KSTAT_DATA_UINT32 },
	{ "new_buffer_requests",	KSTAT_DATA_UINT32 },
	{ "waits_for_buffer_allocs",	KSTAT_DATA_UINT32 },
	{ "buffers_locked_by_someone",	KSTAT_DATA_UINT32 },
	{ "duplicate_buffers_found",	KSTAT_DATA_UINT32 }
};
/* kstat view of the structure above: an array of kstat_named_t. */
kstat_named_t	*biostats_ptr = (kstat_named_t *)&biostats;
uint_t		biostats_ndata = (uint_t)(sizeof (biostats) /
					sizeof (kstat_named_t));
/*
 * UFS I/O statistics, bumped on the plain-UFS (non-log, non-snapshot)
 * paths in this file and elsewhere in UFS.
 */
struct ufsbiostats ub = {
	{ "breads",			KSTAT_DATA_UINT32 },
	{ "bwrites",			KSTAT_DATA_UINT32 },
	{ "fbiwrites",			KSTAT_DATA_UINT32 },
	{ "getpages",			KSTAT_DATA_UINT32 },
	{ "getras",			KSTAT_DATA_UINT32 },
	{ "putsyncs",			KSTAT_DATA_UINT32 },
	{ "putasyncs",			KSTAT_DATA_UINT32 },
	{ "putpageios",			KSTAT_DATA_UINT32 },
};
/*
 * Indirect strategy hooks, filled in when the UFS logging (LUFS) and
 * snapshot modules load; NULL until then.
 */
void (*bio_lufs_strategy)(void *, buf_t *);
void (*bio_snapshot_strategy)(void *, buf_t *);

/* Internal helpers, defined later in this file. */
static struct buf	*bio_getfreeblk(long);
static void		bio_mem_get(long);
static void		bio_bhdr_free(struct buf *);
static struct buf	*bio_bhdr_alloc(void);
static void		bio_recycle(int, long);
static void		bio_pageio_done(struct buf *);
static int		bio_incore(dev_t, daddr_t);

/*
 * Buffer-cache sizing tunables.  The "percent" macros are divisors:
 * physmem / (100/2) is 2% of memory, physmem / (100/20) is 20%.
 */
#define	BIO_BUF_PERCENT	(100/2)		/* default hwm: 2% of memory */
#define	BIO_MAX_PERCENT	(100/20)	/* absolute hwm cap: 20% of memory */
#define	BIO_BHDR_POOL	100		/* headers allocated per batch */
#define	BIO_MIN_HDR	10		/* minimum number of headers */
#define	BIO_MIN_HWM	(BIO_MIN_HDR * MAXBSIZE / 1024)	/* min hwm, in KB */
#define	BIO_HASHLEN	4		/* target buffers per hash chain */

/* 'want' argument to bio_recycle(): what resource the caller is short of. */
#define	BIO_HEADER	0x01
#define	BIO_MEM		0x02

extern	int bufhwm;		/* User tunable: high water mark, in KB */
extern	int bufhwm_pct;		/* User tunable: hwm as a percent of memory */
/*
 * Read in (if necessary) the block and return a buffer pointer.
 * Public wrapper: BREAD() dispatches to bread_common() with no
 * ufsvfs context.  The buffer is returned locked (b_sem held).
 */
struct buf *
bread(dev_t dev, daddr_t blkno, long bsize)
{
	return (BREAD(dev, blkno, bsize));
}
/*
 * Common code for bread().  arg, if non-NULL, is the ufsvfs the I/O
 * belongs to; it routes the request through the UFS log or snapshot
 * strategy hooks when those are active.  On a cache hit (B_DONE set)
 * the buffer is returned immediately; otherwise the read is started
 * and waited for.  Returns the buffer with b_sem held.
 */
struct buf *
bread_common(void *arg, dev_t dev, daddr_t blkno, long bsize)
{
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	klwp_t *lwp = ttolwp(curthread);

	CPU_STATS_ADD_K(sys, lread, 1);
	/* errflg == 1: on panic, may return an error buffer instead */
	bp = getblk_common(ufsvfsp, dev, blkno, bsize, 1);
	if (bp->b_flags & B_DONE)
		return (bp);		/* cache hit: contents are valid */
	bp->b_flags |= B_READ;
	ASSERT(bp->b_bcount == bsize);
	if (ufsvfsp == NULL) {					/* !ufs */
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
		/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
		/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
		ub.ub_breads.value.ul++;		/* plain ufs read */
		(void) bdev_strategy(bp);
	}
	if (lwp != NULL)
		lwp->lwp_ru.inblock++;
	CPU_STATS_ADD_K(sys, bread, 1);
	(void) biowait(bp);
	return (bp);
}
/*
 * Read in the block, like bread(), but also start asynchronous I/O on
 * the read-ahead block rablkno.  The read-ahead buffer is released
 * (B_ASYNC) rather than returned.
 */
struct buf *
breada(dev_t dev, daddr_t blkno, daddr_t rablkno, long bsize)
{
	struct buf *bp, *rabp;
	klwp_t *lwp = ttolwp(curthread);

	bp = NULL;
	if (!bio_incore(dev, blkno)) {
		CPU_STATS_ADD_K(sys, lread, 1);
		bp = GETBLK(dev, blkno, bsize);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = bsize;
			(void) bdev_strategy(bp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}

	/*
	 * Start the read-ahead only when it is not already cached and
	 * there is apparent headroom (bfreelist.b_bcount > 1 —
	 * NOTE(review): presumably a count of free buffers; confirm
	 * against the bfreelist definition).
	 */
	if (rablkno && bfreelist.b_bcount > 1 &&
	    !bio_incore(dev, rablkno)) {
		rabp = GETBLK(dev, rablkno, bsize);
		if (rabp->b_flags & B_DONE)
			brelse(rabp);		/* already valid: drop it */
		else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = bsize;
			(void) bdev_strategy(rabp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}

	/* Primary block was in core above: fall back to a plain bread. */
	if (bp == NULL)
		return (BREAD(dev, blkno, bsize));
	(void) biowait(bp);
	return (bp);
}
/*
 * Common code for writing a buffer.  clear_flags are stripped from
 * b_flags before the write; force_wait makes the write synchronous
 * even if B_ASYNC was set; do_relse releases the buffer after a
 * waited-for write completes.  Caller holds b_sem.
 */
void
bwrite_common(void *arg, struct buf *bp, int force_wait,
	int do_relse, int clear_flags)
{
	register int do_wait;
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	int flag;
	klwp_t *lwp = ttolwp(curthread);
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));
	flag = bp->b_flags;		/* sample before clearing */
	bp->b_flags &= ~clear_flags;
	if (lwp != NULL)
		lwp->lwp_ru.oublock++;
	CPU_STATS_ENTER_K();
	cpup = CPU;		/* get cpu pointer after preemption disabled */
	CPU_STATS_ADDQ(cpup, sys, lwrite, 1);
	CPU_STATS_ADDQ(cpup, sys, bwrite, 1);
	do_wait = ((flag & B_ASYNC) == 0 || force_wait);
	if (do_wait == 0)
		CPU_STATS_ADDQ(cpup, sys, bawrite, 1);
	CPU_STATS_EXIT_K();
	if (ufsvfsp == NULL) {					/* !ufs */
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
		/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
		/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ub.ub_bwrites.value.ul++;	/* plain ufs write */
		(void) bdev_strategy(bp);
	}
	if (do_wait) {
		(void) biowait(bp);
		if (do_relse) {
			brelse(bp);
		}
	}
}
/*
 * Write the buffer, releasing it afterwards (BWRITE macro expands to
 * the common write path with release semantics).  Caller holds b_sem.
 */
void
bwrite(struct buf *bp)
{
	BWRITE(bp);
}
/*
 * Write the buffer WITHOUT releasing it; the caller keeps b_sem and
 * must brelse() the buffer itself (BWRITE2 macro semantics).
 */
void
bwrite2(struct buf *bp)
{
	BWRITE2(bp);
}
/*
 * Delayed write: mark the buffer dirty (B_DELWRI) and "done", then
 * release it.  The actual device write happens later, when the buffer
 * is flushed or recycled.  b_start records when the buffer first
 * became dirty so the flusher can age it.  Caller holds b_sem.
 */
void
bdwrite(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	CPU_STATS_ADD_K(sys, lwrite, 1);
	if ((bp->b_flags & B_DELWRI) == 0)
		bp->b_start = ddi_get_lbolt();	/* newly dirtied: timestamp */
	bp->b_flags |= B_DELWRI | B_DONE;
	bp->b_resid = 0;
	brelse(bp);
}
/*
 * Asynchronous write: start the write and do not wait, but only make
 * it async when the free list has some headroom (b_bcount > 4 —
 * NOTE(review): presumably a free-buffer count; confirm).  Otherwise
 * the write degrades to synchronous via BWRITE().  Caller holds b_sem.
 */
void
bawrite(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	if (bfreelist.b_bcount > 4)
		bp->b_flags |= B_ASYNC;
	BWRITE(bp);
}
/*
 * Release the buffer with no I/O implied.  The buffer is re-inserted
 * on its hash bucket's available list (or on the bucket's
 * delayed-write list when B_DELWRI), any memory waiters are woken,
 * and b_sem is dropped.  B_NOCACHE buffers (geteblk/pageio style) are
 * destroyed outright instead of being cached.
 */
void
brelse(struct buf *bp)
{
	struct buf	**backp;
	uint_t		index;
	kmutex_t	*hmp;
	struct	buf	*dp;
	struct	hbuf	*hp;

	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * A retry-write buffer that completed without error and is no
	 * longer dirty can drop its retry flag.
	 */
	if ((bp->b_flags & (B_ERROR | B_DELWRI | B_RETRYWRI)) == B_RETRYWRI) {
		bp->b_flags &= ~B_RETRYWRI;
	}

	if (bp->b_flags & (B_ERROR|B_NOCACHE)) {
		if (bp->b_flags & B_NOCACHE) {
			/* transient buffer: free it now, never cache it */
			kmem_free(bp->b_un.b_addr, bp->b_bufsize);
			sema_destroy(&bp->b_sem);
			sema_destroy(&bp->b_io);
			kmem_free(bp, sizeof (struct buf));
			return;
		}
		/*
		 * A failed retry-write is kept dirty in the hope a later
		 * flush succeeds; other errored buffers are aged out and
		 * marked stale so lookups skip them.
		 */
		if ((bp->b_flags & (B_READ | B_RETRYWRI)) == B_RETRYWRI) {
			bp->b_flags |= B_DELWRI;
			/* restart the aging clock for the flusher */
			bp->b_start = ddi_get_lbolt();
		} else
			bp->b_flags |= B_AGE|B_STALE;
		bp->b_flags &= ~B_ERROR;
		bp->b_error = 0;
	}

	index = bio_bhash(bp->b_edev, bp->b_blkno);
	hmp = &hbuf[index].b_lock;
	mutex_enter(hmp);
	hp = &hbuf[index];
	dp = (struct buf *)hp;

	/* sanity: 0 <= bucket length < total headers */
	ASSERT(hp->b_length >= 0);
	ASSERT(hp->b_length < nbuf);
	hp->b_length++;		/* this buffer becomes available */

	/* dirty buffers go on the bucket's delayed-write list instead */
	if (bp->b_flags & B_DELWRI) {
		dp = (struct buf *)&dwbuf[index];
	}
	ASSERT(bp->b_bufsize > 0);
	ASSERT(bp->b_bcount > 0);
	ASSERT(bp->b_un.b_addr != NULL);

	/*
	 * B_AGE buffers are inserted at the head of the available list
	 * (reclaimed first); others at the tail (LRU order).
	 */
	if (bp->b_flags & B_AGE) {
		backp = &dp->av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = dp;
	} else {
		backp = &dp->av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = dp;
	}
	mutex_exit(hmp);

	/* wake anyone who was waiting for buffer memory to come back */
	if (bfreelist.b_flags & B_WANTED) {
		mutex_enter(&bfree_lock);
		if (bfreelist.b_flags & B_WANTED) {	/* recheck under lock */
			bfreelist.b_flags &= ~B_WANTED;
			cv_broadcast(&bio_mem_cv);
		}
		mutex_exit(&bfree_lock);
	}

	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC);
	sema_v(&bp->b_sem);
}
/*
 * Count buffers currently marked B_BUSY across all hash buckets.
 * If cleanit is set and any busy buffers were found, push all
 * delayed writes (bflush(NODEV)) as a side effect.  Returns the
 * busy count observed (a snapshot; buckets are scanned one at a
 * time, so the total is not atomic across the cache).
 */
int
bio_busy(int cleanit)
{
	struct buf *bp, *dp;
	int busy = 0;
	int i;
	kmutex_t *hmp;

	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;
		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_flags & B_BUSY)
				busy++;
		}
		mutex_exit(hmp);
	}

	if (cleanit && busy != 0) {
		bflush(NODEV);
	}

	return (busy);
}
/*
 * Public getblk() entry point for non-UFS callers: look up (or create)
 * the buffer for (dev, blkno) with no ufsvfs context and without the
 * panic-time error-buffer fallback (errflg == 0).  Returns the buffer
 * locked (b_sem held).
 */
struct buf *
getblk(dev_t dev, daddr_t blkno, long bsize)
{
	return (getblk_common(NULL, dev, blkno, bsize, 0));
}
/*
 * Assign a buffer for the given (dev, blkno).  If the block is
 * already cached in the hash chain, lock it and return it; otherwise
 * allocate a fresh buffer of bsize bytes, insert it in the hash, and
 * return it (contents invalid, B_DONE clear).  If errflg is set and
 * the system is panicking, a throwaway error buffer is returned
 * instead of sleeping on a busy buffer.  Returns with b_sem held.
 */
struct buf *
getblk_common(void * arg, dev_t dev, daddr_t blkno, long bsize, int errflg)
{
	ufsvfs_t *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	struct buf *dp;
	struct buf *nbp = NULL;
	struct buf *errbp;
	uint_t		index;
	kmutex_t	*hmp;
	struct	hbuf	*hp;

	if (getmajor(dev) >= devcnt)
		cmn_err(CE_PANIC, "blkdev");

	biostats.bio_lookup.value.ui32++;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	mutex_enter(hmp);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Found the block.  If it is locked by someone else,
		 * drop the hash lock and sleep for it; recheck identity
		 * afterwards since the buffer may have been recycled.
		 */
		if (!sema_tryp(&bp->b_sem)) {
			biostats.bio_bufbusy.value.ui32++;
			mutex_exit(hmp);
			/* panicking: don't block; hand back an error buf */
			if (panicstr && errflg)
				goto errout;
			sema_p(&bp->b_sem);
			if (bp->b_blkno != blkno || bp->b_edev != dev ||
			    (bp->b_flags & B_STALE)) {
				/* reused while we slept: start over */
				sema_v(&bp->b_sem);
				mutex_enter(hmp);
				goto loop;	/* start over */
			}
			mutex_enter(hmp);
		}
		/* cache hit: remove from the available list */
		biostats.bio_hit.value.ui32++;
		bp->b_flags &= ~B_AGE;
		hp->b_length--;
		notavail(bp);
		mutex_exit(hmp);
		ASSERT((bp->b_flags & B_NOCACHE) == 0);
		if (nbp == NULL) {
			ASSERT(SEMA_HELD(&bp->b_sem));
			return (bp);
		}
		/*
		 * We raced: a new buffer was allocated (below) while the
		 * block appeared in the cache.  Dismantle the duplicate
		 * and return its memory and header to the pools.
		 */
		biostats.bio_bufdup.value.ui32++;
		kmem_free(nbp->b_un.b_addr, nbp->b_bufsize);
		nbp->b_un.b_addr = NULL;
		mutex_enter(&bfree_lock);
		bfreelist.b_bufsize += nbp->b_bufsize;
		mutex_exit(&bfree_lock);
		nbp->b_dev = (o_dev_t)NODEV;
		nbp->b_edev = NODEV;
		nbp->b_flags = 0;
		nbp->b_file = NULL;
		nbp->b_offset = -1;
		sema_v(&nbp->b_sem);
		bio_bhdr_free(nbp);
		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	}

	/*
	 * Miss.  Allocate a fresh buffer (dropping the hash lock, since
	 * allocation can block) and rescan the chain in case the block
	 * appeared meanwhile.
	 */
	if (nbp == NULL) {
		mutex_exit(hmp);
		nbp = bio_getfreeblk(bsize);
		mutex_enter(hmp);
		goto loop;
	}

	/* second scan still missed: claim nbp for this block */
	nbp->b_flags = B_BUSY;
	nbp->b_edev = dev;
	nbp->b_dev = (o_dev_t)cmpdev(dev);
	nbp->b_blkno = blkno;
	nbp->b_iodone = NULL;
	nbp->b_bcount = bsize;
	/* tag UFS buffers with the root vnode so writes can find the vfs */
	if (ufsvfsp && ufsvfsp->vfs_root) {
		nbp->b_vp = ufsvfsp->vfs_root;
	} else {
		nbp->b_vp = NULL;
	}
	ASSERT((nbp->b_flags & B_NOCACHE) == 0);
	binshash(nbp, dp);
	mutex_exit(hmp);

	ASSERT(SEMA_HELD(&nbp->b_sem));

	return (nbp);

errout:
	/* panic path: hand back a locked, errored scratch buffer */
	errbp = geteblk();
	sema_p(&errbp->b_sem);
	errbp->b_flags &= ~B_BUSY;
	errbp->b_flags |= (B_ERROR | B_DONE);
	return (errbp);
}
/*
 * Allocate a transient buffer of bsize bytes that is never entered in
 * the buffer cache: B_NOCACHE makes brelse() destroy it instead of
 * re-hashing it.  The buffer is returned locked (bioinit() leaves
 * b_sem held) and marked B_BUSY|B_AGE.
 */
struct buf *
ngeteblk(long bsize)
{
	struct buf *ebp;

	ebp = kmem_alloc(sizeof (struct buf), KM_SLEEP);
	bioinit(ebp);
	ebp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
	ebp->av_forw = NULL;
	ebp->av_back = NULL;
	ebp->b_dev = (o_dev_t)NODEV;
	ebp->b_edev = NODEV;
	ebp->b_lblkno = 0;
	ebp->b_flags = B_BUSY | B_NOCACHE | B_AGE;
	ebp->b_bufsize = bsize;
	ebp->b_bcount = bsize;
	ebp->b_iodone = NULL;
	return (ebp);
}
/*
 * Return a 1KB transient (non-cached) buffer; see ngeteblk() for the
 * general-size variant.
 */
struct buf *
geteblk(void)
{
	return (ngeteblk((long)1024));
}
/*
 * Non-blocking lookup: return the cached buffer for (dev, blkno) only
 * if it can be had without sleeping (both the hash lock and b_sem are
 * tried, never waited for) and its contents are valid (B_DONE).
 * Returns the locked buffer, or NULL without side effects.
 */
struct buf *
trygetblk(dev_t dev, daddr_t blkno)
{
	struct buf	*bp;
	struct buf	*dp;
	struct hbuf	*hp;
	kmutex_t	*hmp;
	uint_t		index;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	hmp = &hp->b_lock;

	if (!mutex_tryenter(hmp))
		return (NULL);		/* would block on the hash lock */

	dp = (struct buf *)hp;
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Found it; grab it only if immediately available and
		 * its data is valid.
		 */
		if (sema_tryp(&bp->b_sem)) {
			if (bp->b_flags & B_DONE) {
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				return (bp);
			} else {
				sema_v(&bp->b_sem);
				break;
			}
		}
		break;	/* locked by someone else: give up */
	}
	mutex_exit(hmp);
	return (NULL);
}
/*
 * Wait for I/O completion on the buffer; compatibility wrapper around
 * biowait().  Returns the buffer's error status.
 */
int
iowait(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (biowait(bp));
}
/*
 * Mark I/O complete on the buffer; compatibility wrapper around
 * biodone().
 */
void
iodone(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	(void) biodone(bp);
}
/*
 * Zero the b_bcount bytes of the buffer's data area and clear the
 * residual count.  Caller holds b_sem.
 */
void
clrbuf(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	bzero(bp->b_un.b_addr, bp->b_bcount);
	bp->b_resid = 0;
}
/*
 * Push all delayed-write buffers for the given device (all devices if
 * dev == NODEV) out to disk.  Works in two passes: first collect the
 * candidate buffers on a private b_list chain under blist_lock, then
 * write each one out with only its own locks held.  Mutually
 * exclusive with bfinval() and other bflush() callers via
 * bio_doingflush/bio_doinginval.
 */
void
bflush(dev_t dev)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *delwri_list = EMPTY_LIST;
	int i, index;
	kmutex_t *hmp;

	mutex_enter(&blist_lock);
	/* wait out any concurrent flush or invalidate */
	while (bio_doinginval || bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doingflush++;

	/* pass 1: chain every matching dirty buffer onto delwri_list */
	for (i = 0; i < v.v_hbuf; i++) {
		hmp = &hbuf[i].b_lock;
		dp = (struct buf *)&dwbuf[i];
		mutex_enter(hmp);
		for (bp = dp->av_forw; bp != dp; bp = bp->av_forw) {
			if (dev == NODEV || bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = delwri_list;
					delwri_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/* pass 2: lock and write each collected buffer */
	while (delwri_list != EMPTY_LIST) {
		bp = delwri_list;
		sema_p(&bp->b_sem);	/* may block */
		/* skip if it changed devices or is busy during a panic */
		if ((dev != bp->b_edev && dev != NODEV) ||
		    (panicstr && bp->b_flags & B_BUSY)) {
			sema_v(&bp->b_sem);
			delwri_list = bp->b_list;
			bp->b_list = NULL;
			continue;	/* No longer a candidate */
		}
		if (bp->b_flags & B_DELWRI) {
			index = bio_bhash(bp->b_edev, bp->b_blkno);
			hp = &hbuf[index];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			bp->b_flags |= B_ASYNC;	/* don't wait for the write */
			mutex_enter(hmp);
			hp->b_length--;
			notavail(bp);
			mutex_exit(hmp);
			if (bp->b_vp == NULL) {			/* !ufs */
				BWRITE(bp);
			} else {				/* ufs */
				UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
			}
		} else {
			sema_v(&bp->b_sem);	/* cleaned meanwhile */
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doingflush--;
	if (bio_flinv_cv_wanted) {
		bio_flinv_cv_wanted = 0;
		cv_broadcast(&bio_flushinval_cv);
	}
	mutex_exit(&blist_lock);
}
/*
 * Synchronously flush one delayed-write block, identified by
 * (dev, blkno), if it is dirty and not stale.  A no-op if the block
 * is not cached.  The identity is rechecked after taking b_sem since
 * the buffer may be recycled between the hash scan and the lock.
 */
void
blkflush(dev_t dev, daddr_t blkno)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *sbp = NULL;
	uint_t index;
	kmutex_t *hmp;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	/*
	 * Scan the chain under the hash lock, but acquire b_sem only
	 * after dropping it (lock ordering: never sleep on b_sem while
	 * holding the hash mutex).
	 */
	mutex_enter(hmp);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		sbp = bp;
		break;
	}
	mutex_exit(hmp);
	if (sbp == NULL)
		return;		/* not cached: nothing to flush */

	sema_p(&sbp->b_sem);	/* may block */
	/* recheck identity: still our dirty, non-stale block? */
	if (sbp->b_blkno == blkno && sbp->b_edev == dev &&
	    (sbp->b_flags & (B_DELWRI|B_STALE)) == B_DELWRI) {
		mutex_enter(hmp);
		hp->b_length--;
		notavail(sbp);
		mutex_exit(hmp);
		if (sbp->b_vp == NULL) {		/* !ufs */
			BWRITE(sbp);	/* synchronous write */
		} else {				/* ufs */
			UFS_BWRITE(VTOI(sbp->b_vp)->i_ufsvfs, sbp);
		}
	} else {
		sema_v(&sbp->b_sem);
	}
}
/*
 * Invalidate every cached buffer belonging to dev by marking it
 * B_STALE|B_AGE.  If force is set, dirty (B_DELWRI) buffers are put
 * back on the available list with their dirty state discarded;
 * otherwise a remaining dirty buffer yields EIO.  Uses the same
 * two-pass collect-then-process scheme as bflush(), excluded against
 * concurrent flushes via bio_doingflush/bio_doinginval.
 * Returns 0 or EIO.
 */
int
bfinval(dev_t dev, int force)
{
	struct buf *dp;
	struct buf *bp;
	struct buf *binval_list = EMPTY_LIST;
	int i, error = 0;
	kmutex_t *hmp;
	uint_t index;
	struct buf **backp;

	mutex_enter(&blist_lock);
	/* wait out a concurrent bflush() */
	while (bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doinginval++;

	/* pass 1: chain all of this device's buffers onto binval_list */
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;
		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = binval_list;
					binval_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/* pass 2: take each buffer and stale it */
	while (binval_list != EMPTY_LIST) {
		bp = binval_list;
		sema_p(&bp->b_sem);
		if (bp->b_edev == dev) {
			if (force && (bp->b_flags & B_DELWRI)) {
				/*
				 * Discard the dirty data: move the buffer
				 * off the delayed-write list and insert it
				 * at the head of the bucket's available
				 * list, clearing its dirty/busy state.
				 */
				index = bio_bhash(bp->b_edev, bp->b_blkno);
				hmp = &hbuf[index].b_lock;
				dp = (struct buf *)&hbuf[index];
				mutex_enter(hmp);

				/* remove from delayed write list */
				notavail(bp);
				/* add to B_AGE side of hash list */
				backp = &dp->av_forw;
				(*backp)->av_back = bp;
				bp->av_forw = *backp;
				*backp = bp;
				bp->av_back = dp;

				bp->b_flags &=
				    ~(B_BUSY | B_DELWRI | B_RETRYWRI);
				mutex_exit(hmp);
			}
			/* dirty + !force means we couldn't invalidate: EIO */
			if ((bp->b_flags & B_DELWRI) == 0)
				bp->b_flags |= B_STALE|B_AGE;
			else
				error = EIO;
		}
		sema_v(&bp->b_sem);
		binval_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doinginval--;
	if (bio_flinv_cv_wanted) {
		cv_broadcast(&bio_flushinval_cv);
		bio_flinv_cv_wanted = 0;
	}
	mutex_exit(&blist_lock);
	return (error);
}
/*
 * Invalidate all non-dirty buffers for dev; dirty ones are left
 * alone (bfinval with force == 0, ignoring its EIO result).
 */
void
binval(dev_t dev)
{
	(void) bfinval(dev, 0);
}
/*
 * Initialize the buffer cache at boot: validate/derive the buffer
 * memory high-water mark (v.v_bufhwm, in KB) from the bufhwm and
 * bufhwm_pct tunables, size the hash table so chains average
 * BIO_HASHLEN buffers, and allocate and self-link the hbuf/dwbuf
 * bucket arrays and the bfreelist header.
 */
void
binit(void)
{
	struct buf *bp;
	unsigned int i, pct;
	ulong_t	bio_max_hwm, bio_default_hwm;

	/*
	 * Absolute cap: 1/BIO_MAX_PERCENT of memory, further limited
	 * by a quarter of free kernel heap, expressed in KB and
	 * clamped to INT32_MAX.
	 */
	bio_max_hwm = MIN(physmem / BIO_MAX_PERCENT,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_max_hwm = MIN(INT32_MAX, bio_max_hwm);

	pct = BIO_BUF_PERCENT;
	/* a bufhwm_pct that maps below the cap divisor is out of range */
	if (bufhwm_pct != 0 &&
	    ((pct = 100 / bufhwm_pct) < BIO_MAX_PERCENT)) {
		pct = BIO_BUF_PERCENT;
		cmn_err(CE_WARN, "binit: bufhwm_pct(%d) out of \
range(1..%d). Using %d as default.",
		    bufhwm_pct,
		    100 / BIO_MAX_PERCENT, 100 / BIO_BUF_PERCENT);
	}

	bio_default_hwm = MIN(physmem / pct,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_default_hwm = MIN(INT32_MAX, bio_default_hwm);

	/* explicit bufhwm wins; 0 means "use the default" */
	if ((v.v_bufhwm = bufhwm) == 0)
		v.v_bufhwm = bio_default_hwm;

	if (v.v_bufhwm < BIO_MIN_HWM || v.v_bufhwm > bio_max_hwm) {
		v.v_bufhwm = (int)bio_max_hwm;
		cmn_err(CE_WARN,
		    "binit: bufhwm(%d) out \
of range(%d..%lu). Using %lu as default",
		    bufhwm,
		    BIO_MIN_HWM, bio_max_hwm, bio_max_hwm);
	}

	/*
	 * Hash bucket count: a power of two sized so that chains hold
	 * about BIO_HASHLEN max-sized buffers each.
	 */
	v.v_hbuf = 1 << highbit((((ulong_t)v.v_bufhwm * 1024) / MAXBSIZE) /
	    BIO_HASHLEN);
	v.v_hmask = v.v_hbuf - 1;
	v.v_buf = BIO_BHDR_POOL;

	hbuf = kmem_zalloc(v.v_hbuf * sizeof (struct hbuf), KM_SLEEP);
	dwbuf = kmem_zalloc(v.v_hbuf * sizeof (struct dwbuf), KM_SLEEP);

	/* bfreelist.b_bufsize tracks available buffer memory, in bytes */
	bfreelist.b_bufsize = (size_t)v.v_bufhwm * 1024;

	/* all list heads start out pointing at themselves (empty rings) */
	bp = &bfreelist;
	bp->b_forw = bp->b_back = bp->av_forw = bp->av_back = bp;

	for (i = 0; i < v.v_hbuf; i++) {
		hbuf[i].b_forw = hbuf[i].b_back = (struct buf *)&hbuf[i];
		hbuf[i].av_forw = hbuf[i].av_back = (struct buf *)&hbuf[i];
		dwbuf[i].b_forw = dwbuf[i].b_back = (struct buf *)&dwbuf[i];
		dwbuf[i].av_forw = dwbuf[i].av_back = (struct buf *)&dwbuf[i];
	}
}
/*
 * Wait for the I/O on the buffer to complete (b_io is posted by
 * biodone()).  While panicking we spin-poll B_DONE instead of
 * sleeping.  On a synchronous buffer any kernel mapping set up with
 * bp_mapin is torn down.  Returns the buffer's error status.
 * Caller holds b_sem.
 */
int
biowait(struct buf *bp)
{
	int error = 0;
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));

	cpup = CPU;
	atomic_inc_64(&cpup->cpu_stats.sys.iowait);
	DTRACE_IO1(wait__start, struct buf *, bp);

	if (panicstr) {
		/* interrupts may not run: poll for completion */
		while ((bp->b_flags & B_DONE) == 0)
			drv_usecwait(10);
	} else
		sema_p(&bp->b_io);

	DTRACE_IO1(wait__done, struct buf *, bp);
	atomic_dec_64(&cpup->cpu_stats.sys.iowait);

	error = geterror(bp);
	if ((bp->b_flags & B_ASYNC) == 0) {
		if (bp->b_flags & B_REMAPPED)
			bp_mapout(bp);
	}
	return (error);
}
/*
 * Mark I/O complete on a buffer.  If a b_iodone callback is set it
 * takes over entirely (and must handle release/wakeup itself).
 * Otherwise: async buffers are released here (pageio/remapped ones
 * via bio_pageio_done()), and synchronous waiters are woken by
 * posting b_io.
 */
void
biodone(struct buf *bp)
{
	if (bp->b_flags & B_STARTED) {
		DTRACE_IO1(done, struct buf *, bp);
		bp->b_flags &= ~B_STARTED;
	}

	/* a private completion routine supersedes the default handling */
	if (bp->b_iodone != NULL) {
		(*(bp->b_iodone))(bp);
		return;
	}
	ASSERT((bp->b_flags & B_DONE) == 0);
	ASSERT(SEMA_HELD(&bp->b_sem));
	bp->b_flags |= B_DONE;

	if (bp->b_flags & B_ASYNC) {
		if (bp->b_flags & (B_PAGEIO|B_REMAPPED))
			bio_pageio_done(bp);
		else
			brelse(bp);	/* release bp and resources */
	} else {
		sema_v(&bp->b_io);	/* wake the biowait() sleeper */
	}
}
/*
 * Pick up the buffer's error status: 0 when no error is flagged,
 * b_error when one is recorded, or EIO when B_ERROR is set but no
 * specific errno was stored.  Caller holds b_sem.
 */
int
geterror(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));

	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	return (bp->b_error != 0 ? bp->b_error : EIO);
}
/*
 * Allocate and set up a buffer for page I/O on the page list pp
 * (len bytes) against vnode vp.  For reads, paging statistics are
 * charged to the CPU and current zone, classified by the kind of
 * vnode backing the pages (swap, executable, or regular file).
 * The returned buffer is B_PAGEIO|B_NOCACHE|B_BUSY, locked (both
 * semaphores held at 0), and holds a reference on vp that
 * pageio_done() releases.
 */
struct buf *
pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags)
{
	struct buf *bp;
	struct cpu *cpup;

	if (flags & B_READ) {
		CPU_STATS_ENTER_K();
		cpup = CPU;	/* get pointer AFTER preemption disabled */

		CPU_STATS_ADDQ(cpup, vm, pgin, 1);
		CPU_STATS_ADDQ(cpup, vm, pgpgin, btopr(len));

		atomic_add_64(&curzone->zone_pgpgin, btopr(len));

		/* a synchronous pagein counts as a major fault */
		if ((flags & B_ASYNC) == 0) {
			klwp_t *lwp = ttolwp(curthread);
			if (lwp != NULL)
				lwp->lwp_ru.majflt++;
			CPU_STATS_ADDQ(cpup, vm, maj_fault, 1);
		}

		/* classify the pagein by what backs the pages */
		if (pp != NULL && pp->p_vnode != NULL) {
			if (IS_SWAPFSVP(pp->p_vnode)) {
				CPU_STATS_ADDQ(cpup, vm, anonpgin, btopr(len));
				atomic_add_64(&curzone->zone_anonpgin,
				    btopr(len));
			} else {
				if (pp->p_vnode->v_flag & VVMEXEC) {
					CPU_STATS_ADDQ(cpup, vm, execpgin,
					    btopr(len));
					atomic_add_64(&curzone->zone_execpgin,
					    btopr(len));
				} else {
					CPU_STATS_ADDQ(cpup, vm, fspgin,
					    btopr(len));
					atomic_add_64(&curzone->zone_fspgin,
					    btopr(len));
				}
			}
		}
		CPU_STATS_EXIT_K();

		TRACE_1(TR_FAC_VM, TR_PAGE_WS_IN,
		    "page_ws_in:pp %p", pp);
	}

	bp = kmem_zalloc(sizeof (struct buf), KM_SLEEP);
	bp->b_bcount = len;
	bp->b_bufsize = len;
	bp->b_pages = pp;
	bp->b_flags = B_PAGEIO | B_NOCACHE | B_BUSY | flags;
	bp->b_offset = -1;
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	/* Initialize bp->b_sem in "locked" state */
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);

	VN_HOLD(vp);	/* released in pageio_done() */
	bp->b_vp = vp;

	return (bp);
}
/*
 * Tear down a buffer created by pageio_setup(): undo any kernel
 * mapping, drop the vnode hold, and free the buffer.  Caller holds
 * b_sem; the buffer must be a B_NOCACHE pageio buffer.
 */
void
pageio_done(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	VN_RELE(bp->b_vp);
	bp->b_vp = NULL;
	ASSERT((bp->b_flags & B_NOCACHE) != 0);

	/* A sema_v(bp->b_sem) is implied by the destroy. */
	sema_destroy(&bp->b_sem);
	sema_destroy(&bp->b_io);
	kmem_free(bp, sizeof (struct buf));
}
/*
 * Check whether any buffer other than sbp is outstanding (dirty or
 * busy, and not stale) on device dev.  Returns 1 on the first such
 * buffer found, 0 otherwise.  Used to decide if a device is quiesced.
 */
int
bcheck(dev_t dev, struct buf *sbp)
{
	struct buf	*bp;
	struct buf	*dp;
	int i;
	kmutex_t *hmp;

	/*
	 * scan all of the hash chains
	 */
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if ((bp->b_edev == dev) &&
			    ((bp->b_flags & B_STALE) == 0) &&
			    (bp->b_flags & (B_DELWRI|B_BUSY)) &&
			    (bp != sbp)) {
				mutex_exit(hmp);
				return (1);	/* device is not quiesced */
			}
		}
		mutex_exit(hmp);
	}
	return (0);
}
/*
 * Fold two ints into a hash value.  Each byte-shifted view of x and
 * then of y (shifts 0, 8, 16, 24) is mixed in as hash = hash*7 +
 * term - 1; the unrolled original's first step (hash = x - 1) is the
 * shift-0 term with hash starting at 0.  Callers mask the result
 * (see bio_bhash()), so the signed value itself is not meaningful.
 */
int
hash2ints(int x, int y)
{
	int hash = 0;
	int shift;

	for (shift = 0; shift < 32; shift += 8)
		hash = (hash * 7) + (x >> shift) - 1;
	for (shift = 0; shift < 32; shift += 8)
		hash = (hash * 7) + (y >> shift) - 1;

	return (hash);
}
/*
 * Return a fresh buffer header with bsize bytes of data attached,
 * locked (b_sem held).  Memory accounting is reserved first
 * (bio_mem_get may recycle cached buffers), then a header is taken
 * and the data area allocated.  If the kmem allocation fails
 * (KM_NOSLEEP), scan the hash buckets for an idle cached buffer of
 * exactly bsize bytes to steal, and only as a last resort block in
 * KM_SLEEP.
 */
static struct buf *
bio_getfreeblk(long bsize)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	kmutex_t	*hmp;
	uint_t		start, end;

	/*
	 * mutex_enter(&bfree_lock) is done inside bio_mem_get();
	 * it reserves bsize bytes of buffer memory for us.
	 */
	bio_mem_get(bsize);	/* Account for our memory request */

	bp = bio_bhdr_alloc();	/* Get a buf hdr */
	sema_p(&bp->b_sem);	/* Should never fail */

	ASSERT(bp->b_un.b_addr == NULL);
	bp->b_un.b_addr = kmem_alloc(bsize, KM_NOSLEEP);
	if (bp->b_un.b_addr != NULL) {
		/*
		 * Make the common path short
		 */
		bp->b_bufsize = bsize;
		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	} else {
		struct buf *save;

		save = bp;	/* Save bp we allocated */
		start = end = lastindex;

		biostats.bio_bufwant.value.ui32++;

		/*
		 * Memory is tight: scan every bucket (starting where the
		 * last scan ended) for an idle clean buffer whose data
		 * area is exactly bsize and can be taken over directly.
		 */
		do {
			hp = &hbuf[start];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;
			mutex_enter(hmp);
			bp = dp->av_forw;

			while (bp != dp) {
				ASSERT(bp != NULL);

				if (!sema_tryp(&bp->b_sem)) {
					bp = bp->av_forw;
					continue;
				}

				/*
				 * Since we are going down the freelist
				 * associated with this hash bucket the
				 * B_DELWRI flag should not be set.
				 */
				ASSERT(!(bp->b_flags & B_DELWRI));

				if (bp->b_bufsize == bsize) {
					hp->b_length--;
					notavail(bp);
					bremhash(bp);
					mutex_exit(hmp);

					/*
					 * Didn't kmem_alloc any more, so not
					 * as much memory is needed as we
					 * reserved: return the excess.
					 */
					mutex_enter(&bfree_lock);
					bfreelist.b_bufsize += bsize;
					mutex_exit(&bfree_lock);

					lastindex = start;

					/* return the original header */
					sema_v(&save->b_sem);
					bio_bhdr_free(save);

					ASSERT(SEMA_HELD(&bp->b_sem));
					return (bp);
				}
				sema_v(&bp->b_sem);
				bp = bp->av_forw;
			}
			mutex_exit(hmp);
			start = ((start + 1) % v.v_hbuf);
		} while (start != end);

		biostats.bio_bufwait.value.ui32++;
		bp = save;		/* Do a blocking try */
		bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
	}

	bp->b_bufsize = bsize;
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (bp);
}
/*
 * Take a buffer header off the free header list, growing the pool by
 * a batch of v.v_buf zero-filled headers when the list is empty.  If
 * even that allocation fails (KM_NOSLEEP), recycle cached buffers to
 * liberate headers and retry; this loop only ends when a header is
 * obtained.  The returned header has its b_sem available (initialized
 * to 1) and av_forw cleared.
 */
static struct buf *
bio_bhdr_alloc(void)
{
	struct buf *dp, *sdp;
	struct buf *bp;
	int i;

	for (;;) {
		mutex_enter(&bhdr_lock);
		if (bhdrlist != NULL) {
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);
			bp->av_forw = NULL;
			return (bp);
		}
		mutex_exit(&bhdr_lock);

		/*
		 * Need to allocate a new pool.  If the system is currently
		 * out of memory, then try freeing things on the freelist.
		 */
		dp = kmem_zalloc(sizeof (struct buf) * v.v_buf, KM_NOSLEEP);
		if (dp == NULL) {
			/*
			 * System can't give us a pool of headers, try
			 * recycling from the free lists.
			 */
			bio_recycle(BIO_HEADER, 0);
		} else {
			sdp = dp;	/* keep the batch start for the list */
			for (i = 0; i < v.v_buf; i++, dp++) {
				/*
				 * The next two lines are needed since NODEV
				 * is not the last entry in the device table
				 */
				dp->b_dev = (o_dev_t)NODEV;
				dp->b_edev = NODEV;
				dp->av_forw = dp + 1;
				sema_init(&dp->b_sem, 1, NULL, SEMA_DEFAULT,
				    NULL);
				sema_init(&dp->b_io, 0, NULL, SEMA_DEFAULT,
				    NULL);
				dp->b_offset = -1;
			}
			mutex_enter(&bhdr_lock);
			(--dp)->av_forw = bhdrlist;	/* Fix last pointer */
			bhdrlist = sdp;
			nbuf += v.v_buf;
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);

			bp->av_forw = NULL;
			return (bp);
		}
	}
}
/*
 * Return a fully-detached buffer header (no list links, no data, no
 * device, no flags — enforced by the ASSERTs) to the free header list.
 */
static void
bio_bhdr_free(struct buf *bp)
{
	ASSERT(bp->b_back == NULL);
	ASSERT(bp->b_forw == NULL);
	ASSERT(bp->av_back == NULL);
	ASSERT(bp->av_forw == NULL);
	ASSERT(bp->b_un.b_addr == NULL);
	ASSERT(bp->b_dev == (o_dev_t)NODEV);
	ASSERT(bp->b_edev == NODEV);
	ASSERT(bp->b_flags == 0);

	mutex_enter(&bhdr_lock);
	bp->av_forw = bhdrlist;
	bhdrlist = bp;
	mutex_exit(&bhdr_lock);
}
/*
 * Reserve bsize bytes against the buffer-memory budget
 * (bfreelist.b_bufsize).  If the budget is exhausted, recycle cached
 * buffers until the reservation can be satisfied (bio_recycle only
 * returns once it has charged bsize to the budget on our behalf).
 */
static void
bio_mem_get(long bsize)
{
	mutex_enter(&bfree_lock);
	if (bfreelist.b_bufsize > bsize) {
		bfreelist.b_bufsize -= bsize;
		mutex_exit(&bfree_lock);
		return;
	}
	mutex_exit(&bfree_lock);
	bio_recycle(BIO_MEM, bsize);
}
/*
 * Write out, asynchronously, every buffer on a b_list chain built by
 * bio_recycle().  Each buffer is already locked (b_sem taken when it
 * was chained); BWRITE/UFS_BWRITE consume that lock.
 */
static void
bio_flushlist(struct buf *delwri_list)
{
	struct buf *bp;

	while (delwri_list != EMPTY_LIST) {
		bp = delwri_list;
		bp->b_flags |= B_AGE | B_ASYNC;	/* reclaim soon, don't wait */
		if (bp->b_vp == NULL) {			/* !ufs */
			BWRITE(bp);
		} else {				/* ufs */
			UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
}
/*
 * Reclaim buffer resources.  'want' says what the caller is short of:
 * BIO_HEADER (a buf header) or BIO_MEM (bsize bytes of the buffer
 * memory budget).  Scans the buckets round-robin from lastindex,
 * first dismantling idle clean buffers (freeing their data and
 * headers), then kicking off asynchronous writes of delayed-write
 * buffers so they become reclaimable.  B_AGE buffers may still be
 * taken after the want is satisfied; the function returns at the
 * first non-aged buffer once 'found' is set.  If a full sweep finds
 * nothing, wait briefly on bio_mem_cv (with bfreelist B_WANTED set,
 * so brelse() will wake us) and sweep again.
 */
static void
bio_recycle(int want, long bsize)
{
	struct buf *bp, *dp, *dwp, *nbp;
	struct hbuf *hp;
	int	found = 0;
	kmutex_t	*hmp;
	int		start, end;
	struct buf *delwri_list = EMPTY_LIST;

	/*
	 * Shouldn't want both header and memory at the same time.
	 */
	ASSERT(MUTEX_NOT_HELD(&bfree_lock));
top:
	start = end = lastindex;
	do {
		hp = &hbuf[start];
		hmp = &hp->b_lock;
		dp = (struct buf *)hp;
		mutex_enter(hmp);
		bp = dp->av_forw;

		/* phase 1: dismantle idle clean buffers in this bucket */
		while (bp != dp) {
			ASSERT(bp != NULL);

			if (!sema_tryp(&bp->b_sem)) {
				bp = bp->av_forw;
				continue;
			}
			/*
			 * Once we've satisfied the want, stop at the
			 * first buffer that isn't marked for early
			 * reclaim (B_AGE).
			 */
			if ((bp->b_flags & B_AGE) == 0 && found) {
				sema_v(&bp->b_sem);
				mutex_exit(hmp);
				lastindex = start;
				return;	/* All done */
			}

			ASSERT(MUTEX_HELD(&hp->b_lock));
			ASSERT(!(bp->b_flags & B_DELWRI));
			hp->b_length--;
			notavail(bp);
			bremhash(bp);
			mutex_exit(hmp);

			/* return its data memory to the budget */
			if (bp->b_bufsize) {
				kmem_free(bp->b_un.b_addr, bp->b_bufsize);
				bp->b_un.b_addr = NULL;
				mutex_enter(&bfree_lock);
				bfreelist.b_bufsize += bp->b_bufsize;
				mutex_exit(&bfree_lock);
			}

			bp->b_dev = (o_dev_t)NODEV;
			bp->b_edev = NODEV;
			bp->b_flags = 0;
			sema_v(&bp->b_sem);
			bio_bhdr_free(bp);
			if (want == BIO_HEADER) {
				found = 1;	/* freed header satisfies us */
			} else {
				ASSERT(want == BIO_MEM);
				/* try to charge our reservation now */
				if (!found && bfreelist.b_bufsize >= bsize) {
					/* Account for the memory we want */
					mutex_enter(&bfree_lock);
					if (bfreelist.b_bufsize >= bsize) {
						bfreelist.b_bufsize -= bsize;
						found = 1;
					}
					mutex_exit(&bfree_lock);
				}
			}

			/*
			 * Since we dropped hmp start from the
			 * begining.
			 */
			mutex_enter(hmp);
			bp = dp->av_forw;
		}
		mutex_exit(hmp);

		/*
		 * Phase 2: look at this bucket's delayed-write list and
		 * queue async writes so those buffers become clean and
		 * reclaimable on a later pass.
		 */
		dwp = (struct buf *)&dwbuf[start];

		mutex_enter(&blist_lock);

		/*
		 * Count sync writes in progress (blocks bfinval from
		 * invalidating buffers we're flushing).
		 */
		bio_doingflush++;
		mutex_enter(hmp);
		for (bp = dwp->av_forw; bp != dwp; bp = nbp) {
			ASSERT(bp != NULL);
			nbp = bp->av_forw;

			if (!sema_tryp(&bp->b_sem))
				continue;
			ASSERT(bp->b_flags & B_DELWRI);
			/*
			 * Do we have already found a buf to recycle?
			 */
			if ((bp->b_flags & B_AGE) == 0 && found) {
				sema_v(&bp->b_sem);
				mutex_exit(hmp);
				lastindex = start;
				mutex_exit(&blist_lock);
				bio_flushlist(delwri_list);
				mutex_enter(&blist_lock);
				bio_doingflush--;
				if (bio_flinv_cv_wanted) {
					bio_flinv_cv_wanted = 0;
					cv_broadcast(&bio_flushinval_cv);
				}
				mutex_exit(&blist_lock);
				return;	/* All done */
			}

			/* skip a buffer already chained by someone else */
			if (bp->b_list != NULL) {
				sema_v(&bp->b_sem);
				continue;
			}
			/* chain it (still holding b_sem) for flushing */
			hp->b_length--;
			notavail(bp);
			bp->b_list = delwri_list;
			delwri_list = bp;
		}
		mutex_exit(hmp);
		mutex_exit(&blist_lock);
		bio_flushlist(delwri_list);
		delwri_list = EMPTY_LIST;
		mutex_enter(&blist_lock);
		bio_doingflush--;
		if (bio_flinv_cv_wanted) {
			bio_flinv_cv_wanted = 0;
			cv_broadcast(&bio_flushinval_cv);
		}
		mutex_exit(&blist_lock);
		start = (start + 1) % v.v_hbuf;

	} while (start != end);

	if (found)
		return;

	/*
	 * Nothing reclaimable right now: mark bfreelist wanted so
	 * brelse() wakes us, nap up to a tick, and sweep again.
	 */
	mutex_enter(&bfree_lock);
	bfreelist.b_flags |= B_WANTED;
	(void) cv_reltimedwait(&bio_mem_cv, &bfree_lock, hz, TR_CLOCK_TICK);
	mutex_exit(&bfree_lock);
	goto top;
}
/*
 * Report whether the block (dev, blkno) is present (and not stale) in
 * the buffer cache.  Purely advisory: the answer can change as soon
 * as the hash lock is dropped.  Returns 1 if cached, 0 otherwise.
 */
static int
bio_incore(dev_t dev, daddr_t blkno)
{
	struct buf *chain;
	struct buf *cur;
	kmutex_t *lockp;
	uint_t bucket;
	int found = 0;

	bucket = bio_bhash(dev, blkno);
	chain = (struct buf *)&hbuf[bucket];
	lockp = &hbuf[bucket].b_lock;

	mutex_enter(lockp);
	for (cur = chain->b_forw; cur != chain; cur = cur->b_forw) {
		if (cur->b_edev == dev && cur->b_blkno == blkno &&
		    !(cur->b_flags & B_STALE)) {
			found = 1;
			break;
		}
	}
	mutex_exit(lockp);
	return (found);
}
/*
 * Completion handling for async pageio/remapped buffers (called from
 * biodone()): unmap any kernel mapping, hand the pages back to the
 * VM pvn layer, and dispose of the buffer.
 */
static void
bio_pageio_done(struct buf *bp)
{
	if (bp->b_flags & B_PAGEIO) {

		if (bp->b_flags & B_REMAPPED)
			bp_mapout(bp);

		if (bp->b_flags & B_READ)
			pvn_read_done(bp->b_pages, bp->b_flags);
		else
			pvn_write_done(bp->b_pages, B_WRITE | bp->b_flags);
		pageio_done(bp);
	} else {
		ASSERT(bp->b_flags & B_REMAPPED);
		bp_mapout(bp);
		brelse(bp);
	}
}
/*
 * Record an error on the buffer: error != 0 sets B_ERROR and stores
 * the errno in b_error; error == 0 clears both.  Caller holds b_sem.
 */
void
bioerror(struct buf *bp, int error)
{
	ASSERT(bp != NULL);
	ASSERT(error >= 0);
	ASSERT(SEMA_HELD(&bp->b_sem));

	if (error == 0)
		bp->b_flags &= ~B_ERROR;
	else
		bp->b_flags |= B_ERROR;

	bp->b_error = error;
}
/*
 * Reinitialize a buffer to its pristine state by tearing it down
 * (biofini) and rebuilding it (bioinit).  The buffer is left locked,
 * as bioinit() leaves b_sem held.
 */
void
bioreset(struct buf *bp)
{
	ASSERT(bp != NULL);

	biofini(bp);
	bioinit(bp);
}
/*
 * Return sizeof (struct buf) so callers outside this subsystem can
 * allocate buffers without depending on the structure's layout.
 */
size_t
biosize(void)
{
	return (sizeof (struct buf));
}
/*
 * Check whether any page backing a pageio buffer has been modified
 * (per the HAT's view).  Returns 1 if a modified page is found,
 * 0 if none is, and -1 if bp is not a B_PAGEIO buffer.
 */
int
biomodified(struct buf *bp)
{
	int npf;
	int ppattr;
	struct page *pp;

	ASSERT(bp != NULL);

	if ((bp->b_flags & B_PAGEIO) == 0) {
		return (-1);
	}
	pp = bp->b_pages;
	/* page count covering the (possibly unaligned) transfer */
	npf = btopr(bp->b_bcount + ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET));

	while (npf > 0) {
		/* sync HAT mod bits, stopping early once one is seen */
		ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
		    HAT_SYNC_STOPON_MOD);
		if (ppattr & P_MOD)
			return (1);
		pp = pp->p_next;
		npf--;
	}

	return (0);
}
/*
 * Zero a buffer and initialize its semaphores.  b_sem starts at 0,
 * i.e. the buffer is returned in the "locked" state.
 */
void
bioinit(struct buf *bp)
{
	bzero(bp, sizeof (struct buf));
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	bp->b_offset = -1;
}
/*
 * Destroy the buffer's semaphores; inverse of bioinit().  Does not
 * free the buffer itself.
 */
void
biofini(struct buf *bp)
{
	sema_destroy(&bp->b_io);
	sema_destroy(&bp->b_sem);
}
/*
 * Clone buffer bp into a new buffer covering len bytes starting at
 * byte offset off within bp's transfer, targeted at (dev, blkno) with
 * completion routine iodone.  If bp_mem is NULL a new buffer is
 * allocated with 'sleep' semantics (may return NULL for KM_NOSLEEP);
 * otherwise bp_mem is reset and reused, and the caller retains
 * ownership of it.  Shadow page lists and pageio page chains are
 * advanced to match the offset.  Returns the clone, locked.
 */
struct buf *
bioclone(struct buf *bp, off_t off, size_t len, dev_t dev, daddr_t blkno,
    int (*iodone)(struct buf *), struct buf *bp_mem, int sleep)
{
	struct buf *bufp;

	ASSERT(bp);
	if (bp_mem == NULL) {
		bufp = kmem_alloc(sizeof (struct buf), sleep);
		if (bufp == NULL) {
			return (NULL);
		}
		bioinit(bufp);
	} else {
		bufp = bp_mem;
		bioreset(bufp);
	}

/* flags that transfer from the original to the clone */
#define	BUF_CLONE_FLAGS	(B_READ|B_WRITE|B_SHADOW|B_PHYS|B_PAGEIO|B_FAILFAST|\
	B_ABRWRITE)

	bufp->b_flags = (bp->b_flags & BUF_CLONE_FLAGS) | B_BUSY;
	bufp->b_bcount = len;
	bufp->b_blkno = blkno;
	bufp->b_iodone = iodone;
	bufp->b_proc = bp->b_proc;
	bufp->b_edev = dev;
	bufp->b_file = bp->b_file;
	bufp->b_offset = bp->b_offset;

	if (bp->b_flags & B_SHADOW) {
		ASSERT(bp->b_shadow);
		ASSERT(bp->b_flags & B_PHYS);

		/* advance the shadow page-frame list to the offset */
		bufp->b_shadow = bp->b_shadow +
		    btop(((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off);
		bufp->b_un.b_addr = (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
		if (bp->b_flags & B_REMAPPED)
			bufp->b_proc = NULL;
	} else {
		if (bp->b_flags & B_PAGEIO) {
			struct page *pp;
			off_t o;
			int i;

			/* walk the page chain to the page containing off */
			pp = bp->b_pages;
			o = ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off;
			for (i = btop(o); i > 0; i--) {
				pp = pp->p_next;
			}
			bufp->b_pages = pp;
			bufp->b_un.b_addr = (caddr_t)(o & PAGEOFFSET);
		} else {
			bufp->b_un.b_addr =
			    (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
			if (bp->b_flags & B_REMAPPED)
				bufp->b_proc = NULL;
		}
	}
	return (bufp);
}