root/usr/src/lib/smbclnt/libfknsmb/common/fake_stream.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/*        All Rights Reserved   */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
 * Copyright 2022 Garrett D'Amore
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/sysmacros.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/errno.h>
#include <sys/vtrace.h>
#include <sys/ftrace.h>
#include <sys/ontrap.h>
#include <sys/sdt.h>
#include <sys/strft.h>

/*
 * This file contains selected functions from io/stream.c
 * needed by this library, mostly unmodified.
 */

/*
 * STREAMS message allocator: principles of operation
 * (See usr/src/uts/common/io/stream.c)
 */
/*
 * Cache geometry.  DBLK_MAX_CACHE is the largest buffer size served from
 * the per-size dblk caches, DBLK_CACHE_ALIGN is the alignment requested for
 * those caches, and dblk_cache[] is indexed in DBLK_MIN_SIZE
 * (1 << DBLK_SIZE_SHIFT) byte steps.
 */
#define DBLK_MAX_CACHE          73728
#define DBLK_CACHE_ALIGN        64
#define DBLK_MIN_SIZE           8
#define DBLK_SIZE_SHIFT         3

/*
 * db_ref, db_type, db_flags and db_struioflag are read and written together
 * as a single 32-bit word that starts at db_ref (see DBLK_RTFU_WORD).
 * DBLK_RTFU_SHIFT(field) gives the bit position of `field' inside that
 * word for the host byte order.
 * NOTE(review): this assumes the four fields are consecutive one-byte
 * members of dblk_t -- confirm against the dblk_t definition.
 */
#ifdef _BIG_ENDIAN
#define DBLK_RTFU_SHIFT(field)  \
        (8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define DBLK_RTFU_SHIFT(field)  \
        (8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * Build the packed ref/type/flags/uioflag word.  (ref - 1) is folded into
 * the flags field; dblk_decref() later reads it back (masked with
 * DBLK_REFMIN) as the minimum reference count.  Every argument is
 * parenthesized so that expression arguments expand correctly.
 */
#define DBLK_RTFU(ref, type, flags, uioflag)    \
        (((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
        ((type) << DBLK_RTFU_SHIFT(db_type)) | \
        (((flags) | ((ref) - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
        ((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
/* Mask covering the db_ref bits of the packed word. */
#define DBLK_RTFU_REF_MASK      (DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
/* The packed word itself, addressed through db_ref. */
#define DBLK_RTFU_WORD(dbp)     (*((uint32_t *)&(dbp)->db_ref))
/* 32-bit word of an mblk starting at b_band; cleared/copied as a unit. */
#define MBLK_BAND_FLAG_WORD(mp) (*((uint32_t *)&(mp)->b_band))

/*
 * Zero-terminated table of dblk buffer sizes.  streams_msg_init() creates
 * one kmem cache per entry and points every dblk_cache[] slot up to that
 * size at it.  Entries that are not a multiple of the page size share
 * their allocation with the dblk_t header, so (size + sizeof (dblk_t))
 * must be a multiple of DBLK_CACHE_ALIGN (asserted in streams_msg_init()).
 */
static size_t dblk_sizes[] = {
#ifdef _LP64
        16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856,
        8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624,
        40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392,
#else
        64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904,
        8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672,
        40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440,
#endif
        DBLK_MAX_CACHE, 0
};

/*
 * dblk_cache[] maps a request size to its kmem cache; the slot for a
 * request of `size' bytes is (size - 1) >> DBLK_SIZE_SHIFT.
 * mblk_cache holds bare mblk_t headers, and dblk_esb_cache holds dblks
 * whose data buffer is supplied by the caller (see gesballoc()).
 */
static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
static struct kmem_cache *mblk_cache;
static struct kmem_cache *dblk_esb_cache;

static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
static mblk_t *allocb_oversize(size_t size, int flags);
/* Count of allocb_tryhard() calls that failed at every size tried. */
static int allocb_tryhard_fails;
static void frnop_func(void *arg);
/* No-op free routine descriptor handed to gesballoc() by allocb_oversize(). */
frtn_t frnop = { frnop_func };
static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);

/*
 * Patchable mblk/dblk kmem_cache flags; passed as the cache flags
 * argument of kmem_cache_create() in streams_msg_init().
 */
int dblk_kmem_flags = 0;
int mblk_kmem_flags = 0;

static int
dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
        dblk_t *dbp = buf;
        ssize_t msg_size = (ssize_t)cdrarg;
        size_t index;

        ASSERT(msg_size != 0);

        index = (msg_size - 1) >> DBLK_SIZE_SHIFT;

        ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));

        if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
                return (-1);
        if ((msg_size & PAGEOFFSET) == 0) {
                dbp->db_base = kmem_alloc(msg_size, kmflags);
                if (dbp->db_base == NULL) {
                        kmem_cache_free(mblk_cache, dbp->db_mblk);
                        return (-1);
                }
        } else {
                dbp->db_base = (unsigned char *)&dbp[1];
        }

        dbp->db_mblk->b_datap = dbp;
        dbp->db_cache = dblk_cache[index];
        dbp->db_lim = dbp->db_base + msg_size;
        dbp->db_free = dbp->db_lastfree = dblk_lastfree;
        dbp->db_frtnp = NULL;
        dbp->db_fthdr = NULL;
        dbp->db_credp = NULL;
        dbp->db_cpid = -1;
        dbp->db_struioflag = 0;
        dbp->db_struioun.cksum.flags = 0;
        return (0);
}

/*ARGSUSED*/
static int
dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
{
        dblk_t *dbp = buf;

        if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
                return (-1);
        dbp->db_mblk->b_datap = dbp;
        dbp->db_cache = dblk_esb_cache;
        dbp->db_fthdr = NULL;
        dbp->db_credp = NULL;
        dbp->db_cpid = -1;
        dbp->db_struioflag = 0;
        dbp->db_struioun.cksum.flags = 0;
        return (0);
}

static int
bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
        dblk_t *dbp = buf;
        bcache_t *bcp = cdrarg;

        if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
                return (-1);

        dbp->db_base = kmem_cache_alloc(bcp->buffer_cache, kmflags);
        if (dbp->db_base == NULL) {
                kmem_cache_free(mblk_cache, dbp->db_mblk);
                return (-1);
        }

        dbp->db_mblk->b_datap = dbp;
        dbp->db_cache = (void *)bcp;
        dbp->db_lim = dbp->db_base + bcp->size;
        dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
        dbp->db_frtnp = NULL;
        dbp->db_fthdr = NULL;
        dbp->db_credp = NULL;
        dbp->db_cpid = -1;
        dbp->db_struioflag = 0;
        dbp->db_struioun.cksum.flags = 0;
        return (0);
}

/*ARGSUSED*/
/*
 * kmem cache destructor matching dblk_constructor() (and also registered
 * for dblk_esb_cache).  cdrarg is the size given at cache creation.  A
 * separately allocated data buffer exists only when that size is a
 * multiple of the page size; it is freed here along with the mblk.
 */
static void
dblk_destructor(void *buf, void *cdrarg)
{
        ssize_t bufsize = (ssize_t)cdrarg;
        dblk_t *dbp = buf;

        ASSERT(dbp->db_mblk->b_datap == dbp);
        ASSERT(bufsize != 0);
        ASSERT(dbp->db_struioflag == 0);
        ASSERT(dbp->db_struioun.cksum.flags == 0);

        if ((bufsize & PAGEOFFSET) == 0)
                kmem_free(dbp->db_base, bufsize);

        kmem_cache_free(mblk_cache, dbp->db_mblk);
}

/*
 * kmem cache destructor matching bcache_dblk_constructor().  cdrarg is
 * the owning bcache_t; the data buffer goes back to its buffer_cache and
 * the mblk back to mblk_cache.
 */
static void
bcache_dblk_destructor(void *buf, void *cdrarg)
{
        bcache_t *owner = cdrarg;
        dblk_t *dbp = buf;

        kmem_cache_free(owner->buffer_cache, dbp->db_base);

        ASSERT(dbp->db_mblk->b_datap == dbp);
        ASSERT(dbp->db_struioflag == 0);
        ASSERT(dbp->db_struioun.cksum.flags == 0);

        kmem_cache_free(mblk_cache, dbp->db_mblk);
}

/*
 * KMEM_SLAB_T_SZ is referenced only from an ASSERT in streams_msg_init(),
 * so it is defined for DEBUG builds only.  Outside the real kernel
 * kmem_slab_t is not used; a fixed value of 64 stands in for its size.
 */
#ifdef  DEBUG
#ifdef  _KERNEL
#define KMEM_SLAB_T_SZ  sizeof (kmem_slab_t)
#else   /* _KERNEL */
#define KMEM_SLAB_T_SZ  64      /* fakekernel */
#endif  /* _KERNEL */
#endif  /* DEBUG */

/*
 * Create the kmem caches used by the message allocator: the mblk cache,
 * one dblk cache per entry of dblk_sizes[], and the esballoc-style dblk
 * cache.  Each dblk_cache[] slot is pointed at the smallest cache whose
 * buffer size covers that slot.
 */
void
streams_msg_init(void)
{
        char cache_name[40];
        struct kmem_cache *cache;
        size_t *szp;
        size_t bufsize;
        size_t alloc_size;
        size_t fill = DBLK_MIN_SIZE;
        int offset;

        mblk_cache = kmem_cache_create("streams_mblk", sizeof (mblk_t), 32,
            NULL, NULL, NULL, NULL, NULL, mblk_kmem_flags);

        for (szp = dblk_sizes; *szp != 0; szp++) {
                bufsize = *szp;
                offset = (bufsize & PAGEOFFSET);

                if (offset == 0) {
                        /*
                         * Buffer size is a multiple of the page size:
                         * the dblk and its buffer are allocated
                         * separately, so the cache holds only dblk_t.
                         */
                        ASSERT((bufsize & (DBLK_CACHE_ALIGN - 1)) == 0);
                        alloc_size = sizeof (dblk_t);
                } else {
                        /*
                         * Buffer ends mid-page: the dblk is allocated
                         * together with the buffer on the same page.
                         */
                        alloc_size = bufsize + sizeof (dblk_t);
                        ASSERT((offset + sizeof (dblk_t) + KMEM_SLAB_T_SZ)
                            < PAGESIZE);
                        ASSERT((alloc_size & (DBLK_CACHE_ALIGN - 1)) == 0);
                }

                (void) sprintf(cache_name, "streams_dblk_%ld", (long)bufsize);
                cache = kmem_cache_create(cache_name, alloc_size,
                    DBLK_CACHE_ALIGN, dblk_constructor, dblk_destructor,
                    NULL, (void *)(bufsize), NULL, dblk_kmem_flags);

                /* Map every not-yet-covered size up to bufsize here. */
                for (; fill <= bufsize; fill += DBLK_MIN_SIZE)
                        dblk_cache[(fill - 1) >> DBLK_SIZE_SHIFT] = cache;
        }

        dblk_esb_cache = kmem_cache_create("streams_dblk_esb", sizeof (dblk_t),
            DBLK_CACHE_ALIGN, dblk_esb_constructor, dblk_destructor, NULL,
            (void *)sizeof (dblk_t), NULL, dblk_kmem_flags);

        /* fthdr_cache, ftblk_cache, ... */
}

/*ARGSUSED*/
/*
 * Allocate a message block with at least `size' bytes of data space,
 * without sleeping.  Requests larger than DBLK_MAX_CACHE go through
 * allocb_oversize(); a request of zero bytes is served from the smallest
 * cache.  `pri' is not used.  Returns NULL if the allocation fails.
 */
mblk_t *
allocb(size_t size, uint_t pri)
{
        size_t slot = (size - 1) >> DBLK_SIZE_SHIFT;
        mblk_t *mp = NULL;
        dblk_t *dbp;

        if (slot >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT) && size != 0) {
                mp = allocb_oversize(size, KM_NOSLEEP);
        } else {
                /* size == 0 wraps to a huge slot; use the first cache. */
                if (slot >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT))
                        slot = 0;

                dbp = kmem_cache_alloc(dblk_cache[slot], KM_NOSLEEP);
                if (dbp != NULL) {
                        mp = dbp->db_mblk;
                        DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
                        mp->b_next = NULL;
                        mp->b_prev = NULL;
                        mp->b_cont = NULL;
                        mp->b_wptr = dbp->db_base;
                        mp->b_rptr = dbp->db_base;
                        mp->b_queue = NULL;
                        MBLK_BAND_FLAG_WORD(mp) = 0;
                        STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
                }
        }

        FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);

        return (mp);
}

/*
 * Allocate an mblk of `size' bytes and inherit db_credp, db_cpid and
 * db_type from the template message.  The template may carry no cred,
 * in which case db_credp is left as allocb() set it and db_cpid receives
 * whatever msg_getcred() reports.  A hold is taken on an inherited cred.
 * Returns NULL if allocb() fails.
 */
mblk_t *
allocb_tmpl(size_t size, const mblk_t *tmpl)
{
        mblk_t *nmp;
        dblk_t *ndbp;
        cred_t *credp;
        pid_t pid;

        nmp = allocb(size, 0);
        if (nmp == NULL)
                return (NULL);

        ndbp = nmp->b_datap;

        credp = msg_getcred(tmpl, &pid);
        if (credp != NULL) {
                ndbp->db_credp = credp;
                crhold(credp);
        }
        ndbp->db_cpid = pid;
        ndbp->db_type = tmpl->b_datap->db_type;

        return (nmp);
}

/*
 * Allocate an mblk of `size' bytes and tag its dblk with the given cred
 * (taking a hold on it) and pid.  `cr' must not be NULL.  Returns NULL if
 * allocb() fails.
 */
mblk_t *
allocb_cred(size_t size, cred_t *cr, pid_t cpid)
{
        mblk_t *mp;
        dblk_t *dbp;

        mp = allocb(size, 0);

        ASSERT(cr != NULL);
        if (mp == NULL)
                return (NULL);

        dbp = mp->b_datap;
        dbp->db_credp = cr;
        crhold(cr);
        dbp->db_cpid = cpid;

        return (mp);
}

/*
 * Like allocb_cred(), but allocates through allocb_wait() with the given
 * flags and error pointer.  `cr' must not be NULL.  Returns NULL if
 * allocb_wait() returns NULL.
 */
mblk_t *
allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr, pid_t cpid)
{
        mblk_t *mp;
        dblk_t *dbp;

        mp = allocb_wait(size, 0, flags, error);

        ASSERT(cr != NULL);
        if (mp == NULL)
                return (NULL);

        dbp = mp->b_datap;
        dbp->db_credp = cr;
        crhold(cr);
        dbp->db_cpid = cpid;

        return (mp);
}

/*
 * Return the db_credp of the first mblk in the b_cont chain that has one,
 * and, if cpidp is non-NULL, store that dblk's db_cpid through it.
 * When no mblk carries a cred, return NULL and store NOPID.
 * No hold is taken on the returned cred.
 */
cred_t *
msg_getcred(const mblk_t *mp, pid_t *cpidp)
{
        for (; mp != NULL; mp = mp->b_cont) {
                dblk_t *dbp = mp->b_datap;

                if (dbp->db_credp == NULL)
                        continue;

                if (cpidp != NULL)
                        *cpidp = dbp->db_cpid;

                /* DEBUG check for only one db_credp */
                return (dbp->db_credp);
        }

        if (cpidp != NULL)
                *cpidp = NOPID;
        return (NULL);
}

/*
 * Variant of msg_getcred() that detaches the cred from the message:
 * the first non-NULL db_credp found is cleared in its dblk and returned,
 * so the reference the dblk held passes to the caller.  This avoids a
 * msg_getcred() + crhold() pair when the message is about to be freed.
 *
 * The caller must ensure there is no other reference on the message,
 * since db_credp cannot be cleared while the dblk is shared.
 *
 * If cpidp is non-NULL it receives db_cpid when a cred is found; it is
 * left untouched when none is found and NULL is returned.
 */
cred_t *
msg_extractcred(mblk_t *mp, pid_t *cpidp)
{
        for (; mp != NULL; mp = mp->b_cont) {
                dblk_t *dbp = mp->b_datap;
                cred_t *found = dbp->db_credp;

                if (found == NULL)
                        continue;

                ASSERT(dbp->db_ref == 1);
                dbp->db_credp = NULL;
                if (cpidp != NULL)
                        *cpidp = dbp->db_cpid;

                /* DEBUG check for only one db_credp */
                return (found);
        }

        return (NULL);
}

/* _KERNEL msg_getlabel() */

/*
 * Free a single message block by invoking its dblk's db_free routine.
 * The mblk must not be linked on a b_next/b_prev list.
 */
void
freeb(mblk_t *mp)
{
        dblk_t *data = mp->b_datap;

        ASSERT(data->db_ref > 0);
        ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
        FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);

        STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, data->db_ref);

        data->db_free(mp, data);
}

/*
 * Free every block of a message, following the b_cont chain.  A NULL
 * message is accepted and does nothing.
 */
void
freemsg(mblk_t *mp)
{
        mblk_t *next;

        FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);

        for (; mp != NULL; mp = next) {
                dblk_t *data = mp->b_datap;

                /* Fetch the link first; db_free() releases mp. */
                next = mp->b_cont;

                ASSERT(data->db_ref > 0);
                ASSERT(mp->b_next == NULL && mp->b_prev == NULL);

                STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, data->db_ref);

                data->db_free(mp, data);
        }
}

/*
 * Reallocate a block for another use, reusing the old block when it is
 * unshared (db_ref == 1) and large enough.  With `copy' set, the existing
 * data is preserved and b_wptr is left at the end of it; otherwise the
 * result has b_wptr == b_rptr.  When a new block is allocated it inherits
 * the old block's b_cont and the old block is freed.  A NULL mp simply
 * allocates a new block.  Returns NULL if a needed allocation fails.
 *
 * This routine is private and unstable.
 */
mblk_t  *
reallocb(mblk_t *mp, size_t size, uint_t copy)
{
        mblk_t          *nmp;
        unsigned char   *data_start;
        ptrdiff_t       data_len;

        if (mp == NULL)
                return (allocb(size, BPRI_HI));

        data_len = mp->b_wptr - mp->b_rptr;
        data_start = mp->b_rptr;

        ASSERT(mp->b_datap->db_ref != 0);

        if (mp->b_datap->db_ref != 1 || MBLKSIZE(mp) < size) {
                /* Shared or too small: a fresh block is required. */
                nmp = allocb_tmpl(size, mp);
                if (nmp == NULL)
                        return (NULL);
                /* XXX other mp state could be copied too, db_flags ... ? */
                nmp->b_cont = mp->b_cont;
        } else {
                /*
                 * The data is wanted and already fits where it is:
                 * nothing to do.
                 */
                if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
                        return (mp);

                mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
                nmp = mp;
        }

        if (copy) {
                bcopy(data_start, nmp->b_rptr, data_len);
                nmp->b_wptr = nmp->b_rptr + data_len;
        }

        if (nmp != mp)
                freeb(mp);

        return (nmp);
}

/*
 * Last-reference free routine for dblks from the size-classed caches.
 * Releases any flow-trace header and cred, restores the fields to their
 * constructed state, and returns the dblk to its cache.
 */
static void
dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
        ASSERT(dbp->db_mblk == mp);

        if (dbp->db_fthdr != NULL)
                str_ftfree(dbp);

        /* Make credp and cpid 'unspecified' before returning to cache. */
        if (dbp->db_credp != NULL) {
                crfree(dbp->db_credp);
                dbp->db_credp = NULL;
        }
        dbp->db_cpid = -1;

        /* Clear the struioflag and checksum flag fields ... */
        dbp->db_struioflag = 0;
        dbp->db_struioun.cksum.flags = 0;

        /* ... as well as the COOKED and/or UIOA flag(s). */
        dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA);

        kmem_cache_free(dbp->db_cache, dbp);
}

/*
 * db_free routine installed by dupb() once a dblk may be shared.
 * Drops one reference.  If other references remain, only the mblk is
 * returned to mblk_cache.  Otherwise mp becomes the dblk's db_mblk,
 * db_free is reset to db_lastfree, and db_lastfree is invoked to release
 * the dblk itself.  When db_ref is already 1 the atomic decrement is
 * skipped and the last-free path is taken directly.
 */
static void
dblk_decref(mblk_t *mp, dblk_t *dbp)
{
        if (dbp->db_ref != 1) {
                uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
                    -(1 << DBLK_RTFU_SHIFT(db_ref)));
                /*
                 * atomic_add_32_nv() just decremented db_ref, so we no longer
                 * have a reference to the dblk, which means another thread
                 * could free it.  Therefore we cannot examine the dblk to
                 * determine whether ours was the last reference.  Instead,
                 * we extract the new and minimum reference counts from rtfu.
                 * Note that all we're really saying is "if (ref != refmin)".
                 */
                if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
                    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
                        /* Others still hold the dblk; free just our mblk. */
                        kmem_cache_free(mblk_cache, mp);
                        return;
                }
        }
        /* Ours was the last reference: hand mp and the dblk to lastfree. */
        dbp->db_mblk = mp;
        dbp->db_free = dbp->db_lastfree;
        dbp->db_lastfree(mp, dbp);
}

/*
 * Duplicate a message block: allocate a new mblk that shares mp's dblk
 * and has the same b_rptr/b_wptr and band/flag word, then atomically
 * increment db_ref.  The dblk's db_free is switched to dblk_decref so
 * later frees go through reference counting.
 * Returns NULL if no mblk can be allocated or db_ref is already at its
 * maximum.
 */
mblk_t *
dupb(mblk_t *mp)
{
        dblk_t *dbp = mp->b_datap;
        mblk_t *new_mp;
        uint32_t oldrtfu, newrtfu;

        if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
                goto out;

        new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
        new_mp->b_rptr = mp->b_rptr;
        new_mp->b_wptr = mp->b_wptr;
        new_mp->b_datap = dbp;
        new_mp->b_queue = NULL;
        MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);

        STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);

        dbp->db_free = dblk_decref;
        /*
         * Compare-and-swap loop: retry until the packed word is updated
         * without another thread having changed it in between.
         */
        do {
                ASSERT(dbp->db_ref > 0);
                oldrtfu = DBLK_RTFU_WORD(dbp);
                newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
                /*
                 * If db_ref is maxed out we can't dup this message anymore.
                 */
                if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
                        kmem_cache_free(mblk_cache, new_mp);
                        new_mp = NULL;
                        goto out;
                }
        } while (atomic_cas_32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) !=
            oldrtfu);

out:
        FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
        return (new_mp);
}

/*ARGSUSED*/
/*
 * Free routine that does nothing; it backs the `frnop' descriptor that
 * allocb_oversize() passes to gesballoc().
 */
static void
frnop_func(void *arg)
{
}

/*
 * Generic esballoc: wrap a caller-supplied buffer [base, base + size) in
 * a dblk/mblk pair taken from dblk_esb_cache.  In the kernel this backs
 * the [d]esballoc[a] flavors; in this file it is used by
 * allocb_oversize().
 *
 * db_rtfu is the initial packed ref/type/flags/uioflag word, frp is
 * stored as db_frtnp, and lastfree becomes both db_free and db_lastfree.
 * Returns NULL if no dblk can be allocated with kmflags.
 */
static mblk_t *
gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
    void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
{
        mblk_t *mp = NULL;
        dblk_t *dbp;

        ASSERT(base != NULL && frp != NULL);

        dbp = kmem_cache_alloc(dblk_esb_cache, kmflags);
        if (dbp != NULL) {
                mp = dbp->db_mblk;

                dbp->db_base = base;
                dbp->db_lim = base + size;
                dbp->db_lastfree = lastfree;
                dbp->db_free = lastfree;
                dbp->db_frtnp = frp;
                DBLK_RTFU_WORD(dbp) = db_rtfu;

                mp->b_next = NULL;
                mp->b_prev = NULL;
                mp->b_cont = NULL;
                mp->b_wptr = base;
                mp->b_rptr = base;
                mp->b_queue = NULL;
                MBLK_BAND_FLAG_WORD(mp) = 0;
        }

        FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
        return (mp);
}

/*
 * Last-reference free routine for dblks allocated by bcache_allocb().
 * Resets the dblk, returns it to the bcache's dblk cache and decrements
 * the bcache's outstanding count.  If that count reaches zero after
 * bcache_destroy() has been requested, the bcache itself is torn down.
 */
static void
bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
        bcache_t *owner = dbp->db_cache;

        ASSERT(dbp->db_mblk == mp);

        if (dbp->db_fthdr != NULL)
                str_ftfree(dbp);

        /* Make credp and cpid 'unspecified' before returning to cache. */
        if (dbp->db_credp != NULL) {
                crfree(dbp->db_credp);
                dbp->db_credp = NULL;
        }
        dbp->db_cpid = -1;
        dbp->db_struioflag = 0;
        dbp->db_struioun.cksum.flags = 0;

        mutex_enter(&owner->mutex);
        kmem_cache_free(owner->dblk_cache, dbp);
        owner->alloc--;

        if (owner->alloc != 0 || owner->destroy == 0) {
                mutex_exit(&owner->mutex);
                return;
        }

        /* Last buffer of a bcache already marked for destruction. */
        kmem_cache_destroy(owner->dblk_cache);
        kmem_cache_destroy(owner->buffer_cache);
        mutex_exit(&owner->mutex);
        mutex_destroy(&owner->mutex);
        kmem_free(owner, sizeof (bcache_t));
}

/*
 * Create a private buffer cache: a kmem cache of `size'-byte buffers with
 * the requested alignment, plus a dblk cache whose constructor binds each
 * dblk to one such buffer.
 *
 * name  - prefix for the two kmem cache names
 *         ("<name>_buffer_cache" and "<name>_dblk_cache")
 * size  - data buffer size in bytes
 * align - buffer alignment; must be a power of two
 *
 * Returns the new bcache_t, or NULL if the descriptor cannot be allocated
 * without sleeping.
 */
bcache_t *
bcache_create(char *name, size_t size, uint_t align)
{
        bcache_t *bcp;
        char buffer[255];

        ASSERT((align & (align - 1)) == 0);

        if ((bcp = kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == NULL)
                return (NULL);

        bcp->size = size;
        bcp->align = align;
        bcp->alloc = 0;
        bcp->destroy = 0;

        mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);

        /*
         * `name' is caller-supplied and of unknown length, so bound the
         * formatting to the local buffer rather than risk overrunning it.
         */
        (void) snprintf(buffer, sizeof (buffer), "%s_buffer_cache", name);
        bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
            NULL, NULL, NULL, 0);
        (void) snprintf(buffer, sizeof (buffer), "%s_dblk_cache", name);
        bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
            DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
            NULL, (void *)bcp, NULL, 0);

        return (bcp);
}

/*
 * Destroy a bcache.  If buffers are still outstanding the bcache is only
 * marked for destruction; bcache_dblk_lastfree() completes the teardown
 * when the last buffer is returned.
 */
void
bcache_destroy(bcache_t *bcp)
{
        ASSERT(bcp != NULL);

        mutex_enter(&bcp->mutex);
        if (bcp->alloc != 0) {
                /* Still in use: defer to the final free. */
                bcp->destroy++;
                mutex_exit(&bcp->mutex);
                return;
        }

        kmem_cache_destroy(bcp->dblk_cache);
        kmem_cache_destroy(bcp->buffer_cache);
        mutex_exit(&bcp->mutex);
        mutex_destroy(&bcp->mutex);
        kmem_free(bcp, sizeof (bcache_t));
}

/*ARGSUSED*/
/*
 * Allocate a message block whose buffer comes from the given bcache,
 * without sleeping.  `pri' is not used.  Returns NULL if the bcache has
 * been marked for destruction or no dblk is available.
 */
mblk_t *
bcache_allocb(bcache_t *bcp, uint_t pri)
{
        dblk_t *dbp = NULL;
        mblk_t *mp = NULL;

        ASSERT(bcp != NULL);

        mutex_enter(&bcp->mutex);
        if (bcp->destroy == 0)
                dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP);
        if (dbp != NULL)
                bcp->alloc++;
        mutex_exit(&bcp->mutex);

        if (dbp != NULL) {
                ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);

                mp = dbp->db_mblk;
                DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
                mp->b_next = NULL;
                mp->b_prev = NULL;
                mp->b_cont = NULL;
                mp->b_wptr = dbp->db_base;
                mp->b_rptr = dbp->db_base;
                mp->b_queue = NULL;
                MBLK_BAND_FLAG_WORD(mp) = 0;
                STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
        }

        FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);

        return (mp);
}

/*
 * Last-reference free routine for blocks made by allocb_oversize().
 * Resets the dblk, frees the separately allocated data buffer (whose
 * length is db_lim - db_base) and returns the dblk to its cache.
 */
static void
dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
{
        ASSERT(dbp->db_mblk == mp);

        if (dbp->db_fthdr != NULL)
                str_ftfree(dbp);

        /* Make credp and cpid 'unspecified' before returning to cache. */
        if (dbp->db_credp != NULL) {
                crfree(dbp->db_credp);
                dbp->db_credp = NULL;
        }
        dbp->db_cpid = -1;
        dbp->db_struioflag = 0;
        dbp->db_struioun.cksum.flags = 0;

        kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
        kmem_cache_free(dbp->db_cache, dbp);
}

/*
 * Allocate a message block larger than the biggest dblk cache.  The size
 * is rounded up to DBLK_CACHE_ALIGN, the buffer comes straight from
 * kmem_alloc(), and gesballoc() wraps it with dblk_lastfree_oversize as
 * the free routine.  Returns NULL if either allocation fails.
 */
static mblk_t *
allocb_oversize(size_t size, int kmflags)
{
        mblk_t *mp;
        void *buf;

        size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);

        buf = kmem_alloc(size, kmflags);
        if (buf == NULL)
                return (NULL);

        mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
            &frnop, dblk_lastfree_oversize, kmflags);
        if (mp == NULL) {
                /* No dblk available; give the buffer back. */
                kmem_free(buf, size);
                return (NULL);
        }

        STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);

        return (mp);
}

/*
 * Try to allocate a block of at least target_size bytes, retrying with
 * sizes increased in DBLK_CACHE_ALIGN steps while below
 * target_size + 512.  Counts the failure in allocb_tryhard_fails and
 * returns NULL if every attempt fails.
 */
mblk_t *
allocb_tryhard(size_t target_size)
{
        size_t limit = target_size + 512;
        size_t size = target_size;

        while (size < limit) {
                mblk_t *bp = allocb(size, BPRI_HI);

                if (bp != NULL)
                        return (bp);
                size += DBLK_CACHE_ALIGN;
        }

        allocb_tryhard_fails++;
        return (NULL);
}

/*
 * This routine is consolidation private for STREAMS internal use.
 * It may only be called from sync routines (i.e., not from put or
 * service procedures).  It lives here (rather than in strsubr.c) so the
 * allocb() implementation details need not be exposed in header files.
 *
 * With STR_NOSIG set the allocation is made with KM_SLEEP.  Otherwise
 * allocb() is retried, calling strwaitbuf() between attempts; if
 * strwaitbuf() returns non-zero, that value is stored in *error and NULL
 * is returned.
 */
mblk_t *
allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
{
        dblk_t *dbp;
        mblk_t *mp;
        size_t slot;

        if ((flags & STR_NOSIG) == 0) {
                for (;;) {
                        mp = allocb(size, pri);
                        if (mp != NULL)
                                return (mp);

                        *error = strwaitbuf(size, BPRI_HI);
                        if (*error != 0)
                                return (NULL);
                }
        }

        slot = (size -1) >> DBLK_SIZE_SHIFT;

        if (slot >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
                if (size != 0) {
                        mp = allocb_oversize(size, KM_SLEEP);
                        FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
                            (uintptr_t)mp);
                        return (mp);
                }
                /* size == 0 wraps to a huge slot; use the first cache. */
                slot = 0;
        }

        dbp = kmem_cache_alloc(dblk_cache[slot], KM_SLEEP);
        mp = dbp->db_mblk;
        DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
        mp->b_next = NULL;
        mp->b_prev = NULL;
        mp->b_cont = NULL;
        mp->b_wptr = dbp->db_base;
        mp->b_rptr = dbp->db_base;
        mp->b_queue = NULL;
        MBLK_BAND_FLAG_WORD(mp) = 0;
        STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);

        FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);

        return (mp);
}

/*
 * Call function 'func' with 'arg' when a class zero block can
 * be allocated with priority 'pri'.
 *
 * Implemented by forwarding to bufcall() with a size of 1; the id that
 * bufcall() returns is passed back to the caller unchanged.
 */
bufcall_id_t
esbbcall(uint_t pri, void (*func)(void *), void *arg)
{
        return (bufcall(1, pri, func, arg));
}

/*
 * Allocate and initialize an M_IOCTL message for command `cmd'.
 * The block is sized for any of the ioctl-related message types
 * (union ioctypes) and zeroed; b_wptr is advanced past a struct iocblk
 * whose command, cred (kcred), id (from getiocseqno()) and flag
 * (IOC_NATIVE) are filled in.  This gives all internal ioctl allocators
 * a consistent starting point.  Returns NULL if allocb() fails.
 */
mblk_t *
mkiocb(uint_t cmd)
{
        struct iocblk   *iocp;
        mblk_t          *mp;

        /* Room for any of the ioctl related messages. */
        mp = allocb(sizeof (union ioctypes), BPRI_MED);
        if (mp == NULL)
                return (NULL);

        bzero(mp->b_rptr, sizeof (union ioctypes));

        /* Message type and extent of valid data. */
        mp->b_wptr += sizeof (struct iocblk);
        mp->b_datap->db_type = M_IOCTL;

        /* The iocblk itself. */
        iocp = (struct iocblk *)mp->b_rptr;
        iocp->ioc_cmd = cmd;
        iocp->ioc_cr = kcred;
        iocp->ioc_id = getiocseqno();
        iocp->ioc_flag = IOC_NATIVE;

        return (mp);
}

/*
 * Report whether a block of the given size could currently be allocated:
 * non-zero when size plus the dblk_t header does not exceed kmem_avail().
 * 'pri' is no longer used, but is retained for compatibility.
 */
/* ARGSUSED */
int
testb(size_t size, uint_t pri)
{
        size_t needed = size + sizeof (dblk_t);

        return (needed <= kmem_avail());
}

/* _KERNEL: bufcall, unbufcall */

/*
 * Duplicate a whole message, one block at a time with dupb(), and return
 * the head of the duplicate.  Returns NULL if bp is NULL or if any block
 * cannot be duplicated; a partial duplicate is freed before returning.
 */
mblk_t *
dupmsg(mblk_t *bp)
{
        mblk_t *head;
        mblk_t *tail;

        if (bp == NULL)
                return (NULL);

        head = dupb(bp);
        if (head == NULL)
                return (NULL);

        tail = head;
        for (bp = bp->b_cont; bp != NULL; bp = bp->b_cont) {
                tail->b_cont = dupb(bp);
                if (tail->b_cont == NULL) {
                        freemsg(head);
                        return (NULL);
                }
                tail = tail->b_cont;
        }

        return (head);
}

/*
 * Duplicate one mblk for dupmsg_noloan(): if the block's dblk has
 * STRUIO_ZC set in db_struioflag the block is copied with copyb(),
 * otherwise dupb() is used.
 *
 * NOTE: 'bp' is evaluated more than once, so the argument must not
 * have side effects.
 */
#define DUPB_NOLOAN(bp) \
        ((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
        copyb((bp)) : dupb((bp)))

/*
 * Like dupmsg(), but each block goes through DUPB_NOLOAN(), so blocks
 * flagged STRUIO_ZC are copied rather than dup'd.
 *
 * Only messages whose first block is M_DATA are accepted.  Returns the
 * head of the new chain, or NULL if bp is NULL, the first block is not
 * M_DATA, or any block could not be duplicated (the partial result is
 * freed in that case).
 */
mblk_t *
dupmsg_noloan(mblk_t *bp)
{
        mblk_t *head, *tail, *src;

        if (bp == NULL || DB_TYPE(bp) != M_DATA)
                return (NULL);

        head = DUPB_NOLOAN(bp);
        if (head == NULL)
                return (NULL);

        /* Append a duplicate (or copy) of each continuation block. */
        tail = head;
        for (src = bp->b_cont; src != NULL; src = src->b_cont) {
                tail->b_cont = DUPB_NOLOAN(src);
                if (tail->b_cont == NULL) {
                        freemsg(head);
                        return (NULL);
                }
                tail = tail->b_cont;
        }

        return (head);
}

/*
 * Copy a message block and its data block into a newly allocated
 * message block / data block pair.  Returns the new message block, or
 * NULL if the allocation fails.
 *
 * The whole data buffer (db_base through db_lim) is copied, not just
 * the bytes between b_rptr and b_wptr.  The copy is placed in the new
 * buffer at the same offset from a uint_t boundary that the original
 * db_base had, so rptr keeps its alignment (w.r.t. word alignment)
 * even when the original db_base is not word aligned. (bug 1052877)
 */
mblk_t *
copyb(mblk_t *bp)
{
        mblk_t  *copy;
        dblk_t  *src_db, *new_db;
        uchar_t *new_base;
        size_t  buflen;
        size_t  phase;

        ASSERT(bp->b_wptr >= bp->b_rptr);

        src_db = bp->b_datap;
        if (src_db->db_fthdr != NULL)
                STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);

        /* Full buffer length, plus the misalignment of the old base. */
        buflen = src_db->db_lim - src_db->db_base;
        phase = P2PHASE((uintptr_t)src_db->db_base, sizeof (uint_t));

        copy = allocb_tmpl(buflen + phase, bp);
        if (copy == NULL)
                return (NULL);

        copy->b_flag = bp->b_flag;
        copy->b_band = bp->b_band;
        new_db = copy->b_datap;

        /*
         * Flow tracing caveat: copying a message can lose the record
         * of where the original has been, which argues for inheriting
         * the FT data.  A user may, however, only care about
         * alloc-to-free data, so the real answer is probably a
         * tunable.
         */
        STR_FTEVENT_MBLK(copy, caller(), FTEV_COPYB, 1);

        /* Duplicate the buffer, offset by the original's phase. */
        new_base = new_db->db_base + phase;
        bcopy(src_db->db_base, new_base, buflen);

        /* Keep rptr/wptr at the same offsets within the data. */
        copy->b_rptr = new_base + (bp->b_rptr - src_db->db_base);
        copy->b_wptr = copy->b_rptr + MBLKL(bp);

        return (copy);
}

/*
 * Copy an entire message into newly allocated message and data
 * blocks, one copyb() per block.
 *
 * Returns the head of the new message.  NULL is returned if bp is
 * NULL or if any block cannot be copied; a partially built copy is
 * released with freemsg() before returning.
 */
mblk_t *
copymsg(mblk_t *bp)
{
        mblk_t *head, *tail, *src;

        if (bp == NULL)
                return (NULL);

        head = copyb(bp);
        if (head == NULL)
                return (NULL);

        /* Append a copy of each continuation block in order. */
        tail = head;
        for (src = bp->b_cont; src != NULL; src = src->b_cont) {
                tail->b_cont = copyb(src);
                if (tail->b_cont == NULL) {
                        freemsg(head);
                        return (NULL);
                }
                tail = tail->b_cont;
        }

        return (head);
}

/*
 * Append message block (or chain) bp to the tail of message mp.
 * Both arguments must be non-NULL.
 */
void
linkb(mblk_t *mp, mblk_t *bp)
{
        mblk_t *tail;

        ASSERT(mp && bp);

        /* Walk to the last block of mp. */
        tail = mp;
        while (tail->b_cont != NULL)
                tail = tail->b_cont;

        tail->b_cont = bp;
}

/*
 * Detach the first message block from a message.
 *
 * bp's continuation pointer is cleared and the remainder of the
 * message is returned; the result is NULL when bp was the only block.
 */
mblk_t *
unlinkb(mblk_t *bp)
{
        mblk_t *rest = bp->b_cont;

        bp->b_cont = NULL;
        return (rest);
}

/*
 * Remove message block "bp" from message "mp".  The removed block is
 * only unlinked (its b_cont is cleared); it is not freed here.
 *
 * Returns the head of the resulting message, or NULL if no message
 * remains.  Returns (mblk_t *)-1 if bp is not found in the message.
 */
mblk_t *
rmvb(mblk_t *mp, mblk_t *bp)
{
        mblk_t **linkp;

        ASSERT(mp && bp);

        /*
         * linkp always addresses the pointer that refers to the block
         * under inspection: first the local head, then each b_cont.
         */
        for (linkp = &mp; *linkp != NULL; linkp = &(*linkp)->b_cont) {
                if (*linkp != bp)
                        continue;

                /* Splice bp out of the chain and isolate it. */
                *linkp = bp->b_cont;
                bp->b_cont = NULL;
                return (mp);
        }

        return ((mblk_t *)-1);
}

/*
 * Concatenate and align first len bytes of common
 * message type.  Len == -1, means concat everything.
 * Returns 1 on success, 0 on failure
 * After the pullup, mp points to the pulled up data.
 *
 * The operation is done in place: the caller's mblk 'mp' is kept as
 * the head of the message but is given a freshly allocated data block,
 * while the original blocks are consumed (and freed once emptied).
 * Failure (0) is returned for M_MULTIDATA messages, when fewer than
 * len bytes are available according to xmsgsize(), or when the new
 * block cannot be allocated.  If the requested data already sits in
 * the first mblk and str_aligned() accepts its b_rptr, the message is
 * left untouched and 1 is returned.
 */
int
pullupmsg(mblk_t *mp, ssize_t len)
{
        mblk_t *bp, *b_cont;
        dblk_t *dbp;
        ssize_t n;

        ASSERT(mp->b_datap->db_ref > 0);
        ASSERT(mp->b_next == NULL && mp->b_prev == NULL);

        /*
         * We won't handle Multidata message, since it contains
         * metadata which this function has no knowledge of; we
         * assert on DEBUG, and return failure otherwise.
         */
        ASSERT(mp->b_datap->db_type != M_MULTIDATA);
        if (mp->b_datap->db_type == M_MULTIDATA)
                return (0);

        if (len == -1) {
                /* A single, already aligned block needs no work. */
                if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
                        return (1);
                /* Otherwise pull up everything xmsgsize() reports. */
                len = xmsgsize(mp);
        } else {
                ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
                ASSERT(first_mblk_len >= 0);
                /*
                 * If the length is less than that of the first mblk,
                 * we want to pull up the message into an aligned mblk.
                 * Though not part of the spec, some callers assume it.
                 */
                if (len <= first_mblk_len) {
                        if (str_aligned(mp->b_rptr))
                                return (1);
                        /* Misaligned: relocate the whole first mblk. */
                        len = first_mblk_len;
                } else if (xmsgsize(mp) < len)
                        return (0);
        }

        if ((bp = allocb_tmpl(len, mp)) == NULL)
                return (0);

        /*
         * Exchange roles: bp receives a structure copy of mp (old data
         * block, pointers and continuation), and mp takes over the new,
         * empty data block.  Each dblk's db_mblk back pointer is then
         * re-aimed at the mblk that now owns it.
         */
        dbp = bp->b_datap;
        *bp = *mp;              /* swap mblks so bp heads the old msg... */
        mp->b_datap = dbp;      /* ... and mp heads the new message */
        mp->b_datap->db_mblk = mp;
        bp->b_datap->db_mblk = bp;
        mp->b_rptr = mp->b_wptr = dbp->db_base;

        /*
         * Drain the old chain into mp until len bytes have been copied.
         * A block that still holds data after its share was copied ends
         * the loop and stays at the head of the remainder; a fully
         * drained block is freed and the walk continues.
         */
        do {
                ASSERT(bp->b_datap->db_ref > 0);
                ASSERT(bp->b_wptr >= bp->b_rptr);
                n = MIN(bp->b_wptr - bp->b_rptr, len);
                ASSERT(n >= 0);         /* allow zero-length mblk_t's */
                if (n > 0)
                        bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
                mp->b_wptr += n;
                bp->b_rptr += n;
                len -= n;
                if (bp->b_rptr != bp->b_wptr)
                        break;
                b_cont = bp->b_cont;
                freeb(bp);
                bp = b_cont;
        } while (len && bp);

        mp->b_cont = bp;        /* tack on whatever wasn't pulled up */

        return (1);
}

/*
 * Concatenate and align at least the first len bytes of common message
 * type into a new message, leaving the original message unaltered.
 * Len == -1 means concatenate everything.
 *
 * Whatever len is, all of the bytes counted by xmsgsize(mp) are copied
 * into one new mblk; any blocks that follow are dupmsg()'d and linked
 * behind it.
 *
 * Returns the new message, or NULL if the message is M_MULTIDATA, if
 * len is positive and exceeds the available bytes, or if an allocation
 * or the dupmsg() fails.
 */
mblk_t *
msgpullup(mblk_t *mp, ssize_t len)
{
        mblk_t  *newmp;
        mblk_t  *src;
        ssize_t remaining;
        ssize_t n;

        /*
         * Multidata messages carry metadata this function knows
         * nothing about: assert on DEBUG, fail otherwise.
         */
        ASSERT(mp->b_datap->db_type != M_MULTIDATA);
        if (mp->b_datap->db_type == M_MULTIDATA)
                return (NULL);

        remaining = xmsgsize(mp);
        if (len > 0 && len > remaining)
                return (NULL);

        /* One block large enough for the whole leading run. */
        newmp = allocb_tmpl(remaining, mp);
        if (newmp == NULL)
                return (NULL);

        newmp->b_flag = mp->b_flag;
        newmp->b_band = mp->b_band;

        /* Gather the data block by block until the count is used up. */
        for (src = mp; remaining > 0; src = src->b_cont) {
                n = src->b_wptr - src->b_rptr;
                ASSERT(n >= 0);         /* allow zero-length mblk_t's */
                if (n > 0)
                        bcopy(src->b_rptr, newmp->b_wptr, n);
                newmp->b_wptr += n;
                remaining -= n;
        }

        /* Anything left over is dup'd and chained on. */
        if (src != NULL) {
                newmp->b_cont = dupmsg(src);
                if (newmp->b_cont == NULL) {
                        freemsg(newmp);
                        return (NULL);
                }
        }

        return (newmp);
}

/*
 * Trim bytes from message
 *  len > 0, trim from head
 *  len < 0, trim from tail
 * Returns 1 on success, 0 on failure.
 *
 * Failure is returned for M_MULTIDATA messages and when the message
 * holds fewer than |len| bytes according to xmsgsize(); nothing is
 * modified in either case.  The first mblk (mp) is never freed, even
 * if it ends up empty; other mblks that become empty are unlinked and
 * freed.
 */
int
adjmsg(mblk_t *mp, ssize_t len)
{
        mblk_t *bp;
        mblk_t *save_bp = NULL;
        mblk_t *prev_bp;
        mblk_t *bcont;
        unsigned char type;
        ssize_t n;
        int fromhead;
        int first;

        ASSERT(mp != NULL);
        /*
         * We won't handle Multidata message, since it contains
         * metadata which this function has no knowledge of; we
         * assert on DEBUG, and return failure otherwise.
         */
        ASSERT(mp->b_datap->db_type != M_MULTIDATA);
        if (mp->b_datap->db_type == M_MULTIDATA)
                return (0);

        /* Split len into a direction and a non-negative byte count. */
        if (len < 0) {
                fromhead = 0;
                len = -len;
        } else {
                fromhead = 1;
        }

        if (xmsgsize(mp) < len)
                return (0);

        if (fromhead) {
                /*
                 * Walk forward, advancing each block's read pointer.
                 * save_bp trails as the most recent block kept in the
                 * chain, so an emptied block can be unlinked from it.
                 */
                first = 1;
                while (len) {
                        ASSERT(mp->b_wptr >= mp->b_rptr);
                        n = MIN(mp->b_wptr - mp->b_rptr, len);
                        mp->b_rptr += n;
                        len -= n;

                        /*
                         * If this is not the first mblk and it is
                         * now zero length, remove it
                         */
                        if (!first && (mp->b_wptr == mp->b_rptr)) {
                                bcont = mp->b_cont;
                                freeb(mp);
                                mp = save_bp->b_cont = bcont;
                        } else {
                                save_bp = mp;
                                mp = mp->b_cont;
                        }
                        first = 0;
                }
        } else {
                /*
                 * Trim from the tail of the leading run of blocks that
                 * share the first block's type.  Each pass rescans
                 * from the head to locate the current last block of
                 * that run.
                 */
                type = mp->b_datap->db_type;
                while (len) {
                        bp = mp;
                        save_bp = NULL;

                        /*
                         * Find the last message of same type
                         * (save_bp) and the block before it (prev_bp)
                         */
                        while (bp && bp->b_datap->db_type == type) {
                                ASSERT(bp->b_wptr >= bp->b_rptr);
                                prev_bp = save_bp;
                                save_bp = bp;
                                bp = bp->b_cont;
                        }
                        /*
                         * mp always matches its own type, so this
                         * check appears to be purely defensive.
                         */
                        if (save_bp == NULL)
                                break;
                        n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
                        save_bp->b_wptr -= n;
                        len -= n;

                        /*
                         * If this is not the first message
                         * and we have taken away everything
                         * from this message, remove it
                         */

                        if ((save_bp != mp) &&
                            (save_bp->b_wptr == save_bp->b_rptr)) {
                                bcont = save_bp->b_cont;
                                freeb(save_bp);
                                prev_bp->b_cont = bcont;
                        }
                }
        }
        return (1);
}

/*
 * Return the number of data bytes in a message: the sum of
 * (b_wptr - b_rptr) over every M_DATA block in the chain.  Blocks of
 * any other type are skipped.  A NULL message yields 0.
 */
size_t
msgdsize(mblk_t *bp)
{
        size_t total = 0;

        while (bp != NULL) {
                if (bp->b_datap->db_type == M_DATA) {
                        ASSERT(bp->b_wptr >= bp->b_rptr);
                        total += bp->b_wptr - bp->b_rptr;
                }
                bp = bp->b_cont;
        }

        return (total);
}

/* getq() etc to EOF removed */