#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/policy.h>
#include <sys/vmsystm.h>
#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_kmem.h>
#include <vm/seg_mf.h>
#include <xen/io/blkif_impl.h>
#include <xen/io/blk_common.h>
#include <xen/io/xpvtap.h>
static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
struct pollhead **phpp);
/*
 * Character device entry points.  xpvtap is a pseudo device driven by a
 * userland tap process via open/close/ioctl/poll and mmap (devmap/segmap);
 * read/write/strategy are unsupported (nodev).
 */
static struct cb_ops xpvtap_cb_ops = {
xpvtap_open,		/* cb_open */
xpvtap_close,		/* cb_close */
nodev,			/* cb_strategy */
nodev,			/* cb_print */
nodev,			/* cb_dump */
nodev,			/* cb_read */
nodev,			/* cb_write */
xpvtap_ioctl,		/* cb_ioctl */
xpvtap_devmap,		/* cb_devmap */
nodev,			/* cb_mmap */
xpvtap_segmap,		/* cb_segmap */
xpvtap_chpoll,		/* cb_chpoll */
ddi_prop_op,		/* cb_prop_op */
NULL,			/* cb_str: not a STREAMS driver */
D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
CB_REV			/* cb_rev */
};
static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
/* Autoconfiguration (attach/detach/getinfo) entry points. */
static struct dev_ops xpvtap_dev_ops = {
DEVO_REV,		/* devo_rev */
0,			/* devo_refcnt */
xpvtap_getinfo,		/* devo_getinfo */
nulldev,		/* devo_identify */
nulldev,		/* devo_probe */
xpvtap_attach,		/* devo_attach */
xpvtap_detach,		/* devo_detach */
nodev,			/* devo_reset */
&xpvtap_cb_ops,		/* devo_cb_ops */
NULL,			/* devo_bus_ops */
NULL			/* devo_power */
};
/* Module linkage: this is a device driver module. */
static struct modldrv xpvtap_modldrv = {
&mod_driverops,		/* drv_modops */
"xpvtap driver",	/* drv_linkinfo */
&xpvtap_dev_ops,	/* drv_dev_ops */
};
static struct modlinkage xpvtap_modlinkage = {
MODREV_1,		/* ml_rev */
(void *) &xpvtap_modldrv,	/* ml_linkage[0] */
NULL			/* NULL-terminated */
};
void *xpvtap_statep;
static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);
typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
xpvtap_rs_cleanup_t callback, void *arg);
static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);
static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);
static void xpvtap_user_app_stop(caddr_t arg);
static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);
/*
 * _init()
 *    Loadable-module entry point: set up the per-instance soft state
 *    anchor, then register the driver.  On mod_install() failure the soft
 *    state anchor is torn back down so the module can unload cleanly.
 */
int
_init(void)
{
	int rc;

	rc = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
	if (rc != 0)
		return (rc);

	rc = mod_install(&xpvtap_modlinkage);
	if (rc != 0)
		ddi_soft_state_fini(&xpvtap_statep);

	return (rc);
}
/*
 * _info()
 *    Return module information via the modlinkage.
 */
int
_info(struct modinfo *modinfop)
{
return (mod_info(&xpvtap_modlinkage, modinfop));
}
/*
 * _fini()
 *    Unload the module; only release the soft state anchor once
 *    mod_remove() has succeeded.
 */
int
_fini(void)
{
	int rc;

	if ((rc = mod_remove(&xpvtap_modlinkage)) == 0)
		ddi_soft_state_fini(&xpvtap_statep);

	return (rc);
}
/*
 * xpvtap_attach()
 *    Bring up one instance: allocate driver state, initialize the ring
 *    shared with the guest domain (wiring our interrupt and ring up/down
 *    callbacks into the blk common code), then create the minor node the
 *    user tap process opens.  Failure unwinds in reverse order via the
 *    attachfail_* labels.  DDI_RESUME is a no-op success; nothing is
 *    suspended in this driver.
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
blk_ringinit_args_t args;
xpvtap_state_t *state;
int instance;
int e;
switch (cmd) {
case DDI_ATTACH:
break;
case DDI_RESUME:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
/* allocate and initialize the per-instance soft state */
instance = ddi_get_instance(dip);
state = xpvtap_drv_init(instance);
if (state == NULL) {
return (DDI_FAILURE);
}
state->bt_dip = dip;
/*
 * Initialize the ring to the guest.  The callbacks run in our context:
 * xpvtap_intr on guest notification, thread start/stop on ring
 * connect/disconnect.
 */
args.ar_dip = state->bt_dip;
args.ar_intr = xpvtap_intr;
args.ar_intr_arg = (caddr_t)state;
args.ar_ringup = xpvtap_user_thread_start;
args.ar_ringup_arg = (caddr_t)state;
args.ar_ringdown = xpvtap_user_app_stop;
args.ar_ringdown_arg = (caddr_t)state;
e = blk_ring_init(&args, &state->bt_guest_ring);
if (e != DDI_SUCCESS) {
goto attachfail_ringinit;
}
/* minor node the userland tap app opens; minor number == instance */
e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
DDI_PSEUDO, 0);
if (e != DDI_SUCCESS) {
goto attachfail_minor_node;
}
ddi_report_dev(dip);
return (DDI_SUCCESS);
attachfail_minor_node:
blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
xpvtap_drv_fini(state);
return (DDI_FAILURE);
}
/*
 * xpvtap_detach()
 *    Tear down an instance: stop the user service thread, destroy the
 *    guest ring, release driver state, and remove the minor node.
 *    Only DDI_DETACH is supported; DDI_SUSPEND fails.
 */
static int
xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xpvtap_state_t *state;

	state = ddi_get_soft_state(xpvtap_statep, ddi_get_instance(dip));
	if (state == NULL)
		return (DDI_FAILURE);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	xpvtap_user_thread_stop(state);
	blk_ring_fini(&state->bt_guest_ring);
	xpvtap_drv_fini(state);
	ddi_remove_minor_node(dip, NULL);

	return (DDI_SUCCESS);
}
/*
 * xpvtap_getinfo()
 *    Standard getinfo(9E): map a dev_t to its dev_info or instance
 *    number.  Minor number == instance number for this driver.
 */
/*ARGSUSED*/
static int
xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	xpvtap_state_t *state;
	int inst;

	inst = getminor((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		state = ddi_get_soft_state(xpvtap_statep, inst);
		if (state == NULL)
			return (DDI_FAILURE);
		*result = (void *)state->bt_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)inst;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}
/*
 * xpvtap_open()
 *    open(9E).  Privileged (xVM control) callers only; at most one opener
 *    at a time.  The opener's address space is recorded so later segmap
 *    requests and grant mappings land in the right process.
 */
/*ARGSUSED*/
static int
xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	xpvtap_state_t *state;

	if (secpolicy_xvm_control(cred) != 0)
		return (EPERM);

	state = ddi_get_soft_state(xpvtap_statep, getminor(*devp));
	if (state == NULL)
		return (ENXIO);

	/* enforce exclusive access */
	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		mutex_exit(&state->bt_open.bo_mutex);
		return (EBUSY);
	}
	state->bt_open.bo_opened = B_TRUE;
	mutex_exit(&state->bt_open.bo_mutex);

	/* remember the opening process's address space for mappings */
	state->bt_map.um_as = curproc->p_as;

	return (0);
}
/*
 * xpvtap_close()
 *    close(9E).  Shut down the user service thread (set ut_exit, wake it,
 *    and wait for ut_exit_done under ut_mutex), drop the cached user
 *    address-space references, then mark the device closed and signal
 *    bo_exit_cv for anyone (xpvtap_user_app_stop) waiting on the close.
 */
/*ARGSUSED*/
static int
xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
xpvtap_state_t *state;
int instance;
instance = getminor(devp);
state = ddi_get_soft_state(xpvtap_statep, instance);
if (state == NULL) {
return (ENXIO);
}
/* ask the thread to exit and wait until it acknowledges */
mutex_enter(&state->bt_thread.ut_mutex);
state->bt_thread.ut_wake = B_TRUE;
state->bt_thread.ut_exit = B_TRUE;
cv_signal(&state->bt_thread.ut_wake_cv);
if (!state->bt_thread.ut_exit_done) {
cv_wait(&state->bt_thread.ut_exit_done_cv,
&state->bt_thread.ut_mutex);
}
ASSERT(state->bt_thread.ut_exit_done);
mutex_exit(&state->bt_thread.ut_mutex);
/* the opener's mappings are gone with its address space */
state->bt_map.um_as = NULL;
state->bt_map.um_guest_pages = NULL;
/* mark closed and release anyone waiting for the app to go away */
mutex_enter(&state->bt_open.bo_mutex);
ASSERT(state->bt_open.bo_opened);
state->bt_open.bo_opened = B_FALSE;
cv_signal(&state->bt_open.bo_exit_cv);
mutex_exit(&state->bt_open.bo_mutex);
return (0);
}
/*
 * xpvtap_ioctl()
 *    ioctl(9E).  The only supported command is XPVTAP_IOCTL_RESP_PUSH,
 *    which the user app issues after queueing responses on the user ring;
 *    it simply wakes the service thread to process them.
 */
/*ARGSUSED*/
static int
xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval)
{
	xpvtap_state_t *state;
	int minor;

	if (secpolicy_xvm_control(cred) != 0)
		return (EPERM);

	minor = getminor(dev);
	if (minor == -1)
		return (EBADF);

	state = ddi_get_soft_state(xpvtap_statep, minor);
	if (state == NULL)
		return (EBADF);

	if (cmd != XPVTAP_IOCTL_RESP_PUSH) {
		cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
		return (ENXIO);
	}

	/* wake the service thread; it drains the user ring */
	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (0);
}
/*
 * xpvtap_segmap()
 *    segmap(9E).  Two mappings are supported:
 *      off == 0:        the user ring page; handed to devmap_setup() which
 *                       ends up in xpvtap_devmap().
 *      off == PAGESIZE: the grant-ref buffer (len must be exactly
 *                       XPVTAP_GREF_BUFSIZE), backed by a segmf segment so
 *                       guest grant pages can be mapped into the app.
 *    MAP_SHARED is required.  An as_add_callback() is registered so we
 *    can clean up grant mappings when the app unmaps or exits.
 */
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
cred_t *cred_p)
{
struct segmf_crargs a;
xpvtap_state_t *state;
int instance;
int e;
if (secpolicy_xvm_control(cred_p)) {
return (EPERM);
}
instance = getminor(dev);
state = ddi_get_soft_state(xpvtap_statep, instance);
if (state == NULL) {
return (EBADF);
}
/* the grant pages are shared with the guest; private makes no sense */
if ((flags & MAP_TYPE) != MAP_SHARED) {
return (EINVAL);
}
/* offset 0 is the ring page: normal devmap path */
if (off == 0) {
e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
prot, maxprot, flags, cred_p);
return (e);
}
/* otherwise only the gref buffer at PAGESIZE, exact size, is valid */
if (off != PAGESIZE) {
return (EINVAL);
}
if (len != XPVTAP_GREF_BUFSIZE) {
return (EINVAL);
}
ASSERT(asp == state->bt_map.um_as);
/* pick (or validate) a user address while holding the as range lock */
as_rangelock(asp);
if ((flags & MAP_FIXED) == 0) {
map_addr(addrp, len, 0, 0, flags);
if (*addrp == NULL) {
as_rangeunlock(asp);
return (ENOMEM);
}
} else {
/* MAP_FIXED: blow away any existing mapping at that address */
(void) as_unmap(asp, *addrp, len);
}
/* create the segmf segment over the chosen range */
a.dev = dev;
a.prot = (uchar_t)prot;
a.maxprot = (uchar_t)maxprot;
e = as_map(asp, *addrp, len, segmf_create, &a);
if (e != 0) {
as_rangeunlock(asp);
return (e);
}
as_rangeunlock(asp);
state->bt_map.um_guest_pages = (caddr_t)*addrp;
state->bt_map.um_guest_size = (size_t)len;
/* get notified on unmap so grant mappings can be torn down */
e = as_add_callback(asp, xpvtap_segmf_unregister, state,
AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
if (e != 0) {
(void) as_unmap(asp, *addrp, len);
return (EINVAL);
}
/* buffer is in place; wake the thread to start pushing requests */
mutex_enter(&state->bt_thread.ut_mutex);
state->bt_thread.ut_wake = B_TRUE;
cv_signal(&state->bt_thread.ut_wake_cv);
mutex_exit(&state->bt_thread.ut_mutex);
return (0);
}
/*
 * xpvtap_devmap()
 *    devmap(9E).  Maps the single page of kernel memory holding the
 *    user ring (shared ring between driver and user app) into the app.
 *    Only offset 0 / length PAGESIZE is valid.
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	xpvtap_state_t *state;
	int rc;

	state = ddi_get_soft_state(xpvtap_statep, getminor(dev));
	if (state == NULL)
		return (EBADF);

	/* only the ring page at offset 0 is mappable through devmap */
	if ((off != 0) || (len != PAGESIZE))
		return (EINVAL);

	rc = devmap_umem_setup(dhp, state->bt_dip, NULL,
	    state->bt_user_ring.ur_cookie, 0, PAGESIZE, PROT_ALL,
	    DEVMAP_DEFAULTS, NULL);
	if (rc < 0)
		return (rc);

	*maplen = PAGESIZE;
	return (0);
}
/*
 * xpvtap_chpoll()
 *    chpoll(9E).  Readable (POLLIN|POLLRDNORM) when new requests have
 *    been produced on the user ring since the last poll; the private
 *    producer index is pushed to the shared ring here so the app sees
 *    the requests.  Pollhead is handed back per the anyyet/POLLET
 *    contract so pollwakeup() in xpvtap_user_request_push() works.
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
struct pollhead **phpp)
{
xpvtap_user_ring_t *usring;
xpvtap_state_t *state;
int instance;
instance = getminor(dev);
if (instance == -1) {
return (EBADF);
}
state = ddi_get_soft_state(xpvtap_statep, instance);
if (state == NULL) {
return (EBADF);
}
/* we only support read-style polling */
if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
return (EINVAL);
}
*reventsp = 0;
/*
 * if new requests showed up since the last poll, publish them to the
 * shared ring and report readable.
 */
usring = &state->bt_user_ring;
if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {
RING_PUSH_REQUESTS(&usring->ur_ring);
usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
*reventsp = POLLIN | POLLRDNORM;
}
if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
*phpp = &state->bt_pollhead;
}
return (0);
}
/*
 * xpvtap_drv_init()
 *    Allocate and initialize the per-instance soft state: open-state
 *    lock/CV, map bookkeeping, and the user-side resources
 *    (ring, request slots, service thread).  Returns NULL on failure
 *    with everything unwound.
 */
static xpvtap_state_t *
xpvtap_drv_init(int instance)
{
	xpvtap_state_t *state;

	if (ddi_soft_state_zalloc(xpvtap_statep, instance) != DDI_SUCCESS)
		return (NULL);

	state = ddi_get_soft_state(xpvtap_statep, instance);
	if (state == NULL)
		goto fail_soft_state;

	state->bt_instance = instance;

	mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
	state->bt_open.bo_opened = B_FALSE;
	state->bt_map.um_registered = B_FALSE;

	if (xpvtap_user_init(state) != DDI_SUCCESS)
		goto fail_user_init;

	return (state);

fail_user_init:
	cv_destroy(&state->bt_open.bo_exit_cv);
	mutex_destroy(&state->bt_open.bo_mutex);
fail_soft_state:
	(void) ddi_soft_state_free(xpvtap_statep, instance);
	return (NULL);
}
/*
 * xpvtap_drv_fini()
 *    Undo xpvtap_drv_init() in reverse order, ending with the soft
 *    state itself.
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
xpvtap_user_fini(state);
cv_destroy(&state->bt_open.bo_exit_cv);
mutex_destroy(&state->bt_open.bo_mutex);
(void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}
/*
 * xpvtap_intr()
 *    Guest-ring interrupt handler.  No work is done here; we just wake
 *    the user service thread, which drains the ring in its own context.
 */
static uint_t
xpvtap_intr(caddr_t arg)
{
	xpvtap_state_t *state = (xpvtap_state_t *)arg;

	mutex_enter(&state->bt_thread.ut_mutex);
	state->bt_thread.ut_wake = B_TRUE;
	cv_signal(&state->bt_thread.ut_wake_cv);
	mutex_exit(&state->bt_thread.ut_mutex);

	return (DDI_INTR_CLAIMED);
}
/*
 * xpvtap_segmf_register()
 *    Pre-fault the user's gref buffer: for every page, install a locked
 *    dummy mapping and record its PTE machine address with the segmf
 *    segment, so grant refs can later be hooked in without faulting.
 *    Called with the user app's as; requires the buffer to lie entirely
 *    within one segment.
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
struct seg *seg;
uint64_t pte_ma;
struct as *as;
caddr_t uaddr;
uint_t pgcnt;
int i;
as = state->bt_map.um_as;
pgcnt = btopr(state->bt_map.um_guest_size);
uaddr = state->bt_map.um_guest_pages;
if (pgcnt == 0) {
return (DDI_FAILURE);
}
AS_LOCK_ENTER(as, RW_READER);
/* entire buffer must fall within a single segment */
seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
(seg->s_base + seg->s_size))) {
AS_LOCK_EXIT(as);
return (DDI_FAILURE);
}
for (i = 0; i < pgcnt; i++) {
/*
 * install a locked dummy mapping (pfn 0) and capture the PTE's
 * machine address so segmf can later overwrite it with a
 * foreign (grant) mapping.
 */
hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
hat_release_mapping(as->a_hat, uaddr);
segmf_add_gref_pte(seg, uaddr, pte_ma);
uaddr += PAGESIZE;
}
state->bt_map.um_registered = B_TRUE;
AS_LOCK_EXIT(as);
return (DDI_SUCCESS);
}
/*
 * xpvtap_segmf_unregister()
 *    as_add_callback() handler, fired when the app unmaps the gref
 *    buffer (or its as is torn down).  Flushes all outstanding request
 *    slots (unmapping their grant refs) and then unloads the locked
 *    dummy mappings installed by xpvtap_segmf_register().
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
xpvtap_state_t *state;
caddr_t uaddr;
uint_t pgcnt;
int i;
state = (xpvtap_state_t *)arg;
if (!state->bt_map.um_registered) {
/* nothing was ever mapped in; just drop the callback */
(void) as_delete_callback(as, arg);
return;
}
pgcnt = btopr(state->bt_map.um_guest_size);
uaddr = state->bt_map.um_guest_pages;
/* unmap any outstanding grant refs still tied to request slots */
xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);
for (i = 0; i < pgcnt; i++) {
/* unload the locked dummy mapping, page at a time */
AS_LOCK_ENTER(as, RW_WRITER);
hat_prepare_mapping(as->a_hat, uaddr, NULL);
hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
hat_release_mapping(as->a_hat, uaddr);
AS_LOCK_EXIT(as);
uaddr += PAGESIZE;
}
(void) as_delete_callback(as, arg);
state->bt_map.um_registered = B_FALSE;
}
/*
 * xpvtap_user_init()
 *    Set up everything needed to talk to the user app: the user ring,
 *    the request-id resource map (ids 0..BLKIF_RING_SIZE-1), the
 *    outstanding-request shadow array, and the service thread.
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
	xpvtap_user_map_t *map = &state->bt_map;

	if (xpvtap_user_ring_init(state) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/* one request id per possible in-flight request */
	xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);
	map->um_outstanding_reqs = kmem_zalloc(
	    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
	    KM_SLEEP);

	if (xpvtap_user_thread_init(state) != DDI_SUCCESS) {
		xpvtap_rs_fini(&map->um_rs);
		kmem_free(map->um_outstanding_reqs,
		    sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
		xpvtap_user_ring_fini(state);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
/*
 * xpvtap_user_ring_init()
 *    Allocate the page of kernel memory backing the ring shared with the
 *    user app (mapped into the app via xpvtap_devmap) and initialize the
 *    ring pointers.  DDI_UMEM_SLEEP means the allocation cannot fail.
 */
static int
xpvtap_user_ring_init(xpvtap_state_t *state)
{
	xpvtap_user_ring_t *ring = &state->bt_user_ring;

	ring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
	    DDI_UMEM_SLEEP, &ring->ur_cookie);
	SHARED_RING_INIT(ring->ur_sring);
	FRONT_RING_INIT(&ring->ur_ring, ring->ur_sring, PAGESIZE);

	/* nothing produced, nothing reported to poll yet */
	ring->ur_prod_polled = 0;

	return (DDI_SUCCESS);
}
/*
 * xpvtap_user_thread_init()
 *    Initialize the service-thread state (mutex, wake/exit CVs, flags)
 *    and create the single-threaded taskq the thread runs on.  The
 *    thread itself is dispatched later by xpvtap_user_thread_start().
 */
static int
xpvtap_user_thread_init(xpvtap_state_t *state)
{
	xpvtap_user_thread_t *thread;
	char taskqname[32];

	thread = &state->bt_thread;

	mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
	cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
	thread->ut_wake = B_FALSE;
	thread->ut_exit = B_FALSE;
	thread->ut_exit_done = B_TRUE;	/* no thread running yet */

	/*
	 * Use a bounded format: sprintf() had no length limit against
	 * taskqname[32].  NOTE(review): "xvptap" (sic) is the historical
	 * taskq name; kept byte-identical for observability compatibility.
	 */
	(void) snprintf(taskqname, sizeof (taskqname), "xvptap_%d",
	    state->bt_instance);
	thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
	    TASKQ_DEFAULTPRI, 0);
	if (thread->ut_taskq == NULL) {
		goto userinitthrfail_taskq_create;
	}

	return (DDI_SUCCESS);

userinitthrfail_taskq_create:
	cv_destroy(&thread->ut_exit_done_cv);
	cv_destroy(&thread->ut_wake_cv);
	mutex_destroy(&thread->ut_mutex);
	return (DDI_FAILURE);
}
/*
 * xpvtap_user_thread_start()
 *    Ring-up callback: dispatch the service thread onto its taskq.
 *    ut_exit_done is cleared first so stop/close will wait for it; it is
 *    restored if the dispatch fails.
 */
static void
xpvtap_user_thread_start(caddr_t arg)
{
	xpvtap_state_t *state = (xpvtap_state_t *)arg;
	xpvtap_user_thread_t *thread = &state->bt_thread;

	thread->ut_exit_done = B_FALSE;
	if (ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
	    DDI_SLEEP) != DDI_SUCCESS) {
		thread->ut_exit_done = B_TRUE;
		cmn_err(CE_WARN, "Unable to start user thread\n");
	}
}
/*
 * xpvtap_user_thread_stop()
 *    Ask the service thread to exit (ut_exit + wake) and block on
 *    ut_exit_done_cv until it acknowledges.  Safe to call when the
 *    thread never started: ut_exit_done is then already B_TRUE.
 */
static void
xpvtap_user_thread_stop(xpvtap_state_t *state)
{
mutex_enter(&state->bt_thread.ut_mutex);
state->bt_thread.ut_wake = B_TRUE;
state->bt_thread.ut_exit = B_TRUE;
cv_signal(&state->bt_thread.ut_wake_cv);
if (!state->bt_thread.ut_exit_done) {
cv_wait(&state->bt_thread.ut_exit_done_cv,
&state->bt_thread.ut_mutex);
}
mutex_exit(&state->bt_thread.ut_mutex);
ASSERT(state->bt_thread.ut_exit_done);
}
/*
 * xpvtap_user_fini()
 *    Undo xpvtap_user_init() in reverse order: thread, resource map,
 *    outstanding-request shadow array, user ring.
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
xpvtap_user_map_t *map;
map = &state->bt_map;
xpvtap_user_thread_fini(state);
xpvtap_rs_fini(&map->um_rs);
kmem_free(map->um_outstanding_reqs,
sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
xpvtap_user_ring_fini(state);
}
/*
 * xpvtap_user_ring_fini()
 *    Free the ring page allocated in xpvtap_user_ring_init().
 */
static void
xpvtap_user_ring_fini(xpvtap_state_t *state)
{
ddi_umem_free(state->bt_user_ring.ur_cookie);
}
/*
 * xpvtap_user_thread_fini()
 *    Destroy the service thread's taskq and synchronization objects.
 *    Caller must have already stopped the thread.
 */
static void
xpvtap_user_thread_fini(xpvtap_state_t *state)
{
ddi_taskq_destroy(state->bt_thread.ut_taskq);
cv_destroy(&state->bt_thread.ut_exit_done_cv);
cv_destroy(&state->bt_thread.ut_wake_cv);
mutex_destroy(&state->bt_thread.ut_mutex);
}
/*
 * xpvtap_user_thread()
 *    The taskq service loop.  Sleeps on ut_wake_cv until woken by the
 *    guest-ring interrupt, the RESP_PUSH ioctl, segmap, or shutdown.
 *    When awake: (1) pull requests off the guest ring, map their grant
 *    refs into the user app, and push them onto the user ring;
 *    (2) pull finished responses off the user ring, unmap the grants,
 *    and push responses back to the guest.  On ut_exit, acknowledge via
 *    ut_exit_done/ut_exit_done_cv and return.
 */
static void
xpvtap_user_thread(void *arg)
{
xpvtap_user_thread_t *thread;
blkif_response_t resp;
xpvtap_state_t *state;
blkif_request_t req;
boolean_t b;
uint_t uid;
int e;
state = (xpvtap_state_t *)arg;
thread = &state->bt_thread;
xpvtap_thread_start:
/* exit request: acknowledge under the lock and leave */
mutex_enter(&thread->ut_mutex);
if (thread->ut_exit) {
thread->ut_exit_done = B_TRUE;
cv_signal(&state->bt_thread.ut_exit_done_cv);
mutex_exit(&thread->ut_mutex);
return;
}
if (!thread->ut_wake) {
/*
 * cv_wait_sig() returns 0 when interrupted by a signal;
 * treat that like an exit request and loop back to the
 * exit handling above.
 */
e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
if ((e == 0) || (thread->ut_exit)) {
thread->ut_exit = B_TRUE;
mutex_exit(&thread->ut_mutex);
goto xpvtap_thread_start;
}
}
/* spurious wakeup: go back to sleep */
if (!thread->ut_wake) {
mutex_exit(&thread->ut_mutex);
goto xpvtap_thread_start;
}
thread->ut_wake = B_FALSE;
mutex_exit(&thread->ut_mutex);
/* forward guest requests to the user app */
do {
e = blk_ring_request_get(state->bt_guest_ring, &req);
if (e == B_FALSE) {
break;
}
/* can't map right now (e.g. no free slot): retry later */
e = xpvtap_user_request_map(state, &req, &uid);
if (e != DDI_SUCCESS) {
blk_ring_request_requeue(state->bt_guest_ring);
break;
}
/* push failure: fail the request back to the guest */
e = xpvtap_user_request_push(state, &req, uid);
if (e != DDI_SUCCESS) {
resp.id = req.id;
resp.operation = req.operation;
resp.status = BLKIF_RSP_ERROR;
blk_ring_response_put(state->bt_guest_ring, &resp);
}
} while (!thread->ut_exit);
/* forward completed user responses back to the guest */
do {
b = xpvtap_user_response_get(state, &resp, &uid);
if (b != B_TRUE) {
break;
}
xpvtap_user_request_unmap(state, uid);
blk_ring_response_put(state->bt_guest_ring, &resp);
} while (!thread->ut_exit);
goto xpvtap_thread_start;
}
/*
 * xpvtap_user_request_map()
 *    Allocate a request-id slot (*uid) and map the request's grant refs
 *    from the guest domain into the user app's gref buffer at the slot's
 *    fixed offset (XPVTAP_GREF_REQADDR).  Barrier/flush requests and
 *    zero-segment requests need no mapping; the slot alone is enough.
 *    Returns DDI_FAILURE if the app isn't ready or no slot is free, in
 *    which case the caller requeues the request.
 */
static int
xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
uint_t *uid)
{
grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct seg *seg;
struct as *as;
domid_t domid;
caddr_t uaddr;
uint_t flags;
int i;
int e;
domid = xvdi_get_oeid(state->bt_dip);
/* app must have opened the device and mapped the gref buffer */
as = state->bt_map.um_as;
if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
return (DDI_FAILURE);
}
/* lazily install the locked PTE mappings on first use */
if (!state->bt_map.um_registered) {
e = xpvtap_segmf_register(state);
if (e != DDI_SUCCESS) {
return (DDI_FAILURE);
}
}
/* grab a free request-id slot */
e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
if (e != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/* barrier/flush and empty requests carry no data to map */
if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
(req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
(req->nr_segments == 0)) {
return (DDI_SUCCESS);
}
uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);
AS_LOCK_ENTER(as, RW_READER);
seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
(seg->s_base + seg->s_size))) {
AS_LOCK_EXIT(as);
/*
 * NOTE(review): the uid allocated above is not released on
 * this failure path — looks like a slot leak; confirm whether
 * xpvtap_rs_flush() is relied on to reclaim it.
 */
return (DDI_FAILURE);
}
/* guest reads need writable mappings (we write data into them) */
flags = 0;
if (req->operation == BLKIF_OP_READ) {
flags |= SEGMF_GREF_WR;
}
for (i = 0; i < req->nr_segments; i++) {
gref[i] = req->seg[i].gref;
}
(void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
domid);
AS_LOCK_EXIT(as);
return (DDI_SUCCESS);
}
/*
 * xpvtap_user_request_push()
 *    Queue a guest request on the user ring.  The original request is
 *    shadowed in um_outstanding_reqs[uid] (so the guest's request id can
 *    be restored on completion) and the copy pushed to the app carries
 *    uid as its id.  Wakes pollers; the producer index is published to
 *    the shared ring in xpvtap_chpoll().
 */
static int
xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
uint_t uid)
{
blkif_request_t *outstanding_req;
blkif_front_ring_t *uring;
blkif_request_t *target;
xpvtap_user_map_t *map;
uring = &state->bt_user_ring.ur_ring;
map = &state->bt_map;
target = RING_GET_REQUEST(uring, uring->req_prod_pvt);
/* shadow the original so the guest id survives the round trip */
outstanding_req = &map->um_outstanding_reqs[uid];
bcopy(req, outstanding_req, sizeof (*outstanding_req));
/* hand the app a copy whose id is our slot number */
bcopy(req, target, sizeof (*req));
target->id = (uint64_t)uid;
uring->req_prod_pvt++;
pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);
return (DDI_SUCCESS);
}
/*
 * xpvtap_user_request_unmap()
 *    Release the grant-ref mappings associated with request slot uid
 *    (if the shadowed request had data segments) and free the slot.
 *    Also used as the xpvtap_rs_flush() callback during teardown.
 */
static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
blkif_request_t *req;
struct seg *seg;
struct as *as;
caddr_t uaddr;
int e;
as = state->bt_map.um_as;
if (as == NULL) {
/* app already gone; nothing mapped anymore */
return;
}
/* barrier/flush and empty requests had nothing mapped */
req = &state->bt_map.um_outstanding_reqs[uid];
if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
(req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
(req->nr_segments != 0)) {
uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
AS_LOCK_ENTER(as, RW_READER);
seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
(seg->s_base + seg->s_size))) {
/* mapping already gone; still release the slot */
AS_LOCK_EXIT(as);
xpvtap_rs_free(state->bt_map.um_rs, uid);
return;
}
e = segmf_release_grefs(seg, uaddr, req->nr_segments);
if (e != 0) {
cmn_err(CE_WARN, "unable to release grefs");
}
AS_LOCK_EXIT(as);
}
xpvtap_rs_free(state->bt_map.um_rs, uid);
}
/*
 * xpvtap_user_response_get()
 *    Consume one response from the user ring, if any.  The response id
 *    on the user ring is our slot number: it is returned in *uid and
 *    replaced in *resp with the guest's original request id from the
 *    um_outstanding_reqs shadow copy.  Returns B_TRUE when a response
 *    was consumed, B_FALSE when the ring is empty.
 */
static int
xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
    uint_t *uid)
{
	blkif_front_ring_t *uring;
	blkif_response_t *target;

	uring = &state->bt_user_ring.ur_ring;

	if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
		return (B_FALSE);
	}

	/* (dead `target = NULL` pre-store removed; it was overwritten) */
	target = RING_GET_RESPONSE(uring, uring->rsp_cons);
	if (target == NULL) {
		return (B_FALSE);
	}

	/* copy out, consume the slot, and restore the guest's request id */
	bcopy(target, resp, sizeof (*resp));
	uring->rsp_cons++;
	*uid = (uint_t)resp->id;
	resp->id = state->bt_map.um_outstanding_reqs[*uid].id;

	return (B_TRUE);
}
/*
 * xpvtap_user_app_stop()
 *    Ring-down callback.  Give an open user process up to 10 seconds
 *    (waiting on bo_exit_cv, signalled from xpvtap_close()) to close the
 *    device; if it doesn't, log a note that detach will be deferred.
 *    (Definition style fixed: return type on its own line, matching the
 *    rest of the file.)
 */
static void
xpvtap_user_app_stop(caddr_t arg)
{
	xpvtap_state_t *state;
	clock_t rc;

	state = (xpvtap_state_t *)arg;

	mutex_enter(&state->bt_open.bo_mutex);
	if (state->bt_open.bo_opened) {
		/* wait up to 10s (in ticks) for the app to close us */
		rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
		    &state->bt_open.bo_mutex, drv_usectohz(10000000),
		    TR_CLOCK_TICK);
		if (rc <= 0) {
			cmn_err(CE_NOTE, "!user process still has driver open, "
			    "deferring detach\n");
		}
	}
	mutex_exit(&state->bt_open.bo_mutex);
}
/*
 * xpvtap_rs_init()
 *    Create a resource map handing out ids in [min_val, max_val] from a
 *    bitmap (set bit == free).  Allocation uses a rotor (rs_last) for
 *    round-robin reuse.
 */
static void
xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rstruct;
	uint_t array_size;
	uint_t index;

	ASSERT(handle != NULL);
	ASSERT(min_val < max_val);

	rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);

	/*
	 * The bitmap must cover ids 0..max_val inclusive, i.e.
	 * (max_val >> 6) + 1 64-bit words.  The previous special case for
	 * (max_val & 0x3F) == 0 allocated one word too few whenever
	 * max_val was a non-zero multiple of 64 — bit "max_val" would
	 * have lived in the missing word, and the rotor would have read
	 * past the end of rs_free.  Unconditional +1 is exact for all
	 * max_val (e.g. 63 -> 1 word, 64 -> 2 words).
	 */
	rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;

	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
	rstruct->rs_min = min_val;
	rstruct->rs_last = min_val;
	rstruct->rs_max = max_val;
	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
	rstruct->rs_flushing = B_FALSE;

	/* mark every id free */
	array_size = rstruct->rs_free_size >> 3;
	for (index = 0; index < array_size; index++) {
		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
	}

	*handle = rstruct;
}
/*
 * xpvtap_rs_fini()
 *    Destroy a resource map created by xpvtap_rs_init() and clear the
 *    caller's handle.
 */
static void
xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
{
	xpvtap_rs_t *rs;

	ASSERT(handle != NULL);

	rs = (xpvtap_rs_t *)*handle;
	*handle = NULL;

	mutex_destroy(&rs->rs_mutex);
	kmem_free(rs->rs_free, rs->rs_free_size);
	kmem_free(rs, sizeof (xpvtap_rs_t));
}
/*
 * xpvtap_rs_alloc()
 *    Allocate a free id from the map.  Starting at the rotor (rs_last),
 *    scan at most (max - min + 1) ids; a set bit means free.  On success
 *    the bit is cleared, *resource gets the id, and the rotor advances
 *    (wrapping from max back to min).  DDI_FAILURE when all ids are in
 *    use.
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
xpvtap_rs_t *rstruct;
uint_t array_idx;
uint64_t free;
uint_t index;
uint_t last;
uint_t min;
uint_t max;
ASSERT(handle != NULL);
ASSERT(resource != NULL);
rstruct = (xpvtap_rs_t *)handle;
mutex_enter(&rstruct->rs_mutex);
min = rstruct->rs_min;
max = rstruct->rs_max;
/* one full sweep of the id space, starting at the rotor */
for (index = min; index <= max; index++) {
/* word index and bit-within-word (0x3F == 63) for rs_last */
array_idx = rstruct->rs_last >> 6;
free = rstruct->rs_free[array_idx];
last = rstruct->rs_last & 0x3F;
if ((free & ((uint64_t)1 << last)) != 0) {
/* free: claim it and advance the rotor */
*resource = rstruct->rs_last;
rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);
rstruct->rs_last++;
if (rstruct->rs_last > max) {
rstruct->rs_last = rstruct->rs_min;
}
mutex_exit(&rstruct->rs_mutex);
return (DDI_SUCCESS);
}
/* in use: advance the rotor (with wrap) and keep looking */
rstruct->rs_last++;
if (rstruct->rs_last > max) {
rstruct->rs_last = rstruct->rs_min;
}
}
mutex_exit(&rstruct->rs_mutex);
return (DDI_FAILURE);
}
/*
 * xpvtap_rs_free()
 *    Return an id to the map by setting its bit.  When called from the
 *    flush callback (rs_flushing set), rs_mutex is already held by
 *    xpvtap_rs_flush(), so the lock is skipped to avoid self-deadlock.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
xpvtap_rs_t *rstruct;
uint_t array_idx;
uint_t offset;
ASSERT(handle != NULL);
rstruct = (xpvtap_rs_t *)handle;
ASSERT(resource >= rstruct->rs_min);
ASSERT(resource <= rstruct->rs_max);
if (!rstruct->rs_flushing) {
mutex_enter(&rstruct->rs_mutex);
}
/* set the id's bit: word index and bit-within-word */
array_idx = resource >> 6;
offset = resource & 0x3F;
rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);
if (!rstruct->rs_flushing) {
mutex_exit(&rstruct->rs_mutex);
}
}
/*
 * xpvtap_rs_flush()
 *    Walk every id in the map and, for each one currently allocated
 *    (clear bit), invoke the cleanup callback and mark the id free
 *    again.  rs_flushing is set so a callback that calls
 *    xpvtap_rs_free() does not try to re-take rs_mutex.
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
void *arg)
{
xpvtap_rs_t *rstruct;
uint_t array_idx;
uint64_t free;
uint_t index;
uint_t last;
uint_t min;
uint_t max;
ASSERT(handle != NULL);
rstruct = (xpvtap_rs_t *)handle;
mutex_enter(&rstruct->rs_mutex);
min = rstruct->rs_min;
max = rstruct->rs_max;
rstruct->rs_flushing = B_TRUE;
/* one full sweep, same rotor walk as xpvtap_rs_alloc() */
for (index = min; index <= max; index++) {
array_idx = rstruct->rs_last >> 6;
free = rstruct->rs_free[array_idx];
last = rstruct->rs_last & 0x3F;
if ((free & ((uint64_t)1 << last)) == 0) {
/* allocated: clean it up, then mark it free */
(*callback)(arg, rstruct->rs_last);
rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
}
rstruct->rs_last++;
if (rstruct->rs_last > max) {
rstruct->rs_last = rstruct->rs_min;
}
}
mutex_exit(&rstruct->rs_mutex);
}