#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/sysctl.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/pci.h>
#include <sys/pci_cap.h>
#include <sys/pcie_impl.h>
#include <sys/ppt_dev.h>
#include <sys/mkdev.h>
#include <sys/sysmacros.h>
#include "vmm_lapic.h"
#include "iommu.h"
#include "ppt.h"
/* Maximum number of MSI vectors configurable per pass-thru device. */
#define MAX_MSIMSGS 32
/* Guest MMIO segment slots: one per BAR 0..PCIR_MAX_BAR_0, plus one spare. */
#define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1)
/*
 * Per-vector argument handed to pptintr(): identifies the owning device and
 * carries the guest MSI address/data pair to inject via the local APIC.
 */
struct pptintr_arg {
	struct pptdev *pptdev;	/* device this vector belongs to */
	uint64_t addr;		/* guest MSI address */
	uint64_t msg_data;	/* guest MSI data payload */
};
/* One guest-physical MMIO mapping established through ppt_map_mmio(). */
struct pptseg {
	vm_paddr_t gpa;	/* guest-physical base address */
	size_t len;	/* length in bytes; 0 means the slot is free */
	int wired;	/* NOTE(review): never written in this file -- confirm use */
};
/* Cached description of one PCI BAR, populated by ppt_bar_crawl(). */
struct pptbar {
	uint64_t base;		/* bus address of the BAR */
	uint64_t size;		/* BAR size in bytes */
	uint_t type;		/* PCI_ADDR_* space type from the regspec */
	ddi_acc_handle_t io_handle;	/* register mapping (I/O BARs only) */
	caddr_t io_ptr;		/* mapped base pointer (I/O BARs only) */
	uint_t ddireg;		/* DDI register number used by ppt_devmap() */
};
/*
 * Per-instance soft state for one pass-thru PCI device.  Instances are
 * linked on pptdev_list; list membership and the 'vm' field are protected
 * by pptdev_mtx.
 */
struct pptdev {
	dev_info_t *pptd_dip;		/* our devinfo node */
	list_node_t pptd_node;		/* linkage on pptdev_list */
	ddi_acc_handle_t pptd_cfg;	/* config-space access handle */
	struct pptbar pptd_bars[PCI_BASE_NUM];	/* cached BAR descriptions */
	struct vm *vm;			/* owning VM, NULL if unassigned */
	struct pptseg mmio[MAX_MMIOSEGS];	/* guest MMIO mappings */
	struct {
		int num_msgs;		/* vectors configured (0 = inactive) */
		boolean_t is_fixed;	/* FIXED interrupts used instead of MSI */
		size_t inth_sz;		/* bytes allocated for inth */
		ddi_intr_handle_t *inth;	/* interrupt handles */
		struct pptintr_arg arg[MAX_MSIMSGS];	/* handler arguments */
	} msi;
	struct {
		int num_msgs;		/* vectors allocated (0 = inactive) */
		size_t inth_sz;		/* bytes allocated for inth */
		size_t arg_sz;		/* bytes allocated for arg */
		ddi_intr_handle_t *inth;	/* interrupt handles */
		struct pptintr_arg *arg;	/* handler arguments */
	} msix;
};
/* Major number of the ppt driver, cached at _init() time for ppt_findf(). */
static major_t ppt_major;
/* Soft-state handle for per-instance struct pptdev allocations. */
static void *ppt_state;
/* Protects pptdev_list and the 'vm' ownership of every listed device. */
static kmutex_t pptdev_mtx;
/* All attached ppt instances. */
static list_t pptdev_list;

#define PPT_MINOR_NAME "ppt"

/* Access attributes used for BAR register mappings. */
static ddi_device_acc_attr_t ppt_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};
/*
 * Open entry point.  No per-open state is kept, so any open succeeds;
 * access control is handled elsewhere (device node permissions).
 */
static int
ppt_open(dev_t *devp, int flag, int otyp, cred_t *cr)
{
	return (0);
}
/* Convert a BAR config-space offset into an index into pptd_bars[]. */
#define BAR_TO_IDX(bar) (((bar) - PCI_CONF_BASE0) / PCI_BAR_SZ_32)
/* True iff 'b' is a properly-aligned offset within the BAR0..BAR5 range. */
#define BAR_VALID(b) ( \
	(b) >= PCI_CONF_BASE0 && \
	(b) <= PCI_CONF_BASE5 && \
	((b) & (PCI_BAR_SZ_32-1)) == 0)
/*
 * Ioctl entry point: config-space reads/writes, BAR queries, and
 * I/O-space BAR register access on behalf of userland (the bhyve
 * pass-thru emulation).
 *
 * Returns 0 on success or an errno value (ENOENT for a bad minor,
 * EFAULT on copy failures, EINVAL for bad widths/BARs/offsets).
 */
static int
ppt_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	minor_t minor = getminor(dev);
	struct pptdev *ppt;
	void *data = (void *)arg;

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL) {
		return (ENOENT);
	}

	switch (cmd) {
	case PPT_CFG_READ: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			cio.pci_data = pci_config_get32(cfg, cio.pci_off);
			break;
		case 2:
			cio.pci_data = pci_config_get16(cfg, cio.pci_off);
			break;
		case 1:
			cio.pci_data = pci_config_get8(cfg, cio.pci_off);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&cio, data, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_CFG_WRITE: {
		struct ppt_cfg_io cio;
		ddi_acc_handle_t cfg = ppt->pptd_cfg;

		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
			return (EFAULT);
		}
		switch (cio.pci_width) {
		case 4:
			pci_config_put32(cfg, cio.pci_off, cio.pci_data);
			break;
		case 2:
			pci_config_put16(cfg, cio.pci_off, cio.pci_data);
			break;
		case 1:
			pci_config_put8(cfg, cio.pci_off, cio.pci_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}
	case PPT_BAR_QUERY: {
		struct ppt_bar_query barg;
		struct pptbar *pbar;

		if (ddi_copyin(data, &barg, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		if (barg.pbq_baridx >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[barg.pbq_baridx];

		/* An unpopulated BAR slot has zero base and size. */
		if (pbar->base == 0 || pbar->size == 0) {
			return (ENOENT);
		}
		barg.pbq_type = pbar->type;
		barg.pbq_base = pbar->base;
		barg.pbq_size = pbar->size;

		if (ddi_copyout(&barg, data, sizeof (barg), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_READ: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;

		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		/* Reject accesses which would fall outside the mapped BAR. */
		if ((uint64_t)bio.pbi_off + bio.pbi_width > pbar->size) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			bio.pbi_data = ddi_get32(pbar->io_handle, addr);
			break;
		case 2:
			bio.pbi_data = ddi_get16(pbar->io_handle, addr);
			break;
		case 1:
			bio.pbi_data = ddi_get8(pbar->io_handle, addr);
			break;
		default:
			return (EINVAL);
		}

		if (ddi_copyout(&bio, data, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		return (0);
	}
	case PPT_BAR_WRITE: {
		struct ppt_bar_io bio;
		struct pptbar *pbar;
		void *addr;
		uint_t rnum;

		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
			return (EFAULT);
		}
		rnum = bio.pbi_bar;
		if (rnum >= PCI_BASE_NUM) {
			return (EINVAL);
		}
		pbar = &ppt->pptd_bars[rnum];
		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
			return (EINVAL);
		}
		/* Reject accesses which would fall outside the mapped BAR. */
		if ((uint64_t)bio.pbi_off + bio.pbi_width > pbar->size) {
			return (EINVAL);
		}
		addr = pbar->io_ptr + bio.pbi_off;

		switch (bio.pbi_width) {
		case 4:
			ddi_put32(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 2:
			ddi_put16(pbar->io_handle, addr, bio.pbi_data);
			break;
		case 1:
			ddi_put8(pbar->io_handle, addr, bio.pbi_data);
			break;
		default:
			return (EINVAL);
		}

		return (0);
	}
	default:
		return (ENOTTY);
	}

	return (0);
}
/*
 * Return the index (BIR) of the BAR which holds this device's MSI-X table,
 * or -1 if the MSI-X capability is absent or its table-offset register
 * cannot be read.
 */
static int
ppt_find_msix_table_bar(struct pptdev *ppt)
{
	uint16_t cap_base;
	uint32_t tbl_off;

	if (PCI_CAP_LOCATE(ppt->pptd_cfg, PCI_CAP_ID_MSI_X, &cap_base) !=
	    DDI_SUCCESS) {
		return (-1);
	}

	tbl_off = pci_config_get32(ppt->pptd_cfg,
	    cap_base + PCI_MSIX_TBL_OFFSET);
	if (tbl_off == PCI_EINVAL32) {
		return (-1);
	}

	return (tbl_off & PCI_MSIX_TBL_BIR_MASK);
}
/*
 * Devmap entry point: lets userland mmap device memory, restricted to the
 * BAR which contains the MSI-X table.  The requested offset must be
 * page-aligned; only the native data model is supported.
 */
static int
ppt_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	minor_t minor;
	struct pptdev *ppt;
	int err, bar;
	uint_t ddireg;

	minor = getminor(dev);

	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL)
		return (ENXIO);

#ifdef _MULTI_DATAMODEL
	/* Only native-model consumers are supported. */
	if (ddi_model_convert_from(model) != DDI_MODEL_NONE)
		return (ENXIO);
#endif

	if (off < 0 || off != P2ALIGN(off, PAGESIZE))
		return (EINVAL);

	/* Restrict mappings to the BAR holding the MSI-X table. */
	if ((bar = ppt_find_msix_table_bar(ppt)) == -1)
		return (EINVAL);

	ddireg = ppt->pptd_bars[bar].ddireg;

	/* A zero ddireg means the BAR was never populated by the crawl. */
	if (ddireg == 0)
		return (EINVAL);

	err = devmap_devmem_setup(dhp, ppt->pptd_dip, NULL, ddireg, off, len,
	    PROT_USER | PROT_READ | PROT_WRITE, IOMEM_DATA_CACHED, &ppt_attr);
	if (err == DDI_SUCCESS)
		*maplen = len;

	return (err);
}
static void
ppt_bar_wipe(struct pptdev *ppt)
{
uint_t i;
for (i = 0; i < PCI_BASE_NUM; i++) {
struct pptbar *pbar = &ppt->pptd_bars[i];
if (pbar->type == PCI_ADDR_IO && pbar->io_handle != NULL) {
ddi_regs_map_free(&pbar->io_handle);
}
}
bzero(&ppt->pptd_bars, sizeof (ppt->pptd_bars));
}
static int
ppt_bar_crawl(struct pptdev *ppt)
{
pci_regspec_t *regs;
uint_t rcount, i;
int err = 0, rlen;
if (ddi_getlongprop(DDI_DEV_T_ANY, ppt->pptd_dip, DDI_PROP_DONTPASS,
"assigned-addresses", (caddr_t)®s, &rlen) != DDI_PROP_SUCCESS) {
return (EIO);
}
VERIFY3S(rlen, >, 0);
rcount = rlen / sizeof (pci_regspec_t);
for (i = 0; i < rcount; i++) {
pci_regspec_t *reg = ®s[i];
struct pptbar *pbar;
uint_t bar, rnum;
DTRACE_PROBE1(ppt__crawl__reg, pci_regspec_t *, reg);
bar = PCI_REG_REG_G(reg->pci_phys_hi);
if (!BAR_VALID(bar)) {
continue;
}
rnum = BAR_TO_IDX(bar);
pbar = &ppt->pptd_bars[rnum];
if (pbar->base != 0 || pbar->size != 0) {
err = EEXIST;
break;
}
pbar->ddireg = i + 1;
pbar->type = reg->pci_phys_hi & PCI_ADDR_MASK;
pbar->base = ((uint64_t)reg->pci_phys_mid << 32) |
(uint64_t)reg->pci_phys_low;
pbar->size = ((uint64_t)reg->pci_size_hi << 32) |
(uint64_t)reg->pci_size_low;
if (pbar->type == PCI_ADDR_IO) {
err = ddi_regs_map_setup(ppt->pptd_dip, rnum,
&pbar->io_ptr, 0, 0, &ppt_attr, &pbar->io_handle);
if (err != 0) {
break;
}
}
}
kmem_free(regs, rlen);
if (err != 0) {
ppt_bar_wipe(ppt);
}
return (err);
}
/*
 * Validate that [base, base + size) is a page-aligned, non-wrapping range
 * which lies entirely within one of the device's memory-space BARs.
 */
static boolean_t
ppt_bar_verify_mmio(struct pptdev *ppt, uint64_t base, uint64_t size)
{
	const uint64_t map_end = base + size;

	/* Reject zero-length and address-wrapping requests. */
	if (map_end <= base)
		return (B_FALSE);
	/* Base and size must both be page-aligned. */
	if ((base & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0)
		return (B_FALSE);

	for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
		const struct pptbar *bar = &ppt->pptd_bars[i];
		uint64_t bar_end;

		if (bar->type != PCI_ADDR_MEM32 &&
		    bar->type != PCI_ADDR_MEM64)
			continue;

		bar_end = bar->base + bar->size;
		if (base >= bar->base && base < bar_end &&
		    map_end >= bar->base && map_end <= bar_end) {
			/* Wholly contained within this memory BAR. */
			return (B_TRUE);
		}
	}
	return (B_FALSE);
}
/*
 * Enable or disable the device's decoding of accesses.  When enabling,
 * busmastering is turned on along with memory/IO space decode for each
 * space type present among the cached BARs; when disabling, all three are
 * cleared.  Returns B_FALSE if config space could not be mapped.
 */
static boolean_t
ppt_toggle_bar(struct pptdev *ppt, boolean_t enable)
{
	ddi_acc_handle_t hdl;
	uint16_t cmd;

	if (pci_config_setup(ppt->pptd_dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	cmd = pci_config_get16(hdl, PCI_CONF_COMM);
	if (!enable) {
		cmd &= ~(PCI_COMM_ME | PCI_COMM_MAE | PCI_COMM_IO);
	} else {
		cmd |= PCI_COMM_ME;
		for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
			switch (ppt->pptd_bars[i].type) {
			case PCI_ADDR_MEM32:
			case PCI_ADDR_MEM64:
				cmd |= PCI_COMM_MAE;
				break;
			case PCI_ADDR_IO:
				cmd |= PCI_COMM_IO;
				break;
			default:
				break;
			}
		}
	}
	pci_config_put16(hdl, PCI_CONF_COMM, cmd);
	pci_config_teardown(&hdl);
	return (B_TRUE);
}
/*
 * Attach: allocate per-instance soft state, set up config-space access,
 * cache the device's BARs, create the minor node, and disable BAR decode
 * until the device is assigned to a VM.  On failure, unwind whatever was
 * acquired (the individual teardown calls tolerate un-set-up state).
 */
static int
ppt_ddi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	struct pptdev *ppt = NULL;
	/* NOTE(review): 'name' is unused here -- possibly vestigial. */
	char name[PPT_MAXNAMELEN];
	int inst;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	inst = ddi_get_instance(dip);

	if (ddi_soft_state_zalloc(ppt_state, inst) != DDI_SUCCESS) {
		goto fail;
	}
	VERIFY(ppt = ddi_get_soft_state(ppt_state, inst));
	ppt->pptd_dip = dip;
	ddi_set_driver_private(dip, ppt);

	if (pci_config_setup(dip, &ppt->pptd_cfg) != DDI_SUCCESS) {
		goto fail;
	}
	if (ppt_bar_crawl(ppt) != 0) {
		goto fail;
	}
	if (ddi_create_minor_node(dip, PPT_MINOR_NAME, S_IFCHR, inst,
	    DDI_PSEUDO, 0) != DDI_SUCCESS) {
		goto fail;
	}

	/* Keep the device quiescent until it is assigned to a VM. */
	ppt_toggle_bar(ppt, B_FALSE);

	mutex_enter(&pptdev_mtx);
	list_insert_tail(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	return (DDI_SUCCESS);

fail:
	if (ppt != NULL) {
		/* A NULL name removes any minor nodes created above. */
		ddi_remove_minor_node(dip, NULL);
		if (ppt->pptd_cfg != NULL) {
			pci_config_teardown(&ppt->pptd_cfg);
		}
		ppt_bar_wipe(ppt);
		ddi_soft_state_free(ppt_state, inst);
	}
	return (DDI_FAILURE);
}
/*
 * Detach: refuse while the device is assigned to a VM; otherwise unlink
 * the instance from the global list and release everything acquired in
 * attach (minor node, BAR mappings, config handle, soft state).
 */
static int
ppt_ddi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct pptdev *ppt;
	int inst;

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ppt = ddi_get_driver_private(dip);
	inst = ddi_get_instance(dip);

	ASSERT3P(ddi_get_soft_state(ppt_state, inst), ==, ppt);

	mutex_enter(&pptdev_mtx);
	/* Cannot detach while the device is owned by a VM. */
	if (ppt->vm != NULL) {
		mutex_exit(&pptdev_mtx);
		return (DDI_FAILURE);
	}
	list_remove(&pptdev_list, ppt);
	mutex_exit(&pptdev_mtx);

	ddi_remove_minor_node(dip, PPT_MINOR_NAME);
	ppt_bar_wipe(ppt);
	pci_config_teardown(&ppt->pptd_cfg);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(ppt_state, inst);

	return (DDI_SUCCESS);
}
/*
 * getinfo(9E) entry point: translate a dev_t into either the owning
 * devinfo node or the instance number.
 */
static int
ppt_ddi_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	int inst = getminor((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO: {
		struct pptdev *ppt;

		ppt = ddi_get_soft_state(ppt_state, inst);
		if (ppt == NULL)
			return (DDI_FAILURE);
		*result = (void *)ppt->pptd_dip;
		return (DDI_SUCCESS);
	}
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)inst;
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}
/* Character device entry points for the ppt driver. */
static struct cb_ops ppt_cb_ops = {
	ppt_open,
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	ppt_ioctl,
	ppt_devmap,	/* devmap */
	NULL,		/* mmap */
	NULL,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,
	CB_REV
};
/* Device operations for the ppt driver. */
static struct dev_ops ppt_ops = {
	DEVO_REV,
	0,			/* refcnt */
	ppt_ddi_info,
	nulldev,		/* identify */
	nulldev,		/* probe */
	ppt_ddi_attach,
	ppt_ddi_detach,
	nodev,			/* reset */
	&ppt_cb_ops,
	(struct bus_ops *)NULL
};
/* Loadable-module linkage for the "bhyve pci pass-thru" driver. */
static struct modldrv modldrv = {
	&mod_driverops,
	"bhyve pci pass-thru",
	&ppt_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
/*
 * Module initialization: set up the global lock/list and per-instance soft
 * state, then install the module and cache our major number for use by
 * ppt_findf().  On any failure, everything initialized here is torn down
 * again (the original leaked pptdev_mtx/pptdev_list and called
 * ddi_soft_state_fini() even when its init had failed).
 */
int
_init(void)
{
	int error;

	mutex_init(&pptdev_mtx, NULL, MUTEX_DRIVER, NULL);
	list_create(&pptdev_list, sizeof (struct pptdev),
	    offsetof(struct pptdev, pptd_node));

	error = ddi_soft_state_init(&ppt_state, sizeof (struct pptdev), 0);
	if (error != 0)
		goto fail;

	error = mod_install(&modlinkage);
	if (error != 0) {
		ddi_soft_state_fini(&ppt_state);
		goto fail;
	}

	ppt_major = ddi_name_to_major("ppt");
	return (0);

fail:
	list_destroy(&pptdev_list);
	mutex_destroy(&pptdev_mtx);
	return (error);
}
int
_fini(void)
{
int error;
error = mod_remove(&modlinkage);
if (error)
return (error);
ddi_soft_state_fini(&ppt_state);
return (0);
}
/*
 * Report module information via the standard modlinkage.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
/*
 * Poll the PCIe Device Status register until the device reports no pending
 * transactions, sleeping in slices of at most 100ms, up to a total of
 * max_delay_us.  Returns B_TRUE once idle, B_FALSE on setup failure or if
 * the budget is exhausted while transactions remain pending.
 */
static boolean_t
ppt_wait_for_pending_txn(dev_info_t *dip, uint_t max_delay_us)
{
	ddi_acc_handle_t hdl;
	uint16_t cap_ptr;
	boolean_t idle = B_FALSE;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS) {
		pci_config_teardown(&hdl);
		return (B_FALSE);
	}

	for (;;) {
		uint16_t devsts;
		uint_t slice;

		devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
		if ((devsts & PCIE_DEVSTS_TRANS_PENDING) == 0) {
			idle = B_TRUE;
			break;
		}
		if (max_delay_us == 0)
			break;

		/* Sleep at most 100ms at a time before re-checking. */
		slice = MIN(max_delay_us, 100000);
		delay(drv_usectohz(slice));
		max_delay_us -= slice;
	}

	pci_config_teardown(&hdl);
	return (idle);
}
/*
 * Determine the device's maximum PCIe completion timeout, in microseconds,
 * from its Device Capabilities 2 / Device Control 2 registers.  Falls back
 * to a 50ms default when the device predates PCIe 2.0, advertises no
 * timeout ranges, or config space cannot be read.
 *
 * Fix: the original masked the DEVCAP2 read with the DEVCTL2 range macro
 * and vice versa; each register is now masked with its own macro.
 */
static uint_t
ppt_max_completion_tmo_us(dev_info_t *dip)
{
	uint_t timo = 0;
	uint16_t cap_ptr;
	ddi_acc_handle_t hdl;
	uint_t timo_ranges[] = {	/* timeout ranges */
		50000,		/* no timeout range support	*/
		100,		/* Range A: 50us-100us		*/
		10000,		/* Range A: 1ms-10ms		*/
		0,
		0,
		55000,		/* Range B: 16ms-55ms		*/
		210000,		/* Range B: 65ms-210ms		*/
		0,
		0,
		900000,		/* Range C: 260ms-900ms		*/
		3500000,	/* Range C: 1s-3.5s		*/
		0,
		0,
		13000000,	/* Range D: 4s-13s		*/
		64000000,	/* Range D: 17s-64s		*/
		0
	};

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (50000);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto out;

	/* Completion timeout ranges only exist from PCIe 2.0 onward. */
	if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_PCIECAP) &
	    PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0)
		goto out;

	/* Does the device advertise any completion timeout ranges? */
	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP2) &
	    PCIE_DEVCAP2_COM_TO_RANGE_MASK) == 0)
		goto out;

	/* Look up the currently-programmed timeout range. */
	timo = timo_ranges[PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL2) &
	    PCIE_DEVCTL2_COM_TO_RANGE_MASK];

out:
	if (timo == 0)
		timo = 50000;
	pci_config_teardown(&hdl);
	return (timo);
}
/*
 * Perform a PCIe Function Level Reset on the device: disable busmastering,
 * wait (bounded by the device's completion timeout, with a 10ms floor) for
 * pending transactions to drain, then initiate the FLR and sleep 100ms
 * plus any extra completion delay.  With 'force', the reset proceeds even
 * if transactions remain pending.  Returns B_TRUE iff the FLR was issued.
 */
static boolean_t
ppt_flr(dev_info_t *dip, boolean_t force)
{
	uint16_t cap_ptr, ctl, cmd;
	ddi_acc_handle_t hdl;
	uint_t compl_delay = 0, max_delay_us;

	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
		return (B_FALSE);

	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
		goto fail;

	/* The device must advertise FLR support. */
	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP) & PCIE_DEVCAP_FLR)
	    == 0)
		goto fail;

	max_delay_us = MAX(ppt_max_completion_tmo_us(dip), 10000);

	/*
	 * Disable busmastering so no new transactions are generated while
	 * waiting for the device to go idle.
	 */
	cmd = pci_config_get16(hdl, PCI_CONF_COMM);
	pci_config_put16(hdl, PCI_CONF_COMM, cmd & ~PCI_COMM_ME);
	if (!ppt_wait_for_pending_txn(dip, max_delay_us)) {
		if (!force) {
			/* Restore busmastering before bailing out. */
			pci_config_put16(hdl, PCI_CONF_COMM, cmd);
			goto fail;
		}
		dev_err(dip, CE_WARN,
		    "?Resetting with transactions pending after %u us\n",
		    max_delay_us);

		/*
		 * Forced reset with pending transactions: extend the
		 * post-FLR wait by the completion timeout (in ms, >= 10ms).
		 */
		compl_delay = MAX(10, (ppt_max_completion_tmo_us(dip) / 1000));
	}

	/* Initiate the reset and wait out the 100ms FLR delay. */
	ctl = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL);
	(void) PCI_CAP_PUT16(hdl, 0, cap_ptr, PCIE_DEVCTL,
	    ctl | PCIE_DEVCTL_INITIATE_FLR);
	delay(drv_usectohz((100 + compl_delay) * 1000));

	pci_config_teardown(&hdl);
	return (B_TRUE);

fail:
	pci_config_teardown(&hdl);
	return (B_FALSE);
}
/*
 * Resolve an open file descriptor to the pptdev instance it refers to.
 *
 * The fd must be an open ppt minor (checked via the cached ppt_major)
 * whose device is owned by 'vm'; callers pass vm == NULL to require an
 * unowned device.  On success the fd hold from getf() is retained and the
 * caller must releasef(fd) when finished; on failure the hold is dropped
 * here.  Caller must hold pptdev_mtx.
 */
static int
ppt_findf(struct vm *vm, int fd, struct pptdev **pptp)
{
	struct pptdev *ppt = NULL;
	file_t *fp;
	vattr_t va;
	int err = 0;

	ASSERT(MUTEX_HELD(&pptdev_mtx));

	if ((fp = getf(fd)) == NULL)
		return (EBADF);

	va.va_mask = AT_RDEV;
	if (VOP_GETATTR(fp->f_vnode, &va, NO_FOLLOW, fp->f_cred, NULL) != 0 ||
	    getmajor(va.va_rdev) != ppt_major) {
		err = EBADF;
		goto fail;
	}

	ppt = ddi_get_soft_state(ppt_state, getminor(va.va_rdev));
	if (ppt == NULL) {
		err = EBADF;
		goto fail;
	}

	/* Ownership must match the caller's expectation exactly. */
	if (ppt->vm != vm) {
		err = EBUSY;
		goto fail;
	}

	*pptp = ppt;
	return (0);

fail:
	releasef(fd);
	return (err);
}
static void
ppt_unmap_all_mmio(struct vm *vm, struct pptdev *ppt)
{
int i;
struct pptseg *seg;
for (i = 0; i < MAX_MMIOSEGS; i++) {
seg = &ppt->mmio[i];
if (seg->len == 0)
continue;
(void) vm_unmap_mmio(vm, seg->gpa, seg->len);
bzero(seg, sizeof (struct pptseg));
}
}
/*
 * Disable, remove, and free every configured MSI/FIXED vector, then
 * release the handle array and reset all MSI bookkeeping.  A no-op when
 * no vectors are configured.
 */
static void
ppt_teardown_msi(struct pptdev *ppt)
{
	if (ppt->msi.num_msgs == 0)
		return;

	for (int vec = 0; vec < ppt->msi.num_msgs; vec++) {
		int cap = 0;

		(void) ddi_intr_get_cap(ppt->msi.inth[vec], &cap);
		if ((cap & DDI_INTR_FLAG_BLOCK) != 0)
			ddi_intr_block_disable(&ppt->msi.inth[vec], 1);
		else
			ddi_intr_disable(ppt->msi.inth[vec]);

		ddi_intr_remove_handler(ppt->msi.inth[vec]);
		ddi_intr_free(ppt->msi.inth[vec]);
		ppt->msi.inth[vec] = NULL;
	}

	kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
	ppt->msi.inth = NULL;
	ppt->msi.inth_sz = 0;
	ppt->msi.is_fixed = B_FALSE;
	ppt->msi.num_msgs = 0;
}
/*
 * Disable and remove the handler for one MSI-X vector, if it has been set
 * up.  The interrupt handle itself remains allocated (freed later by
 * ppt_teardown_msix()).
 */
static void
ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
{
	int cap = 0;

	if (ppt->msix.inth == NULL || ppt->msix.inth[idx] == NULL)
		return;

	(void) ddi_intr_get_cap(ppt->msix.inth[idx], &cap);
	if ((cap & DDI_INTR_FLAG_BLOCK) != 0)
		ddi_intr_block_disable(&ppt->msix.inth[idx], 1);
	else
		ddi_intr_disable(ppt->msix.inth[idx]);

	ddi_intr_remove_handler(ppt->msix.inth[idx]);
}
/*
 * Tear down every MSI-X vector: remove handlers, free the interrupt
 * handles, and release the handle and argument arrays.  A no-op when
 * MSI-X is not configured.
 */
static void
ppt_teardown_msix(struct pptdev *ppt)
{
	if (ppt->msix.num_msgs == 0)
		return;

	for (uint_t vec = 0; vec < ppt->msix.num_msgs; vec++)
		ppt_teardown_msix_intr(ppt, vec);

	if (ppt->msix.inth != NULL) {
		for (uint_t vec = 0; vec < ppt->msix.num_msgs; vec++)
			ddi_intr_free(ppt->msix.inth[vec]);
		kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
		ppt->msix.inth = NULL;
		ppt->msix.inth_sz = 0;
		kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
		ppt->msix.arg = NULL;
		ppt->msix.arg_sz = 0;
	}
	ppt->msix.num_msgs = 0;
}
int
ppt_assigned_devices(struct vm *vm)
{
struct pptdev *ppt;
uint_t num = 0;
mutex_enter(&pptdev_mtx);
for (ppt = list_head(&pptdev_list); ppt != NULL;
ppt = list_next(&pptdev_list, ppt)) {
if (ppt->vm == vm) {
num++;
}
}
mutex_exit(&pptdev_mtx);
return (num);
}
/*
 * Determine whether guest-physical address 'gpa' falls within any MMIO
 * segment of a pass-thru device assigned to 'vm'.
 *
 * Fix: the original initialized the iterator from list_head() before
 * acquiring pptdev_mtx -- an unlocked (and dead, immediately-overwritten)
 * read of the shared list.
 */
boolean_t
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
	struct pptdev *ppt;

	mutex_enter(&pptdev_mtx);
	for (ppt = list_head(&pptdev_list); ppt != NULL;
	    ppt = list_next(&pptdev_list, ppt)) {
		if (ppt->vm != vm) {
			continue;
		}
		for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
			struct pptseg *seg = &ppt->mmio[i];

			if (seg->len == 0)
				continue;
			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) {
				mutex_exit(&pptdev_mtx);
				return (B_TRUE);
			}
		}
	}
	mutex_exit(&pptdev_mtx);

	return (B_FALSE);
}
/*
 * Assign an unowned pass-thru device (identified by fd) to 'vm': save its
 * config registers, reset it via FLR, restore and re-save the registers,
 * re-enable BAR decoding, and move it from the host IOMMU domain into the
 * VM's domain.
 */
int
ppt_assign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	/* vm == NULL: ppt_findf() only matches an unowned device. */
	err = ppt_findf(NULL, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}
	/* Best-effort reset; failure is tolerated (force == B_TRUE). */
	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Restore the state clobbered by the reset, then save again so the
	 * same state can be restored at unassign time.
	 */
	if (pci_restore_config_regs(ppt->pptd_dip) != DDI_SUCCESS ||
	    pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
		err = EIO;
		goto done;
	}

	/* Re-enable BAR decode now that the guest will drive the device. */
	ppt_toggle_bar(ppt, B_TRUE);

	ppt->vm = vm;
	iommu_remove_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	pf_set_passthru(ppt->pptd_dip, B_TRUE);

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}
/*
 * If the device has a Power Management capability and is not in D0,
 * force it back into the D0 power state.
 */
static void
ppt_reset_pci_power_state(dev_info_t *dip)
{
	ddi_acc_handle_t cfg;
	uint16_t cap_ptr;

	if (pci_config_setup(dip, &cfg) != DDI_SUCCESS)
		return;

	if (PCI_CAP_LOCATE(cfg, PCI_CAP_ID_PM, &cap_ptr) == DDI_SUCCESS) {
		uint16_t pmcsr;

		pmcsr = PCI_CAP_GET16(cfg, 0, cap_ptr, PCI_PMCSR);
		if ((pmcsr & PCI_PMCSR_STATE_MASK) != PCI_PMCSR_D0) {
			pmcsr &= ~PCI_PMCSR_STATE_MASK;
			pmcsr |= PCI_PMCSR_D0;
			(void) PCI_CAP_PUT16(cfg, 0, cap_ptr, PCI_PMCSR,
			    pmcsr);
		}
	}

	pci_config_teardown(&cfg);
}
/*
 * Revoke a device's assignment to its VM: reset it, restore the config
 * state saved at assign time, drop guest MMIO mappings and interrupts,
 * and return it to the host IOMMU domain.  Caller must hold pptdev_mtx
 * and guarantee ppt->vm != NULL.
 */
static void
ppt_do_unassign(struct pptdev *ppt)
{
	struct vm *vm = ppt->vm;

	ASSERT3P(vm, !=, NULL);
	ASSERT(MUTEX_HELD(&pptdev_mtx));

	/* Best-effort reset before handing the device back to the host. */
	ppt_flr(ppt->pptd_dip, B_TRUE);

	/*
	 * Force the device to D0 before restoring the config registers
	 * saved at assign time -- presumably so the restore takes effect
	 * even if the guest powered the device down (TODO confirm).
	 */
	ppt_reset_pci_power_state(ppt->pptd_dip);
	(void) pci_restore_config_regs(ppt->pptd_dip);

	pf_set_passthru(ppt->pptd_dip, B_FALSE);

	ppt_unmap_all_mmio(vm, ppt);
	ppt_teardown_msi(ppt);
	ppt_teardown_msix(ppt);
	iommu_remove_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
	iommu_add_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
	ppt->vm = NULL;
}
/*
 * Unassign the pass-thru device behind 'pptfd' from 'vm'.  The fd must
 * refer to a device currently owned by that VM.
 */
int
ppt_unassign_device(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err == 0) {
		ppt_do_unassign(ppt);
		releasef(pptfd);
	}
	mutex_exit(&pptdev_mtx);
	return (err);
}
void
ppt_unassign_all(struct vm *vm)
{
struct pptdev *ppt;
mutex_enter(&pptdev_mtx);
for (ppt = list_head(&pptdev_list); ppt != NULL;
ppt = list_next(&pptdev_list, ppt)) {
if (ppt->vm == vm) {
ppt_do_unassign(ppt);
}
}
mutex_exit(&pptdev_mtx);
}
/*
 * Map [hpa, hpa + len) of the device's memory BAR space into the guest at
 * 'gpa'.  The range must be page-aligned, non-wrapping, and wholly within
 * one memory BAR; the mapping is recorded in the first free mmio slot.
 * Returns ENOSPC when no slot is free.
 */
int
ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len,
    vm_paddr_t hpa)
{
	struct pptdev *ppt;
	int err;

	/* Require page-aligned, non-empty, non-wrapping ranges. */
	if (len == 0 || (len & PAGEOFFSET) != 0 ||
	    (gpa & PAGEOFFSET) != 0 || (hpa & PAGEOFFSET) != 0 ||
	    gpa + len < gpa || hpa + len < hpa)
		return (EINVAL);

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (!ppt_bar_verify_mmio(ppt, hpa, len)) {
		err = EINVAL;
		goto done;
	}

	err = ENOSPC;
	for (uint_t slot = 0; slot < MAX_MMIOSEGS; slot++) {
		struct pptseg *seg = &ppt->mmio[slot];

		if (seg->len != 0)
			continue;

		err = vm_map_mmio(vm, gpa, len, hpa);
		if (err == 0) {
			seg->gpa = gpa;
			seg->len = len;
		}
		break;
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}
/*
 * Remove the guest MMIO mapping previously created at (gpa, len) for the
 * device behind 'pptfd'.  Returns ENOENT if no matching segment exists.
 */
int
ppt_unmap_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len)
{
	struct pptdev *ppt;
	int err;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	err = ENOENT;
	for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
		struct pptseg *seg = &ppt->mmio[i];

		if (seg->gpa != gpa || seg->len != len)
			continue;

		err = vm_unmap_mmio(vm, seg->gpa, seg->len);
		if (err == 0) {
			seg->gpa = 0;
			seg->len = 0;
		}
		break;
	}

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}
/*
 * Interrupt handler shared by all pass-thru vectors: inject the vector's
 * MSI address/data pair into the owning VM's local APIC.  When using
 * FIXED interrupts the handler reports UNCLAIMED so the system continues
 * polling other handlers on the shared line.
 */
static uint_t
pptintr(caddr_t arg, caddr_t unused)
{
	struct pptintr_arg *pptarg = (struct pptintr_arg *)arg;
	struct pptdev *ppt = pptarg->pptdev;

	if (ppt->vm != NULL) {
		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
	} else {
		/* No owning VM: silently drop the interrupt. */
	}

	return (ppt->msi.is_fixed ? DDI_INTR_UNCLAIMED : DDI_INTR_CLAIMED);
}
/*
 * Configure 'numvec' MSI vectors (falling back to FIXED interrupts when
 * MSI is unavailable) for the device behind 'pptfd', delivering to guest
 * address 'addr' with data 'msg' + vector index.  numvec == 0 tears down
 * any existing configuration.  Fails with EBUSY while MSI-X is active.
 *
 * Fixes vs. original: msi.inth/inth_sz are reset after a failed
 * ddi_intr_alloc() (no dangling freed pointer), and a partial allocation
 * (msi_count != numvec) now frees the allocated handles instead of
 * leaking them (ppt_teardown_msi() is a no-op while num_msgs == 0).
 */
int
ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg,
    int numvec)
{
	int i, msi_count, intr_type;
	struct pptdev *ppt;
	int err = 0;

	if (numvec < 0 || numvec > MAX_MSIMSGS)
		return (EINVAL);

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI while MSI-X is active. */
	if (ppt->msix.num_msgs != 0 && numvec != 0) {
		err = EBUSY;
		goto done;
	}

	/* Free any previously-configured vectors. */
	ppt_teardown_msi(ppt);

	if (numvec == 0) {
		/* Teardown was all that was requested. */
		goto done;
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    &msi_count) != DDI_SUCCESS) {
		/* No MSI support: fall back to FIXED interrupts. */
		if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_FIXED,
		    &msi_count) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}
		intr_type = DDI_INTR_TYPE_FIXED;
		ppt->msi.is_fixed = B_TRUE;
	} else {
		intr_type = DDI_INTR_TYPE_MSI;
	}

	if (numvec > msi_count) {
		err = EINVAL;
		goto done;
	}

	ppt->msi.inth_sz = numvec * sizeof (ddi_intr_handle_t);
	ppt->msi.inth = kmem_zalloc(ppt->msi.inth_sz, KM_SLEEP);
	if (ddi_intr_alloc(ppt->pptd_dip, ppt->msi.inth, intr_type, 0,
	    numvec, &msi_count, 0) != DDI_SUCCESS) {
		/* Don't leave a dangling pointer/size behind. */
		kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
		ppt->msi.inth = NULL;
		ppt->msi.inth_sz = 0;
		err = EINVAL;
		goto done;
	}

	if (numvec != msi_count) {
		/*
		 * Partial allocation: free the handles that were allocated
		 * before bailing (num_msgs is still 0, so ppt_teardown_msi()
		 * would not clean these up).
		 */
		for (i = 0; i < msi_count; i++)
			ddi_intr_free(ppt->msi.inth[i]);
		kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
		ppt->msi.inth = NULL;
		ppt->msi.inth_sz = 0;
		ppt->msi.is_fixed = B_FALSE;
		err = EINVAL;
		goto done;
	}

	/* Set up and enable the handler for each vector. */
	for (i = 0; i < numvec; i++) {
		int res, intr_cap = 0;

		ppt->msi.num_msgs = i + 1;
		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].addr = addr;
		ppt->msi.arg[i].msg_data = msg + i;

		if (ddi_intr_add_handler(ppt->msi.inth[i], pptintr,
		    &ppt->msi.arg[i], NULL) != DDI_SUCCESS)
			break;

		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msi.inth[i], 1);
		else
			res = ddi_intr_enable(ppt->msi.inth[i]);

		if (res != DDI_SUCCESS)
			break;
	}
	if (i < numvec) {
		/* Some vector failed to set up: unwind everything. */
		ppt_teardown_msi(ppt);
		err = ENXIO;
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}
/*
 * Configure guest delivery for MSI-X vector 'idx' of the device behind
 * 'pptfd'.  On first use, interrupt handles are allocated for every
 * available MSI-X vector.  A vector_control with the mask bit set tears
 * the vector's handler down instead.  Fails with EBUSY while MSI is
 * active.
 *
 * Fixes vs. original: msix.num_msgs is reset after a failed
 * ddi_intr_alloc() (otherwise a later call would skip allocation and
 * dereference the NULL inth array); a short allocation frees only the
 * handles actually allocated; negative 'idx' values are rejected.
 */
int
ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr,
    uint64_t msg, uint32_t vector_control)
{
	struct pptdev *ppt;
	int numvec, alloced;
	int err = 0;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	/* Reject attempts to enable MSI-X while MSI is active. */
	if (ppt->msi.num_msgs != 0) {
		err = EBUSY;
		goto done;
	}

	/* First-time configuration: allocate all available vectors. */
	if (ppt->msix.num_msgs == 0) {
		dev_info_t *dip = ppt->pptd_dip;

		if (ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX,
		    &numvec) != DDI_SUCCESS) {
			err = EINVAL;
			goto done;
		}

		ppt->msix.num_msgs = numvec;
		ppt->msix.arg_sz = numvec * sizeof (ppt->msix.arg[0]);
		ppt->msix.arg = kmem_zalloc(ppt->msix.arg_sz, KM_SLEEP);
		ppt->msix.inth_sz = numvec * sizeof (ddi_intr_handle_t);
		ppt->msix.inth = kmem_zalloc(ppt->msix.inth_sz, KM_SLEEP);

		if (ddi_intr_alloc(dip, ppt->msix.inth, DDI_INTR_TYPE_MSIX, 0,
		    numvec, &alloced, 0) != DDI_SUCCESS) {
			kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
			kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
			ppt->msix.arg = NULL;
			ppt->msix.inth = NULL;
			ppt->msix.arg_sz = ppt->msix.inth_sz = 0;
			/* Reset count so a retry re-allocates cleanly. */
			ppt->msix.num_msgs = 0;
			err = EINVAL;
			goto done;
		}

		if (numvec != alloced) {
			/*
			 * Short allocation: free exactly what was allocated
			 * (the tail of inth[] holds NULL handles).
			 */
			ppt->msix.num_msgs = alloced;
			ppt_teardown_msix(ppt);
			err = EINVAL;
			goto done;
		}
	}

	/* Validate the vector index against the configured count. */
	if (idx < 0 || idx >= ppt->msix.num_msgs) {
		err = EINVAL;
		goto done;
	}

	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		int intr_cap, res;

		/* Tear down any existing handler for this vector. */
		ppt_teardown_msix_intr(ppt, idx);

		ppt->msix.arg[idx].pptdev = ppt;
		ppt->msix.arg[idx].addr = addr;
		ppt->msix.arg[idx].msg_data = msg;

		/* Set up and enable the handler for this vector. */
		if (ddi_intr_add_handler(ppt->msix.inth[idx], pptintr,
		    &ppt->msix.arg[idx], NULL) != DDI_SUCCESS) {
			err = ENXIO;
			goto done;
		}

		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
		if (intr_cap & DDI_INTR_FLAG_BLOCK)
			res = ddi_intr_block_enable(&ppt->msix.inth[idx], 1);
		else
			res = ddi_intr_enable(ppt->msix.inth[idx]);

		if (res != DDI_SUCCESS) {
			ddi_intr_remove_handler(ppt->msix.inth[idx]);
			err = ENXIO;
			goto done;
		}
	} else {
		/* Masked vector: just tear down the handler, if any. */
		ppt_teardown_msix_intr(ppt, idx);
	}

done:
	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}
/*
 * Report the number of MSI and MSI-X vectors available on the device
 * behind 'pptfd'.  A limit of -1 indicates the interrupt type is not
 * supported.
 */
int
ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit)
{
	struct pptdev *ppt;
	int err;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err != 0) {
		mutex_exit(&pptdev_mtx);
		return (err);
	}

	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
	    msilimit) != DDI_SUCCESS)
		*msilimit = -1;
	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSIX,
	    msixlimit) != DDI_SUCCESS)
		*msixlimit = -1;

	releasef(pptfd);
	mutex_exit(&pptdev_mtx);
	return (err);
}
/*
 * Tear down all MSI-X state for the device behind 'pptfd'.
 */
int
ppt_disable_msix(struct vm *vm, int pptfd)
{
	struct pptdev *ppt;
	int err;

	mutex_enter(&pptdev_mtx);
	err = ppt_findf(vm, pptfd, &ppt);
	if (err == 0) {
		ppt_teardown_msix(ppt);
		releasef(pptfd);
	}
	mutex_exit(&pptdev_mtx);
	return (err);
}