#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <dev/pci/pcireg.h>
#include <machine/vmparam.h>
#include <sys/vmm_vm.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <sys/sunndi.h>
#include "io/iommu.h"
/* DRHD flag bit 0: this unit covers all PCI devices not claimed elsewhere. */
#define VTD_DRHD_INCLUDE_PCI_ALL(Flags) (((Flags) >> 0) & 0x1)

/*
 * Memory-mapped VT-d remapping-unit registers (first 0x30 bytes of the
 * register set): version (0x00), capability (0x08), extended capability
 * (0x10), global command (0x18), global status (0x1c), root-table address
 * (0x20) and context command (0x28).
 */
struct vtdmap {
	volatile uint32_t version;
	volatile uint32_t res0;		/* reserved */
	volatile uint64_t cap;		/* capability register */
	volatile uint64_t ext_cap;	/* extended capability register */
	volatile uint32_t gcr;		/* global command register */
	volatile uint32_t gsr;		/* global status register */
	volatile uint64_t rta;		/* root-table address register */
	volatile uint64_t ccr;		/* context command register */
};
/* Capability register (cap) field accessors. */
#define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F)	/* supported AGAWs */
#define VTD_CAP_ND(cap) ((cap) & 0x7)	/* number-of-domains field */
#define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1)	/* caching mode */
#define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF)	/* superpage sizes */
#define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1)	/* write-buffer flush reqd */

/* Extended capability register (ext_cap) field accessors. */
#define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1)
#define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)	/* coherent page walks */
#define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF)	/* IOTLB reg offset */

/* Global command register bits and their status-register counterparts. */
#define VTD_GCR_WBF (1 << 27)	/* flush write buffer */
#define VTD_GCR_SRTP (1 << 30)	/* set root-table pointer */
#define VTD_GCR_TE (1U << 31)	/* translation enable */
#define VTD_GSR_WBFS (1 << 27)
#define VTD_GSR_RTPS (1 << 30)
#define VTD_GSR_TES (1U << 31)

/* Context command register bits. */
#define VTD_CCR_ICC (1UL << 63)		/* invalidate context cache */
#define VTD_CCR_CIRG_GLOBAL (1UL << 61)	/* global invalidation */

/* IOTLB invalidate register bits. */
#define VTD_IIR_IVT (1UL << 63)		/* invalidate IOTLB */
#define VTD_IIR_IIRG_GLOBAL (1ULL << 60)	/* global invalidation */
#define VTD_IIR_IIRG_DOMAIN (2ULL << 60)	/* domain invalidation */
#define VTD_IIR_IIRG_PAGE (3ULL << 60)	/* page invalidation */
#define VTD_IIR_DRAIN_READS (1ULL << 49)	/* drain pending DMA reads */
#define VTD_IIR_DRAIN_WRITES (1ULL << 48)	/* drain pending DMA writes */
#define VTD_IIR_DOMAIN_P 32

/* Root- and context-entry bits. */
#define VTD_ROOT_PRESENT 0x1
#define VTD_CTX_PRESENT 0x1
#define VTD_CTX_TT_ALL (1UL << 2)

/* Page-table entry bits. */
#define VTD_PTE_RD (1UL << 0)
#define VTD_PTE_WR (1UL << 1)
#define VTD_PTE_SUPERPAGE (1UL << 7)
#define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL)

/* Each device (dev/func byte of the rid) owns two 64-bit context words. */
#define VTD_RID2IDX(rid) (((rid) & 0xff) * 2)
/* Per-guest IOMMU domain. */
struct domain {
	uint64_t *ptp;		/* first-level page-table page */
	int pt_levels;		/* number of page-table levels */
	int addrwidth;		/* 'AW' field programmed in context entry */
	int spsmask;		/* supported superpage sizes */
	uint_t id;		/* domain id (see domain_id()) */
	vm_paddr_t maxaddr;	/* highest gpa to be mapped in the domain */
	SLIST_ENTRY(domain) next;
};
/* List of all live domains. */
static SLIST_HEAD(, domain) domhead;

#define DRHD_MAX_UNITS 16
/* Discovered DMAR hardware units and their mapped register sets. */
static ACPI_DMAR_HARDWARE_UNIT *drhds[DRHD_MAX_UNITS];
static int drhd_num;	/* valid entries in drhds[]/vtdmaps[]/vtddips[] */
static struct vtdmap *vtdmaps[DRHD_MAX_UNITS];
static int max_domains;	/* most restrictive domain-id limit of any unit */
typedef int (*drhd_ident_func_t)(void);
static dev_info_t *vtddips[DRHD_MAX_UNITS];	/* devinfo node per unit */

/*
 * Root table (one 128-bit entry per PCI bus, filled in vtd_init()) and the
 * 256 per-bus context tables it points at.
 */
static uint64_t root_table[PAGE_SIZE / sizeof (uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof (uint64_t)] __aligned(4096);
/*
 * Translate the 3-bit "number of domains" capability field into an actual
 * domain count: encoding n supports 2^(4 + 2n) domain ids, i.e. 16 for
 * n == 0 up to 64K for n == 6.  The value 7 is reserved.
 */
static int
vtd_max_domains(struct vtdmap *vtdmap)
{
	int nd;

	nd = VTD_CAP_ND(vtdmap->cap);
	if (nd >= 0 && nd <= 6)
		return (1 << (4 + 2 * nd));

	panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
}
static uint_t
domain_id(void)
{
uint_t id;
struct domain *dom;
for (id = 1; id < max_domains; id++) {
SLIST_FOREACH(dom, &domhead, next) {
if (dom->id == id)
break;
}
if (dom == NULL)
break;
}
if (id >= max_domains)
panic("domain ids exhausted");
return (id);
}
/*
 * Locate the DMA remapping unit responsible for the PCI device named by
 * 'rid' (a bus/device/function routing id).  A unit flagged
 * INCLUDE_PCI_ALL claims every device; otherwise each of the unit's
 * device-scope entries is matched against the rid's bus, device and
 * function.  Returns NULL when no unit covers the device.
 */
static struct vtdmap *
vtd_device_scope(uint16_t rid)
{
	int i, remaining, pathrem;
	char *end, *pathend;
	struct vtdmap *vtdmap;
	ACPI_DMAR_HARDWARE_UNIT *drhd;
	ACPI_DMAR_DEVICE_SCOPE *device_scope;
	ACPI_DMAR_PCI_PATH *path;

	for (i = 0; i < drhd_num; i++) {
		drhd = drhds[i];

		/* A catch-all unit covers this device by definition. */
		if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
			vtdmap = vtdmaps[i];
			return (vtdmap);
		}

		/*
		 * Walk the variable-length device-scope entries that follow
		 * the fixed DRHD header.  NOTE(review): 'remaining' is
		 * signed and compared against unsigned sizeofs; this relies
		 * on the firmware-supplied lengths being sane.
		 */
		end = (char *)drhd + drhd->Header.Length;
		remaining = drhd->Header.Length -
		    sizeof (ACPI_DMAR_HARDWARE_UNIT);
		while (remaining > sizeof (ACPI_DMAR_DEVICE_SCOPE)) {
			device_scope =
			    (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
			remaining -= device_scope->Length;
			switch (device_scope->EntryType) {
			/* 0x01 and 0x02 are PCI device entries */
			case 0x01:
			case 0x02:
				break;
			default:
				continue;
			}
			if (PCI_RID2BUS(rid) != device_scope->Bus)
				continue;

			/* Compare each PCI path element (dev/func pair). */
			pathend = (char *)device_scope + device_scope->Length;
			pathrem = device_scope->Length -
			    sizeof (ACPI_DMAR_DEVICE_SCOPE);
			while (pathrem >= sizeof (ACPI_DMAR_PCI_PATH)) {
				path = (ACPI_DMAR_PCI_PATH *)
				    (pathend - pathrem);
				pathrem -= sizeof (ACPI_DMAR_PCI_PATH);
				if (PCI_RID2SLOT(rid) != path->Device)
					continue;
				if (PCI_RID2FUNC(rid) != path->Function)
					continue;
				vtdmap = vtdmaps[i];
				return (vtdmap);
			}
		}
	}
	return (NULL);
}
/*
 * Make in-flight structure updates visible to the remapping hardware:
 * flush CPU caches when the unit does not snoop coherently, and issue a
 * write-buffer flush (spinning until it completes) when the unit
 * requires one.
 */
static void
vtd_wbflush(struct vtdmap *vtdmap)
{
	if (!VTD_ECAP_COHERENCY(vtdmap->ext_cap))
		invalidate_cache_all();

	if (VTD_CAP_RWBF(vtdmap->cap) == 0)
		return;

	vtdmap->gcr = VTD_GCR_WBF;
	for (;;) {
		if ((vtdmap->gsr & VTD_GSR_WBFS) == 0)
			break;
	}
}
/*
 * Invalidate the context cache of one remapping unit globally and spin
 * until the hardware clears the in-progress bit.
 */
static void
vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
{
	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
	do {
		/* spin until the invalidation completes */
	} while ((vtdmap->ccr & VTD_CCR_ICC) != 0);
}
/*
 * Issue a global IOTLB invalidation (draining pending DMA reads and
 * writes) on one remapping unit and wait for it to complete.  The IOTLB
 * register pair lives at ECAP.IRO (in 16-byte units) from the register
 * base; the invalidate register is the second quadword of that pair.
 */
static void
vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
{
	volatile uint64_t *iotlb_reg;
	int offset;

	vtd_wbflush(vtdmap);

	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);

	*iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
	    VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;

	/* Hardware clears IVT when the invalidation is done. */
	while ((*iotlb_reg & VTD_IIR_IVT) != 0)
		;
}
/*
 * Turn DMA translation on for one unit and wait for the status register
 * to confirm it.
 */
static void
vtd_translation_enable(struct vtdmap *vtdmap)
{
	vtdmap->gcr = VTD_GCR_TE;
	do {
		/* spin until TES is set */
	} while ((vtdmap->gsr & VTD_GSR_TES) == 0);
}
/*
 * Turn DMA translation off for one unit and wait for the status register
 * to confirm it.
 */
static void
vtd_translation_disable(struct vtdmap *vtdmap)
{
	vtdmap->gcr = 0;
	do {
		/* spin until TES clears */
	} while ((vtdmap->gsr & VTD_GSR_TES) != 0);
}
/*
 * Map the first page of a VT-d unit's register space and stash the DDI
 * access handle in the dip's driver-private slot so vtd_unmap() can
 * release it.  Returns the mapped virtual address, or NULL on failure.
 *
 * FIX: '&regs' and '&regs_attr' had been mangled to '®s'/'®s_attr'
 * (the "&reg" sequence was mis-decoded as U+00AE); restored.
 */
static void *
vtd_map(dev_info_t *dip)
{
	caddr_t regs;
	ddi_acc_handle_t hdl;
	int error;

	static ddi_device_acc_attr_t regs_attr = {
		DDI_DEVICE_ATTR_V0,
		DDI_NEVERSWAP_ACC,
		DDI_STRICTORDER_ACC,
	};

	error = ddi_regs_map_setup(dip, 0, &regs, 0, PAGE_SIZE, &regs_attr,
	    &hdl);
	if (error != DDI_SUCCESS)
		return (NULL);

	ddi_set_driver_private(dip, hdl);

	return (regs);
}
/*
 * Release the register mapping established by vtd_map(), if any.  The
 * access handle is retrieved from the dip's driver-private slot.
 */
static void
vtd_unmap(dev_info_t *dip)
{
	ddi_acc_handle_t hdl = ddi_get_driver_private(dip);

	/* Nothing to tear down if the registers were never mapped. */
	if (hdl == NULL)
		return;

	ddi_regs_map_free(&hdl);
}
/*
 * Find (or synthesize) a devinfo node for VT-d unit 'unit'.  If no
 * existing "vtd" node with that instance exists under the root nexus, a
 * new SID child is created with a "reg" property and parent-private
 * regspec describing the unit's register page at drhd->Address.
 *
 * FIX: '(int *)&reg' had been mangled to '(int *)®' (the "&reg"
 * sequence was mis-decoded as U+00AE); restored.
 */
static dev_info_t *
vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *drhd, int unit)
{
	dev_info_t *dip;
	struct ddi_parent_private_data *pdptr;
	struct regspec reg;

	/* Reuse an existing node for this unit if one was already made. */
	ndi_devi_enter(ddi_root_node());
	dip = ddi_find_devinfo("vtd", unit, 0);
	ndi_devi_exit(ddi_root_node());
	if (dip != NULL)
		return (dip);

	dip = ddi_add_child(ddi_root_node(), "vtd",
	    DEVI_SID_NODEID, unit);

	reg.regspec_bustype = 0;
	reg.regspec_addr = drhd->Address;
	reg.regspec_size = PAGE_SIZE;

	/* Attach the register page as a "reg" property on the node. */
	(void) ndi_prop_update_int_array(DDI_DEV_T_NONE,
	    dip, "reg", (int *)&reg,
	    sizeof (struct regspec) / sizeof (int));

	/*
	 * NOTE(review): applying DEVI() to the ddi_get_driver() result
	 * looks suspect — confirm against upstream before relying on it.
	 */
	ddi_set_driver(dip, ddi_get_driver(ddi_root_node()));
	DEVI(dip)->devi_bus_dma_allochdl =
	    DEVI(ddi_get_driver((ddi_root_node())));

	/* Parent-private data carries the same regspec for bus_map. */
	pdptr = kmem_zalloc(sizeof (struct ddi_parent_private_data)
	    + sizeof (struct regspec), KM_SLEEP);
	pdptr->par_nreg = 1;
	pdptr->par_reg = (struct regspec *)(pdptr + 1);
	pdptr->par_reg->regspec_bustype = 0;
	pdptr->par_reg->regspec_addr = drhd->Address;
	pdptr->par_reg->regspec_size = PAGE_SIZE;
	ddi_set_parent_data(dip, pdptr);

	return (dip);
}
/*
 * iommu_ops init entry point: discover the DMA remapping units from the
 * ACPI DMAR table (or, on FreeBSD, from tunables), map their register
 * pages, compute the common domain-id limit, and populate the root
 * table with one entry per PCI bus pointing at that bus's context
 * table.  Returns 0 on success or ENXIO when no usable unit is found.
 */
static int
vtd_init(void)
{
	int i, units, remaining, tmp;
	struct vtdmap *vtdmap;
	vm_paddr_t ctx_paddr;
	char *end;
#ifdef __FreeBSD__
	char envname[32];
	unsigned long mapaddr;
#endif
	ACPI_STATUS status;
	ACPI_TABLE_DMAR *dmar;
	ACPI_DMAR_HEADER *hdr;
	ACPI_DMAR_HARDWARE_UNIT *drhd;
#ifdef __FreeBSD__
	/*
	 * Allow the user to override the ACPI DMAR table by supplying the
	 * physical address of each remapping unit via tunables.
	 */
	for (units = 0; units < DRHD_MAX_UNITS; units++) {
		snprintf(envname, sizeof (envname), "vtd.regmap.%d.addr",
		    units);
		if (getenv_ulong(envname, &mapaddr) == 0)
			break;
		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
	}
	if (units > 0)
		goto skip_dmar;
#else
	units = 0;
#endif
	/* Search for the ACPI DMAR table. */
	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
	if (ACPI_FAILURE(status))
		return (ENXIO);
	end = (char *)dmar + dmar->Header.Length;
	remaining = dmar->Header.Length - sizeof (ACPI_TABLE_DMAR);
	while (remaining > sizeof (ACPI_DMAR_HEADER)) {
		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
		if (hdr->Length > remaining)
			break;
		/*
		 * DRHD structures are required to be enumerated before the
		 * other remapping-structure types, so stop at the first
		 * non-DRHD entry.
		 */
		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
			break;
		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
		drhds[units] = drhd;
#ifdef __FreeBSD__
		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
#else
		/* Create a devinfo node and map the unit's register page. */
		vtddips[units] = vtd_get_dip(drhd, units);
		vtdmaps[units] = (struct vtdmap *)vtd_map(vtddips[units]);
		if (vtdmaps[units] == NULL)
			goto fail;
#endif
		if (++units >= DRHD_MAX_UNITS)
			break;
		remaining -= hdr->Length;
	}
	if (units <= 0)
		return (ENXIO);
#ifdef __FreeBSD__
skip_dmar:
#endif
	drhd_num = units;
	max_domains = 64 * 1024;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		/* This driver does not support caching-mode hardware. */
		if (VTD_CAP_CM(vtdmap->cap) != 0)
			panic("vtd_init: invalid caching mode");
		/* Use the most restrictive domain-id limit of any unit. */
		if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
			max_domains = tmp;
	}
	/* Point each root entry at its bus's page-aligned context table. */
	for (i = 0; i < 256; i++) {
		ctx_paddr = vtophys(ctx_tables[i]);
		if (ctx_paddr & PAGE_MASK)
			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);
		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
	}
	return (0);
#ifndef __FreeBSD__
fail:
	/*
	 * Unit 'units' is the one whose mapping failed (its dip exists);
	 * earlier units were fully mapped — unmap them all.
	 */
	for (i = 0; i <= units; i++)
		vtd_unmap(vtddips[i]);
	return (ENXIO);
#endif
}
/*
 * iommu_ops cleanup entry point: undo vtd_init().  All domains must
 * already have been destroyed.  The root table is scrubbed and each
 * unit's register mapping is released.
 *
 * FIX: the loop bound was 'i <= drhd_num', which reads one element past
 * the end of vtdmaps[]/vtddips[] whenever drhd_num reaches
 * DRHD_MAX_UNITS (both arrays hold exactly DRHD_MAX_UNITS entries).
 * Only indices 0 .. drhd_num-1 are ever populated by vtd_init().
 */
static void
vtd_cleanup(void)
{
#ifndef __FreeBSD__
	int i;
	KASSERT(SLIST_EMPTY(&domhead), ("domain list not empty"));
	bzero(root_table, sizeof (root_table));
	for (i = 0; i < drhd_num; i++) {
		vtdmaps[i] = NULL;
		if (vtddips[i] != NULL)
			vtd_unmap(vtddips[i]);
	}
#endif
}
/*
 * iommu_ops enable entry point: on every unit, install the shared root
 * table, flush stale context/IOTLB state, and switch translation on.
 */
static void
vtd_enable(void)
{
	struct vtdmap *unit;
	int i;

	for (i = 0; i < drhd_num; i++) {
		unit = vtdmaps[i];
		vtd_wbflush(unit);

		/* Install the root table and wait for hw to latch it. */
		unit->rta = vtophys(root_table);
		unit->gcr = VTD_GCR_SRTP;
		do {
			/* spin until RTPS is set */
		} while ((unit->gsr & VTD_GSR_RTPS) == 0);

		/* Drop any stale cached translations before enabling. */
		vtd_ctx_global_invalidate(unit);
		vtd_iotlb_global_invalidate(unit);

		vtd_translation_enable(unit);
	}
}
static void
vtd_disable(void)
{
int i;
struct vtdmap *vtdmap;
for (i = 0; i < drhd_num; i++) {
vtdmap = vtdmaps[i];
vtd_translation_disable(vtdmap);
}
}
/*
 * iommu_ops add_device entry point: point the context entry for PCI
 * device 'rid' at domain 'arg's page tables.  Panics if the device
 * already has a present context entry or no remapping unit covers it.
 */
static void
vtd_add_device(void *arg, uint16_t rid)
{
	struct domain *dom = arg;
	struct vtdmap *vtdmap;
	uint64_t *ctxp;
	vm_paddr_t pt_paddr;
	uint8_t bus;
	int idx;

	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	pt_paddr = vtophys(dom->ptp);
	idx = VTD_RID2IDX(rid);

	if (ctxp[idx] & VTD_CTX_PRESENT) {
		panic("vtd_add_device: device %x is already owned by "
		    "domain %d", rid, (uint16_t)(ctxp[idx + 1] >> 8));
	}

	if ((vtdmap = vtd_device_scope(rid)) == NULL)
		panic("vtd_add_device: device %x is not in scope for "
		    "any DMA remapping unit", rid);

	/* Upper qword: address width and domain id. */
	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);

	/*
	 * Lower qword: translation type, page-table root and, last of
	 * all, the present bit.
	 */
	ctxp[idx] = VTD_ECAP_DI(vtdmap->ext_cap) ? VTD_CTX_TT_ALL : 0;
	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;
}
/*
 * iommu_ops remove_device entry point: clear the context entry for PCI
 * device 'rid' and flush the context and IOTLB caches on every unit so
 * the stale entry cannot be used again.  'arg' (the domain) is unused.
 */
static void
vtd_remove_device(void *arg, uint16_t rid)
{
	uint64_t *ctxp;
	uint8_t bus;
	int idx, unit;

	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	idx = VTD_RID2IDX(rid);

	/* Clear both qwords; the present bit goes away first. */
	ctxp[idx] = 0;
	ctxp[idx + 1] = 0;

	for (unit = 0; unit < drhd_num; unit++) {
		vtd_ctx_global_invalidate(vtdmaps[unit]);
		vtd_iotlb_global_invalidate(vtdmaps[unit]);
	}
}
#define CREATE_MAPPING 0
#define REMOVE_MAPPING 1

/*
 * Create or remove one gpa -> hpa translation in the domain's page
 * tables, using the largest superpage that the hardware supports
 * (spsmask) and that gpa, hpa and len are all aligned to.  Returns the
 * size of the single mapping installed/removed at 'gpa'; callers loop
 * until 'len' is covered.  gpa, hpa and len must be page aligned and
 * the range must fall within the domain's maxaddr.
 */
static uint64_t
vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
    int remove)
{
	struct domain *dom;
	int i, spshift, ptpshift, ptpindex, nlevels;
	uint64_t spsize, *ptp;

	dom = arg;
	ptpindex = 0;
	ptpshift = 0;

	KASSERT(gpa + len > gpa, ("%s: invalid gpa range %lx/%lx", __func__,
	    gpa, len));
	KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %lx/%lx beyond "
	    "domain maxaddr %lx", __func__, gpa, len, dom->maxaddr));

	if (gpa & PAGE_MASK)
		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);
	if (hpa & PAGE_MASK)
		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);
	if (len & PAGE_MASK)
		panic("vtd_create_mapping: unaligned len 0x%0lx", len);

	/*
	 * Find the best superpage: i walks spsmask bits 3..0, with the
	 * corresponding page shift 48, 39, 30, 21.  If no superpage fits,
	 * spshift ends at 12 (a normal 4KB page).
	 */
	spshift = 48;
	for (i = 3; i >= 0; i--) {
		spsize = 1UL << spshift;
		if ((dom->spsmask & (1 << i)) != 0 &&
		    (gpa & (spsize - 1)) == 0 &&
		    (hpa & (spsize - 1)) == 0 &&
		    (len >= spsize)) {
			break;
		}
		spshift -= 9;
	}

	/*
	 * Walk the tables downward, allocating intermediate page-table
	 * pages as needed, until the level matching the chosen page size
	 * is reached.
	 */
	ptp = dom->ptp;
	nlevels = dom->pt_levels;
	while (--nlevels >= 0) {
		ptpshift = 12 + nlevels * 9;
		ptpindex = (gpa >> ptpshift) & 0x1FF;

		/* This level is where the leaf mapping belongs. */
		if (spshift >= ptpshift) {
			break;
		}

		/* Create a downstream page-table page if none exists. */
		if (ptp[ptpindex] == 0) {
			void *nlp = vmm_ptp_alloc();
			ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR;
		}

		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
	}

	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);

	/* Install or clear the leaf entry. */
	if (remove) {
		ptp[ptpindex] = 0;
	} else {
		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;
		/* nlevels > 0 means the leaf sits above the 4KB level. */
		if (nlevels > 0)
			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
	}

	return (1UL << ptpshift);
}
/*
 * iommu_ops create_mapping entry point: install one gpa -> hpa mapping
 * of at most 'len' bytes; returns the number of bytes actually mapped.
 */
static uint64_t
vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
{
	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
}
/*
 * iommu_ops remove_mapping entry point: remove the mapping at 'gpa' (at
 * most 'len' bytes); returns the number of bytes actually unmapped.
 */
static uint64_t
vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
{
	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
}
static void
vtd_invalidate_tlb(void *dom)
{
int i;
struct vtdmap *vtdmap;
for (i = 0; i < drhd_num; i++) {
vtdmap = vtdmaps[i];
vtd_iotlb_global_invalidate(vtdmap);
}
}
/*
 * iommu_ops create_domain entry point: allocate a domain able to map
 * guest-physical addresses up to 'maxaddr'.  Computes the adjusted
 * guest address width (AGAW), picks the smallest supported AGAW common
 * to every unit, allocates the root page-table page and a fresh domain
 * id, and links the domain onto domhead.
 */
static void *
vtd_create_domain(vm_paddr_t maxaddr)
{
	struct domain *dom;
	vm_paddr_t addr;
	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
	struct vtdmap *vtdmap;

	if (drhd_num <= 0)
		panic("vtd_create_domain: no dma remapping hardware available");

	/*
	 * Calculate AGAW: round the guest address width up to the next
	 * page-table level boundary (multiple of 9 bits above the 12-bit
	 * page offset), capped at 64.
	 */
	addr = 0;
	for (gaw = 0; addr < maxaddr; gaw++)
		addr = 1ULL << gaw;
	res = (gaw - 12) % 9;
	if (res == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - res;
	if (agaw > 64)
		agaw = 64;

	/*
	 * Select the smallest supported AGAW that accommodates 'agaw':
	 * start at 2 levels / 30 bits and add one level (9 bits) per
	 * step.  'tmp' keeps only the SAGAW bits common to every unit.
	 */
	pt_levels = 2;
	sagaw = 30;
	addrwidth = 0;
	tmp = ~0;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		tmp &= VTD_CAP_SAGAW(vtdmap->cap);
	}
	for (i = 0; i < 5; i++) {
		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
			break;
		pt_levels++;
		addrwidth++;
		sagaw += 9;
		if (sagaw > 64)
			sagaw = 64;
	}
	if (i >= 5) {
		panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
		    tmp, agaw);
	}

	dom = kmem_zalloc(sizeof (struct domain), KM_SLEEP);
	dom->pt_levels = pt_levels;
	dom->addrwidth = addrwidth;
	dom->id = domain_id();
	dom->maxaddr = maxaddr;
	dom->ptp = vmm_ptp_alloc();
	if ((uintptr_t)dom->ptp & PAGE_MASK)
		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);
#ifdef __FreeBSD__
#ifdef notyet
	dom->spsmask = ~0;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
	}
#endif
#else
	/*
	 * NOTE(review): 'vtdmap' here is whichever unit the SAGAW loop
	 * visited last, so only that unit's superpage capabilities are
	 * honored — confirm this is intended when drhd_num > 1.
	 */
	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
#endif
	SLIST_INSERT_HEAD(&domhead, dom, next);
	return (dom);
}
/*
 * Recursively free a domain's page-table pages.  At levels above the
 * leaf, every present entry (RD or WR set) that is not a superpage
 * points at a child table, which is freed first; the page itself is
 * released last.
 */
static void
vtd_free_ptp(uint64_t *ptp, int level)
{
	if (level > 1) {
		int i;

		for (i = 0; i < 512; i++) {
			uint64_t pte = ptp[i];
			uint64_t *child;

			if ((pte & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
				continue;
			if ((pte & VTD_PTE_SUPERPAGE) != 0)
				continue;
			child = (uint64_t *)PHYS_TO_DMAP(pte & VTD_PTE_ADDR_M);
			vtd_free_ptp(child, level - 1);
		}
	}

	vmm_ptp_free(ptp);
}
/*
 * iommu_ops destroy_domain entry point: unlink the domain, free its
 * page-table tree and release the domain structure itself.
 */
static void
vtd_destroy_domain(void *arg)
{
	struct domain *dom = arg;

	SLIST_REMOVE(&domhead, dom, domain, next);
	vtd_free_ptp(dom->ptp, dom->pt_levels);
	kmem_free(dom, sizeof (*dom));
}
/* VT-d implementation of the vmm iommu backend interface. */
const struct iommu_ops vmm_iommu_ops = {
	.init = vtd_init,
	.cleanup = vtd_cleanup,
	.enable = vtd_enable,
	.disable = vtd_disable,
	.create_domain = vtd_create_domain,
	.destroy_domain = vtd_destroy_domain,
	.create_mapping = vtd_create_mapping,
	.remove_mapping = vtd_remove_mapping,
	.add_device = vtd_add_device,
	.remove_device = vtd_remove_device,
	.invalidate_tlb = vtd_invalidate_tlb,
};
/* Loadable-module linkage: a misc module with no device entry points. */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"bhyve vmm vtd",
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modlmisc,
	NULL
};
/* Loadable-module entry point: install the module. */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
/* Loadable-module entry point: remove the module. */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
/* Loadable-module entry point: report module information. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}