#include <sys/param.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcidevs.h>
#include <dev/pv/virtioreg.h>
#include <dev/pci/virtio_pcireg.h>
#include <dev/pv/vioblkreg.h>
#include <dev/vmm/vmm.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <errno.h>
#include <event.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "atomicio.h"
#include "pci.h"
#include "vioscsi.h"
#include "virtio.h"
#include "vmd.h"
#define VIRTIO_DEBUG 0
#ifdef DPRINTF
#undef DPRINTF
#endif
#if VIRTIO_DEBUG
#define DPRINTF log_debug
#else
#define DPRINTF(x...) do {} while(0)
#endif
extern struct vmd *env;
struct virtio_dev viornd;
struct virtio_dev *vioscsi = NULL;
struct virtio_dev vmmci;
SLIST_HEAD(virtio_dev_head, virtio_dev) virtio_devs;
#define MAXPHYS (64 * 1024)
#define VIRTIO_NET_F_MAC (1<<5)
#define VMMCI_F_TIMESYNC (1<<0)
#define VMMCI_F_ACK (1<<1)
#define VMMCI_F_SYNCRTC (1<<2)
#define RXQ 0
#define TXQ 1
static void virtio_dev_init(struct vmd_vm *, struct virtio_dev *, uint8_t,
uint16_t, uint16_t, uint64_t);
static int virtio_dev_launch(struct vmd_vm *, struct virtio_dev *);
static void virtio_dispatch_dev(int, short, void *);
static int handle_dev_msg(struct viodev_msg *, struct virtio_dev *);
static int virtio_dev_closefds(struct virtio_dev *);
static void virtio_pci_add_cap(uint8_t, uint8_t, uint8_t, uint32_t);
static void vmmci_pipe_dispatch(int, short, void *);
static int virtio_io_dispatch(int, uint16_t, uint32_t *, uint8_t *, void *,
uint8_t);
static int virtio_io_isr(int, uint16_t, uint32_t *, uint8_t *, void *, uint8_t);
static int virtio_io_notify(int, uint16_t, uint32_t *, uint8_t *, void *,
uint8_t);
static int viornd_notifyq(struct virtio_dev *, uint16_t);
static void vmmci_ack(struct virtio_dev *, unsigned int);
#if VIRTIO_DEBUG
static const char *
virtio1_reg_name(uint16_t reg)
{
switch (reg) {
case VIO1_PCI_DEVICE_FEATURE_SELECT: return "DEVICE_FEATURE_SELECT";
case VIO1_PCI_DEVICE_FEATURE: return "DEVICE_FEATURE";
case VIO1_PCI_DRIVER_FEATURE_SELECT: return "DRIVER_FEATURE_SELECT";
case VIO1_PCI_DRIVER_FEATURE: return "DRIVER_FEATURE";
case VIO1_PCI_CONFIG_MSIX_VECTOR: return "CONFIG_MSIX_VECTOR";
case VIO1_PCI_NUM_QUEUES: return "NUM_QUEUES";
case VIO1_PCI_DEVICE_STATUS: return "DEVICE_STATUS";
case VIO1_PCI_CONFIG_GENERATION: return "CONFIG_GENERATION";
case VIO1_PCI_QUEUE_SELECT: return "QUEUE_SELECT";
case VIO1_PCI_QUEUE_SIZE: return "QUEUE_SIZE";
case VIO1_PCI_QUEUE_MSIX_VECTOR: return "QUEUE_MSIX_VECTOR";
case VIO1_PCI_QUEUE_ENABLE: return "QUEUE_ENABLE";
case VIO1_PCI_QUEUE_NOTIFY_OFF: return "QUEUE_NOTIFY_OFF";
case VIO1_PCI_QUEUE_DESC: return "QUEUE_DESC";
case VIO1_PCI_QUEUE_DESC + 4: return "QUEUE_DESC (HIGH)";
case VIO1_PCI_QUEUE_AVAIL: return "QUEUE_AVAIL";
case VIO1_PCI_QUEUE_AVAIL + 4: return "QUEUE_AVAIL (HIGH)";
case VIO1_PCI_QUEUE_USED: return "QUEUE_USED";
case VIO1_PCI_QUEUE_USED + 4: return "QUEUE_USED (HIGH)";
default: return "UNKNOWN";
}
}
#endif
const char *
virtio_reg_name(uint8_t reg)
{
switch (reg) {
case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
case VIRTIO_CONFIG_QUEUE_PFN: return "queue address";
case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
case VIRTIO_CONFIG_DEVICE_STATUS: return "device status";
case VIRTIO_CONFIG_ISR_STATUS: return "isr status";
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI...VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
return "device config 0";
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
return "device config 1";
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8: return "device config 2";
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12: return "device config 3";
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16: return "device config 4";
default: return "unknown";
}
}
uint32_t
vring_size(uint32_t vq_size)
{
uint32_t allocsize1, allocsize2;
allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
+ sizeof(uint16_t) * (2 + vq_size));
allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * 2
+ sizeof(struct vring_used_elem) * vq_size);
return allocsize1 + allocsize2;
}
void
virtio_update_qs(struct virtio_dev *dev)
{
struct virtio_vq_info *vq_info = NULL;
if (dev->driver_feature & VIRTIO_F_VERSION_1) {
if (dev->pci_cfg.queue_select >= dev->num_queues) {
dev->pci_cfg.queue_size = 0;
dev->pci_cfg.queue_enable = 0;
return;
}
vq_info = &dev->vq[dev->pci_cfg.queue_select];
dev->pci_cfg.queue_size = vq_info->qs;
dev->pci_cfg.queue_desc = vq_info->q_gpa;
dev->pci_cfg.queue_avail = vq_info->q_gpa + vq_info->vq_availoffset;
dev->pci_cfg.queue_used = vq_info->q_gpa + vq_info->vq_usedoffset;
dev->pci_cfg.queue_enable = vq_info->vq_enabled;
} else {
if (dev->cfg.queue_select >= dev->num_queues) {
dev->cfg.queue_size = 0;
return;
}
vq_info = &dev->vq[dev->cfg.queue_select];
dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
dev->cfg.queue_size = vq_info->qs;
}
}
void
virtio_update_qa(struct virtio_dev *dev)
{
struct virtio_vq_info *vq_info = NULL;
void *hva = NULL;
if (dev->driver_feature & VIRTIO_F_VERSION_1) {
if (dev->pci_cfg.queue_select >= dev->num_queues) {
log_warnx("%s: invalid queue index", __func__);
return;
}
vq_info = &dev->vq[dev->pci_cfg.queue_select];
vq_info->q_gpa = dev->pci_cfg.queue_desc;
vq_info->qs = dev->pci_cfg.queue_size;
vq_info->mask = vq_info->qs - 1;
if (vq_info->qs > 0 && vq_info->qs % 2 == 0) {
vq_info->vq_availoffset = dev->pci_cfg.queue_avail -
dev->pci_cfg.queue_desc;
vq_info->vq_usedoffset = dev->pci_cfg.queue_used -
dev->pci_cfg.queue_desc;
vq_info->vq_enabled = (dev->pci_cfg.queue_enable == 1);
} else {
vq_info->vq_availoffset = 0;
vq_info->vq_usedoffset = 0;
vq_info->vq_enabled = 0;
}
} else {
if (dev->cfg.queue_select >= dev->num_queues) {
log_warnx("%s: invalid queue index", __func__);
return;
}
vq_info = &dev->vq[dev->cfg.queue_select];
vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn *
VIRTIO_PAGE_SIZE;
vq_info->vq_availoffset = sizeof(struct vring_desc) *
vq_info->qs;
vq_info->vq_usedoffset = VIRTQUEUE_ALIGN(
sizeof(struct vring_desc) * vq_info->qs +
sizeof(uint16_t) * (2 + vq_info->qs));
}
if (vq_info->q_gpa > 0) {
hva = hvaddr_mem(vq_info->q_gpa, vring_size(vq_info->qs));
if (hva == NULL)
fatalx("%s: failed to translate gpa to hva", __func__);
vq_info->q_hva = hva;
} else {
vq_info->q_hva = NULL;
vq_info->last_avail = 0;
vq_info->notified_avail = 0;
}
}
static int
viornd_notifyq(struct virtio_dev *dev, uint16_t idx)
{
size_t sz;
int dxx, ret = 0;
uint16_t aidx, uidx;
char *vr, *rnd_data;
struct vring_desc *desc = NULL;
struct vring_avail *avail = NULL;
struct vring_used *used = NULL;
struct virtio_vq_info *vq_info = NULL;
if (dev->device_id != PCI_PRODUCT_VIRTIO_ENTROPY)
fatalx("%s: device is not an entropy device", __func__);
if (idx >= dev->num_queues) {
log_warnx("%s: invalid virtqueue index", __func__);
return (0);
}
vq_info = &dev->vq[idx];
if (!vq_info->vq_enabled) {
log_warnx("%s: virtqueue not enabled", __func__);
return (0);
}
vr = vq_info->q_hva;
if (vr == NULL)
fatalx("%s: null vring", __func__);
desc = (struct vring_desc *)(vr);
avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
aidx = avail->idx & vq_info->mask;
uidx = used->idx & vq_info->mask;
dxx = avail->ring[aidx] & vq_info->mask;
sz = desc[dxx].len;
if (sz > MAXPHYS)
fatalx("viornd descriptor size too large (%zu)", sz);
rnd_data = malloc(sz);
if (rnd_data == NULL)
fatal("memory allocaiton error for viornd data");
arc4random_buf(rnd_data, sz);
if (write_mem(desc[dxx].addr, rnd_data, sz)) {
log_warnx("viornd: can't write random data @ 0x%llx",
desc[dxx].addr);
} else {
ret = 1;
viornd.isr = 1;
used->ring[uidx].id = dxx;
used->ring[uidx].len = sz;
__sync_synchronize();
used->idx++;
}
free(rnd_data);
return (ret);
}
static int
virtio_io_dispatch(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
void *arg, uint8_t sz)
{
struct virtio_dev *dev = (struct virtio_dev *)arg;
uint8_t actual = (uint8_t)reg;
switch (reg & 0xFF00) {
case VIO1_CFG_BAR_OFFSET:
*data = virtio_io_cfg(dev, dir, actual, *data, sz);
break;
case VIO1_DEV_BAR_OFFSET:
if (dir == VEI_DIR_IN) {
log_debug("%s: no device specific handler", __func__);
*data = (uint32_t)(-1);
}
break;
case VIO1_NOTIFY_BAR_OFFSET:
return virtio_io_notify(dir, actual, data, intr, arg, sz);
case VIO1_ISR_BAR_OFFSET:
return virtio_io_isr(dir, actual, data, intr, arg, sz);
default:
DPRINTF("%s: no handler for reg 0x%04x", __func__, reg);
if (dir == VEI_DIR_IN)
*data = (uint32_t)(-1);
}
return (0);
}
uint32_t
virtio_io_cfg(struct virtio_dev *dev, int dir, uint8_t reg, uint32_t data,
uint8_t sz)
{
struct virtio_pci_common_cfg *pci_cfg = &dev->pci_cfg;
uint32_t res = 0;
uint16_t i;
if (dir == VEI_DIR_OUT) {
switch (reg) {
case VIO1_PCI_DEVICE_FEATURE_SELECT:
if (sz != 4)
log_warnx("%s: unaligned write to device "
"feature select (sz=%u)", __func__, sz);
else
pci_cfg->device_feature_select = data;
break;
case VIO1_PCI_DEVICE_FEATURE:
log_warnx("illegal write to device feature register");
break;
case VIO1_PCI_DRIVER_FEATURE_SELECT:
if (sz != 4)
log_warnx("%s: unaligned write to driver "
"feature select register (sz=%u)", __func__,
sz);
else
pci_cfg->driver_feature_select = data;
break;
case VIO1_PCI_DRIVER_FEATURE:
if (sz != 4) {
log_warnx("%s: unaligned write to driver "
"feature register (sz=%u)", __func__, sz);
break;
}
if (pci_cfg->driver_feature_select > 1) {
DPRINTF("%s: ignoring driver feature write",
__func__);
break;
}
pci_cfg->driver_feature = data;
if (pci_cfg->driver_feature_select == 0)
dev->driver_feature |= pci_cfg->driver_feature;
else
dev->driver_feature |=
((uint64_t)pci_cfg->driver_feature << 32);
dev->driver_feature &= dev->device_feature;
DPRINTF("%s: driver features 0x%llx", __func__,
dev->driver_feature);
break;
case VIO1_PCI_CONFIG_MSIX_VECTOR:
break;
case VIO1_PCI_NUM_QUEUES:
log_warnx("illegal write to num queues register");
break;
case VIO1_PCI_DEVICE_STATUS:
if (sz != 1) {
log_warnx("%s: unaligned write to device "
"status register (sz=%u)", __func__, sz);
break;
}
dev->status = data;
if (dev->status == 0) {
dev->driver_feature = 0;
dev->isr = 0;
pci_cfg->queue_select = 0;
virtio_update_qs(dev);
if (dev->num_queues > 0) {
for (i = 0; i < dev->num_queues; i++)
virtio_vq_init(dev, i);
}
}
DPRINTF("%s: dev %u status [%s%s%s%s%s%s]", __func__,
dev->pci_id,
(data & VIRTIO_CONFIG_DEVICE_STATUS_ACK) ?
"[ack]" : "",
(data & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER) ?
"[driver]" : "",
(data & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) ?
"[driver ok]" : "",
(data & VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) ?
"[features ok]" : "",
(data &
VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET)
? "[needs reset]" : "",
(data & VIRTIO_CONFIG_DEVICE_STATUS_FAILED) ?
"[failed]" : "");
break;
case VIO1_PCI_CONFIG_GENERATION:
log_warnx("illegal write to config generation "
"register");
break;
case VIO1_PCI_QUEUE_SELECT:
pci_cfg->queue_select = data;
virtio_update_qs(dev);
break;
case VIO1_PCI_QUEUE_SIZE:
if (data <= VIRTIO_QUEUE_SIZE_MAX)
pci_cfg->queue_size = data;
else {
log_warnx("%s: clamping queue size", __func__);
pci_cfg->queue_size = VIRTIO_QUEUE_SIZE_MAX;
}
virtio_update_qa(dev);
break;
case VIO1_PCI_QUEUE_MSIX_VECTOR:
break;
case VIO1_PCI_QUEUE_ENABLE:
pci_cfg->queue_enable = data;
virtio_update_qa(dev);
break;
case VIO1_PCI_QUEUE_NOTIFY_OFF:
log_warnx("illegal write to queue notify offset "
"register");
break;
case VIO1_PCI_QUEUE_DESC:
if (sz != 4) {
log_warnx("%s: unaligned write to queue "
"desc. register (sz=%u)", __func__, sz);
break;
}
pci_cfg->queue_desc &= 0xffffffff00000000;
pci_cfg->queue_desc |= (uint64_t)data;
virtio_update_qa(dev);
break;
case VIO1_PCI_QUEUE_DESC + 4:
if (sz != 4) {
log_warnx("%s: unaligned write to queue "
"desc. register (sz=%u)", __func__, sz);
break;
}
pci_cfg->queue_desc &= 0x00000000ffffffff;
pci_cfg->queue_desc |= ((uint64_t)data << 32);
virtio_update_qa(dev);
break;
case VIO1_PCI_QUEUE_AVAIL:
if (sz != 4) {
log_warnx("%s: unaligned write to queue "
"available register (sz=%u)", __func__, sz);
break;
}
pci_cfg->queue_avail &= 0xffffffff00000000;
pci_cfg->queue_avail |= (uint64_t)data;
virtio_update_qa(dev);
break;
case VIO1_PCI_QUEUE_AVAIL + 4:
if (sz != 4) {
log_warnx("%s: unaligned write to queue "
"available register (sz=%u)", __func__, sz);
break;
}
pci_cfg->queue_avail &= 0x00000000ffffffff;
pci_cfg->queue_avail |= ((uint64_t)data << 32);
virtio_update_qa(dev);
break;
case VIO1_PCI_QUEUE_USED:
if (sz != 4) {
log_warnx("%s: unaligned write to queue used "
"register (sz=%u)", __func__, sz);
break;
}
pci_cfg->queue_used &= 0xffffffff00000000;
pci_cfg->queue_used |= (uint64_t)data;
virtio_update_qa(dev);
break;
case VIO1_PCI_QUEUE_USED + 4:
if (sz != 4) {
log_warnx("%s: unaligned write to queue used "
"register (sz=%u)", __func__, sz);
break;
}
pci_cfg->queue_used &= 0x00000000ffffffff;
pci_cfg->queue_used |= ((uint64_t)data << 32);
virtio_update_qa(dev);
break;
default:
log_warnx("%s: invalid register 0x%04x", __func__, reg);
}
} else {
switch (reg) {
case VIO1_PCI_DEVICE_FEATURE_SELECT:
res = pci_cfg->device_feature_select;
break;
case VIO1_PCI_DEVICE_FEATURE:
if (pci_cfg->device_feature_select == 0)
res = dev->device_feature & (uint32_t)(-1);
else if (pci_cfg->device_feature_select == 1)
res = dev->device_feature >> 32;
else {
DPRINTF("%s: ignoring device feature read",
__func__);
}
break;
case VIO1_PCI_DRIVER_FEATURE_SELECT:
res = pci_cfg->driver_feature_select;
break;
case VIO1_PCI_DRIVER_FEATURE:
if (pci_cfg->driver_feature_select == 0)
res = dev->driver_feature & (uint32_t)(-1);
else if (pci_cfg->driver_feature_select == 1)
res = dev->driver_feature >> 32;
else {
DPRINTF("%s: ignoring driver feature read",
__func__);
}
break;
case VIO1_PCI_CONFIG_MSIX_VECTOR:
res = VIRTIO_MSI_NO_VECTOR;
break;
case VIO1_PCI_NUM_QUEUES:
res = dev->num_queues;
break;
case VIO1_PCI_DEVICE_STATUS:
res = dev->status;
break;
case VIO1_PCI_CONFIG_GENERATION:
res = pci_cfg->config_generation;
break;
case VIO1_PCI_QUEUE_SELECT:
res = pci_cfg->queue_select;
break;
case VIO1_PCI_QUEUE_SIZE:
res = pci_cfg->queue_size;
break;
case VIO1_PCI_QUEUE_MSIX_VECTOR:
res = VIRTIO_MSI_NO_VECTOR;
break;
case VIO1_PCI_QUEUE_ENABLE:
res = pci_cfg->queue_enable;
break;
case VIO1_PCI_QUEUE_NOTIFY_OFF:
res = pci_cfg->queue_notify_off;
break;
case VIO1_PCI_QUEUE_DESC:
res = (uint32_t)(0xFFFFFFFF & pci_cfg->queue_desc);
break;
case VIO1_PCI_QUEUE_DESC + 4:
res = (uint32_t)(pci_cfg->queue_desc >> 32);
break;
case VIO1_PCI_QUEUE_AVAIL:
res = (uint32_t)(0xFFFFFFFF & pci_cfg->queue_avail);
break;
case VIO1_PCI_QUEUE_AVAIL + 4:
res = (uint32_t)(pci_cfg->queue_avail >> 32);
break;
case VIO1_PCI_QUEUE_USED:
res = (uint32_t)(0xFFFFFFFF & pci_cfg->queue_used);
break;
case VIO1_PCI_QUEUE_USED + 4:
res = (uint32_t)(pci_cfg->queue_used >> 32);
break;
default:
log_warnx("%s: invalid register 0x%04x", __func__, reg);
}
}
DPRINTF("%s: dev=%u %s sz=%u dir=%s data=0x%04x", __func__, dev->pci_id,
virtio1_reg_name(reg), sz, (dir == VEI_DIR_OUT) ? "w" : "r",
(dir == VEI_DIR_OUT) ? data : res);
return (res);
}
static int
virtio_io_isr(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
void *arg, uint8_t sz)
{
struct virtio_dev *dev = (struct virtio_dev *)arg;
*intr = 0xFF;
DPRINTF("%s: dev=%u, reg=0x%04x, sz=%u, dir=%s", __func__,
dev->pci_id, reg, sz,
(dir == VEI_DIR_OUT) ? "write" : "read");
if (dev->device_id == PCI_PRODUCT_VIRTIO_BLOCK ||
dev->device_id == PCI_PRODUCT_VIRTIO_NETWORK ||
dev->device_id == PCI_PRODUCT_VIRTIO_SCSI)
fatalx("%s: cannot use on multi-process virtio dev", __func__);
if (dir == VEI_DIR_IN) {
*data = dev->isr;
dev->isr = 0;
vcpu_deassert_irq(dev->vmm_id, 0, dev->irq);
}
return (0);
}
static int
virtio_io_notify(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
void *arg, uint8_t sz)
{
int raise_intr = 0;
struct virtio_dev *dev = (struct virtio_dev *)arg;
uint16_t vq_idx = (uint16_t)(0x0000ffff & *data);
*intr = 0xFF;
DPRINTF("%s: reg=0x%04x, sz=%u, vq_idx=%u, dir=%s", __func__, reg, sz,
vq_idx, (dir == VEI_DIR_OUT) ? "write" : "read");
if (dev->device_id == PCI_PRODUCT_VIRTIO_BLOCK ||
dev->device_id == PCI_PRODUCT_VIRTIO_NETWORK ||
dev->device_id == PCI_PRODUCT_VIRTIO_SCSI)
fatalx("%s: cannot use on multi-process virtio dev", __func__);
if (vq_idx >= dev->num_queues) {
log_warnx("%s: invalid virtqueue index %u", __func__, vq_idx);
return (0);
}
if (dir == VEI_DIR_IN) {
*data = 0;
return (0);
}
switch (dev->device_id) {
case PCI_PRODUCT_VIRTIO_ENTROPY:
raise_intr = viornd_notifyq(dev, vq_idx);
break;
case PCI_PRODUCT_VIRTIO_VMMCI:
break;
default:
log_warnx("%s: invalid device type %u", __func__,
dev->device_id);
}
if (raise_intr)
*intr = 1;
return (0);
}
int
vmmci_ctl(struct virtio_dev *dev, unsigned int cmd)
{
int ret = 0;
struct timeval tv = { 0, 0 };
struct vmmci_dev *v = NULL;
if (dev->device_id != PCI_PRODUCT_VIRTIO_VMMCI)
fatalx("%s: device is not a vmmci device", __func__);
v = &dev->vmmci;
mutex_lock(&v->mutex);
if ((dev->status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) == 0) {
ret = -1;
goto unlock;
}
if (cmd == v->cmd)
goto unlock;
switch (cmd) {
case VMMCI_NONE:
break;
case VMMCI_SHUTDOWN:
case VMMCI_REBOOT:
v->cmd = cmd;
if (cmd == VMMCI_SHUTDOWN)
cmd = VMMCI_REBOOT;
dev->isr = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
vcpu_assert_irq(dev->vmm_id, 0, dev->irq);
tv.tv_sec = VMMCI_TIMEOUT_SHORT;
evtimer_add(&v->timeout, &tv);
break;
case VMMCI_SYNCRTC:
if (vmmci.cfg.guest_feature & VMMCI_F_SYNCRTC) {
v->cmd = cmd;
dev->isr = VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
vcpu_assert_irq(dev->vmm_id, 0, dev->irq);
} else {
log_debug("%s: RTC sync skipped (guest does not "
"support RTC sync)\n", __func__);
}
break;
default:
fatalx("invalid vmmci command: %d", cmd);
}
unlock:
mutex_unlock(&v->mutex);
return (ret);
}
static void
vmmci_ack(struct virtio_dev *dev, unsigned int cmd)
{
struct vmmci_dev *v = NULL;
if (dev->device_id != PCI_PRODUCT_VIRTIO_VMMCI)
fatalx("%s: device is not a vmmci device", __func__);
v = &dev->vmmci;
switch (cmd) {
case VMMCI_NONE:
break;
case VMMCI_SHUTDOWN:
if (v->cmd == 0) {
log_debug("%s: vm %u requested shutdown", __func__,
dev->vmm_id);
vm_pipe_send(&v->dev_pipe, VMMCI_SET_TIMEOUT_SHORT);
return;
}
case VMMCI_REBOOT:
if (cmd == v->cmd) {
log_debug("%s: vm %u acknowledged shutdown request",
__func__, dev->vmm_id);
vm_pipe_send(&v->dev_pipe, VMMCI_SET_TIMEOUT_LONG);
}
break;
case VMMCI_SYNCRTC:
log_debug("%s: vm %u acknowledged RTC sync request",
__func__, dev->vmm_id);
v->cmd = VMMCI_NONE;
break;
default:
log_warnx("%s: illegal request %u", __func__, cmd);
break;
}
}
void
vmmci_timeout(int fd, short type, void *arg)
{
struct virtio_dev *dev = (struct virtio_dev *)arg;
struct vmmci_dev *v = NULL;
if (dev->device_id != PCI_PRODUCT_VIRTIO_VMMCI)
fatalx("%s: device is not a vmmci device", __func__);
v = &dev->vmmci;
log_debug("vm %u shutdown", dev->vmm_id);
vm_shutdown(v->cmd == VMMCI_REBOOT ? VMMCI_REBOOT : VMMCI_SHUTDOWN);
}
int
vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
void *arg, uint8_t sz)
{
struct virtio_dev *dev = (struct virtio_dev *)arg;
struct vmmci_dev *v = NULL;
if (dev->device_id != PCI_PRODUCT_VIRTIO_VMMCI)
fatalx("%s: device is not a vmmci device (%u)",
__func__, dev->device_id);
v = &dev->vmmci;
*intr = 0xFF;
mutex_lock(&v->mutex);
if (dir == 0) {
switch (reg) {
case VIRTIO_CONFIG_DEVICE_FEATURES:
case VIRTIO_CONFIG_QUEUE_SIZE:
case VIRTIO_CONFIG_ISR_STATUS:
log_warnx("illegal write %x to %s", *data,
virtio_reg_name(reg));
break;
case VIRTIO_CONFIG_GUEST_FEATURES:
dev->cfg.guest_feature = *data;
break;
case VIRTIO_CONFIG_QUEUE_PFN:
dev->cfg.queue_pfn = *data;
break;
case VIRTIO_CONFIG_QUEUE_SELECT:
dev->cfg.queue_select = *data;
break;
case VIRTIO_CONFIG_QUEUE_NOTIFY:
dev->cfg.queue_notify = *data;
break;
case VIRTIO_CONFIG_DEVICE_STATUS:
dev->status = *data;
break;
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
vmmci_ack(dev, *data);
break;
}
} else {
switch (reg) {
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
*data = v->cmd;
break;
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
gettimeofday(&v->time, NULL);
*data = (uint64_t)v->time.tv_sec;
break;
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 8:
*data = (uint64_t)v->time.tv_sec << 32;
break;
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
*data = (uint64_t)v->time.tv_usec;
break;
case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 16:
*data = (uint64_t)v->time.tv_usec << 32;
break;
case VIRTIO_CONFIG_DEVICE_FEATURES:
*data = dev->cfg.device_feature;
break;
case VIRTIO_CONFIG_GUEST_FEATURES:
*data = dev->cfg.guest_feature;
break;
case VIRTIO_CONFIG_QUEUE_PFN:
*data = dev->cfg.queue_pfn;
break;
case VIRTIO_CONFIG_QUEUE_SIZE:
*data = dev->cfg.queue_size;
break;
case VIRTIO_CONFIG_QUEUE_SELECT:
*data = dev->cfg.queue_select;
break;
case VIRTIO_CONFIG_QUEUE_NOTIFY:
*data = dev->cfg.queue_notify;
break;
case VIRTIO_CONFIG_DEVICE_STATUS:
*data = dev->status;
break;
case VIRTIO_CONFIG_ISR_STATUS:
*data = dev->isr;
dev->isr = 0;
vcpu_deassert_irq(dev->vmm_id, 0, dev->irq);
break;
}
}
mutex_unlock(&v->mutex);
return (0);
}
int
virtio_get_base(int fd, char *path, size_t npath, int type, const char *dpath)
{
switch (type) {
case VMDF_RAW:
return 0;
case VMDF_QCOW2:
return virtio_qcow2_get_base(fd, path, npath, dpath);
}
log_warnx("%s: invalid disk format", __func__);
return -1;
}
static void
vmmci_pipe_dispatch(int fd, short event, void *arg)
{
struct virtio_dev *dev = (struct virtio_dev *)arg;
struct vmmci_dev *v = &dev->vmmci;
struct timeval tv = { 0, 0 };
enum pipe_msg_type msg;
msg = vm_pipe_recv(&v->dev_pipe);
switch (msg) {
case VMMCI_SET_TIMEOUT_SHORT:
tv.tv_sec = VMMCI_TIMEOUT_SHORT;
evtimer_add(&v->timeout, &tv);
break;
case VMMCI_SET_TIMEOUT_LONG:
tv.tv_sec = VMMCI_TIMEOUT_LONG;
evtimer_add(&v->timeout, &tv);
break;
default:
log_warnx("%s: invalid pipe message type %d", __func__, msg);
}
}
int
virtio_init(struct vmd_vm *vm, int child_cdrom,
int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
{
struct vmop_create_params *vmc = &vm->vm_params;
struct virtio_dev *dev;
uint8_t id, i, j;
int bar_id, ret = 0;
SLIST_INIT(&virtio_devs);
if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
PCI_PRODUCT_QUMRANET_VIO1_RNG, PCI_CLASS_SYSTEM,
PCI_SUBCLASS_SYSTEM_MISC, PCI_VENDOR_OPENBSD,
PCI_PRODUCT_VIRTIO_ENTROPY, 1, 1, NULL)) {
log_warnx("can't add PCI virtio rng device");
return (1);
}
virtio_dev_init(vm, &viornd, id, VIORND_QUEUE_SIZE_DEFAULT,
VIRTIO_RND_QUEUES, VIRTIO_F_VERSION_1);
bar_id = pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_io_dispatch,
&viornd);
if (bar_id == -1 || bar_id > 0xff) {
log_warnx("can't add bar for virtio rng device");
return (1);
}
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_COMMON_CFG, bar_id, 0);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_ISR_CFG, bar_id, 0);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_NOTIFY_CFG, bar_id, 0);
if (vmc->vmc_nnics > 0) {
for (i = 0; i < vmc->vmc_nnics; i++) {
dev = malloc(sizeof(struct virtio_dev));
if (dev == NULL) {
log_warn("calloc failure allocating vionet");
return (1);
}
if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
PCI_PRODUCT_QUMRANET_VIO1_NET, PCI_CLASS_SYSTEM,
PCI_SUBCLASS_SYSTEM_MISC, PCI_VENDOR_OPENBSD,
PCI_PRODUCT_VIRTIO_NETWORK, 1, 1, NULL)) {
log_warnx("can't add PCI virtio net device");
return (1);
}
virtio_dev_init(vm, dev, id, VIONET_QUEUE_SIZE_DEFAULT,
VIRTIO_NET_QUEUES,
(VIRTIO_NET_F_MAC | VIRTIO_F_VERSION_1));
if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
dev) == -1) {
log_warnx("can't add bar for virtio net "
"device");
return (1);
}
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_COMMON_CFG,
bar_id, 0);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_DEVICE_CFG,
bar_id, 8);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_ISR_CFG, bar_id,
0);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_NOTIFY_CFG,
bar_id, 0);
dev->dev_type = VMD_DEVTYPE_NET;
dev->vmm_id = vm->vm_vmmid;
dev->vionet.data_fd = child_taps[i];
memcpy(&dev->vionet.mac, &vmc->vmc_macs[i], 6);
dev->vionet.lockedmac =
vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0;
dev->vionet.local =
vmc->vmc_ifflags[i] & VMIFF_LOCAL ? 1 : 0;
if (i == 0 && vmc->vmc_bootdevice & VMBOOTDEV_NET)
dev->vionet.pxeboot = 1;
memcpy(&dev->vionet.local_prefix,
&env->vmd_cfg.cfg_localprefix,
sizeof(dev->vionet.local_prefix));
log_debug("%s: vm \"%s\" vio%u lladdr %s%s%s%s",
__func__, vm->vm_params.vmc_name, i,
ether_ntoa((void *)dev->vionet.mac),
dev->vionet.lockedmac ? ", locked" : "",
dev->vionet.local ? ", local" : "",
dev->vionet.pxeboot ? ", pxeboot" : "");
dev->vionet.idx = i;
SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
}
}
if (vmc->vmc_ndisks > 0) {
for (i = 0; i < vmc->vmc_ndisks; i++) {
dev = malloc(sizeof(struct virtio_dev));
if (dev == NULL) {
log_warn("%s: failure allocating vioblk",
__func__);
return (1);
}
if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
PCI_PRODUCT_QUMRANET_VIO1_BLOCK,
PCI_CLASS_MASS_STORAGE,
PCI_SUBCLASS_MASS_STORAGE_SCSI, PCI_VENDOR_OPENBSD,
PCI_PRODUCT_VIRTIO_BLOCK, 1, 1, NULL)) {
log_warnx("can't add PCI virtio block "
"device");
return (1);
}
virtio_dev_init(vm, dev, id, VIOBLK_QUEUE_SIZE_DEFAULT,
VIRTIO_BLK_QUEUES,
(VIRTIO_F_VERSION_1 | VIRTIO_BLK_F_SEG_MAX));
bar_id = pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
dev);
if (bar_id == -1 || bar_id > 0xff) {
log_warnx("can't add bar for virtio block "
"device");
return (1);
}
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_COMMON_CFG,
bar_id, 0);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_DEVICE_CFG,
bar_id, 24);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_ISR_CFG, bar_id,
0);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_NOTIFY_CFG,
bar_id, 0);
dev->dev_type = VMD_DEVTYPE_DISK;
dev->vmm_id = vm->vm_vmmid;
dev->vioblk.seg_max = VIOBLK_SEG_MAX_DEFAULT;
memset(&dev->vioblk.disk_fd, -1,
sizeof(dev->vioblk.disk_fd));
dev->vioblk.ndisk_fd = vmc->vmc_diskbases[i];
for (j = 0; j < dev->vioblk.ndisk_fd; j++)
dev->vioblk.disk_fd[j] = child_disks[i][j];
dev->vioblk.idx = i;
SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
}
}
if (strlen(vmc->vmc_cdrom)) {
dev = malloc(sizeof(struct virtio_dev));
if (dev == NULL) {
log_warn("calloc failure allocating vioscsi");
return (1);
}
if (pci_add_device(&id, PCI_VENDOR_QUMRANET,
PCI_PRODUCT_QUMRANET_VIO1_SCSI, PCI_CLASS_MASS_STORAGE,
PCI_SUBCLASS_MASS_STORAGE_SCSI, PCI_VENDOR_OPENBSD,
PCI_PRODUCT_VIRTIO_SCSI, 1, 1, NULL)) {
log_warnx("can't add PCI vioscsi device");
return (1);
}
virtio_dev_init(vm, dev, id, VIOSCSI_QUEUE_SIZE_DEFAULT,
VIRTIO_SCSI_QUEUES, VIRTIO_F_VERSION_1);
if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io, dev)
== -1) {
log_warnx("can't add bar for vioscsi device");
return (1);
}
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_COMMON_CFG, bar_id, 0);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_DEVICE_CFG, bar_id, 36);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_ISR_CFG, bar_id, 0);
virtio_pci_add_cap(id, VIRTIO_PCI_CAP_NOTIFY_CFG, bar_id, 0);
dev->dev_type = VMD_DEVTYPE_SCSI;
dev->vmm_id = vm->vm_vmmid;
dev->vioscsi.cdrom_fd = child_cdrom;
dev->vioscsi.locked = 0;
dev->vioscsi.lba = 0;
dev->vioscsi.max_xfer = VIOSCSI_BLOCK_SIZE_CDROM;
SLIST_INSERT_HEAD(&virtio_devs, dev, dev_next);
}
SLIST_FOREACH(dev, &virtio_devs, dev_next) {
if (virtio_dev_launch(vm, dev) != 0) {
log_warnx("failed to launch virtio device");
return (1);
}
}
dev = &vmmci;
if (pci_add_device(&id, PCI_VENDOR_OPENBSD, PCI_PRODUCT_OPENBSD_CONTROL,
PCI_CLASS_COMMUNICATIONS, PCI_SUBCLASS_COMMUNICATIONS_MISC,
PCI_VENDOR_OPENBSD, PCI_PRODUCT_VIRTIO_VMMCI, 0, 1, NULL)) {
log_warnx("can't add PCI vmm control device");
return (1);
}
virtio_dev_init(vm, dev, id, 0, 0,
VMMCI_F_TIMESYNC | VMMCI_F_ACK | VMMCI_F_SYNCRTC);
if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, dev) == -1) {
log_warnx("can't add bar for vmm control device");
return (1);
}
ret = pthread_mutex_init(&dev->vmmci.mutex, NULL);
if (ret) {
errno = ret;
log_warn("could not initialize vmmci mutex");
return (1);
}
evtimer_set(&dev->vmmci.timeout, vmmci_timeout, dev);
vm_pipe_init2(&dev->vmmci.dev_pipe, vmmci_pipe_dispatch, dev);
event_add(&dev->vmmci.dev_pipe.read_ev, NULL);
return (0);
}
void
vionet_set_hostmac(struct vmd_vm *vm, unsigned int idx, uint8_t *addr)
{
struct vmop_create_params *vmc = &vm->vm_params;
struct virtio_dev *dev;
struct vionet_dev *vionet = NULL;
int ret;
if (idx > vmc->vmc_nnics)
fatalx("%s: invalid vionet index: %u", __func__, idx);
SLIST_FOREACH(dev, &virtio_devs, dev_next) {
if (dev->dev_type == VMD_DEVTYPE_NET
&& dev->vionet.idx == idx) {
vionet = &dev->vionet;
break;
}
}
if (vionet == NULL)
fatalx("%s: dev == NULL, idx = %u", __func__, idx);
memcpy(vionet->hostmac, addr, sizeof(vionet->hostmac));
ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_HOSTMAC, 0, 0, -1,
vionet->hostmac, sizeof(vionet->hostmac));
if (ret == -1) {
log_warnx("%s: failed to queue hostmac to vionet dev %u",
__func__, idx);
return;
}
}
void
virtio_shutdown(struct vmd_vm *vm)
{
int ret, status;
pid_t pid = 0;
struct virtio_dev *dev, *tmp;
struct viodev_msg msg;
struct imsgbuf *ibuf;
if (vioscsi != NULL)
vioscsi->vioscsi.file.close(vioscsi->vioscsi.file.p, 0);
SLIST_FOREACH(dev, &virtio_devs, dev_next) {
memset(&msg, 0, sizeof(msg));
msg.type = VIODEV_MSG_SHUTDOWN;
ibuf = &dev->sync_iev.ibuf;
ret = imsg_compose(ibuf, VIODEV_MSG_SHUTDOWN, 0, 0, -1,
&msg, sizeof(msg));
if (ret == -1)
fatalx("%s: failed to send shutdown to device",
__func__);
if (imsgbuf_flush(ibuf) == -1)
fatalx("%s: imsgbuf_flush", __func__);
}
SLIST_FOREACH_SAFE(dev, &virtio_devs, dev_next, tmp) {
log_debug("%s: waiting on device pid %d", __func__,
dev->dev_pid);
do {
pid = waitpid(dev->dev_pid, &status, WNOHANG);
} while (pid == 0 || (pid == -1 && errno == EINTR));
if (pid == dev->dev_pid)
log_debug("%s: device for pid %d is stopped",
__func__, pid);
else
log_warnx("%s: unexpected pid %d", __func__, pid);
free(dev);
}
}
void virtio_broadcast_imsg(struct vmd_vm *vm, uint16_t type, void *data,
uint16_t datalen)
{
struct virtio_dev *dev;
int ret;
SLIST_FOREACH(dev, &virtio_devs, dev_next) {
ret = imsg_compose_event(&dev->async_iev, type, 0, 0, -1, data,
datalen);
if (ret == -1) {
log_warnx("%s: failed to broadcast imsg type %u",
__func__, type);
}
}
}
void
virtio_stop(struct vmd_vm *vm)
{
return virtio_broadcast_imsg(vm, IMSG_VMDOP_PAUSE_VM, NULL, 0);
}
void
virtio_start(struct vmd_vm *vm)
{
return virtio_broadcast_imsg(vm, IMSG_VMDOP_UNPAUSE_VM, NULL, 0);
}
static void
virtio_dev_init(struct vmd_vm *vm, struct virtio_dev *dev, uint8_t pci_id,
uint16_t queue_size, uint16_t num_queues, uint64_t features)
{
size_t i;
uint16_t device_id;
if (num_queues > 0 && num_queues > VIRTIO_MAX_QUEUES)
fatalx("%s: num_queues too large", __func__);
device_id = pci_get_subsys_id(pci_id);
if (!device_id)
fatalx("%s: invalid pci device id %u", __func__, pci_id);
memset(dev, 0, sizeof(*dev));
dev->pci_id = pci_id;
dev->device_id = device_id;
dev->irq = pci_get_dev_irq(pci_id);
dev->isr = 0;
dev->vm_id = vm->vm_vmid;
dev->vmm_id = vm->vm_vmmid;
dev->device_feature = features;
dev->pci_cfg.config_generation = 0;
dev->cfg.device_feature = features;
dev->num_queues = num_queues;
dev->queue_size = queue_size;
dev->cfg.queue_size = queue_size;
dev->async_fd = -1;
dev->sync_fd = -1;
if (num_queues > 0) {
for (i = 0; i < num_queues; i++)
virtio_vq_init(dev, i);
}
}
void
virtio_vq_init(struct virtio_dev *dev, size_t idx)
{
struct virtio_vq_info *vq_info = NULL;
int v1 = (dev->device_feature & VIRTIO_F_VERSION_1) ? 1 : 0;
if (idx >= dev->num_queues)
fatalx("%s: invalid virtqueue index", __func__);
vq_info = &dev->vq[idx];
vq_info->q_gpa = 0;
vq_info->qs = dev->queue_size;
vq_info->mask = dev->queue_size - 1;
if (v1) {
vq_info->vq_enabled = 0;
vq_info->vq_availoffset = 0;
vq_info->vq_usedoffset = 0;
} else {
vq_info->vq_enabled = 1;
vq_info->vq_availoffset =
sizeof(struct vring_desc) * vq_info->qs;
vq_info->vq_usedoffset = VIRTQUEUE_ALIGN(
sizeof(struct vring_desc) * vq_info->qs +
sizeof(uint16_t) * (2 + vq_info->qs));
}
vq_info->last_avail = 0;
vq_info->notified_avail = 0;
}
static void
virtio_pci_add_cap(uint8_t pci_id, uint8_t cfg_type, uint8_t bar_id,
uint32_t dev_cfg_len)
{
struct virtio_pci_common_cap cap;
memset(&cap, 0, sizeof(cap));
cap.virtio.cap_vndr = PCI_CAP_VENDSPEC;
cap.virtio.cap_len = sizeof(struct virtio_pci_cap);
cap.virtio.bar = bar_id;
cap.virtio.cfg_type = cfg_type;
switch (cfg_type) {
case VIRTIO_PCI_CAP_COMMON_CFG:
cap.virtio.offset = VIO1_CFG_BAR_OFFSET;
cap.virtio.length = sizeof(struct virtio_pci_common_cfg);
break;
case VIRTIO_PCI_CAP_DEVICE_CFG:
cap.virtio.offset = VIO1_DEV_BAR_OFFSET;
cap.virtio.length = dev_cfg_len;
break;
case VIRTIO_PCI_CAP_ISR_CFG:
cap.virtio.offset = VIO1_ISR_BAR_OFFSET;
cap.virtio.length = sizeof(uint8_t);
break;
case VIRTIO_PCI_CAP_NOTIFY_CFG:
cap.virtio.offset = VIO1_NOTIFY_BAR_OFFSET;
cap.virtio.length = sizeof(uint16_t);
cap.notify.notify_off_multiplier = 0;
break;
default:
fatalx("%s: invalid pci capability config type %u", __func__,
cfg_type);
}
if (pci_add_capability(pci_id, &cap.pci) == -1) {
fatalx("%s: can't add capability for virtio pci device %u",
__func__, pci_id);
}
}
static int
virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev *dev)
{
char *nargv[12], num[32], vmm_fd[32], vm_name[VM_NAME_MAX], t[2];
pid_t dev_pid;
int sync_fds[2], async_fds[2], ret = 0;
size_t i, sz = 0;
struct viodev_msg msg;
struct virtio_dev *dev_entry;
struct imsg imsg;
struct imsgev *iev = &dev->sync_iev;
switch (dev->dev_type) {
case VMD_DEVTYPE_NET:
log_debug("%s: launching vionet%d", vm->vm_params.vmc_name,
dev->vionet.idx);
break;
case VMD_DEVTYPE_DISK:
log_debug("%s: launching vioblk%d", vm->vm_params.vmc_name,
dev->vioblk.idx);
break;
case VMD_DEVTYPE_SCSI:
log_debug("%s: launching vioscsi", vm->vm_params.vmc_name);
break;
default:
log_warn("%s: invalid device type", __func__);
return (EINVAL);
}
if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC,
sync_fds) == -1) {
log_warn("failed to create socketpair");
return (errno);
}
if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, PF_UNSPEC,
async_fds) == -1) {
log_warn("failed to create async socketpair");
return (errno);
}
dev_pid = fork();
if (dev_pid == -1) {
ret = errno;
log_warn("%s: fork failed", __func__);
goto err;
}
if (dev_pid > 0) {
close_fd(sync_fds[1]);
close_fd(async_fds[1]);
dev->dev_pid = dev_pid;
dev->sync_fd = sync_fds[1];
dev->async_fd = async_fds[1];
log_debug("%s: sending '%c' type device struct", __func__,
dev->dev_type);
sz = atomicio(vwrite, sync_fds[0], dev, sizeof(*dev));
if (sz != sizeof(*dev)) {
log_warnx("%s: failed to send device", __func__);
ret = EIO;
goto err;
}
if (virtio_dev_closefds(dev) == -1) {
log_warnx("%s: failed to close device data fds",
__func__);
goto err;
}
log_debug("%s: sending vm message for '%s'", __func__,
vm->vm_params.vmc_name);
sz = atomicio(vwrite, sync_fds[0], vm, sizeof(*vm));
if (sz != sizeof(*vm)) {
log_warnx("%s: failed to send vm details", __func__);
ret = EIO;
goto err;
}
if (imsgbuf_init(&iev->ibuf, sync_fds[0]) == -1) {
log_warn("%s: failed to init imsgbuf", __func__);
goto err;
}
imsgbuf_allow_fdpass(&iev->ibuf);
ret = imsgbuf_read_one(&iev->ibuf, &imsg);
if (ret == 0 || ret == -1) {
log_warnx("%s: failed to receive ready message from "
"'%c' type device", __func__, dev->dev_type);
ret = EIO;
goto err;
}
ret = 0;
viodev_msg_read(&imsg, &msg);
imsg_free(&imsg);
if (msg.type != VIODEV_MSG_READY) {
log_warnx("%s: expected ready message, got type %d",
__func__, msg.type);
ret = EINVAL;
goto err;
}
log_debug("%s: device reports ready via sync channel",
__func__);
dev->sync_fd = sync_fds[0];
dev->async_fd = async_fds[0];
vm_device_pipe(dev, virtio_dispatch_dev, NULL);
} else {
close_fd(async_fds[0]);
close_fd(sync_fds[0]);
close_fd(vm->vm_tty);
vm->vm_tty = -1;
if (vm->vm_cdrom != -1 && dev->dev_type != VMD_DEVTYPE_SCSI) {
close_fd(vm->vm_cdrom);
vm->vm_cdrom = -1;
}
SLIST_FOREACH(dev_entry, &virtio_devs, dev_next) {
if (dev_entry == dev)
continue;
if (virtio_dev_closefds(dev_entry) == -1)
fatalx("unable to close other virtio devs");
}
memset(num, 0, sizeof(num));
snprintf(num, sizeof(num), "%d", sync_fds[1]);
memset(vmm_fd, 0, sizeof(vmm_fd));
snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd);
memset(vm_name, 0, sizeof(vm_name));
snprintf(vm_name, sizeof(vm_name), "%s",
vm->vm_params.vmc_name);
t[0] = dev->dev_type;
t[1] = '\0';
i = 0;
nargv[i++] = env->argv0;
nargv[i++] = "-X";
nargv[i++] = num;
nargv[i++] = "-t";
nargv[i++] = t;
nargv[i++] = "-i";
nargv[i++] = vmm_fd;
nargv[i++] = "-p";
nargv[i++] = vm_name;
if (env->vmd_debug)
nargv[i++] = "-d";
if (env->vmd_verbose == 1)
nargv[i++] = "-v";
else if (env->vmd_verbose > 1)
nargv[i++] = "-vv";
nargv[i++] = NULL;
if (i > sizeof(nargv) / sizeof(nargv[0]))
fatalx("%s: nargv overflow", __func__);
execvp(nargv[0], nargv);
ret = errno;
log_warn("%s: failed to exec device", __func__);
_exit(ret);
}
return (ret);
err:
close_fd(sync_fds[0]);
close_fd(sync_fds[1]);
close_fd(async_fds[0]);
close_fd(async_fds[1]);
return (ret);
}
int
vm_device_pipe(struct virtio_dev *dev, void (*cb)(int, short, void *),
struct event_base *ev_base)
{
struct imsgev *iev = &dev->async_iev;
int fd = dev->async_fd;
log_debug("%s: initializing '%c' device pipe (fd=%d)", __func__,
dev->dev_type, fd);
if (imsgbuf_init(&iev->ibuf, fd) == -1)
fatal("imsgbuf_init");
imsgbuf_allow_fdpass(&iev->ibuf);
iev->handler = cb;
iev->data = dev;
iev->events = EV_READ;
imsg_event_add2(iev, ev_base);
return (0);
}
void
virtio_dispatch_dev(int fd, short event, void *arg)
{
struct virtio_dev *dev = (struct virtio_dev*)arg;
struct imsgev *iev = &dev->async_iev;
struct imsgbuf *ibuf = &iev->ibuf;
struct imsg imsg;
struct viodev_msg msg;
ssize_t n = 0;
uint32_t type;
if (event & EV_READ) {
if ((n = imsgbuf_read(ibuf)) == -1)
fatal("%s: imsgbuf_read", __func__);
if (n == 0) {
log_debug("%s: pipe dead (EV_READ)", __func__);
event_del(&iev->ev);
event_loopexit(NULL);
return;
}
}
if (event & EV_WRITE) {
if (imsgbuf_write(ibuf) == -1) {
if (errno == EPIPE) {
log_debug("%s: pipe dead (EV_WRITE)", __func__);
event_del(&iev->ev);
event_loopexit(NULL);
return;
}
fatal("%s: imsgbuf_write", __func__);
}
}
for (;;) {
if ((n = imsg_get(ibuf, &imsg)) == -1)
fatal("%s: imsg_get", __func__);
if (n == 0)
break;
type = imsg_get_type(&imsg);
switch (type) {
case IMSG_DEVOP_MSG:
viodev_msg_read(&imsg, &msg);
handle_dev_msg(&msg, dev);
break;
default:
log_warnx("%s: got non devop imsg %d", __func__, type);
break;
}
imsg_free(&imsg);
}
imsg_event_add(iev);
}
static int
handle_dev_msg(struct viodev_msg *msg, struct virtio_dev *gdev)
{
uint32_t vmm_id = gdev->vmm_id;
switch (msg->type) {
case VIODEV_MSG_KICK:
if (msg->state == INTR_STATE_ASSERT)
vcpu_assert_irq(vmm_id, msg->vcpu, msg->irq);
else if (msg->state == INTR_STATE_DEASSERT)
vcpu_deassert_irq(vmm_id, msg->vcpu, msg->irq);
break;
case VIODEV_MSG_READY:
log_debug("%s: device reports ready", __func__);
break;
case VIODEV_MSG_ERROR:
log_warnx("%s: device reported error", __func__);
break;
case VIODEV_MSG_INVALID:
case VIODEV_MSG_IO_READ:
case VIODEV_MSG_IO_WRITE:
default:
log_warnx("%s: unsupported device message type %d", __func__,
msg->type);
return (1);
}
return (0);
};
int
virtio_pci_io(int dir, uint16_t reg, uint32_t *data, uint8_t *intr,
void *cookie, uint8_t sz)
{
struct virtio_dev *dev = (struct virtio_dev *)cookie;
struct imsgbuf *ibuf = &dev->sync_iev.ibuf;
struct imsg imsg;
struct viodev_msg msg;
int ret = 0;
memset(&msg, 0, sizeof(msg));
msg.reg = reg;
msg.io_sz = sz;
if (dir == 0) {
msg.type = VIODEV_MSG_IO_WRITE;
msg.data = *data;
msg.data_valid = 1;
} else
msg.type = VIODEV_MSG_IO_READ;
if (msg.type == VIODEV_MSG_IO_WRITE) {
ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
sizeof(msg));
if (ret == -1) {
log_warn("%s: failed to send async io event to virtio"
" device", __func__);
return (ret);
}
if (imsgbuf_flush(ibuf) == -1) {
log_warnx("%s: imsgbuf_flush (write)", __func__);
return (-1);
}
} else {
ret = imsg_compose(ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
sizeof(msg));
if (ret == -1) {
log_warnx("%s: failed to send sync io event to virtio"
" device", __func__);
return (ret);
}
if (imsgbuf_flush(ibuf) == -1) {
log_warnx("%s: imsgbuf_flush (read)", __func__);
return (-1);
}
ret = imsgbuf_read_one(ibuf, &imsg);
if (ret == 0 || ret == -1) {
log_warn("%s: imsgbuf_read (n=%d)", __func__, ret);
return (-1);
}
viodev_msg_read(&imsg, &msg);
imsg_free(&imsg);
if (msg.type == VIODEV_MSG_IO_READ && msg.data_valid) {
DPRINTF("%s: got sync read response (reg=%s)", __func__,
virtio_reg_name(msg.reg));
*data = msg.data;
if (msg.state == INTR_STATE_ASSERT)
vcpu_assert_irq(dev->vmm_id, msg.vcpu, msg.irq);
else if (msg.state == INTR_STATE_DEASSERT)
vcpu_deassert_irq(dev->vmm_id, msg.vcpu, msg.irq);
} else {
log_warnx("%s: expected IO_READ, got %d", __func__,
msg.type);
return (-1);
}
}
return (0);
}
void
virtio_assert_irq(struct virtio_dev *dev, int vcpu)
{
struct viodev_msg msg;
int ret;
memset(&msg, 0, sizeof(msg));
msg.irq = dev->irq;
msg.vcpu = vcpu;
msg.type = VIODEV_MSG_KICK;
msg.state = INTR_STATE_ASSERT;
ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
&msg, sizeof(msg));
if (ret == -1)
log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
}
void
virtio_deassert_irq(struct virtio_dev *dev, int vcpu)
{
struct viodev_msg msg;
int ret;
memset(&msg, 0, sizeof(msg));
msg.irq = dev->irq;
msg.vcpu = vcpu;
msg.type = VIODEV_MSG_KICK;
msg.state = INTR_STATE_DEASSERT;
ret = imsg_compose_event(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
&msg, sizeof(msg));
if (ret == -1)
log_warnx("%s: failed to deassert irq %d", __func__, dev->irq);
}
static int
virtio_dev_closefds(struct virtio_dev *dev)
{
size_t i;
switch (dev->dev_type) {
case VMD_DEVTYPE_DISK:
for (i = 0; i < dev->vioblk.ndisk_fd; i++) {
close_fd(dev->vioblk.disk_fd[i]);
dev->vioblk.disk_fd[i] = -1;
}
break;
case VMD_DEVTYPE_NET:
close_fd(dev->vionet.data_fd);
dev->vionet.data_fd = -1;
break;
case VMD_DEVTYPE_SCSI:
close_fd(dev->vioscsi.cdrom_fd);
dev->vioscsi.cdrom_fd = -1;
break;
default:
log_warnx("%s: invalid device type", __func__);
return (-1);
}
close_fd(dev->async_fd);
dev->async_fd = -1;
close_fd(dev->sync_fd);
dev->sync_fd = -1;
return (0);
}
void
viodev_msg_read(struct imsg *imsg, struct viodev_msg *msg)
{
if (imsg_get_data(imsg, msg, sizeof(*msg)))
fatal("%s", __func__);
}
void
vionet_hostmac_read(struct imsg *imsg, struct vionet_dev *dev)
{
if (imsg_get_data(imsg, dev->hostmac, sizeof(dev->hostmac)))
fatal("%s", __func__);
}