#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <strings.h>
#include <librename.h>
#include <md5.h>
#include <sys/sysmacros.h>
#include <dirent.h>
#include <sys/mman.h>
#include <umem.h>
#include <sys/debug.h>
#include <libvarpd_impl.h>
static uint8_t varpd_persist_magic[4] = {
'v',
'a',
'r',
'p',
};
#define VARPD_PERSIST_MAXWRITE 4096
#define VARPD_PERSIST_VERSION_ONE 1
#define VARPD_PERSIST_SUFFIX ".varpd"
typedef struct varpd_persist_header {
uint8_t vph_magic[4];
uint32_t vph_version;
uint8_t vph_md5[16];
} varpd_persist_header_t;
void
libvarpd_persist_init(varpd_impl_t *vip)
{
vip->vdi_persistfd = -1;
if (rwlock_init(&vip->vdi_pfdlock, USYNC_THREAD, NULL) != 0)
libvarpd_panic("failed to create rw vdi_pfdlock");
}
void
libvarpd_persist_fini(varpd_impl_t *vip)
{
if (vip->vdi_persistfd != -1) {
if (close(vip->vdi_persistfd) != 0)
libvarpd_panic("failed to close persist fd %d: %d",
vip->vdi_persistfd, errno);
vip->vdi_persistfd = -1;
}
if (rwlock_destroy(&vip->vdi_pfdlock) != 0)
libvarpd_panic("failed to destroy rw vdi_pfdlock");
}
int
libvarpd_persist_enable(varpd_handle_t *vhp, const char *rootdir)
{
int fd;
struct stat st;
varpd_impl_t *vip = (varpd_impl_t *)vhp;
fd = open(rootdir, O_RDONLY);
if (fd < 0)
return (errno);
if (fstat(fd, &st) != 0) {
int ret = errno;
if (close(fd) != 0)
libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
rootdir, fd, errno);
return (ret);
}
if (!S_ISDIR(st.st_mode)) {
if (close(fd) != 0)
libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
rootdir, fd, errno);
return (EINVAL);
}
VERIFY0(rw_wrlock(&vip->vdi_pfdlock));
if (vip->vdi_persistfd != -1) {
VERIFY0(rw_unlock(&vip->vdi_pfdlock));
if (close(fd) != 0)
libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
rootdir, fd, errno);
return (EEXIST);
}
vip->vdi_persistfd = fd;
VERIFY0(rw_unlock(&vip->vdi_pfdlock));
return (0);
}
static int
libvarpd_persist_write(int fd, const void *buf, size_t buflen)
{
ssize_t ret;
off_t off = 0;
while (buflen > 0) {
ret = write(fd, (void *)((uintptr_t)buf + off),
MIN(buflen, VARPD_PERSIST_MAXWRITE));
if (ret == -1 && errno == EINTR)
continue;
if (ret == -1)
return (errno);
off += ret;
buflen -= ret;
}
return (0);
}
static int
libvarpd_persist_nvlist(int dirfd, uint64_t id, nvlist_t *nvl)
{
int err, fd;
size_t size;
varpd_persist_header_t hdr;
librename_atomic_t *lrap;
char *buf = NULL, *name;
if ((err = nvlist_pack(nvl, &buf, &size, NV_ENCODE_XDR, 0)) != 0)
return (err);
if (asprintf(&name, "%llu%s", (unsigned long long)id, ".varpd") == -1) {
err = errno;
free(buf);
return (err);
}
if ((err = librename_atomic_fdinit(dirfd, name, NULL, 0600, 0,
&lrap)) != 0) {
free(name);
free(buf);
return (err);
}
fd = librename_atomic_fd(lrap);
bzero(&hdr, sizeof (varpd_persist_header_t));
bcopy(varpd_persist_magic, hdr.vph_magic, sizeof (varpd_persist_magic));
hdr.vph_version = VARPD_PERSIST_VERSION_ONE;
md5_calc(hdr.vph_md5, buf, size);
if ((err = libvarpd_persist_write(fd, &hdr,
sizeof (varpd_persist_header_t))) != 0) {
librename_atomic_fini(lrap);
free(name);
free(buf);
return (err);
}
if ((err = libvarpd_persist_write(fd, buf, size)) != 0) {
librename_atomic_fini(lrap);
free(name);
free(buf);
return (err);
}
do {
err = librename_atomic_commit(lrap);
} while (err == EINTR);
librename_atomic_fini(lrap);
free(name);
free(buf);
return (err);
}
int
libvarpd_persist_instance(varpd_impl_t *vip, varpd_instance_t *inst)
{
int err = 0;
nvlist_t *nvl = NULL, *cvl = NULL;
VERIFY0(rw_rdlock(&vip->vdi_pfdlock));
if (vip->vdi_persistfd == -1)
goto out;
if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
goto out;
if ((err = nvlist_alloc(&cvl, NV_UNIQUE_NAME, 0)) != 0)
goto out;
if ((err = nvlist_add_uint64(nvl, "vri_id", inst->vri_id)) != 0)
goto out;
if ((err = nvlist_add_uint32(nvl, "vri_linkid", inst->vri_linkid)) != 0)
goto out;
if ((err = nvlist_add_uint32(nvl, "vri_dest",
(uint32_t)inst->vri_dest)) != 0)
goto out;
if ((err = nvlist_add_uint32(nvl, "vri_mode",
(uint32_t)inst->vri_mode)) != 0)
goto out;
if ((err = nvlist_add_string(nvl, "vri_plugin",
inst->vri_plugin->vpp_name)) != 0)
goto out;
err = inst->vri_plugin->vpp_ops->vpo_save(inst->vri_private, cvl);
if (err != 0)
goto out;
if ((err = nvlist_add_nvlist(nvl, "vri_private", cvl)) != 0)
goto out;
err = libvarpd_persist_nvlist(vip->vdi_persistfd, inst->vri_id, nvl);
out:
nvlist_free(nvl);
nvlist_free(cvl);
VERIFY0(rw_unlock(&vip->vdi_pfdlock));
return (err);
}
void
libvarpd_torch_instance(varpd_impl_t *vip, varpd_instance_t *inst)
{
char buf[32];
int ret;
VERIFY0(rw_rdlock(&vip->vdi_pfdlock));
if (vip->vdi_persistfd == -1) {
VERIFY0(rw_unlock(&vip->vdi_pfdlock));
return;
}
if (snprintf(buf, sizeof (buf), "%lld.varpd", inst->vri_id) >= 32)
libvarpd_panic("somehow exceeded static value for "
"libvarpd_torch_instance buffer");
do {
ret = unlinkat(vip->vdi_persistfd, buf, 0);
} while (ret == -1 && errno == EINTR);
if (ret != 0) {
switch (errno) {
case ENOENT:
break;
default:
libvarpd_panic("failed to unlinkat %d`%s: %s",
vip->vdi_persistfd, buf, strerror(errno));
}
}
VERIFY0(rw_unlock(&vip->vdi_pfdlock));
}
static int
libvarpd_persist_restore_instance(varpd_impl_t *vip, nvlist_t *nvl)
{
int err;
nvlist_t *pvl;
uint64_t id, flags, vid;
uint32_t linkid, dest, mode;
char *pluginstr;
varpd_plugin_t *plugin;
overlay_plugin_dest_t adest;
varpd_instance_t *inst, lookup;
if (nvlist_lookup_uint64(nvl, "vri_id", &id) != 0)
return (EINVAL);
if (nvlist_lookup_uint32(nvl, "vri_linkid", &linkid) != 0)
return (EINVAL);
if (nvlist_lookup_uint32(nvl, "vri_dest", &dest) != 0)
return (EINVAL);
if (nvlist_lookup_uint32(nvl, "vri_mode", &mode) != 0)
return (EINVAL);
if (nvlist_lookup_string(nvl, "vri_plugin", &pluginstr) != 0)
return (EINVAL);
if (nvlist_lookup_nvlist(nvl, "vri_private", &pvl) != 0)
return (EINVAL);
plugin = libvarpd_plugin_lookup(vip, pluginstr);
if (plugin == NULL)
return (EINVAL);
if (plugin->vpp_mode != mode)
return (EINVAL);
if (libvarpd_overlay_info(vip, linkid, &adest, &flags, &vid) != 0)
return (EINVAL);
if (dest != adest)
return (EINVAL);
inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT);
if (inst == NULL)
libvarpd_panic("failed to allocate instance for restore");
inst->vri_id = id_alloc_specific(vip->vdi_idspace, id);
if (inst->vri_id != id) {
umem_free(inst, sizeof (varpd_instance_t));
return (EINVAL);
}
inst->vri_linkid = linkid;
inst->vri_vnetid = vid;
inst->vri_mode = plugin->vpp_mode;
inst->vri_dest = dest;
inst->vri_plugin = plugin;
inst->vri_impl = vip;
inst->vri_flags = 0;
if (plugin->vpp_ops->vpo_restore(pvl, (varpd_provider_handle_t *)inst,
dest, &inst->vri_private) != 0) {
id_free(vip->vdi_idspace, id);
umem_free(inst, sizeof (varpd_instance_t));
return (EINVAL);
}
if (mutex_init(&inst->vri_lock, USYNC_THREAD | LOCK_ERRORCHECK,
NULL) != 0)
libvarpd_panic("failed to create vri_lock mutex");
mutex_enter(&vip->vdi_lock);
lookup.vri_id = inst->vri_id;
if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL)
libvarpd_panic("found duplicate instance with id %d",
lookup.vri_id);
avl_add(&vip->vdi_instances, inst);
lookup.vri_linkid = inst->vri_linkid;
if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL)
libvarpd_panic("found duplicate linstance with id %d",
lookup.vri_linkid);
avl_add(&vip->vdi_linstances, inst);
mutex_exit(&vip->vdi_lock);
if (plugin->vpp_ops->vpo_start(inst->vri_private) != 0) {
libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
return (EINVAL);
}
if (flags & OVERLAY_TARG_INFO_F_ACTIVE)
(void) libvarpd_overlay_disassociate(inst);
if (libvarpd_overlay_associate(inst) != 0) {
libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
return (EINVAL);
}
if (flags & OVERLAY_TARG_INFO_F_DEGRADED) {
if ((err = libvarpd_overlay_restore(inst)) != 0) {
libvarpd_panic("failed to restore instance %p: %d\n",
inst, err);
}
}
mutex_enter(&inst->vri_lock);
inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED;
mutex_exit(&inst->vri_lock);
return (0);
}
static int
libvarpd_persist_restore_one(varpd_impl_t *vip, int fd)
{
int err;
size_t fsize;
struct stat st;
void *buf, *datap;
varpd_persist_header_t *hdr;
uint8_t md5[16];
nvlist_t *nvl;
if (fstat(fd, &st) != 0)
return (errno);
if (st.st_size <= sizeof (varpd_persist_header_t))
return (EINVAL);
fsize = st.st_size - sizeof (varpd_persist_header_t);
buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (buf == MAP_FAILED)
return (errno);
hdr = buf;
if (bcmp(varpd_persist_magic, hdr->vph_magic,
sizeof (varpd_persist_magic)) != 0) {
if (munmap(buf, st.st_size) != 0)
libvarpd_panic("failed to munmap %p: %d", buf, errno);
return (EINVAL);
}
if (hdr->vph_version != VARPD_PERSIST_VERSION_ONE) {
if (munmap(buf, st.st_size) != 0)
libvarpd_panic("failed to munmap %p: %d", buf, errno);
return (EINVAL);
}
datap = (void *)((uintptr_t)buf + sizeof (varpd_persist_header_t));
md5_calc(md5, datap, fsize);
if (bcmp(md5, hdr->vph_md5, sizeof (uint8_t) * 16) != 0) {
if (munmap(buf, st.st_size) != 0)
libvarpd_panic("failed to munmap %p: %d", buf, errno);
return (EINVAL);
}
err = nvlist_unpack(datap, fsize, &nvl, 0);
if (munmap(buf, st.st_size) != 0)
libvarpd_panic("failed to munmap %p: %d", buf, errno);
if (err != 0)
return (EINVAL);
err = libvarpd_persist_restore_instance(vip, nvl);
nvlist_free(nvl);
return (err);
}
static int
libvarpd_check_degrade_cb(varpd_impl_t *vip, datalink_id_t linkid, void *arg)
{
varpd_instance_t *inst;
mutex_enter(&vip->vdi_lock);
for (inst = avl_first(&vip->vdi_instances); inst != NULL;
inst = AVL_NEXT(&vip->vdi_instances, inst)) {
if (inst->vri_linkid == linkid) {
mutex_exit(&vip->vdi_lock);
return (0);
}
}
mutex_exit(&vip->vdi_lock);
(void) libvarpd_overlay_degrade_datalink(vip, linkid,
"no varpd instance exists");
return (0);
}
static void
libvarpd_check_degrade(varpd_impl_t *vip)
{
(void) libvarpd_overlay_iter(vip, libvarpd_check_degrade_cb, NULL);
}
int
libvarpd_persist_restore(varpd_handle_t *vhp)
{
int dirfd;
int ret = 0;
DIR *dirp = NULL;
struct dirent *dp;
varpd_impl_t *vip = (varpd_impl_t *)vhp;
VERIFY0(rw_rdlock(&vip->vdi_pfdlock));
if ((dirfd = dup(vip->vdi_persistfd)) < 0) {
ret = errno;
goto out;
}
if ((dirp = fdopendir(dirfd)) == NULL) {
ret = errno;
if (close(dirfd) != 0)
libvarpd_panic("failed to close dirfd %d: %d",
dirfd, errno);
goto out;
}
for (;;) {
int fd;
uint64_t id;
char *eptr;
struct stat st;
errno = 0;
dp = readdir(dirp);
if (dp == NULL) {
ret = errno;
break;
}
if (strcmp(dp->d_name, ".") == 0 ||
strcmp(dp->d_name, "..") == 0)
continue;
errno = 0;
id = strtoull(dp->d_name, &eptr, 10);
if ((id == 0 && errno == EINVAL) ||
(id == ULLONG_MAX && errno == ERANGE))
continue;
if (strcmp(eptr, VARPD_PERSIST_SUFFIX) != 0)
continue;
fd = openat(vip->vdi_persistfd, dp->d_name, O_RDONLY);
if (fd < 0) {
ret = errno;
break;
}
if (fstat(fd, &st) != 0) {
ret = errno;
break;
}
if (!S_ISREG(st.st_mode)) {
if (close(fd) != 0)
libvarpd_panic("failed to close fd (%s) %d: "
"%d\n", dp->d_name, fd, errno);
continue;
}
ret = libvarpd_persist_restore_one(vip, fd);
if (close(fd) != 0)
libvarpd_panic("failed to close fd (%s) %d: "
"%d\n", dp->d_name, fd, errno);
if (ret != 0) {
if (unlinkat(vip->vdi_persistfd, dp->d_name, 0) != 0) {
ret = errno;
break;
}
}
}
libvarpd_check_degrade(vip);
out:
if (dirp != NULL)
(void) closedir(dirp);
VERIFY0(rw_unlock(&vip->vdi_pfdlock));
return (ret);
}
int
libvarpd_persist_disable(varpd_handle_t *vhp)
{
varpd_impl_t *vip = (varpd_impl_t *)vhp;
VERIFY0(rw_wrlock(&vip->vdi_pfdlock));
if (vip->vdi_persistfd == -1) {
mutex_exit(&vip->vdi_lock);
VERIFY0(rw_unlock(&vip->vdi_pfdlock));
return (ENOENT);
}
if (close(vip->vdi_persistfd) != 0)
libvarpd_panic("failed to close persist fd %d: %d",
vip->vdi_persistfd, errno);
vip->vdi_persistfd = -1;
VERIFY0(rw_unlock(&vip->vdi_pfdlock));
return (0);
}