#include <sys/types.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/errno.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <sys/fs/swapnode.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>
#include <sys/vmsystm.h>
#include <sys/physmem.h>
#include <sys/vfs_opreg.h>
/*
 * devinfo node of the driver instance. Set by physmem_attach() and handed
 * back by physmem_getinfo() for DDI_INFO_DEVT2DEVINFO.
 */
static dev_info_t *physmem_dip = NULL;
/*
 * Describes one physmem segment set up by a process. All segments that
 * belong to the same process are chained through ph_next off that process's
 * physmem_proc_hash entry. The address of this structure is also handed to
 * userland as the opaque "cookie" (see physmem_setup_addrs()).
 */
struct physmem_hash {
struct physmem_hash *ph_next; /* next segment of the same process */
uint64_t ph_base_pa; /* first physical address covered by the segment */
caddr_t ph_base_va; /* user virtual address that corresponds to ph_base_pa */
size_t ph_seg_len; /* length of the segment in bytes */
struct vnode *ph_vnode; /* vnode backing the segment's mapping */
};
/*
 * Per-process hash entry. One of these exists for every process that
 * currently owns at least one physmem segment; entries that hash to the
 * same bucket of pph[] are chained through pph_next.
 */
struct physmem_proc_hash {
struct proc *pph_proc; /* owning process (hash key) */
struct physmem_hash *pph_hash; /* head of this process's segment list */
struct physmem_proc_hash *pph_next; /* next entry in the same bucket */
};
/*
 * Number of buckets in the per-process hash table. PHYSMEM_HASH() masks with
 * (PPH_SIZE - 1), so this has to remain a power of two.
 */
#define PPH_SIZE 8
/* Hash table of per-process entries, indexed by PHYSMEM_HASH(proc). */
struct physmem_proc_hash *pph[PPH_SIZE];
/* Reader/writer lock taken around every walk or update of pph[]. */
krwlock_t pph_rwlock;
/* Bucket index for a process pointer: drop the low 8 bits, then mask. */
#define PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))
/*
 * Number of physmem vnodes currently allocated. physmem_detach() refuses to
 * detach while this is non-zero. Updated under physmem_mutex.
 */
uint64_t physmem_vnodecnt;
/* Protects physmem_vnodecnt (and the vnodeops teardown in detach). */
kmutex_t physmem_mutex;
/*
 * Forward declarations of the vnode operations that are installed through
 * physmem_vnodeops_template below.
 */
static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
enum seg_rw rw, struct cred *cr, caller_context_t *ct);
static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
struct cred *cred, caller_context_t *ct);
static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
struct cred *cred, caller_context_t *ct);
static void physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct);
/*
 * Template passed to vn_make_ops() by physmem_setup_vnops(). Only the four
 * operations listed here are supplied by this driver; the table is
 * terminated by a NULL/NULL pair.
 */
const fs_operation_def_t physmem_vnodeops_template[] = {
VOPNAME_GETPAGE, { .vop_getpage = physmem_getpage },
VOPNAME_ADDMAP, { .vop_addmap = physmem_addmap },
VOPNAME_DELMAP, { .vop_delmap = physmem_delmap },
VOPNAME_INACTIVE, { .vop_inactive = physmem_inactive },
NULL, NULL
};
/*
 * vnodeops built from the template. NULL until physmem_setup_vnops() has
 * run; reset to NULL again by physmem_detach().
 */
vnodeops_t *physmem_vnodeops = NULL;
/*
 * Unlink and free the calling process's entry in pph[], but only when that
 * entry no longer has any segments attached to it. Does nothing if the
 * process has no entry or the entry still owns segments.
 */
void
physmem_remove_hash_proc()
{
	struct physmem_proc_hash **linkp;
	struct physmem_proc_hash *doomed = NULL;

	rw_enter(&pph_rwlock, RW_WRITER);
	for (linkp = &pph[PHYSMEM_HASH(curproc)]; *linkp != NULL;
	    linkp = &(*linkp)->pph_next) {
		struct physmem_proc_hash *cur = *linkp;

		if (cur->pph_proc != curproc || cur->pph_hash != NULL)
			continue;

		/* Found an empty entry for this process: splice it out. */
		doomed = cur;
		*linkp = cur->pph_next;
		break;
	}
	rw_exit(&pph_rwlock);

	/* The entry is already unreachable, so free it outside the lock. */
	if (doomed != NULL)
		kmem_free(doomed, sizeof (struct physmem_proc_hash));
}
/*
 * Add the segment described by php to the calling process's segment list,
 * creating the per-process hash entry if the process does not have one yet.
 *
 * Returns 0 on success, or ERANGE when the physical range of php overlaps a
 * segment the process has already registered (php is not inserted in that
 * case and remains owned by the caller).
 *
 * The overlap test is written in subtraction form ("a - b < len" after
 * checking "a >= b") so that it cannot be defeated by base + length wrapping
 * around in unsigned arithmetic.
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * The process already has an entry: make sure the
			 * new range does not overlap any existing segment.
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa - temp->ph_base_pa <
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa - php->ph_base_pa <
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/*
			 * An entry may have appeared while the lock was
			 * dropped to allocate newp; discard the spare.
			 */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	/* No entry for this process; install the pre-allocated one. */
	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	/*
	 * Drop the lock before the sleeping allocation, then retry the
	 * whole lookup since the table may have changed meanwhile.
	 */
	rw_exit(&pph_rwlock);
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}
/*
 * Find the segment of process procp that fully contains the physical range
 * [req_paddr, req_paddr + len).
 *
 * The caller must hold pph_rwlock. Returns the matching segment, or NULL
 * when the process has no segment covering the whole range.
 *
 * The containment test avoids computing req_paddr + len or base + seg_len
 * directly: either sum can wrap around for addresses near the top of the
 * range and would then produce a false match.
 */
struct physmem_hash *
physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
{
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	for (proc_hp = pph[PHYSMEM_HASH(procp)]; proc_hp != NULL;
	    proc_hp = proc_hp->pph_next) {
		if (proc_hp->pph_proc != procp)
			continue;

		for (php = proc_hp->pph_hash; php != NULL;
		    php = php->ph_next) {
			/*
			 * start >= base, len fits in the segment, and the
			 * offset of start leaves at least len bytes.
			 */
			if (req_paddr >= php->ph_base_pa &&
			    len <= php->ph_seg_len &&
			    req_paddr - php->ph_base_pa <=
			    php->ph_seg_len - len)
				return (php);
		}
	}
	return (NULL);
}
int
physmem_validate_cookie(uint64_t p_cookie)
{
int index;
struct physmem_proc_hash *proc_hp;
struct physmem_hash *php;
ASSERT(rw_lock_held(&pph_rwlock));
index = PHYSMEM_HASH(curproc);
proc_hp = pph[index];
while (proc_hp != NULL) {
if (proc_hp->pph_proc == curproc) {
php = proc_hp->pph_hash;
while (php != NULL) {
if ((uint64_t)(uintptr_t)php == p_cookie) {
return (1);
}
php = php->ph_next;
}
}
proc_hp = proc_hp->pph_next;
}
return (0);
}
/*
 * Remove and free the calling process's segment whose backing vnode is vp.
 *
 * Returns 1 when a segment was found and freed, 0 when the process has no
 * segment for that vnode.
 */
int
physmem_remove_vnode_hash(vnode_t *vp)
{
	struct physmem_proc_hash *owner;
	struct physmem_hash **linkp;
	struct physmem_hash *doomed = NULL;

	rw_enter(&pph_rwlock, RW_WRITER);
	for (owner = pph[PHYSMEM_HASH(curproc)];
	    owner != NULL && doomed == NULL; owner = owner->pph_next) {
		if (owner->pph_proc != curproc)
			continue;

		for (linkp = &owner->pph_hash; *linkp != NULL;
		    linkp = &(*linkp)->ph_next) {
			if ((*linkp)->ph_vnode != vp)
				continue;

			/* Unlink the first match and stop searching. */
			doomed = *linkp;
			*linkp = doomed->ph_next;
			break;
		}
	}
	rw_exit(&pph_rwlock);

	if (doomed == NULL)
		return (0);

	/* Already unlinked, so it is safe to free without the lock. */
	kmem_free(doomed, sizeof (*doomed));
	return (1);
}
/*
 * Build physmem_vnodeops from physmem_vnodeops_template.
 *
 * Panics if the vnodeops have already been created. Returns 0 on success or
 * the error reported by vn_make_ops(), after logging a warning.
 */
int
physmem_setup_vnops()
{
	int rc;

	if (physmem_vnodeops != NULL)
		cmn_err(CE_PANIC, "physmem vnodeops already set\n");

	rc = vn_make_ops("physmem", physmem_vnodeops_template,
	    &physmem_vnodeops);
	if (rc == 0)
		return (0);

	cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
	return (rc);
}
/*
 * Set up a physmem segment for the calling process.
 *
 * Reserves a range of user virtual addresses (at pspp->user_va if it is
 * non-zero, otherwise at an address picked by map_addr()), backs it with a
 * freshly allocated physmem vnode whose offsets are the physical addresses
 * starting at pspp->req_paddr, and records the segment in the per-process
 * hash.
 *
 * On success pspp->user_va holds the chosen address and pspp->cookie the
 * opaque handle for physmem_destroy_addrs().
 *
 * Returns 0 on success, or:
 *   EINVAL  unaligned address/length, zero length, or a physical range
 *           that wraps around the end of the address space
 *   ENOMEM  no suitable hole in the address space
 *   ERANGE  (from physmem_add_hash) overlaps an existing segment
 *   other   whatever as_map() reports
 */
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checks on the request. */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);
	/*
	 * A zero-length segment is meaningless, and a range that wraps
	 * would break the range arithmetic done on the hash entries.
	 */
	if (len == 0 || base_pa + len < base_pa)
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Count the vnode first so the driver cannot detach under us. */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	/* Shared mapping of vp; vnode offset == physical address. */
	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		/* The caller chose the address: it must be a free hole. */
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
			goto fail;
		}
	} else {
		/* Pick an address on the caller's behalf. */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}

	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);
	if (ret != 0)
		goto fail;
	as_rangeunlock(as);

	php->ph_base_pa = base_pa;
	php->ph_base_va = uvaddr;
	php->ph_seg_len = len;
	pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
	pspp->cookie = (uint64_t)(uintptr_t)php;

	ret = physmem_add_hash(php);
	if (ret == 0)
		return (0);

	/*
	 * The mapping exists but could not be registered. Tearing the
	 * mapping down is expected to release the vnode (and its count)
	 * through physmem_delmap()/physmem_inactive(), so only php is
	 * freed here.
	 */
	(void) as_unmap(as, uvaddr, len);
	kmem_free(php, sizeof (*php));
	return (ret);

fail:
	/* Nothing was mapped: undo the allocations made above. */
	as_rangeunlock(as);
	vn_free(vp);
	kmem_free(php, sizeof (*php));
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
	return (ret);
}
static int
physmem_map_addrs(struct physmem_map_param *pmpp)
{
caddr_t uvaddr;
page_t *pp;
uint64_t req_paddr;
struct vnode *vp;
int ret = 0;
struct physmem_hash *php;
uint_t flags = 0;
ASSERT(pmpp != NULL);
req_paddr = pmpp->req_paddr;
if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
return (EINVAL);
rw_enter(&pph_rwlock, RW_READER);
php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
if (php == NULL) {
rw_exit(&pph_rwlock);
return (EINVAL);
}
vp = php->ph_vnode;
uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
rw_exit(&pph_rwlock);
pp = page_numtopp_nolock(btop((size_t)req_paddr));
if (pp == NULL) {
pmpp->ret_va = 0;
return (EPERM);
}
if (pp->p_vnode == vp) {
ASSERT(pp->p_offset == (u_offset_t)req_paddr);
pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
return (0);
}
if (pmpp->flags & PHYSMEM_CAGE)
flags = CAPTURE_GET_CAGE;
if (pmpp->flags & PHYSMEM_RETIRED)
flags |= CAPTURE_GET_RETIRED;
ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);
if (ret != 0) {
pmpp->ret_va = 0;
return (ret);
} else {
pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
return (0);
}
}
/*
 * Page-capture callback registered for PC_PHYSMEM by physmem_attach().
 *
 * pp is the captured page and arg is the process the capture was requested
 * for. The page is zeroed and hashed into the vnode of the segment of that
 * process which covers the page's physical address.
 *
 * Returns 0 when the page was hashed in. Otherwise the page is freed and
 * either 1 (memory is low, or the process no longer has a segment covering
 * the page) or -1 (asynchronous request for the current process, vnode page
 * mutex already owned by this thread, or page_hashin() failed) is returned.
 * NOTE(review): the meaning the capture framework attaches to 1 versus -1
 * is not visible in this file -- confirm against the page capture code.
 */
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr;
	struct physmem_hash *php;

	/* Check pp before it is dereferenced, not after. */
	ASSERT(pp != NULL);
	paddr = (u_offset_t)ptob(pp->p_pagenum);

	/* Refuse to take the page when available memory is low. */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/* Asynchronous requests on behalf of the current process fail. */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* Only zeroed pages are ever handed out. */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		/* No outstanding segment wants this page any more. */
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * Back off if this thread already owns the vnode's page mutex;
	 * presumably page_hashin() would need to take it again.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		page_free(pp, 1);
		return (-1);
	}

	page_downgrade(pp);

	/* The page is now held long term: account for it. */
	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}
/*
 * Tear down the segment identified by p_cookie.
 *
 * The cookie is checked against the calling process's segment list before
 * it is dereferenced. Returns EINVAL for an unknown cookie; otherwise the
 * segment's address range is unmapped and 0 is returned (the result of
 * as_unmap() is ignored).
 */
int
physmem_destroy_addrs(uint64_t p_cookie)
{
	struct physmem_hash *seg_ent =
	    (struct physmem_hash *)(uintptr_t)p_cookie;
	struct as *as = curproc->p_as;
	caddr_t seg_va;
	size_t seg_len;

	rw_enter(&pph_rwlock, RW_READER);
	if (physmem_validate_cookie(p_cookie) == 0) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}
	/* Copy what is needed while the entry is known to be valid. */
	seg_len = seg_ent->ph_seg_len;
	seg_va = seg_ent->ph_base_va;
	rw_exit(&pph_rwlock);

	(void) as_unmap(as, seg_va, seg_len);
	return (0);
}
/*
 * VOP_GETPAGE for physmem vnodes.
 *
 * Looks up the page at (vp, off) with a shared lock. If it is present it is
 * returned as the single entry of the NULL-terminated list pl[] with
 * PROT_ALL protections and 0 is returned; otherwise ENOMEM is returned.
 * Only single-page requests are expected (asserted).
 */
static int
physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr, caller_context_t *ct)
{
	page_t *found;

	ASSERT(len == PAGESIZE);
	ASSERT(AS_READ_HELD(seg->s_as));

	found = page_lookup(vp, off, SE_SHARED);
	if (found == NULL)
		return (ENOMEM);

	pl[0] = found;
	pl[1] = NULL;
	*protp = PROT_ALL;
	return (0);
}
/*
 * VOP_ADDMAP for physmem vnodes: only the address space of the calling
 * process may map the vnode. Returns 0 when as is the caller's address
 * space and EINVAL otherwise.
 */
static int
physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct)
{
	return ((curproc->p_as == as) ? 0 : EINVAL);
}
/*
 * VOP_DELMAP for physmem vnodes: drops a hold on the vnode and returns 0.
 * NOTE(review): presumably this is the last hold, so that the release ends
 * up in physmem_inactive() -- confirm against the vnode reference counting.
 */
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
struct cred *cred, caller_context_t *ct)
{
VN_RELE(vp);
return (0);
}
/*
 * VOP_INACTIVE for physmem vnodes: destroy every page still attached to the
 * vnode, free the vnode, drop the calling process's hash entry if it has
 * become empty, and decrement the vnode count.
 *
 * The vnode is removed from the segment hash first. If it was not in the
 * hash it is expected to have no pages (asserted) and only the common
 * teardown is performed.
 */
static void
physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
{
	page_t *pp;

	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		goto done;
	}

	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;

		if (page_tryupgrade(pp)) {
			/*
			 * NOTE(review): p_lckcnt is presumably set so that
			 * page_destroy() performs the availrmem accounting
			 * that undoes the decrement in map_page_proc().
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
			continue;
		}

		/*
		 * Could not upgrade the lock. Look the page up again and
		 * retry from the head of the list.
		 */
		rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
		if (rpp == NULL) {
			/* Page vanished meanwhile; there is nothing to unlock. */
			continue;
		}
		if (rpp != pp) {
			page_unlock(rpp);
			continue;
		}
		page_unlock(pp);
	}

done:
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}
/*
 * ioctl entry point.
 *
 *   PHYSMEM_SETUP    arg points to a struct physmem_setup_param; it is
 *                    copied in, passed to physmem_setup_addrs() and copied
 *                    back out.
 *   PHYSMEM_MAP      arg points to a struct physmem_map_param; it is copied
 *                    in, passed to physmem_map_addrs() and copied back out.
 *   PHYSMEM_DESTROY  arg points to a uint64_t cookie that is passed to
 *                    physmem_destroy_addrs().
 *
 * Returns EFAULT when a copy fails (this takes precedence over the result
 * of the operation), ENOTSUP for an unknown command, and the result of the
 * operation otherwise. The parameter block is copied back out even when the
 * operation itself failed.
 */
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	void *uarg = (void *)arg;
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP: {
		struct physmem_setup_param psp;

		if (ddi_copyin(uarg, &psp, sizeof (psp), 0))
			return (EFAULT);
		ret = physmem_setup_addrs(&psp);
		if (ddi_copyout(&psp, uarg, sizeof (psp), 0))
			return (EFAULT);
		return (ret);
	}

	case PHYSMEM_MAP: {
		struct physmem_map_param pmp;

		if (ddi_copyin(uarg, &pmp, sizeof (pmp), 0))
			return (EFAULT);
		ret = physmem_map_addrs(&pmp);
		if (ddi_copyout(&pmp, uarg, sizeof (pmp), 0))
			return (EFAULT);
		return (ret);
	}

	case PHYSMEM_DESTROY: {
		uint64_t cookie;

		if (ddi_copyin(uarg, &cookie, sizeof (cookie), 0))
			return (EFAULT);
		return (physmem_destroy_addrs(cookie));
	}

	default:
		return (ENOTSUP);
	}
}
/*
 * open entry point.
 *
 * The device must be opened for both reading and writing (EINVAL
 * otherwise) and the caller must pass both secpolicy_resource() and
 * secpolicy_lock_memory(); the error of the first failing check is
 * returned. The first successful open logs a one-time notice.
 */
static int
physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	static int msg_printed = 0;
	const int need = FWRITE | FREAD;
	int err;

	if ((flag & need) != need)
		return (EINVAL);

	err = secpolicy_resource(credp);
	if (err != 0)
		return (err);

	err = secpolicy_lock_memory(credp);
	if (err != 0)
		return (err);

	if (msg_printed != 0)
		return (0);

	cmn_err(CE_NOTE, "!driver has been opened. This driver may "
	    "take out long term locks on pages which may impact "
	    "dynamic reconfiguration events");
	msg_printed = 1;
	return (0);
}
/*
 * close entry point: nothing to do here, always returns 0.
 */
static int
physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
return (0);
}
/*
 * getinfo entry point.
 *
 * DDI_INFO_DEVT2DEVINFO returns the devinfo node saved at attach time and
 * DDI_INFO_DEVT2INSTANCE returns the minor number of the dev_t passed in
 * arg. Any other command fails with DDI_FAILURE.
 */
static int
physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
    void *arg, void **resultp)
{
	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
		*resultp = physmem_dip;
		return (DDI_SUCCESS);
	}

	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}
/*
 * attach entry point.
 *
 * DDI_RESUME succeeds without doing anything. For DDI_ATTACH the minor node
 * is created, the vnodeops are built, the process hash table is cleared and
 * map_page_proc() is registered as the PC_PHYSMEM page-capture callback.
 * Any other command, or a failure along the way, returns DDI_FAILURE.
 */
static int
physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int bucket;

	switch (cmd) {
	case DDI_RESUME:
		return (DDI_SUCCESS);
	case DDI_ATTACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	physmem_dip = dip;

	/* Driver-private state: vnodeops first, undo the node on failure. */
	if (physmem_setup_vnops() != 0) {
		ddi_remove_minor_node(dip, ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	bucket = 0;
	while (bucket < PPH_SIZE) {
		pph[bucket] = NULL;
		bucket++;
	}

	page_capture_register_callback(PC_PHYSMEM, 10000,
	    map_page_proc);

	return (DDI_SUCCESS);
}
/*
 * detach entry point.
 *
 * DDI_SUSPEND succeeds without doing anything. DDI_DETACH is refused with
 * DDI_FAILURE while any physmem vnode is still outstanding; otherwise the
 * vnodeops are freed, the PC_PHYSMEM page-capture callback is unregistered
 * and the minor node is removed. Any other command returns DDI_FAILURE.
 *
 * Only DDI_SUCCESS or DDI_FAILURE is ever returned; the busy case used to
 * leak an errno value (EBUSY) through this DDI entry point.
 */
static int
physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;

	if (cmd == DDI_SUSPEND)
		return (DDI_SUCCESS);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(physmem_dip == dip);

	mutex_enter(&physmem_mutex);
	if (physmem_vnodecnt != 0) {
		/* Segments are still in use: the driver is busy. */
		ret = DDI_FAILURE;
	} else if (physmem_vnodeops != NULL) {
		vn_freevnodeops(physmem_vnodeops);
		physmem_vnodeops = NULL;
		page_capture_unregister_callback(PC_PHYSMEM);
	}
	mutex_exit(&physmem_mutex);

	if (ret == DDI_SUCCESS)
		ddi_remove_minor_node(dip, ddi_get_name(dip));
	return (ret);
}
/*
 * Character/block entry points. Only open, close and ioctl are implemented
 * by this driver. NOTE(review): the initializer is positional; the slot
 * names in the comments follow the documented cb_ops(9S) member order and
 * are not visible in this file -- verify against <sys/devops.h>.
 */
static struct cb_ops physmem_cb_ops = {
physmem_open, /* open */
physmem_close, /* close */
nodev, /* strategy */
nodev, /* print */
nodev, /* dump */
nodev, /* read */
nodev, /* write */
physmem_ioctl, /* ioctl */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
nochpoll, /* chpoll */
ddi_prop_op, /* prop_op */
NULL, /* streamtab */
D_NEW | D_MP | D_DEVMAP, /* driver flags */
CB_REV, /* cb_ops revision */
NULL, /* aread */
NULL /* awrite */
};
/*
 * Device operations. NOTE(review): the initializer is positional; the slot
 * names in the comments follow the documented dev_ops(9S) member order and
 * are not visible in this file -- verify against <sys/devops.h>.
 */
static struct dev_ops physmem_ops = {
DEVO_REV, /* dev_ops revision */
0, /* reference count */
physmem_getinfo, /* getinfo */
nulldev, /* identify */
nulldev, /* probe */
physmem_attach, /* attach */
physmem_detach, /* detach */
nodev, /* reset */
&physmem_cb_ops, /* cb_ops */
NULL, /* bus_ops */
NULL, /* power */
ddi_quiesce_not_needed, /* quiesce */
};
/* Module linkage element describing this module as a device driver. */
static struct modldrv modldrv = {
&mod_driverops,
"physmem driver", /* description string */
&physmem_ops
};
/* Module linkage passed to mod_install(), mod_info() and mod_remove(). */
static struct modlinkage modlinkage = {
MODREV_1,
&modldrv,
NULL /* terminates the list of linkage elements */
};
/*
 * Module load entry point: installs the module and returns the result of
 * mod_install().
 */
int
_init(void)
{
return (mod_install(&modlinkage));
}
/*
 * Module information entry point: returns the result of mod_info() for
 * this module's linkage.
 */
int
_info(struct modinfo *modinfop)
{
return (mod_info(&modlinkage, modinfop));
}
/*
 * Module unload entry point: removes the module and returns the result of
 * mod_remove().
 */
int
_fini(void)
{
return (mod_remove(&modlinkage));
}