root/usr/src/uts/i86xpv/io/privcmd.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/xpv_user.h>

#include <sys/types.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/cred.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/vmsystm.h>
#include <sys/sdt.h>
#include <sys/hypervisor.h>
#include <sys/xen_errno.h>
#include <sys/policy.h>

#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_mf.h>

#include <xen/sys/privcmd.h>
#include <sys/privcmd_impl.h>

/*
 * dev_info node of the privcmd device.  Set by privcmd_attach(), cleared
 * by privcmd_detach(), and handed back by privcmd_getinfo().
 */
static dev_info_t *privcmd_devi;

/*
 * Info entry point: answer DDI_INFO_DEVT2DEVINFO and DDI_INFO_DEVT2INSTANCE
 * queries for the PRIVCMD_MINOR minor node.  "arg" carries the dev_t being
 * asked about.  On success *result is the saved dev_info pointer or the
 * instance number (always 0) respectively.  Any other query, or any other
 * minor number, yields DDI_FAILURE and leaves *result untouched.
 */
/*ARGSUSED*/
static int
privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
{
        if (cmd != DDI_INFO_DEVT2DEVINFO && cmd != DDI_INFO_DEVT2INSTANCE)
                return (DDI_FAILURE);

        if (getminor((dev_t)arg) != PRIVCMD_MINOR)
                return (DDI_FAILURE);

        if (cmd == DDI_INFO_DEVT2DEVINFO)
                *result = privcmd_devi;
        else
                *result = 0;

        return (DDI_SUCCESS);
}

/*
 * Attach entry point.  Only DDI_ATTACH is handled: create the character
 * minor node PRIVCMD_NODE/PRIVCMD_MINOR, remember the dev_info pointer in
 * privcmd_devi and report the device.  Returns DDI_SUCCESS, or DDI_FAILURE
 * for any other command or if the minor node cannot be created.
 */
static int
privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        int rv;

        switch (cmd) {
        case DDI_ATTACH:
                break;
        default:
                return (DDI_FAILURE);
        }

        rv = ddi_create_minor_node(devi, PRIVCMD_NODE, S_IFCHR,
            PRIVCMD_MINOR, DDI_PSEUDO, 0);
        if (rv != DDI_SUCCESS)
                return (DDI_FAILURE);

        privcmd_devi = devi;
        ddi_report_dev(devi);

        return (DDI_SUCCESS);
}

/*
 * Detach entry point.  For DDI_DETACH, remove all of the device's minor
 * nodes and forget the saved dev_info pointer; every other command is
 * refused with DDI_FAILURE.
 */
static int
privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
        if (cmd == DDI_DETACH) {
                ddi_remove_minor_node(devi, NULL);
                privcmd_devi = NULL;
                return (DDI_SUCCESS);
        }

        return (DDI_FAILURE);
}

/*
 * Open entry point.  The only check made is that the minor number is
 * PRIVCMD_MINOR; returns 0 if so and ENXIO otherwise.
 */
/*ARGSUSED1*/
static int
privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
{
        if (getminor(*dev) != PRIVCMD_MINOR)
                return (ENXIO);

        return (0);
}

/*
 * Map a contiguous set of machine frames in a foreign domain.
 * Used in the following way:
 *
 *      privcmd_mmap_t p;
 *      privcmd_mmap_entry_t e;
 *
 *      addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
 *      p.num = number of privcmd_mmap_entry_t's
 *      p.dom = domid;
 *      p.entry = &e;
 *      e.va = addr;
 *      e.mfn = mfn;
 *      e.npages = btopr(size);
 *      ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
 *
 * uarg is the user address of the privcmd_mmap_t and mode is the ioctl
 * mode, passed through to ddi_copyin().  Entries are processed in order
 * and processing stops at the first failure; entries that were already
 * handed to segmf_add_mfns() are not undone.
 *
 * Returns 0 on success, otherwise:
 *      EFAULT  - the request or one of its entries could not be copied in
 *      ENOTSUP - the target domain is DOMID_SELF
 *      EINVAL  - an entry names MFN_INVALID, or the range starting at
 *                e.va and spanning e.npages pages is not wholly
 *                contained in a single segment of the calling process
 *      or whatever segmf_add_mfns() returned for the failing entry.
 */
/*ARGSUSED2*/
int
do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
{
        privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
        privcmd_mmap_entry_t *umme;
        struct as *as = curproc->p_as;
        struct seg *seg;
        int i, error = 0;

        if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
                return (EFAULT);

        DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
            privcmd_mmap_entry_t *, mmc->entry);

        if (mmc->dom == DOMID_SELF) {
                error = ENOTSUP;        /* Too paranoid? */
                goto done;
        }

        for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
                privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
                caddr_t addr;

                if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
                        error = EFAULT;
                        break;
                }

                DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
                    ulong_t, mme->npages);

                if (mme->mfn == MFN_INVALID) {
                        error = EINVAL;
                        break;
                }

                addr = (caddr_t)mme->va;

                /*
                 * Find the segment we want to mess with, then add
                 * the mfn range to the segment.
                 */
                AS_LOCK_ENTER(as, RW_READER);
                seg = as_findseg(as, addr, 0);
                if (seg == NULL || addr < seg->s_base ||
                    addr >= seg->s_base + seg->s_size) {
                        /*
                         * Nothing visible here guarantees that the
                         * segment as_findseg() hands back actually
                         * contains addr, so verify both ends explicitly
                         * before using addr as an offset into it.
                         */
                        error = EINVAL;
                } else {
                        /*
                         * npages comes straight from the user.  Compare
                         * page counts rather than computing
                         * addr + mmu_ptob(npages), which could wrap
                         * around and slip past the range check.
                         */
                        size_t room = (size_t)
                            ((seg->s_base + seg->s_size) - addr);

                        if (mme->npages > room / mmu_ptob((size_t)1))
                                error = EINVAL;
                        else
                                error = segmf_add_mfns(seg, addr,
                                    mme->mfn, mme->npages, mmc->dom);
                }
                AS_LOCK_EXIT(as);

                if (error != 0)
                        break;
        }

done:
        DTRACE_XPV1(mmap__end, int, error);

        return (error);
}

/*
 * Set up the address range to map to an array of mfns in
 * a foreign domain.  Used in the following way:
 *
 *      privcmd_mmapbatch_t p;
 *
 *      addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
 *      p.num = number of pages
 *      p.dom = domid
 *      p.addr = addr;
 *      p.arr = array of mfns, indexed 0 .. p.num - 1
 *      ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
 *
 * uarg is the user address of the privcmd_mmapbatch_t and mode is the
 * ioctl mode, passed through to ddi_copyin().  The whole range starting
 * at p.addr must lie inside one segment of the calling process.  Pages
 * are processed one at a time with the address-space lock held as reader:
 *
 *  - an entry equal to MFN_INVALID is skipped and left as-is in the
 *    user's array;
 *  - an entry that segmf_add_mfns() refuses is written back to the user's
 *    array with XEN_DOMCTL_PFINFO_XTAB or'ed in, and processing continues.
 *
 * Returns 0 on success, EFAULT if the request or an array element cannot
 * be read or written, or EINVAL if the range is not inside one segment.
 */
/*ARGSUSED2*/
static int
do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
{
        privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
        struct as *as = curproc->p_as;
        struct seg *seg;
        int i, error = 0;
        caddr_t addr;
        ulong_t *ulp;

        if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
                return (EFAULT);

        DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
            caddr_t, mmb->addr);

        addr = (caddr_t)mmb->addr;
        AS_LOCK_ENTER(as, RW_READER);
        /*
         * Nothing visible here guarantees that the segment as_findseg()
         * hands back actually contains addr, so check the lower bound as
         * well as the upper one before using addr as an offset into it.
         */
        if ((seg = as_findseg(as, addr, 0)) == NULL ||
            addr < seg->s_base ||
            addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
                error = EINVAL;
                goto done;
        }

        for (i = 0, ulp = mmb->arr;
            i < mmb->num; i++, addr += PAGESIZE, ulp++) {
                mfn_t mfn;

                if (fulword(ulp, &mfn) != 0) {
                        error = EFAULT;
                        break;
                }

                if (mfn == MFN_INVALID) {
                        /*
                         * This mfn is invalid and should not be added to
                         * segmf, as we'd only cause an immediate EFAULT when
                         * we tried to fault it in.  Nothing is written back
                         * to the user's array for this entry.
                         */
                        continue;
                }

                if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
                        continue;

                /*
                 * Tell the process that this MFN could not be mapped, so it
                 * won't later try to access it.
                 */
                mfn |= XEN_DOMCTL_PFINFO_XTAB;
                if (sulword(ulp, mfn) != 0) {
                        error = EFAULT;
                        break;
                }
        }

done:
        AS_LOCK_EXIT(as);

        DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
            mmb->addr);

        return (error);
}

/*
 * ioctl entry point.  The caller must pass secpolicy_xvm_control() (else
 * EPERM) and must use the native data model (else EOVERFLOW), since every
 * argument structure is a -native- data type.
 *
 * IOCTL_PRIVCMD_HYPERCALL is always dispatched; the two mapping ioctls are
 * dispatched only when DOMAIN_IS_PRIVILEGED(xen_info) holds.  Anything
 * that is not dispatched fails with EINVAL.
 */
/*ARGSUSED*/
static int
privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
{
        void *uarg = (void *)arg;
        int error = EINVAL;

        if (secpolicy_xvm_control(cr))
                return (EPERM);

        if ((mode & FMODELS) != FNATIVE)
                return (EOVERFLOW);

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                error = do_privcmd_hypercall(uarg, mode, cr, rval);
                break;
        case IOCTL_PRIVCMD_MMAP:
                if (DOMAIN_IS_PRIVILEGED(xen_info))
                        error = do_privcmd_mmap(uarg, mode, cr);
                break;
        case IOCTL_PRIVCMD_MMAPBATCH:
                if (DOMAIN_IS_PRIVILEGED(xen_info))
                        error = do_privcmd_mmapbatch(uarg, mode, cr);
                break;
        default:
                break;
        }

        return (error);
}

/*
 * segmap entry point: create a segmf segment of "len" bytes in "as".
 * The real magic happens in the segmf segment driver.
 *
 * The caller must pass secpolicy_xvm_control() (else EPERM), and the
 * mapping *must* be MAP_SHARED at offset 0 (else EINVAL).  Without
 * MAP_FIXED an address is chosen with map_addr() and ENOMEM is returned
 * if none is available; with MAP_FIXED whatever is currently mapped at
 * *addrp is unmapped first.  Otherwise the result of as_map() is returned.
 *
 * The request is validated before the address space is touched, so that
 * a bad MAP_FIXED request fails without unmapping whatever the caller
 * already had at that address.
 */
/*ARGSUSED8*/
static int
privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
    off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
{
        struct segmf_crargs a;
        int error;

        if (secpolicy_xvm_control(cr))
                return (EPERM);

        /*
         * The mapping *must* be MAP_SHARED at offset 0.
         *
         * (Foreign pages are treated like device memory; the
         * ioctl interface allows the backing objects to be
         * arbitrarily redefined to point at any machine frame.)
         */
        if ((flags & MAP_TYPE) != MAP_SHARED || off != 0)
                return (EINVAL);

        as_rangelock(as);
        if ((flags & MAP_FIXED) == 0) {
                map_addr(addrp, len, (offset_t)off, 0, flags);
                if (*addrp == NULL) {
                        error = ENOMEM;
                        goto rangeunlock;
                }
        } else {
                /*
                 * User specified address
                 */
                (void) as_unmap(as, *addrp, len);
        }

        a.dev = dev;
        a.prot = (uchar_t)prot;
        a.maxprot = (uchar_t)maxprot;
        error = as_map(as, *addrp, len, segmf_create, &a);

rangeunlock:
        as_rangeunlock(as);
        return (error);
}

/*
 * Character-device entry points for privcmd.  The driver supplies its own
 * open, ioctl and segmap routines; the remaining slots are filled with the
 * stock nulldev/nodev/nochpoll/ddi_prop_op routines.  The initializer is
 * positional, so the order of entries must not be changed.
 */
static struct cb_ops privcmd_cb_ops = {
        privcmd_open,   /* open */
        nulldev,        /* close */
        nodev,          /* strategy */
        nodev,          /* print */
        nodev,          /* dump */
        nodev,          /* read */
        nodev,          /* write */
        privcmd_ioctl,  /* ioctl */
        nodev,          /* devmap */
        nodev,          /* mmap */
        privcmd_segmap, /* segmap */
        nochpoll,       /* poll */
        ddi_prop_op,    /* property operations */
        NULL,           /* presumably the STREAMS table slot - confirm */
        D_64BIT | D_NEW | D_MP  /* driver flags */
};

/*
 * Device operations for privcmd: getinfo/attach/detach are implemented in
 * this file and the character-device entry points come from
 * privcmd_cb_ops.  The initializer is positional, so the order of entries
 * must not be changed.
 */
static struct dev_ops privcmd_dv_ops = {
        DEVO_REV,               /* dev_ops revision */
        0,                      /* presumably the reference count - confirm */
        privcmd_getinfo,        /* getinfo */
        nulldev,                /* identify */
        nulldev,                /* probe */
        privcmd_attach,         /* attach */
        privcmd_detach,         /* detach */
        nodev,                  /* reset */
        &privcmd_cb_ops,        /* character-device entry points */
        0,                      /* struct bus_ops */
        NULL,                   /* power */
        ddi_quiesce_not_needed,         /* quiesce */
};

/*
 * Driver linkage record: ties mod_driverops, a description string and this
 * driver's dev_ops together.  It is referenced from modl below.
 */
static struct modldrv modldrv = {
        &mod_driverops,
        "privcmd driver",       /* module description string */
        &privcmd_dv_ops
};

/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info() respectively.
 */
static struct modlinkage modl = {
        MODREV_1,
        &modldrv
};

/*
 * Module load entry point: install the module described by modl and
 * return mod_install()'s result unchanged.
 */
int
_init(void)
{
        int rv;

        rv = mod_install(&modl);
        return (rv);
}

/*
 * Module unload entry point: remove the module described by modl and
 * return mod_remove()'s result unchanged.
 */
int
_fini(void)
{
        int rv;

        rv = mod_remove(&modl);
        return (rv);
}

/*
 * Module information entry point: fill in *modinfo from modl and return
 * mod_info()'s result unchanged.
 */
int
_info(struct modinfo *modinfo)
{
        int rv;

        rv = mod_info(&modl, modinfo);
        return (rv);
}