root/usr/src/uts/sun4u/io/pci/pci_reloc.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident   "%Z%%M% %I%     %E% SMI"

/*
 * PCI nexus DVMA relocation routines.
 *
 * These routines handle the interactions with the HAT layer to
 * implement page relocation for page(s) which have active DMA handle
 * bindings when DVMA is being used for those handles.
 *
 * The current modus operandi is as follows:
 *
 *   Object binding: register the appropriate callback for each page
 *     of the kernel object while obtaining the PFN for the DVMA page.
 *
 *   Object unbinding: unregister the callback for each page of the
 *     kernel object.
 *
 *   Relocation request:
 *     1) Suspend the bus and sync the caches.
 *     2) Remap the DVMA object using the new provided PFN.
 *     3) Unsuspend the bus.
 *
 *  The relocation code runs with CPUs captured (idling in xc_loop())
 *  so we can only acquire spinlocks at PIL >= 13 for synchronization
 *  within those codepaths.
 */
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/async.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/machsystm.h>
#include <sys/ddi_impldefs.h>
#include <sys/dvma.h>
#include <vm/hat.h>
#include <sys/pci/pci_obj.h>

/*LINTLIBRARY*/

void
pci_dvma_unregister_callbacks(pci_t *pci_p, ddi_dma_impl_t *mp)
{
        ddi_dma_obj_t *dobj_p = &mp->dmai_object;
        struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
        page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
        caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
        struct hat *hat_p;
        uint32_t offset;
        int i;

        if (!PCI_DMA_CANRELOC(mp))
                return;

        hat_p = (as_p == NULL)? kas.a_hat : as_p->a_hat;
        ASSERT(hat_p == kas.a_hat);
        ASSERT(pplist == NULL);

        offset = mp->dmai_roffset;
        hat_delete_callback(vaddr, IOMMU_PAGE_SIZE - offset, mp, HAC_PAGELOCK,
            MP_HAT_CB_COOKIE(mp, 0));
        vaddr = (caddr_t)(((uintptr_t)vaddr + IOMMU_PAGE_SIZE) &
            IOMMU_PAGE_MASK);
        for (i = 1; i < mp->dmai_ndvmapages; i++) {
                hat_delete_callback(vaddr, IOMMU_PAGE_SIZE, mp, HAC_PAGELOCK,
                    MP_HAT_CB_COOKIE(mp, i));
                vaddr += IOMMU_PAGE_SIZE;
        }
        mp->dmai_flags &= ~DMAI_FLAGS_RELOC;
}

static int
pci_dvma_postrelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid,
        pfn_t newpfn)
{
        ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
        dev_info_t *rdip = mp->dmai_rdip;
        ddi_dma_obj_t *dobj_p = &mp->dmai_object;
        page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
        caddr_t baseva = dobj_p->dmao_obj.virt_obj.v_addr;
        int index;
        size_t length = IOMMU_PTOB(1);
        off_t offset;

        DEBUG0(DBG_RELOC, rdip, "postrelocator called\n");

        if (flags == HAT_POSTUNSUSPEND) {
                mutex_enter(&pci_reloc_mutex);
                ASSERT(pci_reloc_thread == curthread);
                ASSERT(pci_reloc_presuspend > 0);
                if (--pci_reloc_presuspend == 0) {
                        pci_reloc_thread = NULL;
                        cv_broadcast(&pci_reloc_cv);
                }
                mutex_exit(&pci_reloc_mutex);
                return (0);
        }

        ASSERT(flags == HAT_UNSUSPEND);
        ASSERT(pci_reloc_suspend > 0);
        pci_reloc_suspend--;

        ASSERT(len <= length);
        ASSERT(pplist == NULL); /* addr bind handle only */
        ASSERT(dobj_p->dmao_obj.virt_obj.v_as == &kas ||
            dobj_p->dmao_obj.virt_obj.v_as == NULL);
        ASSERT(PCI_DMA_ISDVMA(mp));
        ASSERT(pci_reloc_thread == curthread);

        offset = va - baseva;
        index = IOMMU_BTOPR(offset);
        ASSERT(index < mp->dmai_ndvmapages);

        DEBUG3(DBG_RELOC, rdip, "index 0x%x, vaddr 0x%llx, baseva 0x%llx\n",
            index, (int64_t)va, (int64_t)baseva);

        if ((mp)->dmai_ndvmapages == 1) {
                DEBUG2(DBG_RELOC, rdip, "pfn remap (1) 0x%x -> 0x%x\n",
                    mp->dmai_pfnlst, newpfn);
                    mp->dmai_pfnlst = (void *)newpfn;
        } else {
                DEBUG3(DBG_RELOC, rdip, "pfn remap (%d) 0x%x -> 0x%x\n",
                    index, ((iopfn_t *)mp->dmai_pfnlst)[index], newpfn);
                ((iopfn_t *)mp->dmai_pfnlst)[index] = (iopfn_t)newpfn;
        }

        if (ddi_dma_mctl(rdip, rdip, (ddi_dma_handle_t)mp, DDI_DMA_REMAP,
            &offset, &length, NULL, 0) != DDI_SUCCESS)
                return (EIO);
        if (ddi_ctlops(rdip, rdip, DDI_CTLOPS_UNQUIESCE, NULL, NULL) !=
            DDI_SUCCESS)
                return (EIO);

        return (0);
}

/*
 * Log a warning message if a callback is still registered on
 * a page which is being freed.  This is indicative of a driver
 * bug -- DMA handles are bound, and the memory is being freed by
 * the VM subsystem without an unbind call on the handle first.
 */
static int
pci_dma_relocerr(caddr_t va, uint_t len, uint_t errorcode, void *mpvoid)
{
        int errlevel = pci_dma_panic_on_leak? CE_PANIC : CE_WARN;
        if (errorcode == HAT_CB_ERR_LEAKED) {
                cmn_err(errlevel, "object 0x%p has a bound DMA handle 0x%p\n",
                        va, mpvoid);
                return (0);
        }

        /* unknown error code, unhandled so panic */
        return (EINVAL);
}

/*
 * pci DVMA remap entry points
 *
 * Called in response to a DDI_DMA_REMAP DMA ctlops command.
 * Remaps the region specified in the underlying IOMMU. Safe
 * to assume that the bus was quiesced and ddi_dma_sync() was
 * invoked by the caller before we got to this point.
 */
int
pci_dvma_remap(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
        off_t offset, size_t length)
{
        pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip));
        iommu_t *iommu_p = pci_p->pci_iommu_p;
        dvma_addr_t dvma_pg;
        size_t npgs;
        int idx;

        dvma_pg = IOMMU_BTOP(mp->dmai_mapping);
        idx = IOMMU_BTOPR(offset);
        dvma_pg += idx;
        npgs = IOMMU_BTOPR(length);

        DEBUG3(DBG_RELOC, mp->dmai_rdip,
            "pci_dvma_remap: dvma_pg 0x%llx len 0x%llx idx 0x%x\n",
            dvma_pg, length, idx);

        ASSERT(pci_p->pci_pbm_p->pbm_quiesce_count > 0);
        iommu_remap_pages(iommu_p, mp, dvma_pg, npgs, idx);

        return (DDI_SUCCESS);
}

void
pci_fdvma_remap(ddi_dma_impl_t *mp, caddr_t kvaddr, dvma_addr_t dvma_pg,
        size_t npages, size_t index, pfn_t newpfn)
{
        fdvma_t *fdvma_p = (fdvma_t *)mp->dmai_fdvma;
        pci_t *pci_p = (pci_t *)fdvma_p->softsp;
        iommu_t *iommu_p = pci_p->pci_iommu_p;
        dev_info_t *dip = pci_p->pci_dip;
        iopfn_t pfn = (iopfn_t)newpfn;
        dvma_addr_t pg_index = dvma_pg - iommu_p->dvma_base_pg;
        int i;
        uint64_t tte;

        /* make sure we don't exceed reserved boundary */
        DEBUG3(DBG_FAST_DVMA, dip, "fast remap index=%x: %p, npgs=%x", index,
            kvaddr, npages);
        if (index + npages > mp->dmai_ndvmapages) {
                cmn_err(pci_panic_on_fatal_errors ? CE_PANIC : CE_WARN,
                        "%s%d: fdvma remap index(%lx)+pgs(%lx) exceeds limit\n",
                        ddi_driver_name(dip), ddi_get_instance(dip),
                        index, npages);
                return;
        }

        for (i = 0; i < npages; i++, kvaddr += IOMMU_PAGE_SIZE) {
                DEBUG3(DBG_FAST_DVMA, dip, "remap dvma_pg %x -> pfn %x,"
                    " old tte 0x%llx\n", dvma_pg + i, pfn,
                    iommu_p->iommu_tsb_vaddr[pg_index + i]);

                if (pfn == PFN_INVALID)
                        goto bad_pfn;

                if (i == 0)
                        tte = MAKE_TTE_TEMPLATE(pfn, mp);

                /* XXX assumes iommu and mmu has same page size */
                iommu_p->iommu_tsb_vaddr[pg_index + i] = tte | IOMMU_PTOB(pfn);
                IOMMU_PAGE_FLUSH(iommu_p, (dvma_pg + i));
        }
        return;
bad_pfn:
        cmn_err(CE_WARN, "%s%d: fdvma remap can't get page frame for vaddr %p",
                ddi_driver_name(dip), ddi_get_instance(dip), kvaddr);
}

static int
pci_fdvma_prerelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid)
{
        ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
        fdvma_t *fdvma_p = (fdvma_t *)mp->dmai_fdvma;
        caddr_t baseva, endva;
        int i;

        /*
         * It isn't safe to do relocation if all of the IOMMU
         * mappings haven't yet been established at this index.
         */
        for (i = 0; i < mp->dmai_ndvmapages; i++) {
                baseva = fdvma_p->kvbase[i];
                endva = baseva + IOMMU_PTOB(fdvma_p->pagecnt[i]);
                if (va >= baseva && va < endva)
                        return (0);     /* found a valid index */
        }
        return (EAGAIN);
}

static int
pci_fdvma_postrelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid,
        pfn_t pfn)
{
        ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
        dev_info_t *rdip = mp->dmai_rdip;
        fdvma_t *fdvma_p = (fdvma_t *)mp->dmai_fdvma;
        caddr_t baseva;
        dvma_addr_t dvma_pg;
        size_t length = PAGESIZE;
        int i;

        DEBUG0(DBG_RELOC, rdip, "fdvma postrelocator called\n");

        if (flags == HAT_POSTUNSUSPEND) {
                mutex_enter(&pci_reloc_mutex);
                ASSERT(pci_reloc_thread == curthread);
                if (--pci_reloc_presuspend == 0) {
                        pci_reloc_thread = NULL;
                        cv_broadcast(&pci_reloc_cv);
                }
                mutex_exit(&pci_reloc_mutex);
                return (0);
        }

        pci_reloc_suspend--;

        ASSERT(flags == HAT_UNSUSPEND);
        ASSERT(len <= length);
        ASSERT((mp->dmai_rflags & DMP_BYPASSNEXUS) != 0);

        /*
         * This virtual page can have multiple cookies that refer
         * to it within the same handle. We must walk the whole
         * table for this DMA handle finding all the cookies, and
         * update all of them. Sigh.
         */
        for (i = 0; i < mp->dmai_ndvmapages; i++) {
                caddr_t endva;
                int index;

                baseva = fdvma_p->kvbase[i];
                endva = baseva + IOMMU_PTOB(fdvma_p->pagecnt[i]);

                if (va >= baseva && va < endva) {
                        index = i + IOMMU_BTOP(va - baseva);
                        ASSERT(index < mp->dmai_ndvmapages);

                        DEBUG4(DBG_RELOC, rdip, "mp %p: index 0x%x, "
                            " vaddr 0x%llx, baseva 0x%llx\n", mp, index,
                            (int64_t)va, (int64_t)baseva);

                        dvma_pg = IOMMU_BTOP(mp->dmai_mapping) + index;
                        pci_fdvma_remap(mp, va, dvma_pg, IOMMU_BTOP(length),
                            index, pfn);
                }
        }

        if (ddi_ctlops(rdip, rdip, DDI_CTLOPS_UNQUIESCE, NULL, NULL) !=
            DDI_SUCCESS)
                return (EIO);

        return (0);
}

void
pci_fdvma_unregister_callbacks(pci_t *pci_p, fdvma_t *fdvma_p,
        ddi_dma_impl_t *mp, uint_t index)
{
        size_t npgs = fdvma_p->pagecnt[index];
        caddr_t kva = fdvma_p->kvbase[index];
        int i;

        ASSERT(index + npgs <= mp->dmai_ndvmapages);
        ASSERT(kva != NULL);

        for (i = 0; i < npgs && pci_dvma_remap_enabled;
            i++, kva += IOMMU_PAGE_SIZE)
                hat_delete_callback(kva, IOMMU_PAGE_SIZE, mp, HAC_PAGELOCK,
                    fdvma_p->cbcookie[index + i]);
}

static int
pci_common_prerelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid)
{
        ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
        ddi_dma_handle_t h = (ddi_dma_handle_t)mpvoid;
        dev_info_t *rdip = mp->dmai_rdip;
        int ret;

        DEBUG0(DBG_RELOC, rdip, "prerelocator called\n");

        if (flags == HAT_PRESUSPEND) {
                if (!ddi_prop_exists(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
                    "dvma-remap-supported"))
                        return (ENOTSUP);
                if (!PCI_DMA_ISMAPPED(mp))
                        return (EAGAIN);

                if (mp->dmai_rflags & DMP_BYPASSNEXUS) {
                        ret = pci_fdvma_prerelocator(va, len, flags, mpvoid);
                        if (ret != 0)
                                return (ret);
                } else if (!PCI_DMA_ISDVMA(mp))
                        return (EINVAL);

                /*
                 * Acquire the exclusive right to relocate a PCI DMA page,
                 * since we later have to pause CPUs which could otherwise
                 * lead to all sorts of synchronization headaches.
                 */
                mutex_enter(&pci_reloc_mutex);
                if (pci_reloc_thread != curthread) {
                        while (pci_reloc_thread != NULL) {
                                cv_wait(&pci_reloc_cv, &pci_reloc_mutex);
                        }
                        pci_reloc_thread = curthread;
                        ASSERT(pci_reloc_suspend == 0);
                }
                mutex_exit(&pci_reloc_mutex);

                ASSERT(pci_reloc_thread == curthread);
                pci_reloc_presuspend++;

                return (0);
        }

        ASSERT(flags == HAT_SUSPEND);
        ASSERT(PCI_DMA_CANRELOC(mp));
        ASSERT(pci_reloc_thread == curthread);
        pci_reloc_suspend++;

        if (ddi_ctlops(rdip, rdip, DDI_CTLOPS_QUIESCE, NULL, NULL) !=
            DDI_SUCCESS)
                return (EIO);
        if (ddi_dma_sync(h, 0, 0, DDI_DMA_SYNC_FORKERNEL) != DDI_SUCCESS)
                return (EIO);

        return (0);
}

/*
 * Register two callback types: one for normal DVMA and the
 * other for fast DVMA, since each method has a different way
 * of tracking the PFNs behind a handle.
 */
void
pci_reloc_init(void)
{
        int key = pci_reloc_getkey();

        mutex_init(&pci_reloc_mutex, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&pci_reloc_cv, NULL, CV_DEFAULT, NULL);
        pci_dvma_cbid = hat_register_callback(
                key + ('D'<<24 | 'V'<<16 | 'M'<<8 | 'A'),
                pci_common_prerelocator, pci_dvma_postrelocator,
                pci_dma_relocerr, 1);
        pci_fast_dvma_cbid = hat_register_callback(
                key + ('F'<<24 | 'D'<<16 | 'M'<<8 | 'A'),
                pci_common_prerelocator,
                pci_fdvma_postrelocator, pci_dma_relocerr, 1);
}

void
pci_reloc_fini(void)
{
        cv_destroy(&pci_reloc_cv);
        mutex_destroy(&pci_reloc_mutex);
}