sys/powerpc/powernv/xive.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright 2019 Justin Hibbits
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_platform.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>

#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>

#ifdef POWERNV
#include <powerpc/powernv/opal.h>
#endif

#include "pic_if.h"

#define XIVE_PRIORITY   7       /* Random non-zero number */
#define MAX_XIVE_IRQS   (1<<24) /* 24-bit XIRR field */

/* Registers */
#define XIVE_TM_QW1_OS          0x010   /* Guest OS registers */
#define XIVE_TM_QW2_HV_POOL     0x020   /* Hypervisor pool registers */
#define XIVE_TM_QW3_HV          0x030   /* Hypervisor registers */

#define XIVE_TM_NSR     0x00
#define XIVE_TM_CPPR    0x01
#define XIVE_TM_IPB     0x02
#define XIVE_TM_LSMFB   0x03
#define XIVE_TM_ACK_CNT 0x04
#define XIVE_TM_INC     0x05
#define XIVE_TM_AGE     0x06
#define XIVE_TM_PIPR    0x07

#define TM_WORD0        0x0
#define TM_WORD2        0x8
#define   TM_QW2W2_VP     0x80000000

#define XIVE_TM_SPC_ACK                 0x800
#define   TM_QW3NSR_HE_SHIFT              14
#define   TM_QW3_NSR_HE_NONE              0
#define   TM_QW3_NSR_HE_POOL              1
#define   TM_QW3_NSR_HE_PHYS              2
#define   TM_QW3_NSR_HE_LSI               3
#define XIVE_TM_SPC_PULL_POOL_CTX       0x828

#define XIVE_IRQ_LOAD_EOI       0x000
#define XIVE_IRQ_STORE_EOI      0x400
#define XIVE_IRQ_PQ_00          0xc00
#define XIVE_IRQ_PQ_01          0xd00

#define XIVE_IRQ_VAL_P          0x02
#define XIVE_IRQ_VAL_Q          0x01

struct xive_softc;
struct xive_irq;

extern void (*powernv_smp_ap_extra_init)(void);

/* Private support */
static void     xive_setup_cpu(void);
static void     xive_smp_cpu_startup(void);
static void     xive_init_irq(struct xive_irq *irqd, u_int irq);
static struct xive_irq  *xive_configure_irq(u_int irq);
static int      xive_provision_page(struct xive_softc *sc);

/* Interfaces */
static int      xive_probe(device_t);
static int      xive_attach(device_t);
static int      xics_probe(device_t);
static int      xics_attach(device_t);

static void     xive_bind(device_t, u_int, cpuset_t, void **);
static void     xive_dispatch(device_t, struct trapframe *);
static void     xive_enable(device_t, u_int, u_int, void **);
static void     xive_eoi(device_t, u_int, void *);
static void     xive_ipi(device_t, u_int);
static void     xive_mask(device_t, u_int, void *);
static void     xive_unmask(device_t, u_int, void *);
static void     xive_translate_code(device_t dev, u_int irq, int code,
                    enum intr_trigger *trig, enum intr_polarity *pol);

static device_method_t  xive_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe,         xive_probe),
        DEVMETHOD(device_attach,        xive_attach),

        /* PIC interface */
        DEVMETHOD(pic_bind,             xive_bind),
        DEVMETHOD(pic_dispatch,         xive_dispatch),
        DEVMETHOD(pic_enable,           xive_enable),
        DEVMETHOD(pic_eoi,              xive_eoi),
        DEVMETHOD(pic_ipi,              xive_ipi),
        DEVMETHOD(pic_mask,             xive_mask),
        DEVMETHOD(pic_unmask,           xive_unmask),
        DEVMETHOD(pic_translate_code,   xive_translate_code),

        DEVMETHOD_END
};

static device_method_t  xics_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe,         xics_probe),
        DEVMETHOD(device_attach,        xics_attach),

        DEVMETHOD_END
};

struct xive_softc {
        struct mtx sc_mtx;
        struct resource *sc_mem;
        vm_size_t       sc_prov_page_size;
        uint32_t        sc_offset;
};

struct xive_queue {
        uint32_t        *q_page;
        uint32_t        *q_eoi_page;
        uint32_t         q_toggle;
        uint32_t         q_size;
        uint32_t         q_index;
        uint32_t         q_mask;
};

struct xive_irq {
        uint32_t        girq;
        uint32_t        lirq;
        uint64_t        vp;
        uint64_t        flags;
#define OPAL_XIVE_IRQ_SHIFT_BUG         0x00000008
#define OPAL_XIVE_IRQ_LSI               0x00000004
#define OPAL_XIVE_IRQ_STORE_EOI         0x00000002
#define OPAL_XIVE_IRQ_TRIGGER_PAGE      0x00000001
        uint8_t prio;
        vm_offset_t     eoi_page;
        vm_offset_t     trig_page;
        vm_size_t       esb_size;
        int             chip;
};

struct xive_cpu {
        uint64_t        vp;
        uint64_t        flags;
        struct xive_irq ipi_data;
        struct xive_queue       queue; /* We only use a single queue for now. */
        uint64_t        cam;
        uint32_t        chip;
};

static driver_t xive_driver = {
        "xive",
        xive_methods,
        sizeof(struct xive_softc)
};

static driver_t xics_driver = {
        "xivevc",
        xics_methods,
        0
};

EARLY_DRIVER_MODULE(xive, ofwbus, xive_driver, 0, 0, BUS_PASS_INTERRUPT - 1);
EARLY_DRIVER_MODULE(xivevc, ofwbus, xics_driver, 0, 0, BUS_PASS_INTERRUPT);

MALLOC_DEFINE(M_XIVE, "xive", "XIVE Memory");

DPCPU_DEFINE_STATIC(struct xive_cpu, xive_cpu_data);

static int xive_ipi_vector = -1;

/*
 * XIVE Exploitation mode driver.
 *
 * The XIVE, present in the POWER9 CPU, can run in two modes: XICS emulation
 * mode, and "Exploitation mode".  XICS emulation mode is compatible with the
 * POWER8 and earlier XICS interrupt controller, using OPAL calls to emulate
 * hypervisor calls and memory accesses.  Exploitation mode gives us raw access
 * to the XIVE MMIO, improving performance significantly.
 *
 * The XIVE is a very bizarre interrupt controller.  It uses queues
 * in memory to pass interrupts around, and maps itself into 512GB of physical
 * device address space, giving each interrupt in the system one or more pages
 * of address space.  An IRQ is tied to a virtual processor, which could be a
 * physical CPU thread, or a guest CPU thread (LPAR running on a physical
 * thread).  Thus, the controller can route interrupts directly to guest OSes
 * bypassing processing by the hypervisor, thereby improving performance of the
 * guest OS.
 *
 * An IRQ, in addition to being tied to a virtual processor, has one or two
 * page mappings: an EOI page, and an optional trigger page.  The trigger page
 * could be the same as the EOI page.  Level-sensitive interrupts (LSIs) don't
 * have a trigger page, as they're external interrupts controlled by physical
 * lines.  MSIs and IPIs have trigger pages.  An IPI is really just another IRQ
 * in the XIVE, which is triggered by software.
 *
 * An interesting property of the XIVE is that for many operations the data
 * value of an MMIO access does not matter; what matters is whether the access
 * is a load or a store, and which offset within the page is targeted.  Hence,
 * masking and unmasking an interrupt are done by loading from different
 * offsets in the EOI page, and triggering an interrupt consists of writing to
 * the trigger page.
 *
 * Additionally, the MMIO region mapped is CPU-sensitive, just like the
 * per-processor register space (private access) in OpenPIC.  For a CPU to
 * receive interrupts it must configure its own CPPR (Current Processor
 * Priority Register); the CPPR cannot be set by any other processor.  This
 * necessitates the xive_smp_cpu_startup() function.
 *
 * Queues are power-of-two sized pages of memory shared with the XIVE.  The
 * XIVE writes each event into the queue with the current polarity ("toggle")
 * bit, which flips each time the queue wraps around.
 */
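
/*
 * Each interrupt source has an Event State Buffer (ESB) consisting of two
 * bits, P and Q, accessed through its EOI page.  Roughly: P is set while an
 * event is pending/presented, and Q records that a further event arrived
 * while P was set.  Loads from the XIVE_IRQ_PQ_00/XIVE_IRQ_PQ_01 offsets
 * return the previous P/Q values (XIVE_IRQ_VAL_P, XIVE_IRQ_VAL_Q) and move
 * the ESB into the state named by the offset; this is how xive_unmask() and
 * xive_mask() below work.  EOI is a store to XIVE_IRQ_STORE_EOI for sources
 * with store-EOI support, or a load from XIVE_IRQ_LOAD_EOI for LSIs.
 */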

/*
 * Offset-based read/write interfaces.
 */
static uint16_t
xive_read_2(struct xive_softc *sc, bus_size_t offset)
{

        return (bus_read_2(sc->sc_mem, sc->sc_offset + offset));
}

static void
xive_write_1(struct xive_softc *sc, bus_size_t offset, uint8_t val)
{

        bus_write_1(sc->sc_mem, sc->sc_offset + offset, val);
}

/* EOI and Trigger page access interfaces. */
static uint64_t
xive_read_mmap8(vm_offset_t addr)
{
        return (*(volatile uint64_t *)addr);
}

static void
xive_write_mmap8(vm_offset_t addr, uint64_t val)
{
        *(volatile uint64_t *)(addr) = val;
}

/* Device interfaces. */
static int
xive_probe(device_t dev)
{

        if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-pe"))
                return (ENXIO);

        device_set_desc(dev, "External Interrupt Virtualization Engine");

        /* Make sure we always win against the xicp driver. */
        return (BUS_PROBE_DEFAULT);
}

static int
xics_probe(device_t dev)
{

        if (!ofw_bus_is_compatible(dev, "ibm,opal-xive-vc"))
                return (ENXIO);

        device_set_desc(dev, "External Interrupt Virtualization Engine Root");
        return (BUS_PROBE_DEFAULT);
}

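/*
 * Reset the XIVE into exploitation mode, map the hypervisor-level thread
 * management area, and allocate a block of virtual processors large enough to
 * cover every hardware thread.  Each CPU then gets its VP enabled, a
 * per-CPU event queue page, and an IPI source, after which the driver
 * registers itself as the root PIC and hooks AP startup so each AP can
 * configure its own thread context.
 */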
static int
xive_attach(device_t dev)
{
        struct xive_softc *sc = device_get_softc(dev);
        struct xive_cpu *xive_cpud;
        phandle_t phandle = ofw_bus_get_node(dev);
        int64_t vp_block;
        int error;
        int rid;
        int i, order;
        uint64_t vp_id;
        int64_t ipi_irq;

        opal_call(OPAL_XIVE_RESET, OPAL_XIVE_XICS_MODE_EXP);

        error = OF_getencprop(phandle, "ibm,xive-provision-page-size",
            (pcell_t *)&sc->sc_prov_page_size, sizeof(sc->sc_prov_page_size));

        rid = 1;        /* Get the Hypervisor-level register set. */
        sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &rid, RF_ACTIVE);
        sc->sc_offset = XIVE_TM_QW3_HV;

        mtx_init(&sc->sc_mtx, "XIVE", NULL, MTX_DEF);

        /* Workaround for qemu single-thread powernv */
        if (mp_maxid == 0)
                order = 1;
        else
                order = fls(mp_maxid + (mp_maxid - 1)) - 1;

        do {
                vp_block = opal_call(OPAL_XIVE_ALLOCATE_VP_BLOCK, order);
                if (vp_block == OPAL_BUSY)
                        DELAY(10);
                else if (vp_block == OPAL_XIVE_PROVISIONING)
                        xive_provision_page(sc);
                else
                        break;
        } while (1);

        if (vp_block < 0) {
                device_printf(dev,
                    "Unable to allocate VP block.  Opal error %d\n",
                    (int)vp_block);
                bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->sc_mem);
                return (ENXIO);
        }

        /*
         * Set up the VPs.  Try to do as much as we can in attach, to lessen
         * what's needed at AP spawn time.
         */
        CPU_FOREACH(i) {
                vp_id = pcpu_find(i)->pc_hwref;

                xive_cpud = DPCPU_ID_PTR(i, xive_cpu_data);
                xive_cpud->vp = vp_id + vp_block;
                opal_call(OPAL_XIVE_GET_VP_INFO, xive_cpud->vp, NULL,
                    vtophys(&xive_cpud->cam), NULL, vtophys(&xive_cpud->chip));

                xive_cpud->cam = be64toh(xive_cpud->cam);
                xive_cpud->chip = be64toh(xive_cpud->chip);

                /* Allocate the queue page and populate the queue state data. */
                xive_cpud->queue.q_page = contigmalloc(PAGE_SIZE, M_XIVE,
                    M_ZERO | M_WAITOK, 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
                xive_cpud->queue.q_size = 1 << PAGE_SHIFT;
                xive_cpud->queue.q_mask =
                    ((xive_cpud->queue.q_size / sizeof(int)) - 1);
                xive_cpud->queue.q_toggle = 0;
                xive_cpud->queue.q_index = 0;
                do {
                        error = opal_call(OPAL_XIVE_SET_VP_INFO, xive_cpud->vp,
                            OPAL_XIVE_VP_ENABLED, 0);
                } while (error == OPAL_BUSY);
                error = opal_call(OPAL_XIVE_SET_QUEUE_INFO, vp_id,
                    XIVE_PRIORITY, vtophys(xive_cpud->queue.q_page), PAGE_SHIFT,
                    OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED);

                do {
                        ipi_irq = opal_call(OPAL_XIVE_ALLOCATE_IRQ,
                            xive_cpud->chip);
                } while (ipi_irq == OPAL_BUSY);

                if (ipi_irq < 0)
                        device_printf(dev,
                            "Failed allocating IPI.  OPAL error %d\n",
                            (int)ipi_irq);
                else {
                        xive_init_irq(&xive_cpud->ipi_data, ipi_irq);
                        xive_cpud->ipi_data.vp = vp_id;
                        xive_cpud->ipi_data.lirq = MAX_XIVE_IRQS;
                        opal_call(OPAL_XIVE_SET_IRQ_CONFIG, ipi_irq,
                            xive_cpud->ipi_data.vp, XIVE_PRIORITY,
                            MAX_XIVE_IRQS);
                }
        }

        powerpc_register_pic(dev, OF_xref_from_node(phandle), MAX_XIVE_IRQS,
            1 /* Number of IPIs */, FALSE);
        root_pic = dev;

        xive_setup_cpu();
        powernv_smp_ap_extra_init = xive_smp_cpu_startup;

        return (0);
}

static int
xics_attach(device_t dev)
{
        phandle_t phandle = ofw_bus_get_node(dev);

        /* The XIVE (root PIC) will handle all our interrupts */
        powerpc_register_pic(root_pic, OF_xref_from_node(phandle),
            MAX_XIVE_IRQS, 1 /* Number of IPIs */, FALSE);

        return (0);
}

/*
 * PIC I/F methods.
 */

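/*
 * Re-route an interrupt source to a CPU chosen from the given mask.  The
 * hardware routes to a single VP rather than a group, so one CPU is picked
 * pseudo-randomly from the set, the source's queue is synced, and the source
 * is reconfigured via OPAL_XIVE_SET_IRQ_CONFIG, finishing with an EOI on the
 * source.
 */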
static void
xive_bind(device_t dev, u_int irq, cpuset_t cpumask, void **priv)
{
        struct xive_irq *irqd;
        int cpu;
        int ncpus, i, error;

        if (*priv == NULL)
                *priv = xive_configure_irq(irq);

        irqd = *priv;

        /*
         * This doesn't appear to actually support affinity groups, so pick a
         * random CPU.
         */
        ncpus = 0;
        CPU_FOREACH(cpu)
                if (CPU_ISSET(cpu, &cpumask))
                        ncpus++;

        i = mftb() % ncpus;
        ncpus = 0;
        CPU_FOREACH(cpu) {
                if (!CPU_ISSET(cpu, &cpumask))
                        continue;
                if (ncpus == i)
                        break;
                ncpus++;
        }

        opal_call(OPAL_XIVE_SYNC, OPAL_XIVE_SYNC_QUEUE, irq);

        irqd->vp = pcpu_find(cpu)->pc_hwref;
        error = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, irqd->vp,
            XIVE_PRIORITY, irqd->lirq);

        if (error < 0)
                panic("Cannot bind interrupt %d to CPU %d", irq, cpu);

        xive_eoi(dev, irq, irqd);
}

/* Read the next entry in the queue page and update the index. */
static int
xive_read_eq(struct xive_queue *q)
{
        uint32_t i = be32toh(q->q_page[q->q_index]);

        /* Check validity, using current queue polarity. */
        if ((i >> 31) == q->q_toggle)
                return (0);

        q->q_index = (q->q_index + 1) & q->q_mask;

        if (q->q_index == 0)
                q->q_toggle ^= 1;

        return (i & 0x7fffffff);
}

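/*
 * Main interrupt dispatch loop.  Acknowledge the pending interrupt through
 * the thread management area and, as long as the NSR reports a physical
 * interrupt, set the CPPR to the acknowledged priority and drain the per-CPU
 * event queue, dispatching each vector found.  On the way out the CPPR is
 * restored to 0xff (least favored) so all priorities are accepted again.
 */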
static void
xive_dispatch(device_t dev, struct trapframe *tf)
{
        struct xive_softc *sc;
        struct xive_cpu *xive_cpud;
        uint32_t vector;
        uint16_t ack;
        uint8_t cppr, he;

        sc = device_get_softc(dev);

        xive_cpud = DPCPU_PTR(xive_cpu_data);
        for (;;) {
                ack = xive_read_2(sc, XIVE_TM_SPC_ACK);
                cppr = (ack & 0xff);

                he = ack >> TM_QW3NSR_HE_SHIFT;

                if (he == TM_QW3_NSR_HE_NONE)
                        break;

                else if (__predict_false(he != TM_QW3_NSR_HE_PHYS)) {
                        /*
                         * We don't support TM_QW3_NSR_HE_POOL or
                         * TM_QW3_NSR_HE_LSI interrupts.
                         */
                        device_printf(dev,
                            "Unexpected interrupt he type: %d\n", he);
                        goto end;
                }

                xive_write_1(sc, XIVE_TM_CPPR, cppr);

                for (;;) {
                        vector = xive_read_eq(&xive_cpud->queue);

                        if (vector == 0)
                                break;

                        if (vector == MAX_XIVE_IRQS)
                                vector = xive_ipi_vector;

                        powerpc_dispatch_intr(vector, tf);
                }
        }
end:
        xive_write_1(sc, XIVE_TM_CPPR, 0xff);
}

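/*
 * Enable an interrupt source and route it to the current CPU.  The IPI
 * pseudo-IRQ (MAX_XIVE_IRQS) only records the kernel's IPI vector; real
 * sources are configured through OPAL_XIVE_SET_IRQ_CONFIG and then unmasked.
 */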
static void
xive_enable(device_t dev, u_int irq, u_int vector, void **priv)
{
        struct xive_irq *irqd;
        cell_t status, cpu;

        if (irq == MAX_XIVE_IRQS) {
                if (xive_ipi_vector == -1)
                        xive_ipi_vector = vector;
                return;
        }
        if (*priv == NULL)
                *priv = xive_configure_irq(irq);

        irqd = *priv;

        /* Bind to this CPU to start */
        cpu = PCPU_GET(hwref);
        irqd->lirq = vector;

        for (;;) {
                status = opal_call(OPAL_XIVE_SET_IRQ_CONFIG, irq, cpu,
                    XIVE_PRIORITY, vector);
                if (status != OPAL_BUSY)
                        break;
                DELAY(10);
        }

        if (status != 0)
                panic("OPAL_SET_XIVE IRQ %d -> cpu %d failed: %d", irq,
                    cpu, status);

        xive_unmask(dev, irq, *priv);
}

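/*
 * Signal end-of-interrupt.  Sources with store-EOI support take a store to
 * the XIVE_IRQ_STORE_EOI offset; LSIs take a load from XIVE_IRQ_LOAD_EOI.
 * Everything else is EOIed by loading the PQ_00 offset, and if the Q bit was
 * set (another event arrived while this one was pending) the source is
 * retriggered through its trigger page.
 */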
static void
xive_eoi(device_t dev, u_int irq, void *priv)
{
        struct xive_irq *rirq;
        struct xive_cpu *cpud;
        uint8_t eoi_val;

        if (irq == MAX_XIVE_IRQS) {
                cpud = DPCPU_PTR(xive_cpu_data);
                rirq = &cpud->ipi_data;
        } else
                rirq = priv;

        if (rirq->flags & OPAL_XIVE_IRQ_STORE_EOI)
                xive_write_mmap8(rirq->eoi_page + XIVE_IRQ_STORE_EOI, 0);
        else if (rirq->flags & OPAL_XIVE_IRQ_LSI)
                xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_LOAD_EOI);
        else {
                eoi_val = xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00);
                if ((eoi_val & XIVE_IRQ_VAL_Q) && rirq->trig_page != 0)
                        xive_write_mmap8(rirq->trig_page, 0);
        }
}

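/*
 * Send an IPI to the given CPU by writing to the trigger page of that CPU's
 * IPI interrupt source.
 */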
static void
xive_ipi(device_t dev, u_int cpu)
{
        struct xive_cpu *xive_cpud;

        xive_cpud = DPCPU_ID_PTR(cpu, xive_cpu_data);

        if (xive_cpud->ipi_data.trig_page == 0)
                return;
        xive_write_mmap8(xive_cpud->ipi_data.trig_page, 0);
}

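/*
 * Mask an interrupt source.  Only LSIs are actually masked, by loading the
 * PQ_01 offset of the EOI page, which puts the ESB into a state where no
 * further notifications are sent; IPIs and MSIs are left enabled.
 */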
static void
xive_mask(device_t dev, u_int irq, void *priv)
{
        struct xive_irq *rirq;

        /* Never mask IPIs */
        if (irq == MAX_XIVE_IRQS)
                return;

        rirq = priv;

        if (!(rirq->flags & OPAL_XIVE_IRQ_LSI))
                return;
        xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_01);
}

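/*
 * Unmask an interrupt source by loading the PQ_00 offset of its EOI page,
 * resetting the ESB so the next event is delivered.
 */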
static void
xive_unmask(device_t dev, u_int irq, void *priv)
{
        struct xive_irq *rirq;

        rirq = priv;

        xive_read_mmap8(rirq->eoi_page + XIVE_IRQ_PQ_00);
}

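/*
 * Translate an Open Firmware interrupt sense code into a trigger/polarity
 * pair: 0 is a low-to-high edge, 1 is an active-low level, and anything else
 * is left to the bus defaults.
 */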
static void
xive_translate_code(device_t dev, u_int irq, int code,
    enum intr_trigger *trig, enum intr_polarity *pol)
{
        switch (code) {
        case 0:
                /* L to H edge */
                *trig = INTR_TRIGGER_EDGE;
                *pol = INTR_POLARITY_HIGH;
                break;
        case 1:
                /* Active L level */
                *trig = INTR_TRIGGER_LEVEL;
                *pol = INTR_POLARITY_LOW;
                break;
        default:
                *trig = INTR_TRIGGER_CONFORM;
                *pol = INTR_POLARITY_CONFORM;
        }
}

/* Private functions. */
/*
 * Setup the current CPU.  Called by the BSP at driver attachment, and by each
 * AP at wakeup (via xive_smp_cpu_startup()).
 */
static void
xive_setup_cpu(void)
{
        struct xive_softc *sc;
        struct xive_cpu *cpup;
        uint32_t val;

        cpup = DPCPU_PTR(xive_cpu_data);

        sc = device_get_softc(root_pic);

        val = bus_read_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2);
        if (val & TM_QW2W2_VP)
                bus_read_8(sc->sc_mem, XIVE_TM_SPC_PULL_POOL_CTX);

        bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD0, 0xff);
        bus_write_4(sc->sc_mem, XIVE_TM_QW2_HV_POOL + TM_WORD2,
            TM_QW2W2_VP | cpup->cam);

        xive_unmask(root_pic, cpup->ipi_data.girq, &cpup->ipi_data);
        xive_write_1(sc, XIVE_TM_CPPR, 0xff);
}

/* Populate an IRQ structure, mapping the EOI and trigger pages. */
static void
xive_init_irq(struct xive_irq *irqd, u_int irq)
{
        uint64_t eoi_phys, trig_phys;
        uint32_t esb_shift;

        opal_call(OPAL_XIVE_GET_IRQ_INFO, irq,
            vtophys(&irqd->flags), vtophys(&eoi_phys),
            vtophys(&trig_phys), vtophys(&esb_shift),
            vtophys(&irqd->chip));

        irqd->flags = be64toh(irqd->flags);
        eoi_phys = be64toh(eoi_phys);
        trig_phys = be64toh(trig_phys);
        esb_shift = be32toh(esb_shift);
        irqd->chip = be32toh(irqd->chip);

        irqd->girq = irq;
        irqd->esb_size = 1 << esb_shift;
        irqd->eoi_page = (vm_offset_t)pmap_mapdev(eoi_phys, irqd->esb_size);

        if (eoi_phys == trig_phys)
                irqd->trig_page = irqd->eoi_page;
        else if (trig_phys != 0)
                irqd->trig_page = (vm_offset_t)pmap_mapdev(trig_phys,
                    irqd->esb_size);
        else
                irqd->trig_page = 0;

        opal_call(OPAL_XIVE_GET_IRQ_CONFIG, irq, vtophys(&irqd->vp),
            vtophys(&irqd->prio), vtophys(&irqd->lirq));

        irqd->vp = be64toh(irqd->vp);
        irqd->prio = be64toh(irqd->prio);
        irqd->lirq = be32toh(irqd->lirq);
}

/* Allocate an IRQ struct before populating it. */
static struct xive_irq *
xive_configure_irq(u_int irq)
{
        struct xive_irq *irqd;

        irqd = malloc(sizeof(struct xive_irq), M_XIVE, M_WAITOK);

        xive_init_irq(irqd, irq);

        return (irqd);
}

/*
 * Part of the OPAL API.  OPAL_XIVE_ALLOCATE_VP_BLOCK might require more pages,
 * provisioned through this call.
 */
static int
xive_provision_page(struct xive_softc *sc)
{
        void *prov_page;
        int error;

        do {
                prov_page = contigmalloc(sc->sc_prov_page_size, M_XIVE, 0,
                    0, BUS_SPACE_MAXADDR,
                    sc->sc_prov_page_size, sc->sc_prov_page_size);

                error = opal_call(OPAL_XIVE_DONATE_PAGE, -1,
                    vtophys(prov_page));
        } while (error == OPAL_XIVE_PROVISIONING);

        return (0);
}

/* The XIVE_TM_CPPR register must be set by each thread */
static void
xive_smp_cpu_startup(void)
{

        xive_setup_cpu();
}