root/usr/src/uts/i86pc/io/apix/apix_irm.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/psm_types.h>
#include <sys/smp_impldefs.h>
#include <sys/apic.h>
#include <sys/processor.h>
#include <sys/apix_irm_impl.h>

/* global variable for static default limit for non-IRM drivers */
extern int ddi_msix_alloc_limit;

/* Extern declarations */
extern int (*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
    psm_intr_op_t, int *);

/*
 * Global variables for IRM pool configuration:
 *
 *    (1) apix_system_max_vectors -- this would limit the maximum
 *    number of interrupt vectors that will be made available
 *    to the device drivers. The default value (-1) indicates
 *    that all the available vectors could be used.
 *
 *    (2) apix_irm_cpu_factor -- This would specify the number of CPUs that
 *    should be excluded from the global IRM pool of interrupt vectors.
 *    By default this would be zero, so vectors from all the CPUs
 *    present will be factored into the IRM pool.
 *
 *    (3) apix_irm_reserve_fixed_vectors -- This would specify the number
 *    of vectors that should be reserved for FIXED type interrupts and
 *    exclude them from the IRM pool. The value can be one of the
 *    following:
 *      0       - no reservation (default)
 *      <n>     - a positive number for the reserved cache
 *      -1      - reserve the maximum needed
 *
 *    (4) apix_irm_free_fixed_vector -- This flag specifies if the
 *    vectors for FIXED type should be freed and added back
 *    to the IRM pool when ddi_intr_free() is called. The default (1)
 *    is to add it back to the pool; when it is 0 the freed vector is
 *    kept in the local cache (apix_irm_cache_size) instead.
 */
int apix_system_max_vectors = -1;
int apix_irm_cpu_factor = 0;
int apix_irm_reserve_fixed_vectors = 0;
int apix_irm_free_fixed_vector = 1;

/* info from APIX module for IRM configuration */
apix_irm_info_t apix_irminfo;

/*
 * Run-time bookkeeping. Once the pool has been created these are
 * updated under apix_irm_lock by the alloc/free and CPU
 * enable/disable hooks below.
 */
kmutex_t apix_irm_lock; /* global mutex for apix_irm_* data */
ddi_irm_params_t apix_irm_params; /* IRM pool info */
int apix_irm_cache_size = 0; /* local cache for FIXED type requests */
/*
 * Number of CPU-factor credits currently available. While this is
 * positive, disabling a CPU consumes one credit instead of shrinking
 * the IRM pool; enabling a CPU replenishes it up to apix_irm_cpu_factor.
 */
int apix_irm_cpu_factor_available = 0;
/* number of CPUs currently enabled for interrupts */
int apix_irm_max_cpus = 0;
/* number of CPUs whose vectors are counted in the IRM pool size */
int apix_irm_cpus_used = 0;
/*
 * Number of vectors in use that are not part of the IRM pool: seeded
 * from apix_irminfo.apix_vectors_allocated at init time and adjusted on
 * every non-shared FIXED allocation/free.
 */
int apix_irm_fixed_intr_vectors_used;

extern int ncpus;

/* local data/functions */
static int apix_irm_chk_apix();
int apix_irm_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
        psm_intr_op_t op, int *result);
int apix_irm_disable_intr(processorid_t);
void apix_irm_enable_intr(processorid_t);
/*
 * Original PSM entry points, saved by apix_irm_init() when it installs
 * the apix_irm_* interposers; NULL until then.
 */
int (*psm_intr_ops_saved)(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
        psm_intr_op_t op, int *result) = NULL;
int (*psm_disable_intr_saved)(processorid_t) = NULL;
void (*psm_enable_intr_saved)(processorid_t) = NULL;
int apix_irm_alloc_fixed(dev_info_t *, ddi_intr_handle_impl_t *, int *);
int apix_irm_free_fixed(dev_info_t *, ddi_intr_handle_impl_t *, int *);

/*
 * Initilaize IRM pool for APIC interrupts if the PSM module
 * is of APIX type. This should be called only after PSM module
 * is loaded and APIC interrupt system is initialized.
 */
void
apix_irm_init(void)
{
        dev_info_t              *dip;
        int                     total_avail_vectors;
        int                     cpus_used;
        int                     cache_size;

        /* nothing to do if IRM is disabled */
        if (!irm_enable)
                return;

        /*
         * Use root devinfo node to associate the IRM pool with it
         * as the pool is global to the system.
         */
        dip = ddi_root_node();

        /*
         * Check if PSM module is initialized and it is APIX
         * module (which supports IRM functionality).
         */
        if ((psm_intr_ops == NULL) || !apix_irm_chk_apix()) {
                /* not an APIX module */
                APIX_IRM_DEBUG((CE_CONT,
                    "apix_irm_init: APIX module not present"));
                return;
        }

        /*
         * Now, determine the IRM pool parameters based on the
         * info from APIX module and global config variables.
         */

        /*
         * apix_ncpus shows all the CPUs present in the
         * system but not all of them may have been enabled
         * (i.e. mp_startup() may not have been called yet).
         * So, use ncpus for IRM pool creation.
         */
        if (apix_irminfo.apix_ncpus > ncpus)
                apix_irminfo.apix_ncpus = ncpus;

        /* apply the CPU factor if possible */
        if ((apix_irm_cpu_factor > 0) &&
            (apix_irminfo.apix_ncpus > apix_irm_cpu_factor)) {
                cpus_used = apix_irminfo.apix_ncpus - apix_irm_cpu_factor;
                apix_irm_cpu_factor_available = apix_irm_cpu_factor;
        } else {
                cpus_used = apix_irminfo.apix_ncpus;
        }
        apix_irm_cpus_used = apix_irm_max_cpus = cpus_used;

        APIX_IRM_DEBUG((CE_CONT,
            "apix_irm_init: %d CPUs used for IRM pool size", cpus_used));

        total_avail_vectors = cpus_used * apix_irminfo.apix_per_cpu_vectors -
            apix_irminfo.apix_vectors_allocated;

        apix_irm_fixed_intr_vectors_used = apix_irminfo.apix_vectors_allocated;

        if (total_avail_vectors <= 0) {
                /* can not determine pool size */
                APIX_IRM_DEBUG((CE_NOTE,
                    "apix_irm_init: can not determine pool size"));
                return;
        }

        /* adjust the pool size as per the global config variable */
        if ((apix_system_max_vectors > 0) &&
            (apix_system_max_vectors < total_avail_vectors))
                total_avail_vectors = apix_system_max_vectors;

        /* pre-reserve vectors (i.e. local cache) for FIXED type if needed */
        if (apix_irm_reserve_fixed_vectors != 0) {
                cache_size = apix_irm_reserve_fixed_vectors;
                if ((cache_size == -1) ||
                    (cache_size > apix_irminfo.apix_ioapic_max_vectors))
                        cache_size = apix_irminfo.apix_ioapic_max_vectors;
                total_avail_vectors -= cache_size;
                apix_irm_cache_size = cache_size;
        }

        if (total_avail_vectors <= 0) {
                APIX_IRM_DEBUG((CE_NOTE,
                    "apix_irm_init: invalid config parameters!"));
                return;
        }

        /* IRM pool is used only for MSI/X interrupts */
        apix_irm_params.iparams_types = DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX;
        apix_irm_params.iparams_total = total_avail_vectors;

        if (ndi_irm_create(dip, &apix_irm_params,
            &apix_irm_pool_p) == NDI_SUCCESS) {
                /*
                 * re-direct psm_intr_ops to intercept FIXED
                 * interrupt allocation requests.
                 */
                psm_intr_ops_saved = psm_intr_ops;
                psm_intr_ops = apix_irm_intr_ops;
                /*
                 * re-direct psm_enable_intr()/psm_disable_intr() to
                 * intercept CPU offline/online requests.
                 */
                psm_disable_intr_saved = psm_disable_intr;
                psm_enable_intr_saved = psm_enable_intr;
                psm_enable_intr = apix_irm_enable_intr;
                psm_disable_intr = apix_irm_disable_intr;

                mutex_init(&apix_irm_lock, NULL, MUTEX_DRIVER, NULL);

                /*
                 * Set default alloc limit for non-IRM drivers
                 * to DDI_MIN_MSIX_ALLOC (currently defined as 8).
                 *
                 * NOTE: This is done here so that the limit of 8 vectors
                 * is applicable only with APIX module. For the old pcplusmp
                 * implementation, the current default of 2 (i.e
                 * DDI_DEFAULT_MSIX_ALLOC) is retained.
                 */
                if (ddi_msix_alloc_limit < DDI_MIN_MSIX_ALLOC)
                        ddi_msix_alloc_limit = DDI_MIN_MSIX_ALLOC;
        } else {
                APIX_IRM_DEBUG((CE_NOTE,
                    "apix_irm_init: ndi_irm_create() failed"));
                apix_irm_pool_p = NULL;
        }
}

/*
 * Ask the loaded PSM module for its APIC type and report whether it is
 * the "APIX" implementation, which is the one that supports IRM.
 *
 * Returns 1 for an APIX module. Returns 0 when no PSM ops vector is
 * installed, when the type query fails, or when the reported type name
 * differs from APIC_APIX_NAME.
 */
static int
apix_irm_chk_apix(void)
{
        apic_get_type_t         apic_type;
        ddi_intr_handle_impl_t  query_hdl;
        int                     rv;

        if (psm_intr_ops == NULL)
                return (0);

        /* the PSM fills in the structure hung off ih_private */
        bzero(&query_hdl, sizeof (query_hdl));
        query_hdl.ih_private = &apic_type;

        rv = (*psm_intr_ops)(NULL, &query_hdl, PSM_INTR_OP_APIC_TYPE, NULL);
        if (rv != PSM_SUCCESS) {
                /* unknown type; assume not an APIX module */
                return (0);
        }

        return ((strcmp(apic_type.avgi_type, APIC_APIX_NAME) == 0) ? 1 : 0);
}

/*
 * Interposer for psm_intr_ops that keeps the IRM pool accounting in
 * step with FIXED type interrupt usage.
 *
 * Only two requests are handled here, and only for handles whose
 * ih_type is DDI_INTR_TYPE_FIXED:
 *      PSM_INTR_OP_ALLOC_VECTORS  -> apix_irm_alloc_fixed()
 *      PSM_INTR_OP_FREE_VECTORS   -> apix_irm_free_fixed()
 * Everything else is forwarded unchanged to the saved (original)
 * psm_intr_ops function, whose return value is returned as is.
 */
int
apix_irm_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
        psm_intr_op_t op, int *result)
{
        /*
         * The op is tested first so that the handle is only examined
         * for the two requests that are intercepted.
         */
        if ((op == PSM_INTR_OP_ALLOC_VECTORS) &&
            (handle->ih_type == DDI_INTR_TYPE_FIXED))
                return (apix_irm_alloc_fixed(dip, handle, result));

        if ((op == PSM_INTR_OP_FREE_VECTORS) &&
            (handle->ih_type == DDI_INTR_TYPE_FIXED))
                return (apix_irm_free_fixed(dip, handle, result));

        /* pass the request to APIX */
        return ((*psm_intr_ops_saved)(dip, handle, op, result));
}

/*
 * Allocate a FIXED type interrupt. The procedure for this
 * operation is as follows:
 *
 * 1) Check if this IRQ is shared (i.e. IRQ is already mapped
 *    and a vector has been already allocated). If so, then no
 *    new vector is needed and simply pass the request to APIX
 *    and return.
 * 2) Check the local cache pool for an available vector. If
 *    the cache is not empty then take it from there and simply
 *    pass the request to APIX and return.
 * 3) Otherwise, get a vector from the IRM pool by reducing the
 *    pool size by 1. If it is successful then pass the
 *    request to APIX module. Otherwise return PSM_FAILURE.
 *
 * PSM_FAILURE is also returned, without calling into APIX, when the
 * local cache is empty and the IRM pool has no vectors left to give up.
 * In all other cases the return value is that of the saved psm_intr_ops
 * function for PSM_INTR_OP_ALLOC_VECTORS.
 */
int
apix_irm_alloc_fixed(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
        int *result)
{
        int     vector;
        uint_t  new_pool_size;
        int     ret;

        /*
         * Check if this IRQ has been mapped (i.e. shared IRQ case)
         * by doing PSM_INTR_OP_XLATE_VECTOR.
         */
        ret = (*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_XLATE_VECTOR,
            &vector);
        if (ret == PSM_SUCCESS) {
                APIX_IRM_DEBUG((CE_CONT,
                    "apix_irm_alloc_fixed: dip %p (%s) xlated vector 0x%x",
                    (void *)dip, ddi_driver_name(dip), vector));
                /* (1) mapping already exists; pass the request to PSM */
                return ((*psm_intr_ops_saved)(dip, handle,
                    PSM_INTR_OP_ALLOC_VECTORS, result));
        }

        /* check the local cache for an available vector */
        mutex_enter(&apix_irm_lock);
        /*
         * Only a strictly positive count means the cache holds a
         * vector; a zero or (bogus) negative count falls through to
         * the IRM pool.
         */
        if (apix_irm_cache_size > 0) { /* cache is not empty */
                --apix_irm_cache_size;
                apix_irm_fixed_intr_vectors_used++;
                mutex_exit(&apix_irm_lock);
                /* (2) use the vector from the local cache */
                return ((*psm_intr_ops_saved)(dip, handle,
                    PSM_INTR_OP_ALLOC_VECTORS, result));
        }

        /* (3) get a vector from the IRM pool */

        /*
         * new_pool_size is unsigned, so subtracting 1 from an empty
         * pool would wrap around to a huge value and ask the IRM
         * framework to grow the pool instead of shrinking it.
         */
        if (apix_irm_params.iparams_total == 0) {
                mutex_exit(&apix_irm_lock);
                APIX_IRM_DEBUG((CE_NOTE,
                    "apix_irm_alloc_fixed: IRM pool is empty"));
                return (PSM_FAILURE);
        }

        new_pool_size = apix_irm_params.iparams_total - 1;

        APIX_IRM_DEBUG((CE_CONT, "apix_irm_alloc_fixed: dip %p (%s) resize pool"
            " from %x to %x\n", (void *)dip, ddi_driver_name(dip),
            apix_irm_pool_p->ipool_totsz, new_pool_size));

        if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) ==
            NDI_SUCCESS) {
                /* update the pool size info */
                apix_irm_params.iparams_total = new_pool_size;
                apix_irm_fixed_intr_vectors_used++;
                mutex_exit(&apix_irm_lock);
                return ((*psm_intr_ops_saved)(dip, handle,
                    PSM_INTR_OP_ALLOC_VECTORS, result));
        }

        mutex_exit(&apix_irm_lock);

        return (PSM_FAILURE);
}

/*
 * Free up the FIXED type interrupt.
 *
 * 1) If it is a shared vector then simply pass the request to
 *    APIX and return.
 * 2) Otherwise, free the vector through APIX and, if that succeeds:
 *    - if apix_irm_free_fixed_vector is set (the default), add the
 *      vector back to the IRM pool by growing the pool by 1;
 *    - if it is not set, keep the vector in the local cache.
 *    Should growing the pool fail, the vector is kept in the local
 *    cache as well so that it is not lost from the accounting.
 *
 * The return value is that of the saved psm_intr_ops function for
 * PSM_INTR_OP_FREE_VECTORS.
 */
int
apix_irm_free_fixed(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
        int *result)
{
        int shared;
        int ret;
        uint_t new_pool_size;

        /* check if it is a shared vector */
        ret = (*psm_intr_ops_saved)(dip, handle,
            PSM_INTR_OP_GET_SHARED, &shared);

        if ((ret == PSM_SUCCESS) && (shared > 0)) {
                /* (1) it is a shared vector; simply pass the request */
                APIX_IRM_DEBUG((CE_CONT, "apix_irm_free_fixed: dip %p (%s) "
                    "shared %d\n", (void *)dip, ddi_driver_name(dip), shared));
                return ((*psm_intr_ops_saved)(dip, handle,
                    PSM_INTR_OP_FREE_VECTORS, result));
        }

        ret = (*psm_intr_ops_saved)(dip, handle,
            PSM_INTR_OP_FREE_VECTORS, result);

        if (ret == PSM_SUCCESS) {
                mutex_enter(&apix_irm_lock);
                if (apix_irm_free_fixed_vector) {
                        /* (2) add the vector back to IRM pool */
                        new_pool_size = apix_irm_params.iparams_total + 1;
                        APIX_IRM_DEBUG((CE_CONT, "apix_irm_free_fixed: "
                            "dip %p (%s) resize pool from %x to %x\n",
                            (void *)dip, ddi_driver_name(dip),
                            apix_irm_pool_p->ipool_totsz, new_pool_size));
                        if (ndi_irm_resize_pool(apix_irm_pool_p,
                            new_pool_size) == NDI_SUCCESS) {
                                /* update the pool size info */
                                apix_irm_params.iparams_total = new_pool_size;
                        } else {
                                cmn_err(CE_NOTE,
                                    "apix_irm_free_fixed: failed to add"
                                    " a vector to IRM pool");
                                /*
                                 * The vector is free but the pool did
                                 * not take it; park it in the local
                                 * cache rather than dropping it.
                                 */
                                apix_irm_cache_size += 1;
                        }
                } else {
                        /* keep the vector in the local cache */
                        apix_irm_cache_size += 1;
                }
                apix_irm_fixed_intr_vectors_used--;
                mutex_exit(&apix_irm_lock);
        }

        return (ret);
}

/*
 * Disable the CPU for interrupts. It is assumed that this is called to
 * offline/disable the CPU so that no interrupts are allocated on
 * that CPU. For IRM perspective, the interrupt vectors on this
 * CPU are to be excluded for any allocations.
 *
 * The IRM accounting is adjusted first: either one CPU-factor credit is
 * consumed, or the IRM pool is shrunk by one CPU's worth of vectors
 * (subject to apix_system_max_vectors). The request is then passed to
 * the saved psm_disable_intr function. If that call fails, the
 * accounting change is undone so that the state still describes a CPU
 * that is enabled for interrupts.
 *
 * For a CPU flagged APIC_CPU_SUSPEND (suspend/resume) the request is
 * passed straight through with no accounting change.
 *
 * Returns PSM_SUCCESS or PSM_FAILURE (for the suspend/resume case, the
 * saved function's own return value).
 */
int
apix_irm_disable_intr(processorid_t id)
{
        uint_t  new_pool_size;
        uint_t  old_pool_size;
        int     used_cpu_factor = 0;

        /* Interrupt disabling for Suspend/Resume */
        if (apic_cpus[id].aci_status & APIC_CPU_SUSPEND)
                return ((*psm_disable_intr_saved)(id));

        mutex_enter(&apix_irm_lock);

        /* remembered so that a failed disable can be rolled back */
        old_pool_size = apix_irm_params.iparams_total;

        /*
         * Don't remove the CPU from the IRM pool if we have CPU factor
         * available.
         */
        if ((apix_irm_cpu_factor > 0) && (apix_irm_cpu_factor_available > 0)) {
                apix_irm_cpu_factor_available--;
                used_cpu_factor = 1;
        } else {
                /* can't disable if there is only one CPU used */
                if (apix_irm_cpus_used == 1) {
                        mutex_exit(&apix_irm_lock);
                        return (PSM_FAILURE);
                }
                /* Calculate the new size for the IRM pool */
                new_pool_size = apix_irm_params.iparams_total -
                    apix_irminfo.apix_per_cpu_vectors;

                /* Apply the max. limit */
                if (apix_system_max_vectors > 0) {
                        uint_t  max;

                        max = apix_system_max_vectors -
                            apix_irm_fixed_intr_vectors_used -
                            apix_irm_cache_size;

                        new_pool_size = MIN(new_pool_size, max);
                }

                if (new_pool_size == 0) {
                        cmn_err(CE_WARN, "Invalid pool size 0 with "
                            "apix_system_max_vectors = %d",
                            apix_system_max_vectors);
                        mutex_exit(&apix_irm_lock);
                        return (PSM_FAILURE);
                }

                if (new_pool_size != apix_irm_params.iparams_total) {
                        /* remove the CPU from the IRM pool */
                        if (ndi_irm_resize_pool(apix_irm_pool_p,
                            new_pool_size) != NDI_SUCCESS) {
                                mutex_exit(&apix_irm_lock);
                                APIX_IRM_DEBUG((CE_NOTE,
                                    "apix_irm_disable_intr: failed to resize"
                                    " the IRM pool"));
                                return (PSM_FAILURE);
                        }
                        /* update the pool size info */
                        apix_irm_params.iparams_total = new_pool_size;
                }

                /* decrement the CPU count used by IRM pool */
                apix_irm_cpus_used--;
        }

        /*
         * Now, disable the CPU for interrupts.
         */
        if ((*psm_disable_intr_saved)(id) != PSM_SUCCESS) {
                APIX_IRM_DEBUG((CE_NOTE,
                    "apix_irm_disable_intr: failed to disable CPU interrupts"
                    " for CPU#%d", id));

                /*
                 * The CPU is still enabled for interrupts, so undo the
                 * accounting done above; otherwise a later retry would
                 * take this CPU out of the pool a second time.
                 */
                if (used_cpu_factor) {
                        apix_irm_cpu_factor_available++;
                } else {
                        apix_irm_cpus_used++;
                        if (apix_irm_params.iparams_total != old_pool_size) {
                                if (ndi_irm_resize_pool(apix_irm_pool_p,
                                    old_pool_size) == NDI_SUCCESS) {
                                        apix_irm_params.iparams_total =
                                            old_pool_size;
                                } else {
                                        APIX_IRM_DEBUG((CE_NOTE,
                                            "apix_irm_disable_intr: failed to"
                                            " restore the IRM pool size"));
                                }
                        }
                }
                mutex_exit(&apix_irm_lock);
                return (PSM_FAILURE);
        }
        /* decrement the CPU count enabled for interrupts */
        apix_irm_max_cpus--;
        mutex_exit(&apix_irm_lock);
        return (PSM_SUCCESS);
}

/*
 * Enable the CPU for interrupts. It is assumed that this function is
 * called to enable/online the CPU so that interrupts could be assigned
 * to it.
 *
 * After the saved psm_enable_intr function has been called, the IRM
 * accounting is updated: if the CPU factor is not yet fully available,
 * one credit is restored and the pool is left alone; otherwise the
 * pool is grown by one CPU's worth of vectors (subject to
 * apix_system_max_vectors). The recorded pool size is only updated when
 * the resize actually succeeds.
 *
 * For a CPU flagged APIC_CPU_SUSPEND (suspend/resume) the request is
 * passed straight through with no accounting change.
 */
void
apix_irm_enable_intr(processorid_t id)
{
        uint_t new_pool_size;

        /* Interrupt enabling for Suspend/Resume */
        if (apic_cpus[id].aci_status & APIC_CPU_SUSPEND) {
                (*psm_enable_intr_saved)(id);
                return;
        }

        mutex_enter(&apix_irm_lock);

        /* enable the CPU for interrupts */
        (*psm_enable_intr_saved)(id);

        /* increment the number of CPUs enabled for interrupts */
        apix_irm_max_cpus++;

        ASSERT(apix_irminfo.apix_per_cpu_vectors > 0);

        /*
         * Check if the apix_irm_cpu_factor is satisfied before.
         * If satisfied, add the CPU to IRM pool.
         */
        if ((apix_irm_cpu_factor > 0) &&
            (apix_irm_cpu_factor_available < apix_irm_cpu_factor)) {
                /*
                 * Don't add the CPU to the IRM pool. Just update
                 * the available CPU factor.
                 */
                apix_irm_cpu_factor_available++;
                mutex_exit(&apix_irm_lock);
                return;
        }

        /*
         * Add the CPU to the IRM pool.
         */

        /* increment the CPU count used by IRM */
        apix_irm_cpus_used++;

        /* Calculate the new pool size */
        new_pool_size = apix_irm_params.iparams_total +
            apix_irminfo.apix_per_cpu_vectors;

        /* Apply the max. limit */
        if (apix_system_max_vectors > 0) {
                uint_t  max;

                max = apix_system_max_vectors -
                    apix_irm_fixed_intr_vectors_used -
                    apix_irm_cache_size;

                new_pool_size = MIN(new_pool_size, max);
        }
        if (new_pool_size == apix_irm_params.iparams_total) {
                /* no change to pool size */
                mutex_exit(&apix_irm_lock);
                return;
        }
        if (new_pool_size < apix_irm_params.iparams_total) {
                cmn_err(CE_WARN, "new_pool_size %d is inconsistent "
                    "with irm_params.iparams_total %d",
                    new_pool_size, apix_irm_params.iparams_total);
                mutex_exit(&apix_irm_lock);
                return;
        }

        /*
         * Record the new size only if the pool really was resized, so
         * that iparams_total keeps matching the actual pool.
         */
        if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) ==
            NDI_SUCCESS) {
                /* update the pool size info */
                apix_irm_params.iparams_total = new_pool_size;
        } else {
                cmn_err(CE_WARN, "apix_irm_enable_intr: failed to resize"
                    " the IRM pool");
        }

        mutex_exit(&apix_irm_lock);
}