drivers/cxl/core/hdm.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/delay.h>

#include "cxlmem.h"
#include "core.h"

/**
 * DOC: cxl core hdm
 *
 * Compute Express Link Host Managed Device Memory, starting with the
 * CXL 2.0 specification, is managed by an array of HDM Decoder register
 * instances per CXL port and per CXL endpoint. Define common helpers
 * for enumerating these registers and capabilities.
 */

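/*
 * Global CXL core locks: @region serializes CXL region (interleave set)
 * assembly and @dpa serializes device physical address allocations.
 * Where both are held, the region lock is taken before the dpa lock.
 */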
struct cxl_rwsem cxl_rwsem = {
        .region = __RWSEM_INITIALIZER(cxl_rwsem.region),
        .dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa),
};

static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld)
{
        int rc;

        rc = cxl_decoder_add_locked(cxld);
        if (rc) {
                put_device(&cxld->dev);
                dev_err(&port->dev, "Failed to add decoder\n");
                return rc;
        }

        rc = cxl_decoder_autoremove(&port->dev, cxld);
        if (rc)
                return rc;

        dev_dbg(port->uport_dev, "%s added to %s\n",
                dev_name(&cxld->dev), dev_name(&port->dev));

        return 0;
}

/*
 * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure),
 * single-ported host bridges need not publish a decoder capability when a
 * passthrough decode can be assumed, i.e. all transactions that the uport sees
 * are claimed and passed to the single dport. Disable the range until the
 * first CXL region is enumerated / activated.
 */
static int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
{
        struct cxl_switch_decoder *cxlsd;
        struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);

        /*
         * Capability checks are moot for passthrough decoders; support
         * any and all possibilities.
         */
        cxlhdm->interleave_mask = ~0U;
        cxlhdm->iw_cap_mask = ~0UL;

        cxlsd = cxl_switch_decoder_alloc(port, 1);
        if (IS_ERR(cxlsd))
                return PTR_ERR(cxlsd);

        device_lock_assert(&port->dev);

        return add_hdm_decoder(port, &cxlsd->cxld);
}

static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
{
        u32 hdm_cap;

        hdm_cap = readl(cxlhdm->regs.hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET);
        cxlhdm->decoder_count = cxl_hdm_decoder_count(hdm_cap);
        cxlhdm->target_count =
                FIELD_GET(CXL_HDM_DECODER_TARGET_COUNT_MASK, hdm_cap);
        if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_11_8, hdm_cap))
                cxlhdm->interleave_mask |= GENMASK(11, 8);
        if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
                cxlhdm->interleave_mask |= GENMASK(14, 12);
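        /* power-of-2 interleave ways (1, 2, 4, 8) are unconditionally supported */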
        cxlhdm->iw_cap_mask = BIT(1) | BIT(2) | BIT(4) | BIT(8);
        if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap))
                cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
        if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
                cxlhdm->iw_cap_mask |= BIT(16);
}

static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
{
        struct cxl_hdm *cxlhdm;
        void __iomem *hdm;
        u32 ctrl;

        if (!info)
                return false;

        cxlhdm = dev_get_drvdata(&info->port->dev);
        hdm = cxlhdm->regs.hdm_decoder;

        if (!hdm)
                return true;

        /*
         * If HDM decoders are present and the driver is in control of
         * Mem_Enable, skip DVSEC-based emulation.
         */
        if (!info->mem_enabled)
                return false;

        /*
         * If HDM decoders are globally enabled, do not fall back to DVSEC
         * range emulation. Zeroed decoder registers after region teardown
         * do not imply absence of HDM capability.
         *
         * Falling back to DVSEC here would treat the decoder as AUTO and
         * may incorrectly latch default interleave settings.
         */
        ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
        if (ctrl & CXL_HDM_DECODER_ENABLE)
                return false;

        return true;
}

/**
 * devm_cxl_setup_hdm - map HDM decoder component registers
 * @port: cxl_port to map
 * @info: cached DVSEC range register info
 */
static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
                                          struct cxl_endpoint_dvsec_info *info)
{
        struct cxl_register_map *reg_map = &port->reg_map;
        struct device *dev = &port->dev;
        struct cxl_hdm *cxlhdm;
        int rc;

        cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
        if (!cxlhdm)
                return ERR_PTR(-ENOMEM);
        cxlhdm->port = port;
        dev_set_drvdata(dev, cxlhdm);

        /* Memory devices can configure device HDM using DVSEC range regs. */
        if (reg_map->resource == CXL_RESOURCE_NONE) {
                if (!info || !info->mem_enabled) {
                        dev_err(dev, "No component registers mapped\n");
                        return ERR_PTR(-ENXIO);
                }

                cxlhdm->decoder_count = info->ranges;
                return cxlhdm;
        }

        if (!reg_map->component_map.hdm_decoder.valid) {
                dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
                /* unique error code to indicate no HDM decoder capability */
                return ERR_PTR(-ENODEV);
        }

        rc = cxl_map_component_regs(reg_map, &cxlhdm->regs,
                                    BIT(CXL_CM_CAP_CAP_ID_HDM));
        if (rc) {
                dev_err(dev, "Failed to map HDM capability.\n");
                return ERR_PTR(rc);
        }

        parse_hdm_decoder_caps(cxlhdm);
        if (cxlhdm->decoder_count == 0) {
                dev_err(dev, "Spec violation. Caps invalid\n");
                return ERR_PTR(-ENXIO);
        }

        /*
         * Now that the HDM capability is parsed, decide if range
         * register emulation is needed and fix up @cxlhdm accordingly.
         */
        if (should_emulate_decoders(info)) {
                dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges,
                        str_plural(info->ranges));
                cxlhdm->decoder_count = info->ranges;
        }

        return cxlhdm;
}

static void __cxl_dpa_debug(struct seq_file *file, struct resource *r, int depth)
{
        unsigned long long start = r->start, end = r->end;

        seq_printf(file, "%*s%08llx-%08llx : %s\n", depth * 2, "", start, end,
                   r->name);
}

void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds)
{
        struct resource *p1, *p2;

        guard(rwsem_read)(&cxl_rwsem.dpa);
        for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) {
                __cxl_dpa_debug(file, p1, 0);
                for (p2 = p1->child; p2; p2 = p2->sibling)
                        __cxl_dpa_debug(file, p2, 1);
        }
}
EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, "CXL");

/* See request_skip() kernel-doc */
static resource_size_t __adjust_skip(struct cxl_dev_state *cxlds,
                                     const resource_size_t skip_base,
                                     const resource_size_t skip_len,
                                     const char *requester)
{
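        /*
         * Walk each partition that intersects the skip span: with a
         * @requester name, reserve the overlap; with @requester == NULL,
         * release it (see release_skip()).
         */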
        const resource_size_t skip_end = skip_base + skip_len - 1;

        for (int i = 0; i < cxlds->nr_partitions; i++) {
                const struct resource *part_res = &cxlds->part[i].res;
                resource_size_t adjust_start, adjust_end, size;

                adjust_start = max(skip_base, part_res->start);
                adjust_end = min(skip_end, part_res->end);

                if (adjust_end < adjust_start)
                        continue;

                size = adjust_end - adjust_start + 1;

                if (!requester)
                        __release_region(&cxlds->dpa_res, adjust_start, size);
                else if (!__request_region(&cxlds->dpa_res, adjust_start, size,
                                           requester, 0))
                        return adjust_start - skip_base;
        }

        return skip_len;
}
#define release_skip(c, b, l) __adjust_skip((c), (b), (l), NULL)

/*
 * Must be called in a context that synchronizes against this decoder's
 * port ->remove() callback (like an endpoint decoder sysfs attribute)
 */
static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
{
        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
        struct cxl_port *port = cxled_to_port(cxled);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct resource *res = cxled->dpa_res;
        resource_size_t skip_start;

        lockdep_assert_held_write(&cxl_rwsem.dpa);

        /* save @skip_start, before @res is released */
        skip_start = res->start - cxled->skip;
        __release_region(&cxlds->dpa_res, res->start, resource_size(res));
        if (cxled->skip)
                release_skip(cxlds, skip_start, cxled->skip);
        cxled->skip = 0;
        cxled->dpa_res = NULL;
        put_device(&cxled->cxld.dev);
        port->hdm_end--;
}

static void cxl_dpa_release(void *cxled)
{
        guard(rwsem_write)(&cxl_rwsem.dpa);
        __cxl_dpa_release(cxled);
}

/*
 * Must be called from context that will not race port device
 * unregistration, like decoder sysfs attribute methods
 */
static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
{
        struct cxl_port *port = cxled_to_port(cxled);

        lockdep_assert_held_write(&cxl_rwsem.dpa);
        devm_remove_action(&port->dev, cxl_dpa_release, cxled);
        __cxl_dpa_release(cxled);
}

/**
 * request_skip() - Track DPA 'skip' in @cxlds->dpa_res resource tree
 * @cxlds: CXL.mem device context that parents @cxled
 * @cxled: Endpoint decoder establishing new allocation that skips lower DPA
 * @skip_base: DPA < start of new DPA allocation (DPAnew)
 * @skip_len: @skip_base + @skip_len == DPAnew
 *
 * DPA 'skip' arises from out-of-sequence DPA allocation events relative
 * to free capacity across multiple partitions. It is a wasteful event
 * as usable DPA gets thrown away, but if a deployment has, for example,
 * a dual RAM+PMEM device, wants to use PMEM, and has unallocated RAM
 * DPA, the free RAM DPA must be sacrificed to start allocating PMEM.
 * See third "Implementation Note" in CXL 3.1 8.2.4.19.13 "Decoder
 * Protection" for more details.
 *
 * A 'skip' always covers the last allocated DPA in a previous partition
 * to the start of the current partition to allocate.  Allocations never
 * start in the middle of a partition, and allocations are always
 * de-allocated in reverse order (see cxl_dpa_free(), or natural devm
 * unwind order from forced in-order allocation).
 *
 * If @cxlds->nr_partitions was guaranteed to be <= 2 then the 'skip'
 * would always be contained to a single partition. Given
 * @cxlds->nr_partitions may be > 2 it results in cases where the 'skip'
 * might span "tail capacity of partition[0], all of partition[1], ...,
 * all of partition[N-1]" to support allocating from partition[N]. That
 * in turn interacts with the partition 'struct resource' boundaries
 * within @cxlds->dpa_res whereby 'skip' requests need to be divided by
 * partition. I.e. this is a quirk of using a 'struct resource' tree to
 * detect range conflicts while also tracking partition boundaries in
 * @cxlds->dpa_res.
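 *
 * As a hypothetical example: a device with 'ram' at [0x0, 0xfffffff] and
 * 'pmem' at [0x10000000, 0x1fffffff], with no 'ram' allocations, must
 * record a 'skip' covering all of 'ram' before the first 'pmem'
 * allocation can be placed at DPA 0x10000000.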
 */
static int request_skip(struct cxl_dev_state *cxlds,
                        struct cxl_endpoint_decoder *cxled,
                        const resource_size_t skip_base,
                        const resource_size_t skip_len)
{
        resource_size_t skipped = __adjust_skip(cxlds, skip_base, skip_len,
                                                dev_name(&cxled->cxld.dev));

        if (skipped == skip_len)
                return 0;

        dev_dbg(cxlds->dev,
                "%s: failed to reserve skipped space (%pa %pa %pa)\n",
                dev_name(&cxled->cxld.dev), &skip_base, &skip_len, &skipped);

        release_skip(cxlds, skip_base, skipped);

        return -EBUSY;
}

static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
                             resource_size_t base, resource_size_t len,
                             resource_size_t skipped)
{
        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
        struct cxl_port *port = cxled_to_port(cxled);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct device *dev = &port->dev;
        struct resource *res;
        int rc;

        lockdep_assert_held_write(&cxl_rwsem.dpa);

        if (!len) {
                dev_warn(dev, "decoder%d.%d: empty reservation attempted\n",
                         port->id, cxled->cxld.id);
                return -EINVAL;
        }

        if (cxled->dpa_res) {
                dev_dbg(dev, "decoder%d.%d: existing allocation %pr assigned\n",
                        port->id, cxled->cxld.id, cxled->dpa_res);
                return -EBUSY;
        }

        if (port->hdm_end + 1 != cxled->cxld.id) {
                /*
                 * Assumes alloc and commit order always follows hardware
                 * instance order, per the expectation from 8.2.5.12.20
                 * Committing Decoder Programming that decoder[m] must be
                 * committed before decoder[m+1] starts its commit.
                 */
                dev_dbg(dev, "decoder%d.%d: expected decoder%d.%d\n", port->id,
                        cxled->cxld.id, port->id, port->hdm_end + 1);
                return -EBUSY;
        }

        if (skipped) {
                rc = request_skip(cxlds, cxled, base - skipped, skipped);
                if (rc)
                        return rc;
        }
        res = __request_region(&cxlds->dpa_res, base, len,
                               dev_name(&cxled->cxld.dev), 0);
        if (!res) {
                dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n",
                        port->id, cxled->cxld.id);
                if (skipped)
                        release_skip(cxlds, base - skipped, skipped);
                return -EBUSY;
        }
        cxled->dpa_res = res;
        cxled->skip = skipped;

        /*
         * When allocating new capacity, ->part is already set; when
         * discovering decoder settings at initial enumeration, ->part
         * is not set.
         */
        if (cxled->part < 0)
                for (int i = 0; i < cxlds->nr_partitions; i++)
                        if (resource_contains(&cxlds->part[i].res, res)) {
                                cxled->part = i;
                                break;
                        }

        if (cxled->part < 0)
                dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n",
                         port->id, cxled->cxld.id, res);

        port->hdm_end++;
        get_device(&cxled->cxld.dev);
        return 0;
}

static int add_dpa_res(struct device *dev, struct resource *parent,
                       struct resource *res, resource_size_t start,
                       resource_size_t size, const char *type)
{
        int rc;

        *res = (struct resource) {
                .name = type,
                .start = start,
                .end =  start + size - 1,
                .flags = IORESOURCE_MEM,
        };
        if (resource_size(res) == 0) {
                dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
                return 0;
        }
        rc = request_resource(parent, res);
        if (rc) {
                dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
                        res, rc);
                return rc;
        }

        dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);

        return 0;
}

static const char *cxl_mode_name(enum cxl_partition_mode mode)
{
        switch (mode) {
        case CXL_PARTMODE_RAM:
                return "ram";
        case CXL_PARTMODE_PMEM:
                return "pmem";
        default:
                return "";
        }
}

/* if this fails the caller must destroy @cxlds; there is no recovery */
int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info)
{
        struct device *dev = cxlds->dev;

        guard(rwsem_write)(&cxl_rwsem.dpa);

        if (cxlds->nr_partitions)
                return -EBUSY;

        if (!info->size || !info->nr_partitions) {
                cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
                cxlds->nr_partitions = 0;
                return 0;
        }

        cxlds->dpa_res = DEFINE_RES_MEM(0, info->size);

        for (int i = 0; i < info->nr_partitions; i++) {
                const struct cxl_dpa_part_info *part = &info->part[i];
                int rc;

                cxlds->part[i].perf.qos_class = CXL_QOS_CLASS_INVALID;
                cxlds->part[i].mode = part->mode;

                /* Require ordered + contiguous partitions */
                if (i) {
                        const struct cxl_dpa_part_info *prev = &info->part[i - 1];

                        if (prev->range.end + 1 != part->range.start)
                                return -EINVAL;
                }
                rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->part[i].res,
                                 part->range.start, range_len(&part->range),
                                 cxl_mode_name(part->mode));
                if (rc)
                        return rc;
                cxlds->nr_partitions++;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(cxl_dpa_setup);

int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
                                resource_size_t base, resource_size_t len,
                                resource_size_t skipped)
{
        struct cxl_port *port = cxled_to_port(cxled);
        int rc;

        scoped_guard(rwsem_write, &cxl_rwsem.dpa)
                rc = __cxl_dpa_reserve(cxled, base, len, skipped);

        if (rc)
                return rc;

        return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, "CXL");

resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
{
        guard(rwsem_read)(&cxl_rwsem.dpa);
        if (cxled->dpa_res)
                return resource_size(cxled->dpa_res);

        return 0;
}

resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
{
        resource_size_t base = RESOURCE_SIZE_MAX;

        lockdep_assert_held(&cxl_rwsem.dpa);
        if (cxled->dpa_res)
                base = cxled->dpa_res->start;

        return base;
}

bool cxl_resource_contains_addr(const struct resource *res, const resource_size_t addr)
{
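        /* model @addr as a 1-byte resource so resource_contains() can test it */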
        struct resource _addr = DEFINE_RES_MEM(addr, 1);

        return resource_contains(res, &_addr);
}

int cxl_dpa_free(struct cxl_endpoint_decoder *cxled)
{
        struct cxl_port *port = cxled_to_port(cxled);
        struct device *dev = &cxled->cxld.dev;

        guard(rwsem_write)(&cxl_rwsem.dpa);
        if (!cxled->dpa_res)
                return 0;
        if (cxled->cxld.region) {
                dev_dbg(dev, "decoder assigned to: %s\n",
                        dev_name(&cxled->cxld.region->dev));
                return -EBUSY;
        }
        if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
                dev_dbg(dev, "decoder enabled\n");
                return -EBUSY;
        }
        if (cxled->cxld.id != port->hdm_end) {
                dev_dbg(dev, "expected decoder%d.%d\n", port->id,
                        port->hdm_end);
                return -EBUSY;
        }

        devm_cxl_dpa_release(cxled);
        return 0;
}

int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
                     enum cxl_partition_mode mode)
{
        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct device *dev = &cxled->cxld.dev;
        int part;

        guard(rwsem_write)(&cxl_rwsem.dpa);
        if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
                return -EBUSY;

        for (part = 0; part < cxlds->nr_partitions; part++)
                if (cxlds->part[part].mode == mode)
                        break;

        if (part >= cxlds->nr_partitions) {
                dev_dbg(dev, "unsupported mode: %d\n", mode);
                return -EINVAL;
        }

        if (!resource_size(&cxlds->part[part].res)) {
                dev_dbg(dev, "no available capacity for mode: %d\n", mode);
                return -ENXIO;
        }

        cxled->part = part;
        return 0;
}

static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
{
        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct device *dev = &cxled->cxld.dev;
        struct resource *res, *prev = NULL;
        resource_size_t start, avail, skip, skip_start;
        struct resource *p, *last;
        int part;

        guard(rwsem_write)(&cxl_rwsem.dpa);
        if (cxled->cxld.region) {
                dev_dbg(dev, "decoder attached to %s\n",
                        dev_name(&cxled->cxld.region->dev));
                return -EBUSY;
        }

        if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
                dev_dbg(dev, "decoder enabled\n");
                return -EBUSY;
        }

        part = cxled->part;
        if (part < 0) {
                dev_dbg(dev, "partition not set\n");
                return -EBUSY;
        }

        res = &cxlds->part[part].res;
        for (p = res->child, last = NULL; p; p = p->sibling)
                last = p;
        if (last)
                start = last->end + 1;
        else
                start = res->start;

        /*
         * To allocate at partition N, a skip needs to be calculated for all
         * unallocated space in lower partition indices.
         *
         * If a partition has any allocations, the search can end because a
         * previous cxl_dpa_alloc() invocation is assumed to have accounted
         * for all prior partitions.
         */
        skip_start = CXL_RESOURCE_NONE;
        for (int i = part; i; i--) {
                prev = &cxlds->part[i - 1].res;
                for (p = prev->child, last = NULL; p; p = p->sibling)
                        last = p;
                if (last) {
                        skip_start = last->end + 1;
                        break;
                }
                skip_start = prev->start;
        }

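        /*
         * A non-zero skip covers [@skip_start, res->start - 1], i.e. the
         * unallocated tail of the lower partitions leading into @res.
         */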
        avail = res->end - start + 1;
        if (skip_start == CXL_RESOURCE_NONE)
                skip = 0;
        else
                skip = res->start - skip_start;

        if (size > avail) {
                dev_dbg(dev, "%llu exceeds available %s capacity: %llu\n", size,
                        res->name, (u64)avail);
                return -ENOSPC;
        }

        return __cxl_dpa_reserve(cxled, start, size, skip);
}

int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
{
        struct cxl_port *port = cxled_to_port(cxled);
        int rc;

        rc = __cxl_dpa_alloc(cxled, size);
        if (rc)
                return rc;

        return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
}

static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl)
{
        u16 eig;
        u8 eiw;

        /*
         * Input validation ensures these WARNs never fire, but otherwise
         * suppress uninitialized variable usage warnings.
         */
        if (WARN_ONCE(ways_to_eiw(cxld->interleave_ways, &eiw),
                      "invalid interleave_ways: %d\n", cxld->interleave_ways))
                return;
        if (WARN_ONCE(granularity_to_eig(cxld->interleave_granularity, &eig),
                      "invalid interleave_granularity: %d\n",
                      cxld->interleave_granularity))
                return;

        u32p_replace_bits(ctrl, eig, CXL_HDM_DECODER0_CTRL_IG_MASK);
        u32p_replace_bits(ctrl, eiw, CXL_HDM_DECODER0_CTRL_IW_MASK);
        *ctrl |= CXL_HDM_DECODER0_CTRL_COMMIT;
}

static void cxld_set_type(struct cxl_decoder *cxld, u32 *ctrl)
{
        u32p_replace_bits(ctrl,
                          !!(cxld->target_type == CXL_DECODER_HOSTONLYMEM),
                          CXL_HDM_DECODER0_CTRL_HOSTONLY);
}

static void cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt)
{
        struct cxl_dport **t = &cxlsd->target[0];
        int ways = cxlsd->cxld.interleave_ways;

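        /*
         * Pack one 8-bit port id per interleave way into the Target List
         * register image: way N occupies bits [8N + 7 : 8N].
         */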
        *tgt = FIELD_PREP(GENMASK(7, 0), t[0]->port_id);
        if (ways > 1)
                *tgt |= FIELD_PREP(GENMASK(15, 8), t[1]->port_id);
        if (ways > 2)
                *tgt |= FIELD_PREP(GENMASK(23, 16), t[2]->port_id);
        if (ways > 3)
                *tgt |= FIELD_PREP(GENMASK(31, 24), t[3]->port_id);
        if (ways > 4)
                *tgt |= FIELD_PREP(GENMASK_ULL(39, 32), t[4]->port_id);
        if (ways > 5)
                *tgt |= FIELD_PREP(GENMASK_ULL(47, 40), t[5]->port_id);
        if (ways > 6)
                *tgt |= FIELD_PREP(GENMASK_ULL(55, 48), t[6]->port_id);
        if (ways > 7)
                *tgt |= FIELD_PREP(GENMASK_ULL(63, 56), t[7]->port_id);
}

/*
 * Per CXL 2.0 8.2.5.12.20 Committing Decoder Programming, hardware must report
 * committed or error within 10ms; be generous and allow 20ms to account for
 * clock skew and other marginal behavior.
 */
#define COMMIT_TIMEOUT_MS 20
static int cxld_await_commit(void __iomem *hdm, int id)
{
        u32 ctrl;
        int i;

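        /*
         * Poll roughly once per millisecond (fsleep(1000) sleeps 1000us),
         * for up to COMMIT_TIMEOUT_MS, until the decoder reports either
         * committed or error.
         */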
        for (i = 0; i < COMMIT_TIMEOUT_MS; i++) {
                ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
                if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMIT_ERROR, ctrl)) {
                        ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT;
                        writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
                        return -EIO;
                }
                if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
                        return 0;
                fsleep(1000);
        }

        return -ETIMEDOUT;
}

static void setup_hw_decoder(struct cxl_decoder *cxld, void __iomem *hdm)
{
        int id = cxld->id;
        u64 base, size;
        u32 ctrl;

        /* common decoder settings */
        ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id));
        cxld_set_interleave(cxld, &ctrl);
        cxld_set_type(cxld, &ctrl);
        base = cxld->hpa_range.start;
        size = range_len(&cxld->hpa_range);

        writel(upper_32_bits(base), hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id));
        writel(lower_32_bits(base), hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id));
        writel(upper_32_bits(size), hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(id));
        writel(lower_32_bits(size), hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id));

        if (is_switch_decoder(&cxld->dev)) {
                struct cxl_switch_decoder *cxlsd =
                        to_cxl_switch_decoder(&cxld->dev);
                void __iomem *tl_hi = hdm + CXL_HDM_DECODER0_TL_HIGH(id);
                void __iomem *tl_lo = hdm + CXL_HDM_DECODER0_TL_LOW(id);
                u64 targets;

                cxlsd_set_targets(cxlsd, &targets);
                writel(upper_32_bits(targets), tl_hi);
                writel(lower_32_bits(targets), tl_lo);
        } else {
                struct cxl_endpoint_decoder *cxled =
                        to_cxl_endpoint_decoder(&cxld->dev);
                void __iomem *sk_hi = hdm + CXL_HDM_DECODER0_SKIP_HIGH(id);
                void __iomem *sk_lo = hdm + CXL_HDM_DECODER0_SKIP_LOW(id);

                writel(upper_32_bits(cxled->skip), sk_hi);
                writel(lower_32_bits(cxled->skip), sk_lo);
        }

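        /*
         * Write ctrl last: it carries the COMMIT bit (armed in
         * cxld_set_interleave()) that latches the base, size, and
         * target/skip programming above.
         */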
        writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
}

static int cxl_decoder_commit(struct cxl_decoder *cxld)
{
        struct cxl_port *port = to_cxl_port(cxld->dev.parent);
        struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
        void __iomem *hdm = cxlhdm->regs.hdm_decoder;
        int id = cxld->id, rc;

        if (cxld->flags & CXL_DECODER_F_ENABLE)
                return 0;

        if (cxl_num_decoders_committed(port) != id) {
                dev_dbg(&port->dev,
                        "%s: out of order commit, expected decoder%d.%d\n",
                        dev_name(&cxld->dev), port->id,
                        cxl_num_decoders_committed(port));
                return -EBUSY;
        }

        /*
         * For endpoint decoders hosted on CXL memory devices that
         * support the sanitize operation, make sure sanitize is not in-flight.
         */
        if (is_endpoint_decoder(&cxld->dev)) {
                struct cxl_endpoint_decoder *cxled =
                        to_cxl_endpoint_decoder(&cxld->dev);
                struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
                struct cxl_memdev_state *mds =
                        to_cxl_memdev_state(cxlmd->cxlds);

                if (mds && mds->security.sanitize_active) {
                        dev_dbg(&cxlmd->dev,
                                "attempted to commit %s during sanitize\n",
                                dev_name(&cxld->dev));
                        return -EBUSY;
                }
        }

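        /*
         * setup_hw_decoder() reads cxled->skip for endpoint decoders, so
         * hold the DPA lock for a stable snapshot while programming.
         */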
        scoped_guard(rwsem_read, &cxl_rwsem.dpa)
                setup_hw_decoder(cxld, hdm);

        rc = cxld_await_commit(hdm, cxld->id);
        if (rc) {
                dev_dbg(&port->dev, "%s: error %d committing decoder\n",
                        dev_name(&cxld->dev), rc);
                return rc;
        }
        port->commit_end++;
        cxld->flags |= CXL_DECODER_F_ENABLE;

        return 0;
}

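/*
 * device_for_each_child_reverse_from() callback: walk back toward decoder0
 * and retire commit accounting for each decoder that is already disabled.
 */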
static int commit_reap(struct device *dev, void *data)
{
        struct cxl_port *port = to_cxl_port(dev->parent);
        struct cxl_decoder *cxld;

        if (!is_switch_decoder(dev) && !is_endpoint_decoder(dev))
                return 0;

        cxld = to_cxl_decoder(dev);
        if (port->commit_end == cxld->id &&
            ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
                port->commit_end--;
                dev_dbg(&port->dev, "reap: %s commit_end: %d\n",
                        dev_name(&cxld->dev), port->commit_end);
        }

        return 0;
}

void cxl_port_commit_reap(struct cxl_decoder *cxld)
{
        struct cxl_port *port = to_cxl_port(cxld->dev.parent);

        lockdep_assert_held_write(&cxl_rwsem.region);

        /*
         * Once the highest committed decoder is disabled, walk back and
         * retire the commit accounting for any lower decoders that were
         * left pinned by an out-of-order release.
         */
        port->commit_end--;
        dev_dbg(&port->dev, "reap: %s commit_end: %d\n", dev_name(&cxld->dev),
                port->commit_end);
        device_for_each_child_reverse_from(&port->dev, &cxld->dev, NULL,
                                           commit_reap);
}
EXPORT_SYMBOL_NS_GPL(cxl_port_commit_reap, "CXL");

static void cxl_decoder_reset(struct cxl_decoder *cxld)
{
        struct cxl_port *port = to_cxl_port(cxld->dev.parent);
        struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
        void __iomem *hdm = cxlhdm->regs.hdm_decoder;
        int id = cxld->id;
        u32 ctrl;

        if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
                return;

        if (cxld->flags & CXL_DECODER_F_LOCK)
                return;

        if (port->commit_end == id)
                cxl_port_commit_reap(cxld);
        else
                dev_dbg(&port->dev,
                        "%s: out of order reset, expected decoder%d.%d\n",
                        dev_name(&cxld->dev), port->id, port->commit_end);

        ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
        ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT;
        writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));

        writel(0, hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(id));
        writel(0, hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id));
        writel(0, hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id));
        writel(0, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id));

        cxld->flags &= ~CXL_DECODER_F_ENABLE;

        /* Userspace is now responsible for reconfiguring this decoder */
        if (is_endpoint_decoder(&cxld->dev)) {
                struct cxl_endpoint_decoder *cxled;

                cxled = to_cxl_endpoint_decoder(&cxld->dev);
                cxled->state = CXL_DECODER_STATE_MANUAL;
        }
}

static int cxl_setup_hdm_decoder_from_dvsec(
        struct cxl_port *port, struct cxl_decoder *cxld, u64 *dpa_base,
        int which, struct cxl_endpoint_dvsec_info *info)
{
        struct cxl_endpoint_decoder *cxled;
        u64 len;
        int rc;

        if (!is_cxl_endpoint(port))
                return -EOPNOTSUPP;

        cxled = to_cxl_endpoint_decoder(&cxld->dev);
        len = range_len(&info->dvsec_range[which]);
        if (!len)
                return -ENOENT;

        cxld->target_type = CXL_DECODER_HOSTONLYMEM;
        cxld->commit = NULL;
        cxld->reset = NULL;
        cxld->hpa_range = info->dvsec_range[which];

        /*
         * Set the emulated decoder as locked pending additional support to
         * change the range registers at run time.
         */
        cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK;
        port->commit_end = cxld->id;

        rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0);
        if (rc) {
                dev_err(&port->dev,
                        "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx: %d\n",
                        port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc);
                return rc;
        }
        *dpa_base += len;
        cxled->state = CXL_DECODER_STATE_AUTO;

        return 0;
}

static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
                            void __iomem *hdm, int which,
                            u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
{
        struct cxl_endpoint_decoder *cxled = NULL;
        u64 size, base, skip, dpa_size, lo, hi;
        bool committed;
        u32 remainder;
        int i, rc;
        u32 ctrl;
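        /* overlay of the 64-bit Target List register's eight port id slots */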
        union {
                u64 value;
                unsigned char target_id[8];
        } target_list;

        if (should_emulate_decoders(info))
                return cxl_setup_hdm_decoder_from_dvsec(port, cxld, dpa_base,
                                                        which, info);

        ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
        lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
        hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(which));
        base = (hi << 32) + lo;
        lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
        hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
        size = (hi << 32) + lo;
        committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED);
        cxld->commit = cxl_decoder_commit;
        cxld->reset = cxl_decoder_reset;

        if (!committed)
                size = 0;
        if (base == U64_MAX || size == U64_MAX) {
                dev_warn(&port->dev, "decoder%d.%d: Invalid resource range\n",
                         port->id, cxld->id);
                return -ENXIO;
        }

        if (info)
                cxled = to_cxl_endpoint_decoder(&cxld->dev);
        cxld->hpa_range = (struct range) {
                .start = base,
                .end = base + size - 1,
        };

        /* decoders are enabled if committed */
        if (committed) {
                cxld->flags |= CXL_DECODER_F_ENABLE;
                if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK)
                        cxld->flags |= CXL_DECODER_F_LOCK;
                if (FIELD_GET(CXL_HDM_DECODER0_CTRL_HOSTONLY, ctrl))
                        cxld->target_type = CXL_DECODER_HOSTONLYMEM;
                else
                        cxld->target_type = CXL_DECODER_DEVMEM;

                guard(rwsem_write)(&cxl_rwsem.region);
                if (cxld->id != cxl_num_decoders_committed(port)) {
                        dev_warn(&port->dev,
                                 "decoder%d.%d: Committed out of order\n",
                                 port->id, cxld->id);
                        return -ENXIO;
                }

                if (size == 0) {
                        dev_warn(&port->dev,
                                 "decoder%d.%d: Committed with zero size\n",
                                 port->id, cxld->id);
                        return -ENXIO;
                }
                port->commit_end = cxld->id;
        } else {
                if (cxled) {
                        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
                        struct cxl_dev_state *cxlds = cxlmd->cxlds;

                        /*
                         * Default by devtype until a device arrives that needs
                         * more precision.
                         */
                        if (cxlds->type == CXL_DEVTYPE_CLASSMEM)
                                cxld->target_type = CXL_DECODER_HOSTONLYMEM;
                        else
                                cxld->target_type = CXL_DECODER_DEVMEM;
                } else {
                        /* To be overridden by region type at commit time */
                        cxld->target_type = CXL_DECODER_HOSTONLYMEM;
                }

                if (!FIELD_GET(CXL_HDM_DECODER0_CTRL_HOSTONLY, ctrl) &&
                    cxld->target_type == CXL_DECODER_HOSTONLYMEM) {
                        ctrl |= CXL_HDM_DECODER0_CTRL_HOSTONLY;
                        writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
                }
        }
        rc = eiw_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl),
                          &cxld->interleave_ways);
        if (rc) {
                dev_warn(&port->dev,
                         "decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n",
                         port->id, cxld->id, ctrl);
                return rc;
        }
        rc = eig_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl),
                                 &cxld->interleave_granularity);
        if (rc) {
                dev_warn(&port->dev,
                         "decoder%d.%d: Invalid interleave granularity (ctrl: %#x)\n",
                         port->id, cxld->id, ctrl);
                return rc;
        }

        dev_dbg(&port->dev, "decoder%d.%d: range: %#llx-%#llx iw: %d ig: %d\n",
                port->id, cxld->id, cxld->hpa_range.start, cxld->hpa_range.end,
                cxld->interleave_ways, cxld->interleave_granularity);

        if (!cxled) {
                lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which));
                hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which));
                target_list.value = (hi << 32) + lo;
                for (i = 0; i < cxld->interleave_ways; i++)
                        cxld->target_map[i] = target_list.target_id[i];

                return 0;
        }

        if (!committed)
                return 0;

        dpa_size = div_u64_rem(size, cxld->interleave_ways, &remainder);
        if (remainder) {
                dev_err(&port->dev,
                        "decoder%d.%d: invalid committed configuration size: %#llx ways: %d\n",
                        port->id, cxld->id, size, cxld->interleave_ways);
                return -ENXIO;
        }
        lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
        hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which));
        skip = (hi << 32) + lo;
        rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
        if (rc) {
                dev_err(&port->dev,
                        "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx: %d\n",
                        port->id, cxld->id, *dpa_base,
                        *dpa_base + dpa_size + skip - 1, rc);
                return rc;
        }
        *dpa_base += dpa_size + skip;

        cxled->state = CXL_DECODER_STATE_AUTO;

        return 0;
}

static void cxl_settle_decoders(struct cxl_hdm *cxlhdm)
{
        void __iomem *hdm = cxlhdm->regs.hdm_decoder;
        int committed, i;
        u32 ctrl;

        if (!hdm)
                return;

        /*
         * Since the register resource was recently claimed via request_region(),
         * be careful about trusting the "not-committed" status until the commit
         * timeout has elapsed.  The commit timeout is 10ms (CXL 2.0
         * 8.2.5.12.20), but double it to be tolerant of any clock skew between
         * host and target.
         */
        for (i = 0, committed = 0; i < cxlhdm->decoder_count; i++) {
                ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
                if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)
                        committed++;
        }

        /* ensure that future checks of committed can be trusted */
        if (committed != cxlhdm->decoder_count)
                msleep(20);
}

/**
 * devm_cxl_enumerate_decoders - add decoder objects per HDM register set
 * @cxlhdm: HDM capability context established by devm_cxl_setup_hdm()
 * @info: cached DVSEC range register info
 */
static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
                                       struct cxl_endpoint_dvsec_info *info)
{
        void __iomem *hdm = cxlhdm->regs.hdm_decoder;
        struct cxl_port *port = cxlhdm->port;
        int i;
        u64 dpa_base = 0;

        cxl_settle_decoders(cxlhdm);

        for (i = 0; i < cxlhdm->decoder_count; i++) {
                int rc, target_count = cxlhdm->target_count;
                struct cxl_decoder *cxld;

                if (is_cxl_endpoint(port)) {
                        struct cxl_endpoint_decoder *cxled;

                        cxled = cxl_endpoint_decoder_alloc(port);
                        if (IS_ERR(cxled)) {
                                dev_warn(&port->dev,
                                         "Failed to allocate decoder%d.%d\n",
                                         port->id, i);
                                return PTR_ERR(cxled);
                        }
                        cxld = &cxled->cxld;
                } else {
                        struct cxl_switch_decoder *cxlsd;

                        cxlsd = cxl_switch_decoder_alloc(port, target_count);
                        if (IS_ERR(cxlsd)) {
                                dev_warn(&port->dev,
                                         "Failed to allocate decoder%d.%d\n",
                                         port->id, i);
                                return PTR_ERR(cxlsd);
                        }
                        cxld = &cxlsd->cxld;
                }

                rc = init_hdm_decoder(port, cxld, hdm, i, &dpa_base, info);
                if (rc) {
                        dev_warn(&port->dev,
                                 "Failed to initialize decoder%d.%d\n",
                                 port->id, i);
                        put_device(&cxld->dev);
                        return rc;
                }
                rc = add_hdm_decoder(port, cxld);
                if (rc) {
                        dev_warn(&port->dev,
                                 "Failed to add decoder%d.%d\n", port->id, i);
                        return rc;
                }
        }

        return 0;
}

/**
 * devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders
 * @port: CXL port context
 *
 * Returns 0 on success, or a negative errno on failure.
 */
int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
{
        struct cxl_hdm *cxlhdm;

        if (is_cxl_root(port) || is_cxl_endpoint(port))
                return -EOPNOTSUPP;

        cxlhdm = devm_cxl_setup_hdm(port, NULL);
        if (!IS_ERR(cxlhdm))
                return devm_cxl_enumerate_decoders(cxlhdm, NULL);

        if (PTR_ERR(cxlhdm) != -ENODEV) {
                dev_err(&port->dev, "Failed to map HDM decoder capability\n");
                return PTR_ERR(cxlhdm);
        }

        if (cxl_port_get_possible_dports(port) == 1) {
                dev_dbg(&port->dev, "Fallback to passthrough decoder\n");
                return devm_cxl_add_passthrough_decoder(port);
        }

        dev_err(&port->dev, "HDM decoder capability not found\n");
        return -ENXIO;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL");

/**
 * devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders
 * @port: CXL port context
 *
 * Returns 0 on success, or a negative errno on failure.
 */
int devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
        struct cxl_endpoint_dvsec_info info = { .port = port };
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct cxl_hdm *cxlhdm;
        int rc;

        if (!is_cxl_endpoint(port))
                return -EOPNOTSUPP;

        rc = cxl_dvsec_rr_decode(cxlds, &info);
        if (rc < 0)
                return rc;

        cxlhdm = devm_cxl_setup_hdm(port, &info);
        if (IS_ERR(cxlhdm)) {
                if (PTR_ERR(cxlhdm) == -ENODEV)
                        dev_err(&port->dev, "HDM decoder registers not found\n");
                return PTR_ERR(cxlhdm);
        }

        rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info);
        if (rc)
                return rc;

        return devm_cxl_enumerate_decoders(cxlhdm, &info);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_endpoint_decoders_setup, "CXL");