root/arch/x86/pci/acpi.c
// SPDX-License-Identifier: GPL-2.0

#define pr_fmt(fmt) "PCI: " fmt

#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/pci-acpi.h>
#include <asm/numa.h>
#include <asm/pci_x86.h>

/*
 * Per host bridge bookkeeping for ACPI-enumerated PCI roots.
 *
 * The generic ACPI code only ever sees the embedded @common member; the
 * callbacks in this file recover the enclosing structure from it with
 * container_of().
 */
struct pci_root_info {
        /* Generic part; callbacks recover the container from this member */
        struct acpi_pci_root_info common;
        /* Handed to acpi_pci_root_create() as the bus sysdata */
        struct pci_sysdata sd;
#ifdef  CONFIG_PCI_MMCONFIG
        /* True once setup_mcfg_map() inserted an MMCONFIG region for us */
        bool mcfg_added;
        /* Bus range copied from root->secondary, narrowed to u8 */
        u8 start_bus;
        u8 end_bus;
#endif
};

/*
 * Policy knobs settled by pci_acpi_crs_quirks() from the BIOS year, the DMI
 * quirk table and the "pci=" command line flags in pci_probe.
 */
/* Whether E820 reservations are used for host bridge windows */
bool pci_use_e820 = true;
/* Whether host bridge windows are taken from ACPI */
static bool pci_use_crs = true;
/* When set, pci_acpi_scan_root() forces the segment/domain to 0 */
static bool pci_ignore_seg;

/*
 * DMI callback used by the pci_crs_quirks table: the matched machine
 * should take its host bridge windows from ACPI.
 *
 * @id: the matching table entry (unused here).
 *
 * Always returns 0.
 */
static int __init set_use_crs(const struct dmi_system_id *id)
{
        pci_use_crs = true;

        return 0;
}

/*
 * DMI callback used by the pci_crs_quirks table: the matched machine
 * should NOT take its host bridge windows from ACPI.
 *
 * @id: the matching table entry (unused here).
 *
 * Always returns 0.
 */
static int __init set_nouse_crs(const struct dmi_system_id *id)
{
        pci_use_crs = false;

        return 0;
}

/*
 * DMI callback used by the pci_crs_quirks table: ignore the ACPI _SEG
 * value on the matched machine and log which entry triggered it.
 *
 * @id: the matching table entry; its ident string is printed.
 *
 * Always returns 0.
 */
static int __init set_ignore_seg(const struct dmi_system_id *id)
{
        pci_ignore_seg = true;
        pr_info("%s detected: ignoring ACPI _SEG\n", id->ident);

        return 0;
}

/*
 * DMI callback used by the pci_crs_quirks table: do not clip _CRS windows
 * with E820 regions on the matched machine, and log which entry
 * triggered it.
 *
 * @id: the matching table entry; its ident string is printed.
 *
 * Always returns 0.
 */
static int __init set_no_e820(const struct dmi_system_id *id)
{
        pci_use_e820 = false;
        pr_info("%s detected: not clipping E820 regions from _CRS\n", id->ident);

        return 0;
}

/*
 * DMI match table consumed by pci_acpi_crs_quirks() via dmi_check_system().
 *
 * Each entry names a machine and the callback that adjusts one of the
 * policy flags in this file (pci_use_crs, pci_ignore_seg, pci_use_e820).
 * The entries are grouped by callback; the bug report that motivated each
 * entry is linked directly above it.  The empty entry at the end
 * terminates the table.
 */
static const struct dmi_system_id pci_crs_quirks[] __initconst = {
        /* Machines that need _CRS turned on (set_use_crs) */

        /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */
        {
                .callback = set_use_crs,
                .ident = "IBM System x3800",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
                },
        },
        /* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */
        /* 2006 AMD HT/VIA system with two host bridges */
        {
                .callback = set_use_crs,
                .ident = "ASRock ALiveSATA2-GLAN",
                .matches = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"),
                },
        },
        /* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */
        /* 2006 AMD HT/VIA system with two host bridges */
        {
                .callback = set_use_crs,
                .ident = "ASUS M2V-MX SE",
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
                        DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE"),
                        DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
                },
        },
        /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
        {
                .callback = set_use_crs,
                .ident = "MSI MS-7253",
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
                        DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
                        DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
                },
        },
        /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 */
        /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 */
        {
                .callback = set_use_crs,
                .ident = "Foxconn K8M890-8237A",
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "Foxconn"),
                        DMI_MATCH(DMI_BOARD_NAME, "K8M890-8237A"),
                        DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
                },
        },

        /* Now for the blacklist: machines where _CRS is turned off (set_nouse_crs) */

        /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */
        {
                .callback = set_nouse_crs,
                .ident = "Dell Studio 1557",
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557"),
                        DMI_MATCH(DMI_BIOS_VERSION, "A09"),
                },
        },
        /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */
        {
                .callback = set_nouse_crs,
                .ident = "Thinkpad SL510",
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "2847DFG"),
                        DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"),
                },
        },
        /* https://bugzilla.kernel.org/show_bug.cgi?id=42606 */
        {
                .callback = set_nouse_crs,
                .ident = "Supermicro X8DTH",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "X8DTH-i/6/iF/6F"),
                        DMI_MATCH(DMI_BIOS_VERSION, "2.0a"),
                },
        },

        /* Machines where ACPI _SEG is ignored (set_ignore_seg) */

        /* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */
        {
                .callback = set_ignore_seg,
                .ident = "HP xw9300",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "HP xw9300 Workstation"),
                },
        },

        /* Machines where E820 clipping of _CRS is disabled (set_no_e820) */

        /*
         * Many Lenovo models with "IIL" in their DMI_PRODUCT_VERSION have
         * an E820 reserved region that covers the entire 32-bit host
         * bridge memory window from _CRS.  Using the E820 region to clip
         * _CRS means no space is available for hot-added or uninitialized
         * PCI devices.  This typically breaks I2C controllers for touchpads
         * and hot-added Thunderbolt devices.  See the commit log for
         * models known to require this quirk and related bug reports.
         */
        {
                .callback = set_no_e820,
                .ident = "Lenovo *IIL* product version",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_PRODUCT_VERSION, "IIL"),
                },
        },

        /*
         * The Acer Spin 5 (SP513-54N) has the same issue as the Lenovo
         * *IIL* models: an E820 reservation covering the entire _CRS
         * 32-bit window.
         * See https://bugs.launchpad.net/bugs/1884232
         */
        {
                .callback = set_no_e820,
                .ident = "Acer Spin 5 (SP513-54N)",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Spin SP513-54N"),
                },
        },

        /*
         * Clevo X170KM-G barebones have the same issue as the Lenovo
         * *IIL* models: an E820 reservation covering the entire _CRS
         * 32-bit window.
         * See https://bugzilla.kernel.org/show_bug.cgi?id=214259
         */
        {
                .callback = set_no_e820,
                .ident = "Clevo X170KM-G Barebone",
                .matches = {
                        DMI_MATCH(DMI_BOARD_NAME, "X170KM-G"),
                },
        },
        /* Terminating empty entry */
        {}
};

/*
 * Decide whether host bridge windows come from ACPI _CRS and whether E820
 * reservations are used for them.
 *
 * Precedence, lowest to highest: built-in defaults, BIOS-year heuristics,
 * DMI quirks from pci_crs_quirks, then explicit "pci=" command line flags
 * recorded in pci_probe.  The outcome is logged.
 */
void __init pci_acpi_crs_quirks(void)
{
        const char *crs_state, *crs_override;
        int bios_year = dmi_get_bios_year();

        /* Old BIOS and an iomem space that ends within 32 bits: skip _CRS */
        if (bios_year >= 0 && bios_year < 2008) {
                if (iomem_resource.end <= 0xffffffff)
                        pci_use_crs = false;
        }

        /*
         * Host bridge _CRS from some firmware includes space that cannot
         * actually be used (host bridge registers, BARs of hidden PCI
         * devices, etc).  That is a firmware defect, and 4dc2287c1805
         * ("x86: avoid E820 regions when allocating address space") has
         * historically clipped that space out.
         *
         * Other firmware, however, supplies E820 reserved regions that
         * span whole _CRS windows.  Clipping then discards the complete
         * window and leaves nothing for hot-added or uninitialized
         * devices.  Such E820 entries are probably *not* a firmware
         * defect, so clipping is off by default for post-2022 machines.
         *
         * Quirks already exist to disable clipping on pre-2023 machines;
         * quirks to *enable* it will likely be needed for post-2022
         * machines whose _CRS wrongly includes unusable space.
         */
        if (bios_year >= 2023)
                pci_use_e820 = false;

        dmi_check_system(pci_crs_quirks);

        /*
         * An explicit "pci=use_crs" or "pci=nocrs" from the user overrides
         * whatever was determined above; "nocrs" wins if both are set.
         */
        if (pci_probe & (PCI_ROOT_NO_CRS | PCI_USE__CRS))
                pci_use_crs = !(pci_probe & PCI_ROOT_NO_CRS);

        if (pci_use_crs) {
                crs_state = "Using";
                crs_override = "nocrs";
        } else {
                crs_state = "Ignoring";
                crs_override = "use_crs";
        }
        pr_info("%s host bridge windows from ACPI; if necessary, use \"pci=%s\" and report a bug\n",
                crs_state, crs_override);

        /*
         * Likewise "pci=use_e820"/"pci=no_e820" on the kernel cmdline take
         * precedence; "no_e820" wins if both are set.
         */
        if (pci_probe & (PCI_NO_E820 | PCI_USE_E820))
                pci_use_e820 = !(pci_probe & PCI_NO_E820);

        if (pci_use_e820)
                pr_info("%s E820 reservations for host bridge windows\n", "Using");
        else
                pr_info("%s E820 reservations for host bridge windows\n", "Ignoring");

        if (pci_probe & (PCI_NO_E820 | PCI_USE_E820))
                pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n");
}

/*
 * Tell whether @pdev belongs to a PCIe switch that sits immediately below
 * @bridge.
 *
 * Depending on the PCIe type of @pdev, the walk towards @bridge is:
 *   Upstream Port:   parent must be @bridge
 *   Downstream Port: parent must be an Upstream Port whose parent is @bridge
 *   Endpoint:        parent must be a Downstream Port, below an Upstream
 *                    Port whose parent is @bridge
 *
 * Any other type, or a device with no upstream bridge, yields false.
 */
static bool pcie_switch_directly_under(struct pci_dev *bridge,
                                       struct pci_dev *pdev)
{
        struct pci_dev *up = pci_upstream_bridge(pdev);
        int type;

        /* Nothing above the device means it is not under anything */
        if (!up)
                return false;

        type = pci_pcie_type(pdev);

        if (type == PCI_EXP_TYPE_UPSTREAM)
                return up == bridge;

        if (type == PCI_EXP_TYPE_DOWNSTREAM) {
                if (pci_pcie_type(up) != PCI_EXP_TYPE_UPSTREAM)
                        return false;

                return pci_upstream_bridge(up) == bridge;
        }

        if (type == PCI_EXP_TYPE_ENDPOINT) {
                if (pci_pcie_type(up) != PCI_EXP_TYPE_DOWNSTREAM)
                        return false;

                /* Step from the Downstream Port to the switch Upstream Port */
                up = pci_upstream_bridge(up);
                if (!up)
                        return false;
                if (pci_pcie_type(up) != PCI_EXP_TYPE_UPSTREAM)
                        return false;

                return pci_upstream_bridge(up) == bridge;
        }

        return false;
}

/*
 * Tell whether @pdev is a PCIe port tunneled over USB4/Thunderbolt.
 *
 * Returns true if the device's firmware node carries a
 * "usb4-host-interface" reference, or if it is one of the listed Intel
 * Root Ports; false otherwise.
 */
static bool pcie_has_usb4_host_interface(struct pci_dev *pdev)
{
        struct fwnode_handle *ref;

        /*
         * Tunneled PCIe Root or Downstream Ports for USB4 carry the
         * "usb4-host-interface" ACPI property, which should cover most
         * cases, so try that before anything else.
         */
        ref = fwnode_find_reference(dev_fwnode(&pdev->dev),
                                    "usb4-host-interface", 0);
        if (!IS_ERR(ref)) {
                /* Only the existence matters; drop the reference again */
                fwnode_handle_put(ref);
                return true;
        }

        /*
         * Integrated Thunderbolt 3/4 PCIe Root Ports from Intel that
         * predate Alder Lake lack the "usb4-host-interface" property, so
         * they are recognized by PCI ID.  All of them are tunneled, and
         * the list is not expected to grow.
         */
        if (pdev->vendor != PCI_VENDOR_ID_INTEL)
                return false;

        switch (pdev->device) {
        /* Ice Lake Thunderbolt 3 PCIe Root Ports */
        case 0x8a1d:
        case 0x8a1f:
        case 0x8a21:
        case 0x8a23:
        /* Tiger Lake-LP Thunderbolt 4 PCIe Root Ports */
        case 0x9a23:
        case 0x9a25:
        case 0x9a27:
        case 0x9a29:
        /* Tiger Lake-H Thunderbolt 4 PCIe Root Ports */
        case 0x9a2b:
        case 0x9a2d:
        case 0x9a2f:
        case 0x9a31:
                return true;
        default:
                return false;
        }
}

/*
 * Decide whether @pdev sits behind a PCIe tunnel.
 *
 * Returns false when the device has no upstream bridge, no Root Port, a
 * Root Port that is not external facing, or when it is part of a switch
 * directly below the external-facing Root Port.  Returns true otherwise.
 */
bool arch_pci_dev_is_removable(struct pci_dev *pdev)
{
        struct pci_dev *upstream, *root_port;

        /* Without a parent or a Root Port there can be no tunnel */
        upstream = pci_upstream_bridge(pdev);
        if (!upstream)
                return false;

        root_port = pcie_find_root_port(pdev);
        if (!root_port)
                return false;

        /* Devices below an internal Root Port are not tunneled */
        if (!root_port->external_facing)
                return false;

        /* Whatever hangs right below a "usb4-host-interface" is tunneled */
        if (pcie_has_usb4_host_interface(upstream))
                return true;

        /*
         * A discrete Thunderbolt/USB4 controller located directly behind
         * the non-USB4 PCIe Root Port marked "ExternalFacingPort" is not
         * itself behind a tunnel; PCIe devices after that discrete chip
         * are.
         */
        return !pcie_switch_directly_under(root_port, pdev);
}

#ifdef  CONFIG_PCI_MMCONFIG
/*
 * Report that MMCONFIG is unavailable for a host bridge and decide
 * whether that is fatal.
 *
 * @seg:  PCI segment of the host bridge.
 * @dev:  device the message is logged against.
 * @estr: reason prefix placed at the start of the message; it is only
 *        read, hence const.
 *
 * Returns -EIO for a non-zero segment, after logging an error that
 * configuration space cannot be accessed.  Returns 0 for segment 0, after
 * logging a warning that only extended configuration space is lost.
 */
static int check_segment(u16 seg, struct device *dev, const char *estr)
{
        /*
         * NOTE(review): presumably non-zero segments are reachable only
         * through MMCONFIG, which is why this case is an error - confirm.
         */
        if (seg) {
                dev_err(dev, "%s can't access configuration space under this host bridge\n",
                        estr);
                return -EIO;
        }

        /*
         * Failure in adding MMCFG information is not fatal,
         * just can't access extended configuration space of
         * devices under this host bridge.
         */
        dev_warn(dev, "%s can't access extended configuration space under this bridge\n",
                 estr);

        return 0;
}

/*
 * Record the bus range of a host bridge and try to register its MMCONFIG
 * region.
 *
 * @ci: generic root info embedded in a struct pci_root_info.
 *
 * Returns 0 on success, when another extended config accessor is already
 * in use, or when the region already exists (-EEXIST from the insert).
 * Otherwise returns whatever check_segment() decides for the segment.
 */
static int setup_mcfg_map(struct acpi_pci_root_info *ci)
{
        struct pci_root_info *info =
                container_of(ci, struct pci_root_info, common);
        struct acpi_pci_root *root = ci->root;
        struct device *dev = &ci->bridge->dev;
        int seg, rc;

        info->mcfg_added = false;
        info->start_bus = (u8)root->secondary.start;
        info->end_bus = (u8)root->secondary.end;
        seg = info->sd.domain;

        dev_dbg(dev, "%s(%04x %pR ECAM %pa)\n", __func__, seg,
                &root->secondary, &root->mcfg_addr);

        /* Extended config ops other than MMCFG are installed: nothing to do */
        if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg)
                return 0;

        if (!(pci_probe & PCI_PROBE_MMCONF))
                return check_segment(seg, dev, "MMCONFIG is disabled,");

        rc = pci_mmconfig_insert(dev, seg, info->start_bus, info->end_bus,
                                 root->mcfg_addr);

        /* A region that is already present is fine, but is not ours to delete */
        if (rc == -EEXIST)
                return 0;

        if (rc != 0)
                return check_segment(seg, dev,
                         "fail to add MMCONFIG information,");

        /* Switch MMCFG on if nothing has enabled it so far */
        if (raw_pci_ext_ops == NULL)
                raw_pci_ext_ops = &pci_mmcfg;
        info->mcfg_added = true;

        return 0;
}

/*
 * Undo setup_mcfg_map(): delete the MMCONFIG region if, and only if, this
 * host bridge was the one that added it.
 *
 * @ci: generic root info embedded in a struct pci_root_info.
 */
static void teardown_mcfg_map(struct acpi_pci_root_info *ci)
{
        struct pci_root_info *info =
                container_of(ci, struct pci_root_info, common);

        if (!info->mcfg_added)
                return;

        pci_mmconfig_delete(info->sd.domain, info->start_bus, info->end_bus);
        info->mcfg_added = false;
}
#else
/*
 * Variant used when CONFIG_PCI_MMCONFIG is not set: there is no MMCONFIG
 * mapping to set up, so this does nothing and returns 0.
 */
static int setup_mcfg_map(struct acpi_pci_root_info *ci)
{
        /* @ci is intentionally unused */
        return 0;
}

/*
 * Variant used when CONFIG_PCI_MMCONFIG is not set: nothing was mapped,
 * so there is nothing to tear down.
 */
static void teardown_mcfg_map(struct acpi_pci_root_info *ci)
{
        /* @ci is intentionally unused */
}
#endif

/*
 * Work out the NUMA node of a PCI root.
 *
 * The node reported by ACPI for the root's handle is preferred.  If ACPI
 * gives none, the value from x86_pci_root_bus_node() for the first bus
 * number is used instead, with a firmware-bug notice when that value is
 * neither 0 nor NUMA_NO_NODE.
 *
 * Returns the node, or NUMA_NO_NODE if none was found or the node found
 * is not online.
 */
static int pci_acpi_root_get_node(struct acpi_pci_root *root)
{
        int bus = root->secondary.start;
        struct acpi_device *adev = root->device;
        int nid = acpi_get_node(adev->handle);

        if (nid == NUMA_NO_NODE) {
                nid = x86_pci_root_bus_node(bus);
                if (nid != 0 && nid != NUMA_NO_NODE)
                        dev_info(&adev->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n",
                                nid);
        }

        /* Only hand back a node that is actually online */
        if (nid == NUMA_NO_NODE || node_online(nid))
                return nid;

        return NUMA_NO_NODE;
}

/*
 * .init_info hook of acpi_pci_root_ops: sets up the MMCONFIG mapping for
 * the host bridge and passes on the result of setup_mcfg_map().
 */
static int pci_acpi_root_init_info(struct acpi_pci_root_info *ci)
{
        int rc = setup_mcfg_map(ci);

        return rc;
}

/*
 * .release_info hook of acpi_pci_root_ops: tears down the MMCONFIG
 * mapping, then frees the struct pci_root_info that embeds @ci.
 */
static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci)
{
        struct pci_root_info *info =
                container_of(ci, struct pci_root_info, common);

        teardown_mcfg_map(ci);
        kfree(info);
}

/*
 * An IO port or MMIO resource assigned to a PCI host bridge may be
 * consumed by the host bridge itself or available to its child
 * bus/devices. The ACPI specification defines a bit (Producer/Consumer)
 * to tell whether the resource is consumed by the host bridge itself,
 * but firmware hasn't used that bit consistently, so we can't rely on it.
 *
 * On x86 and IA64 platforms, all IO port and MMIO resources are assumed
 * to be available to child bus/devices except one special case:
 *     IO port [0xCF8-0xCFF] is consumed by the host bridge itself
 *     to access PCI configuration space.
 *
 * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF].
 */
static bool resource_is_pcicfg_ioport(struct resource *res)
{
        return (res->flags & IORESOURCE_IO) &&
                res->start == 0xCF8 && res->end == 0xCFF;
}

/*
 * .prepare_resources hook of acpi_pci_root_ops: build the list of host
 * bridge windows in ci->resources.
 *
 * With pci_use_crs set, the windows probed from ACPI are kept, minus the
 * PCI config IO port range, and the probe status is returned.  Otherwise
 * every probed window is logged as ignored and destroyed, the list is
 * filled by x86_pci_root_bus_resources() instead, and 0 is returned.
 */
static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
{
        struct acpi_device *adev = ci->bridge;
        int bus = ci->root->secondary.start;
        struct resource_entry *win, *next;
        int probe_status;

        probe_status = acpi_pci_probe_root_resources(ci);

        if (!pci_use_crs) {
                /* Throw away everything ACPI reported */
                resource_list_for_each_entry_safe(win, next, &ci->resources) {
                        dev_printk(KERN_DEBUG, &adev->dev,
                                   "host bridge window %pR (ignored)\n", win->res);
                        resource_list_destroy_entry(win);
                }
                x86_pci_root_bus_resources(bus, &ci->resources);

                return 0;
        }

        /* Keep the ACPI windows, except the config space IO ports */
        resource_list_for_each_entry_safe(win, next, &ci->resources) {
                if (resource_is_pcicfg_ioport(win->res))
                        resource_list_destroy_entry(win);
        }

        return probe_status;
}

/* Callbacks handed to acpi_pci_root_create() by pci_acpi_scan_root() */
static struct acpi_pci_root_ops acpi_pci_root_ops = {
        /* Lifecycle of the per-bridge info: MMCONFIG setup and teardown */
        .init_info = pci_acpi_root_init_info,
        .release_info = pci_acpi_root_release_info,
        /* Host bridge window selection (_CRS or the x86 fallback) */
        .prepare_resources = pci_acpi_root_prepare_resources,
        .pci_ops = &pci_root_ops,
};

struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
        int domain = root->segment;
        int busnum = root->secondary.start;
        int node = pci_acpi_root_get_node(root);
        struct pci_bus *bus;

        if (pci_ignore_seg)
                root->segment = domain = 0;

        if (domain && !pci_domains_supported) {
                pr_warn("pci_bus %04x:%02x: ignored (multiple domains not supported)\n",
                        domain, busnum);
                return NULL;
        }

        bus = pci_find_bus(domain, busnum);
        if (bus) {
                /*
                 * If the desired bus has been scanned already, replace
                 * its bus->sysdata.
                 */
                struct pci_sysdata sd = {
                        .domain = domain,
                        .node = node,
                        .companion = root->device
                };

                memcpy(bus->sysdata, &sd, sizeof(sd));
        } else {
                struct pci_root_info *info;

                info = kzalloc_obj(*info);
                if (!info)
                        dev_err(&root->device->dev,
                                "pci_bus %04x:%02x: ignored (out of memory)\n",
                                domain, busnum);
                else {
                        info->sd.domain = domain;
                        info->sd.node = node;
                        info->sd.companion = root->device;
                        bus = acpi_pci_root_create(root, &acpi_pci_root_ops,
                                                   &info->common, &info->sd);
                }
        }

        /* After the PCI-E bus has been walked and all devices discovered,
         * configure any settings of the fabric that might be necessary.
         */
        if (bus) {
                struct pci_bus *child;
                list_for_each_entry(child, &bus->children, node)
                        pcie_bus_configure_settings(child);
        }

        return bus;
}

/*
 * Attach the ACPI companion stored in the bus sysdata to a host bridge
 * device that has no parent.
 *
 * Always returns 0.
 */
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
        struct pci_sysdata *sd;

        /*
         * This file passes NULL as parent to pci_create_root_bus().  A
         * non-NULL parent therefore means someone else called
         * pci_create_root_bus(), and the sysdata is likely not what we
         * expect, so leave such a bridge alone.
         */
        if (bridge->dev.parent)
                return 0;

        sd = bridge->bus->sysdata;
        ACPI_COMPANION_SET(&bridge->dev, sd->companion);

        return 0;
}

/*
 * Switch PCI interrupt routing over to ACPI.
 *
 * Installs the ACPI enable/disable IRQ hooks and, if "pci=routeirq" was
 * given, routes interrupts for every PCI device right away.
 *
 * Returns -ENODEV when acpi_noirq is set, 0 otherwise.
 */
int __init pci_acpi_init(void)
{
        struct pci_dev *pdev = NULL;

        if (acpi_noirq)
                return -ENODEV;

        pr_info("Using ACPI for IRQ routing\n");
        acpi_irq_penalty_init();

        x86_init.pci.init_irq = x86_init_noop;
        pcibios_enable_irq = acpi_pci_irq_enable;
        pcibios_disable_irq = acpi_pci_irq_disable;

        if (!pci_routeirq)
                return 0;

        /*
         * pci_enable_device() normally sets up PCI IRQ routing, but it is
         * done here as well in case broken drivers that never call
         * pci_enable_device() are still around.
         */
        pr_info("Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
        for_each_pci_dev(pdev)
                acpi_pci_irq_enable(pdev);

        return 0;
}