root/src/add-ons/kernel/busses/usb/xhci.cpp
/*
 * Copyright 2011-2021, Haiku, Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *              Augustin Cavalier <waddlesplash>
 *              Jian Chiang <j.jian.chiang@gmail.com>
 *              Jérôme Duval <jerome.duval@gmail.com>
 *              Akshay Jaggi <akshay1994.leo@gmail.com>
 *              Michael Lotz <mmlr@mlotz.ch>
 *              Alexander von Gluck <kallisti5@unixzen.com>
 */


#include <stdio.h>

#include <bus/PCI.h>
#include <USB3.h>
#include <KernelExport.h>

#include <ByteOrder.h>
#include <util/AutoLock.h>

#include "xhci.h"


// Logs the name of the enclosing function through TRACE_MODULE. Any arguments
// passed to the macro are accepted but not used.
#define CALLED(x...)    TRACE_MODULE("CALLED %s\n", __PRETTY_FUNCTION__)


#define USB_MODULE_NAME "xhci"

// Module interface pointers. Their addresses are listed in
// module_dependencies[] further down in this file.
device_manager_info* gDeviceManager;
static usb_for_controller_interface* gUSB;


// Names of the two modules defined in this file: the driver that attaches to
// the PCI device node (sXHCIDevice) and the USB bus module registered below
// it (gXHCIPCIDeviceModule).
#define XHCI_PCI_DEVICE_MODULE_NAME "busses/usb/xhci/pci/driver_v1"
#define XHCI_PCI_USB_BUS_MODULE_NAME "busses/usb/xhci/device_v1"


// Cookie of the PCI driver node. It is allocated zero-filled by init_device(),
// released by uninit_device() and read by init_bus() and
// register_child_devices().
typedef struct {
        // NOTE(review): not assigned anywhere in the visible part of this file.
        XHCI* xhci;
        // PCI module interface and device handle obtained from the parent node.
        pci_device_module_info* pci;
        pci_device* device;

        // Filled in by pci->get_pci_info() in init_device().
        pci_info pciinfo;

        // NOTE(review): "node" is not assigned in the visible code;
        // "driver_node" is the node passed to init_device().
        device_node* node;
        device_node* driver_node;
} xhci_pci_sim_info;


//      #pragma mark -


// Bus module hook: creates and starts the XHCI controller object for "node".
//
// The PCI access information comes from the driver cookie of the parent node
// (the xhci_pci_sim_info set up by init_device()).
//
// On success *bus_cookie receives the new XHCI object and B_OK is returned.
// Returns B_NO_MEMORY if the object could not be allocated, the get_driver()
// error if the parent cookie could not be retrieved, and B_ERROR for every
// other failure.
static status_t
init_bus(device_node* node, void** bus_cookie)
{
        CALLED();

        driver_module_info* driver = NULL;
        xhci_pci_sim_info* bus = NULL;
        device_node* parent = gDeviceManager->get_parent_node(node);
        status_t status = gDeviceManager->get_driver(parent, &driver,
                (void**)&bus);
        gDeviceManager->put_node(parent);

        // Without the parent's cookie there is no PCI information to hand to
        // the controller; bail out instead of dereferencing a bogus pointer.
        if (status != B_OK || bus == NULL) {
                TRACE_MODULE_ERROR("failed to get the parent driver cookie\n");
                return status != B_OK ? status : B_ERROR;
        }

        Stack *stack;
        if (gUSB->get_stack((void**)&stack) != B_OK)
                return B_ERROR;

        XHCI *xhci = new(std::nothrow) XHCI(&bus->pciinfo, bus->pci, bus->device,
                stack, node);
        if (xhci == NULL)
                return B_NO_MEMORY;

        if (xhci->InitCheck() < B_OK) {
                TRACE_MODULE_ERROR("bus failed init check\n");
                delete xhci;
                return B_ERROR;
        }

        if (xhci->Start() != B_OK) {
                delete xhci;
                return B_ERROR;
        }

        *bus_cookie = xhci;

        return B_OK;
}


static void
uninit_bus(void* bus_cookie)
{
        CALLED();
        XHCI* xhci = (XHCI*)bus_cookie;
        delete xhci;
}


// Driver hook: registers the USB bus node (XHCI_PCI_USB_BUS_MODULE_NAME) below
// the driver node stored in the cookie.
//
// "cookie" is the xhci_pci_sim_info created by init_device(). Returns the
// result of register_node().
static status_t
register_child_devices(void* cookie)
{
        CALLED();
        xhci_pci_sim_info* bus = (xhci_pci_sim_info*)cookie;
        device_node* node = bus->driver_node;

        // The controller index is currently always 0. Format with an explicit
        // bound so the name can never overrun the buffer.
        char prettyName[25];
        snprintf(prettyName, sizeof(prettyName), "XHCI Controller %" B_PRIu16, 0);

        device_attr attrs[] = {
                // properties of this controller for the usb bus manager
                { B_DEVICE_PRETTY_NAME, B_STRING_TYPE,
                        { .string = prettyName }},
                { B_DEVICE_FIXED_CHILD, B_STRING_TYPE,
                        { .string = USB_FOR_CONTROLLER_MODULE_NAME }},

                // private data to identify the device
                { NULL }
        };

        return gDeviceManager->register_node(node, XHCI_PCI_USB_BUS_MODULE_NAME,
                attrs, NULL, NULL);
}


// Driver hook: builds the per-device cookie (xhci_pci_sim_info) for "node".
//
// The PCI module interface and device handle are taken from the parent node,
// and the PCI info block is read once and cached in the cookie.
//
// On success *device_cookie receives the cookie and B_OK is returned. Returns
// the get_driver() error (or B_ERROR) if the parent's PCI interface is not
// available, and B_NO_MEMORY if the cookie cannot be allocated.
static status_t
init_device(device_node* node, void** device_cookie)
{
        CALLED();

        // Resolve the PCI interface before allocating anything, so that no
        // error path has a cookie to leak.
        pci_device_module_info* pci = NULL;
        pci_device* device = NULL;
        device_node* pciParent = gDeviceManager->get_parent_node(node);
        status_t status = gDeviceManager->get_driver(pciParent,
                (driver_module_info**)&pci, (void**)&device);
        gDeviceManager->put_node(pciParent);
        if (status != B_OK)
                return status;
        if (pci == NULL)
                return B_ERROR;

        xhci_pci_sim_info* bus = (xhci_pci_sim_info*)calloc(1,
                sizeof(xhci_pci_sim_info));
        if (bus == NULL)
                return B_NO_MEMORY;

        bus->pci = pci;
        bus->device = device;
        bus->driver_node = node;

        pci->get_pci_info(device, &bus->pciinfo);

        *device_cookie = bus;
        return B_OK;
}


// Driver hook: releases the cookie allocated by init_device().
static void
uninit_device(void* device_cookie)
{
        CALLED();
        free(device_cookie);
}


// Driver hook: registers the XHCI PCI driver node below "parent" and returns
// the result of register_node().
static status_t
register_device(device_node* parent)
{
        CALLED();

        // Only a pretty name is attached; the empty entry terminates the list.
        device_attr nodeAttributes[] = {
                { B_DEVICE_PRETTY_NAME, B_STRING_TYPE, { .string = "XHCI PCI" } },
                {}
        };

        const status_t result = gDeviceManager->register_node(parent,
                XHCI_PCI_DEVICE_MODULE_NAME, nodeAttributes, NULL, NULL);
        return result;
}


// Driver hook: rates how well this driver matches "parent".
//
// Returns 0.8 when the parent is a PCI node whose type/subtype/interface
// attributes identify an xHCI USB controller, 0.0 for any other node, and -1
// when the required attributes cannot be read.
static float
supports_device(device_node* parent)
{
        CALLED();
        const char* bus;
        uint16 type, subType, api;

        // make sure parent is a XHCI PCI device node
        if (gDeviceManager->get_attr_string(parent, B_DEVICE_BUS, &bus, false)
                < B_OK) {
                return -1.0f;
        }

        if (strcmp(bus, "pci") != 0)
                return 0.0f;

        if (gDeviceManager->get_attr_uint16(parent, B_DEVICE_SUB_TYPE, &subType,
                        false) < B_OK
                || gDeviceManager->get_attr_uint16(parent, B_DEVICE_TYPE, &type,
                        false) < B_OK
                || gDeviceManager->get_attr_uint16(parent, B_DEVICE_INTERFACE, &api,
                        false) < B_OK) {
                TRACE_MODULE("Could not find type/subtype/interface attributes\n");
                return -1.0f;
        }

        // The attributes alone decide the match; nothing from the parent's
        // driver is needed here.
        if (type == PCI_serial_bus && subType == PCI_usb && api == PCI_usb_xhci) {
                TRACE_MODULE("XHCI Device found!\n");
                return 0.8f;
        }

        return 0.0f;
}


// Maps a COMP_* code to a short description for log output. Codes without an
// entry in the table yield "Undefined".
static const char*
xhci_error_string(uint32 error)
{
        static const struct {
                uint32 code;
                const char* text;
        } kDescriptions[] = {
                { COMP_INVALID, "Invalid" },
                { COMP_SUCCESS, "Success" },
                { COMP_DATA_BUFFER, "Data buffer" },
                { COMP_BABBLE, "Babble detected" },
                { COMP_USB_TRANSACTION, "USB transaction" },
                { COMP_TRB, "TRB" },
                { COMP_STALL, "Stall" },
                { COMP_RESOURCE, "Resource" },
                { COMP_BANDWIDTH, "Bandwidth" },
                { COMP_NO_SLOTS, "No slots" },
                { COMP_INVALID_STREAM, "Invalid stream" },
                { COMP_SLOT_NOT_ENABLED, "Slot not enabled" },
                { COMP_ENDPOINT_NOT_ENABLED, "Endpoint not enabled" },
                { COMP_SHORT_PACKET, "Short packet" },
                { COMP_RING_UNDERRUN, "Ring underrun" },
                { COMP_RING_OVERRUN, "Ring overrun" },
                { COMP_VF_RING_FULL, "VF Event Ring Full" },
                { COMP_PARAMETER, "Parameter" },
                { COMP_BANDWIDTH_OVERRUN, "Bandwidth overrun" },
                { COMP_CONTEXT_STATE, "Context state" },
                { COMP_NO_PING_RESPONSE, "No ping response" },
                { COMP_EVENT_RING_FULL, "Event ring full" },
                { COMP_INCOMPATIBLE_DEVICE, "Incompatible device" },
                { COMP_MISSED_SERVICE, "Missed service" },
                { COMP_COMMAND_RING_STOPPED, "Command ring stopped" },
                { COMP_COMMAND_ABORTED, "Command aborted" },
                { COMP_STOPPED, "Stopped" },
                { COMP_STOPPED_LENGTH_INVALID, "Stopped (length invalid)" },
                { COMP_MAX_EXIT_LATENCY, "Max exit latency too large" },
                { COMP_ISOC_OVERRUN, "Isoch buffer overrun" },
                { COMP_EVENT_LOST, "Event lost" },
                { COMP_UNDEFINED, "Undefined" },
                { COMP_INVALID_STREAM_ID, "Invalid stream ID" },
                { COMP_SECONDARY_BANDWIDTH, "Secondary bandwidth" },
                { COMP_SPLIT_TRANSACTION, "Split transaction" },
        };

        const size_t entryCount = sizeof(kDescriptions) / sizeof(kDescriptions[0]);
        for (size_t index = 0; index < entryCount; index++) {
                if (kDescriptions[index].code == error)
                        return kDescriptions[index].text;
        }

        return "Undefined";
}


// Translates a COMP_* code into a status_t. "directionIn" selects between the
// two possible results for the data buffer and babble codes. Every code that
// is not listed explicitly is reported as B_DEV_STALLED.
static status_t
xhci_error_status(uint32 error, bool directionIn)
{
        switch (error) {
                case COMP_SUCCESS:
                case COMP_SHORT_PACKET:
                        return B_OK;

                case COMP_DATA_BUFFER:
                        if (directionIn)
                                return B_DEV_WRITE_ERROR;
                        return B_DEV_READ_ERROR;

                case COMP_BABBLE:
                        if (directionIn)
                                return B_DEV_DATA_OVERRUN;
                        return B_DEV_DATA_UNDERRUN;

                case COMP_RING_UNDERRUN:
                        return B_DEV_FIFO_UNDERRUN;

                case COMP_RING_OVERRUN:
                        return B_DEV_FIFO_OVERRUN;

                case COMP_MISSED_SERVICE:
                        return B_DEV_TOO_LATE;

                case COMP_USB_TRANSACTION:
                        return B_DEV_CRC_ERROR;

                case COMP_STALL:
                default:
                        return B_DEV_STALLED;
        }
}


// Pairs each required module name with the global pointer that is to receive
// its interface (gUSB and gDeviceManager). The empty entry ends the table.
// NOTE(review): presumably consumed by the kernel module loader - confirm.
module_dependency module_dependencies[] = {
        { USB_FOR_CONTROLLER_MODULE_NAME, (module_info**)&gUSB },
        { B_DEVICE_MANAGER_MODULE_NAME, (module_info**)&gDeviceManager },
        {}
};


// USB bus module published as XHCI_PCI_USB_BUS_MODULE_NAME. Only the
// init/uninit hooks are provided (init_bus() and uninit_bus()); all other
// driver hooks are left NULL.
static usb_bus_interface gXHCIPCIDeviceModule = {
        {
                {
                        XHCI_PCI_USB_BUS_MODULE_NAME,
                        0,
                        NULL
                },
                NULL,  // supports device
                NULL,  // register device
                init_bus,
                uninit_bus,
                NULL,  // register child devices
                NULL,  // rescan
                NULL,  // device removed
        },
};

// Root device that binds to the PCI bus. It will register a usb_bus_interface
// node for each device (see register_child_devices()).
// Published as XHCI_PCI_DEVICE_MODULE_NAME.
static driver_module_info sXHCIDevice = {
        {
                XHCI_PCI_DEVICE_MODULE_NAME,
                0,
                NULL
        },
        supports_device,
        register_device,
        init_device,
        uninit_device,
        register_child_devices,
        NULL, // rescan
        NULL, // device removed
};

// NULL-terminated list of the modules defined in this file: the PCI driver
// module and the USB bus module.
module_info* modules[] = {
        (module_info* )&sXHCIDevice,
        (module_info* )&gXHCIPCIDeviceModule,
        NULL
};


// Constructs the controller object and prepares the hardware up to the point
// where Start() can run it. In order, the constructor:
//  - enables PCI bus mastering and memory mapped access,
//  - maps the register BAR and computes the register block offsets,
//  - checks the interface version,
//  - takes the controller over from the BIOS,
//  - halts and resets the controller,
//  - creates the semaphores and the event/finisher threads,
//  - selects an interrupt vector (MSI when available) and installs the
//    interrupt handler.
//
// fInitOK is set to true only on the last line; every early return after the
// "fInitOK = false" assignment therefore leaves the object marked as failed,
// which callers detect through InitCheck() (see init_bus()).
//
// \param info PCI info block of the controller; only the pointer is stored.
// \param pci PCI module interface used for config space and MSI access.
// \param device PCI device handle passed to every "pci" call.
// \param stack USB stack, also handed to the BusManager base class.
// \param node Device node, handed to the BusManager base class.
XHCI::XHCI(pci_info *info,      pci_device_module_info* pci, pci_device* device, Stack *stack,
        device_node* node)
        :       BusManager(stack, node),
                fRegisterArea(-1),
                fRegisters(NULL),
                fPCIInfo(info),
                fPci(pci),
                fDevice(device),
                fStack(stack),
                fIRQ(0),
                fUseMSI(false),
                fErstArea(-1),
                fDcbaArea(-1),
                fCmdCompSem(-1),
                fStopThreads(false),
                fRootHub(NULL),
                fPortCount(0),
                fSlotCount(0),
                fScratchpadCount(0),
                fContextSizeShift(0),
                fFinishedHead(NULL),
                fFinishTransfersSem(-1),
                fFinishThread(-1),
                fEventSem(-1),
                fEventThread(-1),
                fEventIdx(0),
                fCmdIdx(0),
                fEventCcs(1),
                fCmdCcs(1)
{
        B_INITIALIZE_SPINLOCK(&fSpinlock);
        mutex_init(&fFinishedLock, "XHCI finished transfers");
        mutex_init(&fEventLock, "XHCI event handler");

        // NOTE(review): this return happens before fInitOK is cleared below;
        // presumably the base class already left it false - confirm.
        if (BusManager::InitCheck() < B_OK) {
                TRACE_ERROR("bus manager failed to init\n");
                return;
        }

        TRACE("constructing new XHCI host controller driver\n");
        fInitOK = false;

        // enable busmaster and memory mapped access
        uint16 command = fPci->read_pci_config(fDevice, PCI_command, 2);
        command &= ~(PCI_command_io | PCI_command_int_disable);
        command |= PCI_command_master | PCI_command_memory;

        fPci->write_pci_config(fDevice, PCI_command, 2, command);

        // map the registers (low + high for 64-bit when requested)
        phys_addr_t physicalAddress = fPCIInfo->u.h0.base_registers[0];
        if ((fPCIInfo->u.h0.base_register_flags[0] & PCI_address_type)
                        == PCI_address_type_64) {
                physicalAddress |= (uint64)fPCIInfo->u.h0.base_registers[1] << 32;
        }

        size_t mapSize = fPCIInfo->u.h0.base_register_sizes[0];

        TRACE("map registers %08" B_PRIxPHYSADDR ", size: %" B_PRIuSIZE "\n",
                physicalAddress, mapSize);

        fRegisterArea = map_physical_memory("XHCI memory mapped registers",
                physicalAddress, mapSize, B_ANY_KERNEL_BLOCK_ADDRESS,
                B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
                (void **)&fRegisters);
        if (fRegisterArea < B_OK) {
                TRACE_ERROR("failed to map register memory\n");
                return;
        }

        // determine the register offsets
        fCapabilityRegisterOffset = 0;
        fOperationalRegisterOffset = HCI_CAPLENGTH(ReadCapReg32(XHCI_HCI_CAPLENGTH));
        fRuntimeRegisterOffset = ReadCapReg32(XHCI_RTSOFF) & ~0x1F;
        fDoorbellRegisterOffset = ReadCapReg32(XHCI_DBOFF) & ~0x3;

        TRACE("mapped registers: %p\n", fRegisters);
        TRACE("operational register offset: %" B_PRId32 "\n", fOperationalRegisterOffset);
        TRACE("runtime register offset: %" B_PRId32 "\n", fRuntimeRegisterOffset);
        TRACE("doorbell register offset: %" B_PRId32 "\n", fDoorbellRegisterOffset);

        // only interface versions 0x0090 through 0x0120 are accepted
        int32 interfaceVersion = HCI_VERSION(ReadCapReg32(XHCI_HCI_VERSION));
        if (interfaceVersion < 0x0090 || interfaceVersion > 0x0120) {
                TRACE_ERROR("unsupported interface version: 0x%04" B_PRIx32 "\n",
                        interfaceVersion);
                return;
        }
        TRACE_ALWAYS("interface version: 0x%04" B_PRIx32 "\n", interfaceVersion);

        TRACE_ALWAYS("structural parameters: 1:0x%08" B_PRIx32 " 2:0x%08"
                B_PRIx32 " 3:0x%08" B_PRIx32 "\n", ReadCapReg32(XHCI_HCSPARAMS1),
                ReadCapReg32(XHCI_HCSPARAMS2), ReadCapReg32(XHCI_HCSPARAMS3));

        // an all-ones read is treated as "no usable device" and aborts the init
        uint32 cparams = ReadCapReg32(XHCI_HCCPARAMS);
        if (cparams == 0xffffffff)
                return;
        TRACE_ALWAYS("capability parameters: 0x%08" B_PRIx32 "\n", cparams);

        // if 64 bytes context structures, then 1
        fContextSizeShift = HCC_CSZ(cparams);

        // Assume ownership of the controller from the BIOS.
        // Walk the extended capability list until the legacy support capability
        // is found; "eec" starts as all-ones so the first iteration is entered.
        uint32 eec = 0xffffffff;
        uint32 eecp = HCS0_XECP(cparams) << 2;
        for (; eecp != 0 && XECP_NEXT(eec) != 0; eecp += XECP_NEXT(eec) << 2) {
                TRACE("eecp register: 0x%08" B_PRIx32 "\n", eecp);

                eec = ReadCapReg32(eecp);
                if (eec == 0xffffffff)
                        break;
                if (XECP_ID(eec) != XHCI_LEGSUP_CAPID)
                        continue;

                if (eec & XHCI_LEGSUP_BIOSOWNED) {
                        TRACE_ALWAYS("the host controller is bios owned, claiming"
                                " ownership\n");
                        WriteCapReg32(eecp, eec | XHCI_LEGSUP_OSOWNED);

                        // poll up to 20 times, 50 ms apart, for the BIOS to let go
                        for (int32 i = 0; i < 20; i++) {
                                eec = ReadCapReg32(eecp);

                                if ((eec & XHCI_LEGSUP_BIOSOWNED) == 0)
                                        break;

                                TRACE_ALWAYS("controller is still bios owned, waiting\n");
                                snooze(50000);
                        }

                        if (eec & XHCI_LEGSUP_BIOSOWNED) {
                                TRACE_ERROR("bios won't give up control over the host "
                                        "controller (ignoring)\n");
                        } else if (eec & XHCI_LEGSUP_OSOWNED) {
                                TRACE_ALWAYS("successfully took ownership of the host "
                                        "controller\n");
                        }

                        // Force off the BIOS owned flag, and clear all SMIs. Some BIOSes
                        // do indicate a successful handover but do not remove their SMIs
                        // and then freeze the system when interrupts are generated.
                        WriteCapReg32(eecp, eec & ~XHCI_LEGSUP_BIOSOWNED);
                }

                // keep only the reserved and read-only bits of the legacy
                // control/status register when writing it back
                uint32 legctlsts = ReadCapReg32(eecp + XHCI_LEGCTLSTS);
                legctlsts &= (XHCI_LEGCTLSTS_RESERVED_BITS | XHCI_LEGCTLSTS_READONLY_BITS);
                WriteCapReg32(eecp + XHCI_LEGCTLSTS, legctlsts);
                break;
        }

        // We need to explicitly take ownership of EHCI ports on earlier Intel chipsets.
        if (fPCIInfo->vendor_id == PCI_VENDOR_INTEL) {
                switch (fPCIInfo->device_id) {
                        case PCI_DEVICE_INTEL_PANTHER_POINT_XHCI:
                        case PCI_DEVICE_INTEL_LYNX_POINT_XHCI:
                        case PCI_DEVICE_INTEL_LYNX_POINT_LP_XHCI:
                        case PCI_DEVICE_INTEL_BAYTRAIL_XHCI:
                        case PCI_DEVICE_INTEL_WILDCAT_POINT_XHCI:
                        case PCI_DEVICE_INTEL_WILDCAT_POINT_LP_XHCI:
                                _SwitchIntelPorts();
                                break;
                }
        }

        // halt the host controller
        if (ControllerHalt() < B_OK) {
                return;
        }

        // reset the host controller
        if (ControllerReset() < B_OK) {
                TRACE_ERROR("host controller failed to reset\n");
                return;
        }

        fCmdCompSem = create_sem(0, "XHCI Command Complete");
        fFinishTransfersSem = create_sem(0, "XHCI Finish Transfers");
        fEventSem = create_sem(0, "XHCI Event");
        if (fFinishTransfersSem < B_OK || fCmdCompSem < B_OK || fEventSem < B_OK) {
                TRACE_ERROR("failed to create semaphores\n");
                return;
        }

        // create event handler thread
        // NOTE(review): the spawn_kernel_thread() results are not checked.
        fEventThread = spawn_kernel_thread(EventThread, "xhci event thread",
                B_URGENT_PRIORITY, (void *)this);
        resume_thread(fEventThread);

        // create finisher service thread
        fFinishThread = spawn_kernel_thread(FinishThread, "xhci finish thread",
                B_URGENT_PRIORITY - 1, (void *)this);
        resume_thread(fFinishThread);

        // Find the right interrupt vector, using MSIs if available.
        // An interrupt line of 0xFF is mapped to 0, which is rejected below
        // unless MSI provides a vector.
        fIRQ = fPCIInfo->u.h0.interrupt_line;
        if (fIRQ == 0xFF)
                fIRQ = 0;

#if 0
        if (fPci->get_msix_count(fDevice) >= 1) {
                uint8 msiVector = 0;
                if (fPci->configure_msix(fDevice, 1, &msiVector) == B_OK
                        && fPci->enable_msix(fDevice) == B_OK) {
                        TRACE_ALWAYS("using MSI-X\n");
                        fIRQ = msiVector;
                        fUseMSI = true;
                }
        } else
#endif
        if (fPci->get_msi_count(fDevice) >= 1) {
                uint32 msiVector = 0;
                if (fPci->configure_msi(fDevice, 1, &msiVector) == B_OK
                        && fPci->enable_msi(fDevice) == B_OK) {
                        TRACE_ALWAYS("using message signaled interrupts\n");
                        fIRQ = msiVector;
                        fUseMSI = true;
                }
        }

        if (fIRQ == 0) {
                TRACE_MODULE_ERROR("device PCI:%d:%d:%d was assigned an invalid IRQ\n",
                        fPCIInfo->bus, fPCIInfo->device, fPCIInfo->function);
                return;
        }

        // Install the interrupt handler
        TRACE("installing interrupt handler\n");
        install_io_interrupt_handler(fIRQ, InterruptHandler, (void *)this, 0);

        // start out with no known port speeds and no devices
        memset(fPortSpeeds, 0, sizeof(fPortSpeeds));
        memset(fDevices, 0, sizeof(fDevices));

        fInitOK = true;
        TRACE("driver construction successful\n");
}


// Tears the controller down: clears the command register, stops the worker
// threads by deleting their semaphores, removes the interrupt handler and
// releases the areas and the MSI configuration.
//
// The destructor also runs for objects whose construction failed part way
// (init_bus() deletes the object when InitCheck() or Start() fails), so it
// must cope with resources that were never acquired.
XHCI::~XHCI()
{
        TRACE("tear down XHCI host controller driver\n");

        // The constructor can bail out before the registers are mapped; only
        // touch the hardware when the mapping actually exists.
        if (fRegisterArea >= B_OK)
                WriteOpReg(XHCI_CMD, 0);

        // Deleting the semaphores wakes up the threads, which then see
        // fStopThreads and exit.
        int32 result = 0;
        fStopThreads = true;
        delete_sem(fCmdCompSem);
        delete_sem(fFinishTransfersSem);
        delete_sem(fEventSem);
        wait_for_thread(fFinishThread, &result);
        wait_for_thread(fEventThread, &result);

        mutex_destroy(&fFinishedLock);
        mutex_destroy(&fEventLock);

        remove_io_interrupt_handler(fIRQ, InterruptHandler, (void *)this);

        delete_area(fRegisterArea);
        delete_area(fErstArea);
        for (uint32 i = 0; i < fScratchpadCount; i++)
                delete_area(fScratchpadArea[i]);
        delete_area(fDcbaArea);

        if (fUseMSI) {
                fPci->disable_msi(fDevice);
                fPci->unconfigure_msi(fDevice);
        }
}


void
XHCI::_SwitchIntelPorts()
{
        TRACE("Looking for EHCI owned ports\n");
        uint32 ports = fPci->read_pci_config(fDevice, XHCI_INTEL_USB3PRM, 4);
        TRACE("Superspeed Ports: 0x%" B_PRIx32 "\n", ports);
        fPci->write_pci_config(fDevice, XHCI_INTEL_USB3_PSSEN, 4, ports);
        ports = fPci->read_pci_config(fDevice, XHCI_INTEL_USB3_PSSEN, 4);
        TRACE("Superspeed ports now under XHCI : 0x%" B_PRIx32 "\n", ports);
        ports = fPci->read_pci_config(fDevice, XHCI_INTEL_USB2PRM, 4);
        TRACE("USB 2.0 Ports : 0x%" B_PRIx32 "\n", ports);
        fPci->write_pci_config(fDevice, XHCI_INTEL_XUSB2PR, 4, ports);
        ports = fPci->read_pci_config(fDevice, XHCI_INTEL_XUSB2PR, 4);
        TRACE("USB 2.0 ports now under XHCI: 0x%" B_PRIx32 "\n", ports);
}


// Programs the controller's data structures and sets it running.
//
// Reads the port, slot and scratchpad counts, determines the speed of each
// root port, allocates the device context base address array, the scratchpad
// pages and the combined ERST/event ring/command ring area, programs the
// corresponding registers, enables interrupts, starts the controller and
// finally creates and registers the root hub.
//
// Returns the result of BusManager::Start() on success, B_NO_MEMORY if the
// root hub cannot be allocated, the root hub's InitCheck() error, or B_ERROR
// for any other failure. On failure the object is left in a state that the
// destructor can clean up: fScratchpadCount only counts scratchpad areas that
// were really allocated, and an area deleted here has its id reset.
status_t
XHCI::Start()
{
        TRACE_ALWAYS("starting XHCI host controller\n");
        TRACE("usbcmd: 0x%08" B_PRIx32 "; usbsts: 0x%08" B_PRIx32 "\n",
                ReadOpReg(XHCI_CMD), ReadOpReg(XHCI_STS));

        if (WaitOpBits(XHCI_STS, STS_CNR, 0) != B_OK) {
                TRACE("Start() failed STS_CNR\n");
        }

        if ((ReadOpReg(XHCI_CMD) & CMD_RUN) != 0) {
                TRACE_ERROR("Start() warning, starting running XHCI controller!\n");
        }

        if ((ReadOpReg(XHCI_PAGESIZE) & (1 << 0)) == 0) {
                TRACE_ERROR("controller does not support 4K page size\n");
                return B_ERROR;
        }

        // read port count from capability register
        uint32 capabilities = ReadCapReg32(XHCI_HCSPARAMS1);
        fPortCount = HCS_MAX_PORTS(capabilities);
        if (fPortCount == 0) {
                TRACE_ERROR("invalid number of ports: %u\n", fPortCount);
                return B_ERROR;
        }

        fSlotCount = HCS_MAX_SLOTS(capabilities);
        if (fSlotCount > XHCI_MAX_DEVICES)
                fSlotCount = XHCI_MAX_DEVICES;
        WriteOpReg(XHCI_CONFIG, fSlotCount);

        // find out which protocol is used for each port
        // The port range comes straight from the hardware, so never let it
        // index past the end of fPortSpeeds.
        const uint32 portSpeedSlots = sizeof(fPortSpeeds) / sizeof(fPortSpeeds[0]);
        uint8 portFound = 0;
        uint32 cparams = ReadCapReg32(XHCI_HCCPARAMS);
        uint32 eec = 0xffffffff;
        uint32 eecp = HCS0_XECP(cparams) << 2;
        for (; eecp != 0 && XECP_NEXT(eec) && portFound < fPortCount;
                eecp += XECP_NEXT(eec) << 2) {
                eec = ReadCapReg32(eecp);
                if (XECP_ID(eec) != XHCI_SUPPORTED_PROTOCOLS_CAPID)
                        continue;
                if (XHCI_SUPPORTED_PROTOCOLS_0_MAJOR(eec) > 3)
                        continue;
                uint32 temp = ReadCapReg32(eecp + 8);
                uint32 offset = XHCI_SUPPORTED_PROTOCOLS_1_OFFSET(temp);
                uint32 count = XHCI_SUPPORTED_PROTOCOLS_1_COUNT(temp);
                if (offset == 0 || count == 0)
                        continue;
                // the reported port offset is 1-based
                offset--;
                for (uint32 i = offset; i < offset + count && i < portSpeedSlots;
                        i++) {
                        if (XHCI_SUPPORTED_PROTOCOLS_0_MAJOR(eec) == 0x3)
                                fPortSpeeds[i] = USB_SPEED_SUPERSPEED;
                        else
                                fPortSpeeds[i] = USB_SPEED_HIGHSPEED;

                        TRACE("speed for port %" B_PRId32 " is %s\n", i,
                                fPortSpeeds[i] == USB_SPEED_SUPERSPEED ? "super" : "high");
                }
                portFound += count;
        }

        // Validate the scratchpad count in a local first. fScratchpadCount is
        // what the destructor iterates over, so it must only ever cover
        // entries of fScratchpadArea that hold a real area.
        fScratchpadCount = 0;
        uint32 params2 = ReadCapReg32(XHCI_HCSPARAMS2);
        const uint32 scratchpadCount = HCS_MAX_SC_BUFFERS(params2);
        if (scratchpadCount > XHCI_MAX_SCRATCHPADS) {
                TRACE_ERROR("invalid number of scratchpads: %" B_PRIu32 "\n",
                        scratchpadCount);
                return B_ERROR;
        }

        uint32 params3 = ReadCapReg32(XHCI_HCSPARAMS3);
        fExitLatMax = HCS_U1_DEVICE_LATENCY(params3)
                + HCS_U2_DEVICE_LATENCY(params3);

        // clear interrupts & disable device notifications
        WriteOpReg(XHCI_STS, ReadOpReg(XHCI_STS));
        WriteOpReg(XHCI_DNCTRL, 0);

        // allocate Device Context Base Address array
        phys_addr_t dmaAddress;
        fDcbaArea = fStack->AllocateArea((void **)&fDcba, &dmaAddress,
                sizeof(*fDcba), "DCBA Area");
        if (fDcbaArea < B_OK) {
                TRACE_ERROR("unable to create the DCBA area\n");
                return B_ERROR;
        }
        memset(fDcba, 0, sizeof(*fDcba));
        memset(fScratchpadArea, 0, sizeof(fScratchpadArea));
        memset(fScratchpad, 0, sizeof(fScratchpad));

        // setting the first address to the scratchpad array address
        fDcba->baseAddress[0] = dmaAddress
                + offsetof(struct xhci_device_context_array, scratchpad);

        // fill up the scratchpad array with scratchpad pages
        for (uint32 i = 0; i < scratchpadCount; i++) {
                phys_addr_t scratchDmaAddress;
                fScratchpadArea[i] = fStack->AllocateArea((void **)&fScratchpad[i],
                        &scratchDmaAddress, B_PAGE_SIZE, "Scratchpad Area");
                if (fScratchpadArea[i] < B_OK) {
                        TRACE_ERROR("unable to create the scratchpad area\n");
                        return B_ERROR;
                }
                fDcba->scratchpad[i] = scratchDmaAddress;

                // this entry is now valid and has to be released on teardown
                fScratchpadCount = i + 1;
        }

        TRACE("setting DCBAAP %" B_PRIxPHYSADDR "\n", dmaAddress);
        WriteOpReg(XHCI_DCBAAP_LO, (uint32)dmaAddress);
        WriteOpReg(XHCI_DCBAAP_HI, (uint32)(dmaAddress >> 32));

        // allocate Event Ring Segment Table
        // One area holds, in this order: the ERST element, the event ring and
        // the command ring.
        uint8 *addr;
        fErstArea = fStack->AllocateArea((void **)&addr, &dmaAddress,
                (XHCI_MAX_COMMANDS + XHCI_MAX_EVENTS) * sizeof(xhci_trb)
                + sizeof(xhci_erst_element),
                "USB XHCI ERST CMD_RING and EVENT_RING Area");

        if (fErstArea < B_OK) {
                TRACE_ERROR("unable to create the ERST AND RING area\n");
                delete_area(fDcbaArea);
                // forget the id so the destructor does not delete it again
                fDcbaArea = -1;
                return B_ERROR;
        }
        fErst = (xhci_erst_element *)addr;
        memset(fErst, 0, (XHCI_MAX_COMMANDS + XHCI_MAX_EVENTS) * sizeof(xhci_trb)
                + sizeof(xhci_erst_element));

        // fill with Event Ring Segment Base Address and Event Ring Segment Size
        fErst->rs_addr = dmaAddress + sizeof(xhci_erst_element);
        fErst->rs_size = XHCI_MAX_EVENTS;
        fErst->rsvdz = 0;

        addr += sizeof(xhci_erst_element);
        fEventRing = (xhci_trb *)addr;
        addr += XHCI_MAX_EVENTS * sizeof(xhci_trb);
        fCmdRing = (xhci_trb *)addr;

        TRACE("setting ERST size\n");
        WriteRunReg32(XHCI_ERSTSZ(0), XHCI_ERSTS_SET(1));

        TRACE("setting ERDP addr = 0x%" B_PRIx64 "\n", fErst->rs_addr);
        WriteRunReg32(XHCI_ERDP_LO(0), (uint32)fErst->rs_addr);
        WriteRunReg32(XHCI_ERDP_HI(0), (uint32)(fErst->rs_addr >> 32));

        TRACE("setting ERST base addr = 0x%" B_PRIxPHYSADDR "\n", dmaAddress);
        WriteRunReg32(XHCI_ERSTBA_LO(0), (uint32)dmaAddress);
        WriteRunReg32(XHCI_ERSTBA_HI(0), (uint32)(dmaAddress >> 32));

        // from here on dmaAddress is the physical address of the command ring
        dmaAddress += sizeof(xhci_erst_element) + XHCI_MAX_EVENTS
                * sizeof(xhci_trb);

        // Make sure the Command Ring is stopped
        if ((ReadOpReg(XHCI_CRCR_LO) & CRCR_CRR) != 0) {
                TRACE_ALWAYS("Command Ring is running, send stop/cancel\n");
                WriteOpReg(XHCI_CRCR_LO, CRCR_CS);
                WriteOpReg(XHCI_CRCR_HI, 0);
                WriteOpReg(XHCI_CRCR_LO, CRCR_CA);
                WriteOpReg(XHCI_CRCR_HI, 0);
                snooze(1000);
                if ((ReadOpReg(XHCI_CRCR_LO) & CRCR_CRR) != 0) {
                        TRACE_ERROR("Command Ring still running after stop/cancel\n");
                }
        }
        TRACE("setting CRCR addr = 0x%" B_PRIxPHYSADDR "\n", dmaAddress);
        WriteOpReg(XHCI_CRCR_LO, (uint32)dmaAddress | CRCR_RCS);
        WriteOpReg(XHCI_CRCR_HI, (uint32)(dmaAddress >> 32));
        // link trb
        fCmdRing[XHCI_MAX_COMMANDS - 1].address = dmaAddress;

        TRACE("setting interrupt rate\n");

        // Setting IMOD below 0x3F8 on Intel Lynx Point can cause IRQ lockups
        if (fPCIInfo->vendor_id == PCI_VENDOR_INTEL
                && (fPCIInfo->device_id == PCI_DEVICE_INTEL_PANTHER_POINT_XHCI
                        || fPCIInfo->device_id == PCI_DEVICE_INTEL_LYNX_POINT_XHCI
                        || fPCIInfo->device_id == PCI_DEVICE_INTEL_LYNX_POINT_LP_XHCI
                        || fPCIInfo->device_id == PCI_DEVICE_INTEL_BAYTRAIL_XHCI
                        || fPCIInfo->device_id == PCI_DEVICE_INTEL_WILDCAT_POINT_XHCI)) {
                WriteRunReg32(XHCI_IMOD(0), 0x000003f8); // 4000 irq/s
        } else {
                WriteRunReg32(XHCI_IMOD(0), 0x000001f4); // 8000 irq/s
        }

        TRACE("enabling interrupt\n");
        WriteRunReg32(XHCI_IMAN(0), ReadRunReg32(XHCI_IMAN(0)) | IMAN_INTR_ENA);

        WriteOpReg(XHCI_CMD, CMD_RUN | CMD_INTE | CMD_HSEE);

        // wait for start up state
        if (WaitOpBits(XHCI_STS, STS_HCH, 0) != B_OK) {
                TRACE_ERROR("HCH start up timeout\n");
        }

        // the root hub is created with device address 1
        fRootHub = new(std::nothrow) XHCIRootHub(RootObject(), 1);
        if (!fRootHub) {
                TRACE_ERROR("no memory to allocate root hub\n");
                return B_NO_MEMORY;
        }

        if (fRootHub->InitCheck() < B_OK) {
                TRACE_ERROR("root hub failed init check\n");
                return fRootHub->InitCheck();
        }

        SetRootHub(fRootHub);

        fRootHub->RegisterNode(Node());

        TRACE_ALWAYS("successfully started the controller\n");

#ifdef TRACE_USB
        TRACE("No-Op test...\n");
        Noop();
#endif

        return BusManager::Start();
}


// Entry point for queueing a transfer on this controller. Transfers aimed
// at device address 1 are handed to the root hub object; everything else is
// dispatched to the control or normal submission path by pipe type.
status_t
XHCI::SubmitTransfer(Transfer *transfer)
{
        Pipe *targetPipe = transfer->TransferPipe();

        // Root hub requests never reach the hardware rings.
        if (targetPipe->DeviceAddress() == 1)
                return fRootHub->ProcessTransfer(this, transfer);

        TRACE("SubmitTransfer(%p)\n", transfer);

        const bool isControlPipe
                = (targetPipe->Type() & USB_OBJECT_CONTROL_PIPE) != 0;
        return isControlPipe
                ? SubmitControlRequest(transfer)
                : SubmitNormalRequest(transfer);
}


// Builds the transfer descriptor for a control transfer and links it onto
// the endpoint ring of the transfer's pipe.
//
// The TD consists of a Setup Stage TRB, an optional Data Stage TRB (only if
// the request has a non-zero Length) and a Status Stage TRB. For OUT
// requests the payload is copied from the transfer's first vector into the
// descriptor's buffer before the TD is linked.
//
// Returns B_BAD_VALUE if the pipe has no endpoint, B_NO_INIT if the endpoint
// has no device, B_NOT_SUPPORTED for physical transfers, B_NO_MEMORY if the
// descriptor could not be created, or the error reported by
// InitKernelAccess(), PrepareKernelAccess() or _LinkDescriptorForPipe().
status_t
XHCI::SubmitControlRequest(Transfer *transfer)
{
        Pipe *pipe = transfer->TransferPipe();
        usb_request_data *requestData = transfer->RequestData();
        bool directionIn = (requestData->RequestType & USB_REQTYPE_DEVICE_IN) != 0;

        TRACE("SubmitControlRequest() length %d\n", requestData->Length);

        xhci_endpoint *endpoint = (xhci_endpoint *)pipe->ControllerCookie();
        if (endpoint == NULL) {
                TRACE_ERROR("control pipe has no endpoint!\n");
                return B_BAD_VALUE;
        }
        if (endpoint->device == NULL) {
                panic("endpoint is not initialized!");
                return B_NO_INIT;
        }

        if (transfer->IsPhysical()) {
                // We don't handle this case.
                return B_NOT_SUPPORTED;
        }

        status_t status = transfer->InitKernelAccess();
        if (status != B_OK)
                return status;

        // Room for up to three TRBs (setup, data, status) and a single buffer
        // large enough for the data stage.
        xhci_td *descriptor = CreateDescriptor(3, 1, requestData->Length);
        if (descriptor == NULL)
                return B_NO_MEMORY;
        descriptor->transfer = transfer;

        // Setup Stage
        uint8 index = 0;
        // The request packet itself is stored in the TRB (immediate data)
        // rather than being referenced by address.
        memcpy(&descriptor->trbs[index].address, requestData,
                sizeof(usb_request_data));
        descriptor->trbs[index].status = TRB_2_IRQ(0) | TRB_2_BYTES(8);
        descriptor->trbs[index].flags
                = TRB_3_TYPE(TRB_TYPE_SETUP_STAGE) | TRB_3_IDT_BIT | TRB_3_CYCLE_BIT;
        if (requestData->Length > 0) {
                descriptor->trbs[index].flags |=
                        directionIn ? TRB_3_TRT_IN : TRB_3_TRT_OUT;
        }

        index++;

        // Data Stage (if any)
        if (requestData->Length > 0) {
                descriptor->trbs[index].address = descriptor->buffer_addrs[0];
                descriptor->trbs[index].status = TRB_2_IRQ(0)
                        | TRB_2_BYTES(requestData->Length)
                        | TRB_2_TD_SIZE(0);
                descriptor->trbs[index].flags = TRB_3_TYPE(TRB_TYPE_DATA_STAGE)
                        | (directionIn ? TRB_3_DIR_IN : 0)
                        | TRB_3_CYCLE_BIT;

                if (!directionIn) {
                        // The source buffer must be accessible before we copy from
                        // it; bail out (and release the TD) if that fails, just as
                        // SubmitNormalRequest() does.
                        status = transfer->PrepareKernelAccess();
                        if (status != B_OK) {
                                FreeDescriptor(descriptor);
                                return status;
                        }
                        memcpy(descriptor->buffers[0],
                                (uint8 *)transfer->Vector()[0].base, requestData->Length);
                }

                index++;
        }

        // Status Stage
        descriptor->trbs[index].address = 0;
        descriptor->trbs[index].status = TRB_2_IRQ(0);
        descriptor->trbs[index].flags = TRB_3_TYPE(TRB_TYPE_STATUS_STAGE)
                        | TRB_3_CHAIN_BIT | TRB_3_ENT_BIT | TRB_3_CYCLE_BIT;
                // The CHAIN bit must be set when using an Event Data TRB
                // (XHCI 1.2 § 6.4.1.2.3 Table 6-31 p472).

        // Status Stage is an OUT transfer when the device is sending data
        // (XHCI 1.2 § 4.11.2.2 Table 4-7 p213), otherwise set the IN bit.
        if (requestData->Length == 0 || !directionIn)
                descriptor->trbs[index].flags |= TRB_3_DIR_IN;

        descriptor->trb_used = index + 1;

        status = _LinkDescriptorForPipe(descriptor, endpoint);
        if (status != B_OK) {
                FreeDescriptor(descriptor);
                return status;
        }

        return B_OK;
}


// Builds the transfer descriptor for a bulk, interrupt or isochronous
// transfer and links it onto the endpoint ring of the transfer's pipe.
//
// Non-physical transfers go through buffers owned by the descriptor (OUT
// data is copied into them here); physical transfers point the TRBs directly
// at the supplied vectors, which must lie below 4 GiB.
//
// Returns B_BAD_VALUE for a missing endpoint, invalid isochronous parameters
// or physical vectors above UINT32_MAX, B_NO_INIT if the endpoint has no
// device, B_NO_MEMORY if the descriptor could not be created, or the error
// reported by InitKernelAccess(), PrepareKernelAccess() or
// _LinkDescriptorForPipe().
status_t
XHCI::SubmitNormalRequest(Transfer *transfer)
{
        TRACE("SubmitNormalRequest() length %" B_PRIuSIZE "\n", transfer->FragmentLength());

        Pipe *pipe = transfer->TransferPipe();
        usb_isochronous_data *isochronousData = transfer->IsochronousData();
        bool directionIn = (pipe->Direction() == Pipe::In);

        xhci_endpoint *endpoint = (xhci_endpoint *)pipe->ControllerCookie();
        if (endpoint == NULL) {
                TRACE_ERROR("pipe has no endpoint!\n");
                return B_BAD_VALUE;
        }
        if (endpoint->device == NULL) {
                panic("endpoint is not initialized!");
                return B_NO_INIT;
        }

        status_t status = transfer->InitKernelAccess();
        if (status != B_OK)
                return status;

        // TRBs within a TD must be "grouped" into TD Fragments, which mostly means
        // that a max_burst_payload boundary cannot be crossed within a TRB, but
        // only between TRBs. More than one TRB can be in a TD Fragment, but we keep
        // things simple by setting trbSize to the MBP. (XHCI 1.2 § 4.11.7.1 p235.)
        size_t trbSize = endpoint->max_burst_payload;

        if (isochronousData != NULL) {
                if (isochronousData->packet_count == 0)
                        return B_BAD_VALUE;

                // Isochronous transfers use more specifically sized packets.
                trbSize = transfer->DataLength() / isochronousData->packet_count;
                if (trbSize == 0 || trbSize > pipe->MaxPacketSize() || trbSize
                                != (size_t)isochronousData->packet_descriptors[0].request_length)
                        return B_BAD_VALUE;
        }

        // Now that we know trbSize, compute the count.
        // NOTE(review): a FragmentLength() of 0 yields trbCount == 0; the fill
        // loop below then writes no TRBs and the "trb_used - 1" index used when
        // setting the ENT bit would be -1. Confirm that zero-length transfers
        // never reach this function.
        int32 trbCount = (transfer->FragmentLength() + trbSize - 1) / trbSize;

        generic_io_vec* transferVec = transfer->Vector();
        generic_size_t transferVecOffset = 0;
        if (transfer->IsPhysical()) {
                // No bounce buffers are needed (trbSize = 0); instead every vector
                // is split into max_burst_payload-sized TRBs.
                trbSize = 0;
                trbCount = 0;

                for (size_t i = 0; i < transfer->VectorCount(); i++) {
                        // There's an XHCI context parameter to indicate if the controller is
                        // 64-bit capable, but for consistency we require 32-bit DMA.
                        if ((transferVec[i].base + transferVec[i].length) > UINT32_MAX)
                                return B_BAD_VALUE;

                        trbCount += (transferVec[i].length + endpoint->max_burst_payload - 1)
                                / endpoint->max_burst_payload;
                }
        }

        xhci_td *td = CreateDescriptor(trbCount, trbCount, trbSize);
        if (td == NULL)
                return B_NO_MEMORY;

        // Normal Stage
        const size_t maxPacketSize = pipe->MaxPacketSize();
        size_t remaining = transfer->FragmentLength();
        for (int32 i = 0; i < trbCount; i++) {
                phys_addr_t address;
                generic_size_t trbLength;
                if (!transfer->IsPhysical()) {
                        address = td->buffer_addrs[i];
                        trbLength = (remaining < trbSize) ? remaining : trbSize;
                } else {
                        // Walk the caller's vectors, emitting at most
                        // max_burst_payload bytes per TRB.
                        address = transferVec->base + transferVecOffset;
                        trbLength = transferVec->length - transferVecOffset;
                        if (trbLength > endpoint->max_burst_payload)
                                trbLength = endpoint->max_burst_payload;

                        transferVecOffset += trbLength;
                        if (transferVecOffset == transferVec->length) {
                                transferVec++;
                                transferVecOffset = 0;
                        }
                }

                remaining -= trbLength;

                // The "TD Size" field of a transfer TRB indicates the number of
                // remaining maximum-size *packets* in this TD, *not* including the
                // packets in the current TRB, and capped at 31 if there are more
                // than 31 packets remaining in the TD. (XHCI 1.2 § 4.11.2.4 p218.)
                int32 tdSize = (remaining + maxPacketSize - 1) / maxPacketSize;
                if (tdSize > 31)
                        tdSize = 31;

                td->trbs[i].address = address;
                td->trbs[i].status = TRB_2_IRQ(0)
                        | TRB_2_BYTES(trbLength)
                        | TRB_2_TD_SIZE(tdSize);
                td->trbs[i].flags = TRB_3_TYPE(TRB_TYPE_NORMAL)
                        | TRB_3_CYCLE_BIT | TRB_3_CHAIN_BIT;

                td->trb_used++;
        }

        // Isochronous-specific.
        if (isochronousData != NULL) {
                // This is an isochronous transfer; it should have one TD per packet.
                for (uint32 i = 0; i < isochronousData->packet_count; i++) {
                        td->trbs[i].flags &= ~(TRB_3_TYPE(TRB_TYPE_NORMAL));
                        td->trbs[i].flags |= TRB_3_TYPE(TRB_TYPE_ISOCH);

                        if (i != (isochronousData->packet_count - 1)) {
                                // For all but the last TD, generate events (but not interrupts) on short packets.
                                // (The last TD uses the regular Event Data TRB.)
                                td->trbs[i].flags |= TRB_3_ISP_BIT | TRB_3_BEI_BIT;
                                td->trbs[i].flags &= ~TRB_3_CHAIN_BIT;
                        }
                }

                // TODO: We do not currently take Mult into account at all!
                // How are we supposed to do that here?

                // Determine the (starting) frame number: if ISO_ASAP is set,
                // we are queueing this "right away", and so want to reset
                // the starting_frame_number. Otherwise we use the passed one.
                uint32 frame;
                if ((isochronousData->flags & USB_ISO_ASAP) != 0
                                || isochronousData->starting_frame_number == NULL) {
                        // All reads from the microframe index register must be
                        // incremented by 1. (XHCI 1.2 § 4.14.2.1.4 p265.)
                        frame = (ReadRunReg32(XHCI_MFINDEX) + 1) >> 3;
                        td->trbs[0].flags |= TRB_3_ISO_SIA_BIT;
                } else {
                        frame = *isochronousData->starting_frame_number;
                        td->trbs[0].flags |= TRB_3_FRID(frame);
                }
                // Report the frame that was actually used back to the caller.
                if (isochronousData->starting_frame_number != NULL)
                        *isochronousData->starting_frame_number = frame;
        }

        // Set the ENT (Evaluate Next TRB) bit, so that the HC will not switch
        // contexts before evaluating the Link TRB that _LinkDescriptorForPipe
        // will insert, as otherwise there would be a race between us freeing
        // and unlinking the descriptor, and the controller evaluating the Link TRB
        // and thus getting back onto the main ring and executing the Event Data
        // TRB that generates the interrupt for this transfer.
        //
        // Note that we *do not* unset the CHAIN bit in this TRB, thus including
        // the Link TRB in this TD formally, which is required when using the
        // ENT bit. (XHCI 1.2 § 4.12.3 p250.)
        td->trbs[td->trb_used - 1].flags |= TRB_3_ENT_BIT;

        if (!directionIn && !transfer->IsPhysical()) {
                TRACE("copying out iov count %" B_PRIuSIZE "\n",
                        transfer->VectorCount());
                // Reuse the function-level status instead of shadowing it.
                status = transfer->PrepareKernelAccess();
                if (status != B_OK) {
                        FreeDescriptor(td);
                        return status;
                }
                WriteDescriptor(td, transfer->Vector(),
                        transfer->VectorCount(), transfer->IsPhysical());
        }

        td->transfer = transfer;
        status = _LinkDescriptorForPipe(td, endpoint);
        if (status != B_OK) {
                FreeDescriptor(td);
                return status;
        }

        return B_OK;
}


// Cancels every transfer currently queued on the given pipe's endpoint.
//
// The pending Transfer objects are detached from their TDs, the endpoint is
// stopped (and reset if it turns out to be halted), and on success the
// endpoint ring is cleared and its dequeue pointer moved back to the start.
// Each detached transfer is then deleted; unless `force` is set its callback
// is invoked with B_CANCELED first.
//
// Returns B_NO_INIT if the pipe has no endpoint or the endpoint has no TRB
// ring, and B_OK otherwise -- including when the endpoint could not be
// stopped, in which case the TDs are left attached to the endpoint.
status_t
XHCI::CancelQueuedTransfers(Pipe *pipe, bool force)
{
        xhci_endpoint* endpoint = (xhci_endpoint*)pipe->ControllerCookie();
        if (endpoint == NULL || endpoint->trbs == NULL) {
                // Someone's de-allocated this pipe or endpoint in the meantime.
                // (Possibly AllocateDevice failed, and we were the temporary pipe.)
                return B_NO_INIT;
        }

        // With TRACE_USB defined the message is always printed; otherwise only
        // for forced cancels.
#ifndef TRACE_USB
        if (force)
#endif
        {
                TRACE_ALWAYS("cancel queued transfers (%" B_PRId8 ") for pipe %p (%d)\n",
                        endpoint->used, pipe, pipe->EndpointAddress());
        }

        MutexLocker endpointLocker(endpoint->lock);

        if (endpoint->td_head == NULL) {
                // There aren't any currently pending transfers to cancel.
                return B_OK;
        }

        // Calling the callbacks while holding the endpoint lock could potentially
        // cause deadlocks, so we instead store them in a pointer array. We need
        // to do this separately from freeing the TDs, for in the case we fail
        // to stop the endpoint, we cancel the transfers but do not free the TDs.
        Transfer* transfers[XHCI_MAX_TRANSFERS];
        int32 transfersCount = 0;

        // Detach each Transfer from its TD; TDs without a transfer are skipped.
        // NOTE(review): there is no bounds check against XHCI_MAX_TRANSFERS
        // here -- presumably the number of queued TDs is limited elsewhere;
        // confirm.
        for (xhci_td* td = endpoint->td_head; td != NULL; td = td->next) {
                if (td->transfer == NULL)
                        continue;

                transfers[transfersCount] = td->transfer;
                transfersCount++;
                td->transfer = NULL;
        }

        // It is possible that while waiting for the stop-endpoint command to
        // complete, one of the queued transfers posts a completion event, so in
        // order to avoid a deadlock, we must unlock the endpoint.
        endpointLocker.Unlock();
        status_t status = StopEndpoint(false, endpoint);
        if (status != B_OK && status != B_DEV_STALLED) {
                // It is possible that the endpoint was stopped by the controller at the
                // same time our STOP command was in progress, causing a "Context State"
                // error. In that case, try again; if the endpoint is already stopped,
                // StopEndpoint will notice this. (XHCI 1.2 § 4.6.9 p137.)
                status = StopEndpoint(false, endpoint);
        }
        if (status == B_DEV_STALLED) {
                // Only exit from a Halted state is a RESET. (XHCI 1.2 § 4.8.3 p163.)
                TRACE_ERROR("cancel queued transfers: halted endpoint, reset!\n");
                status = ResetEndpoint(false, endpoint);
        }
        endpointLocker.Lock();

        // Detach the head TD from the endpoint.
        // (td_head is re-read here because the lock was dropped above.)
        xhci_td* td_head = endpoint->td_head;
        endpoint->td_head = NULL;

        if (status == B_OK) {
                // Clear the endpoint's TRBs.
                memset(endpoint->trbs, 0, sizeof(xhci_trb) * XHCI_ENDPOINT_RING_SIZE);
                endpoint->used = 0;
                endpoint->next = 0;

                // Set dequeue pointer location to the beginning of the ring.
                SetTRDequeue(endpoint->trb_addr, 0, endpoint->id + 1,
                        endpoint->device->slot);

                // We don't need to do anything else to restart the ring, as it will resume
                // operation as normal upon the next doorbell. (XHCI 1.2 § 4.6.9 p136.)
        } else {
                // We couldn't stop the endpoint. Most likely the device has been
                // removed and the endpoint was stopped by the hardware, or is
                // for some reason busy and cannot be stopped.
                TRACE_ERROR("cancel queued transfers: could not stop endpoint: %s!\n",
                        strerror(status));

                // Instead of freeing the TDs, we want to leave them in the endpoint
                // so that when/if the hardware returns, they can be properly unlinked,
                // as otherwise the endpoint could get "stuck" by having the "used"
                // slowly accumulate due to "dead" transfers.
                endpoint->td_head = td_head;
                td_head = NULL;
        }

        endpointLocker.Unlock();

        // From here on the endpoint lock is no longer held, so it is safe to
        // run the callbacks.
        for (int32 i = 0; i < transfersCount; i++) {
                // If the transfer is canceled by force, the one causing the
                // cancel is possibly not the one who initiated the transfer
                // and the callback is likely not safe anymore.
                if (!force)
                        transfers[i]->Finished(B_CANCELED, 0);

                delete transfers[i];
        }

        // This loop looks a bit strange because we need to store the "next"
        // pointer before freeing the descriptor.
        // (td_head is NULL here if the endpoint could not be stopped, so
        // nothing is freed in that case.)
        xhci_td* td;
        while ((td = td_head) != NULL) {
                td_head = td_head->next;
                FreeDescriptor(td);
        }

        return B_OK;
}


// Submits a transfer for polled (debugger) use. Before submitting, the
// endpoint, finished-list and event locks are probed with trylock and
// immediately released again; if any of them is currently held the call
// fails with B_WOULD_BLOCK. Returns B_BAD_VALUE if the pipe has no endpoint,
// otherwise the result of SubmitTransfer().
status_t
XHCI::StartDebugTransfer(Transfer *transfer)
{
        xhci_endpoint *endpoint
                = (xhci_endpoint *)transfer->TransferPipe()->ControllerCookie();
        if (endpoint == NULL)
                return B_BAD_VALUE;

        // Probe every lock the transfer path is going to take. Each lock is
        // only tried if all previous ones could be acquired.
        if (mutex_trylock(&endpoint->lock) != B_OK)
                return B_WOULD_BLOCK;
        const bool gotFinishedLock = mutex_trylock(&fFinishedLock) == B_OK;
        const bool gotEventLock
                = gotFinishedLock && mutex_trylock(&fEventLock) == B_OK;

        // Release whatever was acquired, in acquisition order.
        mutex_unlock(&endpoint->lock);
        if (gotFinishedLock)
                mutex_unlock(&fFinishedLock);
        if (gotEventLock)
                mutex_unlock(&fEventLock);

        if (!gotEventLock)
                return B_WOULD_BLOCK;

        const status_t submitStatus = SubmitTransfer(transfer);
        if (submitStatus == B_OK) {
                // The endpoint's head TD is the TD of the just-submitted transfer.
                // As in EHCI, the callback cookie is (ab)used to remember it.
                transfer->SetCallback(NULL, endpoint->td_head);
        }

        return submitStatus;
}


// Polls for completion of a transfer started with StartDebugTransfer().
//
// Processes pending events once and then searches the finished list for the
// TD stored in the transfer's callback cookie. If it is found, it is
// unlinked, IN data is copied back on success, the TD is freed and the
// cookie is cleared. Returns B_NO_INIT if the transfer carries no TD,
// B_DEV_PENDING (after a short spin) if the TD has not finished yet, and
// otherwise B_OK or B_ERROR depending on the TRB completion code.
status_t
XHCI::CheckDebugTransfer(Transfer *transfer)
{
        xhci_td *wanted = (xhci_td *)transfer->CallbackCookie();
        if (wanted == NULL)
                return B_NO_INIT;

        // Process events once, and then look for it in the finished list.
        ProcessEvents();

        xhci_td *before = NULL;
        xhci_td *found = fFinishedHead;
        while (found != NULL && found != wanted) {
                before = found;
                found = found->next;
        }

        if (found == NULL) {
                // Not finished yet.
                spin(75);
                return B_DEV_PENDING;
        }

        // Unlink the TD from the finished list.
        if (before != NULL)
                before->next = found->next;
        else
                fFinishedHead = found->next;

        bool directionIn = (transfer->TransferPipe()->Direction() != Pipe::Out);

        status_t status = B_ERROR;
        if (found->trb_completion_code == COMP_SUCCESS
                || found->trb_completion_code == COMP_SHORT_PACKET) {
                status = B_OK;
        }

        if (status == B_OK && directionIn && !transfer->IsPhysical()) {
                ReadDescriptor(found, transfer->Vector(), transfer->VectorCount(),
                        transfer->IsPhysical());
        }

        FreeDescriptor(found);
        transfer->SetCallback(NULL, NULL);
        return status;
}


// Busy-waits until CheckDebugTransfer() stops reporting the transfer as
// pending, spinning between polls.
void
XHCI::CancelDebugTransfer(Transfer *transfer)
{
        for (;;) {
                if (CheckDebugTransfer(transfer) != B_DEV_PENDING)
                        break;
                spin(100);
        }
}


// Reacts to a pipe life-cycle notification: creates the endpoint for a new
// pipe, removes it for a destroyed one, and ignores policy changes. Any
// other change value is reported and rejected with B_UNSUPPORTED.
status_t
XHCI::NotifyPipeChange(Pipe *pipe, usb_change change)
{
        TRACE("pipe change %d for pipe %p (%d)\n", change, pipe,
                pipe->EndpointAddress());

        if (change == USB_CHANGE_CREATED)
                return _InsertEndpointForPipe(pipe);

        if (change == USB_CHANGE_DESTROYED)
                return _RemoveEndpointForPipe(pipe);

        if (change == USB_CHANGE_PIPE_POLICY_CHANGED) {
                // Nothing to do for these, at least for now.
                return B_OK;
        }

        TRACE_ERROR("unknown pipe change!\n");
        return B_UNSUPPORTED;
}


// Allocates a transfer descriptor (TD) with room for `trbCount` TRBs plus
// one extra TRB reserved for the link TRB, and, if `bufferSize` is non-zero,
// `bufferCount` data buffers of `bufferSize` bytes each.
//
// Outside of the kernel debugger the bookkeeping structures come from the
// heap; while in KDL they are taken from the stack's chunk allocator
// instead. TRBs and data buffers always come from the chunk allocator.
//
// Returns the new descriptor, or NULL if any allocation failed (everything
// allocated so far is released through FreeDescriptor()).
xhci_td *
XHCI::CreateDescriptor(uint32 trbCount, uint32 bufferCount, size_t bufferSize)
{
        const bool inKDL = debug_debugger_running();

        xhci_td *result = NULL;
        if (!inKDL) {
                result = (xhci_td*)calloc(1, sizeof(xhci_td));
        } else {
                // Just use the physical memory allocator while in KDL; it's less
                // secure than using the regular heap, but it's easier to deal with.
                phys_addr_t dummy;
                if (fStack->AllocateChunk((void **)&result, &dummy,
                                sizeof(xhci_td)) < B_OK) {
                        result = NULL;
                } else if (result != NULL) {
                        // Match calloc(): the code below and FreeDescriptor() rely
                        // on unset pointer and size fields being zero.
                        memset(result, 0, sizeof(xhci_td));
                }
        }

        if (result == NULL) {
                TRACE_ERROR("failed to allocate a transfer descriptor\n");
                return NULL;
        }

        // We always allocate 1 more TRB than requested, so that
        // _LinkDescriptorForPipe() has room to insert a link TRB.
        trbCount++;
        if (fStack->AllocateChunk((void **)&result->trbs, &result->trb_addr,
                        (trbCount * sizeof(xhci_trb))) < B_OK) {
                TRACE_ERROR("failed to allocate TRBs\n");
                FreeDescriptor(result);
                return NULL;
        }
        result->trb_count = trbCount;
        result->trb_used = 0;

        if (bufferSize > 0) {
                // Due to how the USB stack allocates physical memory, we can't just
                // request one large chunk the size of the transfer, and so instead we
                // create a series of buffers as requested by our caller.

                // We store the buffer pointers and addresses in one memory block.
                const size_t infoSize
                        = bufferCount * (sizeof(void*) + sizeof(phys_addr_t));
                if (!inKDL) {
                        result->buffers = (void**)calloc(bufferCount,
                                (sizeof(void*) + sizeof(phys_addr_t)));
                } else {
                        phys_addr_t dummy;
                        if (fStack->AllocateChunk((void **)&result->buffers, &dummy,
                                        infoSize) < B_OK) {
                                result->buffers = NULL;
                        } else if (result->buffers != NULL) {
                                // As with calloc(): FreeDescriptor() skips NULL entries.
                                memset(result->buffers, 0, infoSize);
                        }
                }
                if (result->buffers == NULL) {
                        TRACE_ERROR("unable to allocate space for buffer infos\n");
                        FreeDescriptor(result);
                        return NULL;
                }
                // The physical addresses live directly behind the pointer array.
                result->buffer_addrs = (phys_addr_t*)&result->buffers[bufferCount];
                result->buffer_size = bufferSize;
                result->buffer_count = bufferCount;

                // Optimization: If the requested total size of all buffers is less
                // than 32*B_PAGE_SIZE (the maximum size that the physical memory
                // allocator can handle), we allocate only one buffer and segment it.
                size_t totalSize = bufferSize * bufferCount;
                if (totalSize < (32 * B_PAGE_SIZE)) {
                        if (fStack->AllocateChunk(&result->buffers[0],
                                        &result->buffer_addrs[0], totalSize) < B_OK) {
                                TRACE_ERROR("unable to allocate space for large buffer (size %ld)\n",
                                        totalSize);
                                FreeDescriptor(result);
                                return NULL;
                        }
                        for (uint32 i = 1; i < bufferCount; i++) {
                                result->buffers[i] = (void*)((addr_t)(result->buffers[i - 1])
                                        + bufferSize);
                                result->buffer_addrs[i] = result->buffer_addrs[i - 1]
                                        + bufferSize;
                        }
                } else {
                        // Otherwise, we allocate each buffer individually.
                        for (uint32 i = 0; i < bufferCount; i++) {
                                if (fStack->AllocateChunk(&result->buffers[i],
                                                &result->buffer_addrs[i], bufferSize) < B_OK) {
                                        TRACE_ERROR("unable to allocate space for a buffer (size "
                                                "%" B_PRIuSIZE ", count %" B_PRIu32 ")\n",
                                                bufferSize, bufferCount);
                                        FreeDescriptor(result);
                                        return NULL;
                                }
                        }
                }
        } else {
                result->buffers = NULL;
                result->buffer_addrs = NULL;
        }

        // Initialize all other fields.
        result->transfer = NULL;
        result->trb_completion_code = 0;
        result->trb_left = 0;
        result->next = NULL;

        TRACE("CreateDescriptor allocated %p, buffer_size %ld, buffer_count %" B_PRIu32 "\n",
                result, result->buffer_size, result->buffer_count);

        return result;
}


// Releases a transfer descriptor together with its TRBs, its data buffers
// and the buffer bookkeeping block. Accepts NULL and partially constructed
// descriptors (as passed by CreateDescriptor() on its error paths).
//
// Which allocator the bookkeeping memory is returned to depends on whether
// the kernel debugger is running at the time of the call.
// NOTE(review): this presumably requires a descriptor to be freed in the
// same (KDL / non-KDL) mode it was created in -- confirm.
void
XHCI::FreeDescriptor(xhci_td *descriptor)
{
        if (descriptor == NULL)
                return;

        const bool inKDL = debug_debugger_running();

        if (descriptor->trbs != NULL) {
                fStack->FreeChunk(descriptor->trbs, descriptor->trb_addr,
                        (descriptor->trb_count * sizeof(xhci_trb)));
        }
        if (descriptor->buffers != NULL) {
                size_t totalSize = descriptor->buffer_size * descriptor->buffer_count;
                if (totalSize < (32 * B_PAGE_SIZE)) {
                        // This was allocated as one contiguous buffer. It may be
                        // missing if CreateDescriptor() failed to allocate it, so
                        // skip it then, just like the per-buffer case below does.
                        if (descriptor->buffer_count > 0
                                && descriptor->buffers[0] != NULL) {
                                fStack->FreeChunk(descriptor->buffers[0],
                                        descriptor->buffer_addrs[0], totalSize);
                        }
                } else {
                        for (uint32 i = 0; i < descriptor->buffer_count; i++) {
                                if (descriptor->buffers[i] == NULL)
                                        continue;
                                fStack->FreeChunk(descriptor->buffers[i], descriptor->buffer_addrs[i],
                                        descriptor->buffer_size);
                        }
                }

                // The pointer and address arrays share a single allocation.
                if (!inKDL) {
                        free(descriptor->buffers);
                } else {
                        fStack->FreeChunk(descriptor->buffers, 0,
                                descriptor->buffer_count * (sizeof(void*) + sizeof(phys_addr_t)));
                }
        }

        if (!inKDL)
                free(descriptor);
        else
                fStack->FreeChunk(descriptor, 0, sizeof(xhci_td));
}


// Copies the contents of the given I/O vectors, in order, into the data
// buffers of the descriptor. Each buffer is filled up to buffer_size before
// the next one is used; copying stops once the descriptor has no buffers
// left. `physical` tells generic_memcpy() how to interpret the vector
// addresses. Returns the number of bytes copied.
size_t
XHCI::WriteDescriptor(xhci_td *descriptor, generic_io_vec *vector, size_t vectorCount, bool physical)
{
        size_t total = 0;

        size_t slot = 0;
                // index of the buffer currently being filled
        size_t slotFill = 0;
                // bytes already placed into that buffer

        for (size_t v = 0; v < vectorCount; v++) {
                const generic_io_vec &entry = vector[v];
                size_t left = entry.length;

                while (left > 0 && slot < descriptor->buffer_count) {
                        const size_t room = descriptor->buffer_size - slotFill;
                        const size_t chunk = (left > room) ? room : left;

                        const generic_addr_t target
                                = (generic_addr_t)descriptor->buffers[slot] + slotFill;
                        const generic_addr_t source
                                = entry.base + (entry.length - left);

                        status_t status = generic_memcpy(target, false, source, physical,
                                chunk);
                        ASSERT_ALWAYS(status == B_OK);

                        total += chunk;
                        slotFill += chunk;
                        left -= chunk;

                        // Advance to the next buffer once this one is full.
                        if (slotFill == descriptor->buffer_size) {
                                slot++;
                                slotFill = 0;
                        }
                }
        }

        TRACE("wrote descriptor (%" B_PRIuSIZE " bytes)\n", total);
        return total;
}


// Copies data out of the descriptor's buffers into the given I/O vectors, in
// order. Each buffer is consumed up to buffer_size before the next one is
// used; copying stops once the descriptor has no buffers left. `physical`
// tells generic_memcpy() how to interpret the vector addresses. Returns the
// number of bytes copied.
size_t
XHCI::ReadDescriptor(xhci_td *descriptor, generic_io_vec *vector, size_t vectorCount, bool physical)
{
        size_t total = 0;

        size_t slot = 0;
                // index of the buffer currently being drained
        size_t slotTaken = 0;
                // bytes already consumed from that buffer

        for (size_t v = 0; v < vectorCount; v++) {
                const generic_io_vec &entry = vector[v];
                size_t left = entry.length;

                while (left > 0 && slot < descriptor->buffer_count) {
                        const size_t available = descriptor->buffer_size - slotTaken;
                        const size_t chunk = (left > available) ? available : left;

                        const generic_addr_t target
                                = entry.base + (entry.length - left);
                        const generic_addr_t source
                                = (generic_addr_t)descriptor->buffers[slot] + slotTaken;

                        status_t status = generic_memcpy(target, physical, source, false,
                                chunk);
                        ASSERT_ALWAYS(status == B_OK);

                        total += chunk;
                        slotTaken += chunk;
                        left -= chunk;

                        // Advance to the next buffer once this one is exhausted.
                        if (slotTaken == descriptor->buffer_size) {
                                slot++;
                                slotTaken = 0;
                        }
                }
        }

        TRACE("read descriptor (%" B_PRIuSIZE " bytes)\n", total);
        return total;
}


Device *
XHCI::AllocateDevice(Hub *parent, int8 hubAddress, uint8 hubPort,
        usb_speed speed)
{
        TRACE("AllocateDevice hubAddress %d hubPort %d speed %d\n", hubAddress,
                hubPort, speed);

        uint8 slot = XHCI_MAX_SLOTS;
        status_t status = EnableSlot(&slot);
        if (status != B_OK) {
                TRACE_ERROR("failed to enable slot: %s\n", strerror(status));
                return NULL;
        }

        if (slot == 0 || slot > fSlotCount) {
                TRACE_ERROR("AllocateDevice: bad slot\n");
                return NULL;
        }

        if (fDevices[slot].slot != 0) {
                TRACE_ERROR("AllocateDevice: slot already used\n");
                return NULL;
        }

        struct xhci_device *device = &fDevices[slot];
        device->slot = slot;

        device->input_ctx_area = fStack->AllocateArea((void **)&device->input_ctx,
                &device->input_ctx_addr, sizeof(*device->input_ctx) << fContextSizeShift,
                "XHCI input context");
        if (device->input_ctx_area < B_OK) {
                TRACE_ERROR("unable to create a input context area\n");
                CleanupDevice(device);
                return NULL;
        }
        if (fContextSizeShift == 1) {
                // 64-byte contexts have to be page-aligned in order for
                // _OffsetContextAddr to function properly.
                ASSERT((((addr_t)device->input_ctx) % B_PAGE_SIZE) == 0);
        }

        memset(device->input_ctx, 0, sizeof(*device->input_ctx) << fContextSizeShift);
        _WriteContext(&device->input_ctx->input.dropFlags, 0);
        _WriteContext(&device->input_ctx->input.addFlags, 3);

        uint8 rhPort = hubPort;
        uint32 route = 0;
        for (Device *hubDevice = parent; hubDevice != RootObject();
                        hubDevice = (Device *)hubDevice->Parent()) {
                if (hubDevice->Parent() == RootObject())
                        break;

                if (rhPort > 15)
                        rhPort = 15;
                route = route << 4;
                route |= rhPort;

                rhPort = hubDevice->HubPort();
        }

        uint32 dwslot0 = SLOT_0_NUM_ENTRIES(1) | SLOT_0_ROUTE(route);

        // Get speed of port, only if device connected to root hub port
        // else we have to rely on value reported by the Hub Explore thread
        if (route == 0) {
                GetPortSpeed(hubPort - 1, &speed);
                TRACE("speed updated %d\n", speed);
        }

        // add the speed
        switch (speed) {
        case USB_SPEED_LOWSPEED:
                dwslot0 |= SLOT_0_SPEED(2);
                break;
        case USB_SPEED_FULLSPEED:
                dwslot0 |= SLOT_0_SPEED(1);
                break;
        case USB_SPEED_HIGHSPEED:
                dwslot0 |= SLOT_0_SPEED(3);
                break;
        case USB_SPEED_SUPERSPEED:
                dwslot0 |= SLOT_0_SPEED(4);
                break;
        case USB_SPEED_SUPERSPEEDPLUS:
                dwslot0 |= SLOT_0_SPEED(5);
                break;
        default:
                TRACE_ERROR("unknown usb speed\n");
                break;
        }

        _WriteContext(&device->input_ctx->slot.dwslot0, dwslot0);
        // TODO enable power save
        _WriteContext(&device->input_ctx->slot.dwslot1, SLOT_1_RH_PORT(rhPort));
        uint32 dwslot2 = SLOT_2_IRQ_TARGET(0);

        // If LS/FS device connected to non-root HS device
        if (route != 0 && parent->Speed() == USB_SPEED_HIGHSPEED
                && (speed == USB_SPEED_LOWSPEED || speed == USB_SPEED_FULLSPEED)) {
                struct xhci_device *parenthub = (struct xhci_device *)
                        parent->ControllerCookie();
                dwslot2 |= SLOT_2_PORT_NUM(hubPort);
                dwslot2 |= SLOT_2_TT_HUB_SLOT(parenthub->slot);
        }

        _WriteContext(&device->input_ctx->slot.dwslot2, dwslot2);

        _WriteContext(&device->input_ctx->slot.dwslot3, SLOT_3_SLOT_STATE(0)
                | SLOT_3_DEVICE_ADDRESS(0));

        TRACE("slot 0x%08" B_PRIx32 " 0x%08" B_PRIx32 " 0x%08" B_PRIx32 " 0x%08" B_PRIx32
                "\n", _ReadContext(&device->input_ctx->slot.dwslot0),
                _ReadContext(&device->input_ctx->slot.dwslot1),
                _ReadContext(&device->input_ctx->slot.dwslot2),
                _ReadContext(&device->input_ctx->slot.dwslot3));

        device->device_ctx_area = fStack->AllocateArea((void **)&device->device_ctx,
                &device->device_ctx_addr, sizeof(*device->device_ctx) << fContextSizeShift,
                "XHCI device context");
        if (device->device_ctx_area < B_OK) {
                TRACE_ERROR("unable to create a device context area\n");
                CleanupDevice(device);
                return NULL;
        }
        memset(device->device_ctx, 0, sizeof(*device->device_ctx) << fContextSizeShift);

        device->trb_area = fStack->AllocateArea((void **)&device->trbs,
                &device->trb_addr, sizeof(xhci_trb) * (XHCI_MAX_ENDPOINTS - 1)
                        * XHCI_ENDPOINT_RING_SIZE, "XHCI endpoint trbs");
        if (device->trb_area < B_OK) {
                TRACE_ERROR("unable to create a device trbs area\n");
                CleanupDevice(device);
                return NULL;
        }

        // set up slot pointer to device context
        fDcba->baseAddress[slot] = device->device_ctx_addr;

        size_t maxPacketSize;
        switch (speed) {
        case USB_SPEED_LOWSPEED:
        case USB_SPEED_FULLSPEED:
                maxPacketSize = 8;
                break;
        case USB_SPEED_HIGHSPEED:
                maxPacketSize = 64;
                break;
        default:
                maxPacketSize = 512;
                break;
        }

        xhci_endpoint* endpoint0 = &device->endpoints[0];
        mutex_init(&endpoint0->lock, "xhci endpoint lock");
        endpoint0->device = device;
        endpoint0->id = 0;
        endpoint0->status = 0;
        endpoint0->td_head = NULL;
        endpoint0->used = 0;
        endpoint0->next = 0;
        endpoint0->trbs = device->trbs;
        endpoint0->trb_addr = device->trb_addr;

        // configure the Control endpoint 0
        if (ConfigureEndpoint(endpoint0, slot, 0, USB_OBJECT_CONTROL_PIPE, false,
                        0, maxPacketSize, speed, 0, 0) != B_OK) {
                TRACE_ERROR("unable to configure default control endpoint\n");
                CleanupDevice(device);
                return NULL;
        }

        // device should get to addressed state (bsr = 0)
        status = SetAddress(device->input_ctx_addr, false, slot);
        if (status != B_OK) {
                TRACE_ERROR("unable to set address: %s\n", strerror(status));
                CleanupDevice(device);
                return NULL;
        }

        device->address = SLOT_3_DEVICE_ADDRESS_GET(_ReadContext(
                &device->device_ctx->slot.dwslot3));

        TRACE("device: address 0x%x state 0x%08" B_PRIx32 "\n", device->address,
                SLOT_3_SLOT_STATE_GET(_ReadContext(
                        &device->device_ctx->slot.dwslot3)));
        TRACE("endpoint0 state 0x%08" B_PRIx32 "\n",
                ENDPOINT_0_STATE_GET(_ReadContext(
                        &device->device_ctx->endpoints[0].dwendpoint0)));

        // Wait a bit for the device to complete addressing
        snooze(USB_DELAY_SET_ADDRESS);

        // Create a temporary pipe with the new address
        ControlPipe pipe(parent);
        pipe.SetControllerCookie(endpoint0);
        pipe.InitCommon(device->address + 1, 0, speed, Pipe::Default, maxPacketSize, 0,
                hubAddress, hubPort);

        // Get the device descriptor
        // Just retrieve the first 8 bytes of the descriptor -> minimum supported
        // size of any device. It is enough because it includes the device type.

        size_t actualLength = 0;
        usb_device_descriptor deviceDescriptor;

        TRACE("getting the device descriptor\n");
        status = pipe.SendRequest(
                USB_REQTYPE_DEVICE_IN | USB_REQTYPE_STANDARD,           // type
                USB_REQUEST_GET_DESCRIPTOR,                                                     // request
                USB_DESCRIPTOR_DEVICE << 8,                                                     // value
                0,                                                                                                      // index
                8,                                                                                                      // length
                (void *)&deviceDescriptor,                                                      // buffer
                8,                                                                                                      // buffer length
                &actualLength);                                                                         // actual length

        if (actualLength != 8) {
                TRACE_ERROR("failed to get the device descriptor: %s\n",
                        strerror(status));
                CleanupDevice(device);
                return NULL;
        }

        TRACE("device_class: %d device_subclass %d device_protocol %d\n",
                deviceDescriptor.device_class, deviceDescriptor.device_subclass,
                deviceDescriptor.device_protocol);

        if (speed == USB_SPEED_FULLSPEED && deviceDescriptor.max_packet_size_0 != 8) {
                TRACE("Full speed device with different max packet size for Endpoint 0\n");
                uint32 dwendpoint1 = _ReadContext(
                        &device->input_ctx->endpoints[0].dwendpoint1);
                dwendpoint1 &= ~ENDPOINT_1_MAXPACKETSIZE(0xffff);
                dwendpoint1 |= ENDPOINT_1_MAXPACKETSIZE(
                        deviceDescriptor.max_packet_size_0);
                _WriteContext(&device->input_ctx->endpoints[0].dwendpoint1,
                        dwendpoint1);
                _WriteContext(&device->input_ctx->input.dropFlags, 0);
                _WriteContext(&device->input_ctx->input.addFlags, (1 << 1));
                EvaluateContext(device->input_ctx_addr, device->slot);
        }

        Device *deviceObject = NULL;
        if (deviceDescriptor.device_class == 0x09) {
                TRACE("creating new Hub\n");
                TRACE("getting the hub descriptor\n");
                size_t actualLength = 0;
                usb_hub_descriptor hubDescriptor;
                status = pipe.SendRequest(
                        USB_REQTYPE_DEVICE_IN | USB_REQTYPE_CLASS,                      // type
                        USB_REQUEST_GET_DESCRIPTOR,                                                     // request
                        USB_DESCRIPTOR_HUB << 8,                                                        // value
                        0,                                                                                                      // index
                        sizeof(usb_hub_descriptor),                                                     // length
                        (void *)&hubDescriptor,                                                         // buffer
                        sizeof(usb_hub_descriptor),                                                     // buffer length
                        &actualLength);

                if (actualLength != sizeof(usb_hub_descriptor)) {
                        TRACE_ERROR("error while getting the hub descriptor: %s\n",
                                strerror(status));
                        CleanupDevice(device);
                        return NULL;
                }

                uint32 dwslot0 = _ReadContext(&device->input_ctx->slot.dwslot0);
                dwslot0 |= SLOT_0_HUB_BIT;
                _WriteContext(&device->input_ctx->slot.dwslot0, dwslot0);
                uint32 dwslot1 = _ReadContext(&device->input_ctx->slot.dwslot1);
                dwslot1 |= SLOT_1_NUM_PORTS(hubDescriptor.num_ports);
                _WriteContext(&device->input_ctx->slot.dwslot1, dwslot1);
                if (speed == USB_SPEED_HIGHSPEED) {
                        uint32 dwslot2 = _ReadContext(&device->input_ctx->slot.dwslot2);
                        dwslot2 |= SLOT_2_TT_TIME(HUB_TTT_GET(hubDescriptor.characteristics));
                        _WriteContext(&device->input_ctx->slot.dwslot2, dwslot2);
                }

                deviceObject = new(std::nothrow) Hub(parent, hubAddress, hubPort,
                        deviceDescriptor, device->address + 1, speed, false, device);
        } else {
                TRACE("creating new device\n");
                deviceObject = new(std::nothrow) Device(parent, hubAddress, hubPort,
                        deviceDescriptor, device->address + 1, speed, false, device);
        }
        if (deviceObject == NULL || deviceObject->InitCheck() != B_OK) {
                if (deviceObject == NULL) {
                        TRACE_ERROR("no memory to allocate device\n");
                } else {
                        TRACE_ERROR("device object failed to initialize\n");
                }
                CleanupDevice(device);
                return NULL;
        }

        // We don't want to disable the default endpoint, naturally, which would
        // otherwise happen when this Pipe object is destroyed.
        pipe.SetControllerCookie(NULL);

        deviceObject->RegisterNode();

        TRACE("AllocateDevice() port %d slot %d\n", hubPort, slot);
        return deviceObject;
}


void
XHCI::FreeDevice(Device *usbDevice)
{
        // Fetch our per-slot bookkeeping from the Device object before the
        // object goes away.
        xhci_device* slotState = (xhci_device*)usbDevice->ControllerCookie();
        TRACE("FreeDevice() slot %d\n", slotState->slot);

        // The Device object has to be destroyed before our own state is torn
        // down: its destruction cleans up its pipes, which tells us what
        // needs to be destroyed on our side.
        delete usbDevice;

        CleanupDevice(slotState);
}


void
XHCI::CleanupDevice(xhci_device *device)
{
        // A slot number of 0 means there is no slot to disable.
        if (device->slot != 0) {
                DisableSlot(device->slot);
                fDcba->baseAddress[device->slot] = 0;
        }

        // Each area is released only if its address field was filled in.
        if (device->trb_addr != 0) {
                delete_area(device->trb_area);
        }
        if (device->input_ctx_addr != 0) {
                delete_area(device->input_ctx_area);
        }
        if (device->device_ctx_addr != 0) {
                delete_area(device->device_ctx_area);
        }

        // Wipe the whole entry (this also resets "slot" to 0).
        memset(device, 0, sizeof(xhci_device));
}


uint8
XHCI::_GetEndpointState(xhci_endpoint* endpoint)
{
        // The state is extracted from the first dword of this endpoint's entry
        // in the *device* context (not the input context).
        const uint32 dwendpoint0 = _ReadContext(
                &endpoint->device->device_ctx->endpoints[endpoint->id].dwendpoint0);
        return ENDPOINT_0_STATE_GET(dwendpoint0);
}


// Sets up the controller-side endpoint for "pipe" and stores it as the pipe's
// controller cookie.
//
// Nothing is done (B_OK is returned) if the pipe already has a controller
// cookie, if its parent is not a device object, or if the parent device hangs
// directly off the root object.
//
// Returns:
//      B_OK            on success (or when there was nothing to do)
//      B_NO_INIT       if the parent device has no controller cookie
//      B_BAD_VALUE     if the computed endpoint index is out of range
//      other           the error of either ConfigureEndpoint() step
status_t
XHCI::_InsertEndpointForPipe(Pipe *pipe)
{
        TRACE("insert endpoint for pipe %p (%d)\n", pipe, pipe->EndpointAddress());

        if (pipe->ControllerCookie() != NULL
                        || pipe->Parent()->Type() != USB_OBJECT_DEVICE) {
                // default pipe is already referenced
                return B_OK;
        }

        Device* usbDevice = (Device *)pipe->Parent();
        if (usbDevice->Parent() == RootObject()) {
                // root hub needs no initialization
                return B_OK;
        }

        struct xhci_device *device = (struct xhci_device *)
                usbDevice->ControllerCookie();
        if (device == NULL) {
                panic("device is NULL\n");
                return B_NO_INIT;
        }

        // Index into device->endpoints[]: two entries per endpoint address
        // (non-OUT directions take the odd slot), shifted down by one so that
        // the default control endpoint ends up at index 0.
        const uint8 id = (2 * pipe->EndpointAddress()
                + (pipe->Direction() != Pipe::Out ? 1 : 0)) - 1;
        if (id >= XHCI_MAX_ENDPOINTS - 1)
                return B_BAD_VALUE;

        if (id > 0) {
                // If the slot context still advertises a single context entry,
                // raise it to the maximum before adding further endpoints.
                uint32 devicedwslot0 = _ReadContext(&device->device_ctx->slot.dwslot0);
                if (SLOT_0_NUM_ENTRIES_GET(devicedwslot0) == 1) {
                        uint32 inputdwslot0 = _ReadContext(&device->input_ctx->slot.dwslot0);
                        inputdwslot0 &= ~(SLOT_0_NUM_ENTRIES(0x1f));
                        inputdwslot0 |= SLOT_0_NUM_ENTRIES(XHCI_MAX_ENDPOINTS - 1);
                        _WriteContext(&device->input_ctx->slot.dwslot0, inputdwslot0);
                        EvaluateContext(device->input_ctx_addr, device->slot);
                }

                xhci_endpoint* endpoint = &device->endpoints[id];
                mutex_init(&endpoint->lock, "xhci endpoint lock");
                MutexLocker endpointLocker(endpoint->lock);

                endpoint->device = device;
                endpoint->id = id;
                endpoint->td_head = NULL;
                endpoint->used = 0;
                endpoint->next = 0;

                // Every endpoint owns a fixed XHCI_ENDPOINT_RING_SIZE slice of the
                // device's TRB area, selected by its index.
                endpoint->trbs = device->trbs + id * XHCI_ENDPOINT_RING_SIZE;
                endpoint->trb_addr = device->trb_addr
                        + id * XHCI_ENDPOINT_RING_SIZE * sizeof(xhci_trb);
                memset(endpoint->trbs, 0,
                        sizeof(xhci_trb) * XHCI_ENDPOINT_RING_SIZE);

                TRACE("insert endpoint for pipe: trbs, device %p endpoint %p\n",
                        device->trbs, endpoint->trbs);
                TRACE("insert endpoint for pipe: trb_addr, device 0x%" B_PRIxPHYSADDR
                        " endpoint 0x%" B_PRIxPHYSADDR "\n", device->trb_addr,
                        endpoint->trb_addr);

                const uint8 endpointNum = id + 1;

                // Step 1: fill in the endpoint's entry in the input context.
                status_t status = ConfigureEndpoint(endpoint, device->slot, id, pipe->Type(),
                        pipe->Direction() == Pipe::In, pipe->Interval(), pipe->MaxPacketSize(),
                        usbDevice->Speed(), pipe->MaxBurst(), pipe->BytesPerInterval());
                if (status != B_OK) {
                        TRACE_ERROR("unable to configure endpoint: %s\n", strerror(status));
                        return status;
                }

                _WriteContext(&device->input_ctx->input.dropFlags, 0);
                _WriteContext(&device->input_ctx->input.addFlags,
                        (1 << endpointNum) | (1 << 0));

                // Step 2: have the controller evaluate the input context. If the
                // command fails, the endpoint must not be handed to the pipe:
                // bail out before the controller cookie gets set below.
                status = ConfigureEndpoint(device->input_ctx_addr, false, device->slot);
                if (status != B_OK) {
                        TRACE_ERROR("unable to configure endpoint: %s\n", strerror(status));
                        return status;
                }

                TRACE("device: address 0x%x state 0x%08" B_PRIx32 "\n",
                        device->address, SLOT_3_SLOT_STATE_GET(_ReadContext(
                                &device->device_ctx->slot.dwslot3)));
                TRACE("endpoint[0] state 0x%08" B_PRIx32 "\n",
                        ENDPOINT_0_STATE_GET(_ReadContext(
                                &device->device_ctx->endpoints[0].dwendpoint0)));
                TRACE("endpoint[%d] state 0x%08" B_PRIx32 "\n", id,
                        ENDPOINT_0_STATE_GET(_ReadContext(
                                &device->device_ctx->endpoints[id].dwendpoint0)));
        }
        pipe->SetControllerCookie(&device->endpoints[id]);

        return B_OK;
}


// Detaches "pipe" from its controller-side endpoint and, for every endpoint
// other than index 0, stops it, frees its queued descriptors and asks the
// controller to drop it.
status_t
XHCI::_RemoveEndpointForPipe(Pipe *pipe)
{
        TRACE("remove endpoint for pipe %p (%d)\n", pipe, pipe->EndpointAddress());

        if (pipe->Parent()->Type() != USB_OBJECT_DEVICE)
                return B_OK;

        Device* owner = (Device *)pipe->Parent();
        if (owner->Parent() == RootObject())
                return B_BAD_VALUE;

        xhci_endpoint *endpoint = (xhci_endpoint *)pipe->ControllerCookie();
        if (endpoint == NULL)
                return B_NO_INIT;
        if (endpoint->trbs == NULL)
                return B_NO_INIT;

        pipe->SetControllerCookie(NULL);

        if (endpoint->id > 0) {
                // Remember what we still need after the endpoint gets wiped below.
                xhci_device *device = endpoint->device;
                uint8 epNumber = endpoint->id + 1;

                StopEndpoint(true, endpoint);

                mutex_lock(&endpoint->lock);

                // See comment in CancelQueuedTransfers.
                for (xhci_td* td = endpoint->td_head; td != NULL;
                                td = endpoint->td_head) {
                        endpoint->td_head = td->next;
                        FreeDescriptor(td);
                }

                mutex_destroy(&endpoint->lock);
                memset(endpoint, 0, sizeof(xhci_endpoint));

                // Drop only this endpoint; keep the slot context (bit 0) added.
                _WriteContext(&device->input_ctx->input.dropFlags, (1 << epNumber));
                _WriteContext(&device->input_ctx->input.addFlags, (1 << 0));

                // The Deconfigure bit in the Configure Endpoint command indicates
                // that *all* endpoints are to be deconfigured, and not just the ones
                // specified in the context flags (XHCI 1.2 § 4.6.6 p115), hence
                // "false" here.
                ConfigureEndpoint(device->input_ctx_addr, false, device->slot);
        }

        return B_OK;
}


// Queues "descriptor" (a transfer descriptor with its own TRB chain) on
// "endpoint": the descriptor is pushed onto the endpoint's td list, a Link TRB
// on the endpoint ring is pointed at the descriptor's TRBs, the descriptor is
// terminated with a Link TRB leading back onto the ring, and finally the
// endpoint's doorbell is rung.
//
// Returns:
//      B_OK                            the descriptor was linked and the doorbell rung
//      B_BAD_VALUE                     XHCI_MAX_TRANSFERS - 1 descriptors are already linked
//      B_DEV_RESOURCE_CONFLICT the head descriptor belongs to a fragmented
//                                                      transfer
status_t
XHCI::_LinkDescriptorForPipe(xhci_td *descriptor, xhci_endpoint *endpoint)
{
        TRACE("link descriptor for pipe\n");

        // Use mutex_trylock first, in case we are in KDL.
        MutexLocker endpointLocker(&endpoint->lock, mutex_trylock(&endpoint->lock) == B_OK);

        // "used" refers to the number of currently linked TDs, not the number of
        // used TRBs on the ring (we use 2 TRBs on the ring per transfer.)
        // Furthermore, we have to leave an empty item between the head and tail.
        if (endpoint->used >= (XHCI_MAX_TRANSFERS - 1)) {
                TRACE_ERROR("link descriptor for pipe: max transfers count exceeded\n");
                return B_BAD_VALUE;
        }

        // We do not support queuing other transfers in tandem with a fragmented one.
        if (endpoint->td_head != NULL && endpoint->td_head->transfer != NULL
                        && endpoint->td_head->transfer->IsFragmented()) {
                TRACE_ERROR("cannot submit transfer: a fragmented transfer is queued\n");
                return B_DEV_RESOURCE_CONFLICT;
        }

        // Push the descriptor onto the front of the endpoint's td list.
        endpoint->used++;
        descriptor->next = endpoint->td_head;
        endpoint->td_head = descriptor;

        // Ring slots used by this transfer: "link" (Link TRB pointing at the
        // descriptor), "eventdata" (Event Data TRB the descriptor links back to)
        // and "next" (slot the following transfer will start at).
        uint32 link = endpoint->next, eventdata = link + 1, next = eventdata + 1;
        if (eventdata == XHCI_ENDPOINT_RING_SIZE || next == XHCI_ENDPOINT_RING_SIZE) {
                // If it's "next" not "eventdata" that got us here, we will be leaving
                // one TRB at the end of the ring unused.
                eventdata = 0;
                next = 1;
        }

        TRACE("link descriptor for pipe: link %d, next %d\n", link, next);

        // Add a Link TRB to the end of the descriptor.
        // It targets the "eventdata" slot on the endpoint ring.
        phys_addr_t addr = endpoint->trb_addr + (eventdata * sizeof(xhci_trb));
        descriptor->trbs[descriptor->trb_used].address = addr;
        descriptor->trbs[descriptor->trb_used].status = TRB_2_IRQ(0);
        descriptor->trbs[descriptor->trb_used].flags = TRB_3_TYPE(TRB_TYPE_LINK)
                | TRB_3_CHAIN_BIT | TRB_3_CYCLE_BIT;
                // It is specified that (XHCI 1.2 § 4.12.3 Note 2 p251) if the TRB
                // following one with the ENT bit set is a Link TRB, the Link TRB
                // shall be evaluated *and* the subsequent TRB shall be. Thus a
                // TRB_3_ENT_BIT is unnecessary here; and from testing seems to
                // break all transfers on a (very) small number of controllers.

#if !B_HOST_IS_LENDIAN
        // Convert endianness.
        // (Covers all of the descriptor's TRBs, including the Link TRB just
        // written at index trb_used.)
        for (uint32 i = 0; i <= descriptor->trb_used; i++) {
                descriptor->trbs[i].address =
                        B_HOST_TO_LENDIAN_INT64(descriptor->trbs[i].address);
                descriptor->trbs[i].status =
                        B_HOST_TO_LENDIAN_INT32(descriptor->trbs[i].status);
                descriptor->trbs[i].flags =
                        B_HOST_TO_LENDIAN_INT32(descriptor->trbs[i].flags);
        }
#endif

        // Link the descriptor.
        // The cycle bit is deliberately left out of the flags for now; it is
        // only set after the write barrier below.
        endpoint->trbs[link].address =
                B_HOST_TO_LENDIAN_INT64(descriptor->trb_addr);
        endpoint->trbs[link].status =
                B_HOST_TO_LENDIAN_INT32(TRB_2_IRQ(0));
        endpoint->trbs[link].flags =
                B_HOST_TO_LENDIAN_INT32(TRB_3_TYPE(TRB_TYPE_LINK));

        // Set up the Event Data TRB (XHCI 1.2 § 4.11.5.2 p230.)
        //
        // We do this on the main ring for two reasons: first, to avoid a small
        // potential race between the interrupt and the controller evaluating
        // the link TRB to get back onto the ring; and second, because many
        // controllers throw errors if the target of a Link TRB is not valid
        // (i.e. does not have its Cycle Bit set.)
        //
        // We also set the "address" field, which the controller will copy
        // verbatim into the TRB it posts to the event ring, to be the last
        // "real" TRB in the TD; this will allow us to determine what transfer
        // the resulting Transfer Event TRB refers to.
        endpoint->trbs[eventdata].address =
                B_HOST_TO_LENDIAN_INT64(descriptor->trb_addr
                        + (descriptor->trb_used - 1) * sizeof(xhci_trb));
        endpoint->trbs[eventdata].status =
                B_HOST_TO_LENDIAN_INT32(TRB_2_IRQ(0));
        endpoint->trbs[eventdata].flags =
                B_HOST_TO_LENDIAN_INT32(TRB_3_TYPE(TRB_TYPE_EVENT_DATA)
                        | TRB_3_IOC_BIT | TRB_3_CYCLE_BIT);

        // Clear the slot following the Event Data TRB (no cycle bit set).
        endpoint->trbs[next].address = 0;
        endpoint->trbs[next].status = 0;
        endpoint->trbs[next].flags = 0;

        // All TRB contents must be written out before the cycle bit is.
        memory_write_barrier();

        // Everything is ready, so write the cycle bit.
        endpoint->trbs[link].flags |= B_HOST_TO_LENDIAN_INT32(TRB_3_CYCLE_BIT);

        TRACE("_LinkDescriptorForPipe pLink %p phys 0x%" B_PRIxPHYSADDR
                " 0x%" B_PRIxPHYSADDR " 0x%08" B_PRIx32 "\n", &endpoint->trbs[link],
                endpoint->trb_addr + link * sizeof(struct xhci_trb),
                endpoint->trbs[link].address,
                B_LENDIAN_TO_HOST_INT32(endpoint->trbs[link].flags));

        // Remember where the next transfer starts, then release the endpoint
        // before ringing the doorbell.
        endpoint->next = next;
        endpointLocker.Unlock();

        TRACE("Endpoint status 0x%08" B_PRIx32 " 0x%08" B_PRIx32 " 0x%016" B_PRIx64 "\n",
                _ReadContext(&endpoint->device->device_ctx->endpoints[endpoint->id].dwendpoint0),
                _ReadContext(&endpoint->device->device_ctx->endpoints[endpoint->id].dwendpoint1),
                _ReadContext(&endpoint->device->device_ctx->endpoints[endpoint->id].qwendpoint2));

        Ring(endpoint->device->slot, endpoint->id + 1);

        TRACE("Endpoint status 0x%08" B_PRIx32 " 0x%08" B_PRIx32 " 0x%016" B_PRIx64 "\n",
                _ReadContext(&endpoint->device->device_ctx->endpoints[endpoint->id].dwendpoint0),
                _ReadContext(&endpoint->device->device_ctx->endpoints[endpoint->id].dwendpoint1),
                _ReadContext(&endpoint->device->device_ctx->endpoints[endpoint->id].qwendpoint2));

        return B_OK;
}


// Removes "descriptor" from the td list of "endpoint" and decrements the
// endpoint's count of linked descriptors.
//
// Returns B_OK if the descriptor was found and unlinked, and B_ERROR if it is
// not on the list (including when the list is empty or "descriptor" is NULL),
// in which case the endpoint is left untouched.
status_t
XHCI::_UnlinkDescriptorForPipe(xhci_td *descriptor, xhci_endpoint *endpoint)
{
        TRACE("unlink descriptor for pipe\n");
        // We presume that the caller has already locked or owns the endpoint.

        if (descriptor == NULL)
                return B_ERROR;

        // Walk the pointers that lead to each list element (starting with
        // td_head itself), so that the head and any later position are handled
        // the same way and an empty list simply falls through.
        for (xhci_td **link = &endpoint->td_head; *link != NULL;
                        link = &(*link)->next) {
                if (*link != descriptor)
                        continue;

                *link = descriptor->next;
                descriptor->next = NULL;
                endpoint->used--;
                return B_OK;
        }

        return B_ERROR;
}


// Fills in entry "number" of the input context of the device in "slot" from
// the given pipe parameters, and records the resulting max burst payload in
// "ep". Only the input context is written here; no command is issued.
//
// Returns B_OK, or B_BAD_VALUE if the computed max burst payload is zero (in
// which case the input context is not touched).
status_t
XHCI::ConfigureEndpoint(xhci_endpoint* ep, uint8 slot, uint8 number, uint8 type,
        bool directionIn, uint16 interval, uint16 maxPacketSize, usb_speed speed,
        uint8 maxBurst, uint16 bytesPerInterval)
{
        struct xhci_device* device = &fDevices[slot];

        const bool isControl = (type & USB_OBJECT_CONTROL_PIPE) != 0;
        const bool isInterrupt = (type & USB_OBJECT_INTERRUPT_PIPE) != 0;
        const bool isBulk = (type & USB_OBJECT_BULK_PIPE) != 0;
        const bool isIsochronous = (type & USB_OBJECT_ISO_PIPE) != 0;

        uint32 dwendpoint0 = 0;
        uint32 dwendpoint1 = 0;
        uint64 qwendpoint2 = 0;
        uint32 dwendpoint4 = 0;

        // Compute and assign the endpoint type. (XHCI 1.2 § 6.2.3 Table 6-9 p452.)
        // Should several type bits be set, isochronous wins over bulk, which
        // wins over interrupt; anything else counts as control.
        uint8 xhciType;
        if (isIsochronous)
                xhciType = 1;
        else if (isBulk)
                xhciType = 2;
        else if (isInterrupt)
                xhciType = 3;
        else
                xhciType = 4;
        if (directionIn)
                xhciType |= (1 << 2);
        dwendpoint1 |= ENDPOINT_1_EPTYPE(xhciType);

        // Compute and assign interval. (XHCI 1.2 § 6.2.3.6 p456.)
        uint16 calcInterval;
        if (isBulk || isControl) {
                // Bulk and Control endpoints never issue NAKs.
                calcInterval = 0;
        } else if (speed == USB_SPEED_FULLSPEED && isIsochronous) {
                // Convert 1-16 into 3-18.
                calcInterval = min_c(max_c(interval, 1), 16) + 2;
        } else if (speed == USB_SPEED_FULLSPEED || speed == USB_SPEED_LOWSPEED) {
                // Convert 1ms-255ms into 3-10: take the index of the highest
                // set bit of the (clamped) interval and add 3.
                uint32 remaining = min_c(max_c(interval, 1), 255);
                calcInterval = 0;
                while (remaining != 1) {
                        remaining = remaining >> 1;
                        calcInterval++;
                }
                calcInterval += 3;
        } else {
                // High speed, SuperSpeed(Plus) and anything unknown:
                // convert 1-16 into 0-15.
                calcInterval = min_c(max_c(interval, 1), 16) - 1;
        }
        dwendpoint0 |= ENDPOINT_0_INTERVAL(calcInterval);

        // For non-isochronous endpoints, we want the controller to retry failed
        // transfers, if possible. (XHCI 1.2 § 4.10.2.3 p197.)
        if (!isIsochronous)
                dwendpoint1 |= ENDPOINT_1_CERR(3);

        // Assign maximum burst size. For USB3 devices this is passed in; for
        // all other devices we compute it. (XHCI 1.2 § 4.8.2 p161.)
        if (speed == USB_SPEED_HIGHSPEED && (isInterrupt || isIsochronous))
                maxBurst = (maxPacketSize & 0x1800) >> 11;
        else if (speed < USB_SPEED_SUPERSPEED)
                maxBurst = 0;
        dwendpoint1 |= ENDPOINT_1_MAXBURST(maxBurst);

        // Assign maximum packet size, set the ring address, and set the
        // "Dequeue Cycle State" bit. (XHCI 1.2 § 6.2.3 Table 6-10 p453.)
        dwendpoint1 |= ENDPOINT_1_MAXPACKETSIZE(maxPacketSize);
        qwendpoint2 |= ENDPOINT_2_DCS_BIT | ep->trb_addr;

        // The Max Burst Payload is the number of bytes moved by a
        // maximum sized burst. (XHCI 1.2 § 4.11.7.1 p236.)
        ep->max_burst_payload = (maxBurst + 1) * maxPacketSize;
        if (ep->max_burst_payload == 0) {
                TRACE_ERROR("ConfigureEndpoint() failed invalid max_burst_payload\n");
                return B_BAD_VALUE;
        }

        // Assign average TRB length.
        if (isControl) {
                // Control pipes are a special case, as they rarely have
                // outbound transfers of any substantial size.
                dwendpoint4 |= ENDPOINT_4_AVGTRBLENGTH(8);
        } else if (isIsochronous) {
                // Isochronous pipes are another special case: the TRB size will be
                // one packet (which is normally smaller than the max packet size,
                // but we don't know what it is here.)
                dwendpoint4 |= ENDPOINT_4_AVGTRBLENGTH(maxPacketSize);
        } else {
                // Under all other circumstances, we put max_burst_payload in a TRB.
                dwendpoint4 |= ENDPOINT_4_AVGTRBLENGTH(ep->max_burst_payload);
        }

        // Assign maximum ESIT payload. (XHCI 1.2 § 4.14.2 p259.)
        if (isInterrupt || isIsochronous) {
                // TODO: For SuperSpeedPlus endpoints, there is yet another descriptor
                // for isochronous endpoints that specifies the maximum ESIT payload.
                // We don't fetch this yet, so just fall back to the USB2 computation
                // method if bytesPerInterval is 0.
                if (speed >= USB_SPEED_SUPERSPEED && bytesPerInterval != 0) {
                        dwendpoint4 |= ENDPOINT_4_MAXESITPAYLOAD(bytesPerInterval);
                } else if (speed >= USB_SPEED_HIGHSPEED) {
                        dwendpoint4 |= ENDPOINT_4_MAXESITPAYLOAD(
                                (maxBurst + 1) * maxPacketSize);
                }
        }

        // Commit the assembled values to the endpoint's input context entry.
        _WriteContext(&device->input_ctx->endpoints[number].dwendpoint0,
                dwendpoint0);
        _WriteContext(&device->input_ctx->endpoints[number].dwendpoint1,
                dwendpoint1);
        _WriteContext(&device->input_ctx->endpoints[number].qwendpoint2,
                qwendpoint2);
        _WriteContext(&device->input_ctx->endpoints[number].dwendpoint4,
                dwendpoint4);

        TRACE("endpoint 0x%" B_PRIx32 " 0x%" B_PRIx32 " 0x%" B_PRIx64 " 0x%"
                B_PRIx32 "\n",
                _ReadContext(&device->input_ctx->endpoints[number].dwendpoint0),
                _ReadContext(&device->input_ctx->endpoints[number].dwendpoint1),
                _ReadContext(&device->input_ctx->endpoints[number].qwendpoint2),
                _ReadContext(&device->input_ctx->endpoints[number].dwendpoint4));

        return B_OK;
}


// Reads the speed field of the port status register of root hub port "index"
// (zero-based) and stores the matching usb_speed in "*speed".
//
// Returns B_BAD_INDEX if "index" is not below the port count, B_OK otherwise.
// Speed values without a known mapping are reported as SuperSpeed.
status_t
XHCI::GetPortSpeed(uint8 index, usb_speed* speed)
{
        if (index >= fPortCount)
                return B_BAD_INDEX;

        const uint32 portStatus = ReadOpReg(XHCI_PORTSC(index));

        usb_speed detected;
        switch (PS_SPEED_GET(portStatus)) {
        case 1:
                detected = USB_SPEED_FULLSPEED;
                break;
        case 2:
                detected = USB_SPEED_LOWSPEED;
                break;
        case 3:
                detected = USB_SPEED_HIGHSPEED;
                break;
        case 4:
                detected = USB_SPEED_SUPERSPEED;
                break;
        case 5:
                detected = USB_SPEED_SUPERSPEEDPLUS;
                break;
        default:
                TRACE_ALWAYS("nonstandard port speed %" B_PRId32 ", assuming SuperSpeed\n",
                        PS_SPEED_GET(portStatus));
                detected = USB_SPEED_SUPERSPEED;
                break;
        }

        *speed = detected;
        return B_OK;
}


// Builds a hub-style port status/change pair for root port "index" from the
// current contents of its PORTSC register.
// Returns B_BAD_INDEX if "index" is not a valid root port, B_OK otherwise.
status_t
XHCI::GetPortStatus(uint8 index, usb_port_status* status)
{
        if (index >= fPortCount)
                return B_BAD_INDEX;

        status->status = 0;
        status->change = 0;

        const uint32 portStatus = ReadOpReg(XHCI_PORTSC(index));
        TRACE("port %" B_PRId8 " status=0x%08" B_PRIx32 "\n", index, portStatus);

        // Ports recorded as SuperSpeed (or faster) get a few extra bits below.
        const bool superSpeedPort = fPortSpeeds[index] >= USB_SPEED_SUPERSPEED;

        // Status: only low and high speed have dedicated flags here.
        const uint32 speedId = PS_SPEED_GET(portStatus);
        if (speedId == 3)
                status->status |= PORT_STATUS_HIGH_SPEED;
        else if (speedId == 2)
                status->status |= PORT_STATUS_LOW_SPEED;

        if ((portStatus & PS_CCS) != 0)
                status->status |= PORT_STATUS_CONNECTION;
        if ((portStatus & PS_PED) != 0)
                status->status |= PORT_STATUS_ENABLE;
        if ((portStatus & PS_OCA) != 0)
                status->status |= PORT_STATUS_OVER_CURRENT;
        if ((portStatus & PS_PR) != 0)
                status->status |= PORT_STATUS_RESET;
        if ((portStatus & PS_PP) != 0) {
                status->status |= superSpeedPort
                        ? PORT_STATUS_SS_POWER : PORT_STATUS_POWER;
        }
        if (superSpeedPort) {
                // The link state bits are passed through unmodified.
                status->status |= portStatus & PS_PLS_MASK;
        }

        // Change bits.
        if ((portStatus & PS_CSC) != 0)
                status->change |= PORT_STATUS_CONNECTION;
        if ((portStatus & PS_PEC) != 0)
                status->change |= PORT_STATUS_ENABLE;
        if ((portStatus & PS_OCC) != 0)
                status->change |= PORT_STATUS_OVER_CURRENT;
        if ((portStatus & PS_PRC) != 0)
                status->change |= PORT_STATUS_RESET;

        if (superSpeedPort) {
                if ((portStatus & PS_PLC) != 0)
                        status->change |= PORT_CHANGE_LINK_STATE;
                if ((portStatus & PS_WRC) != 0)
                        status->change |= PORT_CHANGE_BH_PORT_RESET;
        }

        return B_OK;
}


// Sets a hub-style feature (PORT_SUSPEND, PORT_RESET or PORT_POWER) on root
// port "index" by writing to its PORTSC register.
// Returns B_BAD_INDEX for an invalid port, B_BAD_VALUE for an unsupported
// feature or a suspend request on a port that is not in a suspendable state,
// and B_OK otherwise.
status_t
XHCI::SetPortFeature(uint8 index, uint16 feature)
{
        TRACE("set port feature index %u feature %u\n", index, feature);
        if (index >= fPortCount)
                return B_BAD_INDEX;

        const uint32 portRegister = XHCI_PORTSC(index);

        // Keep the unmodified register value around for state checks, and use
        // the copy with the PS_CLEAR bits removed as the base for write-backs.
        // NOTE(review): PS_CLEAR is defined elsewhere; if it covers PS_PED, PS_PR
        // or the link state field, checks done on the masked value would always
        // see zero. ClearPortFeature() also checks the raw register value.
        const uint32 rawStatus = ReadOpReg(portRegister);
        uint32 portStatus = rawStatus & ~PS_CLEAR;

        switch (feature) {
        case PORT_SUSPEND:
                // Only an enabled port that is not resetting and whose link state
                // is below U3 may be suspended.
                if ((rawStatus & PS_PED) == 0 || (rawStatus & PS_PR) != 0
                        || (rawStatus & PS_PLS_MASK) >= PS_XDEV_U3) {
                        TRACE_ERROR("USB core suspending device not in U0/U1/U2.\n");
                        return B_BAD_VALUE;
                }
                portStatus &= ~PS_PLS_MASK;
                WriteOpReg(portRegister, portStatus | PS_LWS | PS_XDEV_U3);
                break;

        case PORT_RESET:
                WriteOpReg(portRegister, portStatus | PS_PR);
                break;

        case PORT_POWER:
                WriteOpReg(portRegister, portStatus | PS_PP);
                break;
        default:
                return B_BAD_VALUE;
        }

        // Read the register back after the write.
        ReadOpReg(portRegister);
        return B_OK;
}


// Clears a hub-style feature or change flag on root port "index" by writing
// to its PORTSC register.
// Returns B_BAD_INDEX for an invalid port, B_BAD_VALUE for an unsupported
// feature or a resume request on a port that is resetting/disabled, and
// B_OK otherwise.
status_t
XHCI::ClearPortFeature(uint8 index, uint16 feature)
{
        TRACE("clear port feature index %u feature %u\n", index, feature);
        if (index >= fPortCount)
                return B_BAD_INDEX;

        const uint32 portRegister = XHCI_PORTSC(index);

        // The raw value is only used to inspect the port state. Everything that
        // gets written back is based on the copy with the PS_CLEAR bits removed,
        // so that bits which act on a written 1 (PS_PED and the change bits, see
        // the cases below) are not triggered by accident.
        const uint32 rawStatus = ReadOpReg(portRegister);
        uint32 portStatus = rawStatus & ~PS_CLEAR;

        switch (feature) {
        case PORT_SUSPEND:
                if (rawStatus & PS_PR)
                        return B_BAD_VALUE;
                if (rawStatus & PS_XDEV_U3) {
                        if ((rawStatus & PS_PED) == 0)
                                return B_BAD_VALUE;
                        // Request a link transition back to U0.
                        portStatus &= ~PS_PLS_MASK;
                        WriteOpReg(portRegister, portStatus | PS_XDEV_U0 | PS_LWS);
                }
                break;
        case PORT_ENABLE:
                // The enable feature is cleared by writing the bit as 1.
                WriteOpReg(portRegister, portStatus | PS_PED);
                break;
        case PORT_POWER:
                WriteOpReg(portRegister, portStatus & ~PS_PP);
                break;
        case C_PORT_CONNECTION:
                WriteOpReg(portRegister, portStatus | PS_CSC);
                break;
        case C_PORT_ENABLE:
                WriteOpReg(portRegister, portStatus | PS_PEC);
                break;
        case C_PORT_OVER_CURRENT:
                WriteOpReg(portRegister, portStatus | PS_OCC);
                break;
        case C_PORT_RESET:
                WriteOpReg(portRegister, portStatus | PS_PRC);
                break;
        case C_PORT_BH_PORT_RESET:
                WriteOpReg(portRegister, portStatus | PS_WRC);
                break;
        case C_PORT_LINK_STATE:
                WriteOpReg(portRegister, portStatus | PS_PLC);
                break;
        default:
                return B_BAD_VALUE;
        }

        // Read the register back after the write.
        ReadOpReg(portRegister);
        return B_OK;
}


// Clears the run bit in the command register and waits until the status
// register reports the controller as halted.
// Returns B_ERROR if the halted bit does not show up in time.
status_t
XHCI::ControllerHalt()
{
        const uint32 command = ReadOpReg(XHCI_CMD);
        WriteOpReg(XHCI_CMD, command & ~CMD_RUN);

        if (WaitOpBits(XHCI_STS, STS_HCH, STS_HCH) == B_OK)
                return B_OK;

        TRACE_ERROR("HCH shutdown timeout\n");
        return B_ERROR;
}


// Sets the reset bit in the command register, then waits for that bit and
// for the "controller not ready" status bit to both read back as zero.
// Returns B_ERROR if either wait times out.
status_t
XHCI::ControllerReset()
{
        TRACE("ControllerReset() cmd: 0x%" B_PRIx32 " sts: 0x%" B_PRIx32 "\n",
                ReadOpReg(XHCI_CMD), ReadOpReg(XHCI_STS));

        const uint32 command = ReadOpReg(XHCI_CMD);
        WriteOpReg(XHCI_CMD, command | CMD_HCRST);

        status_t status = WaitOpBits(XHCI_CMD, CMD_HCRST, 0);
        if (status != B_OK) {
                TRACE_ERROR("ControllerReset() failed CMD_HCRST\n");
                return B_ERROR;
        }

        status = WaitOpBits(XHCI_STS, STS_CNR, 0);
        if (status != B_OK) {
                TRACE_ERROR("ControllerReset() failed STS_CNR\n");
                return B_ERROR;
        }

        return B_OK;
}


// Static trampoline: "data" is the XHCI instance whose Interrupt() method
// does the actual work.
int32
XHCI::InterruptHandler(void* data)
{
        XHCI* controller = (XHCI*)data;
        return controller->Interrupt();
}


// Interrupt service routine. Reads the status and interrupter management
// registers, writes both values back, and wakes the event thread if an event
// interrupt is flagged.
// Returns B_HANDLED_INTERRUPT for halted/error conditions,
// B_UNHANDLED_INTERRUPT if no event interrupt is flagged, and
// B_INVOKE_SCHEDULER after releasing the event semaphore.
int32
XHCI::Interrupt()
{
        SpinLocker locker(&fSpinlock);

        const uint32 status = ReadOpReg(XHCI_STS);
        const uint32 interrupterState = ReadRunReg32(XHCI_IMAN(0));

        // Write back what was read (presumably acknowledging the pending bits).
        WriteOpReg(XHCI_STS, status);
        WriteRunReg32(XHCI_IMAN(0), interrupterState);

        // Fatal conditions are only reported; no events get processed for them.
        if (status & STS_HCH) {
                TRACE_ERROR("Host Controller halted\n");
                return B_HANDLED_INTERRUPT;
        }
        if (status & STS_HSE) {
                TRACE_ERROR("Host System Error\n");
                return B_HANDLED_INTERRUPT;
        }
        if (status & STS_HCE) {
                TRACE_ERROR("Host Controller Error\n");
                return B_HANDLED_INTERRUPT;
        }

        if (status & STS_EINT) {
                TRACE("Event Interrupt\n");
                release_sem_etc(fEventSem, 1, B_DO_NOT_RESCHEDULE);
                return B_INVOKE_SCHEDULER;
        }

        TRACE("STS: 0x%" B_PRIx32 " IRQ_PENDING: 0x%" B_PRIx32 "\n",
                status, interrupterState);
        return B_UNHANDLED_INTERRUPT;
}


// Rings the doorbell of "slot" with "endpoint" as the target.
// The combination (0, 0) is the only valid one involving a zero: slot 0 must
// be paired with endpoint 0, and any other slot needs a non-zero endpoint.
// Invalid arguments cause a panic.
void
XHCI::Ring(uint8 slot, uint8 endpoint)
{
        TRACE("Ding Dong! slot:%d endpoint %d\n", slot, endpoint);
                // Terminated explicitly so this does not depend on what the
                // TRACE macro expands to.
        if ((slot == 0 && endpoint > 0) || (slot > 0 && endpoint == 0))
                panic("Ring() invalid slot/endpoint combination\n");
        if (slot > fSlotCount || endpoint >= XHCI_MAX_ENDPOINTS)
                panic("Ring() invalid slot or endpoint\n");

        WriteDoorReg32(XHCI_DOORBELL(slot), XHCI_DOORBELL_TARGET(endpoint)
                | XHCI_DOORBELL_STREAMID(0));
        ReadDoorReg32(XHCI_DOORBELL(slot));
                // Flush PCI writes
}


// Copies "trb" into the next free entry of the command ring, stamping it
// with the current cycle state, and advances the enqueue position.
// When the entry before the last one has been used, the last entry gets its
// link TRB flags refreshed and the position wraps to the ring start with the
// cycle state toggled. The doorbell is not rung here.
void
XHCI::QueueCommand(xhci_trb* trb)
{
        uint8 index = fCmdIdx;
        uint8 cycleState = fCmdCcs;

        TRACE("command[%u] = %" B_PRId32 " (0x%016" B_PRIx64 ", 0x%08" B_PRIx32
                ", 0x%08" B_PRIx32 ")\n", index, TRB_3_TYPE_GET(trb->flags),
                trb->address, trb->status, trb->flags);

        xhci_trb& entry = fCmdRing[index];
        entry.address = trb->address;
        entry.status = trb->status;

        // The cycle bit always reflects our state, and the toggle-cycle bit is
        // never set on a regular command.
        uint32 flags = trb->flags;
        flags = cycleState ? (flags | TRB_3_CYCLE_BIT) : (flags & ~TRB_3_CYCLE_BIT);
        flags &= ~TRB_3_TC_BIT;
        entry.flags = B_HOST_TO_LENDIAN_INT32(flags);

        // Remember where this command lives; HandleCmdComplete() compares the
        // address of incoming completion events against it.
        // (The offset past XHCI_MAX_EVENTS entries presumably reflects the
        // command ring being placed behind the event ring.)
        fCmdAddr = fErst->rs_addr + (XHCI_MAX_EVENTS + index) * sizeof(xhci_trb);

        index++;

        if (index == (XHCI_MAX_COMMANDS - 1)) {
                // Hand the link TRB over with the current cycle state, then wrap.
                uint32 linkFlags = TRB_3_TYPE(TRB_TYPE_LINK) | TRB_3_TC_BIT;
                if (cycleState)
                        linkFlags |= TRB_3_CYCLE_BIT;
                fCmdRing[index].flags = B_HOST_TO_LENDIAN_INT32(linkFlags);

                index = 0;
                cycleState ^= 1;
        }

        fCmdIdx = index;
        fCmdCcs = cycleState;
}


// Handles a command completion event. If the event refers to the command
// recorded in fCmdAddr, its status and flags are stored in fCmdResult and
// the command completion semaphore is released; otherwise the event is only
// logged.
void
XHCI::HandleCmdComplete(xhci_trb* trb)
{
        if (fCmdAddr != trb->address) {
                // Terminated explicitly so this does not depend on what the
                // TRACE_ERROR macro expands to.
                TRACE_ERROR("received command event for unknown command!\n");
                return;
        }

        TRACE("Received command event\n");
        fCmdResult[0] = trb->status;
        fCmdResult[1] = B_LENDIAN_TO_HOST_INT32(trb->flags);
        release_sem_etc(fCmdCompSem, 1, B_DO_NOT_RESCHEDULE);
}


// Handles a transfer event. Looks up the endpoint the event refers to,
// finds the transfer descriptor (TD) containing the referenced TRB, records
// the completion code and byte counts in it and, once the TD is complete (or
// failed), moves it to the finished list and wakes the finisher thread.
// Events for invalid slots/endpoints or unallocated endpoints are logged
// and dropped.
void
XHCI::HandleTransferComplete(xhci_trb* trb)
{
        const uint32 flags = B_LENDIAN_TO_HOST_INT32(trb->flags);
        const uint8 endpointNumber = TRB_3_ENDPOINT_GET(flags),
                slot = TRB_3_SLOT_GET(flags);

        if (slot > fSlotCount) {
                // Do not index fDevices with a slot the controller cannot have.
                TRACE_ERROR("invalid slot\n");
                return;
        }
        if (endpointNumber == 0 || endpointNumber >= XHCI_MAX_ENDPOINTS) {
                TRACE_ERROR("invalid endpoint\n");
                return;
        }

        xhci_device *device = &fDevices[slot];
        xhci_endpoint *endpoint = &device->endpoints[endpointNumber - 1];

        if (endpoint->trbs == NULL) {
                TRACE_ERROR("got TRB but endpoint is not allocated!\n");
                return;
        }

        // Use mutex_trylock first, in case we are in KDL.
        MutexLocker endpointLocker(endpoint->lock, mutex_trylock(&endpoint->lock) == B_OK);
        if (!endpointLocker.IsLocked()) {
                // We failed to get the lock. Most likely it was destroyed
                // while we were waiting for it.
                return;
        }

        TRACE("HandleTransferComplete: ed %" B_PRIu32 ", status %" B_PRId32 "\n",
                  (flags & TRB_3_EVENT_DATA_BIT), trb->status);

        uint8 completionCode = TRB_2_COMP_CODE_GET(trb->status);

        if (completionCode == COMP_RING_OVERRUN || completionCode == COMP_RING_UNDERRUN) {
                // These occur on isochronous endpoints when there is no TRB ready to be
                // executed at the appropriate time. (XHCI 1.2 § 4.10.3.1 p204.)
                // The code is only remembered here; it gets reported together with
                // the last TRB of an isochronous TD below.
                endpoint->status = completionCode;
                return;
        }

        // Exactly one of "remainder" and "transferred" ends up valid here;
        // the other one is set to -1.
        int32 remainder = TRB_2_REM_GET(trb->status), transferred = -1;
        if ((flags & TRB_3_EVENT_DATA_BIT) != 0) {
                // In the case of an Event Data TRB, value in the status field refers
                // to the actual number of bytes transferred across the whole TD.
                // (XHCI 1.2 § 6.4.2.1 Table 6-38 p478.)
                transferred = remainder;
                remainder = -1;
        } else {
                // This should only occur under error conditions, or for isochronous transfers.
                TRACE("got transfer event for a non-Event Data TRB!\n");

                if (completionCode == COMP_STOPPED_LENGTH_INVALID)
                        remainder = -1;
        }

        if (completionCode != COMP_SUCCESS && completionCode != COMP_SHORT_PACKET
                        && completionCode != COMP_STOPPED && completionCode != COMP_STOPPED_LENGTH_INVALID) {
                TRACE_ALWAYS("transfer error on slot %" B_PRId8 " endpoint %" B_PRId8
                        ": %s\n", slot, endpointNumber, xhci_error_string(completionCode));
        }

        phys_addr_t source = B_LENDIAN_TO_HOST_INT64(trb->address);
        if (source >= endpoint->trb_addr
                        && (source - endpoint->trb_addr) < (XHCI_ENDPOINT_RING_SIZE * sizeof(xhci_trb))) {
                // The "source" address points to a TRB on the ring.
                // See if we can figure out what it really corresponds to.
                const int64 offset = (source - endpoint->trb_addr) / sizeof(xhci_trb);
                const int32 type = TRB_3_TYPE_GET(endpoint->trbs[offset].flags);
                if (type == TRB_TYPE_EVENT_DATA || type == TRB_TYPE_LINK)
                        source = B_LENDIAN_TO_HOST_INT64(endpoint->trbs[offset].address);
        }

        // Search the endpoint's pending TDs for the one containing "source".
        for (xhci_td *td = endpoint->td_head; td != NULL; td = td->next) {
                int64 offset = (source - td->trb_addr) / sizeof(xhci_trb);
                if (offset < 0 || offset >= td->trb_count)
                        continue;

                TRACE("HandleTransferComplete td %p trb %" B_PRId64 " found\n",
                        td, offset);

                if (td->transfer != NULL && td->transfer->IsochronousData() != NULL) {
                        // Isochronous: record the result of this particular packet.
                        usb_isochronous_data* isochronousData = td->transfer->IsochronousData();
                        usb_iso_packet_descriptor& descriptor = isochronousData->packet_descriptors[offset];
                        if (transferred < 0)
                                transferred = (TRB_2_BYTES_GET(td->trbs[offset].status) - remainder);
                        descriptor.actual_length = transferred;
                        descriptor.status = xhci_error_status(completionCode,
                                (td->transfer->TransferPipe()->Direction() != Pipe::Out));

                        // Don't double-report completion status.
                        completionCode = COMP_SUCCESS;

                        if (offset != (td->trb_used - 1)) {
                                // We'll be sent here again.
                                return;
                        }

                        // Compute the real transferred length.
                        transferred = 0;
                        for (int32 i = 0; i < offset; i++) {
                                usb_iso_packet_descriptor& descriptor = isochronousData->packet_descriptors[i];
                                if (descriptor.status == B_NO_INIT) {
                                        // Assume success.
                                        descriptor.actual_length = descriptor.request_length;
                                        descriptor.status = B_OK;
                                }
                                transferred += descriptor.actual_length;
                        }

                        // Report the endpoint status (if any.)
                        if (endpoint->status != 0) {
                                completionCode = endpoint->status;
                                endpoint->status = 0;
                        }
                } else if (completionCode == COMP_STOPPED_LENGTH_INVALID) {
                        // To determine transferred length, sum up the lengths of all TRBs
                        // prior to the referenced one. (XHCI 1.2 § 4.6.9 p136.)
                        transferred = 0;
                        for (int32 i = 0; i < offset; i++)
                                transferred += TRB_2_BYTES_GET(td->trbs[i].status);
                }

                // The TRB at offset trb_used will be the link TRB, which we do not
                // care about (and should not generate an interrupt at all.) We really
                // care about the properly last TRB, at index "count - 1", which the
                // Event Data TRB that _LinkDescriptorForPipe creates points to.
                //
                // But if we have an unsuccessful completion code, the transfer
                // likely failed midway; so just accept it anyway.
                if (offset == (td->trb_used - 1) || completionCode != COMP_SUCCESS) {
                        _UnlinkDescriptorForPipe(td, endpoint);
                        endpointLocker.Unlock();

                        td->trb_completion_code = completionCode;
                        td->td_transferred = transferred;
                        td->trb_left = remainder;

                        // add descriptor to finished list
                        if (mutex_trylock(&fFinishedLock) != B_OK)
                                mutex_lock(&fFinishedLock);
                        td->next = fFinishedHead;
                        fFinishedHead = td;
                        mutex_unlock(&fFinishedLock);

                        release_sem_etc(fFinishTransfersSem, 1, B_DO_NOT_RESCHEDULE);
                        TRACE("HandleTransferComplete td %p done\n", td);
                } else {
                        TRACE_ERROR("successful TRB 0x%" B_PRIxPHYSADDR " was found, but it wasn't "
                                "the last in the TD!\n", source);
                }
                return;
        }
        TRACE_ERROR("TRB 0x%" B_PRIxPHYSADDR " was not found in the endpoint!\n", source);
}


// Traces the first "size" entries of the TRB array "trbs" while holding the
// controller lock. Does nothing (besides a trace) if the lock is unavailable.
void
XHCI::DumpRing(xhci_trb *trbs, uint32 size)
{
        if (!Lock()) {
                TRACE("Unable to get lock!\n");
                return;
        }

        uint32 index = 0;
        while (index < size) {
                const xhci_trb& entry = trbs[index];
                TRACE("command[%" B_PRId32 "] = %" B_PRId32 " (0x%016" B_PRIx64 ","
                        " 0x%08" B_PRIx32 ", 0x%08" B_PRIx32 ")\n", index,
                        TRB_3_TYPE_GET(B_LENDIAN_TO_HOST_INT32(entry.flags)),
                        entry.address, entry.status, entry.flags);
                index++;
        }

        Unlock();
}


// Queues the command "trb", rings the command doorbell and waits for the
// matching completion event. On completion the result status and flags are
// copied back into "trb".
// Returns B_ERROR if the controller lock cannot be taken, B_TIMED_OUT if no
// completion arrives, B_IO_ERROR if the command completed unsuccessfully,
// and B_OK otherwise.
status_t
XHCI::DoCommand(xhci_trb* trb)
{
        if (!Lock()) {
                TRACE("Unable to get lock!\n");
                return B_ERROR;
        }

        QueueCommand(trb);
        Ring(0, 0);

        // Begin with a 50ms timeout.
        status_t waitStatus = acquire_sem_etc(fCmdCompSem, 1, B_RELATIVE_TIMEOUT,
                50 * 1000);
        if (waitStatus != B_OK) {
                // We've hit the timeout. In some error cases, interrupts are not
                // generated; so here we force the event ring to be polled once.
                release_sem(fEventSem);

                // Now try again, this time with a 750ms timeout.
                waitStatus = acquire_sem_etc(fCmdCompSem, 1, B_RELATIVE_TIMEOUT,
                        750 * 1000);
        }

        if (waitStatus != B_OK) {
                TRACE("Unable to obtain fCmdCompSem!\n");
                fCmdAddr = 0;
                Unlock();
                return B_TIMED_OUT;
        }

        // eat up sems that have been released by multiple interrupts
        int32 pending = 0;
        get_sem_count(fCmdCompSem, &pending);
        if (pending > 0)
                acquire_sem_etc(fCmdCompSem, pending, B_RELATIVE_TIMEOUT, 0);

        TRACE("command complete\n");

        const uint32 completionCode = TRB_2_COMP_CODE_GET(fCmdResult[0]);
        status_t result = B_OK;
        if (completionCode != COMP_SUCCESS) {
                // Note that trb->flags still holds the command as submitted here.
                TRACE_ERROR("unsuccessful command %" B_PRId32 ", error %s (%" B_PRId32 ")\n",
                        TRB_3_TYPE_GET(trb->flags), xhci_error_string(completionCode),
                        completionCode);
                result = B_IO_ERROR;
        }

        // Hand the completion event's contents back to the caller.
        trb->status = fCmdResult[0];
        trb->flags = fCmdResult[1];

        fCmdAddr = 0;
        Unlock();
        return result;
}


// Issues a No-Op command and returns the result of DoCommand().
status_t
XHCI::Noop()
{
        TRACE("Issue No-Op\n");

        xhci_trb command;
        command.flags = TRB_3_TYPE(TRB_TYPE_CMD_NOOP);
        command.status = 0;
        command.address = 0;

        return DoCommand(&command);
}


// Issues an Enable Slot command and stores the slot ID taken from the
// completion flags in "slot".
// Returns the DoCommand() error on failure, B_BAD_VALUE if the reported
// slot ID is 0, and B_OK otherwise.
status_t
XHCI::EnableSlot(uint8* slot)
{
        TRACE("Enable Slot\n");

        xhci_trb command;
        command.flags = TRB_3_TYPE(TRB_TYPE_ENABLE_SLOT);
        command.status = 0;
        command.address = 0;

        const status_t status = DoCommand(&command);
        if (status != B_OK)
                return status;

        *slot = TRB_3_SLOT_GET(command.flags);
        if (*slot == 0)
                return B_BAD_VALUE;

        return B_OK;
}


// Issues a Disable Slot command for "slot" and returns the result of
// DoCommand().
status_t
XHCI::DisableSlot(uint8 slot)
{
        TRACE("Disable Slot\n");

        xhci_trb command;
        command.flags = TRB_3_TYPE(TRB_TYPE_DISABLE_SLOT) | TRB_3_SLOT(slot);
        command.status = 0;
        command.address = 0;

        return DoCommand(&command);
}


// Issues an Address Device command for "slot", with "inputContext" as the
// TRB address. When "bsr" is true the BSR flag is set on the command.
// Returns the result of DoCommand().
status_t
XHCI::SetAddress(uint64 inputContext, bool bsr, uint8 slot)
{
        TRACE("Set Address\n");

        uint32 flags = TRB_3_TYPE(TRB_TYPE_ADDRESS_DEVICE) | TRB_3_SLOT(slot);
        if (bsr)
                flags |= TRB_3_BSR_BIT;

        xhci_trb command;
        command.address = inputContext;
        command.status = 0;
        command.flags = flags;

        return DoCommand(&command);
}


// Issues a Configure Endpoint command for "slot", with "inputContext" as
// the TRB address. When "deconfigure" is true the DCEP flag is set on the
// command. Returns the result of DoCommand().
status_t
XHCI::ConfigureEndpoint(uint64 inputContext, bool deconfigure, uint8 slot)
{
        TRACE("Configure Endpoint\n");

        uint32 flags = TRB_3_TYPE(TRB_TYPE_CONFIGURE_ENDPOINT) | TRB_3_SLOT(slot);
        if (deconfigure)
                flags |= TRB_3_DCEP_BIT;

        xhci_trb command;
        command.address = inputContext;
        command.status = 0;
        command.flags = flags;

        return DoCommand(&command);
}


// Issues an Evaluate Context command for "slot", with "inputContext" as the
// TRB address. Returns the result of DoCommand().
status_t
XHCI::EvaluateContext(uint64 inputContext, uint8 slot)
{
        TRACE("Evaluate Context\n");

        xhci_trb command;
        command.flags = TRB_3_TYPE(TRB_TYPE_EVALUATE_CONTEXT) | TRB_3_SLOT(slot);
        command.status = 0;
        command.address = inputContext;

        return DoCommand(&command);
}


// Issues a Reset Endpoint command for "endpoint", unless the endpoint is
// already in the stopped state (in which case B_OK is returned right away).
// When "preserve" is true the PRSV flag is set on the command.
// Returns the result of DoCommand() otherwise.
status_t
XHCI::ResetEndpoint(bool preserve, xhci_endpoint* endpoint)
{
        TRACE("Reset Endpoint\n");

        switch (_GetEndpointState(endpoint)) {
                case ENDPOINT_STATE_HALTED:
                        // Only logged; the command is issued regardless.
                        TRACE("Reset Endpoint: warning, weird state!");
                        break;
                case ENDPOINT_STATE_STOPPED:
                        TRACE("Reset Endpoint: already stopped");
                        return B_OK;
                default:
                        break;
        }

        // The endpoint field of the TRB is the endpoint's id plus one.
        uint32 flags = TRB_3_TYPE(TRB_TYPE_RESET_ENDPOINT)
                | TRB_3_SLOT(endpoint->device->slot) | TRB_3_ENDPOINT(endpoint->id + 1);
        if (preserve)
                flags |= TRB_3_PRSV_BIT;

        xhci_trb command;
        command.address = 0;
        command.status = 0;
        command.flags = flags;

        return DoCommand(&command);
}


// Issues a Stop Endpoint command for "endpoint".
// Returns B_DEV_STALLED without issuing anything if the endpoint is halted,
// B_OK if it is already stopped, and the result of DoCommand() otherwise.
// When "suspend" is true the suspend flag is set on the command.
status_t
XHCI::StopEndpoint(bool suspend, xhci_endpoint* endpoint)
{
        TRACE("Stop Endpoint\n");

        switch (_GetEndpointState(endpoint)) {
                case ENDPOINT_STATE_STOPPED:
                        TRACE("Stop Endpoint: already stopped");
                        return B_OK;
                case ENDPOINT_STATE_HALTED:
                        TRACE("Stop Endpoint: error, halted");
                        return B_DEV_STALLED;
                default:
                        break;
        }

        // The endpoint field of the TRB is the endpoint's id plus one.
        uint32 flags = TRB_3_TYPE(TRB_TYPE_STOP_ENDPOINT)
                | TRB_3_SLOT(endpoint->device->slot) | TRB_3_ENDPOINT(endpoint->id + 1);
        if (suspend)
                flags |= TRB_3_SUSPEND_ENDPOINT_BIT;

        xhci_trb command;
        command.address = 0;
        command.status = 0;
        command.flags = flags;

        return DoCommand(&command);
}


// Issues a Set TR Dequeue Pointer command for the given "slot", "endpoint"
// and "stream", with "dequeue" as the new dequeue pointer.
// Returns the result of DoCommand().
status_t
XHCI::SetTRDequeue(uint64 dequeue, uint16 stream, uint8 endpoint, uint8 slot)
{
        TRACE("Set TR Dequeue\n");

        xhci_trb command;
        command.flags = TRB_3_TYPE(TRB_TYPE_SET_TR_DEQUEUE)
                | TRB_3_SLOT(slot) | TRB_3_ENDPOINT(endpoint);
        command.status = TRB_2_STREAM(stream);
        command.address = dequeue | ENDPOINT_2_DCS_BIT;
                // The DCS bit is copied from the address field as in ConfigureEndpoint.
                // (XHCI 1.2 § 4.6.10 p142.)

        return DoCommand(&command);
}


// Issues a Reset Device command for "slot" and returns the result of
// DoCommand().
status_t
XHCI::ResetDevice(uint8 slot)
{
        TRACE("Reset Device\n");

        xhci_trb command;
        command.flags = TRB_3_TYPE(TRB_TYPE_RESET_DEVICE) | TRB_3_SLOT(slot);
        command.status = 0;
        command.address = 0;

        return DoCommand(&command);
}


// Thread entry point: "data" is the XHCI instance whose CompleteEvents()
// loop is run. Always returns B_OK once that loop ends.
int32
XHCI::EventThread(void* data)
{
        XHCI* controller = (XHCI*)data;
        controller->CompleteEvents();
        return B_OK;
}


void
XHCI::CompleteEvents()
{
        while (!fStopThreads) {
                if (acquire_sem(fEventSem) < B_OK)
                        continue;

                // eat up sems that have been released by multiple interrupts
                int32 semCount = 0;
                get_sem_count(fEventSem, &semCount);
                if (semCount > 0)
                        acquire_sem_etc(fEventSem, semCount, B_RELATIVE_TIMEOUT, 0);

                ProcessEvents();
        }
}


void
XHCI::ProcessEvents()
{
        // Use mutex_trylock first, in case we are in KDL.
        MutexLocker locker(fEventLock, mutex_trylock(&fEventLock) == B_OK);
        if (!locker.IsLocked()) {
                // We failed to get the lock. This really should not happen.
                TRACE_ERROR("failed to acquire event lock!\n");
                return;
        }

        uint16 i = fEventIdx;
        uint8 j = fEventCcs;
        uint8 t = 2;

        while (1) {
                uint32 temp = B_LENDIAN_TO_HOST_INT32(fEventRing[i].flags);
                uint8 event = TRB_3_TYPE_GET(temp);
                TRACE("event[%u] = %u (0x%016" B_PRIx64 " 0x%08" B_PRIx32 " 0x%08"
                        B_PRIx32 ")\n", i, event, fEventRing[i].address,
                        fEventRing[i].status, B_LENDIAN_TO_HOST_INT32(fEventRing[i].flags));
                uint8 k = (temp & TRB_3_CYCLE_BIT) ? 1 : 0;
                if (j != k)
                        break;

                switch (event) {
                case TRB_TYPE_COMMAND_COMPLETION:
                        HandleCmdComplete(&fEventRing[i]);
                        break;
                case TRB_TYPE_TRANSFER:
                        HandleTransferComplete(&fEventRing[i]);
                        break;
                case TRB_TYPE_PORT_STATUS_CHANGE:
                        TRACE("port change detected\n");
                        break;
                default:
                        TRACE_ERROR("Unhandled event = %u\n", event);
                        break;
                }

                i++;
                if (i == XHCI_MAX_EVENTS) {
                        i = 0;
                        j ^= 1;
                        if (!--t)
                                break;
                }
        }

        fEventIdx = i;
        fEventCcs = j;

        uint64 addr = fErst->rs_addr + i * sizeof(xhci_trb);
        WriteRunReg32(XHCI_ERDP_LO(0), (uint32)addr | ERDP_BUSY);
        WriteRunReg32(XHCI_ERDP_HI(0), (uint32)(addr >> 32));
}


// Thread entry point: "data" is the XHCI instance whose FinishTransfers()
// loop is run. Always returns B_OK once that loop ends.
int32
XHCI::FinishThread(void* data)
{
        XHCI* controller = (XHCI*)data;
        controller->FinishTransfers();
        return B_OK;
}


// Finisher thread main loop: each time fFinishTransfersSem is released,
// drains the list of finished transfer descriptors (fFinishedHead), copies
// received data back to the transfer's vectors where needed, and either
// resubmits the transfer (if fragments remain) or reports it as finished.
// Runs until fStopThreads is set.
void
XHCI::FinishTransfers()
{
        while (!fStopThreads) {
                if (acquire_sem(fFinishTransfersSem) < B_OK)
                        continue;

                // eat up sems that have been released by multiple interrupts
                int32 semCount = 0;
                get_sem_count(fFinishTransfersSem, &semCount);
                if (semCount > 0)
                        acquire_sem_etc(fFinishTransfersSem, semCount, B_RELATIVE_TIMEOUT, 0);

                // fFinishedLock is held only while the list itself is touched; it
                // is dropped for the processing of each descriptor and re-acquired
                // before the loop condition is evaluated again.
                mutex_lock(&fFinishedLock);
                TRACE("finishing transfers\n");
                while (fFinishedHead != NULL) {
                        // Pop the head of the finished list.
                        xhci_td* td = fFinishedHead;
                        fFinishedHead = td->next;
                        td->next = NULL;
                        mutex_unlock(&fFinishedLock);

                        TRACE("finishing transfer td %p\n", td);

                        Transfer* transfer = td->transfer;
                        if (transfer == NULL) {
                                // No transfer? Quick way out.
                                FreeDescriptor(td);
                                mutex_lock(&fFinishedLock);
                                continue;
                        }

                        bool directionIn = (transfer->TransferPipe()->Direction() != Pipe::Out);

                        const uint8 completionCode = td->trb_completion_code;
                        status_t callbackStatus = xhci_error_status(completionCode, directionIn);

                        // On success the whole fragment counts as transferred. Otherwise
                        // use the byte count recorded in the descriptor, or derive it
                        // from the remaining length if no count was recorded (-1).
                        size_t actualLength = transfer->FragmentLength();
                        if (completionCode != COMP_SUCCESS) {
                                actualLength = td->td_transferred;
                                if (td->td_transferred == -1)
                                        actualLength = transfer->FragmentLength() - td->trb_left;
                                TRACE("transfer not successful, actualLength=%" B_PRIuSIZE "\n",
                                        actualLength);
                        }

                        // Incoming data of non-physical transfers has to be copied out of
                        // the descriptor into the transfer's vectors.
                        if (directionIn && actualLength > 0 && !transfer->IsPhysical()) {
                                TRACE("copying in iov count %ld\n", transfer->VectorCount());
                                status_t status = transfer->PrepareKernelAccess();
                                if (status == B_OK) {
                                        ReadDescriptor(td, transfer->Vector(),
                                                transfer->VectorCount(), transfer->IsPhysical());
                                } else {
                                        callbackStatus = status;
                                }
                        }

                        FreeDescriptor(td);

                        // this transfer may still have data left
                        bool finished = true;
                        transfer->AdvanceByFragment(actualLength);
                        if (completionCode == COMP_SUCCESS
                                        && transfer->FragmentLength() > 0) {
                                TRACE("still %" B_PRIuSIZE " bytes left on transfer\n",
                                        transfer->FragmentLength());
                                // Submit the next fragment; the transfer only counts as
                                // finished here if that submission fails.
                                callbackStatus = SubmitTransfer(transfer);
                                finished = (callbackStatus != B_OK);
                        }
                        if (finished) {
                                // The actualLength was already handled in AdvanceByFragment.
                                transfer->Finished(callbackStatus, 0);
                                delete transfer;
                        }

                        mutex_lock(&fFinishedLock);
                }
                mutex_unlock(&fFinishedLock);
        }
}


// Writes the 32 bit "value" to the operational register at offset "reg".
inline void
XHCI::WriteOpReg(uint32 reg, uint32 value)
{
        volatile uint32* target
                = (volatile uint32 *)(fRegisters + fOperationalRegisterOffset + reg);
        *target = value;
}


// Returns the 32 bit value of the operational register at offset "reg".
inline uint32
XHCI::ReadOpReg(uint32 reg)
{
        volatile uint32* source
                = (volatile uint32 *)(fRegisters + fOperationalRegisterOffset + reg);
        return *source;
}


// Polls the operational register "reg" in 1ms steps until the bits selected
// by "mask" equal "expected".
// Returns B_OK once they match, or B_ERROR after more than 250 unsuccessful
// polls. A trace is emitted when the 100th retry is reached.
inline status_t
XHCI::WaitOpBits(uint32 reg, uint32 mask, uint32 expected)
{
        uint32 value = ReadOpReg(reg);

        for (int loops = 0; (value & mask) != expected; loops++) {
                snooze(1000);
                value = ReadOpReg(reg);

                if (loops == 100) {
                        TRACE("delay waiting on reg 0x%" B_PRIX32 " match 0x%" B_PRIX32
                                " (0x%" B_PRIX32 ")\n", reg, expected, mask);
                } else if (loops > 250) {
                        TRACE_ERROR("timeout waiting on reg 0x%" B_PRIX32
                                " match 0x%" B_PRIX32 " (0x%" B_PRIX32 ")\n", reg, expected,
                                mask);
                        return B_ERROR;
                }
        }

        return B_OK;
}


// Returns the 32 bit value of the capability register at offset "reg".
inline uint32
XHCI::ReadCapReg32(uint32 reg)
{
        volatile uint32* source
                = (volatile uint32 *)(fRegisters + fCapabilityRegisterOffset + reg);
        return *source;
}


// Writes the 32 bit "value" to the capability register at offset "reg".
inline void
XHCI::WriteCapReg32(uint32 reg, uint32 value)
{
        volatile uint32* target
                = (volatile uint32 *)(fRegisters + fCapabilityRegisterOffset + reg);
        *target = value;
}


// Returns the 32 bit value of the runtime register at offset "reg".
inline uint32
XHCI::ReadRunReg32(uint32 reg)
{
        volatile uint32* source
                = (volatile uint32 *)(fRegisters + fRuntimeRegisterOffset + reg);
        return *source;
}


// Writes the 32 bit "value" to the runtime register at offset "reg".
inline void
XHCI::WriteRunReg32(uint32 reg, uint32 value)
{
        volatile uint32* target
                = (volatile uint32 *)(fRegisters + fRuntimeRegisterOffset + reg);
        *target = value;
}


// Returns the 32 bit value of the doorbell register at offset "reg".
inline uint32
XHCI::ReadDoorReg32(uint32 reg)
{
        volatile uint32* source
                = (volatile uint32 *)(fRegisters + fDoorbellRegisterOffset + reg);
        return *source;
}


// Writes the 32 bit "value" to the doorbell register at offset "reg".
inline void
XHCI::WriteDoorReg32(uint32 reg, uint32 value)
{
        volatile uint32* target
                = (volatile uint32 *)(fRegisters + fDoorbellRegisterOffset + reg);
        *target = value;
}


// Adjusts the address "p" of a context field for the controller's context
// size. With a context size shift of 1, the 32-byte-aligned part of the
// address' offset within its page is added once more (i.e. doubled), while
// the low 5 bits stay as they are. Otherwise "p" is returned unchanged.
inline addr_t
XHCI::_OffsetContextAddr(addr_t p)
{
        if (fContextSizeShift != 1)
                return p;

        // In-page offset, rounded down to a multiple of 32 bytes.
        const uint32 alignedPageOffset = p & ((B_PAGE_SIZE - 1) & ~31U);
        return p + alignedPageOffset;
}

// Reads a 32 bit context field; "p" is translated with _OffsetContextAddr()
// before it is dereferenced.
inline uint32
XHCI::_ReadContext(uint32* p)
{
        const uint32* source = (uint32*)_OffsetContextAddr((addr_t)p);
        return *source;
}


// Writes a 32 bit context field; "p" is translated with _OffsetContextAddr()
// before it is dereferenced.
inline void
XHCI::_WriteContext(uint32* p, uint32 value)
{
        uint32* target = (uint32*)_OffsetContextAddr((addr_t)p);
        *target = value;
}


// Reads a 64 bit context field; "p" is translated with _OffsetContextAddr()
// before it is dereferenced.
inline uint64
XHCI::_ReadContext(uint64* p)
{
        const uint64* source = (uint64*)_OffsetContextAddr((addr_t)p);
        return *source;
}


// Writes a 64 bit context field; "p" is translated with _OffsetContextAddr()
// before it is dereferenced.
inline void
XHCI::_WriteContext(uint64* p, uint64 value)
{
        uint64* target = (uint64*)_OffsetContextAddr((addr_t)p);
        *target = value;
}