root/stand/efi/libefi/efipart.c
/*-
 * Copyright (c) 2010 Marcel Moolenaar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/disk.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/queue.h>
#include <stddef.h>
#include <stdarg.h>

#include <bootstrap.h>

#include <efi.h>
#include <efilib.h>
#include <efichar.h>
#include <Protocol/DevicePath.h>
#include <Protocol/BlockIo.h>
#include <disk.h>

/* Protocol GUID used below to locate and open EFI Block I/O instances. */
static EFI_GUID blkio_guid = BLOCK_IO_PROTOCOL;

/*
 * Predicate handed to efipart_get_pd(): first argument is the list entry
 * being examined, second is the caller supplied data (may be unused).
 */
typedef bool (*pd_test_cb_t)(pdinfo_t *, pdinfo_t *);
/* devsw init callbacks, one per device class (fd, cd, disk). */
static int efipart_initfd(void);
static int efipart_initcd(void);
static int efipart_inithd(void);
static void efipart_cdinfo_add(pdinfo_t *);

/* Cached entry point and the routine the block cache calls back into. */
static int efipart_strategy(void *, int, daddr_t, size_t, char *, size_t *);
static int efipart_realstrategy(void *, int, daddr_t, size_t, char *, size_t *);

/* devsw open/close/ioctl handlers shared by all three device classes. */
static int efipart_open(struct open_file *, ...);
static int efipart_close(struct open_file *);
static int efipart_ioctl(struct open_file *, u_long, void *);

/* devsw print callbacks; each one lists a single device class. */
static int efipart_printfd(int);
static int efipart_printcd(int);
static int efipart_printhd(int);

/*
 * EISA PNP IDs for floppy controllers. efipart_floppy() wraps these in
 * EISA_PNP_ID() and compares the result against the HID of an ACPI node.
 */
#define PNP0604 0x604
#define PNP0700 0x700
#define PNP0701 0x701

/*
 * Bounce buffer max size (0x4000 == 16384; presumably bytes -- confirm
 * at the point of use).
 */
#define BIO_BUFFER_SIZE 0x4000

/*
 * Device switch entry for floppy devices ("fd"). It shares the strategy,
 * open, close and ioctl handlers with the cd and disk entries; only the
 * init and print callbacks are specific to this class.
 */
struct devsw efipart_fddev = {
        .dv_name = "fd",
        .dv_type = DEVT_FD,
        .dv_init = efipart_initfd,
        .dv_strategy = efipart_strategy,
        .dv_open = efipart_open,
        .dv_close = efipart_close,
        .dv_ioctl = efipart_ioctl,
        .dv_print = efipart_printfd,
        .dv_cleanup = nullsys,
};

/*
 * Device switch entry for CD devices ("cd"). It shares the strategy,
 * open, close and ioctl handlers with the fd and disk entries; only the
 * init and print callbacks are specific to this class.
 */
struct devsw efipart_cddev = {
        .dv_name = "cd",
        .dv_type = DEVT_CD,
        .dv_init = efipart_initcd,
        .dv_strategy = efipart_strategy,
        .dv_open = efipart_open,
        .dv_close = efipart_close,
        .dv_ioctl = efipart_ioctl,
        .dv_print = efipart_printcd,
        .dv_cleanup = nullsys,
};

/*
 * Device switch entry for hard disks ("disk"). Besides the handlers it
 * shares with the fd and cd entries, this one also installs the generic
 * disk_fmtdev()/disk_parsedev() helpers for device name handling.
 */
struct devsw efipart_hddev = {
        .dv_name = "disk",
        .dv_type = DEVT_DISK,
        .dv_init = efipart_inithd,
        .dv_strategy = efipart_strategy,
        .dv_open = efipart_open,
        .dv_close = efipart_close,
        .dv_ioctl = efipart_ioctl,
        .dv_print = efipart_printhd,
        .dv_cleanup = nullsys,
        .dv_fmtdev = disk_fmtdev,
        .dv_parsedev = disk_parsedev,
};

/*
 * Per-class device lists. Entries are appended by the matching devsw
 * init callback (efipart_initfd/initcd/inithd and their helpers).
 */
static pdinfo_list_t fdinfo = STAILQ_HEAD_INITIALIZER(fdinfo);
static pdinfo_list_t cdinfo = STAILQ_HEAD_INITIALIZER(cdinfo);
static pdinfo_list_t hdinfo = STAILQ_HEAD_INITIALIZER(hdinfo);

/*
 * efipart_inithandles() builds the pdinfo list from the block device
 * handles. Each devsw init callback then picks its items from pdinfo
 * and moves them to the proper device list.
 * In an ideal world, pdinfo ends up empty once all devsw initializers
 * have been called.
 */
static pdinfo_list_t pdinfo = STAILQ_HEAD_INITIALIZER(pdinfo);

/*
 * Map a devsw entry to the list that holds devices of its type.
 * Returns NULL when the type is not one of disk, cd or fd.
 */
pdinfo_list_t *
efiblk_get_pdinfo_list(struct devsw *dev)
{
        pdinfo_list_t *list;

        if (dev->dv_type == DEVT_DISK)
                list = &hdinfo;
        else if (dev->dv_type == DEVT_CD)
                list = &cdinfo;
        else if (dev->dv_type == DEVT_FD)
                list = &fdinfo;
        else
                list = NULL;

        return (list);
}

/*
 * Look up the pdinfo entry whose unit number matches dev->d_unit in the
 * list belonging to dev->d_dev. Returns NULL when the device type has no
 * list or no entry carries that unit.
 *
 * XXX this gets called way way too often, investigate
 */
pdinfo_t *
efiblk_get_pdinfo(struct devdesc *dev)
{
        pdinfo_list_t *list;
        pdinfo_t *entry;

        list = efiblk_get_pdinfo_list(dev->d_dev);
        if (list == NULL)
                return (NULL);

        /* The iterator is NULL once the list is exhausted. */
        STAILQ_FOREACH(entry, list, pd_link) {
                if (entry->pd_unit == dev->d_unit)
                        break;
        }
        return (entry);
}

/*
 * Resolve a device path to a Block I/O handle through the firmware and
 * return the pdinfo entry registered for that handle, or NULL when the
 * firmware lookup fails or the handle is unknown to us.
 */
pdinfo_t *
efiblk_get_pdinfo_by_device_path(EFI_DEVICE_PATH *path)
{
        EFI_DEVICE_PATH *cursor;
        EFI_HANDLE handle;
        EFI_STATUS status;

        /*
         * The path is passed by reference, so give the firmware a local
         * copy of the pointer.
         */
        cursor = path;
        status = BS->LocateDevicePath(&blkio_guid, &cursor, &handle);
        if (!EFI_ERROR(status))
                return (efiblk_get_pdinfo_by_handle(handle));

        return (NULL);
}

/*
 * True when h is either the primary handle or the alias handle of pd.
 */
static bool
same_handle(pdinfo_t *pd, EFI_HANDLE h)
{
        if (pd->pd_handle == h)
                return (true);
        return (pd->pd_alias == h);
}

pdinfo_t *
efiblk_get_pdinfo_by_handle(EFI_HANDLE h)
{
        pdinfo_t *dp, *pp;

        /*
         * Check hard disks, then cd, then floppy
         */
        STAILQ_FOREACH(dp, &hdinfo, pd_link) {
                if (same_handle(dp, h))
                        return (dp);
                STAILQ_FOREACH(pp, &dp->pd_part, pd_link) {
                        if (same_handle(pp, h))
                                return (pp);
                }
        }
        STAILQ_FOREACH(dp, &cdinfo, pd_link) {
                if (same_handle(dp, h))
                        return (dp);
                STAILQ_FOREACH(pp, &dp->pd_part, pd_link) {
                        if (same_handle(pp, h))
                                return (pp);
                }
        }
        STAILQ_FOREACH(dp, &fdinfo, pd_link) {
                if (same_handle(dp, h))
                        return (dp);
        }
        return (NULL);
}

/*
 * Count the entries on a pdinfo list.
 */
static int
efiblk_pdinfo_count(pdinfo_list_t *pdi)
{
        pdinfo_t *entry;
        int total;

        total = 0;
        STAILQ_FOREACH(entry, pdi, pd_link)
                total += 1;

        return (total);
}

/*
 * Find the entry on pdi whose device path equals devpath with its last
 * node trimmed off, i.e. the direct parent. Returns NULL when there is
 * no such entry or when the trimmed copy can not be allocated.
 */
static pdinfo_t *
efipart_find_parent(pdinfo_list_t *pdi, EFI_DEVICE_PATH *devpath)
{
        EFI_DEVICE_PATH *trimmed;
        pdinfo_t *cand, *found;

        /* We should not get out of memory here but be careful. */
        trimmed = efi_devpath_trim(devpath);
        if (trimmed == NULL)
                return (NULL);

        found = NULL;
        STAILQ_FOREACH(cand, pdi, pd_link) {
                /* Only an exact match counts as the parent. */
                if (efi_devpath_match(cand->pd_devpath, trimmed)) {
                        found = cand;
                        break;
                }
        }

        free(trimmed);
        return (found);
}

/*
 * Return true when we should ignore this device.
 *
 * Every path that answers true first calls efi_close_devpath(h); paths
 * that answer false leave the handle untouched.
 */
static bool
efipart_ignore_device(EFI_HANDLE h, EFI_BLOCK_IO *blkio,
    EFI_DEVICE_PATH *devpath)
{
        EFI_DEVICE_PATH *tail, *trimmed;
        bool usb_parent;

        /*
         * We assume the block size 512 or greater power of 2.
         * Also skip devices with block size > 64k (16 is max
         * ashift supported by zfs).
         * iPXE is known to insert stub BLOCK IO device with
         * BlockSize 1.
         */
        if (blkio->Media->BlockSize < 512 ||
            blkio->Media->BlockSize > (1 << 16) ||
            !powerof2(blkio->Media->BlockSize))
                goto ignore;

        /* Allowed values are 0, 1 and power of 2. */
        if (blkio->Media->IoAlign > 1 &&
            !powerof2(blkio->Media->IoAlign))
                goto ignore;

        /*
         * With device tree setup:
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x1)
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x2)
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x3)
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x3)/CDROM..
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x3)/CDROM..
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x4)
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x5)
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x6)
         * PciRoot(0x0)/Pci(0x14,0x0)/USB(0x5,0)/USB(0x2,0x0)/Unit(0x7)
         *
         * In the above example only Unit(0x3) has media, all other nodes
         * are missing media and should not be used.
         *
         * No media does not always mean there is no device, but in the
         * above case we can not really assume there is any device.
         * Therefore, if this node is USB, or this node is Unit (LUN) and
         * direct parent is USB and we have no media, we will ignore this
         * device.
         *
         * Variation of the same situation, but with SCSI devices:
         * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x1)
         * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x2)
         * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x3)
         * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x3)/CD..
         * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x3)/CD..
         * PciRoot(0x0)/Pci(0x1a,0x0)/USB(0x1,0)/USB(0x3,0x0)/SCSI(0x0,0x4)
         *
         * Here above the SCSI luns 1,2 and 4 have no media.
         */

        /* A device that has media is never ignored past this point. */
        if (blkio->Media->MediaPresent)
                return (false);

        tail = efi_devpath_last_node(devpath);
        if (tail == NULL)
                return (false);

        /* USB without media present */
        if (DevicePathType(tail) == MESSAGING_DEVICE_PATH &&
            DevicePathSubType(tail) == MSG_USB_DP)
                goto ignore;

        /* Inspect the direct parent: is its last node a USB node? */
        trimmed = efi_devpath_trim(devpath);
        if (trimmed == NULL)
                return (false);

        tail = efi_devpath_last_node(trimmed);
        if (tail == NULL) {
                free(trimmed);
                return (false);
        }
        usb_parent = (DevicePathType(tail) == MESSAGING_DEVICE_PATH &&
            DevicePathSubType(tail) == MSG_USB_DP);
        free(trimmed);

        /* Back to the last node of the device itself. */
        tail = efi_devpath_last_node(devpath);
        if (tail == NULL)
                return (false);

        if (!usb_parent || DevicePathType(tail) != MESSAGING_DEVICE_PATH)
                return (false);

        /*
         * no media, parent is USB: ignore only when the device path
         * ends in a LUN or SCSI node.
         */
        if (DevicePathSubType(tail) != MSG_DEVICE_LOGICAL_UNIT_DP &&
            DevicePathSubType(tail) != MSG_SCSI_DP)
                return (false);

ignore:
        efi_close_devpath(h);
        return (true);
}

/*
 * Enumerate all handles that carry the Block I/O protocol and build the
 * pdinfo list from them, then link every entry to its direct parent
 * (by device path).
 *
 * Does nothing when pdinfo is already populated. Returns 0 on success,
 * ENOMEM when an allocation fails, or the errno translation of the
 * firmware status when the handles can not be located.
 *
 * Handles are skipped (not treated as fatal) when they have no device
 * path, when Block I/O can not be opened, or when
 * efipart_ignore_device() rejects them. Whenever a handle is dropped
 * after its device path was looked up, the device path is closed again:
 * efipart_ignore_device() does that itself, the other paths do it here.
 */
int
efipart_inithandles(void)
{
        unsigned i, nin;
        UINTN sz;
        EFI_HANDLE *hin;
        EFI_DEVICE_PATH *devpath;
        EFI_BLOCK_IO *blkio;
        EFI_STATUS status;
        pdinfo_t *pd;

        if (!STAILQ_EMPTY(&pdinfo))
                return (0);

        /* First call with an empty buffer to learn the required size. */
        sz = 0;
        hin = NULL;
        status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz, hin);
        if (status == EFI_BUFFER_TOO_SMALL) {
                hin = malloc(sz);
                if (hin == NULL)
                        return (ENOMEM);
                status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz,
                    hin);
                if (EFI_ERROR(status))
                        free(hin);
        }
        if (EFI_ERROR(status))
                return (efi_status_to_errno(status));

        nin = sz / sizeof(*hin);
#ifdef EFIPART_DEBUG
        printf("%s: Got %u BLOCK IO MEDIA handle(s)\n", __func__, nin);
#endif

        for (i = 0; i < nin; i++) {
                /*
                 * Get devpath and open protocol.
                 * We should not get errors here
                 */
                if ((devpath = efi_lookup_devpath(hin[i])) == NULL)
                        continue;

                status = OpenProtocolByHandle(hin[i], &blkio_guid,
                    (void **)&blkio);
                if (EFI_ERROR(status)) {
                        printf("error %lu\n", DECODE_ERROR(status));
                        /* Do not leave the device path open. */
                        efi_close_devpath(hin[i]);
                        continue;
                }

                /* Closes the device path itself when it says "ignore". */
                if (efipart_ignore_device(hin[i], blkio, devpath))
                        continue;

                /* This is bad. */
                if ((pd = calloc(1, sizeof(*pd))) == NULL) {
                        printf("efipart_inithandles: Out of memory.\n");
                        efi_close_devpath(hin[i]);
                        free(hin);
                        return (ENOMEM);
                }
                STAILQ_INIT(&pd->pd_part);

                pd->pd_handle = hin[i];
                pd->pd_devpath = devpath;
                pd->pd_blkio = blkio;
                STAILQ_INSERT_TAIL(&pdinfo, pd, pd_link);
        }

        /*
         * Walk pdinfo and set parents based on device path.
         */
        STAILQ_FOREACH(pd, &pdinfo, pd_link) {
                pd->pd_parent = efipart_find_parent(&pdinfo, pd->pd_devpath);
        }
        free(hin);
        return (0);
}

/*
 * Return the first node on plist for which pd_test(node, data) is true,
 * or NULL when no node passes the test.
 */
static pdinfo_t *
efipart_get_pd(pdinfo_list_t *plist, pd_test_cb_t pd_test, pdinfo_t *data)
{
        pdinfo_t *cand, *match;

        match = NULL;
        STAILQ_FOREACH(cand, plist, pd_link) {
                if (!pd_test(cand, data))
                        continue;
                match = cand;
                break;
        }

        return (match);
}

/*
 * If node is an ACPI device path node whose HID is one of the known
 * floppy controller PNP IDs, return it cast to ACPI_HID_DEVICE_PATH;
 * otherwise return NULL.
 */
static ACPI_HID_DEVICE_PATH *
efipart_floppy(EFI_DEVICE_PATH *node)
{
        ACPI_HID_DEVICE_PATH *hid_node;

        if (DevicePathType(node) != ACPI_DEVICE_PATH ||
            DevicePathSubType(node) != ACPI_DP)
                return (NULL);

        hid_node = (ACPI_HID_DEVICE_PATH *) node;
        if (hid_node->HID != EISA_PNP_ID(PNP0604) &&
            hid_node->HID != EISA_PNP_ID(PNP0700) &&
            hid_node->HID != EISA_PNP_ID(PNP0701))
                return (NULL);

        return (hid_node);
}

/*
 * pd_test callback: true when the last node of the device path of fd
 * is a floppy controller node. The data argument is not used.
 */
static bool
efipart_testfd(pdinfo_t *fd, pdinfo_t *data __unused)
{
        EFI_DEVICE_PATH *tail;

        tail = efi_devpath_last_node(fd->pd_devpath);
        return (tail != NULL && efipart_floppy(tail) != NULL);
}

/*
 * devsw init callback for floppies: move every floppy entry from pdinfo
 * to fdinfo and register the resulting count with the block cache.
 *
 * When a floppy entry has a parent, the parent entry is what ends up on
 * fdinfo: it takes the child's handle as alias and the ACPI UID as unit
 * number, and the child entry is freed. Always returns 0.
 */
static int
efipart_initfd(void)
{
        ACPI_HID_DEVICE_PATH *hid_node;
        EFI_DEVICE_PATH *tail;
        pdinfo_t *owner, *fd;

        for (;;) {
                fd = efipart_get_pd(&pdinfo, efipart_testfd, NULL);
                if (fd == NULL)
                        break;

                /*
                 * efipart_testfd() only accepts entries for which both
                 * lookups below succeed, so these re-checks are not
                 * expected to fail.
                 */
                tail = efi_devpath_last_node(fd->pd_devpath);
                if (tail == NULL)
                        continue;

                hid_node = efipart_floppy(tail);
                if (hid_node == NULL)
                        continue;

                STAILQ_REMOVE(&pdinfo, fd, pdinfo, pd_link);
                owner = fd->pd_parent;
                if (owner == NULL) {
                        fd->pd_unit = hid_node->UID;
                } else {
                        /* Promote the parent and drop the child entry. */
                        STAILQ_REMOVE(&pdinfo, owner, pdinfo, pd_link);
                        owner->pd_alias = fd->pd_handle;
                        owner->pd_unit = hid_node->UID;
                        free(fd);
                        fd = owner;
                }
                fd->pd_devsw = &efipart_fddev;
                STAILQ_INSERT_TAIL(&fdinfo, fd, pd_link);
        }

        bcache_add_dev(efiblk_pdinfo_count(&fdinfo));
        return (0);
}

/*
 * Move cd from pdinfo to the CD device tree, parents first.
 *
 * The parent chain is added recursively before cd itself. An entry with
 * a parent is appended to that parent's pd_part list, an entry without
 * one is appended to cdinfo. The unit number is one more than the unit
 * of the current last entry on the target list, or 0 for an empty list.
 * Nothing happens when cd is NULL or no entry on pdinfo matches its
 * device path (it was already added).
 */
static void
efipart_cdinfo_add(pdinfo_t *cd)
{
        pdinfo_list_t *target;
        pdinfo_t *parent, *it, *tail;

        if (cd == NULL)
                return;

        /* Make sure we have parent added */
        parent = cd->pd_parent;
        efipart_cdinfo_add(parent);

        STAILQ_FOREACH(it, &pdinfo, pd_link) {
                if (efi_devpath_match(it->pd_devpath, cd->pd_devpath))
                        break;
        }
        if (it == NULL) {
                /* This device is already added. */
                return;
        }
        STAILQ_REMOVE(&pdinfo, cd, pdinfo, pd_link);

        target = (parent != NULL) ? &parent->pd_part : &cdinfo;

        tail = STAILQ_LAST(target, pdinfo, pd_link);
        cd->pd_unit = (tail != NULL) ? tail->pd_unit + 1 : 0;
        cd->pd_devsw = &efipart_cddev;
        STAILQ_INSERT_TAIL(target, cd, pd_link);
}

/*
 * pd_test callback: true when cd looks like a CD device. That is the
 * case when its last device path node is a CDROM media node, or when it
 * is a removable device with no media present. Floppy nodes and entries
 * without a last node are never CDs. The data argument is not used.
 */
static bool
efipart_testcd(pdinfo_t *cd, pdinfo_t *data __unused)
{
        EFI_DEVICE_PATH *tail;

        tail = efi_devpath_last_node(cd->pd_devpath);
        if (tail == NULL || efipart_floppy(tail) != NULL)
                return (false);

        if (DevicePathType(tail) == MEDIA_DEVICE_PATH &&
            DevicePathSubType(tail) == MEDIA_CDROM_DP)
                return (true);

        /* cd drive without the media. */
        return (cd->pd_blkio->Media->RemovableMedia &&
            !cd->pd_blkio->Media->MediaPresent);
}

/*
 * pd_test callback: true when pd is the parent of dev, judged by
 * comparing the device path of dev's recorded parent with that of pd.
 * A device without a parent is nobody's child.
 */
static bool
efipart_testchild(pdinfo_t *dev, pdinfo_t *pd)
{
        return (dev->pd_parent != NULL &&
            efi_devpath_match(dev->pd_parent->pd_devpath, pd->pd_devpath));
}

/*
 * devsw init callback for CDs: move every CD entry from pdinfo into the
 * CD device tree, then pull in the remaining pdinfo entries that are
 * children of the top-level CD devices, and register the number of
 * top-level CD devices with the block cache. Always returns 0.
 */
static int
efipart_initcd(void)
{
        pdinfo_t *cd, *kid;

        for (;;) {
                cd = efipart_get_pd(&pdinfo, efipart_testcd, NULL);
                if (cd == NULL)
                        break;
                efipart_cdinfo_add(cd);
        }

        /* Find all children of CD devices we did add above. */
        STAILQ_FOREACH(cd, &cdinfo, pd_link) {
                kid = efipart_get_pd(&pdinfo, efipart_testchild, cd);
                while (kid != NULL) {
                        efipart_cdinfo_add(kid);
                        kid = efipart_get_pd(&pdinfo, efipart_testchild, cd);
                }
        }

        bcache_add_dev(efiblk_pdinfo_count(&cdinfo));
        return (0);
}

/*
 * Append hd to the partition list of its parent disk and pick its unit
 * number from the device path node. The caller is expected to pass an
 * hd whose pd_parent is set; a NULL node is silently ignored.
 */
static void
efipart_hdinfo_add_node(pdinfo_t *hd, EFI_DEVICE_PATH *node)
{
        pdinfo_t *disk, *tail;

        if (node == NULL)
                return;

        disk = hd->pd_parent;
        if (DevicePathSubType(node) == MEDIA_HARDDRIVE_DP) {
                /* A hard drive node carries the partition number. */
                hd->pd_unit = ((HARDDRIVE_DEVICE_PATH *)node)->PartitionNumber;
        } else {
                /*
                 * If the node is not MEDIA_HARDDRIVE_DP, it is a
                 * sub-partition. This can happen with Vendor nodes, and
                 * since we do not know more about those nodes, we just
                 * count them.
                 */
                tail = STAILQ_LAST(&disk->pd_part, pdinfo, pd_link);
                hd->pd_unit = (tail == NULL) ? 0 : tail->pd_unit + 1;
        }

        hd->pd_devsw = &efipart_hddev;
        STAILQ_INSERT_TAIL(&disk->pd_part, hd, pd_link);
}

/*
 * Add a device whose last device path node is MEDIA_FILEPATH_DP.
 *
 * The MEDIA_FILEPATH_DP has device name.
 * From U-Boot sources it looks like names are in the form
 * of typeN:M, where type is interface type, N is disk id
 * and M is partition id.
 *
 * A name without a colon is added to hdinfo as a disk. A name with a
 * colon is added as a partition of the last disk on hdinfo, with the
 * number after the colon as its unit. On any failure a message is
 * printed and hd is freed.
 */
static void
efipart_hdinfo_add_filepath(pdinfo_t *hd, FILEPATH_DEVICE_PATH *node)
{
        pdinfo_t *disk;
        char *name, *colon;
        int namelen;

        /* Default unit: next one after the last disk on hdinfo. */
        disk = STAILQ_LAST(&hdinfo, pdinfo, pd_link);
        hd->pd_unit = (disk == NULL) ? 0 : disk->pd_unit + 1;

        /* FILEPATH_DEVICE_PATH has 0 terminated string */
        namelen = ucs2len(node->PathName);
        name = malloc(namelen + 1);
        if (name == NULL) {
                printf("Failed to add disk, out of memory\n");
                free(hd);
                return;
        }
        cpy16to8(node->PathName, name, namelen + 1);

        /*
         * Assume we are receiving handles in order, first disk handle,
         * then partitions for this disk. If this assumption proves
         * false, this code would need update.
         */
        colon = strchr(name, ':');
        if (colon == NULL) {
                /* no colon, add the disk */
                hd->pd_devsw = &efipart_hddev;
                STAILQ_INSERT_TAIL(&hdinfo, hd, pd_link);
                free(name);
                return;
        }

        /* The partition id follows the colon. */
        errno = 0;
        hd->pd_unit = (int)strtol(colon + 1, NULL, 0);
        if (errno != 0) {
                printf("Bad unit number for partition \"%s\"\n", name);
                goto discard;
        }

        /*
         * We should have disk registered, if not, we are receiving
         * handles out of order, and this code should be reworked
         * to create "blank" disk for partition, and to find the
         * disk based on PathName compares.
         */
        if (disk == NULL) {
                printf("BUG: No disk for partition \"%s\"\n", name);
                goto discard;
        }

        /* Add the partition. */
        hd->pd_parent = disk;
        hd->pd_devsw = &efipart_hddev;
        STAILQ_INSERT_TAIL(&disk->pd_part, hd, pd_link);
        free(name);
        return;

discard:
        free(name);
        free(hd);
}

/*
 * Move hd from pdinfo to the hard disk device tree, parents first.
 *
 * The parent chain is added recursively before hd itself. Depending on
 * the last device path node, hd is handed to the filepath helper, added
 * as a partition of its parent, or appended to hdinfo as a new disk
 * whose unit is one more than the unit of the last disk (0 for the
 * first). Nothing happens when hd is NULL or no entry on pdinfo matches
 * its device path (it was already added).
 */
static void
efipart_hdinfo_add(pdinfo_t *hd)
{
        EFI_DEVICE_PATH *tail;
        pdinfo_t *parent, *it, *prev;

        if (hd == NULL)
                return;

        /* Make sure we have parent added */
        parent = hd->pd_parent;
        efipart_hdinfo_add(parent);

        STAILQ_FOREACH(it, &pdinfo, pd_link) {
                if (efi_devpath_match(it->pd_devpath, hd->pd_devpath))
                        break;
        }
        if (it == NULL) {
                /* This device is already added. */
                return;
        }
        STAILQ_REMOVE(&pdinfo, hd, pdinfo, pd_link);

        tail = efi_devpath_last_node(hd->pd_devpath);
        if (tail == NULL)
                return;

        if (DevicePathType(tail) == MEDIA_DEVICE_PATH &&
            DevicePathSubType(tail) == MEDIA_FILEPATH_DP) {
                efipart_hdinfo_add_filepath(hd,
                    (FILEPATH_DEVICE_PATH *)tail);
        } else if (parent != NULL) {
                efipart_hdinfo_add_node(hd, tail);
        } else {
                /* Add the disk. */
                prev = STAILQ_LAST(&hdinfo, pdinfo, pd_link);
                hd->pd_unit = (prev == NULL) ? 0 : prev->pd_unit + 1;
                hd->pd_devsw = &efipart_hddev;
                STAILQ_INSERT_TAIL(&hdinfo, hd, pd_link);
        }
}

/*
 * pd_test callback: anything that is neither a floppy nor a CD is
 * treated as a hard disk. The data argument is not used.
 */
static bool
efipart_testhd(pdinfo_t *hd, pdinfo_t *data __unused)
{
        return (!efipart_testfd(hd, NULL) && !efipart_testcd(hd, NULL));
}

/*
 * devsw init callback for hard disks: move every entry that tests as a
 * hard disk from pdinfo into the disk device tree and register the
 * number of top-level disks with the block cache. Always returns 0.
 */
static int
efipart_inithd(void)
{
        pdinfo_t *disk;

        for (;;) {
                disk = efipart_get_pd(&pdinfo, efipart_testhd, NULL);
                if (disk == NULL)
                        break;
                efipart_hdinfo_add(disk);
        }

        bcache_add_dev(efiblk_pdinfo_count(&hdinfo));
        return (0);
}

/*
 * Print the devices on pdlist under the heading "<dv_name> devices:".
 *
 * For every device one line with its name, block count, block size and
 * media state is printed; with verbose set the device path is printed
 * first. When media is present the device is additionally opened with
 * disk_open() and its contents are listed through disk_print().
 *
 * Returns 0 when the list is empty or everything was printed, otherwise
 * the non-zero value returned by pager_output() or disk_print(), which
 * ends the listing early.
 */
static int
efipart_print_common(struct devsw *dev, pdinfo_list_t *pdlist, int verbose)
{
        int ret = 0;
        EFI_BLOCK_IO *blkio;
        EFI_STATUS status;
        EFI_HANDLE h;
        pdinfo_t *pd;
        CHAR16 *text;
        struct disk_devdesc pd_dev;
        char line[80];

        if (STAILQ_EMPTY(pdlist))
                return (0);

        printf("%s devices:", dev->dv_name);
        if ((ret = pager_output("\n")) != 0)
                return (ret);

        STAILQ_FOREACH(pd, pdlist, pd_link) {
                h = pd->pd_handle;
                if (verbose) {  /* Output the device path. */
                        text = efi_devpath_name(efi_lookup_devpath(h));
                        if (text != NULL) {
                                printf("  %S", text);
                                efi_free_devpath_name(text);
                                if ((ret = pager_output("\n")) != 0)
                                        break;
                        }
                }
                /* "line" doubles as the prefix handed to disk_print(). */
                snprintf(line, sizeof(line),
                    "    %s%d", dev->dv_name, pd->pd_unit);
                printf("%s:", line);
                status = OpenProtocolByHandle(h, &blkio_guid, (void **)&blkio);
                if (!EFI_ERROR(status)) {
                        /* LastBlock == 0 is reported as 0 blocks, no size. */
                        printf("    %llu",
                            blkio->Media->LastBlock == 0? 0:
                            (unsigned long long) (blkio->Media->LastBlock + 1));
                        if (blkio->Media->LastBlock != 0) {
                                printf(" X %u", blkio->Media->BlockSize);
                        }
                        printf(" blocks");
                        if (blkio->Media->MediaPresent) {
                                if (blkio->Media->RemovableMedia)
                                        printf(" (removable)");
                        } else {
                                printf(" (no media)");
                        }
                        if ((ret = pager_output("\n")) != 0)
                                break;
                        /* Nothing to list on a device without media. */
                        if (!blkio->Media->MediaPresent)
                                continue;

                        /*
                         * Record the interface in the entry before opening
                         * the disk; presumably the I/O done on behalf of
                         * disk_open() reads pd_blkio -- confirm.
                         */
                        pd->pd_blkio = blkio;
                        pd_dev.dd.d_dev = dev;
                        pd_dev.dd.d_unit = pd->pd_unit;
                        pd_dev.d_slice = D_SLICENONE;
                        pd_dev.d_partition = D_PARTNONE;
                        ret = disk_open(&pd_dev, blkio->Media->BlockSize *
                            (blkio->Media->LastBlock + 1),
                            blkio->Media->BlockSize);
                        if (ret == 0) {
                                ret = disk_print(&pd_dev, line, verbose);
                                disk_close(&pd_dev);
                                if (ret != 0)
                                        return (ret);
                        } else {
                                /* Do not fail from disk_open() */
                                ret = 0;
                        }
                } else {
                        /* No Block I/O: just terminate the name line. */
                        if ((ret = pager_output("\n")) != 0)
                                break;
                }
        }
        return (ret);
}

/*
 * devsw print callback for floppies: list the fdinfo devices.
 * Returns whatever efipart_print_common() returns.
 */
static int
efipart_printfd(int verbose)
{
        return (efipart_print_common(&efipart_fddev, &fdinfo, verbose));
}

/*
 * devsw print callback for CDs: list the cdinfo devices.
 * Returns whatever efipart_print_common() returns.
 */
static int
efipart_printcd(int verbose)
{
        return (efipart_print_common(&efipart_cddev, &cdinfo, verbose));
}

/*
 * devsw print callback for hard disks: list the hdinfo devices.
 * Returns whatever efipart_print_common() returns.
 */
static int
efipart_printhd(int verbose)
{
        return (efipart_print_common(&efipart_hddev, &hdinfo, verbose));
}

static int
efipart_open(struct open_file *f, ...)
{
        va_list args;
        struct disk_devdesc *dev;
        pdinfo_t *pd;
        EFI_BLOCK_IO *blkio;
        EFI_STATUS status;

        va_start(args, f);
        dev = va_arg(args, struct disk_devdesc *);
        va_end(args);
        if (dev == NULL)
                return (EINVAL);

        pd = efiblk_get_pdinfo((struct devdesc *)dev);
        if (pd == NULL)
                return (EIO);

        if (pd->pd_blkio == NULL) {
                status = OpenProtocolByHandle(pd->pd_handle, &blkio_guid,
                    (void **)&pd->pd_blkio);
                if (EFI_ERROR(status))
                        return (efi_status_to_errno(status));
        }

        blkio = pd->pd_blkio;
        if (!blkio->Media->MediaPresent)
                return (EAGAIN);

        pd->pd_open++;
        if (pd->pd_bcache == NULL)
                pd->pd_bcache = bcache_allocate();

        if (dev->dd.d_dev->dv_type == DEVT_DISK) {
                int rc;

                rc = disk_open(dev,
                    blkio->Media->BlockSize * (blkio->Media->LastBlock + 1),
                    blkio->Media->BlockSize);
                if (rc != 0) {
                        pd->pd_open--;
                        if (pd->pd_open == 0) {
                                pd->pd_blkio = NULL;
                                bcache_free(pd->pd_bcache);
                                pd->pd_bcache = NULL;
                        }
                }
                return (rc);
        }
        return (0);
}

/*
 * devsw close handler. Drops one open reference on the device behind
 * f->f_devdata. With the last reference the interface pointer is
 * cleared and, for non-disk devices, the block cache is freed here.
 * Disks are then passed on to disk_close().
 *
 * Returns EINVAL when there is no descriptor or the device is unknown,
 * the result of disk_close() for disks, 0 otherwise.
 */
static int
efipart_close(struct open_file *f)
{
        struct disk_devdesc *dev;
        pdinfo_t *pd;
        bool is_disk;

        dev = (struct disk_devdesc *)(f->f_devdata);
        if (dev == NULL)
                return (EINVAL);

        pd = efiblk_get_pdinfo((struct devdesc *)dev);
        if (pd == NULL)
                return (EINVAL);

        is_disk = (dev->dd.d_dev->dv_type == DEVT_DISK);

        pd->pd_open--;
        if (pd->pd_open == 0) {
                pd->pd_blkio = NULL;
                if (!is_disk) {
                        bcache_free(pd->pd_bcache);
                        pd->pd_bcache = NULL;
                }
        }

        if (is_disk)
                return (disk_close(dev));
        return (0);
}

/*
 * devsw ioctl handler. Disks get first shot through disk_ioctl(); any
 * result other than ENOTTY is returned as is. What remains is answered
 * from the Block I/O media data:
 *   DIOCGSECTORSIZE - block size, stored as u_int
 *   DIOCGMEDIASIZE  - block size * (last block + 1), stored as uint64_t
 *
 * Returns 0 on success, EINVAL when there is no descriptor or the
 * device is unknown, ENOTTY for any other command.
 */
static int
efipart_ioctl(struct open_file *f, u_long cmd, void *data)
{
        struct disk_devdesc *dev;
        pdinfo_t *pd;
        int error;

        dev = (struct disk_devdesc *)(f->f_devdata);
        if (dev == NULL)
                return (EINVAL);

        pd = efiblk_get_pdinfo((struct devdesc *)dev);
        if (pd == NULL)
                return (EINVAL);

        if (dev->dd.d_dev->dv_type == DEVT_DISK) {
                error = disk_ioctl(dev, cmd, data);
                if (error != ENOTTY)
                        return (error);
        }

        switch (cmd) {
        case DIOCGSECTORSIZE:
                *(u_int *)data = pd->pd_blkio->Media->BlockSize;
                return (0);
        case DIOCGMEDIASIZE:
                *(uint64_t *)data = pd->pd_blkio->Media->BlockSize *
                    (pd->pd_blkio->Media->LastBlock + 1);
                return (0);
        default:
                break;
        }

        return (ENOTTY);
}

/*
 * efipart_readwrite()
 * Internal equivalent of efipart_strategy(), which operates on the
 * media-native block size. This function expects all I/O requests
 * to be within the media size and returns an error if such is not
 * the case.
 *
 * blkio - Block I/O interface of the device
 * rw    - request flags; (rw & F_MASK) must be F_READ or F_WRITE
 * blk   - first block, in media-native blocks
 * nblks - number of media-native blocks to transfer
 * buf   - data buffer
 *
 * Returns 0 on success, ENXIO without an interface, EIO for a request
 * outside the media, EROFS for a write to read-only media, ENOSYS for
 * an unknown operation, or the errno translation of the firmware
 * status. Every return path goes through TSEXIT() so that the
 * TSENTER() at the top is always paired.
 */
static int
efipart_readwrite(EFI_BLOCK_IO *blkio, int rw, daddr_t blk, daddr_t nblks,
    char *buf)
{
        EFI_STATUS status;
        int rc;

        TSENTER();

        if (blkio == NULL) {
                rc = ENXIO;
                goto out;
        }
        /* blk is known to be non-negative before the unsigned compare. */
        if (blk < 0 || blk > blkio->Media->LastBlock) {
                rc = EIO;
                goto out;
        }
        if ((blk + nblks - 1) > blkio->Media->LastBlock) {
                rc = EIO;
                goto out;
        }

        switch (rw & F_MASK) {
        case F_READ:
                status = blkio->ReadBlocks(blkio, blkio->Media->MediaId, blk,
                    nblks * blkio->Media->BlockSize, buf);
                break;
        case F_WRITE:
                if (blkio->Media->ReadOnly) {
                        rc = EROFS;
                        goto out;
                }
                status = blkio->WriteBlocks(blkio, blkio->Media->MediaId, blk,
                    nblks * blkio->Media->BlockSize, buf);
                break;
        default:
                rc = ENOSYS;
                goto out;
        }

        if (EFI_ERROR(status)) {
                /* %ju takes uintmax_t; daddr_t needs an explicit cast. */
                printf("%s: rw=%d, blk=%ju size=%ju status=%lu\n", __func__, rw,
                    (uintmax_t)blk, (uintmax_t)nblks, DECODE_ERROR(status));
        }
        rc = efi_status_to_errno(status);
out:
        TSEXIT();
        return (rc);
}

/*
 * efipart_strategy()
 * Device strategy entry point.  Validates the device, refuses I/O on
 * removable media that is not present, and hands the request to the
 * block cache with efipart_realstrategy() as the backing routine.
 *
 * For DEVT_DISK devices the request is relative to dev->d_offset, which
 * is kept in media-native blocks; it is converted to 512-byte units and
 * added to blk before the request is passed on.
 *
 * Returns EINVAL when the device or its pdinfo cannot be resolved, ENXIO
 * when removable media is absent, otherwise the bcache_strategy() result.
 */
static int
efipart_strategy(void *devdata, int rw, daddr_t blk, size_t size,
    char *buf, size_t *rsize)
{
        struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
        struct bcache_devdata bcd;
        pdinfo_t *pd;
        daddr_t start = blk;

        if (dev == NULL)
                return (EINVAL);

        pd = efiblk_get_pdinfo((struct devdesc *)dev);
        if (pd == NULL)
                return (EINVAL);

        /* Removable device with the media pulled out: nothing to access. */
        if (pd->pd_blkio->Media->RemovableMedia) {
                if (!pd->pd_blkio->Media->MediaPresent)
                        return (ENXIO);
        }

        if (dev->dd.d_dev->dv_type == DEVT_DISK) {
                daddr_t part_start;

                /* Media-native blocks -> bytes -> 512-byte blocks. */
                part_start = dev->d_offset * pd->pd_blkio->Media->BlockSize;
                part_start /= 512;
                start = blk + part_start;
        }

        bcd.dv_cache = pd->pd_bcache;
        bcd.dv_devdata = devdata;
        bcd.dv_strategy = efipart_realstrategy;

        return (bcache_strategy(&bcd, rw, start, size, buf, rsize));
}

/*
 * efipart_realstrategy()
 * Backing strategy routine handed to the block cache by
 * efipart_strategy().  Transfers size bytes starting at 512-byte block
 * blk, translating the request into media-native blocks for
 * efipart_readwrite().
 *
 * A bounce buffer is used when the request does not start and end on a
 * media block boundary, or when buf does not satisfy the media's IoAlign
 * requirement.  Partial-block writes are done as read-modify-write.
 *
 * A request that extends past the end of the disk (or partition, for
 * DEVT_DISK devices) is truncated at the end; a request that starts at or
 * beyond the end fails with EIO and *rsize set to 0.
 *
 * Once the transfer has been set up, *rsize (when not NULL) receives the
 * number of bytes actually transferred, on success as well as on error.
 *
 * Returns 0 on success, otherwise an errno value: EINVAL for a bad
 * device or negative blk, ENXIO when there is no block I/O instance, EIO
 * for a zero or non-512-multiple size or an out-of-range request, ENOMEM
 * when no bounce buffer can be allocated, EROFS when rw is neither a read
 * nor a write, or whatever efipart_readwrite() reports.
 */
static int
efipart_realstrategy(void *devdata, int rw, daddr_t blk, size_t size,
    char *buf, size_t *rsize)
{
        struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
        pdinfo_t *pd;
        EFI_BLOCK_IO *blkio;
        uint64_t off, disk_blocks, d_offset = 0;
        char *blkbuf;
        size_t blkoff, blksz, bio_size;
        unsigned ioalign;
        bool need_buf;
        int rc;
        uint64_t diskend;

        if (dev == NULL || blk < 0)
                return (EINVAL);

        pd = efiblk_get_pdinfo((struct devdesc *)dev);
        if (pd == NULL)
                return (EINVAL);

        blkio = pd->pd_blkio;
        if (blkio == NULL)
                return (ENXIO);

        if (size == 0 || (size % 512) != 0)
                return (EIO);

        off = blk * 512;
        /*
         * Get disk blocks, this value is either for whole disk or for
         * partition.
         */
        disk_blocks = 0;
        if (dev->dd.d_dev->dv_type == DEVT_DISK) {
                if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) {
                        /* DIOCGMEDIASIZE does return bytes. */
                        disk_blocks /= blkio->Media->BlockSize;
                }
                d_offset = dev->d_offset;
        }
        if (disk_blocks == 0)
                disk_blocks = blkio->Media->LastBlock + 1 - d_offset;

        /*
         * Make sure we don't read past disk end.  The comparison is done
         * in bytes rather than in whole media blocks: a request may start
         * or end in the middle of a media block when the block size is
         * larger than 512, and rounding to blocks would let such a
         * request run past the end.
         */
        diskend = (d_offset + disk_blocks) * blkio->Media->BlockSize;
        if (off + size > diskend) {
                if (diskend <= off) {
                        if (rsize != NULL)
                                *rsize = 0;

                        return (EIO);
                }
                size = diskend - off;
        }

        need_buf = true;
        /* Do we need bounce buffer? */
        if ((size % blkio->Media->BlockSize == 0) &&
            (off % blkio->Media->BlockSize == 0))
                need_buf = false;

        /* Do we have IO alignment requirement? */
        ioalign = blkio->Media->IoAlign;
        if (ioalign == 0)
                ioalign++;

        if (ioalign > 1 && (uintptr_t)buf != roundup2((uintptr_t)buf, ioalign))
                need_buf = true;

        if (need_buf) {
                /*
                 * Start with the largest whole number of media blocks
                 * that fits in BIO_BUFFER_SIZE (but never less than one
                 * block) and shrink by one block per failed allocation.
                 * Keeping bio_size a non-zero multiple of the block size
                 * guarantees that the unsigned subtraction cannot wrap
                 * and that the buffer always holds at least one block.
                 */
                bio_size = BIO_BUFFER_SIZE -
                    BIO_BUFFER_SIZE % blkio->Media->BlockSize;
                if (bio_size == 0)
                        bio_size = blkio->Media->BlockSize;
                for (;;) {
                        blkbuf = memalign(ioalign, bio_size);
                        if (blkbuf != NULL ||
                            bio_size <= blkio->Media->BlockSize)
                                break;
                        bio_size -= blkio->Media->BlockSize;
                }
        } else {
                /* Aligned request: transfer straight to/from the caller. */
                blkbuf = buf;
                bio_size = size;
        }

        if (blkbuf == NULL)
                return (ENOMEM);

        /* Assume everything is transferred; corrected at "error" below. */
        if (rsize != NULL)
                *rsize = size;

        rc = 0;
        /* From here on blk counts media-native blocks. */
        blk = off / blkio->Media->BlockSize;
        blkoff = off % blkio->Media->BlockSize;

        while (size > 0) {
                /* x: number of media blocks handled in this iteration. */
                size_t x = min(size, bio_size);

                if (x < blkio->Media->BlockSize)
                        x = 1;
                else
                        x /= blkio->Media->BlockSize;

                switch (rw & F_MASK) {
                case F_READ:
                        /* Bytes of caller data covered by these blocks. */
                        blksz = blkio->Media->BlockSize * x - blkoff;
                        if (size < blksz)
                                blksz = size;

                        rc = efipart_readwrite(blkio, rw, blk, x, blkbuf);
                        if (rc != 0)
                                goto error;

                        if (need_buf)
                                bcopy(blkbuf + blkoff, buf, blksz);
                        break;
                case F_WRITE:
                        rc = 0;
                        if (blkoff != 0) {
                                /*
                                 * We got offset to sector, read 1 sector to
                                 * blkbuf.
                                 */
                                x = 1;
                                blksz = blkio->Media->BlockSize - blkoff;
                                blksz = min(blksz, size);
                                rc = efipart_readwrite(blkio, F_READ, blk, x,
                                    blkbuf);
                        } else if (size < blkio->Media->BlockSize) {
                                /*
                                 * The remaining block is not full
                                 * sector. Read 1 sector to blkbuf.
                                 */
                                x = 1;
                                blksz = size;
                                rc = efipart_readwrite(blkio, F_READ, blk, x,
                                    blkbuf);
                        } else {
                                /* We can write full sector(s). */
                                blksz = blkio->Media->BlockSize * x;
                        }

                        if (rc != 0)
                                goto error;
                        /*
                         * Put your Data In, Put your Data out,
                         * Put your Data In, and shake it all about
                         */
                        if (need_buf)
                                bcopy(buf, blkbuf + blkoff, blksz);
                        rc = efipart_readwrite(blkio, F_WRITE, blk, x, blkbuf);
                        if (rc != 0)
                                goto error;
                        break;
                default:
                        /* DO NOTHING */
                        rc = EROFS;
                        goto error;
                }

                /* Only the first block of a request can start mid-block. */
                blkoff = 0;
                buf += blksz;
                size -= blksz;
                blk += x;
        }

error:
        /* size is what was left undone; report only the bytes moved. */
        if (rsize != NULL)
                *rsize -= size;

        if (need_buf)
                free(blkbuf);
        return (rc);
}