root/usr/src/uts/sun4v/io/vdsk_common.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/crc32.h>
#include <sys/cred.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
#include <sys/types.h>
#include <sys/varargs.h>
#include <sys/vtoc.h>

#include <sys/vdsk_common.h>

/*
 * Hooks for EFI support
 */

/*
 * This code provides generic functions to the vds and vdc drivers to read
 * EFI labels from the disk backend and to get the EFI GPT and GPE. It is
 * inspired by the libefi userland library and the cmlb driver. We will
 * certainly be able to remove this code if RFE 6213117 is ever implemented.
 */

/*
 * Debug tracing.
 *
 * On DEBUG builds VD_EFI_DEBUG() forwards its printf-style arguments to
 * vd_efi_print() when the vd_efi_debug flag is non-zero; the flag defaults
 * to 0. On non-DEBUG builds the macro expands to nothing.
 *
 * The DEBUG definition is wrapped in do { } while (0) so that an invocation
 * is always a single statement and can never capture an "else" that follows
 * it at the call site.
 */
#ifdef DEBUG

static int vd_efi_debug = 0;

#define VD_EFI_DEBUG(...) \
        do { \
                if (vd_efi_debug) \
                        vd_efi_print(__VA_ARGS__); \
        } while (0)

#else

#define VD_EFI_DEBUG(...)

#endif

/*
 * Size in bytes of a buffer holding `cnt' partition entries (efi_gpe_t),
 * rounded up to a whole number of blocks of (vdp)->block_size bytes.
 * Callers are expected to pass cnt >= 1; with cnt == 0 the "- 1" below
 * wraps around.
 */
#define VD_EFI_GPE_LEN(vdp, cnt) \
        (((((cnt) * sizeof (efi_gpe_t)) - 1) / (vdp)->block_size + 1) * \
        (vdp)->block_size)

/*
 * printf-style output helper: hands the format string and its variable
 * arguments to vcmn_err() at the CE_CONT level.
 */
static void
vd_efi_print(const char *format, ...)
{
        va_list ap;

        va_start(ap, format);
        vcmn_err(CE_CONT, format, ap);
        va_end(ap);
}

/*
 * Return a 32-bit CRC of the contents of the buffer.
 *
 * The seed is 0xffffffff and the result is XORed with 0xffffffff
 * because this is what the Itanium firmware expects.
 */
unsigned int
vd_efi_crc32(const unsigned char *s, unsigned int len)
{
        unsigned int crc32val;

        CRC32(crc32val, s, len, -1U, crc32_table);

        return (crc32val ^ -1U);
}

/*
 * Issue ioctl `cmd' with argument `arg' through the ioctl function
 * registered in `dev', passing dev->vdisk as its first argument, and return
 * that function's result unchanged.
 */
static int
vd_efi_ioctl(vd_efi_dev_t *dev, int cmd, void *arg)
{
        ASSERT(dev->vdisk_ioctl != NULL);
        ASSERT(dev->vdisk != NULL);

        return ((*dev->vdisk_ioctl)(dev->vdisk, cmd, (uintptr_t)arg));
}

/*
 * Convert every multi-byte field of a GPT header, in place, from its
 * little endian representation to the endianness of the system.
 */
static void
vd_efi_swap_gpt(efi_gpt_t *gpt)
{
        /* 64-bit fields */
        gpt->efi_gpt_Signature = LE_64(gpt->efi_gpt_Signature);
        gpt->efi_gpt_MyLBA = LE_64(gpt->efi_gpt_MyLBA);
        gpt->efi_gpt_AlternateLBA = LE_64(gpt->efi_gpt_AlternateLBA);
        gpt->efi_gpt_FirstUsableLBA = LE_64(gpt->efi_gpt_FirstUsableLBA);
        gpt->efi_gpt_LastUsableLBA = LE_64(gpt->efi_gpt_LastUsableLBA);
        gpt->efi_gpt_PartitionEntryLBA = LE_64(gpt->efi_gpt_PartitionEntryLBA);

        /* 32-bit fields */
        gpt->efi_gpt_Revision = LE_32(gpt->efi_gpt_Revision);
        gpt->efi_gpt_HeaderSize = LE_32(gpt->efi_gpt_HeaderSize);
        gpt->efi_gpt_HeaderCRC32 = LE_32(gpt->efi_gpt_HeaderCRC32);
        gpt->efi_gpt_NumberOfPartitionEntries =
            LE_32(gpt->efi_gpt_NumberOfPartitionEntries);
        gpt->efi_gpt_SizeOfPartitionEntry =
            LE_32(gpt->efi_gpt_SizeOfPartitionEntry);
        gpt->efi_gpt_PartitionEntryArrayCRC32 =
            LE_32(gpt->efi_gpt_PartitionEntryArrayCRC32);

        /* disk GUID */
        UUID_LE_CONVERT(gpt->efi_gpt_DiskGUID, gpt->efi_gpt_DiskGUID);
}

/*
 * Convert the first `nparts' partition entries of the `gpe' array, in
 * place, from little endian to the endianness of the system. Nothing is
 * done when nparts is not positive.
 */
static void
vd_efi_swap_gpe(efi_gpe_t *gpe, int nparts)
{
        efi_gpe_t *entry;
        int idx, ch;

        for (idx = 0; idx < nparts; idx++) {
                entry = &gpe[idx];

                UUID_LE_CONVERT(entry->efi_gpe_PartitionTypeGUID,
                    entry->efi_gpe_PartitionTypeGUID);
                UUID_LE_CONVERT(entry->efi_gpe_UniquePartitionGUID,
                    entry->efi_gpe_UniquePartitionGUID);

                entry->efi_gpe_StartingLBA = LE_64(entry->efi_gpe_StartingLBA);
                entry->efi_gpe_EndingLBA = LE_64(entry->efi_gpe_EndingLBA);
                entry->efi_gpe_Attributes.PartitionAttrs =
                    LE_16(entry->efi_gpe_Attributes.PartitionAttrs);

                /* the partition name is an array of 16-bit characters */
                for (ch = 0; ch < EFI_PART_NAME_LEN; ch++) {
                        entry->efi_gpe_PartitionName[ch] =
                            LE_16(entry->efi_gpe_PartitionName[ch]);
                }
        }
}

/*
 * Smallest header size a valid GPT may advertise. The UEFI specification
 * requires HeaderSize to be at least 92 bytes, which is the size of the
 * defined header fields (signature through partition entry array CRC).
 */
#define VD_EFI_MIN_HDR_SIZE     92

/*
 * Check that an EFI GPT is valid. This function should be called with a raw
 * EFI GPT i.e. GPT data should be in little endian format as indicated in the
 * EFI specification and they should not have been swapped to match with the
 * system endianness.
 *
 * The checks are, in order: the signature, the advertised header size (which
 * must fit in one block of dev->block_size bytes and must not be smaller
 * than VD_EFI_MIN_HDR_SIZE), and the header CRC computed over that many
 * bytes with the CRC field itself zeroed.
 *
 * Returns 0 if the GPT is valid and EINVAL otherwise. The GPT is temporarily
 * modified while the CRC is computed but is restored before returning.
 */
static int
vd_efi_check_gpt(vd_efi_dev_t *dev, efi_gpt_t *gpt)
{
        uint_t hdr_size, crc_stored, crc_computed;

        if (gpt->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) {
                VD_EFI_DEBUG("Bad EFI signature: 0x%llx != 0x%llx\n",
                    (long long)gpt->efi_gpt_Signature,
                    (long long)LE_64(EFI_SIGNATURE));
                return (EINVAL);
        }

        hdr_size = LE_32(gpt->efi_gpt_HeaderSize);

        /*
         * check CRC of the header; the size of the header should
         * never be larger than one block
         */
        if (hdr_size > dev->block_size) {
                VD_EFI_DEBUG("Header size (%u bytes) larger than one block"
                    "(%u bytes)\n", hdr_size,
                    dev->block_size);
                return (EINVAL);
        }

        /*
         * A header that is too small would have its CRC computed over only
         * part of the header; in the extreme case of a zero header size the
         * CRC of no data is 0 and would match a stored CRC of 0.
         */
        if (hdr_size < VD_EFI_MIN_HDR_SIZE) {
                VD_EFI_DEBUG("Header size (%u bytes) smaller than minimum "
                    "(%u bytes)\n", hdr_size, (uint_t)VD_EFI_MIN_HDR_SIZE);
                return (EINVAL);
        }

        /* the CRC is computed with the CRC field set to zero */
        crc_stored = LE_32(gpt->efi_gpt_HeaderCRC32);
        gpt->efi_gpt_HeaderCRC32 = LE_32(0);
        crc_computed = vd_efi_crc32((unsigned char *)gpt, hdr_size);
        gpt->efi_gpt_HeaderCRC32 = LE_32(crc_stored);

        if (crc_stored != crc_computed) {
                VD_EFI_DEBUG("Bad EFI CRC: 0x%x != 0x%x\n",
                    crc_stored, crc_computed);
                return (EINVAL);
        }

        return (0);
}

/*
 * Allocate and read the EFI GPT and GPE from the disk backend. Note that the
 * on-disk GPT and GPE are stored in little endian format but this function
 * returns them using the endianness of the system so that any field in the
 * GPT/GPE structures can be directly accessible without any further conversion.
 * The caller is responsible for freeing the allocated structures by calling
 * vd_efi_free().
 *
 * Arguments:
 *      dev     - device used to issue the DKIOCGETEFI ioctls; its block_size
 *                must be at least sizeof (efi_gpt_t)
 *      efi_gpt - on success, set to the allocated GPT (dev->block_size bytes)
 *      efi_gpe - on success, set to the allocated GPE array
 *                (VD_EFI_GPE_LEN() bytes for the number of partitions)
 *
 * Returns 0 on success. On failure, returns the error from the ioctl or
 * EINVAL if the label is not valid; in that case everything allocated here
 * is freed and *efi_gpt and *efi_gpe are left untouched.
 */
int
vd_efi_alloc_and_read(vd_efi_dev_t *dev, efi_gpt_t **efi_gpt,
    efi_gpe_t **efi_gpe)
{
        dk_efi_t                dk_efi;
        efi_gpt_t               *gpt = NULL;
        efi_gpe_t               *gpe = NULL;
        efi_gpt_t               *data = NULL;
        size_t                  gpt_len, gpe_len = 0, data_len = 0;
        uint_t                  nparts;
        int                     status;

        ASSERT(dev->block_size >= sizeof (efi_gpt_t));
        gpt_len = dev->block_size;
        gpt = kmem_zalloc(gpt_len, KM_SLEEP);

        /*
         * Read the EFI GPT.
         */
        dk_efi.dki_lba = 1;
        dk_efi.dki_data = gpt;
        dk_efi.dki_length = gpt_len;

        status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi);

        if (status == EINVAL) {
                /*
                 * Because the DKIOCGETEFI ioctl was initially incorrectly
                 * implemented for a ZFS volume, the ioctl can fail with
                 * EINVAL if it is done on a ZFS volume managed by an old
                 * version of Solaris. This can happen if a ZFS volume is
                 * exported as a single-slice disk by a service domain
                 * running Solaris older than Solaris 10 Update 6.
                 *
                 * So we retry the ioctl to read both the GPT and the GPE at
                 * the same time, as the old implementation expects.
                 */
                data_len = sizeof (efi_gpt_t) + sizeof (efi_gpe_t);
                data = kmem_zalloc(data_len, KM_SLEEP);

                dk_efi.dki_lba = 1;
                dk_efi.dki_data = data;
                dk_efi.dki_length = data_len;
                status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi);

                if (status == 0)
                        bcopy(data, gpt, sizeof (efi_gpt_t));
        }

        if (status != 0) {
                VD_EFI_DEBUG("DKIOCGETEFI (GPT, LBA=1) error %d\n", status);
                goto errdone;
        }

        if ((status = vd_efi_check_gpt(dev, gpt)) != 0) {
                /*
                 * No valid label here; try the alternate. The alternate GPT is
                 * located in the last block of the disk.
                 */
                dk_efi.dki_lba = dev->disk_size - 1;
                dk_efi.dki_data = gpt;
                dk_efi.dki_length = gpt_len;

                if ((status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi)) != 0) {
                        VD_EFI_DEBUG("DKIOCGETEFI (LBA=%lu) error %d\n",
                            dev->disk_size - 1, status);
                        goto errdone;
                }

                if ((status = vd_efi_check_gpt(dev, gpt)) != 0)
                        goto errdone;

                VD_EFI_DEBUG("efi_read: primary label corrupt; using backup\n");
        }

        /* swap GPT data after checking the GPT is valid */
        vd_efi_swap_gpt(gpt);

        /*
         * Read the EFI GPE.
         *
         * The number of partitions comes from the disk and is unsigned. It
         * is kept unsigned here so that a value with the top bit set is
         * rejected by the upper bound check below instead of turning into a
         * negative number that would pass both checks and produce a huge
         * allocation size.
         */
        nparts = gpt->efi_gpt_NumberOfPartitionEntries;

        if (nparts > (uint_t)(NDKMAP + 1)) {
                VD_EFI_DEBUG("Too many EFI partitions (%u)", nparts);
                status = EINVAL;
                goto errdone;
        }

        if (nparts == 0) {
                VD_EFI_DEBUG("No partition defined");
                status = EINVAL;
                goto errdone;
        }

        gpe_len = VD_EFI_GPE_LEN(dev, nparts);
        gpe = kmem_zalloc(gpe_len, KM_SLEEP);

        if (data != NULL) {
                /*
                 * The data variable is not NULL if we have used the old ioctl
                 * implementation for a ZFS volume. In that case, we only expect
                 * one partition and GPE data are already available in the data
                 * buffer, right after GPT data.
                 */
                if (nparts != 1) {
                        VD_EFI_DEBUG("Unexpected number of partitions (%u)",
                            nparts);
                        status = EINVAL;
                        goto errdone;
                }

                bcopy(data + 1, gpe, sizeof (efi_gpe_t));

        } else {
                dk_efi.dki_lba = gpt->efi_gpt_PartitionEntryLBA;
                dk_efi.dki_data = (efi_gpt_t *)gpe;
                dk_efi.dki_length = gpe_len;

                if ((status = vd_efi_ioctl(dev, DKIOCGETEFI, &dk_efi)) != 0) {
                        VD_EFI_DEBUG("DKIOCGETEFI (GPE, LBA=%lu) error %d\n",
                            gpt->efi_gpt_PartitionEntryLBA, status);
                        goto errdone;
                }
        }

        /* nparts is at most NDKMAP + 1 here so it fits in an int */
        vd_efi_swap_gpe(gpe, (int)nparts);

        *efi_gpt = gpt;
        *efi_gpe = gpe;

errdone:

        /* the temporary buffer of the old ioctl path is never returned */
        if (data != NULL)
                kmem_free(data, data_len);

        /* on error nothing is handed to the caller, so release everything */
        if (status != 0) {
                if (gpe != NULL)
                        kmem_free(gpe, gpe_len);
                if (gpt != NULL)
                        kmem_free(gpt, gpt_len);
        }

        return (status);
}

/*
 * Release the GPT and GPE buffers handed out by vd_efi_alloc_and_read().
 * The GPE buffer size is recomputed from the partition count stored in the
 * GPT, and the GPT buffer is dev->block_size bytes long.
 */
void
vd_efi_free(vd_efi_dev_t *dev, efi_gpt_t *gpt, efi_gpe_t *gpe)
{
        size_t gpe_len;

        /* the partition count must be read before the GPT is freed */
        gpe_len = VD_EFI_GPE_LEN(dev, gpt->efi_gpt_NumberOfPartitionEntries);

        kmem_free(gpe, gpe_len);
        kmem_free(gpt, dev->block_size);
}