root/stand/efi/loader/copy.c
/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Benno Rice under sponsorship from
 * the FreeBSD Foundation.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>

#include <stand.h>
#include <bootstrap.h>

#include <efi.h>
#include <efilib.h>

#include "loader_efi.h"

#if defined(__amd64__)
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <machine/vmparam.h>

/*
 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(),
 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify().
 */
#define CPUID_LEAF_HV_MAXLEAF           0x40000000
#define CPUID_LEAF_HV_INTERFACE         0x40000001
#define CPUID_LEAF_HV_FEATURES          0x40000003
#define CPUID_LEAF_HV_LIMITS            0x40000005
#define CPUID_HV_IFACE_HYPERV           0x31237648      /* HV#1 */
#define CPUID_HV_MSR_TIME_REFCNT        0x0002  /* MSR_HV_TIME_REF_COUNT */
#define CPUID_HV_MSR_HYPERCALL          0x0020

/*
 * Report whether the loader is executing as a Hyper-V guest.
 *
 * Returns 1 only when every check below passes, and 0 otherwise:
 *  - CPUID leaf 1 reports the CPUID2_HV bit;
 *  - the maximum hypervisor leaf is at least CPUID_LEAF_HV_LIMITS;
 *  - the hypervisor vendor string is "Microsoft Hv";
 *  - the interface signature equals CPUID_HV_IFACE_HYPERV;
 *  - the feature leaf advertises both CPUID_HV_MSR_HYPERCALL and
 *    CPUID_HV_MSR_TIME_REFCNT.
 */
static int
running_on_hyperv(void)
{
        char hv_vendor[16];
        uint32_t regs[4];

        do_cpuid(1, regs);
        if ((regs[2] & CPUID2_HV) == 0)
                return (0);

        do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
        if (regs[0] < CPUID_LEAF_HV_LIMITS)
                return (0);

        /*
         * The 12-byte vendor string is returned in regs[1..3].  Copy it
         * with memcpy() instead of storing through a uint32_t pointer cast
         * of the char buffer: such a cast is neither alignment- nor
         * aliasing-safe.
         */
        memcpy(hv_vendor, &regs[1], 3 * sizeof(regs[0]));
        hv_vendor[12] = '\0';
        if (strcmp(hv_vendor, "Microsoft Hv") != 0)
                return (0);

        do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
        if (regs[0] != CPUID_HV_IFACE_HYPERV)
                return (0);

        /* Both MSRs must be advertised in the feature leaf. */
        do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
        if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0)
                return (0);
        if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0)
                return (0);

        return (1);
}

/*
 * Clamp the requested staging area size to the memory that is actually
 * available starting at KERNLOAD.
 *
 * The firmware memory map is searched for the descriptor that contains
 * KERNLOAD.  The pages from KERNLOAD to the end of that descriptor, plus all
 * immediately following, physically contiguous descriptors of type
 * EfiConventionalMemory or EfiLoaderData, form the available range.  If
 * *nr_pages exceeds that range it is lowered to it and a message is printed.
 *
 * On any failure (memory map unreadable, out of memory, no descriptor
 * covering KERNLOAD) a message is printed and *nr_pages is left unchanged.
 */
static void
efi_verify_staging_size(unsigned long *nr_pages)
{
        UINTN sz;
        EFI_MEMORY_DESCRIPTOR *map = NULL, *p;
        EFI_PHYSICAL_ADDRESS start, end;
        UINTN key, dsz;
        UINT32 dver;
        EFI_STATUS status;
        int i, ndesc;
        unsigned long available_pages = 0;

        sz = 0;

        /*
         * The first call (sz == 0, map == NULL) only learns the required
         * size; keep retrying with a bigger buffer until the map fits.
         */
        for (;;) {
                status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
                if (!EFI_ERROR(status))
                        break;

                if (status != EFI_BUFFER_TOO_SMALL) {
                        printf("Can't read memory map: %lu\n",
                            DECODE_ERROR(status));
                        goto out;
                }

                free(map);

                /*
                 * Allocate 10 descriptors more than the size reported,
                 * to allow for any fragmentation caused by calling
                 * malloc.  The slack is added to sz itself so that the
                 * next GetMemoryMap() call is told about the real buffer
                 * size and can actually use the extra room.
                 */
                sz += 10 * dsz;
                map = malloc(sz);
                if (map == NULL) {
                        printf("Unable to allocate memory\n");
                        goto out;
                }
        }

        /* Locate the descriptor that contains KERNLOAD. */
        ndesc = sz / dsz;
        for (i = 0, p = map; i < ndesc;
             i++, p = NextMemoryDescriptor(p, dsz)) {
                start = p->PhysicalStart;
                end = start + p->NumberOfPages * EFI_PAGE_SIZE;

                if (KERNLOAD < start || KERNLOAD >= end)
                        continue;

                /* Only the pages from KERNLOAD onwards are usable. */
                available_pages = p->NumberOfPages -
                        ((KERNLOAD - start) >> EFI_PAGE_SHIFT);
                break;
        }

        if (available_pages == 0) {
                printf("Can't find valid memory map for staging area!\n");
                goto out;
        }

        i++;
        p = NextMemoryDescriptor(p, dsz);

        /*
         * Extend the range over the following descriptors for as long as
         * they are usable memory and start exactly where the previous one
         * ended.
         */
        for ( ; i < ndesc;
             i++, p = NextMemoryDescriptor(p, dsz)) {
                if (p->Type != EfiConventionalMemory &&
                    p->Type != EfiLoaderData)
                        break;

                if (p->PhysicalStart != end)
                        break;

                end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE;

                available_pages += p->NumberOfPages;
        }

        if (*nr_pages > available_pages) {
                /* Both values are unsigned long, hence %lu. */
                printf("Staging area's size is reduced: %lu -> %lu!\n",
                    *nr_pages, available_pages);
                *nr_pages = available_pages;
        }
out:
        free(map);
}
#endif /* __amd64__ */

/*
 * Default staging area size, in units of M(1) (see efi_copy_init()).
 * arm uses a smaller default than every other platform.
 */
#if defined(__arm__)
#define DEFAULT_EFI_STAGING_SIZE        32
#else
#define DEFAULT_EFI_STAGING_SIZE        64
#endif
/*
 * EFI_STAGING_SIZE may already be defined when this file is compiled
 * (presumably by the build configuration -- TODO confirm); otherwise the
 * default is used.  efi_copy_init() raises values below the default.
 */
#ifndef EFI_STAGING_SIZE
#define EFI_STAGING_SIZE        DEFAULT_EFI_STAGING_SIZE
#endif

/* When non-zero, the start of the staging area is aligned to M(2). */
#define EFI_STAGING_2M_ALIGN    1

/*
 * Initial amount of slop required past the end of the staged data (see
 * efi_check_space()); only x86 asks for any by default.
 */
#if defined(__amd64__) || defined(__i386__)
#define EFI_STAGING_SLOP        M(8)
#else
#define EFI_STAGING_SLOP        0
#endif

/* Current slop; can be changed with the "staging_slop" command. */
static u_long staging_slop = EFI_STAGING_SLOP;

/*
 * staging_base: start of the firmware allocation backing the staging area.
 * staging:      start of the staged data (staging_base after alignment).
 * staging_end:  end of the firmware allocation.
 */
EFI_PHYSICAL_ADDRESS    staging, staging_end, staging_base;
/* Whether stage_offset has been established by a copy-in or read-in. */
bool                    stage_offset_set = false;
/* Value added to a load address to obtain its address in the staging area. */
ssize_t                 stage_offset;

/*
 * Hand the whole staging allocation, [staging_base, staging_end), back to
 * the firmware and forget the load offset, so that the next copy-in or
 * read-in establishes a new one.
 */
static void
efi_copy_free(void)
{

        BS->FreePages(staging_base,
            (staging_end - staging_base) / EFI_PAGE_SIZE);

        /* No data is staged any more. */
        stage_offset = 0;
        stage_offset_set = false;
}

#if defined(__amd64__) || defined(__i386__)
int copy_staging = COPY_STAGING_AUTO;

/*
 * "copy_staging" loader command.
 *
 * Without an argument, print the current copy staging mode.  With one
 * argument ("enable", "disable" or "auto"), select that mode; when the mode
 * actually changes, the loaded kernel is unloaded and the staging area is
 * freed and initialized again.  Any other invocation sets command_errmsg to
 * a usage string and returns CMD_ERROR.
 */
static int
command_copy_staging(int argc, char *argv[])
{
        static const char *const mode[3] = {
                [COPY_STAGING_ENABLE] = "enable",
                [COPY_STAGING_DISABLE] = "disable",
                [COPY_STAGING_AUTO] = "auto",
        };
        int requested;

        if (argc > 2)
                goto usage;

        /* Query only. */
        if (argc != 2) {
                printf("copy staging: %s\n", mode[copy_staging]);
                return (CMD_OK);
        }

        if (strcmp(argv[1], "enable") == 0)
                requested = COPY_STAGING_ENABLE;
        else if (strcmp(argv[1], "disable") == 0)
                requested = COPY_STAGING_DISABLE;
        else if (strcmp(argv[1], "auto") == 0)
                requested = COPY_STAGING_AUTO;
        else
                goto usage;

        /* Selecting the mode that is already active is a no-op. */
        if (requested == copy_staging)
                return (CMD_OK);

        copy_staging = requested;
        printf("changed copy_staging, unloading kernel\n");
        unload();
        efi_copy_free();
        efi_copy_init();
        return (CMD_OK);

usage:
        command_errmsg = "usage: copy_staging enable|disable|auto";
        return (CMD_ERROR);
}
COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
#endif

/*
 * "staging_slop" loader command.
 *
 * Without an argument, print the current slop.  With one argument, parse it
 * with strtoul() in base 0 and make it the new slop; when the value actually
 * changes, the loaded kernel is unloaded and the staging area is freed and
 * initialized again.
 *
 * Returns CMD_OK on success.  Too many arguments, an empty argument, or an
 * argument with trailing non-numeric characters sets command_errmsg and
 * returns CMD_ERROR.
 */
static int
command_staging_slop(int argc, char *argv[])
{
        char *endp;
        u_long new;

        if (argc > 2) {
                goto err;
        } else if (argc == 2) {
                new = strtoul(argv[1], &endp, 0);
                /*
                 * endp == argv[1] means no digits were consumed at all
                 * (e.g. an empty argument); do not treat that as 0.
                 */
                if (endp == argv[1] || *endp != '\0')
                        goto err;
                if (staging_slop != new) {
                        staging_slop = new;
                        printf("changed slop, unloading kernel\n");

                        /* The staged image was laid out for the old slop. */
                        unload();
                        efi_copy_free();
                        efi_copy_init();
                }
        } else {
                printf("staging slop %#lx\n", staging_slop);
        }
        return (CMD_OK);

err:
        command_errmsg = "invalid slop value";
        return (CMD_ERROR);
}
COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
    command_staging_slop);

#if defined(__amd64__) || defined(__i386__)
/*
 * Upper bound for the physical address of the staging area.
 *
 * The staging area must reside in the first 1GB of physical memory when
 * copy staging is enabled, and in the first 4GB otherwise: see elf64_exec()
 * in boot/efi/loader/arch/amd64/elf64_freebsd.c.
 */
static EFI_PHYSICAL_ADDRESS
get_staging_max(void)
{

        if (copy_staging == COPY_STAGING_ENABLE)
                return (G(1));
        return (G(4));
}
#define EFI_ALLOC_MAX_ADDR
#elif defined(__aarch64__)
/*
 * Older kernels only support a 48-bit physical address space, and locore.S
 * only supports a 50-bit space. Limit to 48 bits so older kernels can boot
 * even if FEAT_LPA2 is supported by the hardware.
 */
#define get_staging_max()       (1ul << 48)
#define EFI_ALLOC_MAX_ADDR
#endif
#ifdef EFI_ALLOC_MAX_ADDR
#define EFI_ALLOC_METHOD        AllocateMaxAddress
#else
#define EFI_ALLOC_METHOD        AllocateAnyPages
#endif

/*
 * Allocate the initial staging area from the firmware.
 *
 * The requested size is EFI_STAGING_SIZE units of M(1), but never less than
 * DEFAULT_EFI_STAGING_SIZE.  On amd64 under Hyper-V the page count may be
 * reduced by efi_verify_staging_size().  On success staging_base and
 * staging_end describe the allocation and staging is its (optionally
 * 2MiB-aligned) start.
 *
 * Returns 0 on success; on failure a message is printed and the EFI status
 * is returned.
 */
int
efi_copy_init(void)
{
        EFI_STATUS      status;
        unsigned long npages;
        vm_offset_t stage_units;

        /* Never go below the built-in default size. */
        stage_units = EFI_STAGING_SIZE;
        if (stage_units < DEFAULT_EFI_STAGING_SIZE)
                stage_units = DEFAULT_EFI_STAGING_SIZE;
        npages = EFI_SIZE_TO_PAGES(M(1) * stage_units);

#if defined(__amd64__)
        /*
         * Shrink the request if it is too big.  This is currently done
         * only for a FreeBSD VM running on Hyper-V; for the reason see
         * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28
         */
        if (running_on_hyperv())
                efi_verify_staging_size(&npages);
#endif
#ifdef EFI_ALLOC_MAX_ADDR
        /* Seed the allocation with the highest acceptable address. */
        staging = get_staging_max();
#endif
        status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
            npages, &staging);
        if (EFI_ERROR(status)) {
                printf("failed to allocate staging area: %lu\n",
                    DECODE_ERROR(status));
                return (status);
        }

        /* Remember the raw allocation before staging is aligned below. */
        staging_end = staging + npages * EFI_PAGE_SIZE;
        staging_base = staging;

#if EFI_STAGING_2M_ALIGN
        /*
         * The kernel builds a page table based on where it was loaded in
         * physical address space, using either a 1MiB or a 2MiB page.
         * Rounding the load address up to 2MiB keeps it correctly aligned
         * for both cases.
         */
        staging = roundup2(staging, M(2));
#endif

        return (0);
}

/*
 * Make sure the staging area reaches at least "end" plus staging_slop,
 * growing or relocating it when it does not.
 *
 * "end" is an address inside the staging area; the callers in this file
 * pass the translated end of the data they are about to write.  When the
 * area is too small, three strategies are tried in order:
 *  1. allocate the missing pages directly after staging_end;
 *  2. allocate pages directly before the current allocation and move the
 *     staged data down into them;
 *  3. allocate a completely new, larger area, copy the staged data into it
 *     and free the old area.
 * Strategies 2 and 3 move the staged data, so they also adjust stage_offset
 * and staging; callers must not cache translated addresses across a call.
 *
 * Returns true when the space is available and false when every allocation
 * failed.  Panics when the area would have to grow after boot services
 * were exited.
 */
static bool
efi_check_space(vm_offset_t end)
{
        EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
        EFI_STATUS status;
        unsigned long nr_pages;

        end = roundup2(end, EFI_PAGE_SIZE);

        /* There is already enough space */
        if (end + staging_slop <= staging_end)
                return (true);

        /*
         * Without boot services nothing can be allocated any more: accept
         * the request if it fits without the slop, otherwise give up.
         */
        if (!boot_services_active) {
                if (end <= staging_end)
                        return (true);
                panic("efi_check_space: cannot expand staging area "
                    "after boot services were exited\n");
        }

        /*
         * Add slop at the end:
         * 1. amd64 kernel expects to do some very early allocations
         *    by carving out memory after kernend.  Slop guarantees
         *    that it does not overwrite anything useful.
         * 2. It seems that initial calculation of the staging size
         *    could be somewhat smaller than actually copying in after
         *    boot services are exited.  Slop avoids calling
         *    BS->AllocatePages() when it cannot work.
         */
        end += staging_slop;

        /* Number of pages missing past the current end of the area. */
        nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
#if defined(__amd64__) || defined(__i386__)
        /*
         * The amd64 kernel needs all memory to be allocated under the 1G or
         * 4G boundary.
         */
        if (end > get_staging_max())
                goto before_staging;
#endif

        /* Try to allocate more space after the previous allocation */
        addr = staging_end;
        status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
            &addr);
        if (!EFI_ERROR(status)) {
                staging_end = staging_end + nr_pages * EFI_PAGE_SIZE;
                return (true);
        }

#if defined(__amd64__) || defined(__i386__)
before_staging:
#endif
        /* Try allocating space before the previous allocation */
        /* Not enough address space below staging: the subtraction would wrap. */
        if (staging < nr_pages * EFI_PAGE_SIZE)
                goto expand;
        addr = staging - nr_pages * EFI_PAGE_SIZE;
#if EFI_STAGING_2M_ALIGN
        /* See efi_copy_init for why this is needed */
        addr = rounddown2(addr, M(2));
#endif
        /* Cover everything from the new start up to the existing allocation. */
        nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
        status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
            &addr);
        if (!EFI_ERROR(status)) {
                /*
                 * Move the old allocation and update the state so
                 * translation still works.  Source and destination may
                 * overlap, hence memmove().
                 */
                staging_base = addr;
                memmove((void *)(uintptr_t)staging_base,
                    (void *)(uintptr_t)staging, staging_end - staging);
                stage_offset -= staging - staging_base;
                staging = staging_base;
                return (true);
        }

expand:
        /* Last resort: a brand new area large enough for everything. */
        nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
#if EFI_STAGING_2M_ALIGN
        /* Extra room so that rounding new_base up to 2MiB cannot cost space. */
        nr_pages += M(2) / EFI_PAGE_SIZE;
#endif
#ifdef EFI_ALLOC_MAX_ADDR
        new_base = get_staging_max();
#endif
        status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
            nr_pages, &new_base);
        if (!EFI_ERROR(status)) {
#if EFI_STAGING_2M_ALIGN
                new_staging = roundup2(new_base, M(2));
#else
                new_staging = new_base;
#endif
                /*
                 * Move the old allocation and update the state so
                 * translation still works.
                 */
                memcpy((void *)(uintptr_t)new_staging,
                    (void *)(uintptr_t)staging, staging_end - staging);
                /* The old area is released only after its contents are copied. */
                BS->FreePages(staging_base, (staging_end - staging_base) /
                    EFI_PAGE_SIZE);
                stage_offset -= staging - new_staging;
                staging = new_staging;
                staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
                staging_base = new_base;
                return (true);
        }

        printf("efi_check_space: Unable to expand staging area\n");
        return (false);
}

/*
 * Convert a load address into a pointer to the place in the staging area
 * where that address is currently staged, by applying stage_offset.
 */
void *
efi_translate(vm_offset_t ptr)
{
        vm_offset_t staged;

        staged = ptr + stage_offset;
        return ((void *)staged);
}

/*
 * Copy len bytes from src into the staging area at the position that
 * corresponds to load address dest.
 *
 * The first copy-in or read-in fixes stage_offset so that dest maps to the
 * start of the staging area.  Returns len on success; if the staging area
 * cannot be made large enough, sets errno to ENOMEM and returns -1.
 */
ssize_t
efi_copyin(const void *src, vm_offset_t dest, const size_t len)
{

        if (!stage_offset_set) {
                stage_offset_set = true;
                stage_offset = (vm_offset_t)staging - dest;
        }

        /* XXX: Callers do not check for failure. */
        if (efi_check_space(dest + stage_offset + len)) {
                /*
                 * efi_check_space() may have relocated the staging area,
                 * so the target is computed only now.
                 */
                bcopy(src, (void *)(dest + stage_offset), len);
                return (len);
        }

        errno = ENOMEM;
        return (-1);
}

/*
 * Copy len bytes out of the staging area, from the position that
 * corresponds to load address src, into dest.
 *
 * Returns len on success.  If the requested range would end past
 * staging_end, sets errno to ENOMEM and returns -1 without copying.
 */
ssize_t
efi_copyout(const vm_offset_t src, void *dest, const size_t len)
{

        /* XXX: Callers do not check for failure. */
        if (src + stage_offset + len <= staging_end) {
                bcopy((void *)(src + stage_offset), dest, len);
                return (len);
        }

        errno = ENOMEM;
        return (-1);
}

/*
 * Read len bytes from fd straight into the staging area at the position
 * that corresponds to load address dest.
 *
 * The first copy-in or read-in fixes stage_offset so that dest maps to the
 * start of the staging area.  Returns whatever VECTX_READ() returns; if the
 * staging area cannot be made large enough, sets errno to ENOMEM and
 * returns -1.
 */
ssize_t
efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
{

        if (!stage_offset_set) {
                stage_offset_set = true;
                stage_offset = (vm_offset_t)staging - dest;
        }

        if (efi_check_space(dest + stage_offset + len)) {
                /*
                 * efi_check_space() may have relocated the staging area,
                 * so the target is computed only now.
                 */
                return (VECTX_READ(fd, (void *)(dest + stage_offset), len));
        }

        errno = ENOMEM;
        return (-1);
}

/*
 * Copy the staged data to its final location.
 *
 * Everything from staging up to staging_end is copied, 64 bits at a time
 * and in ascending address order, to the region that starts at
 * staging - stage_offset.
 */
void
efi_copy_finish(void)
{
        uint64_t        *from, *to, *limit;

        from = (uint64_t *)(uintptr_t)staging;
        limit = (uint64_t *)(uintptr_t)staging_end;
        to = (uint64_t *)(uintptr_t)(staging - stage_offset);

        for (; from < limit; from++, to++)
                *to = *from;
}

/*
 * Variant of efi_copy_finish() that deliberately does nothing.
 * NOTE(review): presumably selected when the staged data is used in place
 * and no final copy is needed -- confirm against the callers.
 */
void
efi_copy_finish_nop(void)
{
}