root/usr/src/boot/i386/libi386/linux.c
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2015 Toomas Soome <tsoome@me.com>
 */

/*
 * Primitive Linux loader; at the moment it is only intended to load
 * memtest86+.bin.
 *
 * Note that the Linux kernel location conflicts with the loader, so we need
 * to read the kernel into temporary space and relocate it on exec, when btx
 * is stopped.
 */
#include <sys/cdefs.h>
#include <sys/stat.h>
#include <stand.h>
#include <machine/metadata.h>
#include <machine/pc/bios.h>

#include "linux.h"
#include "bootstrap.h"
#include "vbe.h"
#include "libi386.h"
#include "btxv86.h"

/* Load and exec handlers for the kernel image and for the initrd module. */
static int linux_loadkernel(char *, u_int64_t, struct preloaded_file **);
static int linux_loadinitrd(char *, u_int64_t, struct preloaded_file **);
static int linux_exec(struct preloaded_file *);
static int linux_execinitrd(struct preloaded_file *);

/* File format descriptors: { load handler, exec handler }. */
struct file_format linux = { linux_loadkernel, linux_exec };
struct file_format linux_initrd = { linux_loadinitrd, linux_execinitrd };

/* Size of the kernel text part (file size minus setup data and boot sector). */
uint32_t linux_text_len;
/* Temporary address the real mode part is read to (after the kernel text). */
uint32_t linux_data_tmp_addr;
/* Address the real mode part is relocated to when the kernel is started. */
uint32_t linux_data_real_addr;
/*
 * Space reserved for the kernel command line: 256 by default, or
 * cmdline_size + 1 from the kernel header for boot protocol >= 2.06.
 */
static size_t max_cmdline_size;

/*
 * Evaluate one memory range as a possible home for the Linux real mode
 * data and update *result when the range yields a better (higher) address.
 *
 * addr, length - base address and size of the memory range.
 * result       - best address found so far; (vm_offset_t)-1 means that
 *                no candidate has been found yet.
 *
 * The range is trimmed to end at 0xa0000, the candidate is placed so that
 * LINUX_CL_OFFSET + max_cmdline_size bytes fit below the end of the range,
 * and it is capped at LINUX_OLD_REAL_MODE_ADDR.  Ranges which can not hold
 * that many bytes are ignored.
 */
static void
test_addr(uint64_t addr, uint64_t length, vm_offset_t *result)
{
        uint64_t end, needed;
        vm_offset_t candidate;

        /* Nothing starting at or above 0xa0000 can be used. */
        if (addr >= 0xa0000)
                return;

        /* Trim the range so it ends at 0xa0000 at most. */
        if (length > 0xa0000 - addr)
                length = 0xa0000 - addr;
        end = addr + length;

        /*
         * Reject ranges ending below the space we need; without this
         * check the subtraction below wraps around and the wrapped value
         * would be clamped to LINUX_OLD_REAL_MODE_ADDR and accepted.
         */
        needed = (uint64_t)LINUX_CL_OFFSET + max_cmdline_size;
        if (end < needed)
                return;

        candidate = end - needed;
        if (candidate > LINUX_OLD_REAL_MODE_ADDR)
                candidate = LINUX_OLD_REAL_MODE_ADDR;

        /* The data must start inside the range. */
        if (candidate < addr)
                return;

        if (candidate > *result || *result == (vm_offset_t)-1)
                *result = candidate;
}

static vm_offset_t
find_real_addr(struct preloaded_file *fp)
{
        struct bios_smap *smap;
        struct file_metadata *md;
        int entries, i;
        vm_offset_t candidate = -1;

        md = file_findmetadata(fp, MODINFOMD_SMAP);
        if (md == NULL) {
                printf("no memory smap\n");
                return (candidate);
        }
        entries = md->md_size / sizeof (struct bios_smap);
        smap = (struct bios_smap *)md->md_data;
        for (i = 0; i < entries; i++) {
                if (smap[i].type != SMAP_TYPE_MEMORY)
                        continue;
                if (smap[i].base >= 0xa0000)
                        continue;
                test_addr(smap[i].base, smap[i].length, &candidate);
        }
        return (candidate);
}

/*
 * Load a Linux kernel image (zImage or bzImage).
 *
 * filename - path of the kernel image.
 * dest     - unused.
 * result   - set to the newly allocated preloaded_file on success.
 *
 * The kernel header is read and validated, the header fields the boot
 * loader is responsible for are filled in, and both the real mode part
 * and the kernel text are read to a temporary location starting at
 * LINUX_BZIMAGE_ADDR.  The relocater_data table is set up so the parts
 * can be moved to their final addresses on exec.
 *
 * Returns 0 on success or an errno value; EFTYPE is returned when the file
 * is not recognized or when some kernel is already loaded.
 */
static int
linux_loadkernel(char *filename, uint64_t dest __unused,
    struct preloaded_file **result)
{
        struct linux_kernel_header lh;
        struct preloaded_file *fp;
        struct stat sb;
        ssize_t n;
        int fd, error = 0;
        int setup_sects, linux_big;
        unsigned long data, text;
        vm_offset_t mem;

        if (filename == NULL)
                return (EFTYPE);

        /* is kernel already loaded? */
        fp = file_findfile(NULL, NULL);
        if (fp != NULL)
                return (EFTYPE);

        if ((fd = open(filename, O_RDONLY)) == -1)
                return (errno);

        if (fstat(fd, &sb) != 0) {
                /* Save errno before printf() has a chance to change it. */
                error = errno;
                printf("stat failed\n");
                goto end;
        }

        n = read(fd, &lh, sizeof (lh));
        if (n != sizeof (lh)) {
                printf("error reading kernel header\n");
                error = EIO;
                goto end;
        }

        if (lh.boot_flag != BOOTSEC_SIGNATURE) {
                printf("invalid magic number\n");
                error = EFTYPE;
                goto end;
        }

        setup_sects = lh.setup_sects;
        linux_big = 0;
        max_cmdline_size = 256;

        if (setup_sects > LINUX_MAX_SETUP_SECTS) {
                printf("too many setup sectors\n");
                error = EFTYPE;
                goto end;
        }

        fp = file_alloc();
        if (fp == NULL) {
                error = ENOMEM;
                goto end;
        }

        /* The memory map is needed by find_real_addr(). */
        bios_addsmapdata(fp);

        if (lh.header == LINUX_MAGIC_SIGNATURE && lh.version >= 0x0200) {
                linux_big = lh.loadflags & LINUX_FLAG_BIG_KERNEL;
                lh.type_of_loader = LINUX_BOOT_LOADER_TYPE;

                if (lh.version >= 0x0206)
                        max_cmdline_size = lh.cmdline_size + 1;

                linux_data_real_addr = find_real_addr(fp);
                if (linux_data_real_addr == -1) {
                        printf("failed to detect suitable low memory\n");
                        file_discard(fp);
                        error = ENOMEM;
                        goto end;
                }
                if (lh.version >= 0x0201) {
                        lh.heap_end_ptr = LINUX_HEAP_END_OFFSET;
                        lh.loadflags |= LINUX_FLAG_CAN_USE_HEAP;
                }
                if (lh.version >= 0x0202) {
                        lh.cmd_line_ptr = linux_data_real_addr +
                            LINUX_CL_OFFSET;
                } else {
                        lh.cl_magic = LINUX_CL_MAGIC;
                        lh.cl_offset = LINUX_CL_OFFSET;
                        lh.setup_move_size = LINUX_CL_OFFSET + max_cmdline_size;
                }
        } else {
                /* old kernel */
                lh.cl_magic = LINUX_CL_MAGIC;
                lh.cl_offset = LINUX_CL_OFFSET;
                setup_sects = LINUX_DEFAULT_SETUP_SECTS;
                linux_data_real_addr = LINUX_OLD_REAL_MODE_ADDR;
        }
        if (setup_sects == 0)
                setup_sects = LINUX_DEFAULT_SETUP_SECTS;

        /* size of the setup code, which follows the 512 byte boot sector */
        data = setup_sects << 9;

        /*
         * A file shorter than the boot sector plus the setup sectors is
         * truncated; catch it here as the subtraction below would wrap.
         */
        if (sb.st_size < (off_t)(data + 512)) {
                printf("kernel image is truncated\n");
                file_discard(fp);
                error = EFTYPE;
                goto end;
        }
        text = sb.st_size - data - 512;

        /* temporary location of real mode part */
        linux_data_tmp_addr = LINUX_BZIMAGE_ADDR + text;

        if (!linux_big && text > linux_data_real_addr - LINUX_ZIMAGE_ADDR) {
                printf("Linux zImage is too big, use bzImage instead\n");
                file_discard(fp);
                error = EFBIG;
                goto end;
        }
        printf("   [Linux-%s, setup=0x%lx, size=0x%lx]\n",
            (linux_big ? "bzImage" : "zImage"), data, text);

        /* copy real mode part to place */
        i386_copyin(&lh, linux_data_tmp_addr, sizeof (lh));
        n = data + 512 - sizeof (lh);
        if (archsw.arch_readin(fd, linux_data_tmp_addr+sizeof (lh), n) != n) {
                printf("failed to read %s\n", filename);
                file_discard(fp);
                /*
                 * Do not rely on errno here, it may be 0 after a short
                 * read and we would report success.
                 */
                error = EIO;
                goto end;
        }

        /* Clear the heap space. */
        if (lh.header != LINUX_MAGIC_SIGNATURE || lh.version < 0x0200) {
                memset(PTOV(linux_data_tmp_addr + ((setup_sects + 1) << 9)),
                    0, (LINUX_MAX_SETUP_SECTS - setup_sects - 1) << 9);
        }

        /* the kernel text is read to the start of the temporary area */
        mem = LINUX_BZIMAGE_ADDR;

        if (archsw.arch_readin(fd, mem, text) != text) {
                printf("failed to read %s\n", filename);
                file_discard(fp);
                error = EIO;
                goto end;
        }

        fp->f_name = strdup(filename);
        if (linux_big)
                fp->f_type = strdup("Linux bzImage");
        else
                fp->f_type = strdup("Linux zImage");
        if (fp->f_name == NULL || fp->f_type == NULL) {
                /* f_type is compared with strcmp() on exec */
                file_discard(fp);
                error = ENOMEM;
                goto end;
        }

        /*
         * NOTE: f_addr and f_size is used here as hint for module
         * allocation, as module location will be f_addr + f_size.
         */
        fp->f_addr = linux_data_tmp_addr;
        fp->f_size = LINUX_SETUP_MOVE_SIZE;
        linux_text_len = text;

        /*
         * relocater_data is space allocated in relocater_tramp.S
         * There is space for 3 instances + terminating zero in case
         * all 3 entries are used.
         */
        if (linux_big == 0) {
                relocater_data[0].src = LINUX_BZIMAGE_ADDR;
                relocater_data[0].dest = LINUX_ZIMAGE_ADDR;
                relocater_data[0].size = text;
                relocater_data[1].src = linux_data_tmp_addr;
                relocater_data[1].dest = linux_data_real_addr;
                relocater_data[1].size = LINUX_SETUP_MOVE_SIZE;
                /* make sure the next entry is zeroed */
                relocater_data[2].src = 0;
                relocater_data[2].dest = 0;
                relocater_data[2].size = 0;
        } else {
                relocater_data[0].src = linux_data_tmp_addr;
                relocater_data[0].dest = linux_data_real_addr;
                relocater_data[0].size = LINUX_SETUP_MOVE_SIZE;
                /* make sure the next entry is zeroed */
                relocater_data[1].src = 0;
                relocater_data[1].dest = 0;
                relocater_data[1].size = 0;
        }

        *result = fp;
        setenv("kernelname", fp->f_name, 1);
end:
        close(fd);
        return (error);
}

static int
linux_exec(struct preloaded_file *fp)
{
        struct linux_kernel_header *lh = (struct linux_kernel_header *)
            PTOV(linux_data_tmp_addr);
        struct preloaded_file *mfp = fp->f_next;
        char *arg, *vga;
        char *src, *dst;
        int linux_big;
        uint32_t moveto, max_addr;
        uint16_t segment;
        struct i386_devdesc *rootdev;

        if (strcmp(fp->f_type, "Linux bzImage") == 0)
                linux_big = 1;
        else if (strcmp(fp->f_type, "Linux zImage") == 0)
                linux_big = 0;
        else
                return (EFTYPE);

        i386_getdev((void **)(&rootdev), fp->f_name, NULL);
        if (rootdev != NULL)
                relocator_edx = bd_unit2bios(rootdev);

        /*
         * command line
         * if not set in fp, read from boot-args env
         */
        if (fp->f_args == NULL)
                fp->f_args = getenv("boot-args");
        arg = fp->f_args;               /* it can still be NULL */

        /* video mode selection */
        if (arg && (vga = strstr(arg, "vga=")) != NULL) {
                char *value = vga + 4;
                uint16_t vid_mode;

                if (strncmp(value, "normal", 6) < 1)
                        vid_mode = LINUX_VID_MODE_NORMAL;
                else if (strncmp(value, "ext", 3) < 1)
                        vid_mode = LINUX_VID_MODE_EXTENDED;
                else if (strncmp(value, "ask", 3) < 1)
                        vid_mode = LINUX_VID_MODE_ASK;
                else {
                        long mode;
                        errno = 0;

                        /*
                         * libstand sets ERANGE as only error case;
                         * however, the actual value is 16bit, so
                         * additional check is needed.
                         */
                        mode = strtol(value, NULL, 0);
                        if (errno != 0 || mode >> 16 != 0 || mode == 0) {
                                printf("bad value for video mode\n");
                                return (EINTR);
                        }
                        vid_mode = (uint16_t) mode;
                }
                lh->vid_mode = vid_mode;
        }

        src = arg;
        dst = (char *)PTOV(linux_data_tmp_addr + LINUX_CL_OFFSET);
        if (src != NULL) {
                while (*src != 0 && dst < (char *)
                    PTOV(linux_data_tmp_addr + LINUX_CL_END_OFFSET))
                        *(dst++) = *(src++);
        }
        *dst = 0;

        /* set up module relocation */
        if (mfp != NULL) {
                moveto = (bios_extmem / 1024 + 0x400) << 10;
                moveto = (moveto - mfp->f_size) & 0xfffff000;
                max_addr = (lh->header == LINUX_MAGIC_SIGNATURE &&
                    lh->version >= 0x0203 ?
                    lh->initrd_addr_max : LINUX_INITRD_MAX_ADDRESS);
                if (moveto + mfp->f_size >= max_addr)
                        moveto = (max_addr - mfp->f_size) & 0xfffff000;

                /*
                 * XXX: Linux 2.3.xx has a bug in the memory range check,
                 * so avoid the last page.
                 * XXX: Linux 2.2.xx has a bug in the memory range check,
                 * which is worse than that of Linux 2.3.xx, so avoid the
                 * last 64kb. *sigh*
                 */
                moveto -= 0x10000;

                /* need to relocate initrd first */
                if (linux_big == 0) {
                        relocater_data[2].src = relocater_data[1].src;
                        relocater_data[2].dest = relocater_data[1].dest;
                        relocater_data[2].size = relocater_data[1].size;
                        relocater_data[1].src = relocater_data[0].src;
                        relocater_data[1].dest = relocater_data[0].dest;
                        relocater_data[1].size = relocater_data[0].size;
                        relocater_data[0].src = mfp->f_addr;
                        relocater_data[0].dest = moveto;
                        relocater_data[0].size = mfp->f_size;
                } else {
                        relocater_data[1].src = relocater_data[0].src;
                        relocater_data[1].dest = relocater_data[0].dest;
                        relocater_data[1].size = relocater_data[0].size;
                        relocater_data[0].src = mfp->f_addr;
                        relocater_data[0].dest = moveto;
                        relocater_data[0].size = mfp->f_size;
                }
                lh->ramdisk_image = moveto;
                lh->ramdisk_size = mfp->f_size;
        }

        segment = linux_data_real_addr >> 4;
        relocator_ds = segment;
        relocator_es = segment;
        relocator_fs = segment;
        relocator_gs = segment;
        relocator_ss = segment;
        relocator_sp = LINUX_ESP;
        relocator_ip = 0;
        relocator_cs = segment + 0x20;
        relocator_a20_enabled = 1;
        i386_copyin(relocater, 0x600, relocater_size);

        /* Set VGA text mode */
        bios_set_text_mode(3);
        dev_cleanup();

        __exec((void *)0x600);

        panic("exec returned");

        return (EINTR);         /* not reached */
}

/*
 * Load an initrd image for an already loaded Linux kernel.
 *
 * filename - path of the initrd image.
 * dest     - unused.
 * result   - set to the loaded file on success.
 *
 * The image is loaded with file_loadraw() as type "module".  Returns 0 on
 * success and EFTYPE when there is no file name, no Linux kernel is
 * loaded or the image can not be loaded.
 */
static int
linux_loadinitrd(char *filename, uint64_t dest __unused,
    struct preloaded_file **result)
{
        struct preloaded_file *initrd;

        if (filename == NULL)
                return (EFTYPE);

        /* the initrd is only accepted once a Linux kernel is loaded */
        if (file_findfile(NULL, "Linux bzImage") == NULL &&
            file_findfile(NULL, "Linux zImage") == NULL)
                return (EFTYPE);

        initrd = file_loadraw(filename, "module", 0, NULL, 0);
        if (initrd == NULL)
                return (EFTYPE);

        *result = initrd;
        return (0);
}

/*
 * Exec handler of the initrd file format; an initrd is not executable,
 * so EFTYPE is returned unconditionally.
 */
static int
linux_execinitrd(struct preloaded_file *pf __unused)
{
        return (EFTYPE);
}