root/sys/dev/ksyms/ksyms.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/conf.h>
#include <sys/elf.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/uio.h>

#include <machine/elf.h>

#include <vm/pmap.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

#include "linker_if.h"

/* Indices into the section header array kh_shdr[] of struct ksyms_hdr. */
#define SHDR_NULL       0
#define SHDR_SYMTAB     1
#define SHDR_STRTAB     2
#define SHDR_SHSTRTAB   3

/* Number of section headers emitted (size of kh_shdr[]). */
#define SHDR_NUM        4

/* Section names, concatenated into ksyms_shstrtab[]. */
#define STR_SYMTAB      ".symtab"
#define STR_STRTAB      ".strtab"
#define STR_SHSTRTAB    ".shstrtab"

/* Name used both for the cdevsw and for the device node passed to make_dev(). */
#define KSYMS_DNAME     "ksyms"

/* Character device entry points implemented later in this file. */
static d_open_t ksyms_open;
static d_read_t ksyms_read;
static d_mmap_single_t ksyms_mmap_single;

/*
 * Device switch for the ksyms device: it supports open, read and
 * single-object mmap only; no write or ioctl handlers are installed.
 */
static struct cdevsw ksyms_cdevsw = {
        .d_version =    D_VERSION,
        .d_flags =      0,
        .d_open =       ksyms_open,
        .d_read =       ksyms_read,
        .d_mmap_single = ksyms_mmap_single,
        .d_name =       KSYMS_DNAME
};

/*
 * Per-open state.  One instance is allocated in ksyms_open(), attached to
 * the open file via devfs_set_cdevpriv(), and released by
 * ksyms_cdevpriv_dtr().
 */
struct ksyms_softc {
        LIST_ENTRY(ksyms_softc) sc_list;        /* linkage on ksyms_list */
        vm_offset_t             sc_uaddr;       /* not referenced in this file */
        size_t                  sc_usize;       /* not referenced in this file */
        vm_object_t             sc_obj;         /* VM object holding the snapshot */
        vm_size_t               sc_objsz;       /* snapshot size in bytes (not page-rounded) */
        struct proc            *sc_proc;        /* process that performed the open */
};

/* sx lock taken around ksyms_list lookups, insertions and removals. */
static struct sx                 ksyms_mtx;
/* Device node created at MOD_LOAD and destroyed at MOD_UNLOAD. */
static struct cdev              *ksyms_dev;
/* List of softc's for all current opens; used to limit one open per process. */
static LIST_HEAD(, ksyms_softc)  ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list);

/*
 * Section header string table contents: a leading NUL followed by the three
 * NUL-terminated section names.  The sh_name values set in ksyms_snapshot()
 * are byte offsets into this array.
 */
static const char       ksyms_shstrtab[] =
        "\0" STR_SYMTAB "\0" STR_STRTAB "\0" STR_SHSTRTAB "\0";

/*
 * Fixed-size prefix of the generated ELF image, written at offset 0 of the
 * snapshot object.  The member order is the on-disk layout: e_phoff, e_shoff
 * and the shstrtab sh_offset are all computed with offsetof() on this struct,
 * and the symbol table starts at sizeof(struct ksyms_hdr).
 */
struct ksyms_hdr {
        Elf_Ehdr        kh_ehdr;                /* ELF file header */
        Elf_Phdr        kh_txtphdr;             /* "text" program header */
        Elf_Phdr        kh_datphdr;             /* "data" program header */
        Elf_Shdr        kh_shdr[SHDR_NUM];      /* null, symtab, strtab, shstrtab */
        char            kh_shstrtab[sizeof(ksyms_shstrtab)]; /* copy of ksyms_shstrtab */
};

/* Running totals accumulated by ksyms_size_permod() over all linker files. */
struct tsizes {
        size_t          ts_symsz;       /* total symbol table size, in bytes */
        size_t          ts_strsz;       /* total string table size, in bytes */
};

/* Write cursor state threaded through ksyms_add() while building a snapshot. */
struct toffsets {
        struct ksyms_softc *to_sc;      /* softc whose object is being filled */
        vm_offset_t     to_symoff;      /* object offset of the next symbol write */
        vm_offset_t     to_stroff;      /* object offset of the next string write */
        unsigned        to_stridx;      /* string bytes emitted so far; added to st_name */
        size_t          to_resid;       /* bytes still available in the snapshot */
};

/* malloc(9) type for this file's allocations: softc, ELF header, bounce buffer. */
static MALLOC_DEFINE(M_KSYMS, "KSYMS", "Kernel Symbol Table");

/*
 * linker_file_foreach() callback: add the byte sizes of one linker file's
 * symbol table and string table to the struct tsizes passed in 'arg'.
 * Always returns 0.
 */
static int
ksyms_size_permod(linker_file_t lf, void *arg)
{
        struct tsizes *totals = arg;
        const Elf_Sym *unused_syms;
        caddr_t unused_strs;
        long nsyms;

        /* Only the returned counts are needed; the table pointers are ignored. */
        nsyms = LINKER_SYMTAB_GET(lf, &unused_syms);
        totals->ts_symsz += nsyms * sizeof(Elf_Sym);

        totals->ts_strsz += LINKER_STRTAB_GET(lf, &unused_strs);

        return (0);
}

/*
 * Compute the combined symbol table and string table sizes over every
 * linker file visited by linker_file_foreach(), storing the totals in *ts.
 */
static void
ksyms_size_calc(struct tsizes *ts)
{

        /* Start from zero; ksyms_size_permod() adds each file's share. */
        ts->ts_strsz = 0;
        ts->ts_symsz = 0;
        (void)linker_file_foreach(ksyms_size_permod, ts);
}

/*
 * Copy 'sz' bytes from the kernel buffer 'buf' into the softc's VM object
 * at byte offset 'off', using a single-segment UIO_WRITE request from
 * kernel space.  Returns whatever uiomove_object() returns.
 */
static int
ksyms_emit(struct ksyms_softc *sc, void *buf, off_t off, size_t sz)
{
        struct iovec vec = {
                .iov_base = buf,
                .iov_len = sz,
        };
        struct uio req = {
                .uio_iov = &vec,
                .uio_iovcnt = 1,
                .uio_offset = off,
                .uio_resid = (ssize_t)sz,
                .uio_segflg = UIO_SYSSPACE,
                .uio_rw = UIO_WRITE,
                .uio_td = curthread,
        };

        return (uiomove_object(sc->sc_obj, sc->sc_objsz, &req));
}

/* Size of the bounce buffer used to copy and fix up symbols: 256 entries. */
#define SYMBLKSZ        (256 * sizeof(Elf_Sym))

/*
 * linker_file_foreach() callback: append one linker file's symbol table and
 * string table to the snapshot object.
 *
 * 'arg' is the struct toffsets holding the target softc, the current write
 * offsets for symbols and strings, the cumulative string index and the
 * number of bytes still available in the snapshot.
 *
 * Symbols are copied SYMBLKSZ bytes at a time through a temporary buffer so
 * that each st_name can be rebased by to_stridx and, where fixup applies,
 * st_value can be replaced by the value LINKER_SYMBOL_VALUES() reports.
 *
 * Returns 0 on success, ENXIO when the tables do not fit in the remaining
 * space (to_resid), or the error returned by ksyms_emit().
 */
static int
ksyms_add(linker_file_t lf, void *arg)
{
        struct toffsets *to = arg;
        struct ksyms_softc *sc = to->to_sc;
        const Elf_Sym *symtab;
        Elf_Sym *symp;
        caddr_t strtab;
        char *buf;
        size_t i, len, nsyms, numsyms, strsz, symsz;
        linker_symval_t symval;
        int error;
        bool fixup;

        buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);

        MOD_SLOCK;
        numsyms = LINKER_SYMTAB_GET(lf, &symtab);
        strsz = LINKER_STRTAB_GET(lf, &strtab);
        symsz = numsyms * sizeof(Elf_Sym);

        /*
         * With RELOCATABLE_KERNEL every file has its symbol values rewritten;
         * otherwise only files whose id is greater than 1 do.
         */
#ifdef RELOCATABLE_KERNEL
        fixup = true;
#else
        fixup = lf->id > 1;
#endif

        error = 0;
        while (symsz > 0) {
                /*
                 * Compare as size_t.  The kernel's min() operates on u_int
                 * and would truncate both operands on 64-bit platforms.
                 */
                len = symsz < SYMBLKSZ ? symsz : SYMBLKSZ;
                bcopy(symtab, buf, len);

                /*
                 * Fix up the copied symbols:
                 *   string offsets are rebased into the merged string table
                 *   symbol values are made absolute (when fixup is set)
                 */
                symp = (Elf_Sym *)buf;
                nsyms = len / sizeof(Elf_Sym);
                for (i = 0; i < nsyms; i++) {
                        symp[i].st_name += to->to_stridx;
                        if (fixup && LINKER_SYMBOL_VALUES(lf,
                            (c_linker_sym_t)&symtab[i], &symval) == 0) {
                                symp[i].st_value = (uintptr_t)symval.value;
                        }
                }

                /* Refuse to write past the space sized for this snapshot. */
                if (len > to->to_resid) {
                        error = ENXIO;
                        break;
                }
                to->to_resid -= len;
                error = ksyms_emit(sc, buf, to->to_symoff, len);
                to->to_symoff += len;
                if (error != 0)
                        break;

                symtab += nsyms;
                symsz -= len;
        }
        MOD_SUNLOCK;
        free(buf, M_KSYMS);
        if (error != 0)
                return (error);

        /* The string table is emitted directly, without a bounce buffer. */
        if (strsz > to->to_resid)
                return (ENXIO);
        to->to_resid -= strsz;
        error = ksyms_emit(sc, strtab, to->to_stroff, strsz);
        to->to_stroff += strsz;
        to->to_stridx += strsz;

        return (error);
}

/*
 * Create a single ELF symbol table for the kernel and kernel modules loaded
 * at this time. Write this snapshot out in the process address space. Return
 * 0 on success, otherwise error.
 */
static int
ksyms_snapshot(struct ksyms_softc *sc, struct tsizes *ts)
{
        struct toffsets to;
        struct ksyms_hdr *hdr;
        int error;

        hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO);

        /*
         * Create the ELF header.
         */
        hdr->kh_ehdr.e_ident[EI_PAD] = 0;
        hdr->kh_ehdr.e_ident[EI_MAG0] = ELFMAG0;
        hdr->kh_ehdr.e_ident[EI_MAG1] = ELFMAG1;
        hdr->kh_ehdr.e_ident[EI_MAG2] = ELFMAG2;
        hdr->kh_ehdr.e_ident[EI_MAG3] = ELFMAG3;
        hdr->kh_ehdr.e_ident[EI_DATA] = ELF_DATA;
        hdr->kh_ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
        hdr->kh_ehdr.e_ident[EI_CLASS] = ELF_CLASS;
        hdr->kh_ehdr.e_ident[EI_VERSION] = EV_CURRENT;
        hdr->kh_ehdr.e_ident[EI_ABIVERSION] = 0;
        hdr->kh_ehdr.e_type = ET_EXEC;
        hdr->kh_ehdr.e_machine = ELF_ARCH;
        hdr->kh_ehdr.e_version = EV_CURRENT;
        hdr->kh_ehdr.e_entry = 0;
        hdr->kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_txtphdr);
        hdr->kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr);
        hdr->kh_ehdr.e_flags = 0;
        hdr->kh_ehdr.e_ehsize = sizeof(Elf_Ehdr);
        hdr->kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
        hdr->kh_ehdr.e_phnum = 2;       /* Text and Data */
        hdr->kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
        hdr->kh_ehdr.e_shnum = SHDR_NUM;
        hdr->kh_ehdr.e_shstrndx = SHDR_SHSTRTAB;

        /*
         * Add both the text and data program headers.
         */
        hdr->kh_txtphdr.p_type = PT_LOAD;
        /* XXX - is there a way to put the actual .text addr/size here? */
        hdr->kh_txtphdr.p_vaddr = 0;
        hdr->kh_txtphdr.p_memsz = 0;
        hdr->kh_txtphdr.p_flags = PF_R | PF_X;

        hdr->kh_datphdr.p_type = PT_LOAD;
        /* XXX - is there a way to put the actual .data addr/size here? */
        hdr->kh_datphdr.p_vaddr = 0;
        hdr->kh_datphdr.p_memsz = 0;
        hdr->kh_datphdr.p_flags = PF_R | PF_W | PF_X;

        /*
         * Add the section headers: null, symtab, strtab, shstrtab.
         */

        /* First section header - null */

        /* Second section header - symtab */
        hdr->kh_shdr[SHDR_SYMTAB].sh_name = 1; /* String offset (skip null) */
        hdr->kh_shdr[SHDR_SYMTAB].sh_type = SHT_SYMTAB;
        hdr->kh_shdr[SHDR_SYMTAB].sh_flags = 0;
        hdr->kh_shdr[SHDR_SYMTAB].sh_addr = 0;
        hdr->kh_shdr[SHDR_SYMTAB].sh_offset = sizeof(*hdr);
        hdr->kh_shdr[SHDR_SYMTAB].sh_size = ts->ts_symsz;
        hdr->kh_shdr[SHDR_SYMTAB].sh_link = SHDR_STRTAB;
        hdr->kh_shdr[SHDR_SYMTAB].sh_info = ts->ts_symsz / sizeof(Elf_Sym);
        hdr->kh_shdr[SHDR_SYMTAB].sh_addralign = sizeof(long);
        hdr->kh_shdr[SHDR_SYMTAB].sh_entsize = sizeof(Elf_Sym);

        /* Third section header - strtab */
        hdr->kh_shdr[SHDR_STRTAB].sh_name = 1 + sizeof(STR_SYMTAB);
        hdr->kh_shdr[SHDR_STRTAB].sh_type = SHT_STRTAB;
        hdr->kh_shdr[SHDR_STRTAB].sh_flags = 0;
        hdr->kh_shdr[SHDR_STRTAB].sh_addr = 0;
        hdr->kh_shdr[SHDR_STRTAB].sh_offset =
            hdr->kh_shdr[SHDR_SYMTAB].sh_offset + ts->ts_symsz;
        hdr->kh_shdr[SHDR_STRTAB].sh_size = ts->ts_strsz;
        hdr->kh_shdr[SHDR_STRTAB].sh_link = 0;
        hdr->kh_shdr[SHDR_STRTAB].sh_info = 0;
        hdr->kh_shdr[SHDR_STRTAB].sh_addralign = sizeof(char);
        hdr->kh_shdr[SHDR_STRTAB].sh_entsize = 0;

        /* Fourth section - shstrtab */
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_name = 1 + sizeof(STR_SYMTAB) +
            sizeof(STR_STRTAB);
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_type = SHT_STRTAB;
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_flags = 0;
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_addr = 0;
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_offset =
            offsetof(struct ksyms_hdr, kh_shstrtab);
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_size = sizeof(ksyms_shstrtab);
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_link = 0;
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_info = 0;
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_addralign = 0 /* sizeof(char) */;
        hdr->kh_shdr[SHDR_SHSTRTAB].sh_entsize = 0;

        /* Copy shstrtab into the header. */
        bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab));

        to.to_sc = sc;
        to.to_symoff = hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
        to.to_stroff = hdr->kh_shdr[SHDR_STRTAB].sh_offset;
        to.to_stridx = 0;
        to.to_resid = sc->sc_objsz - sizeof(struct ksyms_hdr);

        /* emit header */
        error = ksyms_emit(sc, hdr, 0, sizeof(*hdr));
        free(hdr, M_KSYMS);
        if (error != 0)
                return (error);

        /* Add symbol and string tables for each kernel module. */
        error = linker_file_foreach(ksyms_add, &to);
        if (error != 0)
                return (error);
        if (to.to_resid != 0)
                return (ENXIO);
        return (0);
}

/*
 * Destructor for the per-open softc registered with devfs_set_cdevpriv().
 * Unlinks the softc from ksyms_list, drops the reference on its snapshot
 * object (if one is attached) and frees the softc.
 */
static void
ksyms_cdevpriv_dtr(void *data)
{
        struct ksyms_softc *softc = data;

        sx_xlock(&ksyms_mtx);
        LIST_REMOVE(softc, sc_list);
        sx_xunlock(&ksyms_mtx);

        /* sc_obj is NULL when no snapshot is attached. */
        if (softc->sc_obj != NULL)
                vm_object_deallocate(softc->sc_obj);
        free(softc, M_KSYMS);
}

/*
 * Open handler.  Allocates a per-open softc, registers it as the file's
 * cdevpriv and builds a symbol table snapshot in a fresh VM object.
 *
 * Returns EBUSY if the calling process already has the device open, the
 * error from devfs_set_cdevpriv() if registration fails, or the error from
 * the last ksyms_snapshot() attempt.
 */
static int
ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
{
        struct ksyms_softc *existing, *sc;
        struct tsizes sizes;
        vm_size_t snapsz;
        int attempt, error;

        /*
         * Only one open() per process is permitted; the process has to
         * close() before it may open() again.
         */
        sx_xlock(&ksyms_mtx);
        LIST_FOREACH(existing, &ksyms_list, sc_list) {
                if (existing->sc_proc == td->td_proc)
                        break;
        }
        if (existing != NULL) {
                sx_xunlock(&ksyms_mtx);
                return (EBUSY);
        }

        sc = malloc(sizeof(*sc), M_KSYMS, M_WAITOK | M_ZERO);
        sc->sc_proc = td->td_proc;
        LIST_INSERT_HEAD(&ksyms_list, sc, sc_list);
        sx_xunlock(&ksyms_mtx);

        error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr);
        if (error != 0) {
                /* Registration failed, so run the destructor by hand. */
                ksyms_cdevpriv_dtr(sc);
                return (error);
        }

        /*
         * MOD_SLOCK cannot be held here because of a lock reversal with
         * KLD_SLOCK.  Instead, size and build the snapshot up to 3 times
         * until one attempt comes out "clean".  That is enough for the rare
         * case of a kernel module being loaded or unloaded concurrently.
         */
        attempt = 0;
        do {
                ksyms_size_calc(&sizes);
                snapsz = sizeof(struct ksyms_hdr) + sizes.ts_symsz +
                    sizes.ts_strsz;

                sc->sc_obj = vm_pager_allocate(OBJT_PHYS, NULL,
                    round_page(snapsz), VM_PROT_ALL, 0, td->td_ucred);
                sc->sc_objsz = snapsz;

                error = ksyms_snapshot(sc, &sizes);
                if (error != 0) {
                        /* Discard the failed attempt's object. */
                        vm_object_deallocate(sc->sc_obj);
                        sc->sc_obj = NULL;
                }
        } while (error != 0 && ++attempt < 3);

        return (error);
}

/*
 * Read handler: satisfy the request from the snapshot object attached to
 * this open.  Returns the error from devfs_get_cdevpriv() if the per-open
 * data cannot be fetched, otherwise the result of uiomove_object().
 */
static int
ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused)
{
        struct ksyms_softc *softc;
        int rc;

        rc = devfs_get_cdevpriv((void **)&softc);
        if (rc == 0)
                rc = uiomove_object(softc->sc_obj, softc->sc_objsz, uio);
        return (rc);
}

/*
 * mmap handler: hand back the snapshot object for mapping.
 *
 * Returns the error from devfs_get_cdevpriv() if the per-open data cannot
 * be fetched.  Returns EINVAL if any protection bit other than PROT_READ is
 * requested, or if the requested range does not fall within the snapshot
 * size rounded up to a page.  On success stores the object, with an extra
 * reference taken, in *objp and returns 0.
 */
static int
ksyms_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
    vm_object_t *objp, int nprot)
{
        struct ksyms_softc *softc;
        int rc;

        rc = devfs_get_cdevpriv((void **)&softc);
        if (rc != 0)
                return (rc);

        /* Reject anything beyond read access. */
        if ((nprot & ~PROT_READ) != 0)
                return (EINVAL);

        /* The start must lie inside the page-rounded snapshot... */
        if (*offset >= round_page(softc->sc_objsz))
                return (EINVAL);
        /* ...and the length must not run past its end. */
        if (size > round_page(softc->sc_objsz) - *offset)
                return (EINVAL);

        /* The reference taken here is handed to the caller through *objp. */
        vm_object_reference(softc->sc_obj);
        *objp = softc->sc_obj;
        return (0);
}

/*
 * Module event handler.
 *
 * MOD_LOAD initializes the list lock and creates the device node.
 * MOD_UNLOAD fails with EBUSY while any open is outstanding; otherwise it
 * destroys the device node and the lock.  MOD_SHUTDOWN is a no-op.  Any
 * other event type yields EOPNOTSUPP.
 */
static int
ksyms_modevent(module_t mod __unused, int type, void *data __unused)
{

        switch (type) {
        case MOD_LOAD:
                sx_init(&ksyms_mtx, "KSyms mtx");
                ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL,
                    0400, KSYMS_DNAME);
                return (0);
        case MOD_UNLOAD:
                /* Refuse to unload while a snapshot is still open. */
                if (!LIST_EMPTY(&ksyms_list))
                        return (EBUSY);
                destroy_dev(ksyms_dev);
                sx_destroy(&ksyms_mtx);
                return (0);
        case MOD_SHUTDOWN:
                return (0);
        default:
                return (EOPNOTSUPP);
        }
}

/* Declare the "ksyms" module with ksyms_modevent() as its event handler. */
DEV_MODULE(ksyms, ksyms_modevent, NULL);
/* Declare module version 1. */
MODULE_VERSION(ksyms, 1);