root/lib/libkldelf/ef.c
/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (c) 2000, Boris Popov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by Boris Popov.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>

#include <err.h>
#include <errno.h>
#include <gelf.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "kldelf.h"

/*
 * Upper bound on the number of PT_LOAD program headers that ef_open()
 * records in ef_segs[]; files with more loadable segments are rejected.
 */
#define MAXSEGS 16
/*
 * Per-file state for an ELF object opened through ef_open().  All of the
 * pointer members that refer to heap memory are released by ef_close().
 */
struct ef_file {
        /* Copy of the owning elf_file's ef_filename, made with strdup(). */
        char            *ef_name;
        /* Back-pointer to the generic handle this state is attached to. */
        struct elf_file *ef_efile;
        /* Program header array as returned by elf_read_phdrs(). */
        GElf_Phdr       *ef_ph;
        void            *ef_fpage;              /* First block of the file */
        int             ef_fplen;               /* length of first block */
        /* Word 0 and word 1 of the hash table: bucket and chain counts. */
        GElf_Hashelt    ef_nbuckets;
        GElf_Hashelt    ef_nchains;
        /* ef_buckets and ef_chains point into ef_hashtab; never free them. */
        GElf_Hashelt    *ef_buckets;
        GElf_Hashelt    *ef_chains;
        /* Raw hash table contents read from the SHT_HASH section. */
        GElf_Hashelt    *ef_hashtab;
        /* Dynamic string table and its size, from elf_read_string_table(). */
        caddr_t         ef_strtab;
        long            ef_strsz;
        /* Dynamic symbol table read by elf_read_symbols(). */
        GElf_Sym        *ef_symtab;
        /* Number of valid entries in ef_segs[]. */
        int             ef_nsegs;
        /* PT_LOAD headers; each entry points into the ef_ph array. */
        GElf_Phdr       *ef_segs[MAXSEGS];
        /* Verbosity level passed to ef_open(); gates optional warnings. */
        int             ef_verbose;
        GElf_Rel        *ef_rel;                /* relocation table */
        long            ef_relsz;               /* number of entries */
        GElf_Rela       *ef_rela;               /* relocation table */
        long            ef_relasz;              /* number of entries */
};

/* Internal helpers. */
static void     ef_print_phdr(GElf_Phdr *);
static GElf_Off ef_get_offset(elf_file_t, GElf_Addr);

/*
 * The remaining functions implement the struct elf_file_ops methods that
 * are collected in ef_file_ops below.
 */
static void     ef_close(elf_file_t ef);

static int      ef_seg_read_rel(elf_file_t ef, GElf_Addr address, size_t len,
                    void *dest);
static int      ef_seg_read_string(elf_file_t ef, GElf_Addr address, size_t len,
                    char *dest);

static GElf_Addr ef_symaddr(elf_file_t ef, GElf_Size symidx);
static int      ef_lookup_set(elf_file_t ef, const char *name,
                    GElf_Addr *startp, GElf_Addr *stopp, long *countp);
static int      ef_lookup_symbol(elf_file_t ef, const char *name,
                    GElf_Sym **sym, bool see_local);

/*
 * Method table installed into the generic handle's ef_ops by ef_open() and
 * cleared again by ef_close().
 */
static struct elf_file_ops ef_file_ops = {
        .close                  = ef_close,
        .seg_read_rel           = ef_seg_read_rel,
        .seg_read_string        = ef_seg_read_string,
        .symaddr                = ef_symaddr,
        .lookup_set             = ef_lookup_set,
        .lookup_symbol          = ef_lookup_symbol,
};

/*
 * Print a short size summary for one program header on stdout.
 *
 * Headers without PF_W are reported as "text=0x<filesz> ".  Writable
 * headers are reported as "data=0x<filesz>", followed by "+0x<extra>" when
 * the in-memory size exceeds the file size, and then a single space.
 */
static void
ef_print_phdr(GElf_Phdr *phdr)
{
        uintmax_t filesz, memsz;

        filesz = (uintmax_t)phdr->p_filesz;
        memsz = (uintmax_t)phdr->p_memsz;

        /* Read-only segment: only the file size is interesting. */
        if ((phdr->p_flags & PF_W) == 0) {
                printf("text=0x%jx ", filesz);
                return;
        }

        printf("data=0x%jx", filesz);
        /* The part of the segment that is not backed by file contents. */
        if (memsz > filesz)
                printf("+0x%jx", memsz - filesz);
        printf(" ");
}

/*
 * Translate a virtual address into a file offset.
 *
 * Walks the PT_LOAD headers recorded in ef_segs[] and, for the first one
 * whose [p_vaddr, p_vaddr + p_memsz) range contains addr, returns the
 * corresponding offset in the file.  Note that the range is based on
 * p_memsz, so an address in the part of a segment beyond p_filesz still
 * yields an offset.
 *
 * Returns 0 when no recorded segment contains addr; callers in this file
 * treat 0 as "not found".
 */
static GElf_Off
ef_get_offset(elf_file_t ef, GElf_Addr addr)
{
        const GElf_Phdr *ph;
        int i;

        for (i = 0; i < ef->ef_nsegs; i++) {
                ph = ef->ef_segs[i];
                /*
                 * Compare the distance from the segment base against
                 * p_memsz instead of computing p_vaddr + p_memsz: the sum
                 * can wrap around for a corrupted header or a segment that
                 * ends at the top of the address space.
                 */
                if (addr >= ph->p_vaddr && addr - ph->p_vaddr < ph->p_memsz)
                        return (ph->p_offset + (addr - ph->p_vaddr));
        }
        return (0);
}

/*
 * next two functions copied from link_elf.c
 */
/*
 * Look up a symbol by name through the dynamic symbol hash table.
 *
 * ef        - file state with hash, symbol and string tables loaded.
 * name      - NUL-terminated symbol name to search for.
 * sym       - on success, set to the matching entry inside ef_symtab.
 * see_local - when false, STB_LOCAL symbols are skipped and the search
 *             continues down the hash chain.
 *
 * Returns 0 when a defined symbol was found and ENOENT otherwise.  A name
 * match on an undefined symbol ends the search with ENOENT.  Inconsistent
 * table contents are reported with warnx() and also yield ENOENT.
 */
static int
ef_lookup_symbol(elf_file_t ef, const char *name, GElf_Sym **sym, bool see_local)
{
        unsigned long hash, symnum, visited;
        GElf_Sym *symp;
        char *strp;

        /* A table without buckets holds no symbols; also avoids "% 0". */
        if (ef->ef_nbuckets == 0)
                return (ENOENT);

        /* First, search hashed global symbols */
        hash = elf_hash(name);
        symnum = ef->ef_buckets[hash % ef->ef_nbuckets];

        for (visited = 0; symnum != STN_UNDEF; visited++) {
                /*
                 * Reject indices outside the chain array.  A well-formed
                 * chain visits each symbol at most once, so walking more
                 * than ef_nchains links means the chain loops.
                 */
                if (symnum >= ef->ef_nchains || visited >= ef->ef_nchains) {
                        warnx("ef_lookup_symbol: file %s have corrupted symbol table",
                            ef->ef_name);
                        return (ENOENT);
                }

                symp = ef->ef_symtab + symnum;
                /*
                 * The name must lie inside the string table.
                 * NOTE(review): assumes ef_strsz is the string table size
                 * in bytes as stored by elf_read_string_table() - confirm.
                 */
                if (symp->st_name == 0 || ef->ef_strsz <= 0 ||
                    (uintmax_t)symp->st_name >= (uintmax_t)ef->ef_strsz) {
                        warnx("ef_lookup_symbol: file %s have corrupted symbol table",
                            ef->ef_name);
                        return (ENOENT);
                }

                strp = ef->ef_strtab + symp->st_name;

                if (strcmp(name, strp) == 0) {
                        /*
                         * Accept symbols that have a section, and
                         * section-less functions with a non-zero value.
                         */
                        if (symp->st_shndx != SHN_UNDEF ||
                            (symp->st_value != 0 &&
                                GELF_ST_TYPE(symp->st_info) == STT_FUNC)) {
                                if (see_local ||
                                    GELF_ST_BIND(symp->st_info) != STB_LOCAL) {
                                        *sym = symp;
                                        return (0);
                                }
                        } else
                                return (ENOENT);
                }

                symnum = ef->ef_chains[symnum];
        }

        return (ENOENT);
}

/*
 * Find the bounds of the linker set called "name".
 *
 * Looks up the symbols "__start_set_<name>" and "__stop_set_<name>"
 * (local symbols included) and stores their values in *startp and *stopp.
 * *countp receives the distance between the two divided by the pointer
 * size of the file.
 *
 * Returns 0 on success, errno if the temporary name buffer cannot be
 * allocated, or the error from ef_lookup_symbol().  If only the stop symbol
 * is missing, *startp has already been written.
 */
static int
ef_lookup_set(elf_file_t ef, const char *name, GElf_Addr *startp,
    GElf_Addr *stopp, long *countp)
{
        GElf_Sym *bound;
        char *symname;
        int bufsz, rc;

        /*
         * One buffer serves both lookups: "__start_set_" is the longer of
         * the two prefixes, and sizeof already counts the terminating NUL.
         */
        bufsz = strlen(name) + sizeof("__start_set_");
        symname = malloc(bufsz);
        if (symname == NULL)
                return (errno);

        /* Address of the first entry. */
        snprintf(symname, bufsz, "%s%s", "__start_set_", name);
        rc = ef_lookup_symbol(ef, symname, &bound, true);
        if (rc == 0) {
                *startp = bound->st_value;

                /* Address just past the last entry. */
                snprintf(symname, bufsz, "%s%s", "__stop_set_", name);
                rc = ef_lookup_symbol(ef, symname, &bound, true);
                if (rc == 0) {
                        *stopp = bound->st_value;

                        /* Entries are pointer-sized. */
                        *countp = (*stopp - *startp) /
                            elf_pointer_size(ef->ef_efile);
                }
        }

        free(symname);
        return (rc);
}

/*
 * Return the value of the symbol at index symidx, but only when that
 * symbol is STB_LOCAL, has a section (not SHN_UNDEF) and a non-zero value.
 * Every other case, including an index at or beyond ef_nchains, yields 0.
 */
static GElf_Addr
ef_symaddr(elf_file_t ef, GElf_Size symidx)
{
        const GElf_Sym *entry;

        if (symidx >= ef->ef_nchains)
                return (0);

        entry = &ef->ef_symtab[symidx];
        if (GELF_ST_BIND(entry->st_info) != STB_LOCAL)
                return (0);
        if (entry->st_shndx == SHN_UNDEF || entry->st_value == 0)
                return (0);
        return (entry->st_value);
}

/*
 * Load the hash, symbol, string and relocation tables named by PT_DYNAMIC.
 *
 * ef    - file state; ef_segs[]/ef_nsegs must already be filled in because
 *         dynamic-table addresses are translated with ef_get_offset().
 * phdyn - the PT_DYNAMIC program header of the file.
 *
 * The function works in three passes:
 *   1. find the single SHT_DYNAMIC section and check it against phdyn;
 *   2. walk the dynamic entries and record file offsets and sizes;
 *   3. walk the section headers and read each section whose offset matches
 *      a recorded dynamic entry into the corresponding ef_* member.
 *
 * Returns 0 on success or an errno value (mostly EFTYPE) on failure.  On
 * failure, tables that were already read stay attached to ef; ef_open()
 * releases them through ef_close().
 */
static int
ef_parse_dynamic(elf_file_t ef, const GElf_Phdr *phdyn)
{
        GElf_Shdr *shdr;
        GElf_Dyn *dyn, *dp;
        size_t i, ndyn, nshdr, nsym;
        int error;
        GElf_Off hash_off, sym_off, str_off;
        GElf_Off rel_off;
        GElf_Off rela_off;
        /* Same width as d_val and sh_size so nothing is truncated. */
        GElf_Xword rel_sz;
        GElf_Xword rela_sz;
        int dynamic_idx;

        /*
         * The kernel linker parses the PT_DYNAMIC segment to find
         * various important tables.  The gelf API of libelf is
         * section-oriented and requires extracting data from sections
         * instead of segments (program headers).  As a result,
         * iterate over section headers to read various tables after
         * parsing values from PT_DYNAMIC.
         */
        error = elf_read_shdrs(ef->ef_efile, &nshdr, &shdr);
        if (error != 0)
                return (EFTYPE);
        dyn = NULL;

        /* Find section for .dynamic. */
        dynamic_idx = -1;
        for (i = 0; i < nshdr; i++) {
                if (shdr[i].sh_type == SHT_DYNAMIC) {
                        /*
                         * PowerPC kernels contain additional sections
                         * beyond .dynamic in PT_DYNAMIC due to a linker
                         * script bug.  Permit a section with a smaller
                         * size as a workaround.
                         */
                        if (shdr[i].sh_offset != phdyn->p_offset ||
                            ((elf_machine(ef->ef_efile) == EM_PPC ||
                            elf_machine(ef->ef_efile) == EM_PPC64) ?
                            shdr[i].sh_size > phdyn->p_filesz :
                            shdr[i].sh_size != phdyn->p_filesz)) {
                                warnx(".dynamic section doesn't match phdr");
                                error = EFTYPE;
                                goto out;
                        }
                        if (dynamic_idx != -1) {
                                warnx("multiple SHT_DYNAMIC sections");
                                error = EFTYPE;
                                goto out;
                        }
                        dynamic_idx = i;
                }
        }

        /* Do not hand an invalid section index to elf_read_dynamic(). */
        if (dynamic_idx == -1) {
                warnx("%s: no SHT_DYNAMIC section found", ef->ef_name);
                error = EFTYPE;
                goto out;
        }

        error = elf_read_dynamic(ef->ef_efile, dynamic_idx, &ndyn, &dyn);
        if (error != 0)
                goto out;

        /*
         * Collect offsets and sizes from the dynamic entries.  An offset
         * of 0 doubles as "not seen yet", which is also what
         * ef_get_offset() returns for an address outside every segment.
         */
        hash_off = rel_off = rela_off = sym_off = str_off = 0;
        rel_sz = rela_sz = 0;
        for (i = 0; i < ndyn; i++) {
                dp = &dyn[i];
                if (dp->d_tag == DT_NULL)
                        break;

                switch (dp->d_tag) {
                case DT_HASH:
                        if (hash_off != 0)
                                warnx("second DT_HASH entry ignored");
                        else
                                hash_off = ef_get_offset(ef, dp->d_un.d_ptr);
                        break;
                case DT_STRTAB:
                        if (str_off != 0)
                                warnx("second DT_STRTAB entry ignored");
                        else
                                str_off = ef_get_offset(ef, dp->d_un.d_ptr);
                        break;
                case DT_SYMTAB:
                        if (sym_off != 0)
                                warnx("second DT_SYMTAB entry ignored");
                        else
                                sym_off = ef_get_offset(ef, dp->d_un.d_ptr);
                        break;
                case DT_SYMENT:
                        if (dp->d_un.d_val != elf_object_size(ef->ef_efile,
                            ELF_T_SYM)) {
                                error = EFTYPE;
                                goto out;
                        }
                        break;
                case DT_REL:
                        if (rel_off != 0)
                                warnx("second DT_REL entry ignored");
                        else
                                rel_off = ef_get_offset(ef, dp->d_un.d_ptr);
                        break;
                case DT_RELSZ:
                        if (rel_sz != 0)
                                warnx("second DT_RELSZ entry ignored");
                        else
                                rel_sz = dp->d_un.d_val;
                        break;
                case DT_RELENT:
                        if (dp->d_un.d_val != elf_object_size(ef->ef_efile,
                            ELF_T_REL)) {
                                error = EFTYPE;
                                goto out;
                        }
                        break;
                case DT_RELA:
                        if (rela_off != 0)
                                warnx("second DT_RELA entry ignored");
                        else
                                rela_off = ef_get_offset(ef, dp->d_un.d_ptr);
                        break;
                case DT_RELASZ:
                        if (rela_sz != 0)
                                warnx("second DT_RELASZ entry ignored");
                        else
                                rela_sz = dp->d_un.d_val;
                        break;
                case DT_RELAENT:
                        if (dp->d_un.d_val != elf_object_size(ef->ef_efile,
                            ELF_T_RELA)) {
                                error = EFTYPE;
                                goto out;
                        }
                        break;
                }
        }
        /* warnx() appends the newline itself. */
        if (hash_off == 0) {
                warnx("%s: no .hash section found", ef->ef_name);
                error = EFTYPE;
                goto out;
        }
        if (sym_off == 0) {
                warnx("%s: no .dynsym section found", ef->ef_name);
                error = EFTYPE;
                goto out;
        }
        if (str_off == 0) {
                warnx("%s: no .dynstr section found", ef->ef_name);
                error = EFTYPE;
                goto out;
        }

        /* Read every section whose offset matches a dynamic entry. */
        nsym = 0;
        for (i = 0; i < nshdr; i++) {
                switch (shdr[i].sh_type) {
                case SHT_HASH:
                        if (shdr[i].sh_offset != hash_off) {
                                warnx("%s: ignoring SHT_HASH at different offset from DT_HASH",
                                    ef->ef_name);
                                break;
                        }

                        /*
                         * libelf(3) mentions ELF_T_HASH, but it is
                         * not defined.
                         */
                        if (shdr[i].sh_size < sizeof(*ef->ef_hashtab) * 2) {
                                warnx("hash section too small");
                                error = EFTYPE;
                                goto out;
                        }
                        error = elf_read_data(ef->ef_efile, ELF_T_WORD,
                            shdr[i].sh_offset, shdr[i].sh_size,
                            (void **)&ef->ef_hashtab);
                        if (error != 0) {
                                warnc(error, "can't read hash table");
                                goto out;
                        }
                        ef->ef_nbuckets = ef->ef_hashtab[0];
                        ef->ef_nchains = ef->ef_hashtab[1];
                        /*
                         * Sum the word count in a wide type: in the width
                         * of GElf_Hashelt the addition could wrap and let
                         * a short section with huge counts pass the check.
                         */
                        if (((uintmax_t)2 + ef->ef_nbuckets +
                            ef->ef_nchains) * sizeof(*ef->ef_hashtab) !=
                            shdr[i].sh_size) {
                                warnx("inconsistent hash section size");
                                error = EFTYPE;
                                goto out;
                        }

                        ef->ef_buckets = ef->ef_hashtab + 2;
                        ef->ef_chains = ef->ef_buckets + ef->ef_nbuckets;
                        break;
                case SHT_DYNSYM:
                        if (shdr[i].sh_offset != sym_off) {
                                warnx("%s: ignoring SHT_DYNSYM at different offset from DT_SYMTAB",
                                    ef->ef_name);
                                break;
                        }
                        error = elf_read_symbols(ef->ef_efile, i, &nsym,
                            &ef->ef_symtab);
                        if (error != 0) {
                                if (ef->ef_verbose)
                                        warnx("%s: can't load .dynsym section (0x%jx)",
                                            ef->ef_name, (uintmax_t)sym_off);
                                goto out;
                        }
                        break;
                case SHT_STRTAB:
                        if (shdr[i].sh_offset != str_off)
                                break;
                        error = elf_read_string_table(ef->ef_efile,
                            &shdr[i], &ef->ef_strsz, &ef->ef_strtab);
                        if (error != 0) {
                                warnx("can't load .dynstr section");
                                error = EIO;
                                goto out;
                        }
                        break;
                case SHT_REL:
                        if (shdr[i].sh_offset != rel_off)
                                break;
                        if (shdr[i].sh_size != rel_sz) {
                                warnx("%s: size mismatch for DT_REL section",
                                    ef->ef_name);
                                error = EFTYPE;
                                goto out;
                        }
                        error = elf_read_rel(ef->ef_efile, i, &ef->ef_relsz,
                            &ef->ef_rel);
                        if (error != 0) {
                                warnx("%s: cannot load DT_REL section",
                                    ef->ef_name);
                                goto out;
                        }
                        break;
                case SHT_RELA:
                        if (shdr[i].sh_offset != rela_off)
                                break;
                        if (shdr[i].sh_size != rela_sz) {
                                warnx("%s: size mismatch for DT_RELA section",
                                    ef->ef_name);
                                error = EFTYPE;
                                goto out;
                        }
                        error = elf_read_rela(ef->ef_efile, i, &ef->ef_relasz,
                            &ef->ef_rela);
                        if (error != 0) {
                                warnx("%s: cannot load DT_RELA section",
                                    ef->ef_name);
                                goto out;
                        }
                        break;
                }
        }

        /* Every table announced by PT_DYNAMIC must have been loaded. */
        if (ef->ef_hashtab == NULL) {
                warnx("%s: did not find a symbol hash table", ef->ef_name);
                error = EFTYPE;
                goto out;
        }
        if (ef->ef_symtab == NULL) {
                warnx("%s: did not find a dynamic symbol table", ef->ef_name);
                error = EFTYPE;
                goto out;
        }
        /* The chain array has one slot per symbol table entry. */
        if (nsym != ef->ef_nchains) {
                warnx("%s: symbol count mismatch", ef->ef_name);
                error = EFTYPE;
                goto out;
        }
        if (ef->ef_strtab == NULL) {
                warnx("%s: did not find a dynamic string table", ef->ef_name);
                error = EFTYPE;
                goto out;
        }
        if (rel_off != 0 && ef->ef_rel == NULL) {
                warnx("%s: did not find a DT_REL relocation table",
                    ef->ef_name);
                error = EFTYPE;
                goto out;
        }
        if (rela_off != 0 && ef->ef_rela == NULL) {
                warnx("%s: did not find a DT_RELA relocation table",
                    ef->ef_name);
                error = EFTYPE;
                goto out;
        }

        error = 0;
out:
        free(dyn);
        free(shdr);
        return (error);
}

/*
 * Read len bytes located at virtual address "address" into dest and then
 * run every entry of the REL and RELA tables through elf_reloc() against
 * that buffer.
 *
 * Returns EFAULT when the address does not translate to a file offset
 * (with a warning if ef_verbose is set), the error from
 * elf_read_raw_data() or elf_reloc() if one of them fails, and 0 otherwise.
 */
static int
ef_seg_read_rel(elf_file_t ef, GElf_Addr address, size_t len, void *dest)
{
        GElf_Off file_off;
        long idx;
        int rc;

        file_off = ef_get_offset(ef, address);
        if (file_off == 0) {
                if (ef->ef_verbose)
                        warnx("ef_seg_read_rel(%s): bad address (%jx)",
                            ef->ef_name, (uintmax_t)address);
                return (EFAULT);
        }

        rc = elf_read_raw_data(ef->ef_efile, file_off, dest, len);
        if (rc != 0)
                return (rc);

        /* Entries without an addend first ... */
        for (idx = 0; idx < ef->ef_relsz; idx++) {
                rc = elf_reloc(ef->ef_efile, &ef->ef_rel[idx], ELF_T_REL, 0,
                    address, len, dest);
                if (rc != 0)
                        return (rc);
        }
        /* ... then entries that carry an explicit addend. */
        for (idx = 0; idx < ef->ef_relasz; idx++) {
                rc = elf_reloc(ef->ef_efile, &ef->ef_rela[idx], ELF_T_RELA, 0,
                    address, len, dest);
                if (rc != 0)
                        return (rc);
        }
        return (0);
}

/*
 * Read a string located at virtual address "address" into dest, passing
 * len through to elf_read_raw_string().
 *
 * Returns EFAULT when the address does not translate to a file offset
 * (with a warning if ef_verbose is set); otherwise returns whatever
 * elf_read_raw_string() returns.
 */
static int
ef_seg_read_string(elf_file_t ef, GElf_Addr address, size_t len, char *dest)
{
        GElf_Off file_off;

        file_off = ef_get_offset(ef, address);
        if (file_off != 0) {
                return (elf_read_raw_string(ef->ef_efile, file_off, dest,
                    len));
        }

        if (ef->ef_verbose)
                warnx("ef_seg_read_string(%s): bad offset (%jx:%ju)",
                    ef->ef_name, (uintmax_t)address, (uintmax_t)file_off);
        return (EFAULT);
}

/*
 * Attach ef-format state to an already opened ELF file.
 *
 * efile   - generic handle; its ELF header (ef_hdr) and ef_filename are
 *           read here, and ef_ef/ef_ops are set on success.
 * verbose - verbosity level; values above 1 print a per-segment summary.
 *
 * The file must have program and section headers of the expected entry
 * sizes, a PT_DYNAMIC header and at most MAXSEGS PT_LOAD headers.
 *
 * Returns 0 on success.  On failure an errno value is returned (EFTYPE for
 * files that do not qualify) and, if state had already been allocated, it
 * is torn down with ef_close(), which also resets ef_ef and ef_ops.
 */
int
ef_open(struct elf_file *efile, int verbose)
{
        elf_file_t ef;
        GElf_Ehdr *hdr;
        size_t i, nphdr, nsegs;
        int error;
        GElf_Phdr *phdr, *phdyn;

        hdr = &efile->ef_hdr;
        if (hdr->e_phnum == 0 ||
            hdr->e_phentsize != elf_object_size(efile, ELF_T_PHDR) ||
            hdr->e_shnum == 0 || hdr->e_shoff == 0 ||
            hdr->e_shentsize != elf_object_size(efile, ELF_T_SHDR))
                return (EFTYPE);

        /* Zeroed so that ef_close() can run on partially built state. */
        ef = calloc(1, sizeof(*ef));
        if (ef == NULL)
                return (errno);

        efile->ef_ef = ef;
        efile->ef_ops = &ef_file_ops;

        ef->ef_verbose = verbose;
        /* ef_close() dereferences ef_efile, so set it before any "goto". */
        ef->ef_efile = efile;
        ef->ef_name = strdup(efile->ef_filename);
        if (ef->ef_name == NULL) {
                /* The name is printed with "%s" in later diagnostics. */
                error = ENOMEM;
                goto out;
        }

        error = elf_read_phdrs(efile, &nphdr, &ef->ef_ph);
        if (error != 0)
                goto out;

        /* Anything that goes wrong from here on means "wrong file type". */
        error = EFTYPE;
        nsegs = 0;
        phdyn = NULL;
        phdr = ef->ef_ph;
        for (i = 0; i < nphdr; i++, phdr++) {
                if (verbose > 1)
                        ef_print_phdr(phdr);
                switch (phdr->p_type) {
                case PT_LOAD:
                        /* Keep counting past MAXSEGS to detect overflow. */
                        if (nsegs < MAXSEGS)
                                ef->ef_segs[nsegs] = phdr;
                        nsegs++;
                        break;
                case PT_PHDR:
                        break;
                case PT_DYNAMIC:
                        phdyn = phdr;
                        break;
                }
        }
        if (verbose > 1)
                printf("\n");
        if (phdyn == NULL) {
                warnx("Skipping %s: not dynamically-linked",
                    ef->ef_name);
                goto out;
        }

        if (nsegs > MAXSEGS) {
                warnx("%s: too many segments", ef->ef_name);
                goto out;
        }
        ef->ef_nsegs = nsegs;

        error = ef_parse_dynamic(ef, phdyn);
out:
        if (error != 0)
                ef_close(ef);
        return (error);
}

/*
 * Release everything owned by ef and detach it from its generic handle.
 *
 * The handle's ef_ops and ef_ef are reset to NULL, all tables and the name
 * copy are freed, and finally ef itself is freed; ef must not be used
 * afterwards.  ef_buckets, ef_chains and ef_segs[] point into ef_hashtab
 * and ef_ph respectively, so they need no separate release.
 */
static void
ef_close(elf_file_t ef)
{
        struct elf_file *owner;

        /* Unhook from the generic handle. */
        owner = ef->ef_efile;
        owner->ef_ops = NULL;
        owner->ef_ef = NULL;

        /* free(NULL) is a no-op, so members never loaded are fine. */
        free(ef->ef_name);
        free(ef->ef_ph);
        free(ef->ef_hashtab);
        free(ef->ef_symtab);
        free(ef->ef_strtab);
        free(ef->ef_rel);
        free(ef->ef_rela);
        free(ef);
}