root/lib/libpmcstat/libpmcstat_image.c
/*-
 * Copyright (c) 2003-2008 Joseph Koshy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/types.h>
#include <sys/cpuset.h>
#include <sys/param.h>
#include <sys/endian.h>
#include <sys/pmc.h>
#include <sys/sysctl.h>
#include <sys/imgact_aout.h>
#include <sys/imgact_elf.h>

#include <netinet/in.h>

#include <assert.h>
#include <err.h>
#include <fcntl.h>
#include <pmc.h>
#include <pmclog.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include "libpmcstat.h"

/*
 * Minimum/maximum helpers.  These are plain macros, so each argument
 * may be evaluated more than once: pass only side-effect-free
 * expressions.
 */
#define min(A,B)                ((A) < (B) ? (A) : (B))
#define max(A,B)                ((A) > (B) ? (A) : (B))

/*
 * Add the function symbols found in the given symbol table section to
 * the list associated with the image.
 *
 * 'e' is the ELF descriptor being read, 'scn' the symbol table section
 * and 'sh' its section header.  Only defined STT_FUNC symbols that are
 * not already present in the image's table are appended.  On return
 * the table is sorted with pmcstat_symbol_compare() and zero-sized
 * symbols have been extended up to the start of their successor.
 *
 * The function returns silently, leaving the existing table intact,
 * if the section data cannot be read, the section header is malformed
 * or memory cannot be allocated.
 */
void
pmcstat_image_add_symbols(struct pmcstat_image *image, Elf *e,
    Elf_Scn *scn, GElf_Shdr *sh)
{
        int firsttime;
        size_t n, newsyms, nshsyms, nfuncsyms;
        struct pmcstat_symbol *symptr;
        char *fnname;
        GElf_Sym sym;
        Elf_Data *data;

        if ((data = elf_getdata(scn, NULL)) == NULL)
                return;

        /*
         * A malformed section header may carry a zero entry size;
         * refuse it rather than dividing by zero below.
         */
        if (sh->sh_entsize == 0)
                return;

        /*
         * Determine the number of functions named in this
         * section.
         */
        nshsyms = sh->sh_size / sh->sh_entsize;
        for (n = nfuncsyms = 0; n < nshsyms; n++) {
                if (gelf_getsym(data, (int) n, &sym) != &sym)
                        return;
                if (GELF_ST_TYPE(sym.st_info) == STT_FUNC)
                        nfuncsyms++;
        }

        if (nfuncsyms == 0)
                return;

        /*
         * Allocate space for the new entries.  reallocarray() returns
         * NULL on failure and leaves the old table untouched; a
         * successful call may legitimately return the same pointer.
         */
        firsttime = image->pi_symbols == NULL;
        symptr = reallocarray(image->pi_symbols,
            image->pi_symcount + nfuncsyms, sizeof(*symptr));
        if (symptr == NULL)
                return;
        image->pi_symbols = symptr;

        /*
         * Append new symbols to the end of the current table.  The
         * symbol count is only updated after the loop, so the
         * duplicate search below looks at previously known symbols
         * only.
         */
        symptr += image->pi_symcount;

        for (n = newsyms = 0; n < nshsyms; n++) {
                if (gelf_getsym(data, (int) n, &sym) != &sym)
                        return;
                if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
                        continue;

                if (sym.st_shndx == STN_UNDEF)
                        continue;

                if (!firsttime && pmcstat_symbol_search(image, sym.st_value))
                        continue; /* We've seen this symbol already. */

                if ((fnname = elf_strptr(e, sh->sh_link, sym.st_name))
                    == NULL)
                        continue;

#if defined(__aarch64__) || defined(__arm__)
                /* Ignore ARM mapping symbols. */
                if (fnname[0] == '$' &&
                    (fnname[1] == 'a' || fnname[1] == 't' ||
                    fnname[1] == 'd' || fnname[1] == 'x'))
                        continue;

                /*
                 * Clear LSB from starting addresses for functions
                 * which execute in Thumb mode.  We should perhaps
                 * only do this for functions in a $t mapping symbol
                 * range, but parsing mapping symbols would be a lot
                 * of work and function addresses shouldn't have the
                 * LSB set otherwise.
                 */
                sym.st_value &= ~1;
#endif

                symptr->ps_name  = pmcstat_string_intern(fnname);
                symptr->ps_start = sym.st_value - image->pi_vaddr;
                symptr->ps_end   = symptr->ps_start + sym.st_size;

                symptr++;
                newsyms++;
        }

        image->pi_symcount += newsyms;
        if (image->pi_symcount == 0)
                return;

        assert(newsyms <= nfuncsyms);

        /*
         * Return space to the system if there were duplicates.  If
         * the shrinking reallocation fails the larger table is still
         * valid, so keep using it instead of losing the pointer.
         */
        if (newsyms < nfuncsyms) {
                symptr = reallocarray(image->pi_symbols,
                    image->pi_symcount, sizeof(*symptr));
                if (symptr != NULL)
                        image->pi_symbols = symptr;
        }

        /*
         * Keep the list of symbols sorted.
         */
        qsort(image->pi_symbols, image->pi_symcount,
            sizeof(*image->pi_symbols), pmcstat_symbol_compare);

        /*
         * Deal with function symbols that have a size of 'zero' by
         * making them extend to the next higher address.  These
         * symbols are usually defined in assembly code.
         */
        for (symptr = image->pi_symbols;
             symptr < image->pi_symbols + (image->pi_symcount - 1);
             symptr++)
                if (symptr->ps_start == symptr->ps_end)
                        symptr->ps_end = (symptr+1)->ps_start;
}

/*
 * Record the fact that the text of image 'image' is mapped into
 * process 'pp', with 'start' being the address the image was loaded
 * at.  A new PC map entry covering only the text portion of the image
 * is inserted into the process' map, which is kept ordered by address.
 *
 * Images whose pi_start and pi_end are both zero get no map entry.
 * Failure to allocate the entry is fatal.
 */

void
pmcstat_image_link(struct pmcstat_process *pp, struct pmcstat_image *image,
    uintfptr_t start)
{
        struct pmcstat_pcmap *iter, *pcmnew;
        uintfptr_t delta;
#ifdef __powerpc__
        unsigned long kernbase;
        size_t kernbase_len;
#endif

        assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN &&
            image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE);

        /*
         * An image may legitimately have nothing of value in .text.
         * No samples should ever land in it, so skip the map entry.
         */
        if (image->pi_start == 0 && image->pi_end == 0)
                return;

        pcmnew = malloc(sizeof(*pcmnew));
        if (pcmnew == NULL)
                err(EX_OSERR, "ERROR: Cannot create a map entry");

#ifdef __powerpc__
        /*
         * The PowerPC kernel is of DYN type: it is first loaded at a
         * base address and then relocated.  'start' holds the
         * relocated address while the symbols keep using the original
         * base, so the base has to be subtracted to obtain the right
         * offset.
         */
        if (pp->pp_pid == -1) {
                kernbase = 0;
                kernbase_len = sizeof(kernbase);
                if (sysctlbyname("kern.base_address", &kernbase, &kernbase_len,
                    NULL, 0) != -1)
                        start -= kernbase;
                else
                        warnx(
                            "WARNING: Could not retrieve kernel base address");
        }
#endif

        /* Restrict the entry to the text portion of the object. */
        delta = start - image->pi_vaddr;
        pcmnew->ppm_image  = image;
        pcmnew->ppm_lowpc  = image->pi_start + delta;
        pcmnew->ppm_highpc = image->pi_end + delta;

        assert(pcmnew->ppm_lowpc < pcmnew->ppm_highpc);

        /*
         * Find the first existing entry that lies entirely above the
         * new one.  Overlapped mmap()'s are assumed to never occur.
         */
        TAILQ_FOREACH(iter, &pp->pp_map, ppm_next) {
                if (iter->ppm_lowpc >= pcmnew->ppm_highpc)
                        break;
        }

        if (iter != NULL) {
                TAILQ_INSERT_BEFORE(iter, pcmnew, ppm_next);
        } else {
                TAILQ_INSERT_TAIL(&pp->pp_map, pcmnew, ppm_next);
        }
}

/*
 * Determine whether a given executable image is an A.OUT object, and
 * if so, fill in its parameters from the text file.
 * Sets image->pi_type.
 */

void
pmcstat_image_get_aout_params(struct pmcstat_image *image,
    struct pmcstat_args *args)
{
        int fd;
        ssize_t nbytes;
        struct exec ex;
        const char *path;
        char buffer[PATH_MAX];

        path = pmcstat_string_unintern(image->pi_execpath);
        assert(path != NULL);

        if (image->pi_iskernelmodule)
                errx(EX_SOFTWARE,
                    "ERROR: a.out kernel modules are unsupported \"%s\"", path);

        (void) snprintf(buffer, sizeof(buffer), "%s%s",
            args->pa_fsroot, path);

        if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
            (nbytes = read(fd, &ex, sizeof(ex))) < 0) {
                if (args->pa_verbosity >= 2)
                        warn("WARNING: Cannot determine type of \"%s\"",
                            path);
                image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
                if (fd != -1)
                        (void) close(fd);
                return;
        }

        (void) close(fd);

        if ((unsigned) nbytes != sizeof(ex) ||
            N_BADMAG(ex))
                return;

        image->pi_type = PMCSTAT_IMAGE_AOUT;

        /* TODO: the rest of a.out processing */

        return;
}

/*
 * Examine an ELF file to determine the extent of its executable
 * sections and to load its function symbols.
 *
 * The file is looked up under the configured filesystem root.  On
 * entry image->pi_type must be PMCSTAT_IMAGE_UNKNOWN.  On success
 * pi_type is set to PMCSTAT_IMAGE_ELF32 or PMCSTAT_IMAGE_ELF64 and
 * pi_start, pi_end, pi_vaddr, pi_isdynamic, pi_dynlinkerpath,
 * pi_fullpath and pi_name are filled in.  On any failure a warning is
 * printed (in some cases only at higher verbosity) and pi_type is left
 * as PMCSTAT_IMAGE_INDETERMINABLE.
 */

void
pmcstat_image_get_elf_params(struct pmcstat_image *image,
    struct pmcstat_args *args)
{
        int fd;
        size_t i, nph, nsh, rawsize;
        const char *path, *elfbase;
        char *endp;
        bool first_exec_segment;
        uintfptr_t minva, maxva;
        Elf *e;
        Elf_Scn *scn;
        GElf_Ehdr eh;
        GElf_Phdr ph;
        GElf_Shdr sh;
        enum pmcstat_image_type image_type;
        char buffer[PATH_MAX];

        assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);

        image->pi_start = minva = ~(uintfptr_t) 0;
        image->pi_end = maxva = (uintfptr_t) 0;
        image->pi_type = image_type = PMCSTAT_IMAGE_INDETERMINABLE;
        image->pi_isdynamic = 0;
        image->pi_dynlinkerpath = NULL;
        image->pi_vaddr = 0;

        path = pmcstat_string_unintern(image->pi_execpath);
        assert(path != NULL);

        /*
         * Look for files under FSROOT/PATHNAME.
         */
        (void) snprintf(buffer, sizeof(buffer), "%s%s",
            args->pa_fsroot, path);

        e = NULL;
        fd = open(buffer, O_RDONLY, 0);
        if (fd < 0) {
                warnx("WARNING: Cannot open \"%s\".",
                    buffer);
                goto done;
        }

        if (elf_version(EV_CURRENT) == EV_NONE) {
                /* warnx() appends the newline itself. */
                warnx("WARNING: failed to init elf");
                goto done;
        }

        if ((e = elf_begin(fd, ELF_C_READ, NULL)) == NULL) {
                warnx("WARNING: Cannot read \"%s\".",
                    buffer);
                goto done;
        }

        if (elf_kind(e) != ELF_K_ELF) {
                if (args->pa_verbosity >= 2)
                        warnx("WARNING: Cannot determine the type of \"%s\".",
                            buffer);
                goto done;
        }

        if (gelf_getehdr(e, &eh) != &eh) {
                warnx(
                    "WARNING: Cannot retrieve the ELF Header for \"%s\": %s.",
                    buffer, elf_errmsg(-1));
                goto done;
        }

        /* Relocatable objects are only acceptable as kernel modules. */
        if (eh.e_type != ET_EXEC && eh.e_type != ET_DYN &&
            !(image->pi_iskernelmodule && eh.e_type == ET_REL)) {
                warnx("WARNING: \"%s\" is of an unsupported ELF type.",
                    buffer);
                goto done;
        }

        image_type = eh.e_ident[EI_CLASS] == ELFCLASS32 ?
            PMCSTAT_IMAGE_ELF32 : PMCSTAT_IMAGE_ELF64;

        /*
         * Determine the virtual address where an executable would be
         * loaded.  Additionally, for dynamically linked executables,
         * save the pathname to the runtime linker.
         */
        if (eh.e_type != ET_REL) {
                if (elf_getphnum(e, &nph) == 0) {
                        warnx(
"WARNING: Could not determine the number of program headers in \"%s\": %s.",
                            buffer,
                            elf_errmsg(-1));
                        goto done;
                }
                first_exec_segment = true;
                /*
                 * Iterate over the count reported by libelf rather
                 * than the raw e_phnum field, which does not hold the
                 * real count when extended numbering is in use.
                 */
                for (i = 0; i < nph; i++) {
                        if (gelf_getphdr(e, i, &ph) != &ph) {
                                warnx(
"WARNING: Retrieval of PHDR entry #%ju in \"%s\" failed: %s.",
                                    (uintmax_t) i, buffer, elf_errmsg(-1));
                                goto done;
                        }
                        switch (ph.p_type) {
                        case PT_DYNAMIC:
                                image->pi_isdynamic = 1;
                                break;
                        case PT_INTERP:
                                if ((elfbase = elf_rawfile(e, &rawsize)) ==
                                    NULL) {
                                        warnx(
"WARNING: Cannot retrieve the interpreter for \"%s\": %s.",
                                            buffer, elf_errmsg(-1));
                                        goto done;
                                }
                                /*
                                 * The interpreter path must start
                                 * inside the file and be terminated
                                 * before its end; otherwise interning
                                 * it would read past the raw image.
                                 */
                                if (ph.p_offset >= rawsize ||
                                    memchr(elfbase + ph.p_offset, '\0',
                                    rawsize - ph.p_offset) == NULL) {
                                        warnx(
"WARNING: Invalid interpreter path in \"%s\".",
                                            buffer);
                                        goto done;
                                }
                                image->pi_dynlinkerpath =
                                    pmcstat_string_intern(elfbase +
                                        ph.p_offset);
                                break;
                        case PT_LOAD:
                                /*
                                 * The first executable segment fixes
                                 * the load address.  Alignment values
                                 * of 0 and 1 mean "no alignment", so
                                 * only mask for larger values (a mask
                                 * built from 0 would zero the address).
                                 */
                                if ((ph.p_flags & PF_X) != 0 &&
                                    first_exec_segment) {
                                        if (ph.p_align > 1)
                                                image->pi_vaddr = ph.p_vaddr &
                                                    (-ph.p_align);
                                        else
                                                image->pi_vaddr = ph.p_vaddr;
                                        first_exec_segment = false;
                                }
                                break;
                        default:
                                break;
                        }
                }
        }

        /*
         * Get the min and max VA associated with this ELF object,
         * collecting function symbols along the way.
         */
        if (elf_getshnum(e, &nsh) == 0) {
                warnx(
"WARNING: Could not determine the number of sections for \"%s\": %s.",
                    buffer, elf_errmsg(-1));
                goto done;
        }

        for (i = 0; i < nsh; i++) {
                if ((scn = elf_getscn(e, i)) == NULL ||
                    gelf_getshdr(scn, &sh) != &sh) {
                        warnx(
"WARNING: Could not retrieve section header #%ju in \"%s\": %s.",
                            (uintmax_t) i, buffer, elf_errmsg(-1));
                        goto done;
                }
                if (sh.sh_flags & SHF_EXECINSTR) {
                        minva = min(minva, sh.sh_addr);
                        maxva = max(maxva, sh.sh_addr + sh.sh_size);
                }
                if (sh.sh_type == SHT_SYMTAB || sh.sh_type == SHT_DYNSYM)
                        pmcstat_image_add_symbols(image, e, scn, &sh);
        }

        image->pi_start = minva;
        image->pi_end   = maxva;
        image->pi_type  = image_type;
        image->pi_fullpath = pmcstat_string_intern(buffer);

        /*
         * Build the display name: the part of the full path that
         * follows the last '/', or the whole path if there is none.
         */
        endp = strrchr(buffer, '/');
        endp = (endp != NULL) ? endp + 1 : buffer;
        image->pi_name = pmcstat_string_intern(endp);

 done:
        (void) elf_end(e);
        if (fd >= 0)
                (void) close(fd);
        return;
}

/*
 * Given an image descriptor whose type is still unknown, let each
 * format handler (ELF first, then a.out) try to claim it.  A handler
 * is only consulted while the type is still PMCSTAT_IMAGE_UNKNOWN.
 * If no handler claims the image, its type is set to
 * PMCSTAT_IMAGE_INDETERMINABLE.
 */

void
pmcstat_image_determine_type(struct pmcstat_image *image,
    struct pmcstat_args *args)
{
        assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);

        /* Nothing to do for an image that has already been typed. */
        if (image->pi_type != PMCSTAT_IMAGE_UNKNOWN)
                return;

        /* Try each kind of handler in turn, stopping at the first hit. */
        pmcstat_image_get_elf_params(image, args);
        if (image->pi_type != PMCSTAT_IMAGE_UNKNOWN)
                return;

        pmcstat_image_get_aout_params(image, args);
        if (image->pi_type != PMCSTAT_IMAGE_UNKNOWN)
                return;

        /*
         * Remember that we tried to determine the object's type
         * and failed.
         */
        image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
}

/*
 * Locate an image descriptor given an interned path, adding a fresh
 * descriptor to the cache if necessary.
 *
 * An existing descriptor matches when both its interned path and its
 * kernel-module flag are equal to the arguments.  A new descriptor is
 * created with type PMCSTAT_IMAGE_UNKNOWN; the file format specific
 * parts of the image structure are filled in later, by the format
 * handlers.  The image-initialisation hooks of the two plugins
 * selected in 'args' are invoked on a new descriptor, if present.
 *
 * Returns the descriptor, or NULL if memory could not be allocated.
 */

struct pmcstat_image *
pmcstat_image_from_path(pmcstat_interned_string internedpath,
    int iskernelmodule, struct pmcstat_args *args,
    struct pmc_plugins *plugins)
{
        int hash;
        struct pmcstat_image *pi;

        hash = pmcstat_string_lookup_hash(internedpath);

        /* First, look for an existing entry. */
        LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next)
            if (pi->pi_execpath == internedpath &&
                  pi->pi_iskernelmodule == iskernelmodule)
                    return (pi);

        /*
         * Allocate a new entry and place it at the head of its hash
         * chain.  The entry is zero-filled so that members which are
         * only set once the file has been parsed successfully (such
         * as the display name and full path) never hold indeterminate
         * values.
         */
        pi = calloc(1, sizeof(*pi));
        if (pi == NULL)
                return (NULL);

        pi->pi_type = PMCSTAT_IMAGE_UNKNOWN;
        pi->pi_execpath = internedpath;
        pi->pi_start = ~0;
        pi->pi_end = 0;
        pi->pi_entry = 0;
        pi->pi_vaddr = 0;
        pi->pi_isdynamic = 0;
        pi->pi_iskernelmodule = iskernelmodule;
        pi->pi_dynlinkerpath = NULL;
        pi->pi_symbols = NULL;
        pi->pi_symcount = 0;
        pi->pi_addr2line = NULL;

        /* Give both selected plugins a chance to set up their state. */
        if (plugins[args->pa_pplugin].pl_initimage != NULL)
                plugins[args->pa_pplugin].pl_initimage(pi);
        if (plugins[args->pa_plugin].pl_initimage != NULL)
                plugins[args->pa_plugin].pl_initimage(pi);

        LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next);

        return (pi);
}