root/usr/src/cmd/dis/dis_main.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2011 Jason King.  All rights reserved.
 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
 * Copyright 2018, Joyent, Inc.
 * Copyright 2024 Oxide Computer Company
 */

#include <ctype.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/hexdump.h>
#include <sys/sysmacros.h>
#include <sys/elf_SPARC.h>

#include <libdisasm.h>

#include "dis_target.h"
#include "dis_util.h"
#include "dis_list.h"

int g_demangle;         /* Demangle C++ names (set by -C) */
int g_quiet;            /* Quiet mode: suppress headings (set by -q) */
int g_numeric;          /* Numeric mode: print addresses, not symbols (-n) */
int g_flags;            /* libdisasm flags: DIS_OCTAL (-o) plus target bits */
int g_doall;            /* true if no functions or sections were given */

dis_namelist_t *g_funclist;     /* functions to disassemble (-F), if any */
dis_namelist_t *g_seclist;      /* sections to process (-d, -D, -t), if any */

/*
 * Section options for -d, -D, and -t.  These are the 'type' values stored
 * alongside each section name by main() and later handed to
 * dis_named_section():
 *
 *      DIS_DATA_RELATIVE (-d)  dump as data, addresses starting at 0
 *      DIS_DATA_ABSOLUTE (-D)  dump as data, addresses starting at the
 *                              section address
 *      DIS_TEXT (-t)           disassemble as text
 */
#define DIS_DATA_RELATIVE       1
#define DIS_DATA_ABSOLUTE       2
#define DIS_TEXT                3

/*
 * libdisasm callback data.  Keeps track of current data (function or section)
 * and offset within that data.  dis_data() fills one of these in and registers
 * it with dis_set_data(); do_read() and do_lookup() receive it back as their
 * opaque 'data' argument.
 */
typedef struct dis_buffer {
        dis_tgt_t       *db_tgt;        /* current dis target */
        void            *db_data;       /* function or section data */
        uint64_t        db_addr;        /* address of first byte of db_data */
        size_t          db_size;        /* size of data */
        uint64_t        db_nextaddr;    /* next address to be read */
} dis_buffer_t;

/*
 * Minimum width of symbol portion of line.
 * NOTE(review): nothing in the visible part of this file references this
 * macro (dis_data() starts its column width at 0) -- confirm whether it is
 * still needed.
 */
#define MINSYMWIDTH     22

/*
 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
 * formatted symbol, based on the offset and current settings.
 *
 *   addr     address being described; printed when there is no symbol or
 *            numeric mode (g_numeric) is on
 *   symbol   symbol name, or NULL if none was found
 *   offset   offset of addr from the start of symbol
 *   buf      output buffer
 *   buflen   size of buf; output is truncated by snprintf() to fit
 *
 * The result is one of "0x<addr>" / "0<addr>" (hex or, with DIS_OCTAL, octal),
 * "<symbol>", or "<symbol>+0x<offset>" / "<symbol>+0<offset>".  The symbol is
 * run through dis_demangle() first when g_demangle is set.
 */
void
getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
    size_t buflen)
{
        if (symbol == NULL || g_numeric) {
                /*
                 * uint64_t is not guaranteed to be 'unsigned long long', so
                 * cast explicitly to match the %ll conversions.
                 */
                if (g_flags & DIS_OCTAL)
                        (void) snprintf(buf, buflen, "0%llo",
                            (unsigned long long)addr);
                else
                        (void) snprintf(buf, buflen, "0x%llx",
                            (unsigned long long)addr);
        } else {
                if (g_demangle)
                        symbol = dis_demangle(symbol);

                /*
                 * off_t may be wider than the 'unsigned int' that %o and %x
                 * expect; passing it unconverted is undefined and drops the
                 * upper bits of large offsets.  Widen both the conversion and
                 * the argument.  Output is unchanged for offsets that fit in
                 * 32 bits.
                 */
                if (offset == 0)
                        (void) snprintf(buf, buflen, "%s", symbol);
                else if (g_flags & DIS_OCTAL)
                        (void) snprintf(buf, buflen, "%s+0%llo", symbol,
                            (unsigned long long)offset);
                else
                        (void) snprintf(buf, buflen, "%s+0x%llx", symbol,
                            (unsigned long long)offset);
        }
}

/*
 * Report the instruction length for architectures whose instructions are all
 * the same size.  Returns that length when libdisasm's minimum and maximum
 * instruction lengths agree, and 0 when they differ (variable-length
 * instructions).
 */
static int
insn_size(dis_handle_t *dhp)
{
        int shortest = dis_min_instrlen(dhp);
        int longest = dis_max_instrlen(dhp);

        return ((shortest == longest) ? shortest : 0);
}

/*
 * The main disassembly routine.  Given a fixed-sized buffer and starting
 * address, disassemble the data using the supplied target and libdisasm handle.
 *
 *   tgt      target used for symbol and section lookups
 *   dhp      libdisasm handle; dis_set_data() is pointed at a local
 *            dis_buffer_t describing 'data'
 *   addr     address of the first byte of 'data'
 *   data     bytes to disassemble
 *   datalen  number of bytes in 'data'
 *
 * One line is written to stdout per instruction (address, raw bytes, text),
 * with extra lines for instructions longer than the per-line byte limit.
 */
void
dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
    size_t datalen)
{
        dis_buffer_t db = { 0 };
        char buf[BUFSIZE];
        char symbuf[BUFSIZE];
        const char *symbol;
        const char *last_symbol;
        /*
         * The lookup outputs are consumed below even when no symbol is found,
         * so give them defined values in case the lookup leaves them alone.
         */
        off_t symoffset = 0;
        int i;
        int bytesperline;
        size_t symsize = 0;
        int isfunc = 0;
        size_t symwidth = 0;
        int ret;
        int insz = insn_size(dhp);
        uint64_t endaddr;

        db.db_tgt = tgt;
        db.db_data = data;
        db.db_addr = addr;
        db.db_size = datalen;
        endaddr = db.db_addr + db.db_size;

        dis_set_data(dhp, &db);

        if ((bytesperline = dis_max_instrlen(dhp)) > 6)
                bytesperline = 6;

        symbol = NULL;

        /*
         * symbuf is only rewritten when the symbol changes; if the very first
         * lookup finds nothing, strlen() below would otherwise read an
         * uninitialized buffer.
         */
        symbuf[0] = '\0';

        while (addr < endaddr) {
                /* Bytes left in the buffer; nonzero by the loop condition. */
                uint64_t remaining = endaddr - addr;

                ret = dis_disassemble(dhp, addr, buf, BUFSIZE);
                if (ret != 0 && insz > 0) {
                        /*
                         * Since we know instructions are fixed size, we
                         * always know the address of the next instruction.
                         * Never step past the end of the buffer, though: the
                         * byte dump below reads everything up to db_nextaddr.
                         */
                        (void) snprintf(buf, sizeof (buf),
                            "*** invalid opcode ***");
                        if ((uint64_t)insz > remaining)
                                db.db_nextaddr = endaddr;
                        else
                                db.db_nextaddr = addr + insz;

                } else if (ret != 0) {
                        off_t next;

                        (void) snprintf(buf, sizeof (buf),
                            "*** invalid opcode ***");

                        /*
                         * On architectures with variable sized instructions
                         * we have no way to figure out where the next
                         * instruction starts if we encounter an invalid
                         * instruction.  Instead we print the rest of the
                         * instruction stream as hex until we reach the
                         * next valid symbol in the section.
                         *
                         * 'next' is an offset from addr, so it must be
                         * compared against the bytes remaining from addr
                         * rather than the total buffer size; otherwise
                         * db_nextaddr can land beyond the buffer.
                         */
                        next = dis_tgt_next_symbol(tgt, addr);
                        if (next <= 0 || (uint64_t)next > remaining)
                                db.db_nextaddr = endaddr;
                        else
                                db.db_nextaddr = addr + next;
                }

                /*
                 * Print out the line as:
                 *
                 *      address:        bytes   text
                 *
                 * If there are more than 6 bytes in any given instruction,
                 * spread the bytes across two lines.  We try to get symbolic
                 * information for the address, but if that fails we print out
                 * the numeric address instead.
                 *
                 * If we are disassembling a function with a long name, a
                 * fixed-width address column can be annoying.  So we pick a
                 * width based on the maximum width that the current symbol
                 * can be (its name plus its size as the offset).  This at
                 * least produces text aligned within each function.
                 */
                last_symbol = symbol;
                symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
                    &isfunc);
                if (symbol == NULL) {
                        symbol = dis_find_section(tgt, addr, &symoffset);
                        symsize = symoffset;
                }

                if (symbol != last_symbol)
                        getsymname(addr, symbol, symsize, symbuf,
                            sizeof (symbuf));

                symwidth = MAX(symwidth, strlen(symbuf));
                getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));

                /*
                 * If we've crossed a new function boundary, print out the
                 * function name on a blank line.
                 */
                if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
                        (void) printf("%s()\n", symbol);

                /* A '*' field width must be an int, not a size_t. */
                (void) printf("    %s:%*s ", symbuf,
                    (int)(symwidth - strlen(symbuf)), "");

                /* print bytes */
                for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
                    i++) {
                        int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
                        if (g_flags & DIS_OCTAL)
                                (void) printf("%03o ", byte);
                        else
                                (void) printf("%02x ", byte);
                }

                /* trailing spaces for missing bytes */
                for (; i < bytesperline; i++) {
                        if (g_flags & DIS_OCTAL)
                                (void) printf("    ");
                        else
                                (void) printf("   ");
                }

                /* contents of disassembly */
                (void) printf(" %s", buf);

                /* excess bytes that spill over onto subsequent lines */
                for (; i < db.db_nextaddr - addr; i++) {
                        int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
                        if (i % bytesperline == 0)
                                (void) printf("\n    %*s  ", (int)symwidth,
                                    "");
                        if (g_flags & DIS_OCTAL)
                                (void) printf("%03o ", byte);
                        else
                                (void) printf("%02x ", byte);
                }

                (void) printf("\n");

                addr = db.db_nextaddr;
        }
}

/*
 * Symbol lookup callback handed to libdisasm.  'data' is the dis_buffer_t
 * registered by dis_data(); the lookup itself is done by dis_tgt_lookup() on
 * that buffer's target.
 *
 * Each output is optional and skipped when its pointer is NULL:
 *   buf/buflen  receives the text produced by getsymname(), which prints the
 *               numeric address when no symbol was found
 *   start       receives addr minus the offset reported by the lookup
 *   symlen      receives the size reported by the lookup
 *
 * Returns 0 when a symbol was found and -1 otherwise.
 */
int
do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
    size_t *symlen)
{
        dis_buffer_t *db = data;
        off_t symoff;
        size_t symsz;
        const char *name;

        name = dis_tgt_lookup(db->db_tgt, addr, &symoff, 0, &symsz, NULL);

        if (buf != NULL)
                getsymname(addr, name, symoff, buf, buflen);
        if (start != NULL)
                *start = addr - symoff;
        if (symlen != NULL)
                *symlen = symsz;

        return (name == NULL ? -1 : 0);
}

/*
 * Read callback handed to libdisasm.  'data' is the dis_buffer_t describing
 * the function or section currently being disassembled.
 *
 * Only addresses inside that buffer can be read; anything else returns -1.
 * A request that would run off the end is shortened to the bytes available.
 * On success the bytes are copied into buf, db_nextaddr is advanced to just
 * past the last byte copied, and the number of bytes copied is returned.
 */
int
do_read(void *data, uint64_t addr, void *buf, size_t len)
{
        dis_buffer_t *db = data;
        uint64_t base = db->db_addr;
        size_t skip;
        size_t avail;

        if (addr < base || addr >= base + db->db_size)
                return (-1);

        skip = addr - base;
        avail = db->db_size - skip;
        if (avail < len)
                len = avail;

        (void) memcpy(buf, (char *)db->db_data + skip, len);
        db->db_nextaddr = addr + len;

        return (len);
}

/*
 * Dump raw bytes to stdout in a human-readable form via the hexdump routines.
 * Used by the -d and -D options.
 *
 *   addr     address to label the first byte with
 *   data     bytes to dump
 *   datalen  number of bytes in 'data'
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
        const uint64_t upper32 = 0xffffffff00000000ULL;
        hexdump_t hd;

        hexdump_init(&hd);

        /* Print out data in two-byte chunks. */
        hexdump_set_grouping(&hd, 2);
        hexdump_set_addr(&hd, addr);

        /*
         * Use the wider address column only when the end of the range does
         * not fit in 32 bits.
         */
        if (((addr + datalen) & upper32) != 0ULL)
                hexdump_set_addrwidth(&hd, 16);
        else
                hexdump_set_addrwidth(&hd, 8);

        (void) hexdump_fileh(&hd, data, datalen, HDF_DEFAULT | HDF_ALIGN,
            stdout);

        hexdump_fini(&hd);
}

/*
 * Section iterator callback used when no sections or functions were named on
 * the command line.  'data' is the libdisasm handle.  Sections that are not
 * text are skipped; text sections get an optional "section <name>" heading
 * (suppressed in quiet mode) followed by their disassembly.
 */
void
dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
{
        dis_handle_t *dhp = data;

        if (dis_section_istext(scn)) {
                if (!g_quiet)
                        (void) printf("\nsection %s\n", dis_section_name(scn));

                dis_data(tgt, dhp, dis_section_addr(scn),
                    dis_section_data(scn), dis_section_size(scn));
        }
}

/*
 * Structure passed to dis_named_{section,function} which keeps track of both
 * the target and the libdisasm handle.  dis_file() fills one in per target
 * and passes its address as the opaque 'data' argument of the list iterators.
 */
typedef struct callback_arg {
        dis_tgt_t       *ca_tgt;        /* target being disassembled */
        dis_handle_t    *ca_handle;     /* libdisasm handle for that target */
} callback_arg_t;

/*
 * Handle a section explicitly named with -t, -d, or -D.  'type' is the value
 * recorded with the name on the command line and selects the action:
 *
 *      DIS_DATA_RELATIVE       raw dump with addresses starting at 0
 *      DIS_DATA_ABSOLUTE       raw dump with addresses starting at the
 *                              section's address
 *      DIS_TEXT                disassembly, using the target and handle
 *                              carried in 'data' (a callback_arg_t)
 *
 * Any other type prints only the heading.  The "section <name>" heading is
 * omitted in quiet mode.
 */
void
dis_named_section(dis_scn_t *scn, int type, void *data)
{
        callback_arg_t *ca = data;

        if (!g_quiet)
                (void) printf("\nsection %s\n", dis_section_name(scn));

        if (type == DIS_DATA_RELATIVE) {
                dump_data(0, dis_section_data(scn), dis_section_size(scn));
        } else if (type == DIS_DATA_ABSOLUTE) {
                dump_data(dis_section_addr(scn), dis_section_data(scn),
                    dis_section_size(scn));
        } else if (type == DIS_TEXT) {
                dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
                    dis_section_data(scn), dis_section_size(scn));
        }
}

/*
 * Function list callback for functions named with '-F'.  Disassembles the
 * function's bytes using the target and libdisasm handle carried in 'data'
 * (a callback_arg_t).  'type' is ignored.
 */
/* ARGSUSED */
void
dis_named_function(dis_func_t *func, int type, void *data)
{
        callback_arg_t *ca = data;
        dis_tgt_t *tgt = ca->ca_tgt;
        dis_handle_t *dhp = ca->ca_handle;

        dis_data(tgt, dhp, dis_function_addr(func), dis_function_data(func),
            dis_function_size(func));
}

/*
 * Disassemble a complete file.  First, we determine the type of the file based
 * on the ELF machine type, and instantiate a version of the disassembler
 * appropriate for the file.  We then resolve any named sections or functions
 * against the file, and iterate over the results (or all sections if no flags
 * were specified).
 */
void
dis_file(const char *filename)
{
        dis_tgt_t *tgt, *current;
        dis_scnlist_t *sections;
        dis_funclist_t *functions;
        dis_handle_t *dhp;
        GElf_Ehdr ehdr;

        /*
         * First, initialize the target
         */
        if ((tgt = dis_tgt_create(filename)) == NULL)
                return;

        if (!g_quiet)
                (void) printf("disassembly for %s\n\n",  filename);

        /*
         * A given file may contain multiple targets (if it is an archive, for
         * example).  We iterate over all possible targets if this is the case.
         */
        for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
                dis_tgt_ehdr(current, &ehdr);

                /*
                 * Eventually, this should probably live within libdisasm, and
                 * we should be able to disassemble targets from different
                 * architectures.  For now, we only support objects as the
                 * native machine type.
                 */
                switch (ehdr.e_machine) {
                case EM_SPARC:
                        if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
                            ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
                                warn("invalid E_IDENT field for SPARC object");
                                return;
                        }
                        g_flags |= DIS_SPARC_V8;
                        break;

                case EM_SPARC32PLUS:
                {
                        uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;

                        if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
                            ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
                                warn("invalid E_IDENT field for SPARC object");
                                return;
                        }

                        if (flags != 0 &&
                            (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
                            EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
                                g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
                        else
                                g_flags |= DIS_SPARC_V9;
                        break;
                }

                case EM_SPARCV9:
                        if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
                            ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
                                warn("invalid E_IDENT field for SPARC object");
                                return;
                        }

                        g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
                        break;

                case EM_386:
                        g_flags |= DIS_X86_SIZE32;
                        break;

                case EM_AMD64:
                        g_flags |= DIS_X86_SIZE64;
                        break;

                case EM_S370:
                        g_flags |= DIS_S370;

                        if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
                            ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
                                warn("invalid E_IDENT field for S370 object");
                                return;
                        }
                        break;

                case EM_S390:
                        /*
                         * Both 390 and z/Architecture use EM_S390, the only
                         * differences is the class: ELFCLASS32 for plain
                         * old s390 and ELFCLASS64 for z/Architecture (aka.
                         * s390x).
                         */
                        if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
                                g_flags |= DIS_S390_31;
                        } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
                                g_flags |= DIS_S390_64;
                        } else {
                                warn("invalid E_IDENT field for S390 object");
                                return;
                        }

                        if (ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
                                warn("invalid E_IDENT field for S390 object");
                                return;
                        }
                        break;

                case EM_RISCV:
                        /*
                         * RISC-V is defined to be litle endian. The current ISA
                         * makes it clear that the 64-bit instructions can
                         * co-exist with the 32-bit ones and therefore we don't
                         * need a separate elf class at this time.
                         */
                        if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
                                warn("invalid EI_DATA field for RISC-V object");
                                return;
                        }

                        if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
                                g_flags |= DIS_RISCV_32;
                        } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
                                g_flags |= DIS_RISCV_64;
                        } else {
                                warn("invalid EI_CLASS field for RISC-V "
                                    "object");
                                return;
                        }
                        break;

                default:
                        die("%s: unsupported ELF machine 0x%x", filename,
                            ehdr.e_machine);
                }

                /*
                 * If ET_REL (.o), printing immediate symbols is likely to
                 * result in garbage, as symbol lookups on unrelocated
                 * immediates find false and useless matches.
                 */

                if (ehdr.e_type == ET_REL)
                        g_flags |= DIS_NOIMMSYM;

                if (!g_quiet && dis_tgt_member(current) != NULL)
                        (void) printf("\narchive member %s\n",
                            dis_tgt_member(current));

                /*
                 * Instantiate a libdisasm handle based on the file type.
                 */
                if ((dhp = dis_handle_create(g_flags, current, do_lookup,
                    do_read)) == NULL)
                        die("%s: failed to initialize disassembler: %s",
                            filename, dis_strerror(dis_errno()));

                if (g_doall) {
                        /*
                         * With no arguments, iterate over all sections and
                         * disassemble only those that contain text.
                         */
                        dis_tgt_section_iter(current, dis_text_section, dhp);
                } else {
                        callback_arg_t ca;

                        ca.ca_tgt = current;
                        ca.ca_handle = dhp;

                        /*
                         * If sections or functions were explicitly specified,
                         * resolve those names against the object, and iterate
                         * over just the resulting data.
                         */
                        sections = dis_namelist_resolve_sections(g_seclist,
                            current);
                        functions = dis_namelist_resolve_functions(g_funclist,
                            current);

                        dis_scnlist_iter(sections, dis_named_section, &ca);
                        dis_funclist_iter(functions, dis_named_function, &ca);

                        dis_scnlist_destroy(sections);
                        dis_funclist_destroy(functions);
                }

                dis_handle_destroy(dhp);
        }

        dis_tgt_destroy(tgt);
}

/*
 * Print the command synopsis to stderr and exit with status 2.  Does not
 * return.
 */
void
usage(void)
{
        (void) fprintf(stderr,
            "usage: dis [-CVoqn] [-d sec] \n"
            "\t[-D sec] [-F function] [-t sec] file ..\n");
        exit(2);
}

typedef struct lib_node {
        char *path;
        struct lib_node *next;
} lib_node_t;

int
main(int argc, char **argv)
{
        int optchar;
        int i;
        lib_node_t *libs = NULL;

        g_funclist = dis_namelist_create();
        g_seclist = dis_namelist_create();

        while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
                switch (optchar) {
                case 'C':
                        g_demangle = 1;
                        break;
                case 'd':
                        dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
                        break;
                case 'D':
                        dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
                        break;
                case 'F':
                        dis_namelist_add(g_funclist, optarg, 0);
                        break;
                case 'l': {
                        /*
                         * The '-l foo' option historically would attempt to
                         * disassemble '$LIBDIR/libfoo.a'.  The $LIBDIR
                         * environment variable has never been supported or
                         * documented for our linker.  However, until this
                         * option is formally EOLed, we have to support it.
                         */
                        char *dir;
                        lib_node_t *node;
                        size_t len;

                        if ((dir = getenv("LIBDIR")) == NULL ||
                            dir[0] == '\0')
                                dir = "/usr/lib";
                        node = safe_malloc(sizeof (lib_node_t));
                        len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
                        node->path = safe_malloc(len);

                        (void) snprintf(node->path, len, "%s/lib%s.a", dir,
                            optarg);
                        node->next = libs;
                        libs = node;
                        break;
                }
                case 'L':
                        /*
                         * The '-L' option historically would attempt to read
                         * the .debug section of the target to determine source
                         * line information in order to annotate the output.
                         * No compiler has emitted these sections in many years,
                         * and the option has never done what it purported to
                         * do.  We silently consume the option for
                         * compatibility.
                         */
                        break;
                case 'n':
                        g_numeric = 1;
                        break;
                case 'o':
                        g_flags |= DIS_OCTAL;
                        break;
                case 'q':
                        g_quiet = 1;
                        break;
                case 't':
                        dis_namelist_add(g_seclist, optarg, DIS_TEXT);
                        break;
                case 'V':
                        (void) printf("Solaris disassembler version 1.0\n");
                        return (0);
                default:
                        usage();
                        break;
                }
        }

        argc -= optind;
        argv += optind;

        if (argc == 0 && libs == NULL) {
                warn("no objects specified");
                usage();
        }

        if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
                g_doall = 1;

        /*
         * See comment for 'l' option, above.
         */
        while (libs != NULL) {
                lib_node_t *node = libs->next;

                dis_file(libs->path);
                free(libs->path);
                free(libs);
                libs = node;
        }

        for (i = 0; i < argc; i++)
                dis_file(argv[i]);

        dis_namelist_destroy(g_funclist);
        dis_namelist_destroy(g_seclist);

        return (g_error);
}