root/arch/s390/tools/gen_opcode_table.c
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Generate opcode table initializers for the in-kernel disassembler.
 *
 *    Copyright IBM Corp. 2017
 *
 */

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>

/* Size, including the terminating NUL, of the fixed string buffers used below. */
#define STRING_SIZE_MAX 20

/*
 * Describes, for a family of instruction formats, where the opcode
 * nibbles that follow the first opcode byte are located.
 */
struct insn_type {
        unsigned char byte;     /* index of the instruction byte holding them */
        unsigned char mask;     /* and-mask applied to that byte */
        char **format;          /* NULL-terminated list of format names */
};

/* One instruction description as read from the input, plus derived data. */
struct insn {
        struct insn_type *type;         /* type looked up from the format name */
        char opcode[STRING_SIZE_MAX];   /* opcode token as read from the input */
        char name[STRING_SIZE_MAX];     /* name token as read from the input */
        char upper[STRING_SIZE_MAX];    /* name converted with toupper() */
        char format[STRING_SIZE_MAX];   /* format token as read from the input */
        unsigned int name_len;          /* strlen() of name */
};

/* A run of consecutive entries in the generated opcode table. */
struct insn_group {
        struct insn_type *type; /* type of the instruction that opened the group */
        int offset;             /* table index of the group's first entry */
        int count;              /* number of entries in the group */
        char opcode[2];         /* first two opcode characters, not NUL-terminated */
};

/* NOTE(review): not referenced anywhere in the visible part of this file. */
struct insn_format {
        char *format;
        int type;
};

/* Generator state: all instructions read and the groups built from them. */
struct gen_opcode {
        struct insn *insn;              /* realloc()-grown array of instructions */
        int nr;                         /* number of elements in insn */
        struct insn_group *group;       /* realloc()-grown array of groups */
        int nr_groups;                  /* number of elements in group */
};

/*
 * Table of instruction format types. Each opcode is either one byte
 * (two nibbles), three nibbles, or two bytes (four nibbles) long.
 * The byte member of each instruction format type entry defines
 * within which byte of an instruction the third (and fourth) nibble
 * of an opcode can be found. The mask member is the and-mask that
 * needs to be applied on this byte in order to get the third (and
 * fourth) nibble of the opcode.
 * The format array defines all instruction formats (as defined in the
 * Principles of Operation) which have the same position of the opcode
 * nibbles.
 * Instruction formats with 1-byte opcodes are a special case: the byte
 * member is always zero, so that the mask is applied to the (only) byte
 * that contains the opcode.
 */
static struct insn_type insn_type_table[] = {
        {
                /* 1-byte opcodes: the mask covers the whole first byte. */
                .byte = 0,
                .mask = 0xff,
                .format = (char *[]) {
                        "MII",
                        "RR",
                        "RS",
                        "RSI",
                        "RX",
                        "SI",
                        "SMI",
                        "SS",
                        NULL,
                },
        },
        {
                /* Opcode continues in the low nibble of byte 1. */
                .byte = 1,
                .mask = 0x0f,
                .format = (char *[]) {
                        "RI",
                        "RIL",
                        "SSF",
                        NULL,
                },
        },
        {
                /* Opcode continues in all of byte 1. */
                .byte = 1,
                .mask = 0xff,
                .format = (char *[]) {
                        "E",
                        "IE",
                        "RRE",
                        "RRF",
                        "RRR",
                        "S",
                        "SIL",
                        "SSE",
                        NULL,
                },
        },
        {
                /* Opcode continues in all of byte 5. */
                .byte = 5,
                .mask = 0xff,
                .format = (char *[]) {
                        "RIE",
                        "RIS",
                        "RRS",
                        "RSE",
                        "RSL",
                        "RSY",
                        "RXE",
                        "RXF",
                        "RXY",
                        "SIY",
                        "VRI",
                        "VRR",
                        "VRS",
                        "VRV",
                        "VRX",
                        "VSI",
                        NULL,
                },
        },
};

/*
 * Look up the instruction type for a format name.
 *
 * Only the part of @format in front of the first '_' (or the whole string
 * if there is none) is compared against the format names listed in
 * insn_type_table. The comparison is done in place, so @format may be of
 * any length and is not modified.
 *
 * Returns a pointer to the matching insn_type_table entry. If no entry
 * lists the format, a message is written to stderr and the program
 * terminates with EXIT_FAILURE.
 */
static struct insn_type *insn_format_to_type(char *format)
{
        size_t base_len, i;
        char **ptr;

        /* Length of the base format, i.e. everything before the first '_'. */
        base_len = strcspn(format, "_");
        for (i = 0; i < sizeof(insn_type_table) / sizeof(insn_type_table[0]); i++) {
                for (ptr = insn_type_table[i].format; *ptr; ptr++) {
                        /* Require equal length so that a prefix alone does not match. */
                        if (strlen(*ptr) == base_len && !strncmp(format, *ptr, base_len))
                                return &insn_type_table[i];
                }
        }
        fprintf(stderr, "Unknown instruction format: %s\n", format);
        exit(EXIT_FAILURE);
}

static void read_instructions(struct gen_opcode *desc)
{
        struct insn insn;
        int rc, i;

        while (1) {
                rc = scanf("%s %s %s", insn.opcode, insn.name, insn.format);
                if (rc == EOF)
                        break;
                if (rc != 3)
                        exit(EXIT_FAILURE);
                insn.type = insn_format_to_type(insn.format);
                insn.name_len = strlen(insn.name);
                for (i = 0; i <= insn.name_len; i++)
                        insn.upper[i] = toupper((unsigned char)insn.name[i]);
                desc->nr++;
                desc->insn = realloc(desc->insn, desc->nr * sizeof(*desc->insn));
                if (!desc->insn)
                        exit(EXIT_FAILURE);
                desc->insn[desc->nr - 1] = insn;
        }
}

static int cmpformat(const void *a, const void *b)
{
        return strcmp(((struct insn *)a)->format, ((struct insn *)b)->format);
}

static void print_formats(struct gen_opcode *desc)
{
        char *format;
        int i, count;

        qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmpformat);
        format = "";
        count = 0;
        printf("enum {\n");
        for (i = 0; i < desc->nr; i++) {
                if (!strcmp(format, desc->insn[i].format))
                        continue;
                count++;
                format = desc->insn[i].format;
                printf("\tINSTR_%s,\n", format);
        }
        printf("}; /* %d */\n\n", count);
}

static int cmp_long_insn(const void *a, const void *b)
{
        return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name);
}

/*
 * Print @name as a brace-enclosed list of character constants, one per
 * character of the string, without a terminating NUL element.
 */
static void print_insn_name(const char *name)
{
        const char *ch;

        printf("{");
        for (ch = name; *ch != '\0'; ch++)
                printf(" \'%c\',", *ch);
        printf(" }");
}

/*
 * Print the definitions for instructions whose name is at least six
 * characters long: an enum with one LONG_INSN_<NAME> constant per such
 * instruction, and the LONG_INSN_INITIALIZER macro that maps each
 * constant to the name's characters. Sorts desc->insn by name as a side
 * effect.
 */
static void print_long_insn(struct gen_opcode *desc)
{
        struct insn *cur;
        int idx, nr_long = 0;

        qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmp_long_insn);

        /* First pass: the enum constants. */
        printf("enum {\n");
        for (idx = 0; idx < desc->nr; idx++) {
                cur = &desc->insn[idx];
                if (cur->name_len >= 6) {
                        printf("\tLONG_INSN_%s,\n", cur->upper);
                        nr_long++;
                }
        }
        printf("}; /* %d */\n\n", nr_long);

        /* Second pass: the initializer indexed by those constants. */
        printf("#define LONG_INSN_INITIALIZER { \\\n");
        for (idx = 0; idx < desc->nr; idx++) {
                cur = &desc->insn[idx];
                if (cur->name_len >= 6) {
                        printf("\t[LONG_INSN_%s] = ", cur->upper);
                        print_insn_name(cur->name);
                        printf(", \\\n");
                }
        }
        printf("}\n\n");
}

/*
 * Print the opcode table entry for @insn at table index @nr.
 *
 * For types with a non-zero byte member the first two opcode characters
 * are skipped and only the remainder is printed as .opfrag. Names shorter
 * than six characters are printed inline as .name, longer ones are
 * referenced through their LONG_INSN_<NAME> constant as .offset.
 */
static void print_opcode(struct insn *insn, int nr)
{
        const char *frag = insn->type->byte != 0 ? insn->opcode + 2 : insn->opcode;

        printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, frag, insn->format);
        if (insn->name_len >= 6) {
                printf(".offset = LONG_INSN_%s", insn->upper);
        } else {
                printf(".name =  ");
                print_insn_name(insn->name);
        }
        printf(" }, \\\n");
}

/*
 * Account for @insn, which is placed at table index @offset, in the list
 * of groups.
 *
 * The instruction is counted in the most recently added group if that
 * group starts with the same two opcode characters or if its type has a
 * byte member of zero. Otherwise a new group with a count of one is
 * appended. The program terminates with EXIT_FAILURE if the group array
 * cannot be grown.
 */
static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset)
{
        struct insn_group *grown, *fresh;

        if (desc->group) {
                struct insn_group *last = &desc->group[desc->nr_groups - 1];

                if (strncmp(last->opcode, insn->opcode, 2) == 0 || last->type->byte == 0) {
                        last->count++;
                        return;
                }
        }

        grown = realloc(desc->group, (desc->nr_groups + 1) * sizeof(*desc->group));
        if (!grown)
                exit(EXIT_FAILURE);
        desc->group = grown;
        desc->nr_groups++;

        fresh = &desc->group[desc->nr_groups - 1];
        fresh->type = insn->type;
        fresh->offset = offset;
        fresh->count = 1;
        memcpy(fresh->opcode, insn->opcode, 2);
}

static int cmpopcode(const void *a, const void *b)
{
        return strcmp(((struct insn *)a)->opcode, ((struct insn *)b)->opcode);
}

static void print_opcode_table(struct gen_opcode *desc)
{
        char opcode[2] = "";
        struct insn *insn;
        int i, offset;

        qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmpopcode);
        printf("#define OPCODE_TABLE_INITIALIZER { \\\n");
        offset = 0;
        for (i = 0; i < desc->nr; i++) {
                insn = &desc->insn[i];
                if (insn->type->byte == 0)
                        continue;
                add_to_group(desc, insn, offset);
                if (strncmp(opcode, insn->opcode, 2)) {
                        memcpy(opcode, insn->opcode, 2);
                        printf("\t/* %.2s */ \\\n", opcode);
                }
                print_opcode(insn, offset);
                offset++;
        }
        printf("\t/* 1-byte opcode instructions */ \\\n");
        for (i = 0; i < desc->nr; i++) {
                insn = &desc->insn[i];
                if (insn->type->byte != 0)
                        continue;
                add_to_group(desc, insn, offset);
                print_opcode(insn, offset);
                offset++;
        }
        printf("}\n\n");
}

static void print_opcode_table_offsets(struct gen_opcode *desc)
{
        struct insn_group *group;
        int i;

        printf("#define OPCODE_OFFSET_INITIALIZER { \\\n");
        for (i = 0; i < desc->nr_groups; i++) {
                group = &desc->group[i];
                printf("\t{ .opcode = 0x%.2s, .mask = 0x%02x, .byte = %d, .offset = %d, .count = %d }, \\\n",
                       group->opcode, group->type->mask, group->type->byte, group->offset, group->count);
        }
        printf("}\n\n");
}

/*
 * Read the instruction descriptions from stdin and write the generated
 * header (format enum, long instruction names, opcode table and opcode
 * offset table) to stdout.
 *
 * Exits with EXIT_SUCCESS once the header has been written completely,
 * and with EXIT_FAILURE if writing to stdout failed, so that a truncated
 * header is not mistaken for a valid one.
 */
int main(int argc, char **argv)
{
        struct gen_opcode _desc = { 0 };
        struct gen_opcode *desc = &_desc;

        /* No command line arguments are evaluated. */
        (void)argc;
        (void)argv;

        read_instructions(desc);
        printf("#ifndef __S390_GENERATED_DIS_DEFS_H__\n");
        printf("#define __S390_GENERATED_DIS_DEFS_H__\n");
        printf("/*\n");
        printf(" * DO NOT MODIFY.\n");
        printf(" *\n");
        printf(" * This file was generated by %s\n", __FILE__);
        printf(" */\n\n");
        print_formats(desc);
        print_long_insn(desc);
        print_opcode_table(desc);
        print_opcode_table_offsets(desc);
        printf("#endif\n");
        /* Flush explicitly so that buffered write errors are detected here. */
        if (fflush(stdout) != 0 || ferror(stdout)) {
                fprintf(stderr, "Error writing generated output\n");
                exit(EXIT_FAILURE);
        }
        exit(EXIT_SUCCESS);
}