#include <sys/cdefs.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <wchar.h>
#include <sys/types.h>
#include <assert.h>
#include "localedef.h"
#include "parser.h"
/*
 * Scanner state.  The non-static variables are visible to the rest of the
 * program; the static ones are private to this file.
 */

/* Character that starts a comment in yylex(); reset_scanner() restores '#'. */
int com_char = '#';
/* Character that introduces an escape; reset_scanner() restores '\\'. */
int esc_char = '\\';
/* Not referenced in this part of the file; presumably the minimum bytes per character - confirm. */
int mb_cur_min = 1;
/* Upper bound on the number of bytes get_wide() collects for one character. */
int mb_cur_max = 1;
/* Line number printed by yyerror(), errf() and warn(); updated by scanc(). */
int lineno = 1;
/* Number of warnings issued so far; incremented by warn(). */
int warnings = 0;
/* Non-zero while the scanner reads standard input rather than `input'. */
int is_stdin = 1;
/* Stream opened by reset_scanner(); read only when is_stdin is zero. */
FILE *input;
/* Line number that the next character returned by scanc() belongs to. */
static int nextline;
/* Name of the current input, used as the prefix of diagnostics. */
static const char *filename = "<stdin>";
/* Non-zero while yylex() is between an opening and a closing double quote. */
static int instring = 0;
/* Non-zero when the previously scanned character was esc_char. */
static int escaped = 0;
/* Growable buffer holding the token being collected (see add_tok()). */
static char *token = NULL;
/* Number of characters currently stored in `token'. */
static int tokidx;
/* Allocated size of `token' in bytes. */
static int toksz = 0;
/* Non-zero once yylex() has seen a token on the current line; gates T_NL. */
static int hadtok = 0;
/* Growable wide-character buffer filled by add_wcs(), handed out by get_wcs(). */
static wchar_t *widestr = NULL;
/* Number of wide characters currently stored in `widestr'. */
static int wideidx = 0;
/* Allocated size of `widestr' in wide characters. */
static int widesz = 0;
/* Token id of the keyword most recently matched by the scanner. */
int last_kw = 0;
/* Category section currently being scanned; T_END when outside any section. */
static int category = T_END;
/*
 * Reserved words and the token id returned for each.  consume_token()
 * searches this table linearly with strcmp(), so matching is case
 * sensitive.  The table ends at the entry whose name is NULL.
 */
static struct token {
int id;
const char *name;
} keywords[] = {
{ T_COM_CHAR, "comment_char" },
{ T_ESC_CHAR, "escape_char" },
{ T_END, "END" },
{ T_COPY, "copy" },
{ T_MESSAGES, "LC_MESSAGES" },
{ T_YESSTR, "yesstr" },
{ T_YESEXPR, "yesexpr" },
{ T_NOSTR, "nostr" },
{ T_NOEXPR, "noexpr" },
{ T_MONETARY, "LC_MONETARY" },
{ T_INT_CURR_SYMBOL, "int_curr_symbol" },
{ T_CURRENCY_SYMBOL, "currency_symbol" },
{ T_MON_DECIMAL_POINT, "mon_decimal_point" },
{ T_MON_THOUSANDS_SEP, "mon_thousands_sep" },
{ T_POSITIVE_SIGN, "positive_sign" },
{ T_NEGATIVE_SIGN, "negative_sign" },
{ T_MON_GROUPING, "mon_grouping" },
{ T_INT_FRAC_DIGITS, "int_frac_digits" },
{ T_FRAC_DIGITS, "frac_digits" },
{ T_P_CS_PRECEDES, "p_cs_precedes" },
{ T_P_SEP_BY_SPACE, "p_sep_by_space" },
{ T_N_CS_PRECEDES, "n_cs_precedes" },
{ T_N_SEP_BY_SPACE, "n_sep_by_space" },
{ T_P_SIGN_POSN, "p_sign_posn" },
{ T_N_SIGN_POSN, "n_sign_posn" },
{ T_INT_P_CS_PRECEDES, "int_p_cs_precedes" },
{ T_INT_N_CS_PRECEDES, "int_n_cs_precedes" },
{ T_INT_P_SEP_BY_SPACE, "int_p_sep_by_space" },
{ T_INT_N_SEP_BY_SPACE, "int_n_sep_by_space" },
{ T_INT_P_SIGN_POSN, "int_p_sign_posn" },
{ T_INT_N_SIGN_POSN, "int_n_sign_posn" },
{ T_COLLATE, "LC_COLLATE" },
{ T_COLLATING_SYMBOL, "collating-symbol" },
{ T_COLLATING_ELEMENT, "collating-element" },
{ T_FROM, "from" },
{ T_ORDER_START, "order_start" },
{ T_ORDER_END, "order_end" },
{ T_FORWARD, "forward" },
{ T_BACKWARD, "backward" },
{ T_POSITION, "position" },
{ T_IGNORE, "IGNORE" },
{ T_UNDEFINED, "UNDEFINED" },
{ T_NUMERIC, "LC_NUMERIC" },
{ T_DECIMAL_POINT, "decimal_point" },
{ T_THOUSANDS_SEP, "thousands_sep" },
{ T_GROUPING, "grouping" },
{ T_TIME, "LC_TIME" },
{ T_ABDAY, "abday" },
{ T_DAY, "day" },
{ T_ABMON, "abmon" },
{ T_MON, "mon" },
{ T_D_T_FMT, "d_t_fmt" },
{ T_D_FMT, "d_fmt" },
{ T_T_FMT, "t_fmt" },
{ T_AM_PM, "am_pm" },
{ T_T_FMT_AMPM, "t_fmt_ampm" },
{ T_ERA, "era" },
{ T_ERA_D_FMT, "era_d_fmt" },
{ T_ERA_T_FMT, "era_t_fmt" },
{ T_ERA_D_T_FMT, "era_d_t_fmt" },
{ T_ALT_DIGITS, "alt_digits" },
{ T_CTYPE, "LC_CTYPE" },
{ T_ISUPPER, "upper" },
{ T_ISLOWER, "lower" },
{ T_ISALPHA, "alpha" },
{ T_ISDIGIT, "digit" },
{ T_ISPUNCT, "punct" },
{ T_ISXDIGIT, "xdigit" },
{ T_ISSPACE, "space" },
{ T_ISPRINT, "print" },
{ T_ISGRAPH, "graph" },
{ T_ISBLANK, "blank" },
{ T_ISCNTRL, "cntrl" },
{ T_ISSPECIAL, "special" },
{ T_ISENGLISH, "english" },
{ T_ISPHONOGRAM, "phonogram" },
{ T_ISIDEOGRAM, "ideogram" },
{ T_ISNUMBER, "number" },
{ T_ISALNUM, "alnum" },
{ T_TOUPPER, "toupper" },
{ T_TOLOWER, "tolower" },
{ T_CHARMAP, "CHARMAP" },
{ T_WIDTH, "WIDTH" },
/* Sentinel: a NULL name terminates the search. */
{ -1, NULL },
};
/*
 * Names that get_symbol() recognises as keywords when they appear between
 * '<' and '>' while no category section is open (category == T_END).
 * The table ends at the entry whose name is NULL.
 */
static struct token symwords[] = {
{ T_COM_CHAR, "comment_char" },
{ T_ESC_CHAR, "escape_char" },
{ T_CODE_SET, "code_set_name" },
{ T_MB_CUR_MAX, "mb_cur_max" },
{ T_MB_CUR_MIN, "mb_cur_min" },
/* Sentinel: a NULL name terminates the search. */
{ -1, NULL },
};
/*
 * Keyword ids that open a category section.  When consume_token() matches
 * one of these it records the id in `category'.  The list is terminated
 * by 0.
 */
static int categories[] = {
T_CHARMAP,
T_CTYPE,
T_COLLATE,
T_MESSAGES,
T_MONETARY,
T_NUMERIC,
T_TIME,
T_WIDTH,
/* Terminator. */
0
};
/*
 * Point the scanner at a new input source and restore its default state.
 *
 * fname: path of the file to scan, or NULL to read from standard input.
 *
 * A file opened by an earlier call is closed first, whichever source is
 * selected next, so switching back to standard input no longer leaks the
 * stream.  If fname cannot be opened the failure is reported with perror()
 * and the program exits with status 4.  The fname pointer itself is kept
 * (not copied) for use in later diagnostics.
 */
void
reset_scanner(const char *fname)
{
	/* Release the previous input file before choosing the new source. */
	if (!is_stdin) {
		(void) fclose(input);
		input = NULL;
		is_stdin = 1;
	}

	if (fname == NULL) {
		filename = "<stdin>";
	} else {
		if ((input = fopen(fname, "r")) == NULL) {
			perror("fopen");
			exit(4);
		}
		is_stdin = 0;
		filename = fname;
	}

	/* Back to the default comment/escape characters and a clean state. */
	com_char = '#';
	esc_char = '\\';
	instring = 0;
	escaped = 0;
	lineno = 1;
	nextline = 1;
	tokidx = 0;
	wideidx = 0;
}
/*
 * hex(x): numeric value (0-15) of the hexadecimal digit character x.  The
 * result is only meaningful when x has already passed isxdigit().
 *
 * isodigit(x): non-zero when x is one of the octal digits '0'..'7'.
 *
 * Every use of the argument is parenthesized so that expression arguments
 * expand correctly.  Both macros still evaluate the argument more than
 * once, so it must not have side effects.
 */
#define	hex(x)	\
	(isdigit(x) ? ((x) - '0') : \
	((islower(x) ? ((x) - 'a') : ((x) - 'A')) + 10))
#define	isodigit(x)	(((x) >= '0') && ((x) <= '7'))
/*
 * Read one character from the current input (stdin or `input').
 *
 * Returns the character, or EOF.  `lineno' is set to the line the returned
 * character belongs to; a newline advances the line counter for the
 * characters that follow it.
 */
static int
scanc(void)
{
	FILE *src = is_stdin ? stdin : input;
	int ch = getc(src);

	lineno = nextline;
	if (ch == '\n')
		nextline++;
	return (ch);
}
/*
 * Push character c back onto the current input so that the next scanc()
 * returns it again.
 *
 * Pushing back EOF is a no-op: ungetc() is specified to fail for EOF, and
 * callers routinely hand back whatever scanc() returned, so treating that
 * case as a fatal "ungetc failed" would misreport a plain end of input.
 * A pushed-back newline also rewinds the line counter.
 */
static void
unscanc(int c)
{
	if (c == EOF)
		return;
	if (c == '\n')
		nextline--;
	if (ungetc(c, is_stdin ? stdin : input) == EOF)
		yyerror("ungetc failed");
}
/*
 * Read exactly two hexadecimal digits from the input and return the byte
 * value they encode.  A non-hex character is reported through yyerror()
 * with "malformed hex digit" and 0 is returned.
 */
static int
scan_hex_byte(void)
{
	int digit[2];
	int i;

	for (i = 0; i < 2; i++) {
		digit[i] = scanc();
		if (!isxdigit(digit[i])) {
			yyerror("malformed hex digit");
			return (0);
		}
	}
	/* First digit is the high nibble, second the low nibble. */
	return ((hex(digit[0]) << 4) | hex(digit[1]));
}
/*
 * Read a decimal byte constant of two or three digits from the input and
 * return its value.
 *
 * Fewer than two digits is reported through yyerror() with "malformed
 * decimal digit".  A non-digit in the third position simply ends the
 * number and is pushed back.  Values above UCHAR_MAX cannot be stored in
 * one byte and are rejected instead of being silently truncated later.
 * Returns 0 after any reported error.
 */
static int
scan_dec_byte(void)
{
	int value = 0;
	int ndigits;

	for (ndigits = 0; ndigits < 3; ndigits++) {
		int c = scanc();

		if (!isdigit(c)) {
			if (ndigits < 2) {
				yyerror("malformed decimal digit");
				return (0);
			}
			/* The third digit is optional; hand the char back. */
			unscanc(c);
			break;
		}
		value = (value * 10) + (c - '0');
	}
	if (value > UCHAR_MAX) {
		yyerror("decimal value out of range");
		return (0);
	}
	return (value);
}
/*
 * Read an octal byte constant of two or three digits from the input and
 * return its value.
 *
 * Fewer than two digits is reported through yyerror() with "malformed
 * octal digit".  A non-octal character in the third position ends the
 * number and is pushed back.  Three-digit values above UCHAR_MAX (octal
 * 400 through 777) do not fit in one byte and are rejected instead of
 * being silently truncated later.  Returns 0 after any reported error.
 */
static int
scan_oct_byte(void)
{
	int value = 0;
	int ndigits;

	for (ndigits = 0; ndigits < 3; ndigits++) {
		int c = scanc();

		if (!isodigit(c)) {
			if (ndigits < 2) {
				yyerror("malformed octal digit");
				return (0);
			}
			/* The third digit is optional; hand the char back. */
			unscanc(c);
			break;
		}
		value = (value * 8) + (c - '0');
	}
	if (value > UCHAR_MAX) {
		yyerror("octal value out of range");
		return (0);
	}
	return (value);
}
/*
 * Append character c to the token buffer, keeping it NUL terminated.
 *
 * The buffer grows in 64-byte steps.  The result of realloc() is held in a
 * temporary so that a failed allocation does not lose (and leak) the
 * existing buffer; on failure the buffer is released, the token state is
 * cleared and "out of memory" is reported through yyerror().
 */
void
add_tok(int c)
{
	if ((tokidx + 1) >= toksz) {
		char *grown = realloc(token, toksz + 64);

		if (grown == NULL) {
			free(token);
			token = NULL;
			tokidx = 0;
			toksz = 0;
			yyerror("out of memory");
			return;
		}
		token = grown;
		toksz += 64;
	}
	token[tokidx++] = (char)c;
	token[tokidx] = 0;
}
/*
 * Append wide character c to the wide-string buffer, keeping it
 * terminated by a zero element.
 *
 * The buffer grows in steps of 64 elements.  The result of realloc() is
 * held in a temporary so that a failed allocation does not lose (and leak)
 * the existing buffer; on failure the buffer is released, the state is
 * cleared and "out of memory" is reported through yyerror().
 */
void
add_wcs(wchar_t c)
{
	if ((wideidx + 1) >= widesz) {
		wchar_t *grown;

		grown = realloc(widestr, (widesz + 64) * sizeof (wchar_t));
		if (grown == NULL) {
			free(widestr);
			widestr = NULL;
			wideidx = 0;
			widesz = 0;
			yyerror("out of memory");
			return;
		}
		widestr = grown;
		widesz += 64;
	}
	widestr[wideidx++] = c;
	widestr[wideidx] = 0;
}
/*
 * Hand the wide string collected by add_wcs() over to the caller and start
 * a fresh, empty buffer.
 *
 * Returns the collected string, or a newly allocated empty string when
 * nothing was collected.  If that allocation fails, "out of memory" is
 * reported through yyerror() and NULL is returned.
 */
wchar_t *
get_wcs(void)
{
	wchar_t *result = widestr;

	/* Detach the buffer from the scanner state. */
	widestr = NULL;
	wideidx = 0;
	widesz = 0;

	if (result != NULL)
		return (result);

	result = wcsdup(L"");
	if (result == NULL)
		yyerror("out of memory");
	return (result);
}
/*
 * Try to read one escaped numeric byte constant from the input:
 * esc_char followed by 'd'/'D' (decimal), 'x'/'X' (hexadecimal) or an
 * octal digit.
 *
 * Returns the byte value, or EOF when the input does not start with such a
 * constant; in that case the characters that were read are pushed back.
 */
static int
get_byte(void)
{
	int c = scanc();

	if (c != esc_char) {
		unscanc(c);
		return (EOF);
	}

	c = scanc();
	if (c == 'd' || c == 'D')
		return (scan_dec_byte());
	if (c == 'x' || c == 'X')
		return (scan_hex_byte());
	if (isodigit(c)) {
		/* The digit is part of the number; let the helper reread it. */
		unscanc(c);
		return (scan_oct_byte());
	}

	/* Not a numeric escape: restore both characters. */
	unscanc(c);
	unscanc(esc_char);
	return (EOF);
}
/*
 * Translate the character following an escape into the character it
 * stands for: n, r, t, f, v, b and a map to the corresponding control
 * characters; anything else is returned unchanged.
 */
int
get_escaped(int c)
{
	static const struct {
		int letter;
		int value;
	} map[] = {
		{ 'n', '\n' },
		{ 'r', '\r' },
		{ 't', '\t' },
		{ 'f', '\f' },
		{ 'v', '\v' },
		{ 'b', '\b' },
		{ 'a', '\a' },
	};
	size_t i;

	for (i = 0; i < sizeof (map) / sizeof (map[0]); i++) {
		if (map[i].letter == c)
			return (map[i].value);
	}
	return (c);
}
/*
 * Assemble one character from a run of escaped byte constants and convert
 * it with to_wide().
 *
 * Bytes are collected one at a time until to_wide() accepts the sequence.
 * Running out of bytes, or reaching mb_cur_max bytes without a successful
 * conversion, is reported as "not a valid character encoding".  Outside
 * the CHARMAP and WIDTH sections the result must also pass
 * check_charmap().
 *
 * Returns T_CHAR with the character in yylval.wc, or T_NULL after an
 * error has been reported.
 */
int
get_wide(void)
{
	static char mbs[MB_LEN_MAX + 1] = "";
	static int mbi = 0;
	wchar_t wc;

	if (mb_cur_max >= (int)sizeof (mbs)) {
		yyerror("max multibyte character size too big");
		mbi = 0;
		return (T_NULL);
	}

	do {
		int byte;

		if (mbi == mb_cur_max || (byte = get_byte()) == EOF) {
			mbi = 0;
			yyerror("not a valid character encoding");
			return (T_NULL);
		}
		mbs[mbi] = byte;
		mbi++;
		mbs[mbi] = 0;
	} while (to_wide(&wc, mbs) < 0);
	mbi = 0;

	if (category != T_CHARMAP && category != T_WIDTH &&
	    check_charmap(wc) < 0) {
		yyerror("no symbolic name for character");
		return (T_NULL);
	}

	yylval.wc = wc;
	return (T_CHAR);
}
/*
 * Scan a symbolic name; the opening '<' has already been consumed.
 *
 * Characters up to the closing '>' are collected with add_tok().  An
 * escape followed by a newline continues the name on the next line; any
 * other escaped character is translated with get_escaped().
 *
 * The finished name is classified in this order:
 *   - outside any category section, a name listed in symwords[] yields
 *     that keyword's id (also stored in last_kw);
 *   - outside the CHARMAP section, a name known to lookup_charmap()
 *     yields T_CHAR with the character in yylval.wc;
 *   - a name known to lookup_collsym() / lookup_collelem() yields
 *     T_COLLSYM / T_COLLELEM;
 *   - otherwise T_SYMBOL is returned and yylval.token receives the name.
 *
 * For T_SYMBOL the scanner's own heap buffer is handed over to the caller
 * (previously it was copied with an unchecked strdup() and the original
 * was dropped without being freed).
 *
 * An empty name ("<>") is detected from the number of collected characters
 * rather than from the buffer pointer, because the buffer keeps the text
 * of the previous token after that token has been consumed.
 *
 * Returns T_NULL or EOF after an error has been reported.
 */
int
get_symbol(void)
{
	int c;

	while ((c = scanc()) != EOF) {
		if (escaped) {
			escaped = 0;
			if (c == '\n')
				continue;
			add_tok(get_escaped(c));
			continue;
		}
		if (c == esc_char) {
			escaped = 1;
			continue;
		}
		if (c == '\n') {
			yyerror("unterminated symbolic name");
			continue;
		}
		if (c != '>') {
			add_tok(c);
			continue;
		}

		/* Closing '>': the name is complete. */
		if (token == NULL || tokidx == 0) {
			yyerror("missing symbolic name");
			return (T_NULL);
		}
		tokidx = 0;

		if (category == T_END) {
			int i;

			for (i = 0; symwords[i].name != NULL; i++) {
				if (strcmp(token, symwords[i].name) == 0) {
					last_kw = symwords[i].id;
					return (last_kw);
				}
			}
		}
		if ((category != T_CHARMAP) &&
		    (lookup_charmap(token, &yylval.wc)) != -1) {
			return (T_CHAR);
		}
		if ((yylval.collsym = lookup_collsym(token)) != NULL) {
			return (T_COLLSYM);
		}
		if ((yylval.collelem = lookup_collelem(token)) != NULL) {
			return (T_COLLELEM);
		}

		/* Undefined symbol: transfer ownership of the buffer. */
		yylval.token = token;
		token = NULL;
		toksz = 0;
		tokidx = 0;
		return (T_SYMBOL);
	}
	yyerror("unterminated symbolic name");
	return (EOF);
}
/*
 * Return the id of the category section the scanner is currently in
 * (T_END when no section is open).
 */
int
get_category(void)
{
	const int current = category;

	return (current);
}
/*
 * Classify the text collected in the token buffer and reset the buffer
 * index for the next token.
 *
 * Returns, in order of precedence:
 *   T_NULL      when no buffer has been allocated;
 *   T_ELLIPSIS  for "...";
 *   a keyword id for a name listed in keywords[] (last_kw is updated;
 *               END closes the current category and a category keyword
 *               opens one);
 *   T_NUMBER    for text starting with a digit, or '-' and a digit
 *               (value in yylval.num; trailing garbage is reported as
 *               "malformed number");
 *   T_CHAR      for a single character (value in yylval.wc);
 *   T_NAME      otherwise (text in yylval.token).
 *
 * Bytes are converted through unsigned char before they are passed to
 * isdigit() or stored in yylval.wc: passing a negative char to a <ctype.h>
 * function is undefined, and a sign-extended value would differ from the
 * value yylex() produces for the same byte inside a string.
 *
 * For T_NAME the scanner's own heap buffer is handed over to the caller
 * (previously it was copied with an unchecked strdup() and the original
 * was dropped without being freed).
 */
static int
consume_token(void)
{
	int len = tokidx;
	int i;

	tokidx = 0;
	if (token == NULL)
		return (T_NULL);

	if (strcmp(token, "...") == 0)
		return (T_ELLIPSIS);

	for (i = 0; keywords[i].name != NULL; i++) {
		int j;

		if (strcmp(keywords[i].name, token) != 0)
			continue;

		last_kw = keywords[i].id;
		if (last_kw == T_END)
			category = T_END;
		for (j = 0; categories[j] != 0; j++) {
			if (categories[j] == last_kw)
				category = last_kw;
		}
		return (keywords[i].id);
	}

	if (isdigit((unsigned char)token[0]) ||
	    (token[0] == '-' && isdigit((unsigned char)token[1]))) {
		char *eptr;

		yylval.num = strtol(token, &eptr, 10);
		if (*eptr != 0)
			yyerror("malformed number");
		return (T_NUMBER);
	}

	if (len == 1) {
		yylval.wc = (unsigned char)token[0];
		return (T_CHAR);
	}

	/* Plain name: transfer ownership of the buffer to the caller. */
	yylval.token = token;
	token = NULL;
	toksz = 0;
	tokidx = 0;
	return (T_NAME);
}
/*
 * Discard input up to and including the next newline.  Reaching end of
 * input first is reported through errf() with "missing newline".
 */
void
scan_to_eol(void)
{
	int c;

	do {
		c = scanc();
		if (c == EOF) {
			errf("missing newline");
			return;
		}
	} while (c != '\n');

	assert(c == '\n');
}
/*
 * Return the next token from the current input, or EOF at end of input.
 *
 * The scanner has two modes.  Inside a double-quoted string (instring)
 * every character is returned as its own T_CHAR token.  Outside a string,
 * characters are accumulated with add_tok() until a delimiter is seen and
 * the accumulated text is then classified by consume_token().
 *
 * `hadtok' records whether anything has been seen on the current line, so
 * that T_NL is returned only for lines that contained a token.
 */
int
yylex(void)
{
int c;
while ((c = scanc()) != EOF) {
/* ---- Inside a quoted string ---- */
if (instring) {
if (escaped) {
escaped = 0;
/* Escaped newline: line continuation, nothing to return. */
if (c == '\n')
continue;
/*
 * Numeric escape (hex, decimal or octal): push both characters
 * back and let get_wide() decode the byte sequence.
 */
if (strchr("xXd01234567", c)) {
unscanc(c);
unscanc(esc_char);
return (get_wide());
}
yylval.wc = get_escaped(c);
return (T_CHAR);
}
if (c == esc_char) {
escaped = 1;
continue;
}
switch (c) {
case '<':
return (get_symbol());
case '>':
return (T_GT);
case '"':
/* Closing quote ends string mode. */
instring = 0;
return (T_QUOTE);
default:
yylval.wc = c;
return (T_CHAR);
}
}
/* ---- Outside a string ---- */
if (escaped) {
escaped = 0;
/* Escaped newline: line continuation. */
if (c == '\n') {
continue;
}
hadtok = 1;
/* An escape in the middle of a pending token yields T_NULL. */
if (tokidx) {
return (T_NULL);
}
/* Numeric escape: re-scan it as a character via get_wide(). */
if (strchr("xXd01234567", c)) {
unscanc(c);
unscanc(esc_char);
return (get_wide());
}
add_tok(get_escaped(c));
continue;
}
if (c == esc_char) {
escaped = 1;
continue;
}
/* Comment: discard the rest of the line, newline included. */
if (c == com_char) {
while (c != '\n') {
if ((c = scanc()) == EOF) {
return (EOF);
}
}
assert(c == '\n');
/* A line holding only a comment produces no T_NL. */
if (!hadtok) {
continue;
}
hadtok = 0;
return (T_NL);
}
/*
 * A delimiter ends the pending token.  The delimiter is pushed back
 * so that it is handled on the next call.
 */
if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
unscanc(c);
return (consume_token());
}
switch (c) {
case '\n':
/* Blank lines are skipped silently. */
if (!hadtok) {
continue;
}
hadtok = 0;
return (T_NL);
case ',':
hadtok = 1;
return (T_COMMA);
case ';':
hadtok = 1;
return (T_SEMI);
case '(':
hadtok = 1;
return (T_LPAREN);
case ')':
hadtok = 1;
return (T_RPAREN);
case '>':
hadtok = 1;
return (T_GT);
case '<':
hadtok = 1;
return (get_symbol());
case ' ':
case '\t':
/* Whitespace between tokens is ignored. */
continue;
case '"':
/* Opening quote switches to string mode. */
hadtok = 1;
instring = 1;
return (T_QUOTE);
default:
/* Any other character becomes part of the pending token. */
hadtok = 1;
add_tok(c);
continue;
}
}
return (EOF);
}
void
yyerror(const char *msg)
{
(void) fprintf(stderr, "%s: %d: error: %s\n",
filename, lineno, msg);
exit(4);
}
void
errf(const char *fmt, ...)
{
char *msg;
va_list va;
va_start(va, fmt);
(void) vasprintf(&msg, fmt, va);
va_end(va);
(void) fprintf(stderr, "%s: %d: error: %s\n",
filename, lineno, msg);
free(msg);
exit(4);
}
/*
 * Report a warning using a printf-style format.  Prints
 * "<file>: <line>: warning: <formatted message>" to stderr and counts it
 * in `warnings'.  Unless `warnok' is set, the program then exits with
 * status 4.
 *
 * The message is formatted directly onto stderr.  The earlier version
 * built it with vasprintf() without checking the result, so a failed
 * allocation left the message pointer indeterminate before it was printed
 * and freed.
 */
void
warn(const char *fmt, ...)
{
	va_list va;

	(void) fprintf(stderr, "%s: %d: warning: ", filename, lineno);
	va_start(va, fmt);
	(void) vfprintf(stderr, fmt, va);
	va_end(va);
	(void) fputc('\n', stderr);

	warnings++;
	if (!warnok)
		exit(4);
}