#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#ifdef DEBUG
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <cns11643_big5.h>
#include <big5_gb18030.h>
/* High bit; iso_gb_to_gbk() ORs it into both bytes of a character. */
#define MSB 0x80
/* Stored as _keepc[0] when _icv_iconv() assembles a character after SS3. */
#define MBYTE 0x8e
/* Added to the plane number to form _keepc[1] after SS3. */
#define PMASK 0xa0
/* Mask that keeps the low eight bits of a value. */
#define ONEBYTE 0xff
/* Mask that clears the high bit of a byte before table lookup. */
#define MSB_OFF 0x7f
/* Shift-in control byte: sets the shift state to IN. */
#define SI 0x0f
/* Shift-out control byte: sets the shift state to OUT. */
#define SO 0x0e
/* Escape byte that starts every designation / single-shift sequence. */
#define ESC 0x1b
/* Byte expected after ESC to select the set designated by ESC $ * H. */
#define SS2 0x4e
/* Byte expected after ESC to select the set designated by ESC $ + I..V. */
#define SS3 0x4f
/* Two-byte placeholder written to the output when a table lookup fails. */
#define NON_ID_CHAR_BYTE1 0xA1
#define NON_ID_CHAR_BYTE2 0xF5
/*
 * Conversion state carried between calls.  Allocated and initialised by
 * _icv_open(), released by _icv_close(), and updated by _icv_iconv().
 */
typedef struct _icv_state {
/* Bytes of an escape sequence collected so far; emitted by flush_buf(). */
char _buf[10];
/* Number of valid bytes currently held in _buf. */
size_t _bufcont;
/* Bytes of the multi-byte character being assembled for conversion. */
char _keepc[4];
/* Current state of the escape-sequence state machine (enum _GSTATE). */
short _gstate;
/* Current shift state (enum _ISTATE): IN or OUT. */
short _istate;
/*
 * Plane selected by the most recent designation: 0 for ESC $ ) A,
 * 1 for 'G', 2 for 'H', 3 and up for ESC $ + I..V; -1 when none.
 */
int _plane;
/* Plane that was in effect before the designation now being parsed. */
int _last_plane;
/* Error code of the current call; copied to errno on failure. */
int _errno;
} _iconv_st;
/*
 * States of the state machine in _icv_iconv().  Each state records how much
 * of an escape sequence or multi-byte character has been consumed so far.
 * The code visible in this file only uses G0 through G25.
 */
enum _GSTATE { G0, G1, G2, G3, G4, G5, G6, G7, G8, G9, \
G10,G11,G12,G13,G14,G15,G16,G17,G18,G19, \
G20,G21,G22,G23,G24,G25,G26,G27,G28,G29 };
/*
 * Shift state: IN is set on SI and at open/reset; OUT is set on SO and after
 * an ESC SS2 / ESC SS3 selection.  flush_buf() refuses to emit while OUT.
 */
enum _ISTATE { IN, OUT };
/* Emits the two bytes in st->_keepc with their high bits set; returns 2 or -1. */
int iso_gb_to_gbk(_iconv_st * st, char* buf, size_t buflen);
/* Converts the character in st->_keepc through two table lookups; returns 2, or 0 if buf is too small. */
int iso_to_big5_to_gbk(_iconv_st * st, char* buf, size_t buflen);
/* Binary search of v[0..n-1] for key x; returns the index or -1. */
int binsearch(unsigned long x, table_t v[], int n);
/* Binary search of big5_gbk_tab for big5code; returns the index or -1. */
int binsearch_big5_gbk(unsigned int big5code);
/* Copies the buffered escape bytes to the output; returns 0 or -1. */
int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft);
/*
 * Write the escape-sequence bytes held in st->_buf to the output buffer.
 *
 * Called when a partially parsed escape sequence turns out not to be one
 * the converter recognises, so the bytes collected so far are passed through.
 *
 * st            conversion state; _buf/_bufcont describe the pending bytes
 * outbuf        in/out: current output position, advanced past what is written
 * outbytesleft  in/out: space left in the output, reduced by what is written
 *
 * Returns 0 on success (including when nothing is pending).  Returns -1 and
 * sets st->_errno to E2BIG when the output is too small, or to EILSEQ when
 * the shift state is not IN.  On failure nothing is written and the pending
 * bytes stay buffered.
 */
int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft) {
	size_t pending = st->_bufcont;

	if (pending == 0)
		return 0;
	if (pending > *outbytesleft) {
		st->_errno = E2BIG;
		return -1;
	}
	if (st->_istate != IN) {
		st->_errno = EILSEQ;
		return -1;
	}
	/*
	 * Destination is the output buffer, source is the pending bytes.
	 * memcpy rather than strncpy: the bytes are raw data, not a
	 * NUL-terminated string.
	 */
	memcpy(*outbuf, st->_buf, pending);
	*outbuf += pending;
	*outbytesleft -= pending;
	st->_bufcont = 0;
	return 0;
}
/*
 * Allocate a conversion state and put it in its initial condition:
 * state G0, shift state IN, no plane selected, no error, nothing buffered.
 *
 * Returns the new state as a void pointer, or (void *)-1 with errno set to
 * ENOMEM when the allocation fails.
 */
void *
_icv_open()
{
	_iconv_st *state = malloc(sizeof *state);

	if (state == NULL) {
		errno = ENOMEM;
		return ((void *) -1);
	}

	state->_bufcont = 0;
	state->_errno = 0;
	state->_plane = -1;
	state->_last_plane = -1;
	state->_istate = IN;
	state->_gstate = G0;

	return ((void *) state);
}
/*
 * Release a conversion state obtained from _icv_open().
 * A NULL state is reported by setting errno to EBADF; nothing is freed.
 */
void
_icv_close(_iconv_st *st)
{
	if (st != NULL) {
		free(st);
		return;
	}
	errno = EBADF;
}
/*
 * Record a conversion failure and produce the error return value.
 *
 * The helpers do not all report errors the same way: some set st->_errno,
 * others only errno.  Whichever one is set is copied to the other so the
 * caller never sees (size_t)-1 with errno == 0.  E2BIG is the fallback
 * because the two character converters only fail on a short output buffer.
 */
static size_t
icv_fail(_iconv_st *st)
{
	if (st->_errno == 0)
		st->_errno = (errno != 0) ? errno : E2BIG;
	errno = st->_errno;
	return ((size_t)-1);
}

/*
 * Convert a run of input bytes, driving the escape-sequence state machine.
 *
 * Sequences recognised (they match ISO-2022-CN style designations):
 *   ESC $ ) A        plane 0, two-byte characters via iso_gb_to_gbk()
 *   ESC $ ) G / H    plane 1 / 2, characters via iso_to_big5_to_gbk()
 *   ESC $ * H        plane 2, selected with ESC SS2
 *   ESC $ + I..V     planes 3..16, selected with ESC SS3
 * SO / SI switch between double-byte (OUT) and pass-through (IN) text.
 * Escape bytes are buffered in st->_buf while a sequence is being parsed;
 * if the sequence is not recognised they are emitted with flush_buf() and
 * the offending byte is re-examined in state G0.
 *
 * st            conversion state from _icv_open()
 * inbuf         in/out: input position; NULL or pointer-to-NULL requests a
 *               reset of the state
 * inbytesleft   in/out: input bytes remaining
 * outbuf        in/out: output position
 * outbytesleft  in/out: output space remaining
 *
 * Returns *inbytesleft when the loop finishes (0 once all input has been
 * consumed), or (size_t)-1 with errno set: EBADF for a NULL state, E2BIG
 * when output space runs out, EILSEQ when flush_buf() is reached while
 * shifted out.  On a (size_t)-1 return the byte being processed has not
 * been consumed, so the call can be retried.
 */
size_t _icv_iconv(_iconv_st *st,
    char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft) {
	int n;
	char c;

	if (st == NULL) {
		errno = EBADF;
		return ((size_t) -1);
	}
	if (inbuf == NULL || *inbuf == NULL) {
		/* Reset request: return to the state _icv_open() establishes. */
		st->_gstate = G0;
		st->_istate = IN;
		st->_errno = 0;
		st->_plane = st->_last_plane = -1;
		/* Drop any half-parsed escape sequence as well. */
		st->_bufcont = 0;
		return ((size_t) 0);
	}
	errno = st->_errno = 0;
	while (*inbytesleft > 0 && *outbytesleft > 0) {
		/*
		 * "break" out of the switch consumes the current input byte;
		 * "continue" re-dispatches the same byte in the new state.
		 */
		switch (st->_gstate) {
		case G0:	/* pass-through text; ESC starts a sequence */
			if (**inbuf == ESC) {
				st->_gstate = G1;
				st->_buf[st->_bufcont++] = ESC;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case G1:	/* after ESC: expect '$' */
			if (**inbuf == '$') {
				st->_gstate = G2;
				st->_buf[st->_bufcont++] = '$';
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				continue;
			}
			break;
		case G2:	/* after ESC $: expect ')', '*' or '+' */
			if (**inbuf == ')') {
				st->_gstate = G3;
			} else if (**inbuf == '*') {
				st->_gstate = G12;
				st->_plane = 2;
			} else if (**inbuf == '+') {
				st->_gstate = G19;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				continue;
			}
			st->_buf[st->_bufcont++] = **inbuf;
			break;
		case G3:	/* after ESC $ ): expect 'A', 'G' or 'H' */
			if (**inbuf == 'A') {
				st->_plane = 0;
				st->_gstate = G4;
			} else if (**inbuf == 'G') {
				st->_plane = 1;
				st->_gstate = G8;
			} else if (**inbuf == 'H') {
				st->_plane = 2;
				st->_gstate = G8;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				continue;
			}
			st->_buf[st->_bufcont++] = **inbuf;
			break;
		case G4:	/* after ESC $ ) A: expect SO */
			if (**inbuf == SO) {
				st->_gstate = G5;
				st->_istate = OUT;
				st->_bufcont = 0;
				st->_last_plane = st->_plane;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			break;
		case G5:	/* plane 0, shifted out: first byte of a pair */
			if (**inbuf == SI) {
				st->_istate = IN;
				st->_gstate = G7;
				st->_last_plane = st->_plane;
			} else if (**inbuf == ESC) {
				st->_bufcont = 0;
				st->_gstate = G0;
				continue;
			} else {
				st->_keepc[0] = **inbuf;
				st->_gstate = G6;
			}
			break;
		case G6:	/* plane 0: second byte, convert the pair */
			st->_keepc[1] = **inbuf;
			n = iso_gb_to_gbk(st, *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				return (icv_fail(st));
			}
			st->_gstate = G5;
			break;
		case G7:	/* plane 0 designated, shifted in: pass-through */
			if (**inbuf == SO) {
				st->_gstate = G5;
				st->_istate = OUT;
				st->_last_plane = st->_plane;
				st->_bufcont = 0;
			} else if (**inbuf == ESC) {
				st->_gstate = G0;
				continue;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case G8:	/* after ESC $ ) G or H: expect SO */
			if (**inbuf == SO) {
				st->_istate = OUT;
				st->_gstate = G9;
				st->_bufcont = 0;
				st->_last_plane = st->_plane;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_plane = st->_last_plane;
				st->_istate = IN;
				continue;
			}
			break;
		case G9:	/* plane 1/2, shifted out: first byte of a pair */
			if (**inbuf == SI) {
				st->_istate = IN;
				st->_gstate = G11;
				st->_last_plane = st->_plane;
			} else if (**inbuf == ESC) {
				if (flush_buf(st, outbuf, outbytesleft) == -1) {
					return (icv_fail(st));
				}
				st->_gstate = G0;
				continue;
			} else {
				st->_keepc[0] = **inbuf;
				st->_gstate = G10;
			}
			break;
		case G10:	/* plane 1/2: second byte, convert the pair */
			st->_keepc[1] = **inbuf;
			n = iso_to_big5_to_gbk(st, *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				return (icv_fail(st));
			}
			st->_gstate = G9;
			break;
		case G11:	/* plane 1/2 designated, shifted in: pass-through */
			st->_bufcont = 0;
			if (**inbuf == SO) {
				st->_istate = OUT;
				st->_gstate = G9;
			} else if (**inbuf == ESC) {
				st->_gstate = G0;
				continue;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case G12:	/* after ESC $ *: expect 'H' */
			if (**inbuf == 'H') {
				st->_buf[st->_bufcont++] = 'H';
				st->_gstate = G13;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_istate = IN;
				st->_plane = st->_last_plane;
				st->_gstate = G0;
				continue;
			}
			break;
		case G13:	/* after ESC $ * H: expect ESC */
			if (**inbuf == ESC) {
				st->_buf[st->_bufcont++] = **inbuf;
				st->_gstate = G14;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			break;
		case G14:	/* after ESC $ * H ESC: expect SS2 */
			if (**inbuf == SS2) {
				st->_istate = OUT;
				st->_gstate = G15;
				st->_bufcont = 0;
				st->_last_plane = st->_plane = 2;
			} else if (**inbuf == '$') {
				/*
				 * A new designation follows: drop the trailing
				 * ESC from the buffer, flush the rest, and let
				 * G1 handle the '$'.
				 * NOTE(review): the dropped ESC is not put back,
				 * and a failed flush leaves _bufcont decremented
				 * for the retry -- confirm intended behaviour.
				 */
				st->_bufcont --;
				if (flush_buf(st, outbuf, outbytesleft) == -1) {
					return (icv_fail(st));
				} else {
					st->_gstate = G1;
					st->_plane = st->_last_plane;
					st->_istate = IN;
					continue;
				}
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			break;
		case G15:	/* after SS2: first byte of a pair */
			if (**inbuf == SI) {
				st->_gstate = G16;
				st->_istate = IN;
				st->_last_plane = st->_plane;
			} else if (**inbuf == ESC) {
				st->_bufcont = 0;
				st->_gstate = G0;
				continue;
			} else {
				st->_keepc[0] = **inbuf;
				st->_gstate = G18;
			}
			break;
		case G16:	/* SS2 set designated, shifted in: pass-through */
			if (**inbuf == ESC) {
				st->_gstate = G17;
				st->_buf[st->_bufcont++] = ESC;
			} else {
				**outbuf = **inbuf;
				(*outbuf) ++;
				(*outbytesleft) --;
				st->_bufcont = 0;
			}
			break;
		case G17:	/* after ESC seen in G16: expect '$' or SS2 */
			if (**inbuf == '$') {
				/*
				 * Hand the '$' to G1 without consuming it.  G1
				 * buffers it, so it must not be buffered here
				 * too or it would be emitted twice on a flush.
				 */
				st->_gstate = G1;
				continue;
			} else if (**inbuf == SS2) {
				st->_bufcont = 0;
				st->_gstate = G15;
				st->_istate = OUT;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G16;
				st->_istate = IN;
			}
			break;
		case G18:	/* after SS2: second byte, convert the pair */
			st->_keepc[1] = **inbuf;
			n = iso_to_big5_to_gbk(st, *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				return (icv_fail(st));
			}
			/*
			 * Advance only after a successful conversion, so a
			 * retry after E2BIG sees this byte as the second
			 * byte again rather than as a new first byte.
			 */
			st->_gstate = G15;
			break;
		case G19:	/* after ESC $ +: expect 'I'..'V' */
			c = **inbuf;
			if (c >= 'I' && c <= 'V') {
				/* 'I' selects plane 3, 'J' plane 4, ... 'V' plane 16 */
				st->_plane = c - 'I' + 3;
				st->_gstate = G20;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			st->_buf[st->_bufcont++] = c;
			break;
		case G20:	/* after ESC $ + x: expect ESC */
			if (**inbuf == ESC) {
				st->_buf[st->_bufcont++] = **inbuf;
				st->_gstate = G21;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_last_plane = st->_plane;
				continue;
			}
			break;
		case G21:	/* after ESC $ + x ESC: expect SS3 */
			if (**inbuf == SS3) {
				st->_istate = OUT;
				st->_gstate = G22;
				st->_bufcont = 0;
			} else if (**inbuf == '$') {
				/*
				 * Same handling as the '$' case in G14.
				 * NOTE(review): the dropped ESC is not put back,
				 * and a failed flush leaves _bufcont decremented
				 * for the retry -- confirm intended behaviour.
				 */
				st->_bufcont --;
				if (flush_buf(st, outbuf, outbytesleft) == -1) {
					return (icv_fail(st));
				} else {
					st->_istate = IN;
					st->_last_plane = st->_plane;
					st->_gstate = G1;
					continue;
				}
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_last_plane = st->_plane;
				continue;
			}
			break;
		case G22:	/* after SS3: first byte of a pair */
			if (**inbuf == SI) {
				st->_istate = IN;
				st->_gstate = G24;
				st->_last_plane = st->_plane;
			} else {
				st->_keepc[0] = (char)MBYTE;
				st->_keepc[1] = (char)(PMASK + st->_plane);
				st->_keepc[2] = **inbuf;
				st->_gstate = G23;
			}
			break;
		case G23:	/* after SS3: second byte, convert */
			st->_keepc[3] = **inbuf;
			n = iso_to_big5_to_gbk(st, *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				return (icv_fail(st));
			}
			st->_gstate = G22;
			break;
		case G24:	/* SS3 set designated, shifted in: pass-through */
			if (**inbuf == ESC) {
				st->_gstate = G25;
				st->_buf[st->_bufcont++] = ESC;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
				st->_bufcont = 0;
			}
			break;
		case G25:	/* after ESC seen in G24: expect '$' or SS3 */
			if (**inbuf == '$') {
				st->_gstate = G1;
				continue;
			} else if (**inbuf == SS3) {
				st->_gstate = G22;
				st->_bufcont = 0;
				st->_istate = OUT;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				return (icv_fail(st));
			} else {
				st->_gstate = G24;
				st->_istate = IN;
			}
			break;
		default:
			/*
			 * Unknown state value.
			 * NOTE(review): this leaves the loop through the
			 * _errno check below, so the function's return value
			 * is not (size_t)-1 even though errno is EILSEQ.
			 */
			st->_errno = errno = EILSEQ;
			st->_gstate = G0;
			break;
		}
		(*inbuf)++;
		(*inbytesleft)--;
		if (st->_errno) {
			break;
		}
		if (errno)
			return ((size_t)(-1));
	}
	if (*inbytesleft > 0 && *outbytesleft == 0) {
		errno = E2BIG;
		return ((size_t)(-1));
	}
	return (size_t)(*inbytesleft);
}
/*
 * Emit the two-byte character held in st->_keepc[0..1] with the high bit
 * of each byte set.
 *
 * Returns 2, the number of bytes written to buf.  When buflen is below 2
 * nothing is written, st->_errno is set to E2BIG and -1 is returned.
 */
int iso_gb_to_gbk(_iconv_st * st, char* buf, size_t buflen) {
	if (buflen >= 2) {
		buf[0] = st->_keepc[0] | MSB;
		buf[1] = st->_keepc[1] | MSB;
		return 2;
	}

	st->_errno = E2BIG;
	return -1;
}
/*
 * Convert the character held in st->_keepc[0..1] to a two-byte output code
 * using two table lookups: first cns_big5_tab1 (plane 1) or cns_big5_tab2
 * (plane 2), then big5_gbk_tab on the intermediate value.
 *
 * Only _keepc[0] and _keepc[1] are read, each with its high bit cleared.
 * For any plane other than 1 or 2, or when either lookup misses, the
 * placeholder NON_ID_CHAR_BYTE1/NON_ID_CHAR_BYTE2 is written instead.
 *
 * st      conversion state; _plane selects the first table
 * buf     output position; two bytes are written on success
 * buflen  space available at buf
 *
 * Returns 2 on success.  Returns 0 when buflen is below 2; nothing is
 * written, and both st->_errno and errno are set to E2BIG so that callers
 * reading either one see the error.
 */
int iso_to_big5_to_gbk(_iconv_st * st, char* buf, size_t buflen) {
	unsigned long cns_val;
	unsigned long big5_val = 0;
	int unidx;
	int idx;

	if (buflen < 2) {
		st->_errno = errno = E2BIG;
		return (0);
	}

	/* Lookup key: both bytes with the high bit stripped, first byte high. */
	cns_val = ((unsigned long)(st->_keepc[0] & MSB_OFF) << 8) +
	    (unsigned long)(st->_keepc[1] & MSB_OFF);

	switch (st->_plane) {
	case 1:
		unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM);
		if (unidx >= 0)
			big5_val = cns_big5_tab1[unidx].value;
		break;
	case 2:
		unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM);
		if (unidx >= 0)
			big5_val = cns_big5_tab2[unidx].value;
		break;
	default:
		/* No first-stage table for this plane. */
		unidx = -1;
		break;
	}

	idx = -1;
	if (unidx >= 0)
		idx = binsearch_big5_gbk((unsigned int)(big5_val & 0xffff));

	if (idx < 0) {
		buf[0] = (char)NON_ID_CHAR_BYTE1;
		buf[1] = (char)NON_ID_CHAR_BYTE2;
	} else {
		buf[0] = (char)((big5_gbk_tab[idx].value >> 8) & ONEBYTE);
		buf[1] = (char)(big5_gbk_tab[idx].value & ONEBYTE);
	}
	return (2);
}
/*
 * Binary search for key x in the table v, which holds n entries.
 * The search assumes the entries are ordered by ascending key.
 *
 * Returns the index of the matching entry, or -1 when x is not present.
 */
int binsearch(unsigned long x, table_t v[], int n)
{
	int lo = 0;
	int hi = n - 1;

	while (lo <= hi) {
		int probe = (lo + hi) / 2;

		if (x > v[probe].key) {
			lo = probe + 1;
		} else if (x < v[probe].key) {
			hi = probe - 1;
		} else {
			return probe;
		}
	}
	return (-1);
}
/*
 * Binary search for big5code among the BIG5MAX entries of big5_gbk_tab.
 * The search assumes the table is ordered by ascending key.
 *
 * Returns the index of the matching entry, or -1 when the code is absent.
 */
int binsearch_big5_gbk(unsigned int big5code)
{
	int first = 0;
	int last = BIG5MAX - 1;

	while (first <= last) {
		int probe = (first + last) / 2;

		if (big5code > big5_gbk_tab[probe].key) {
			first = probe + 1;
		} else if (big5code < big5_gbk_tab[probe].key) {
			last = probe - 1;
		} else {
			return probe;
		}
	}
	return (-1);
}
#ifdef DEBUG
/*
 * Debug driver (built only with DEBUG defined).
 *
 * Reads the file named by argv[1] into memory, dumps its bytes in hex to
 * stderr, converts it with _icv_iconv() into an output buffer of the same
 * size as the input, and writes the converted bytes to standard output.
 * Exits with a non-zero status on any failure; returns 0 on success.
 */
int main(int argc, char ** argv) {
	char *inbuf, *outbuf, *in_tmp, *out_tmp;
	size_t inbytesleft, outbytesleft;
	size_t i;
	int fd;
	struct stat s;
	_iconv_st * st;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s input\n", argv[0]);
		exit(-1);
	}
	if ((fd = open(argv[1], O_RDONLY)) == -1) {
		perror("open");
		exit(-2);
	}
	if (fstat(fd, &s) == -1) {
		perror("stat");
		exit(-3);
	}
	inbytesleft = outbytesleft = s.st_size;
	in_tmp = inbuf = (char *)malloc(inbytesleft);
	out_tmp = outbuf = (char *)malloc(outbytesleft);
	if (!inbuf || !outbuf) {
		perror("malloc");
		exit(-1);
	}
	/* read() returns a signed count; compare in the signed domain. */
	if (read(fd, inbuf, inbytesleft) != (ssize_t)inbytesleft) {
		perror("read");
		exit(-4);
	}
	/* Go through unsigned char so bytes >= 0x80 print as two hex digits. */
	for (i = 0; i < inbytesleft; i++)
		fprintf(stderr, "%x\t", (unsigned int)(unsigned char)inbuf[i]);
	fprintf(stderr, "\n");
	st = (_iconv_st *)_icv_open();
	if (st == (_iconv_st *) -1) {
		perror("_icv_open");
		exit(-1);
	}
	if (_icv_iconv(st,
	    &inbuf, &inbytesleft,
	    &outbuf, &outbytesleft) == (size_t)-1) {
		perror("icv_iconv");
		/* size_t needs a matching conversion; %d is wrong for it. */
		fprintf(stderr, "\ninbytesleft = %lu\n",
		    (unsigned long)inbytesleft);
		exit(-2);
	}
	if (write(1, out_tmp, s.st_size - outbytesleft) == -1) {
		perror("write");
		exit(-1);
	}
	free(in_tmp);
	free(out_tmp);
	close(fd);
	_icv_close(st);
	return (0);
}
#endif