#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <gb2312_unicode.h>
/*
 * High bit of a byte.  _icv_iconv() treats a byte with this bit clear as
 * 7-bit ASCII; hz2utf8() ORs it into both input bytes before computing
 * the table index.
 */
#define MSB 0x80
/*
 * EF BF BD is the UTF-8 encoding of U+FFFD.  hz2utf8() writes these three
 * bytes when it cannot map the input pair to a Unicode value in
 * 0x0080..0xffff.
 */
#define UTF8_NON_ID_CHAR1 0xEF
#define UTF8_NON_ID_CHAR2 0xBF
#define UTF8_NON_ID_CHAR3 0xBD
/*
 * Decoder states used by _icv_iconv():
 *   G0 - ASCII mode (initial state); ASCII bytes are copied through.
 *   G1 - a '~' was seen in ASCII mode; the next byte selects the escape.
 *   G2 - inside "~{": waiting for the first byte of a two-byte character.
 *   G3 - first byte saved in _lastc; waiting for the second byte.
 *   G4 - a '~' was seen while in G2; '}' returns to G0.
 *   G5 - not referenced by the code in this file.
 */
enum _GSTATE { G0, G1, G2, G3, G4, G5};
/*
 * Per-descriptor conversion state.  Allocated by _icv_open(), released by
 * _icv_close(), and carried across successive _icv_iconv() calls.
 */
typedef struct _icv_state {
char _lastc;	/* first byte of a two-byte character, saved in G2 for G3 */
short _gstate;	/* current decoder state, one of enum _GSTATE */
} _iconv_st;
/*
 * Convert one two-byte character (in_byte1, in_byte2) to UTF-8, writing
 * at most buflen bytes at buf.  Returns the number of bytes written
 * (2 or 3), or 0 when buflen is too small.
 */
int
hz2utf8(char in_byte1, char in_byte2, char *buf, int buflen);
void *
_icv_open()
{
_iconv_st *st;
if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
errno = ENOMEM;
return ((void *) -1);
}
st->_gstate = G0;
return ((void *)st);
}
/*
 * _icv_close -- release a conversion descriptor.
 *
 * Frees the state block st.  A NULL descriptor is rejected by setting
 * errno to EBADF; nothing is freed in that case.
 */
void
_icv_close(_iconv_st *st)
{
    if (st != NULL) {
        free(st);
        return;
    }
    errno = EBADF;
}
/*
 * _icv_iconv -- convert HZ-encoded input to UTF-8.
 *
 * st            conversion state from _icv_open(); the decoder state is
 *               kept across calls, so input may be split anywhere.
 * inbuf         in/out: current input position; NULL (or pointing at NULL)
 *               resets the decoder to G0 and returns 0.
 * inbytesleft   in/out: bytes remaining at *inbuf.
 * outbuf        in/out: current output position.
 * outbytesleft  in/out: bytes of room remaining at *outbuf.
 *
 * The loop consumes one input byte per iteration:
 *   G0: '~' enters G1; an ASCII byte is copied to the output.
 *   G1: '{' enters G2; '\n' returns to G0 with no output; '~' emits a
 *       literal '~' and returns to G0; anything else is EINVAL.
 *   G2: '~' enters G4; any other byte is saved as the first byte of a
 *       two-byte character and G3 is entered.
 *   G3: the saved byte and the current byte are passed to hz2utf8().
 *   G4: '}' returns to G0; anything other than '\n' is EINVAL.
 *
 * Returns *inbytesleft (0 whenever this point is reached, since leftover
 * input with no output room is reported as E2BIG), or (size_t)-1 with
 * errno set to:
 *   EBADF   st is NULL;
 *   EINVAL  unexpected byte after '~' -- the offending byte has already
 *           been consumed and the decoder state is left unchanged;
 *   E2BIG   output buffer exhausted; input is left at the unconverted byte.
 * errno is only written on failure; it is never reset to 0.
 */
size_t
_icv_iconv(_iconv_st *st, char **inbuf, size_t*inbytesleft,
char **outbuf, size_t*outbytesleft)
{
    /*
     * Upper bound on the room reported to hz2utf8(), whose length parameter
     * is an int.  0x7fff fits in int on every conforming implementation.
     */
    enum { OUT_CLAMP = 0x7fff };
    int n;
    int room;
    int err = 0;    /* pending error code; replaces the old errno-as-flag */

    if (st == NULL) {
        errno = EBADF;
        return ((size_t)-1);
    }
    if (inbuf == NULL || *inbuf == NULL) {
        st->_gstate = G0;
        return (0);
    }

    while (*inbytesleft > 0 && *outbytesleft > 0) {
        switch (st->_gstate) {
        case G0:
            if (**inbuf == '~') {
                st->_gstate = G1;
            } else if (((**inbuf) & MSB) == 0) {
                **outbuf = **inbuf;
                (*outbuf)++, (*outbytesleft)--;
            }
            /*
             * NOTE(review): a byte with the high bit set is consumed
             * here without output or error -- confirm this is intended.
             */
            break;
        case G1:
            if (**inbuf == '{') {
                st->_gstate = G2;
            } else if (**inbuf == '\n') {
                st->_gstate = G0;
            } else if (**inbuf == '~') {
                **outbuf = '~';
                (*outbuf)++, (*outbytesleft)--;
                st->_gstate = G0;
            } else {
                err = EINVAL;
            }
            break;
        case G2:
            if (**inbuf == '~') {
                st->_gstate = G4;
            } else {
                st->_lastc = **inbuf;
                st->_gstate = G3;
            }
            break;
        case G3:
            /*
             * Clamp before narrowing: passing a size_t above INT_MAX
             * straight to an int parameter could turn a huge buffer into
             * a zero or negative length and cause a bogus E2BIG.
             */
            room = (*outbytesleft > (size_t)OUT_CLAMP) ?
                OUT_CLAMP : (int)*outbytesleft;
            n = hz2utf8(st->_lastc, **inbuf, *outbuf, room);
            if (n > 0) {
                (*outbuf) += n, (*outbytesleft) -= n;
            } else {
                /* State stays G3 and the byte is not consumed: retryable. */
                errno = E2BIG;
                return ((size_t)-1);
            }
            st->_gstate = G2;
            break;
        case G4:
            if (**inbuf == '}') {
                st->_gstate = G0;
            } else if (**inbuf == '\n') {
                /*
                 * Back to G2 WITHOUT consuming the byte, so the '\n' is
                 * looked at again in G2 (where it is stored as a lead
                 * byte).  NOTE(review): confirm this is intended.
                 */
                st->_gstate = G2;
                continue;
            } else {
                err = EINVAL;
            }
            break;
        default:
            /* Unknown state: the byte is consumed with no output. */
            break;
        }
        (*inbuf)++, (*inbytesleft)--;
        if (err != 0) {
            errno = err;
            return ((size_t)-1);
        }
    }

    if (*inbytesleft > 0 && *outbytesleft == 0) {
        errno = E2BIG;
        return ((size_t)-1);
    }
    return ((size_t)(*inbytesleft));
}
/*
 * hz2utf8 -- convert one two-byte HZ character to UTF-8.
 *
 * in_byte1, in_byte2  the two bytes of the character; the high bit is
 *                     forced on before the lookup, so 7-bit HZ bytes and
 *                     8-bit bytes are both accepted.
 * buf                 destination for the UTF-8 bytes.
 * buflen              room available at buf.
 *
 * Returns the number of bytes written (2 or 3), or 0 if buflen is too
 * small (nothing is written in that case; any buflen below 2 is rejected
 * up front).  A pair outside the 94x94 grid, or one whose table entry is
 * not in 0x0080..0xffff, is written as U+FFFD (EF BF BD).
 */
int
hz2utf8(char in_byte1, char in_byte2, char *buf, int buflen)
{
    int row;
    int col;
    int unicode;

    if (buflen < 2)
        return 0;

    row = ((in_byte1 | MSB) & 0xff) - 0xa1;
    col = ((in_byte2 | MSB) & 0xff) - 0xa1;

    /*
     * Validate row and column separately.  Checking only the combined
     * index lets an out-of-range second byte alias to a different
     * character and lets 0xff bytes index past the 94x94 grid.
     * NOTE(review): assumes Unicode[] holds all 94 * 94 entries -- the
     * table is declared outside this file; confirm its size.
     */
    if (row >= 0 && row < 94 && col >= 0 && col < 94) {
        unicode = Unicode[row * 94 + col];

        if (unicode >= 0x0080 && unicode <= 0x07ff) {
            /* Two-byte form: 110xxxxx 10xxxxxx (buflen >= 2 checked above). */
            buf[0] = (char)(((unicode >> 6) & 0x1f) | 0xc0);
            buf[1] = (char)((unicode & 0x3f) | MSB);
            return 2;
        }
        if (unicode >= 0x0800 && unicode <= 0xffff) {
            /* Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx. */
            if (buflen < 3)
                return 0;
            buf[0] = (char)(((unicode >> 12) & 0x0f) | 0xe0);
            buf[1] = (char)(((unicode >> 6) & 0x3f) | MSB);
            buf[2] = (char)((unicode & 0x3f) | MSB);
            return 3;
        }
    }

    /* No usable mapping: emit the replacement character. */
    if (buflen < 3)
        return 0;
    buf[0] = (char)UTF8_NON_ID_CHAR1;
    buf[1] = (char)UTF8_NON_ID_CHAR2;
    buf[2] = (char)UTF8_NON_ID_CHAR3;
    return 3;
}