#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include "tab_lookup.h"
/*
 * High bit of a byte.  The converter treats an input byte with this bit
 * clear as a single-byte character, and uses (byte & 0xc0) == MSB to
 * recognise a UTF-8 continuation byte.
 */
#define MSB 0x80
/* Mask that reduces a (possibly sign-extended) char to its low eight bits. */
#define ONEBYTE 0xff
/*
 * Decoder states kept in st->ustate by _icv_iconv():
 *   U0  - between characters; the next byte is examined as a lead byte.
 *   U1  - lead byte of a two-byte sequence seen; one continuation expected.
 *   U11 - a high-bit byte that is neither a two- nor a three-byte lead was
 *         seen; the following byte is taken unmodified as the second byte.
 *   U2  - lead byte of a three-byte sequence seen; first continuation
 *         expected.
 *   U3  - second continuation byte of a three-byte sequence expected.
 *   U4  - a complete sequence is held; look it up and emit the IBM code.
 */
enum _USTATE { U0, U1, U11, U2, U3, U4 };
/*
 * _icv_iconv - convert UTF-8 input to two-byte IBM codes.
 *
 * st            conversion state; EBADF is reported when it is NULL.
 * inbuf         in/out pointer to the next input byte.  Passing NULL (or a
 *               pointer to NULL) resets the decoder to its initial state.
 * inbytesleft   in/out count of input bytes remaining.
 * outbuf        in/out pointer to the next free output byte.
 * outbytesleft  in/out count of output bytes remaining.
 *
 * Input bytes with the high bit clear are copied through unchanged.  Two-
 * and three-byte UTF-8 sequences are collected in st->keepc across calls,
 * reduced to a 16-bit value, looked up with get_ibm_by_utf() and written
 * with utf8_to_ibm().
 *
 * Returns (size_t)-1 with errno set on failure:
 *   EBADF   st is NULL
 *   EINVAL  a continuation byte was expected but not found
 *   EILSEQ  the lookup failed or the state is unknown
 *   E2BIG   the output buffer cannot hold the next character
 * Otherwise returns the remaining *inbytesleft, which is 0 once all input
 * has been consumed.
 *
 * NOTE(review): on EINVAL/EILSEQ the offending byte has already been
 * consumed and, except in the unknown-state case, st->ustate is left as it
 * was; callers that want to resynchronise should issue a reset call.
 */
size_t
_icv_iconv(_icv_state *st, char **inbuf, size_t *inbytesleft,
		char **outbuf, size_t *outbytesleft)
{
	char		c1, c2;
	int		n, unidx;
	unsigned long	ibm_code;

#ifdef DEBUG
	fprintf(stderr, "========== iconv(): UTF8 --> IBM ==========\n");
#endif
	if (st == NULL) {
		errno = EBADF;
		return ((size_t) -1);
	}

	/* A NULL input buffer is a request to reset the conversion state. */
	if (inbuf == NULL || *inbuf == NULL) {
		st->ustate = U0;
		st->_errno = 0;
		return ((size_t) 0);
	}

	st->_errno = 0;
	errno = 0;

	while (*inbytesleft > 0 && *outbytesleft > 0) {
		switch (st->ustate) {
		case U0:
			if ((**inbuf & MSB) == 0) {
				/* Single-byte character: copy as is. */
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			} else {
				/* Remember the lead byte; U4 decodes from keepc. */
				st->keepc[0] = **inbuf;
				if ((**inbuf & 0xe0) == 0xc0)
					st->ustate = U1;
				else if ((**inbuf & 0xf0) == 0xe0)
					st->ustate = U2;
				else
					st->ustate = U11;
			}
			break;

		case U1:
			if ((**inbuf & 0xc0) == MSB) {
				st->ustate = U4;
				st->keepc[1] = **inbuf;
#ifdef DEBUG
				fprintf(stderr, "UTF8: %02x%02x --> ",
				    st->keepc[0]&ONEBYTE, st->keepc[1]&ONEBYTE);
#endif
				/* Do not consume the byte until U4 has emitted. */
				continue;
			}
			st->_errno = errno = EINVAL;
			break;

		case U11:
			/* The second byte is taken verbatim. */
			st->keepc[1] = **inbuf;
			st->ustate = U4;
			continue;

		case U2:
			if ((**inbuf & 0xc0) == MSB) {
				st->ustate = U3;
				st->keepc[1] = **inbuf;
			} else {
				st->_errno = errno = EINVAL;
			}
			break;

		case U3:
			if ((**inbuf & 0xc0) == MSB) {
				st->ustate = U4;
				st->keepc[2] = **inbuf;
#ifdef DEBUG
				fprintf(stderr, "UTF8: %02x%02x%02x --> ", st->keepc[0]&ONEBYTE,
				    st->keepc[1]&ONEBYTE, **inbuf&ONEBYTE);
#endif
				/* Do not consume the byte until U4 has emitted. */
				continue;
			}
			st->_errno = errno = EINVAL;
			break;

		case U4:
			/*
			 * Rebuild the 16-bit value from the saved bytes rather
			 * than from locals set in an earlier state: after an
			 * E2BIG return the next call re-enters here directly,
			 * and the locals of the previous call are gone.
			 */
			if ((st->keepc[0] & 0xe0) == 0xc0) {
				/* Two-byte sequence: 110xxxxx 10xxxxxx. */
				c1 = (st->keepc[0] & 0x1c) >> 2;
				c2 = ((st->keepc[0] & 0x03) << 6) |
				    (st->keepc[1] & 0x3f);
			} else if ((st->keepc[0] & 0xf0) == 0xe0) {
				/* Three-byte: 1110xxxx 10xxxxxx 10xxxxxx. */
				c1 = ((st->keepc[0] & 0x0f) << 4) |
				    ((st->keepc[1] & 0x3c) >> 2);
				c2 = ((st->keepc[1] & 0x03) << 6) |
				    (st->keepc[2] & 0x3f);
			} else {
				/* Reached through U11: both bytes used raw. */
				c1 = st->keepc[0];
				c2 = st->keepc[1];
			}

			n = get_ibm_by_utf(st, c1, c2, &unidx, &ibm_code);
			if (n != 0) {
				st->_errno = errno = EILSEQ;
				break;
			}

			n = utf8_to_ibm(unidx, ibm_code,
			    *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				/*
				 * Output did not fit.  State stays U4 and the
				 * last input byte is not consumed, so the call
				 * can be repeated with a larger buffer.
				 */
				st->_errno = errno;
				return ((size_t)-1);
			}
			st->ustate = U0;
			st->_errno = 0;
			break;

		default:
			/* Unknown state: report and start over. */
			st->_errno = errno = EILSEQ;
			st->ustate = U0;
			break;
		}

		(*inbuf)++;
		(*inbytesleft)--;

		if (st->_errno) {
#ifdef DEBUG
			fprintf(stderr, "!!!!!\tst->_errno = %d\tst->ustate = %d\n",
			    st->_errno, st->ustate);
#endif
			break;
		}

		if (errno)
			return ((size_t)-1);
	}

	/* A conversion error recorded in the loop must be reported as -1. */
	if (st->_errno != 0) {
		errno = st->_errno;
		return ((size_t)-1);
	}

	/* Out of output space only matters while input is still pending. */
	if (*inbytesleft > 0 && *outbytesleft == 0) {
		errno = E2BIG;
		return ((size_t)-1);
	}

	return (*inbytesleft);
}
/*
 * get_ibm_by_utf - look up the IBM code for a 16-bit Unicode value.
 *
 * st        conversion state holding the lookup table.
 * c1, c2    high and low byte of the Unicode value.
 * unidx     out: index returned by bisearch(); a negative index is treated
 *           as "not found".
 * ibm_code  out: right_code of the matching entry when st->left_to_right
 *           is set, left_code otherwise; 0 when no entry matched, so the
 *           caller never reads an indeterminate value.
 *
 * Always returns 0; a miss is reported through *unidx only.
 */
int get_ibm_by_utf(st, c1, c2, unidx, ibm_code)
_icv_state	*st;
char		c1, c2;
int		*unidx;
unsigned long	*ibm_code;
{
	unsigned long	unicode;

	/* Mask each byte so a negative char does not sign-extend. */
	unicode = (unsigned long) ((c1 & ONEBYTE) << 8) + (c2 & ONEBYTE);
	*unidx = bisearch(unicode, st, st->table_size);
	if ((*unidx) >= 0) {
		if (st->left_to_right)
			*ibm_code = st->table[*unidx].right_code;
		else
			*ibm_code = st->table[*unidx].left_code;
	} else {
		*ibm_code = 0;
	}
#ifdef DEBUG
	fprintf(stderr, "Unicode=%04lx, idx=%5d, IBM=%lx ", unicode, *unidx, *ibm_code);
#endif
	return (0);
}
/*
 * utf8_to_ibm - store the two-byte IBM code for one converted character.
 *
 * unidx     lookup index; when negative, NON_ID_CHAR is emitted instead of
 *           ibm_code.
 * ibm_code  code to emit; only its low 16 bits are used, high byte first.
 * buf       output position.
 * buflen    number of bytes available at buf.
 *
 * Returns 2, the number of bytes written.  When fewer than two bytes are
 * available nothing is written, errno is set to E2BIG and 0 is returned.
 */
int utf8_to_ibm(unidx, ibm_code, buf, buflen)
int		unidx;
unsigned long	ibm_code;
char		*buf;
size_t		buflen;
{
	unsigned long	val;

	/* Check the space first so a short buffer is never written past. */
	if (buflen < 2) {
		errno = E2BIG;
		return (0);
	}

	if (unidx < 0)
		ibm_code = (unsigned long)NON_ID_CHAR;

	val = ibm_code & 0xffff;
	buf[0] = (char) ((val & 0xff00) >> 8);
	buf[1] = (char) (val & 0xff);
#ifdef DEBUG
	fprintf(stderr, "\t->%x %x<-\n", *buf, *(buf+1));
#endif
	return (2);
}