#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <strings.h>
#include "iscii.h"
/* High bit of a byte; iscii_to_utf8() tests it to choose table lookup over pass-through. */
#define MSB 0x80
/* Mask applied to a char to obtain its value as an unsigned 8-bit quantity. */
#define ONEBYTE 0xff
/*
 * The three bytes EF BF BD (UTF-8 for U+FFFD REPLACEMENT CHARACTER).
 * They are written to the output when a code has no mapping.
 */
#define REPLACE_CHAR1 0xEF
#define REPLACE_CHAR2 0xBF
#define REPLACE_CHAR3 0xBD
/*
 * Store the UTF-8 encoding of code point v into the byte array b.
 *
 *   UTF8_SET1B: v in 0x00..0x7f      -> 1 byte  (usable as an expression)
 *   UTF8_SET2B: v in 0x80..0x7ff     -> 2 bytes (statement)
 *   UTF8_SET3B: v in 0x800..0xffff   -> 3 bytes (statement)
 *
 * The caller is responsible for the range check and for making sure b has
 * room.  The multi-byte forms are wrapped in do { } while (0) so that they
 * behave as a single statement (e.g. under an unbraced if), and every
 * argument is parenthesized so that expressions can be passed safely.
 * Note that b and v are evaluated more than once: do not pass arguments
 * with side effects.
 */
#define UTF8_SET1B(b,v) \
	((b)[0] = ((v) & 0x7f))
#define UTF8_SET2B(b,v) \
	do { \
		(b)[0] = (0xc0 | (((v) >> 6) & 0x1f)); \
		(b)[1] = (0x80 | ((v) & 0x3f)); \
	} while (0)
#define UTF8_SET3B(b,v) \
	do { \
		(b)[0] = (0xe0 | (((v) >> 12) & 0xf)); \
		(b)[1] = (0x80 | (((v) >> 6) & 0x3f)); \
		(b)[2] = (0x80 | ((v) & 0x3f)); \
	} while (0)
/* Conversion state allocated by _icv_open() and freed by _icv_close(). */
typedef struct _icv_state {
/*
 * keepc[0]: attribute byte used by iscii_to_utf8() to select the script
 *           table (initialised to DEV_ATR).
 * keepc[1], keepc[2]: ISCII bytes waiting to be converted; 0 means empty.
 */
char keepc[3];
/* enum _CSTATE class of the previously consumed input byte. */
short pState;
/* errno recorded by _icv_iconv() so a later call can retry after E2BIG. */
int _errno;
} _iconv_st;
/*
 * Input byte classes used by _icv_iconv(): S_BASIC for an ordinary byte,
 * S_ATR for ISC_atr, S_EXT for ISC_ext.  S_NONE is not referenced in the
 * code visible here.
 */
enum _CSTATE { S_BASIC, S_ATR, S_EXT, S_NONE };
/* Non-zero when a nukta table / EXT table is registered for the script type. */
#define have_nukta(isc_type) ( nukta_type[isc_type] != NULL )
#define have_EXT(isc_type) ( EXT_type[isc_type] != NULL )
/* ISCII code that corresponds to index 0 of the nukta and EXT tables. */
#define FIRST_CHAR 0xA0
/* Forward declaration; the definition follows iscii_to_utf8(). */
static int copy_to_outbuf(ucs_t uniid, char *buf, size_t buflen);
/*
 * Look up the code point stored for `iscii` in the nukta table of script
 * `type`.
 *
 * The table is indexed by (iscii - FIRST_CHAR); codes below FIRST_CHAR
 * yield 0.  The caller must have checked have_nukta(type) first, because
 * the table pointer is dereferenced without a NULL test.  No upper bound
 * is applied to the index here.
 */
static ucs_t
get_nukta(uchar iscii, int type)
{
	int *table = nukta_type[type];
	int offset = iscii - FIRST_CHAR;

	if (offset < 0) {
		return 0;
	}
	return table[offset];
}
/*
 * Look up the code point stored for `iscii` in the EXT table of script
 * `type`.
 *
 * The table is indexed by (iscii - FIRST_CHAR); codes below FIRST_CHAR
 * yield 0.  The caller must have checked have_EXT(type) first, because
 * the table pointer is dereferenced without a NULL test.  No upper bound
 * is applied to the index here.
 */
static ucs_t
get_EXT(uchar iscii, int type)
{
	int *table = EXT_type[type];
	int offset = iscii - FIRST_CHAR;

	if (offset < 0) {
		return 0;
	}
	return table[offset];
}
/*
 * Map an ISCII code to a code point using a table of ranges.
 *
 * Each Entry describes `count` consecutive ISCII codes starting at
 * `iscii`, mapped onto consecutive code points starting at `ucs`.
 * The scan stops as soon as an entry starts above the requested code,
 * which presumably relies on the table being sorted by ascending
 * `iscii` (verify against the table definitions).
 *
 * entry: first element of the range table
 * num:   number of elements in the table
 * iscii: code to look up
 *
 * Returns the mapped code point, or 0 when no range contains the code.
 */
static ucs_t
traverse_table(Entry *entry, int num, uchar iscii)
{
	int idx;

	for (idx = 0; idx < num; idx++) {
		Entry cur = entry[idx];

		if (iscii < cur.iscii) {
			/* Every remaining range starts even higher: no match. */
			return 0;
		}
		if (iscii < cur.iscii + cur.count) {
			return cur.ucs + (iscii - cur.iscii);
		}
	}
	return 0;
}
/*
 * Convert the ISCII byte(s) pending in st->keepc[1] / st->keepc[2] to UTF-8.
 *
 * st->keepc[0] selects the script table through isc_TYPE[].  Depending on
 * the pending pair, one or both bytes are consumed; a byte that is not
 * consumed is moved from keepc[2] to keepc[1] so that it becomes the first
 * byte of the next call.
 *
 * st:     conversion state; keepc[1] and keepc[2] are updated
 * buf:    destination for the UTF-8 bytes
 * buflen: number of bytes available at buf
 *
 * Returns the number of bytes written.  A return of 0 means nothing was
 * written and the pending bytes were left untouched, except for an invalid
 * EXT sequence, which is discarded.  errno is set to E2BIG when the output
 * does not fit and to EILSEQ for an invalid EXT sequence.
 */
int
iscii_to_utf8(_iconv_st *st, char *buf, size_t buflen)
{
/* Default attribute byte; also used by _icv_open() and _icv_iconv() below. */
#define DEV_ATR 0x42
	ucs_t uniid;
	int nBytes = 0;
	ISCII isc_type = isc_TYPE[st->keepc[0] - DEV_ATR];
	Entries en = iscii_table[isc_type];
	unsigned int keepc1 = (unsigned int) (st->keepc[1] & ONEBYTE);
	unsigned int keepc2 = (unsigned int) (st->keepc[2] & ONEBYTE);

	if (keepc1 == 0xFF) {
		/* 0xFF is always replaced by U+FFFD. */
		if (buflen < 3) {
			errno = E2BIG;
			return 0;
		}
		buf[0] = (char)REPLACE_CHAR1;
		buf[1] = (char)REPLACE_CHAR2;
		buf[2] = (char)REPLACE_CHAR3;
		/*
		 * Consume the 0xFF byte.  Leaving it in keepc[1] would make
		 * every following call take this branch again, turning all
		 * later input into U+FFFD.
		 */
		st->keepc[1] = st->keepc[2];
		st->keepc[2] = 0;
		return (3);
	}

	if (keepc2 == 0) {
		/* A single pending byte: table lookup for high-bit codes, else as is. */
		if (keepc1 & MSB) {
			uniid = traverse_table(en.entry, en.items, keepc1);
		} else {
			uniid = keepc1;
		}
		if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0)
			goto E2big;
		st->keepc[1] = 0;
	} else {
		if (keepc1 & MSB) {
			switch (keepc1) {
			case ISC_ext:
				/* EXT prefix: the second byte indexes the EXT table. */
				if (have_EXT(isc_type) && is_valid_ext_code(keepc2)) {
					uniid = get_EXT(keepc2, isc_type);
					if ((nBytes = copy_to_outbuf(uniid, buf,
					    buflen)) == 0)
						goto E2big;
				} else {
					errno = EILSEQ;
				}
				st->keepc[1] = st->keepc[2] = 0;
				break;

			case ISC_halant:
				/*
				 * halant+halant and halant+nukta produce two
				 * code points (3 bytes each), so make sure
				 * both fit before writing anything.  errno
				 * must be set here: the caller relies on it
				 * to keep the pending pair for a retry.
				 */
				if ((keepc2 == ISC_halant || keepc2 == ISC_nukta) &&
				    buflen < 6) {
					errno = E2BIG;
					goto E2big;
				}
				uniid = traverse_table(en.entry, en.items, keepc1);
				if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0)
					goto E2big;
				st->keepc[1] = st->keepc[2];
				if (keepc2 == ISC_halant || keepc2 == ISC_nukta) {
					int nbytes_2 = 0;

					if (keepc2 == ISC_halant)
						uniid = UNI_ZWNJ;
					if (keepc2 == ISC_nukta)
						uniid = UNI_ZWJ;
					/* Only the space after the first code point remains. */
					if ((nbytes_2 = copy_to_outbuf(uniid,
					    buf + nBytes,
					    buflen - (size_t)nBytes)) == 0)
						goto E2big;
					st->keepc[1] = st->keepc[2] = 0;
					nBytes += nbytes_2;
				}
				break;

			case ISC_danda:
				/* danda+danda maps to one code point for type DEV. */
				if (isc_type == DEV && keepc2 == ISC_danda) {
					uniid = UNI_DOUBLE_DANDA;
					if ((nBytes = copy_to_outbuf(uniid, buf,
					    buflen)) == 0)
						goto E2big;
					st->keepc[1] = st->keepc[2] = 0;
					break;
				}
				/* FALLTHROUGH: any other danda pair is handled generically */

			default:
				uniid = traverse_table(en.entry, en.items, keepc1);
				if (have_nukta(isc_type) && keepc2 == ISC_nukta) {
					int ucs;

					if ((ucs = get_nukta(keepc1, isc_type)) != 0) {
						/* Nukta form exists: both bytes are consumed. */
						uniid = ucs;
						if ((nBytes = copy_to_outbuf(uniid,
						    buf, buflen)) == 0)
							goto E2big;
						st->keepc[1] = st->keepc[2] = 0;
					} else {
						/* No nukta form: emit the base, keep the nukta. */
						if ((nBytes = copy_to_outbuf(uniid,
						    buf, buflen)) == 0)
							goto E2big;
						st->keepc[1] = st->keepc[2];
					}
				} else {
					if ((nBytes = copy_to_outbuf(uniid, buf,
					    buflen)) == 0)
						goto E2big;
					st->keepc[1] = st->keepc[2];
				}
				break;
			}
		} else {
			/* First byte has no high bit: emit it, keep the second byte pending. */
			uniid = keepc1;
			if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0)
				goto E2big;
			st->keepc[1] = st->keepc[2];
		}
		st->keepc[2] = 0;
	}

E2big:
	return nBytes;
}
/*
 * Write the UTF-8 encoding of `uniid` to `buf`.
 *
 * A value that is not greater than 0 is written as the three replacement
 * bytes REPLACE_CHAR1..3.  Values up to 0xffff take 1 to 3 bytes.
 *
 * uniid:  code point to encode
 * buf:    destination buffer
 * buflen: bytes available at buf
 *
 * Returns the number of bytes written.  Returns 0 with errno set to E2BIG
 * when the encoding does not fit in buflen.  Values above 0xffff also
 * return 0, but errno is left unchanged in that case.
 */
static int
copy_to_outbuf(ucs_t uniid, char *buf, size_t buflen)
{
	int need;

	if (!(uniid > 0)) {
		/* No mapping: emit the replacement sequence. */
		if (buflen < 3) {
			errno = E2BIG;
			return 0;
		}
		buf[0] = (char)REPLACE_CHAR1;
		buf[1] = (char)REPLACE_CHAR2;
		buf[2] = (char)REPLACE_CHAR3;
		return 3;
	}

	/* Work out how many bytes the encoding takes. */
	if (uniid <= 0x7f) {
		need = 1;
	} else if (uniid <= 0x7ff) {
		need = 2;
	} else if (uniid <= 0xffff) {
		need = 3;
	} else {
		/* Outside the supported range: nothing written, errno untouched. */
		return 0;
	}

	if (buflen < (size_t)need) {
		errno = E2BIG;
		return 0;
	}

	switch (need) {
	case 1:
		UTF8_SET1B(buf, uniid);
		break;
	case 2:
		UTF8_SET2B(buf, uniid);
		break;
	default:
		UTF8_SET3B(buf, uniid);
		break;
	}
	return need;
}
/*
 * Allocate a zero-filled conversion state.
 *
 * The new state starts with keepc[0] set to DEV_ATR and pState set to
 * S_BASIC.
 *
 * Returns the state pointer, or (void *)-1 with errno set to ENOMEM when
 * the allocation fails.
 */
void *
_icv_open()
{
	_iconv_st *state = calloc(1, sizeof (*state));

	if (state == NULL) {
		errno = ENOMEM;
		return ((void *)-1);
	}

	state->keepc[0] = DEV_ATR;
	state->pState = S_BASIC;
	return ((void *)state);
}
/*
 * Free a conversion state obtained from _icv_open().
 *
 * A NULL state is rejected by setting errno to EBADF; nothing is freed
 * in that case.
 */
void
_icv_close(_iconv_st *st)
{
	if (st == NULL) {
		errno = EBADF;
		return;
	}
	free(st);
}
/*
 * Convert ISCII bytes from *inbuf to UTF-8 in *outbuf.
 *
 * st:            conversion state from _icv_open()
 * inbuf:         address of the input cursor; advanced by one for every
 *                byte processed by the loop
 * inbytesleft:   remaining input bytes; decremented alongside *inbuf
 * outbuf:        address of the output cursor; advanced by the byte count
 *                that iscii_to_utf8() reports
 * outbytesleft:  remaining output space; decremented alongside *outbuf
 *
 * When inbuf or *inbuf is NULL the attribute byte, the previous-state
 * field and the saved errno are reset and 0 is returned.
 *
 * Returns (size_t)-1 with errno set (EBADF for a NULL state, EILSEQ,
 * E2BIG, or whatever iscii_to_utf8() stored in errno); otherwise returns
 * the value of *inbytesleft when the loop ends.
 *
 * Input bytes are buffered in st->keepc[1] / st->keepc[2] and converted
 * by iscii_to_utf8() once a following byte has been seen, so the last
 * input byte can remain pending in the state after the call returns.
 */
size_t
_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
int n;
short curState;
if (st == NULL) {
errno = EBADF;
return ((size_t) -1);
}
/* Reset request. NOTE(review): keepc[1] and keepc[2] are not cleared here - confirm that is intended. */
if (inbuf == NULL || *inbuf == NULL) {
st->keepc[0] = DEV_ATR;
st->pState = S_BASIC;
st->_errno = 0;
return ((size_t)0);
}
/*
 * The previous call ended with E2BIG: retry the bytes still pending in
 * the state before reading new input.
 * NOTE(review): n may be 0 again if the buffer is still too small, yet
 * the saved errno is cleared below regardless - confirm.
 */
if ( st->_errno == E2BIG ) {
n = iscii_to_utf8(st, *outbuf, *outbytesleft);
(*outbuf) += n;
(*outbytesleft) -= n;
}
st->_errno = errno = 0;
while (*inbytesleft > 0 && *outbytesleft > 0) {
unsigned int curChar = (unsigned int)(**inbuf & ONEBYTE);
unsigned int prevChar = (unsigned int)(st->keepc[1] & ONEBYTE);
/* Classify the current byte. */
if (curChar == ISC_ext)
curState = S_EXT;
else if (curChar == ISC_atr)
curState = S_ATR;
else
curState = S_BASIC;
switch (curState) {
case S_BASIC:
/* Store the byte in the first free pending slot. */
if (prevChar == 0)
st->keepc[1] = curChar;
else
st->keepc[2] = curChar;
if (st->pState == S_ATR) {
/*
 * The byte follows ISC_atr, so it is an attribute code and not text:
 * drop it from keepc[1], and store it in keepc[0] when it lies in
 * 0x42..0x4b and is not 0x46.
 */
st->keepc[1] = 0;
if ((curChar >= 0x42) && (curChar <= 0x4b) && curChar != 0x46) {
st->keepc[0] = curChar;
}
} else {
/*
 * Convert now when the byte is 0x01..0x7f or when an earlier byte was
 * already pending; otherwise wait for the next byte.
 */
if ((curChar > 0 && curChar <= 0x7f) || prevChar != 0) {
n=iscii_to_utf8(st, *outbuf, *outbytesleft);
if (n > 0) {
(*outbuf) += n;
(*outbytesleft) -= n;
} else
st->_errno = errno;
}
}
break;
case S_ATR:
case S_EXT:
if (st->pState == S_BASIC) {
/* Nothing pending: just remember an EXT prefix, if that is what this byte is. */
if ( st->keepc[1] == 0 )
{
if (curState == S_EXT) st->keepc[1] = ISC_ext;
break;
}
/* Flush the pending byte before starting the ATR/EXT sequence. */
n = iscii_to_utf8(st, *outbuf, *outbytesleft);
if (n > 0) {
(*outbuf) += n;
(*outbytesleft) -= n;
} else
st->_errno = errno;
/* NOTE(review): keepc[1] is overwritten even when the flush above wrote nothing - confirm. */
if (curState == S_EXT) st->keepc[1] = ISC_ext;
} else {
/* ISC_atr or ISC_ext directly after another ISC_atr/ISC_ext is rejected; this byte is not consumed. */
errno = EILSEQ;
return (size_t)-1;
}
break;
default:
/* curState is always one of the three values assigned above, so this branch is not expected to run. */
st->_errno = errno = EILSEQ;
st->pState = S_BASIC;
break;
}
/* The byte is consumed before errno is examined, so on failure *inbuf already points past it. */
st->pState = curState;
(*inbuf)++;
(*inbytesleft)--;
if (errno)
return(size_t)-1;
}
/* Input remains but the output buffer is full. */
if (*inbytesleft > 0 && *outbytesleft == 0) {
errno = E2BIG;
return(size_t)-1;
}
return (size_t)(*inbytesleft);
}