#include <stdlib.h>
#include <errno.h>
#include "hangulcode.h"
#include "ktable.h"
#include "utf_johap92.h"
#include "common_defs.h"
/* Mask selecting the most-significant bit of a byte (set on non-ASCII bytes). */
#define MSB 0x80

/*
 * States of the byte-at-a-time UTF-8 decoder.
 *
 *   U0      - initial state, no sequence in progress
 *   U1      - lead byte of a 2-byte sequence seen
 *   U2, U3  - collecting a 3-byte sequence
 *   U4..U6  - collecting a 4-byte sequence
 *   UX      - a 2- or 3-byte sequence is complete and ready to be converted
 */
typedef enum _USTATE {
	U0 = 0,
	U1 = 1,
	U2 = 2,
	U3 = 3,
	U4 = 4,
	U5 = 5,
	U6 = 6,
	UX = 7
} USTATE;

/*
 * Per-descriptor conversion state, allocated by _icv_open() and released by
 * _icv_close().
 */
typedef struct _icv_state {
	unsigned char	_buffer[6];	/* bytes of the sequence being collected */
	USTATE		_ustate;	/* current decoder state */
	unsigned short	_count;		/* number of bytes collected so far */
	int		_errno;		/* error recorded by the last conversion */
} _iconv_st;
/*
 * Allocate and initialise a conversion descriptor.
 *
 * Returns a pointer to the new descriptor with the decoder in state U0 and
 * the byte count and error code cleared.  If the allocation fails, errno is
 * set to ENOMEM and (void *)-1 is returned.
 */
void* _icv_open()
{
	_iconv_st *cd = malloc(sizeof *cd);

	if (cd == NULL) {
		errno = ENOMEM;
		return ((void *) -1);
	}

	cd->_count = 0;
	cd->_errno = 0;
	cd->_ustate = U0;

	return (cd);
}
/*
 * Release a conversion descriptor.
 *
 * A NULL descriptor is rejected by setting errno to EBADF; any other pointer
 * is handed to free().
 */
void _icv_close(_iconv_st* st)
{
	if (st != NULL) {
		free(st);
		return;
	}

	errno = EBADF;
}
/*
 * Table lookup helper defined later in this file; declared here so the call
 * in _icv_iconv() has a prototype in scope.
 */
unsigned short _utf8_to_jahap92(unsigned long utf_code);

/*
 * Convert UTF-8 input to 2-byte Johap92 output, one input byte at a time.
 *
 * st          conversion descriptor obtained from _icv_open()
 * inbuf       address of the input pointer; a NULL inbuf or NULL *inbuf
 *             resets the descriptor to its initial state and returns 0
 * inbufleft   number of input bytes available at *inbuf
 * outbuf      address of the output pointer
 * outbufleft  number of bytes of room at *outbuf
 *
 * ASCII bytes are copied through unchanged.  Complete 2- and 3-byte UTF-8
 * sequences are looked up with _utf8_to_jahap92(); a non-zero result is
 * emitted as two bytes (high byte first), a zero result is replaced by two
 * NON_IDENTICAL bytes.  Complete 4-byte sequences are replaced by two
 * NON_ID_CHAR bytes.  A sequence that is split across calls is buffered in
 * the descriptor and completed by the next call.
 *
 * On every return that gets past the argument checks, *inbuf, *inbufleft,
 * *outbuf and *outbufleft are updated to reflect what was consumed and
 * produced.
 *
 * Returns the accumulated replacement count on success, or (size_t)-1 with
 * errno set to:
 *   EBADF   st is NULL
 *   E2BIG   the output buffer is full; *inbuf points at the byte that could
 *           not be processed and the decoder state is kept, so the call can
 *           be repeated with more output space
 *   EILSEQ  an invalid byte was met; *inbuf points at that byte and any
 *           partially collected sequence is discarded
 * errno is cleared on entry to a conversion call, as the original code did.
 */
size_t _icv_iconv(_iconv_st* st, char** inbuf, size_t* inbufleft,
		char** outbuf, size_t* outbufleft)
{
	size_t ret_val = 0;
	unsigned char *ib;
	unsigned char *ob;
	unsigned char *ibtail;
	unsigned char *obtail;
	hcode_type utf8_code;
	unsigned short johap92;

	if (st == NULL) {
		errno = EBADF;
		return ((size_t) -1);
	}

	/* Reset request: forget any partially collected sequence. */
	if (!inbuf || !(*inbuf)) {
		st->_ustate = U0;
		st->_count = 0;
		st->_errno = 0;
		return ((size_t) 0);
	}

	st->_errno = 0;
	errno = 0;

	ib = (unsigned char *)*inbuf;
	ob = (unsigned char *)*outbuf;
	ibtail = ib + *inbufleft;
	obtail = ob + *outbufleft;

	while (ib < ibtail) {
		unsigned char first_byte;

		switch (st->_ustate) {
		case U0:
			if ((*ib & MSB) == 0) {
				/* ASCII: copy through. */
				if (ob >= obtail) {
					/*
					 * Record the error in st->_errno as
					 * well, otherwise the loop would never
					 * terminate.
					 */
					st->_errno = errno = E2BIG;
					break;
				}
				*ob++ = *ib++;
				break;
			}

			if ((*ib & 0xe0) == 0xc0) {
				/* Lead byte of a 2-byte sequence. */
				if (number_of_bytes_in_utf8_char[*ib] ==
				    ICV_TYPE_ILLEGAL_CHAR) {
					st->_errno = errno = EILSEQ;
					break;
				}
				st->_ustate = U1;
			} else if ((*ib & 0xf0) == 0xe0) {
				/* Lead byte of a 3-byte sequence. */
				st->_ustate = U2;
			} else {
				/* Remaining lead bytes start the 4-byte path. */
				if (number_of_bytes_in_utf8_char[*ib] ==
				    ICV_TYPE_ILLEGAL_CHAR) {
					st->_errno = errno = EILSEQ;
					break;
				}
				st->_ustate = U4;
			}
			st->_buffer[0] = *ib++;
			st->_count = 1;
			break;

		case U1:
			/* Second (final) byte of a 2-byte sequence. */
			if ((*ib & 0xc0) != MSB) {
				st->_errno = errno = EILSEQ;
				break;
			}
			st->_buffer[1] = *ib;
			st->_count++;
			st->_ustate = UX;
			/* The byte is consumed by UX once it has been converted. */
			continue;

		case U2:
			/* Second byte of a 3-byte sequence. */
			first_byte = st->_buffer[0];
			if ((*ib & 0xc0) != MSB ||
			    *ib < valid_min_2nd_byte[first_byte] ||
			    *ib > valid_max_2nd_byte[first_byte]) {
				st->_errno = errno = EILSEQ;
				break;
			}
			st->_buffer[1] = *ib++;
			st->_count++;
			st->_ustate = U3;
			break;

		case U3:
			/* Third (final) byte of a 3-byte sequence. */
			if ((*ib & 0xc0) != MSB) {
				st->_errno = errno = EILSEQ;
				break;
			}
			st->_buffer[2] = *ib;
			st->_count++;
			st->_ustate = UX;
			/* The byte is consumed by UX once it has been converted. */
			continue;

		case U4:
			/* Second byte of a 4-byte sequence. */
			first_byte = st->_buffer[0];
			if ((*ib & 0xc0) != MSB ||
			    *ib < valid_min_2nd_byte[first_byte] ||
			    *ib > valid_max_2nd_byte[first_byte]) {
				st->_errno = errno = EILSEQ;
				break;
			}
			st->_buffer[1] = *ib++;
			st->_count++;
			st->_ustate = U5;
			break;

		case U5:
			/* Third byte of a 4-byte sequence. */
			if ((*ib & 0xc0) != MSB) {
				st->_errno = errno = EILSEQ;
				break;
			}
			st->_buffer[2] = *ib++;
			st->_count++;
			st->_ustate = U6;
			break;

		case U6:
			/*
			 * Fourth (final) byte of a 4-byte sequence.  These are
			 * always replaced by two NON_ID_CHAR bytes.
			 */
			if ((*ib & 0xc0) != MSB) {
				st->_errno = errno = EILSEQ;
				break;
			}
			if ((obtail - ob) < 2) {
				/* Leave the byte unconsumed so the call can be retried. */
				st->_errno = errno = E2BIG;
				break;
			}
			*ob++ = NON_ID_CHAR;
			*ob++ = NON_ID_CHAR;
			st->_ustate = U0;
			st->_count = 0;
			ib++;
			break;

		case UX:
			/*
			 * A complete 2- or 3-byte sequence is buffered; ib
			 * still points at its last byte.  Check for room first
			 * so that nothing is written or consumed on E2BIG.
			 */
			if ((obtail - ob) < 2) {
				st->_errno = errno = E2BIG;
				break;
			}

			/* Pack the buffered bytes right-aligned into the key. */
			utf8_code.code = 0;
			switch (st->_count) {
			case 2:
				utf8_code.byte.byte3 = st->_buffer[0];
				utf8_code.byte.byte4 = st->_buffer[1];
				break;
			case 3:
				utf8_code.byte.byte2 = st->_buffer[0];
				utf8_code.byte.byte3 = st->_buffer[1];
				utf8_code.byte.byte4 = st->_buffer[2];
				break;
			default:
				/* Unexpected count: look up 0. */
				break;
			}

			/* A result of 0 is treated as "no mapping". */
			johap92 = _utf8_to_jahap92(utf8_code.code);
			if (johap92 != 0) {
				*ob++ = (unsigned char)(johap92 >> 8);
				*ob++ = (unsigned char)(johap92 & 0xff);
			} else {
				*ob++ = NON_IDENTICAL;
				*ob++ = NON_IDENTICAL;
				/*
				 * NOTE(review): counts 2 per unmapped
				 * character, as the original code did; the
				 * replacements made in U6 are not counted.
				 */
				ret_val += 2;
			}
			st->_ustate = U0;
			st->_count = 0;
			ib++;
			break;

		default:
			/* Corrupt state value. */
			st->_errno = errno = EILSEQ;
			break;
		}

		if (st->_errno) {
#ifdef DEBUG
			fprintf(stderr, "st->_errno=%d\tst->_ustate=%d\n", st->_errno, st->_ustate);
#endif
			if (st->_errno == EILSEQ) {
				/* Drop the partial sequence so the next call starts clean. */
				st->_ustate = U0;
				st->_count = 0;
			}
			break;
		}
	}

	/* Report progress to the caller on success and on error alike. */
	*inbuf = (char *)ib;
	*inbufleft = (size_t)(ibtail - ib);
	*outbuf = (char *)ob;
	*outbufleft = (size_t)(obtail - ob);

	if (st->_errno)
		return ((size_t) -1);

	return (ret_val);
}
/*
 * Search utf8_to_johap92_tbl for an entry whose .utf8 key corresponds to
 * utf_code, bisecting the index range between 0 and MAX_U2J92_NUM.
 *
 * utf_code  key to look for (compared against the .utf8 field of the table
 *           entries)
 *
 * NOTE(review): as written this function cannot deliver a result and needs
 * fixing before it is relied on:
 *   - the first test in the loop uses `=` (assignment), not `==`, so it
 *     stores utf_code into the probed table entry and then tests the stored
 *     value instead of comparing;
 *   - there is no return statement, so the declared unsigned short result is
 *     never set (using the value in a caller is undefined behaviour);
 *   - `low < high` combined with `high = mid - 1` can end the search without
 *     ever examining one of the remaining entries.
 * The caller in this file compares the result against 0, so presumably 0 is
 * meant to signal "no mapping" and a match should return the entry's Johap92
 * code -- TODO confirm against the table declaration, whose field holding
 * that code is not visible here.
 */
unsigned short _utf8_to_jahap92(unsigned long utf_code)
{
int low, mid, high;
/* NOTE(review): presumably the table is sorted by .utf8 -- the bisection below requires it; verify. */
low = 0, high = MAX_U2J92_NUM;
while(low < high){
mid = (low + high)/2;
/* NOTE(review): `=` assigns; a comparison (`==`) was almost certainly intended. */
if(utf8_to_johap92_tbl[mid].utf8 = utf_code){
break;
} else if(utf8_to_johap92_tbl[mid].utf8 > utf_code){
/* Probed key is larger: continue in the lower part. */
high = mid - 1;
} else if(utf8_to_johap92_tbl[mid].utf8 < utf_code){
/* Probed key is smaller: continue in the upper part. */
low = mid + 1;
}
}
/* NOTE(review): control reaches the end of a non-void function without a return value. */
}