root/usr/src/lib/iconv_modules/zh/common/zh_TW-iso2022-CN-EXT.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
#include <stdlib.h>
 * Copyright (c) 1997, by Sun Microsystems, Inc.
 * All rights reserved.
 */


/*
   Converts From:       ISO2022-CN-EXT encoding.
   Converts To:         Taiwanese EUC encoding ( CNS11643 ) and big5 encoding

 */

#include "iso2022-cn.h"

/* Forward reference the functions constrained to the scope of this file */
static int process_esc_seq(char, _iconv_st *);
static int ascii_to_euc(char, _iconv_st *, unsigned char **, size_t *);
static int iscns( _iconv_st * );


extern int errno;

/*
 * _icv_open: Called from iconv_open(). Allocates and initializes _iconv_st
 *            structure. Returns pointer to the structure as (void *).
 */


void *
_icv_open()
{
        _iconv_st  *st;

        /* Allocate */
        if (( st = (_iconv_st *) malloc( sizeof( _iconv_st ))) == NULL ){
            errno = ENOMEM;
            return ((void *) -1);
        }

        /* Initialize */
        st->Sfunc = SI;
        st->SSfunc = NONE;
        st->ESCstate = OFF;
        st->firstbyte = True;
        st->numsav = 0;
        st->SOcharset = 0;              /* no default charset */
        st->SS2charset = 0;             /* no default charset */
        st->SS3charset = 0;             /* no default charset */
        st->nonidcount = 0;
        st->_errno = 0;

        /* Return struct */
        return ((void *) st);
}



/*
 * _icv_close: Called from iconv_close(). Frees the _iconv_st structure as
 *             pointed by the argument.
 */

void
_icv_close(_iconv_st *st)
{
        if (st == NULL )
            errno = EBADF;
        else
            free(st);
}


/*
 * _icv_iconv: Called from iconv(). Does the convertion from ISO2022-CN-EXT
 *                         to CNS11643
 */
/*=======================================================
 *
 *   State machine for interpreting ISO2022-CN-EXT code
 *
 *=======================================================
 *
 *
 *=======================================================*/

size_t
iso2022_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
                        unsigned char **outbuf, size_t *outbytesleft, int (*convert)() )
{

        int ret, n;

        if (st == NULL) {
            errno = EBADF;
            return ((size_t) -1);
        }

        if ( inbuf == NULL || *inbuf == NULL || inbytesleft == NULL ||
                        *inbytesleft <= 0 ) { /* Reset request */
            st->Sfunc = SI;
            st->SSfunc = NONE;
            st->ESCstate = OFF;
            st->firstbyte = True;
            st->numsav = 0;
            st->SOcharset = 0;
            st->SS2charset = 0;
            st->SS3charset = 0;
            st->nonidcount = 0;
            st->_errno = 0;
            return ((size_t) 0);
        }

        st->_errno = 0;
        errno = 0;

        /* Before we use *inbytesleft or *outbytesleft we should confirm that
        inbytesleft and outbytesleft are non-NULL. I am considering inbytesleft
        or *inbytesleft having 0 or negative value as a reset request. I am
        considering outbytesleft having 0 value as no space in output buffer.
        Also, here itself I am verifying that outbuf and *outbuf should be non-NULL
        pointers so I do not have to worry about them being NULL below in the
        conversion sub-routines. I also confirm here that *outbytesleft should be
        greater than 0 before we can continue further */

        if ( outbytesleft == NULL || *outbytesleft <= 0 ||
                        outbuf == NULL || *outbuf == NULL ) {
            errno = E2BIG;
            return((size_t)-1);
        }

        /* A state machine to interpret ISO, driven by the shift functions SI, SO */

        do {
            if (st->firstbyte == False) { /* Is SO, SS2, SS3 second byte */
                st->keepc[1] = **inbuf;
                n = (*convert)( st, outbuf, outbytesleft, iscns(st) );
                if ( n < 0 )
                    return((size_t)-1); /* Insufficient space in output buffer */
                else if ( n > 0 ){ /* No CNS for this Chinese code */
                    n = ascii_to_euc(NON_ID_CHAR, st, outbuf, outbytesleft);
                    if ( n < 0 )
                        return((size_t)-1);
                    st->nonidcount += 1;
                } else
                    st->nonidcount -= 1; /* The first byte identified as
                                                valid Chinese byte and is
                                                processed */
                st->firstbyte = True;
                st->SSfunc = NONE;      /* If we just processed SS bytes,
                                           this will reset SSfunc to NONE. If
                                           we just processed SO bytes, this was
                                           already NONE */
            } else if ( st->SSfunc != NONE ) { /* We are currently expecting
                                                 SS2 or SS3 Chinese bytes */
                    st->keepc[0] = **inbuf;
                    st->nonidcount += 1;
                    st->firstbyte = False;
            } else if ( **inbuf == ESC && st->ESCstate == OFF ) {
                    st->nonidcount += 1; /* For the ESC character */
                    st->ESCstate = E0;
            } else if ( st->ESCstate != OFF ) { /* Continue processing the
                                                  escape sequence */
                ret = process_esc_seq( **inbuf, st );
                if ( ret == DONE ) {    /* ESC seq interpreted correctly.
                                             Switch off the escape machine */
                    st->ESCstate = OFF;
                } else if ( ret == INVALID ){
                    if (st->Sfunc == SI){       /* An invalid ESC sequence
                                                 encountered.  Process
                                                 the text saved in
                                                 st->savbuf as ASCII. Switch
                                                 off the escape machine */
                        n = ascii_to_euc( **inbuf, st, outbuf, outbytesleft );
                        if ( n < 0 ) /* Insufficient space in output buffer */
                                return((size_t)-1);
                        st->nonidcount -= st->numsav; /* Since invalid Esc
                                                       sequence is outputted
                                                       as ASCII */
                    } else if (st->Sfunc == SO) { /* An invalid ESC sequence
                                                     encountered. Don't know
                                                     what to do. So flag
                                                     error illegal seq. It is
                                                     wise not to continue
                                                     processing input. Switch
                                                     off the escape machine */
                        st->_errno = errno = EILSEQ;
                        st->nonidcount += 1; /* For this character */
                    }
                    st->numsav = 0;      /* Discard the saved characters of
                                            invalid sequence */
                    st->ESCstate = OFF;
                } /* more char. needed for escape sequence */
            } else if (st->Sfunc  == SI) {
                /* Switch state to SO only if SOdesignation is set. */
                if ( **inbuf == SO && st->SOcharset != 0 ){
                    st->Sfunc = SO;
                } else { /* Is ASCII */
                    n = ascii_to_euc(**inbuf, st, outbuf, outbytesleft );
                    if ( n < 0 ) /* Insufficient space in output buffer */
                        return((size_t)-1);
                }
            } else if (st->Sfunc  == SO) {
                if ( **inbuf == SI ){ /* Switch state to SO */
                    st->Sfunc = SI;
                }
                else {
                    st->keepc[0] = **inbuf;
                    st->nonidcount += 1;
                    st->firstbyte = False;
                }
            }
            else
                fprintf(stderr,
                    "_icv_iconv():ISO-CN-EXT->CNS:Should never have come here\n");

            (*inbuf)++;
            (*inbytesleft)--;

            if ( st->_errno)
                break; /* Break out of while loop */

            if (errno) /* We set st->_errno before we set errno. If errno is set
                                      somewhere else we handle that here */
                return((size_t)-1);

        } while (*inbytesleft > 0 && *outbytesleft > 0);


/* We now have to handle the case where we have successfully processed the
   previous input character which exhausted the output buffer. This is handled
   by the while loop. However, since there are more input characters that
   haven't been processed yet, we need to set the errno appropriately and
   return -1. */
        if ( *inbytesleft > 0 && *outbytesleft == 0) {
            errno = E2BIG;
            return((size_t)-1);
        }
        return (*inbytesleft + st->nonidcount);
}


static int
process_esc_seq( char c, _iconv_st *st )
{

        switch(st->ESCstate){
        case E0:
            switch (c){
            case SS2LOW:
                if ( st->SS2charset == 0 ){
                    /* We do not expect SS2 shift function before
                       SS2 designation is set */
                    st->savbuf[0] = ESC;
                    st->numsav = 1;
                    return(INVALID);
                }
                st->SSfunc = SS2;
                /* Since valid ESC sequence remove the ESC from the
                   nonidcount */
                st->nonidcount -= 1;
                return(DONE);
            case SS3LOW:
                if ( st->SS3charset == 0 ){
                    /* We do not expect SS3 shift function before
                       SS3 designation is set */
                    st->savbuf[0] = ESC;
                    st->numsav = 1;
                    return(INVALID);
                }
                st->SSfunc = SS3;
                /* Since valid ESC sequence remove the ESC from the
                   nonidcount */
                st->nonidcount -= 1;
                return(DONE);
            case '$':
                st->nonidcount += 1; /* ESC sequence not complete yet */
                st->ESCstate = E1;
                return(NEEDMORE);
            default:
                st->savbuf[0] = ESC;
                st->numsav = 1;
                return(INVALID);
            } /* end switch */


        case E1:
            switch (c){
            case ')':
                st->nonidcount += 1; /* ESC sequence not complete yet */
                st->ESCstate = E2;
                return(NEEDMORE);
            case '*':
                st->nonidcount += 1; /* ESC sequence not complete yet */
                st->ESCstate = E3;
                return(NEEDMORE);
            case '+':
                st->nonidcount += 1; /* ESC sequence not complete yet */
                st->ESCstate = E4;
                return(NEEDMORE);
            default:
                st->savbuf[0] = ESC;
                st->savbuf[1] = '$';
                st->numsav = 2;
                return(INVALID);
            }

        case E2:
            st->SOcharset = c;
            /* Since valid ESC sequence remove decriment nonidcount
               appropriately for all earlier characters in escape sequence */
            st->nonidcount -= 3;
            return(DONE);

        case E3:
            st->SS2charset = c;
            /* Since valid ESC sequence remove decriment nonidcount
               appropriately for all earlier characters in escape sequence */
            st->nonidcount -= 3;
            return(DONE);

        case E4:
            st->SS3charset = c;
            /* Since valid ESC sequence remove decriment nonidcount
               appropriately for all earlier characters in escape sequence */
            st->nonidcount -= 3;
            return(DONE);

        default:
            fprintf(stderr,
                    "process_esc_seq():ISO-CN-EXT->CNS:Should never have come here\n");
            st->_errno = errno = EILSEQ;
            return(DONE);

        } /* end switch */
}


static int
ascii_to_euc( char c, _iconv_st *st, unsigned char **outbuf, size_t *outbytesleft )
{

        int i;

        if ( *outbytesleft < (1 + st->numsav) ) {
            st->_errno = errno = E2BIG;
            return (-1);
        }

        for ( i=0; i < st->numsav; i++ ) {
            *(*outbuf)++ = (unsigned char) st->savbuf[i];
            (*outbytesleft)--;
        }

        *(*outbuf)++ = (unsigned char) c;
        (*outbytesleft)--;

        return(0);
}


static int
iscns( _iconv_st *st )
{
        int plane_no = -1;

        if ( st->SSfunc == NONE && st->SOcharset == 'G' )
            plane_no = 1;
        else if ( st->SSfunc == SS2 && st->SS2charset == 'H' )
            plane_no = 2;
        else if ( st->SSfunc == SS3 )
            switch ( st->SS3charset ){
            case 'I': plane_no = 3; break;
            case 'J': plane_no = 4; break;
            case 'K': plane_no = 5; break;
            case 'L': plane_no = 6; break;
            case 'M': plane_no = 7; break;
            }
        return (plane_no);
}