root/usr/src/uts/common/kiconv/kiconv_ko/kiconv_ko.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/sunddi.h>
#include <sys/byteorder.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/u8_textprep.h>
#include <sys/kiconv.h>
#include <sys/kiconv_cck_common.h>
#include <sys/kiconv_ko.h>
#include <sys/kiconv_uhc_utf8.h>
#include <sys/kiconv_utf8_uhc.h>
#include <sys/kiconv_euckr_utf8.h>
#include <sys/kiconv_utf8_euckr.h>

static int8_t utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
        uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
        uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail,
        size_t *ret_val, kiconv_table_array_t *table, size_t nitems);


#define KICONV_KO_EUCKR         (0x01)
#define KICONV_KO_UHC           (0x02)
#define KICONV_KO_MAX_MAGIC_ID  (0x02)

static void *
open_fr_euckr()
{
        return ((void *)KICONV_KO_EUCKR);
}

static void *
open_fr_uhc()
{
        return ((void *)KICONV_KO_UHC);
}

static int
close_fr_ko(void *s)
{
        if ((uintptr_t)s > KICONV_KO_MAX_MAGIC_ID)
                return (EBADF);

        return (0);
}

/*
 * Encoding convertor from EUC-KR to UTF-8.
 */
static size_t
kiconv_fr_euckr(void *kcd, char **inbuf, size_t *inbufleft,
    char **outbuf, size_t *outbufleft, int *errno)
{
        uchar_t         *ib;
        uchar_t         *ob;
        uchar_t         *ibtail;
        uchar_t         *obtail;
        size_t          ret_val;
        int8_t          sz;
        uint32_t        euckr_val;

        /* Check on the kiconv code conversion descriptor. */
        if (kcd == NULL || kcd == (void *)-1) {
                *errno = EBADF;
                return ((size_t)-1);
        }

        /* If this is a state reset request, process and return. */
        if (inbuf == NULL || *inbuf == NULL) {
                return (0);
        }

        ret_val = 0;
        ib = (uchar_t *)*inbuf;
        ob = (uchar_t *)*outbuf;
        ibtail = ib + *inbufleft;
        obtail = ob + *outbufleft;

        while (ib < ibtail) {
                if (KICONV_IS_ASCII(*ib)) {
                        if (ob >= obtail) {
                                KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                        }

                        *ob++ = *ib++;
                        continue;
                }

                /*
                 * Issue EILSEQ error if the first byte is not a
                 * valid EUC-KR leading byte.
                 */
                if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
                        KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
                }

                /*
                 * Issue EINVAL error if input buffer has an incomplete
                 * character at the end of the buffer.
                 */
                if (ibtail - ib < 2) {
                        KICONV_SET_ERRNO_AND_BREAK(EINVAL);
                }

                /*
                 * Issue EILSEQ error if the remaining byte is not
                 * a valid EUC-KR byte.
                 */
                if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
                        KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
                }

                euckr_val = (uint32_t)(*ib) << 8 | *(ib + 1);
                sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
                    kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);

                if (sz < 0) {
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                ib += 2;
                ob += sz;
        }

        *inbuf = (char *)ib;
        *inbufleft = ibtail - ib;
        *outbuf = (char *)ob;
        *outbufleft = obtail - ob;

        return (ret_val);
}

/*
 * String based encoding convertor from EUC-KR to UTF-8.
 */
static size_t
kiconvstr_fr_euckr(char *inarray, size_t *inlen, char *outarray,
    size_t *outlen, int flag, int *errno)
{
        uchar_t         *ib;
        uchar_t         *ob;
        uchar_t         *ibtail;
        uchar_t         *obtail;
        uchar_t         *oldib;
        size_t          ret_val;
        int8_t          sz;
        uint32_t        euckr_val;
        boolean_t       do_not_ignore_null;

        ret_val = 0;
        ib = (uchar_t *)inarray;
        ob = (uchar_t *)outarray;
        ibtail = ib + *inlen;
        obtail = ob + *outlen;
        do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);

        while (ib < ibtail) {
                if (*ib == '\0' && do_not_ignore_null)
                        break;

                if (KICONV_IS_ASCII(*ib)) {
                        if (ob >= obtail) {
                                KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                        }

                        *ob++ = *ib++;
                        continue;
                }

                oldib = ib;

                if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
                        KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
                }

                if (ibtail - ib < 2) {
                        KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
                }

                if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
                        KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
                }

                euckr_val = *ib++;
                euckr_val = (euckr_val << 8) | *ib++;
                sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
                    kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);

                if (sz < 0) {
                        ib = oldib;
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                ob += sz;
                continue;

REPLACE_INVALID:
                if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
                        ib = oldib;
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
                ret_val++;
        }

        *inlen = ibtail - ib;
        *outlen = obtail - ob;

        return (ret_val);
}

/*
 * Encoding convertor from Unified Hangul Code to UTF-8.
 */
static size_t
kiconv_fr_uhc(void *kcd, char **inbuf, size_t *inbufleft,
    char **outbuf, size_t *outbufleft, int *errno)
{
        uchar_t         *ib;
        uchar_t         *ob;
        uchar_t         *ibtail;
        uchar_t         *obtail;
        size_t          ret_val;
        int8_t          sz;
        uint32_t        uhc_val;

        /* Check on the kiconv code conversion descriptor. */
        if (kcd == NULL || kcd == (void *)-1) {
                *errno = EBADF;
                return ((size_t)-1);
        }

        /* If this is a state reset request, process and return. */
        if (inbuf == NULL || *inbuf == NULL) {
                return (0);
        }

        ret_val = 0;
        ib = (uchar_t *)*inbuf;
        ob = (uchar_t *)*outbuf;
        ibtail = ib + *inbufleft;
        obtail = ob + *outbufleft;

        while (ib < ibtail) {
                if (KICONV_IS_ASCII(*ib)) {
                        if (ob >= obtail) {
                                KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                        }

                        *ob++ = *ib++;
                        continue;
                }

                /*
                 * Issue EILSEQ error if the first byte is not a
                 * valid UHC leading byte.
                 */
                if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
                        KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
                }

                /*
                 * Issue EINVAL error if input buffer has an incomplete
                 * character at the end of the buffer.
                 */
                if (ibtail - ib < 2) {
                        KICONV_SET_ERRNO_AND_BREAK(EINVAL);
                }

                /*
                 * Issue EILSEQ error if the remaining byte is not
                 * a valid UHC byte.
                 */
                if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
                        KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
                }

                uhc_val = (uint32_t)(*ib) << 8 | *(ib + 1);
                sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
                    kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);

                if (sz < 0) {
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                ib += 2;
                ob += sz;
        }

        *inbuf = (char *)ib;
        *inbufleft = ibtail - ib;
        *outbuf = (char *)ob;
        *outbufleft = obtail - ob;

        return (ret_val);
}

/*
 * String based encoding convertor from Unified Hangul Code to UTF-8.
 */
static size_t
kiconvstr_fr_uhc(char *inarray, size_t *inlen, char *outarray,
    size_t *outlen, int flag, int *errno)
{
        uchar_t         *ib;
        uchar_t         *ob;
        uchar_t         *ibtail;
        uchar_t         *obtail;
        uchar_t         *oldib;
        size_t          ret_val;
        int8_t          sz;
        uint32_t        uhc_val;
        boolean_t       do_not_ignore_null;

        ret_val = 0;
        ib = (uchar_t *)inarray;
        ob = (uchar_t *)outarray;
        ibtail = ib + *inlen;
        obtail = ob + *outlen;
        do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);

        while (ib < ibtail) {
                if (*ib == '\0' && do_not_ignore_null)
                        break;

                if (KICONV_IS_ASCII(*ib)) {
                        if (ob >= obtail) {
                                KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                        }

                        *ob++ = *ib++;
                        continue;
                }

                oldib = ib;

                if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
                        KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
                }

                if (ibtail - ib < 2) {
                        KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
                }

                if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
                        KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
                }

                uhc_val = *ib++;
                uhc_val = (uhc_val << 8) | *ib++;
                sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
                    kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);

                if (sz < 0) {
                        ib = oldib;
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                ob += sz;
                continue;

REPLACE_INVALID:
                if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
                        ib = oldib;
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
                ret_val++;
        }

        *inlen = ibtail - ib;
        *outlen = obtail - ob;

        return (ret_val);
}

/*
 * Encoding convertor from UTF-8 to EUC-KR.
 */
static size_t
kiconv_to_euckr(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
        return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
            outbytesleft, errno, utf8_to_euckr));
}

/*
 * Encoding convertor from UTF-8 to Unified Hangul Code.
 */
static size_t
kiconv_to_uhc(void *kcd, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft, int *errno)
{
        return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
            outbytesleft, errno, utf8_to_uhc));
}

/*
 * String based encoding convertor from UTF-8 to EUC-KR.
 */
static size_t
kiconvstr_to_euckr(char *inarray, size_t *inlen, char *outarray,
    size_t *outlen, int flag, int *errno)
{
        return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
            (uchar_t *)outarray, outlen, flag, errno, utf8_to_euckr);
}

/*
 * String based encoding convertor from UTF-8 to Unified Hangul Code.
 */
static size_t
kiconvstr_to_uhc(char *inarray, size_t *inlen, char *outarray,
    size_t *outlen, int flag, int *errno)
{
        return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
            (uchar_t *)outarray, outlen, flag, errno, utf8_to_uhc);
}

/*
 * Convert an UTF-8 character to a character of ko encodings
 * (EUC-KR or UHC).
 */
static int8_t
utf8_to_ko(uint32_t utf8, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
    kiconv_table_t *table, size_t nitems)
{
        size_t  index;
        size_t  kocode;
        int8_t  kolen;

        if (KICONV_KO_IS_UDC_IN_UTF8(utf8)) {
                /* User Definable Area handing. */
                kocode = (((utf8 & 0xF0000) >> 4) | ((utf8 & 0x3F00) >> 2) |
                    (utf8 & 0x3F)) - KICONV_KO_UDA_UCS4_START;
                if (kocode < KICONV_KO_UDA_RANGE) {
                        kocode = (KICONV_KO_UDA_EUC_SEG1 << 8) |
                            (kocode + KICONV_KO_UDA_OFFSET_START);
                } else {
                        /* 0x43 = 0xA1 - 0x5E */
                        kocode = (KICONV_KO_UDA_EUC_SEG2 << 8) |
                            (kocode + 0x43);
                }

                index = 1;
        } else {
                index = kiconv_binsearch(utf8, table, nitems);
                kocode = table[index].value;
        }

        kolen = (kocode <= 0xFF) ? 1 : 2;

        if (obtail - ob < kolen) {
                *ret_val = (size_t)-1;
                return (-1);
        }

        if (index == 0)
                (*ret_val)++;

        if (kolen > 1)
                *ob++ = (uchar_t)(kocode >> 8);
        *ob = (uchar_t)(kocode & 0xFF);

        return (kolen);
}

/*
 * Convert an UTF-8 character to Unified Hangual Code.
 */
/* ARGSUSED */
static int8_t
utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
    uchar_t *ob, uchar_t *obtail, size_t *ret_val)
{
        return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_uhc,
            KICONV_UTF8_UHC_MAX));
}

/*
 * Convert an UTF-8 character to EUC-KR.
 */
/* ARGSUSED */
static int8_t
utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
    uchar_t *ob, uchar_t *obtail, size_t *ret_val)
{
        return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_euckr,
            KICONV_UTF8_EUCKR_MAX));
}

/*
 * Convert a single ko encoding (EUC-KR or UHC) character to UTF-8.
 */
static int8_t
ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
    kiconv_table_array_t *table, size_t nitems)
{
        size_t  index;
        int8_t  sz;
        uchar_t udc[3];
        uchar_t *u8;

        if (KICONV_KO_IS_UDC_IN_EUC(ko_val)) {
                /* UDA(User Definable Area) handling. */
                uint32_t u32;

                u32 = (ko_val & 0xFF) + (((ko_val & 0xFF00) == 0xC900) ?
                    KICONV_KO_UDA_OFFSET_1 : KICONV_KO_UDA_OFFSET_2);
                udc[0] = 0xEF;
                udc[1] = (uchar_t)(0x80 | (u32 & 0x00000FC0) >> 6);
                udc[2] = (uchar_t)(0x80 | (u32 & 0x0000003F));
                u8 = udc;
                index = 1;
        } else {
                index = kiconv_binsearch(ko_val, table, nitems);
                u8 = table[index].u8;
        }

        sz = u8_number_of_bytes[u8[0]];

        if (obtail - ob < sz) {
                *ret_val = (size_t)-1;
                return (-1);
        }

        if (index == 0)
                (*ret_val)++;   /* Non-identical conversion */

        for (index = 0; index < sz; index++)
                *ob++ = u8[index];

        return (sz);
}

static kiconv_ops_t kiconv_ko_ops_tbl[] = {
        {
                "euc-kr", "utf-8", kiconv_open_to_cck, kiconv_to_euckr,
                kiconv_close_to_cck, kiconvstr_to_euckr
        },
        {
                "utf-8", "euc-kr", open_fr_euckr, kiconv_fr_euckr,
                close_fr_ko, kiconvstr_fr_euckr
        },
        {
                "unifiedhangul", "utf-8", kiconv_open_to_cck, kiconv_to_uhc,
                kiconv_close_to_cck, kiconvstr_to_uhc
        },
        {
                "utf-8", "unifiedhangul", open_fr_uhc, kiconv_fr_uhc,
                close_fr_ko, kiconvstr_fr_uhc
        }
};

static kiconv_module_info_t kiconv_ko_info = {
        "kiconv_ko",            /* module name */
        sizeof (kiconv_ko_ops_tbl) / sizeof (kiconv_ko_ops_tbl[0]),
        kiconv_ko_ops_tbl,
        0,
        NULL,
        NULL,
        0
};

static struct modlkiconv modlkiconv_ko = {
        &mod_kiconvops,
        "kiconv korean module 1.0",
        &kiconv_ko_info
};

static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modlkiconv_ko,
        NULL
};

int
_init(void)
{
        int err;

        err = mod_install(&modlinkage);
        if (err)
                cmn_err(CE_WARN, "kiconv_ko: failed to load kernel module");

        return (err);
}

int
_fini(void)
{
        int err;

        /*
         * If this module is being used, then, we cannot remove the module.
         * The following checking will catch pretty much all usual cases.
         *
         * Any remaining will be catached by the kiconv_unregister_module()
         * during mod_remove() at below.
         */
        if (kiconv_module_ref_count(KICONV_MODULE_ID_KO))
                return (EBUSY);

        err = mod_remove(&modlinkage);
        if (err)
                cmn_err(CE_WARN, "kiconv_ko: failed to remove kernel module");

        return (err);
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}