root/usr/src/uts/common/sys/kiconv_cck_common.h
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _SYS_KICONV_CCK_COMMON_H
#define _SYS_KICONV_CCK_COMMON_H

#ifdef __cplusplus
extern "C" {
#endif

#ifdef  _KERNEL

/* Lowest value a leading byte of an EUC-encoded character can take. */
#define KICONV_EUC_START                0xA1

/*
 * Valid EUC range or not: nonzero iff 0xA1 <= v <= 0xFE.
 *
 * The argument is evaluated exactly once, so an expression with side
 * effects such as KICONV_IS_VALID_EUC_BYTE(*p++) is safe.  The two-sided
 * range test is folded into one unsigned comparison: after subtracting the
 * lower bound, every value below 0xA1 (including negative ones, which are
 * sign-extended by the cast) wraps around to a huge unsigned number and
 * fails the "<= 0xFE - 0xA1" check exactly like values above 0xFE do.
 */
#define KICONV_IS_VALID_EUC_BYTE(v)                                     \
        (((uint64_t)(v) - 0xA1) <= (uint64_t)(0xFE - 0xA1))

/*
 * Is ASCII character or not: after narrowing the argument to an unsigned
 * byte, it is ASCII (0x00 - 0x7F) exactly when the top bit is clear.
 */
#define KICONV_IS_ASCII(c)              ((((uchar_t)(c)) & 0x80) == 0)

/*
 * UTF-8 replacement character used for non-identical conversions:
 * its three individual bytes, its length in bytes, and the same three
 * bytes packed into one integer (first byte in the highest position).
 */
#define KICONV_UTF8_REPLACEMENT_CHAR1           (0xEF)
#define KICONV_UTF8_REPLACEMENT_CHAR2           (0xBF)
#define KICONV_UTF8_REPLACEMENT_CHAR3           (0xBD)
#define KICONV_UTF8_REPLACEMENT_CHAR_LEN        (3)
#define KICONV_UTF8_REPLACEMENT_CHAR                                    \
        ((KICONV_UTF8_REPLACEMENT_CHAR1 << 16) |                        \
        (KICONV_UTF8_REPLACEMENT_CHAR2 << 8) |                          \
        KICONV_UTF8_REPLACEMENT_CHAR3)

/*
 * Whether the 2nd byte of a 3 or 4 byte UTF-8 character is invalid or not.
 *
 * The byte is invalid when it does not lie inside the closed interval
 * [u8_valid_min_2nd_byte[first], u8_valid_max_2nd_byte[first]].
 * Both arguments may be evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)               \
            (!((second) >= u8_valid_min_2nd_byte[(first)] &&            \
            (second) <= u8_valid_max_2nd_byte[(first)]))

/*
 * If we haven't yet checked for the UTF-8 signature (BOM, bytes
 * 0xef 0xbb 0xbf) at the beginning of the conversion data stream, check
 * for it now and, if one is found, skip it since we have no use for it.
 * Either way the stream is marked as checked afterwards.
 *
 * ib     - modifiable pointer to the current input position; advanced
 *          by 3 when a BOM is skipped.  It should point to unsigned
 *          bytes: through a signed char pointer the comparisons against
 *          0xef/0xbb/0xbf can never match.
 * ibtail - pointer just past the end of the input; at least 3 bytes must
 *          be available for the BOM test to be attempted.
 *
 * The caller must have a variable named `kcd' in scope that can be cast to
 * kiconv_state_t; its bom_processed member is read and then set to 1.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and is safe inside an unbraced if/else.  Invoke it
 * with a trailing semicolon.
 */
#define KICONV_CHECK_UTF8_BOM(ib, ibtail)                               \
        do {                                                            \
                if (((kiconv_state_t)kcd)->bom_processed == 0 &&        \
                    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&          \
                    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {       \
                        (ib) += 3;                                      \
                }                                                       \
                ((kiconv_state_t)kcd)->bom_processed = 1;               \
        } while (0)

/*
 * Check BOM of UTF-8 without state information.
 *
 * If at least 3 bytes remain between ib and ibtail and they are the UTF-8
 * signature 0xef 0xbb 0xbf, advance ib past them; otherwise do nothing.
 * ib must be a modifiable pointer and should point to unsigned bytes
 * (through a signed char pointer the comparisons can never match).
 *
 * The body is wrapped in do { } while (0) so that the macro is a single
 * statement: a following `else' can no longer bind to the macro's internal
 * `if'.  Invoke it with a trailing semicolon.
 */
#define KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail)                 \
        do {                                                            \
                if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&          \
                    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {       \
                        (ib) += 3;                                      \
                }                                                       \
        } while (0)

/*
 * Set errno and break.
 *
 * Stores `err' through the caller's `errno' pointer, sets the caller's
 * `ret_val' to (size_t)-1 and then executes `break', leaving the loop or
 * switch that encloses the macro invocation.  Both `errno' and `ret_val'
 * must be in scope at the point of use.
 *
 * The usual do { } while (0) wrapper cannot be used here because the
 * `break' would then only terminate that wrapper loop.  Instead the body
 * is the taken branch of an always-true if/else, which makes the macro a
 * single statement (so all of it is guarded by an unbraced `if') while
 * `break' still applies to the caller's loop or switch.  The trailing
 * `else (void)0' absorbs the caller's semicolon and prevents a following
 * `else' from pairing with the internal `if'.  Braces around the
 * invocation are still recommended.
 */
#define KICONV_SET_ERRNO_AND_BREAK(err)                                 \
        if (1) {                                                        \
                *errno = (err);                                         \
                ret_val = (size_t)-1;                                   \
                break;                                                  \
        } else                                                          \
                (void)0

/*
 * Handling flag, advance input buffer, set errno and break.
 *
 * If KICONV_REPLACE_INVALID is set in the caller's `flag', the caller's
 * `ib' is advanced by `advance' and control jumps to the caller's
 * REPLACE_INVALID label.  Otherwise KICONV_SET_ERRNO_AND_BREAK(err) is
 * executed, which breaks out of the loop or switch enclosing the macro
 * invocation.
 *
 * Names required in the invoking function: flag, ib, errno, ret_val and
 * the label REPLACE_INVALID.
 *
 * The macro is written as one if / else-if / else statement so that it is
 * fully guarded when used as the body of an unbraced `if'.  A
 * do { } while (0) wrapper is not an option because it would capture the
 * `break'.  The final `else (void)0' absorbs the caller's semicolon.
 * Braces around the invocation are still recommended.
 */
#define KICONV_SET_ERRNO_WITH_FLAG(advance, err)                        \
        if (flag & KICONV_REPLACE_INVALID) {                            \
                ib += (advance);                                        \
                goto REPLACE_INVALID;                                   \
        } else if (1) {                                                 \
                KICONV_SET_ERRNO_AND_BREAK((err));                      \
        } else                                                          \
                (void)0

/*
 * Conversion table entry for UTF-8 -> CCK encoding: a pair of 32-bit
 * values.
 * NOTE(review): presumably `key' holds the UTF-8 side code that is looked
 * up and `value' the corresponding CCK code -- confirm against the table
 * definitions and kiconv_binsearch().
 */
typedef struct {
        uint32_t key;
        uint32_t value;
} kiconv_table_t;

/*
 * Conversion table entry for CCK encoding -> UTF-8: a 32-bit key plus a
 * 4-byte array.
 * NOTE(review): presumably `key' is the CCK code and `u8' the UTF-8 byte
 * sequence it maps to (how unused trailing bytes are filled is not visible
 * here) -- confirm against the table definitions.
 */
typedef struct {
        uint32_t key;
        uchar_t u8[4];
} kiconv_table_array_t;

/*
 * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
 * Currently parameter ib/ibtail are used by BIG5HKSCS only.
 *
 * Callback type for the per-encoding conversion of one UTF-8 value; it is
 * the last argument of kiconv_utf8_to_cck() and kiconvstr_utf8_to_cck().
 * NOTE(review): the meaning of the int8_t return value and of what is
 * stored through ret_val is defined by the implementations -- confirm
 * there before relying on it.
 */
typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
        uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);

/*
 * Common open and close functions for UTF-8 to CCK conversion.
 * NOTE(review): presumably the pointer returned by kiconv_open_to_cck()
 * is the conversion descriptor later handed to kiconv_close_to_cck() --
 * confirm against the implementation.
 */
void *  kiconv_open_to_cck(void);
int     kiconv_close_to_cck(void *);

/*
 * Binary search function: looks up `key' in the table `tbl', which holds
 * `nitems' entries.
 * NOTE(review): the expected element layout of `tbl' and the meaning of
 * the returned size_t (likely an index into the table) are defined by the
 * implementation -- confirm there.
 */
size_t  kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);

/*
 * Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
 * The encoding-specific work is supplied by the caller through the
 * ptr_utf8tocck callback; an error code is reported through `errno'.
 */
size_t  kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
        char **outbuf, size_t *outbytesleft, int *errno,
        kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
 * Unlike kiconv_utf8_to_cck() it takes no conversion descriptor; it works
 * on the inarray/outarray buffers and takes an additional `flag' argument.
 * The encoding-specific work is supplied through the ptr_utf8tocck
 * callback.
 */
size_t  kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
        uchar_t *outarray, size_t *outlen, int flag, int *errno,
        kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * The following tables come from u8_textprep.c. We use them to check
 * the validity of UTF-8 characters and their bytes.  The min/max 2nd byte
 * tables are indexed by the first byte of a character (see
 * KICONV_IS_INVALID_UTF8_SECOND_BYTE).
 */
extern const int8_t u8_number_of_bytes[];
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];

#endif  /* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif  /* _SYS_KICONV_CCK_COMMON_H */