#include <sys/types.h>
#ifdef _KERNEL
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/sunddi.h>
#else
#include <sys/u8_textprep.h>
#endif
#include <sys/byteorder.h>
#include <sys/errno.h>
#define UCONV_U16_HI_MIN (0xd800U)
#define UCONV_U16_HI_MAX (0xdbffU)
#define UCONV_U16_LO_MIN (0xdc00U)
#define UCONV_U16_LO_MAX (0xdfffU)
#define UCONV_U16_BIT_SHIFT (0x0400U)
#define UCONV_U16_BIT_MASK (0x0fffffU)
#define UCONV_U16_START (0x010000U)
#define UCONV_UNICODE_MAX (0x10ffffU)
#define UCONV_ASCII_MAX (0x7fU)
#define UCONV_IN_ENDIAN_MASKS (UCONV_IN_BIG_ENDIAN | UCONV_IN_LITTLE_ENDIAN)
#define UCONV_OUT_ENDIAN_MASKS (UCONV_OUT_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN)
#ifdef _BIG_ENDIAN
#define UCONV_IN_NAT_ENDIAN UCONV_IN_BIG_ENDIAN
#define UCONV_IN_REV_ENDIAN UCONV_IN_LITTLE_ENDIAN
#define UCONV_OUT_NAT_ENDIAN UCONV_OUT_BIG_ENDIAN
#define UCONV_OUT_REV_ENDIAN UCONV_OUT_LITTLE_ENDIAN
#else
#define UCONV_IN_NAT_ENDIAN UCONV_IN_LITTLE_ENDIAN
#define UCONV_IN_REV_ENDIAN UCONV_IN_BIG_ENDIAN
#define UCONV_OUT_NAT_ENDIAN UCONV_OUT_LITTLE_ENDIAN
#define UCONV_OUT_REV_ENDIAN UCONV_OUT_BIG_ENDIAN
#endif
#define UCONV_BOM_NORMAL (0xfeffU)
#define UCONV_BOM_SWAPPED (0xfffeU)
#define UCONV_BOM_SWAPPED_32 (0xfffe0000U)
#define UCONV_U8_ONE_BYTE (0x7fU)
#define UCONV_U8_TWO_BYTES (0x7ffU)
#define UCONV_U8_THREE_BYTES (0xffffU)
#define UCONV_U8_FOUR_BYTES (0x10ffffU)
#define UCONV_U8_BYTE_MIN (0x80U)
#define UCONV_U8_BYTE_MAX (0xbfU)
#define UCONV_U8_BIT_SHIFT 6
#define UCONV_U8_BIT_MASK 0x3f
static const uchar_t remaining_bytes_tbl[0x100] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
#ifdef _KERNEL
const uchar_t u8_masks_tbl[6] = { 0x00, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
#else
static const uchar_t u8_masks_tbl[6] = { 0x00, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
#endif
static const uchar_t valid_min_2nd_byte[0x100] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x90, 0x80, 0x80, 0x80, 0x80, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
static const uchar_t valid_max_2nd_byte[0x100] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf,
0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
static int
check_endian(int flag, int *in, int *out)
{
*in = flag & UCONV_IN_ENDIAN_MASKS;
if (*in == UCONV_IN_ENDIAN_MASKS)
return (EBADF);
if (*in == 0)
*in = UCONV_IN_NAT_ENDIAN;
*out = flag & UCONV_OUT_ENDIAN_MASKS;
if (*out == UCONV_OUT_ENDIAN_MASKS)
return (EBADF);
if (*out == 0)
*out = UCONV_OUT_NAT_ENDIAN;
return (0);
}
static boolean_t
check_bom16(const uint16_t *u16s, size_t u16l, int *in)
{
if (u16l > 0) {
if (*u16s == UCONV_BOM_NORMAL) {
*in = UCONV_IN_NAT_ENDIAN;
return (B_TRUE);
}
if (*u16s == UCONV_BOM_SWAPPED) {
*in = UCONV_IN_REV_ENDIAN;
return (B_TRUE);
}
}
return (B_FALSE);
}
static boolean_t
check_bom32(const uint32_t *u32s, size_t u32l, int *in)
{
if (u32l > 0) {
if (*u32s == UCONV_BOM_NORMAL) {
*in = UCONV_IN_NAT_ENDIAN;
return (B_TRUE);
}
if (*u32s == UCONV_BOM_SWAPPED_32) {
*in = UCONV_IN_REV_ENDIAN;
return (B_TRUE);
}
}
return (B_FALSE);
}
int
uconv_u16tou32(const uint16_t *u16s, size_t *utf16len,
uint32_t *u32s, size_t *utf32len, int flag)
{
int inendian;
int outendian;
size_t u16l;
size_t u32l;
uint32_t hi;
uint32_t lo;
boolean_t do_not_ignore_null;
if (u16s == NULL || utf16len == NULL)
return (EILSEQ);
if (u32s == NULL || utf32len == NULL)
return (E2BIG);
if (check_endian(flag, &inendian, &outendian) != 0)
return (EBADF);
u16l = u32l = 0;
hi = 0;
do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0);
if ((flag & UCONV_IN_ACCEPT_BOM) &&
check_bom16(u16s, *utf16len, &inendian))
u16l++;
inendian &= UCONV_IN_NAT_ENDIAN;
outendian &= UCONV_OUT_NAT_ENDIAN;
if (*utf16len > 0 && *utf32len > 0 && (flag & UCONV_OUT_EMIT_BOM))
u32s[u32l++] = (outendian) ? UCONV_BOM_NORMAL :
UCONV_BOM_SWAPPED_32;
for (; u16l < *utf16len; u16l++) {
if (u16s[u16l] == 0 && do_not_ignore_null)
break;
lo = (uint32_t)((inendian) ? u16s[u16l] : BSWAP_16(u16s[u16l]));
if (lo >= UCONV_U16_HI_MIN && lo <= UCONV_U16_HI_MAX) {
if (hi)
return (EILSEQ);
hi = lo;
continue;
} else if (lo >= UCONV_U16_LO_MIN && lo <= UCONV_U16_LO_MAX) {
if (! hi)
return (EILSEQ);
lo = (((hi - UCONV_U16_HI_MIN) * UCONV_U16_BIT_SHIFT +
lo - UCONV_U16_LO_MIN) & UCONV_U16_BIT_MASK)
+ UCONV_U16_START;
hi = 0;
} else if (hi) {
return (EILSEQ);
}
if (u32l >= *utf32len)
return (E2BIG);
u32s[u32l++] = (outendian) ? lo : BSWAP_32(lo);
}
if (hi)
return (EINVAL);
*utf16len = u16l;
*utf32len = u32l;
return (0);
}
int
uconv_u16tou8(const uint16_t *u16s, size_t *utf16len,
uchar_t *u8s, size_t *utf8len, int flag)
{
int inendian;
int outendian;
size_t u16l;
size_t u8l;
uint32_t hi;
uint32_t lo;
boolean_t do_not_ignore_null;
if (u16s == NULL || utf16len == NULL)
return (EILSEQ);
if (u8s == NULL || utf8len == NULL)
return (E2BIG);
if (check_endian(flag, &inendian, &outendian) != 0)
return (EBADF);
u16l = u8l = 0;
hi = 0;
do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0);
if ((flag & UCONV_IN_ACCEPT_BOM) &&
check_bom16(u16s, *utf16len, &inendian))
u16l++;
inendian &= UCONV_IN_NAT_ENDIAN;
for (; u16l < *utf16len; u16l++) {
if (u16s[u16l] == 0 && do_not_ignore_null)
break;
lo = (uint32_t)((inendian) ? u16s[u16l] : BSWAP_16(u16s[u16l]));
if (lo >= UCONV_U16_HI_MIN && lo <= UCONV_U16_HI_MAX) {
if (hi)
return (EILSEQ);
hi = lo;
continue;
} else if (lo >= UCONV_U16_LO_MIN && lo <= UCONV_U16_LO_MAX) {
if (! hi)
return (EILSEQ);
lo = (((hi - UCONV_U16_HI_MIN) * UCONV_U16_BIT_SHIFT +
lo - UCONV_U16_LO_MIN) & UCONV_U16_BIT_MASK)
+ UCONV_U16_START;
hi = 0;
} else if (hi) {
return (EILSEQ);
}
if (lo <= UCONV_U8_ONE_BYTE) {
if (u8l >= *utf8len)
return (E2BIG);
u8s[u8l++] = (uchar_t)lo;
} else if (lo <= UCONV_U8_TWO_BYTES) {
if ((u8l + 1) >= *utf8len)
return (E2BIG);
u8s[u8l++] = (uchar_t)(0xc0 | ((lo & 0x07c0) >> 6));
u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x003f));
} else if (lo <= UCONV_U8_THREE_BYTES) {
if ((u8l + 2) >= *utf8len)
return (E2BIG);
u8s[u8l++] = (uchar_t)(0xe0 | ((lo & 0x0f000) >> 12));
u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x00fc0) >> 6));
u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x0003f));
} else if (lo <= UCONV_U8_FOUR_BYTES) {
if ((u8l + 3) >= *utf8len)
return (E2BIG);
u8s[u8l++] = (uchar_t)(0xf0 | ((lo & 0x01c0000) >> 18));
u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x003f000) >> 12));
u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x0000fc0) >> 6));
u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x000003f));
} else {
return (EILSEQ);
}
}
if (hi)
return (EINVAL);
*utf16len = u16l;
*utf8len = u8l;
return (0);
}
int
uconv_u32tou16(const uint32_t *u32s, size_t *utf32len,
uint16_t *u16s, size_t *utf16len, int flag)
{
int inendian;
int outendian;
size_t u16l;
size_t u32l;
uint32_t hi;
uint32_t lo;
boolean_t do_not_ignore_null;
if (u32s == NULL || utf32len == NULL)
return (EILSEQ);
if (u16s == NULL || utf16len == NULL)
return (E2BIG);
if (check_endian(flag, &inendian, &outendian) != 0)
return (EBADF);
u16l = u32l = 0;
do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0);
if ((flag & UCONV_IN_ACCEPT_BOM) &&
check_bom32(u32s, *utf32len, &inendian))
u32l++;
inendian &= UCONV_IN_NAT_ENDIAN;
outendian &= UCONV_OUT_NAT_ENDIAN;
if (*utf32len > 0 && *utf16len > 0 && (flag & UCONV_OUT_EMIT_BOM))
u16s[u16l++] = (outendian) ? UCONV_BOM_NORMAL :
UCONV_BOM_SWAPPED;
for (; u32l < *utf32len; u32l++) {
if (u32s[u32l] == 0 && do_not_ignore_null)
break;
hi = (inendian) ? u32s[u32l] : BSWAP_32(u32s[u32l]);
if (hi > UCONV_UNICODE_MAX)
return (EILSEQ);
if (hi >= UCONV_U16_START) {
lo = ((hi - UCONV_U16_START) % UCONV_U16_BIT_SHIFT) +
UCONV_U16_LO_MIN;
hi = ((hi - UCONV_U16_START) / UCONV_U16_BIT_SHIFT) +
UCONV_U16_HI_MIN;
if ((u16l + 1) >= *utf16len)
return (E2BIG);
if (outendian) {
u16s[u16l++] = (uint16_t)hi;
u16s[u16l++] = (uint16_t)lo;
} else {
u16s[u16l++] = BSWAP_16(((uint16_t)hi));
u16s[u16l++] = BSWAP_16(((uint16_t)lo));
}
} else {
if (u16l >= *utf16len)
return (E2BIG);
u16s[u16l++] = (outendian) ? (uint16_t)hi :
BSWAP_16(((uint16_t)hi));
}
}
*utf16len = u16l;
*utf32len = u32l;
return (0);
}
int
uconv_u32tou8(const uint32_t *u32s, size_t *utf32len,
uchar_t *u8s, size_t *utf8len, int flag)
{
int inendian;
int outendian;
size_t u32l;
size_t u8l;
uint32_t lo;
boolean_t do_not_ignore_null;
if (u32s == NULL || utf32len == NULL)
return (EILSEQ);
if (u8s == NULL || utf8len == NULL)
return (E2BIG);
if (check_endian(flag, &inendian, &outendian) != 0)
return (EBADF);
u32l = u8l = 0;
do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0);
if ((flag & UCONV_IN_ACCEPT_BOM) &&
check_bom32(u32s, *utf32len, &inendian))
u32l++;
inendian &= UCONV_IN_NAT_ENDIAN;
for (; u32l < *utf32len; u32l++) {
if (u32s[u32l] == 0 && do_not_ignore_null)
break;
lo = (inendian) ? u32s[u32l] : BSWAP_32(u32s[u32l]);
if (lo <= UCONV_U8_ONE_BYTE) {
if (u8l >= *utf8len)
return (E2BIG);
u8s[u8l++] = (uchar_t)lo;
} else if (lo <= UCONV_U8_TWO_BYTES) {
if ((u8l + 1) >= *utf8len)
return (E2BIG);
u8s[u8l++] = (uchar_t)(0xc0 | ((lo & 0x07c0) >> 6));
u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x003f));
} else if (lo <= UCONV_U8_THREE_BYTES) {
if ((u8l + 2) >= *utf8len)
return (E2BIG);
u8s[u8l++] = (uchar_t)(0xe0 | ((lo & 0x0f000) >> 12));
u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x00fc0) >> 6));
u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x0003f));
} else if (lo <= UCONV_U8_FOUR_BYTES) {
if ((u8l + 3) >= *utf8len)
return (E2BIG);
u8s[u8l++] = (uchar_t)(0xf0 | ((lo & 0x01c0000) >> 18));
u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x003f000) >> 12));
u8s[u8l++] = (uchar_t)(0x80 | ((lo & 0x0000fc0) >> 6));
u8s[u8l++] = (uchar_t)(0x80 | (lo & 0x000003f));
} else {
return (EILSEQ);
}
}
*utf32len = u32l;
*utf8len = u8l;
return (0);
}
int
uconv_u8tou16(const uchar_t *u8s, size_t *utf8len,
uint16_t *u16s, size_t *utf16len, int flag)
{
int inendian;
int outendian;
size_t u16l;
size_t u8l;
uint32_t hi;
uint32_t lo;
int remaining_bytes;
int first_b;
boolean_t do_not_ignore_null;
if (u8s == NULL || utf8len == NULL)
return (EILSEQ);
if (u16s == NULL || utf16len == NULL)
return (E2BIG);
if (check_endian(flag, &inendian, &outendian) != 0)
return (EBADF);
u16l = u8l = 0;
do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0);
outendian &= UCONV_OUT_NAT_ENDIAN;
if (*utf8len > 0 && *utf16len > 0 && (flag & UCONV_OUT_EMIT_BOM))
u16s[u16l++] = (outendian) ? UCONV_BOM_NORMAL :
UCONV_BOM_SWAPPED;
for (; u8l < *utf8len; ) {
if (u8s[u8l] == 0 && do_not_ignore_null)
break;
hi = (uint32_t)u8s[u8l++];
if (hi > UCONV_ASCII_MAX) {
if ((remaining_bytes = remaining_bytes_tbl[hi]) == 0)
return (EILSEQ);
first_b = hi;
hi = hi & u8_masks_tbl[remaining_bytes];
for (; remaining_bytes > 0; remaining_bytes--) {
if (u8l >= *utf8len)
return (EINVAL);
lo = (uint32_t)u8s[u8l++];
if (first_b) {
if (lo < valid_min_2nd_byte[first_b] ||
lo > valid_max_2nd_byte[first_b])
return (EILSEQ);
first_b = 0;
} else if (lo < UCONV_U8_BYTE_MIN ||
lo > UCONV_U8_BYTE_MAX) {
return (EILSEQ);
}
hi = (hi << UCONV_U8_BIT_SHIFT) |
(lo & UCONV_U8_BIT_MASK);
}
}
if (hi >= UCONV_U16_START) {
lo = ((hi - UCONV_U16_START) % UCONV_U16_BIT_SHIFT) +
UCONV_U16_LO_MIN;
hi = ((hi - UCONV_U16_START) / UCONV_U16_BIT_SHIFT) +
UCONV_U16_HI_MIN;
if ((u16l + 1) >= *utf16len)
return (E2BIG);
if (outendian) {
u16s[u16l++] = (uint16_t)hi;
u16s[u16l++] = (uint16_t)lo;
} else {
u16s[u16l++] = BSWAP_16(((uint16_t)hi));
u16s[u16l++] = BSWAP_16(((uint16_t)lo));
}
} else {
if (u16l >= *utf16len)
return (E2BIG);
u16s[u16l++] = (outendian) ? (uint16_t)hi :
BSWAP_16(((uint16_t)hi));
}
}
*utf16len = u16l;
*utf8len = u8l;
return (0);
}
int
uconv_u8tou32(const uchar_t *u8s, size_t *utf8len,
uint32_t *u32s, size_t *utf32len, int flag)
{
int inendian;
int outendian;
size_t u32l;
size_t u8l;
uint32_t hi;
uint32_t c;
int remaining_bytes;
int first_b;
boolean_t do_not_ignore_null;
if (u8s == NULL || utf8len == NULL)
return (EILSEQ);
if (u32s == NULL || utf32len == NULL)
return (E2BIG);
if (check_endian(flag, &inendian, &outendian) != 0)
return (EBADF);
u32l = u8l = 0;
do_not_ignore_null = ((flag & UCONV_IGNORE_NULL) == 0);
outendian &= UCONV_OUT_NAT_ENDIAN;
if (*utf8len > 0 && *utf32len > 0 && (flag & UCONV_OUT_EMIT_BOM))
u32s[u32l++] = (outendian) ? UCONV_BOM_NORMAL :
UCONV_BOM_SWAPPED_32;
for (; u8l < *utf8len; ) {
if (u8s[u8l] == 0 && do_not_ignore_null)
break;
hi = (uint32_t)u8s[u8l++];
if (hi > UCONV_ASCII_MAX) {
if ((remaining_bytes = remaining_bytes_tbl[hi]) == 0)
return (EILSEQ);
first_b = hi;
hi = hi & u8_masks_tbl[remaining_bytes];
for (; remaining_bytes > 0; remaining_bytes--) {
if (u8l >= *utf8len)
return (EINVAL);
c = (uint32_t)u8s[u8l++];
if (first_b) {
if (c < valid_min_2nd_byte[first_b] ||
c > valid_max_2nd_byte[first_b])
return (EILSEQ);
first_b = 0;
} else if (c < UCONV_U8_BYTE_MIN ||
c > UCONV_U8_BYTE_MAX) {
return (EILSEQ);
}
hi = (hi << UCONV_U8_BIT_SHIFT) |
(c & UCONV_U8_BIT_MASK);
}
}
if (u32l >= *utf32len)
return (E2BIG);
u32s[u32l++] = (outendian) ? hi : BSWAP_32(hi);
}
*utf32len = u32l;
*utf8len = u8l;
return (0);
}