#include <locale.h>
#include <err.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <strings.h>
#include <wchar.h>
#include <uchar.h>
#include <errno.h>
/*
 * Shared test inputs.
 *
 * uchar_wide is a single multi-byte character; the tests below expect it to
 * decode to uchar_value and to occupy three bytes when the UTF-8 locale is
 * active. uchar_hello is a plain ASCII string.
 */
static const char *uchar_wide = "光";
static const char32_t uchar_value = 0x5149;
static const char *uchar_hello = "hello";
/*
 * Switch LC_CTYPE to the requested locale. The test program is terminated
 * if setlocale() rejects the request or reports a locale name other than the
 * one that was asked for.
 */
static void
update_locale(const char *loc)
{
	const char *actual;

	actual = setlocale(LC_CTYPE, loc);
	if (actual == NULL) {
		err(EXIT_FAILURE, "TEST FAILED: failed to update locale to %s",
		    loc);
	}

	/* Nothing more to do when the library confirms the exact name. */
	if (strcmp(actual, loc) == 0)
		return;

	errx(EXIT_FAILURE, "TEST FAILED: locale set to %s, but got %s",
	    loc, actual);
}
/*
 * Convert the first two characters of uchar_hello with mbrtoc32(), one call
 * per character, and check that each call reports one consumed byte and
 * produces the matching character value.
 *
 * mbs is passed straight through to mbrtoc32(); callers may pass NULL.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
mbrtoc32_ascii(mbstate_t *mbs)
{
	/* Start from a known value so a failed conversion is reported sanely */
	char32_t out = 0;
	size_t len;
	boolean_t ret = B_TRUE;

	if ((len = mbrtoc32(&out, uchar_hello, 5, mbs)) != 1) {
		warnx("expected mbrtoc32 to return 1, returned %zu", len);
		ret = B_FALSE;
	}

	if (out != 'h') {
		warnx("got bad char32_t, expected 0x%x, found 0x%x", 'h', out);
		ret = B_FALSE;
	}

	/* Do not let the 'h' from the first call leak into the next check */
	out = 0;
	if ((len = mbrtoc32(&out, uchar_hello + 1, 4, mbs)) != 1) {
		warnx("expected mbrtoc32 to return 1, returned %zu", len);
		ret = B_FALSE;
	}

	if (out != 'e') {
		warnx("got bad char32_t, expected 0x%x, found 0x%x", 'e', out);
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Run the ASCII mbrtoc32() checks with a NULL mbstate_t pointer, leaving the
 * choice of conversion state to mbrtoc32() itself.
 */
static boolean_t
mbrtoc32_ascii_internal(void)
{
	mbstate_t *no_state = NULL;

	return (mbrtoc32_ascii(no_state));
}
/*
 * Run the ASCII mbrtoc32() checks with a caller-owned, zero-filled
 * mbstate_t.
 */
static boolean_t
mbrtoc32_ascii_mbstate(void)
{
	mbstate_t state;
	boolean_t passed;

	bzero(&state, sizeof (state));
	passed = mbrtoc32_ascii(&state);

	return (passed);
}
/*
 * Feed mbrtoc32() two inputs that the test expects to be rejected and
 * check that each yields (size_t)-1 with errno set to EILSEQ:
 *
 *  1. the lone byte 0xa9, and
 *  2. a copy of uchar_wide whose second byte has been replaced with '?'.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise (including when
 * the working copy of uchar_wide cannot be allocated).
 */
static boolean_t
mbrtoc32_badseq_utf8(void)
{
	mbstate_t mbs;
	size_t len;
	char32_t out;
	boolean_t ret = B_TRUE;
	char *badstr;

	bzero(&mbs, sizeof (mbs));
	/*
	 * Clear errno first: an EILSEQ left behind by an earlier call would
	 * otherwise satisfy the check below even if this call never set it.
	 */
	errno = 0;
	len = mbrtoc32(&out, "\xa9", 1, &mbs);
	if (len != (size_t)-1) {
		warnx("mbrtoc32 returned %zu, not %zu", len, (size_t)-1);
		ret = B_FALSE;
	}

	if (errno != EILSEQ) {
		warnx("found bad errno, expected %d, found %d", EILSEQ,
		    errno);
		ret = B_FALSE;
	}

	badstr = strdup(uchar_wide);
	if (badstr == NULL) {
		warn("failed to duplicate uchar_wide");
		return (B_FALSE);
	}

	/* Corrupt the multi-byte sequence by overwriting its second byte */
	badstr[1] = '?';
	bzero(&mbs, sizeof (mbs));
	errno = 0;
	len = mbrtoc32(&out, badstr, strlen(badstr), &mbs);
	free(badstr);
	if (len != (size_t)-1) {
		warnx("mbrtoc32 returned %zu, not %zu", len, (size_t)-1);
		ret = B_FALSE;
	}

	if (errno != EILSEQ) {
		warnx("found bad errno, expected %d, found %d", EILSEQ,
		    errno);
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Decode uchar_wide with mbrtoc32(), check the consumed length (3) and the
 * resulting value (uchar_value), then encode that value again with
 * c32rtomb() and check that the same number of bytes, with the same
 * contents, comes back.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
mbrtoc32_roundtrip(void)
{
	char32_t out = 0;
	size_t len, clen;
	mbstate_t mbs;
	char buf[MB_CUR_MAX];
	boolean_t ret = B_TRUE;

	bzero(&mbs, sizeof (mbs));
	/*
	 * Zero the output buffer so the comparison below never examines
	 * indeterminate bytes when c32rtomb() fails or writes less than
	 * expected.
	 */
	bzero(buf, sizeof (buf));

	len = mbrtoc32(&out, uchar_wide, strlen(uchar_wide), &mbs);
	if (len != 3) {
		warnx("mbrtoc32 returned %zu, expected %d", len, 3);
		ret = B_FALSE;
	}

	if (out != uchar_value) {
		warnx("mbrtoc32 converted character to 0x%x not 0x%x",
		    out, uchar_value);
		ret = B_FALSE;
	}

	clen = c32rtomb(buf, out, &mbs);
	if (clen != len) {
		warnx("c32rtomb returned %zu bytes, but we originally used %zu",
		    clen, len);
		ret = B_FALSE;
	}

	if (strncmp(buf, uchar_wide, len) != 0) {
		warnx("round trip string comparison failed");
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Hand uchar_wide to mbrtoc32() one byte at a time, sharing a single
 * mbstate_t across the calls. Every byte but the last must be reported as an
 * incomplete sequence ((size_t)-2); the last byte must complete the
 * character, returning 1 and producing uchar_value.
 */
static boolean_t
mbrtoc32_partial(void)
{
	const size_t last = strlen(uchar_wide) - 1;
	mbstate_t state;
	char32_t c32;
	size_t res;
	size_t off = 0;
	boolean_t ok = B_TRUE;

	bzero(&state, sizeof (state));

	/* All leading bytes: the sequence is still incomplete. */
	while (off < last) {
		res = mbrtoc32(&c32, uchar_wide + off, 1, &state);
		if (res != (size_t)-2) {
			warnx("partial mbrtoc32 returned %zu, not -2", res);
			ok = B_FALSE;
		}
		off++;
	}

	/* Final byte: the character is now complete. */
	res = mbrtoc32(&c32, uchar_wide + off, 1, &state);
	if (res != 1) {
		warnx("partial mbrtoc32 returned %zu, not 1", res);
		ok = B_FALSE;
	}

	if (c32 != uchar_value) {
		warnx("mbrtoc32 converted character to 0x%x not 0x%x",
		    c32, uchar_value);
		ok = B_FALSE;
	}

	return (ok);
}
/*
 * Convert the empty string (i.e. just its terminating NUL) with mbrtoc32()
 * and check that the call returns 0 and stores a zero character.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
mbrtoc32_zero(void)
{
	/*
	 * Seed the output with a non-zero sentinel so the comparison against
	 * zero only succeeds if mbrtoc32() really wrote the result.
	 */
	char32_t out = 0x12345, exp = 0;
	size_t len;
	mbstate_t mbs;
	boolean_t ret = B_TRUE;

	bzero(&mbs, sizeof (mbs));
	len = mbrtoc32(&out, "", 1, &mbs);
	if (len != 0) {
		warnx("mbrtoc32 returned %zu, not 0", len);
		ret = B_FALSE;
	}

	if (out != exp) {
		warnx("mbrtoc32 converted character to 0x%x not 0x%x",
		    out, exp);
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Call mbrtoc32() on uchar_wide with a length of zero. The test expects
 * (size_t)-2 back and expects the output character to keep its sentinel
 * value.
 */
static boolean_t
mbrtoc32_zero_len(void)
{
	const char32_t sentinel = 0x12345;
	char32_t c32 = sentinel;
	mbstate_t state;
	size_t res;
	boolean_t ok = B_TRUE;

	bzero(&state, sizeof (state));
	res = mbrtoc32(&c32, uchar_wide, 0, &state);

	if (res != (size_t)-2) {
		warnx("partial mbrtoc32 returned %zu, not -2", res);
		ok = B_FALSE;
	}

	if (c32 != sentinel) {
		warnx("mbrtoc32 incorrectly wrote to char32_t value with "
		    "zero string, found 0x%x not 0x%x", c32, sentinel);
		ok = B_FALSE;
	}

	return (ok);
}
/*
 * Call mbrtoc32() with a NULL input string. The test expects a return value
 * of 0 and expects the output character to keep its sentinel value.
 */
static boolean_t
mbrtoc32_null(void)
{
	const char32_t sentinel = 0x123456;
	char32_t c32 = sentinel;
	mbstate_t state;
	size_t res;
	boolean_t ok = B_TRUE;

	bzero(&state, sizeof (state));
	res = mbrtoc32(&c32, NULL, 1, &state);

	if (res != 0) {
		warnx("partial mbrtoc32 returned %zu, not 0", res);
		ok = B_FALSE;
	}

	if (c32 != sentinel) {
		warnx("mbrtoc32 incorrectly wrote to char32_t value with "
		    "null string, found 0x%x not 0x%x", c32, sentinel);
		ok = B_FALSE;
	}

	return (ok);
}
/*
 * Convert the first two characters of uchar_hello with mbrtoc16(), one call
 * per character, and check that each call reports one consumed byte and
 * produces the matching character value.
 *
 * mbs is passed straight through to mbrtoc16(); callers may pass NULL.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
mbrtoc16_ascii(mbstate_t *mbs)
{
	/* Start from a known value so a failed conversion is reported sanely */
	char16_t out = 0;
	size_t len;
	boolean_t ret = B_TRUE;

	if ((len = mbrtoc16(&out, uchar_hello, 5, mbs)) != 1) {
		warnx("expected mbrtoc16 to return 1, returned %zu", len);
		ret = B_FALSE;
	}

	if (out != 'h') {
		warnx("got bad char16_t, expected 0x%x, found 0x%x", 'h', out);
		ret = B_FALSE;
	}

	/* Do not let the 'h' from the first call leak into the next check */
	out = 0;
	if ((len = mbrtoc16(&out, uchar_hello + 1, 4, mbs)) != 1) {
		warnx("expected mbrtoc16 to return 1, returned %zu", len);
		ret = B_FALSE;
	}

	if (out != 'e') {
		warnx("got bad char16_t, expected 0x%x, found 0x%x", 'e', out);
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Run the ASCII mbrtoc16() checks with a NULL mbstate_t pointer, leaving the
 * choice of conversion state to mbrtoc16() itself.
 */
static boolean_t
mbrtoc16_ascii_internal(void)
{
	mbstate_t *no_state = NULL;

	return (mbrtoc16_ascii(no_state));
}
/*
 * Run the ASCII mbrtoc16() checks with a caller-owned, zero-filled
 * mbstate_t.
 */
static boolean_t
mbrtoc16_ascii_mbstate(void)
{
	mbstate_t state;
	boolean_t passed;

	bzero(&state, sizeof (state));
	passed = mbrtoc16_ascii(&state);

	return (passed);
}
/*
 * Call mbrtoc16() with a NULL input string. The test expects a return value
 * of 0 and expects the output character to keep its sentinel value.
 */
static boolean_t
mbrtoc16_null(void)
{
	const char16_t sentinel = 0x1234;
	char16_t c16 = sentinel;
	mbstate_t state;
	size_t res;
	boolean_t ok = B_TRUE;

	bzero(&state, sizeof (state));
	res = mbrtoc16(&c16, NULL, 1, &state);

	if (res != 0) {
		warnx("partial mbrtoc16 returned %zu, not 0", res);
		ok = B_FALSE;
	}

	if (c16 != sentinel) {
		warnx("mbrtoc16 incorrectly wrote to char16_t value with "
		    "null string, found 0x%x not 0x%x", c16, sentinel);
		ok = B_FALSE;
	}

	return (ok);
}
/*
 * Convert the empty string (i.e. just its terminating NUL) with mbrtoc16()
 * and check that the call returns 0 and stores a zero character.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
mbrtoc16_zero(void)
{
	/*
	 * Seed the output with a non-zero sentinel so the comparison against
	 * zero only succeeds if mbrtoc16() really wrote the result.
	 */
	char16_t out = 0x1234, exp = 0;
	size_t len;
	mbstate_t mbs;
	boolean_t ret = B_TRUE;

	bzero(&mbs, sizeof (mbs));
	len = mbrtoc16(&out, "", 1, &mbs);
	if (len != 0) {
		warnx("mbrtoc16 returned %zu, not 0", len);
		ret = B_FALSE;
	}

	if (out != exp) {
		warnx("mbrtoc16 converted character to 0x%x not 0x%x",
		    out, exp);
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Call mbrtoc16() on uchar_wide with a length of zero. The test expects
 * (size_t)-2 back and expects the output character to keep its sentinel
 * value.
 */
static boolean_t
mbrtoc16_zero_len(void)
{
	const char16_t sentinel = 0x5432;
	char16_t c16 = sentinel;
	mbstate_t state;
	size_t res;
	boolean_t ok = B_TRUE;

	bzero(&state, sizeof (state));
	res = mbrtoc16(&c16, uchar_wide, 0, &state);

	if (res != (size_t)-2) {
		warnx("partial mbrtoc16 returned %zu, not -2", res);
		ok = B_FALSE;
	}

	if (c16 != sentinel) {
		warnx("mbrtoc16 incorrectly wrote to char16_t value with "
		    "zero length string, found 0x%x not 0x%x", c16, sentinel);
		ok = B_FALSE;
	}

	return (ok);
}
/*
 * Decode uchar_wide with mbrtoc16(), check the consumed length (3) and the
 * resulting value (uchar_value), then encode that value again with
 * c16rtomb() and check that the same number of bytes, with the same
 * contents, comes back.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
mbrtoc16_roundtrip(void)
{
	char16_t out = 0;
	size_t len, clen;
	mbstate_t mbs;
	char buf[MB_CUR_MAX];
	boolean_t ret = B_TRUE;

	bzero(&mbs, sizeof (mbs));
	/*
	 * Zero the output buffer so the comparison below never examines
	 * indeterminate bytes when c16rtomb() fails or writes less than
	 * expected.
	 */
	bzero(buf, sizeof (buf));

	len = mbrtoc16(&out, uchar_wide, strlen(uchar_wide), &mbs);
	if (len != 3) {
		warnx("mbrtoc16 returned %zu, expected %d", len, 3);
		ret = B_FALSE;
	}

	if (out != uchar_value) {
		warnx("mbrtoc16 converted character to 0x%x not 0x%x",
		    out, uchar_value);
		ret = B_FALSE;
	}

	clen = c16rtomb(buf, out, &mbs);
	if (clen != len) {
		warnx("c16rtomb returned %zu bytes, but we originally used %zu",
		    clen, len);
		ret = B_FALSE;
	}

	if (strncmp(buf, uchar_wide, len) != 0) {
		warnx("round trip string comparison failed");
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Hand uchar_wide to mbrtoc16() one byte at a time, sharing a single
 * mbstate_t across the calls. Every byte but the last must be reported as an
 * incomplete sequence ((size_t)-2); the last byte must complete the
 * character, returning 1 and producing uchar_value.
 */
static boolean_t
mbrtoc16_partial(void)
{
	const size_t last = strlen(uchar_wide) - 1;
	mbstate_t state;
	char16_t c16;
	size_t res;
	size_t off = 0;
	boolean_t ok = B_TRUE;

	bzero(&state, sizeof (state));

	/* All leading bytes: the sequence is still incomplete. */
	while (off < last) {
		res = mbrtoc16(&c16, uchar_wide + off, 1, &state);
		if (res != (size_t)-2) {
			warnx("partial mbrtoc16 returned %zu, not -2", res);
			ok = B_FALSE;
		}
		off++;
	}

	/* Final byte: the character is now complete. */
	res = mbrtoc16(&c16, uchar_wide + off, 1, &state);
	if (res != 1) {
		warnx("partial mbrtoc16 returned %zu, not 1", res);
		ok = B_FALSE;
	}

	if (c16 != uchar_value) {
		warnx("mbrtoc16 converted character to 0x%x not 0x%x",
		    c16, uchar_value);
		ok = B_FALSE;
	}

	return (ok);
}
/*
 * Exercise surrogate pair handling in both directions using the four byte
 * sequence F0 9F 92 A9 (U+1F4A9), whose UTF-16 form is the pair
 * 0xd83d 0xdca9.
 *
 * Decoding: the first mbrtoc16() call must consume all four bytes, produce
 * the first surrogate and leave the state non-initial. The second call must
 * return (size_t)-3, produce the second surrogate and return the state to
 * initial; it is deliberately given unrelated input (uchar_wide), which the
 * test expects to be left unconsumed.
 *
 * Encoding: c16rtomb() must return 0 for the first surrogate (state
 * non-initial) and then emit the original four bytes for the second one
 * (state initial again).
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
mbrtoc16_surrogate(void)
{
	char16_t out0 = 0, out1 = 0;
	size_t len, clen;
	mbstate_t mbs;
	const char *surrogate = "\xF0\x9F\x92\xA9";
	char16_t exp0 = 0xd83d, exp1 = 0xdca9;
	size_t slen = strlen(surrogate);
	boolean_t ret = B_TRUE;
	char buf[MB_CUR_MAX];

	bzero(&mbs, sizeof (mbs));
	/* Keep the final comparison away from indeterminate bytes */
	bzero(buf, sizeof (buf));

	len = mbrtoc16(&out0, surrogate, slen, &mbs);
	if (len != slen) {
		warnx("mbrtoc16 returned %zu, expected %zu", len, slen);
		ret = B_FALSE;
	}

	if (out0 != exp0) {
		warnx("mbrtoc16 converted character to 0x%x not 0x%x",
		    out0, exp0);
		ret = B_FALSE;
	}

	if (mbsinit(&mbs) != 0) {
		warnx("mb state with a surrogate character is somehow in the "
		    "initial state");
		ret = B_FALSE;
	}

	len = mbrtoc16(&out1, uchar_wide, strlen(uchar_wide), &mbs);
	if (len != (size_t)-3) {
		warnx("mbrtoc16 returned %zu, expected -3", len);
		ret = B_FALSE;
	}

	if (mbsinit(&mbs) == 0) {
		warnx("mb state with after both surrogate characters isn't "
		    "in initial state");
		ret = B_FALSE;
	}

	if (out1 != exp1) {
		warnx("mbrtoc16 converted character to 0x%x not 0x%x",
		    out1, exp1);
		ret = B_FALSE;
	}

	clen = c16rtomb(buf, out0, &mbs);
	if (clen != 0) {
		warnx("c16rtomb returned %zu bytes, but expected zero for the "
		    "first surrogate", clen);
		ret = B_FALSE;
	}

	if (mbsinit(&mbs) != 0) {
		warnx("mb state with a surrogate character is somehow in the "
		    "initial state");
		ret = B_FALSE;
	}

	clen = c16rtomb(buf, out1, &mbs);
	if (clen != slen) {
		warnx("c16rtomb returned %zu, expected %zu", clen, slen);
		ret = B_FALSE;
	}

	if (mbsinit(&mbs) == 0) {
		warnx("mb state with after both surrogate characters isn't "
		    "in initial state");
		ret = B_FALSE;
	}

	if (strncmp(buf, surrogate, slen) != 0) {
		warnx("round trip string comparison failed");
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Ask c32rtomb() to encode uchar_value and check that it fails with
 * (size_t)-1 and errno set to EILSEQ. The test table runs this case under
 * the en_US.ISO8859-1 locale.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
c32rtomb_eilseq_iso8859(void)
{
	char buf[MB_CUR_MAX];
	mbstate_t mbs;
	size_t len;
	boolean_t ret = B_TRUE;

	bzero(&mbs, sizeof (mbs));
	/*
	 * Clear errno first: an EILSEQ left behind by an earlier test would
	 * otherwise satisfy the check below even if this call never set it.
	 */
	errno = 0;
	len = c32rtomb(buf, uchar_value, &mbs);
	if (len != (size_t)-1) {
		warnx("c32rtomb returned %zd, expected -1", (ssize_t)len);
		ret = B_FALSE;
	}

	if (errno != EILSEQ) {
		warnx("expected errno set to %d was %d", EILSEQ, errno);
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Ask c16rtomb() to encode uchar_value (narrowed to char16_t) and check
 * that it fails with (size_t)-1 and errno set to EILSEQ. The test table runs
 * this case under the en_US.ISO8859-1 locale.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
c16rtomb_eilseq_iso8859(void)
{
	char buf[MB_CUR_MAX];
	mbstate_t mbs;
	size_t len;
	boolean_t ret = B_TRUE;

	bzero(&mbs, sizeof (mbs));
	/*
	 * Clear errno first: an EILSEQ left behind by an earlier test would
	 * otherwise satisfy the check below even if this call never set it.
	 */
	errno = 0;
	/* This is the c16rtomb test, so it must go through c16rtomb() */
	len = c16rtomb(buf, (char16_t)uchar_value, &mbs);
	if (len != (size_t)-1) {
		warnx("c16rtomb returned %zd, expected -1", (ssize_t)len);
		ret = B_FALSE;
	}

	if (errno != EILSEQ) {
		warnx("expected errno set to %d was %d", EILSEQ, errno);
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * Ask c32rtomb() to encode UINT32_MAX and check that it fails with
 * (size_t)-1 and errno set to EILSEQ. No locale is given for this case in
 * the test table, so main() runs it under en_US.UTF-8.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
c32rtomb_eilseq_utf8(void)
{
	char buf[MB_CUR_MAX];
	mbstate_t mbs;
	size_t len;
	boolean_t ret = B_TRUE;

	bzero(&mbs, sizeof (mbs));
	/*
	 * Clear errno first: an EILSEQ left behind by an earlier test would
	 * otherwise satisfy the check below even if this call never set it.
	 */
	errno = 0;
	len = c32rtomb(buf, UINT32_MAX, &mbs);
	if (len != (size_t)-1) {
		warnx("c32rtomb returned %zd, expected -1", (ssize_t)len);
		ret = B_FALSE;
	}

	if (errno != EILSEQ) {
		warnx("expected errno set to %d was %d", EILSEQ, errno);
		ret = B_FALSE;
	}

	return (ret);
}
/*
 * For each value in bad[], start from a fresh state, feed c16rtomb() the
 * first surrogate 0xd83d (expected to return 0) and then the bad value,
 * which must be rejected with (size_t)-1 and errno set to EILSEQ.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
c16rtomb_bad_first(void)
{
	char buf[MB_CUR_MAX];
	mbstate_t mbs;
	size_t len, i;
	char16_t first = 0xd83d;
	char16_t bad[] = { 0x0, 0xd7ff, 0xd83d, 0xd900, 0xffff };
	boolean_t ret = B_TRUE;

	for (i = 0; i < ARRAY_SIZE(bad); i++) {
		bzero(&mbs, sizeof (mbs));
		len = c16rtomb(buf, first, &mbs);
		if (len != 0) {
			warnx("c16rtomb returned %zd, expected 0",
			    (ssize_t)len);
			ret = B_FALSE;
		}

		/*
		 * Clear errno on every iteration; otherwise the EILSEQ from
		 * the previous value (or an earlier test) would make the
		 * check below pass without this call having set it.
		 */
		errno = 0;
		len = c16rtomb(buf, bad[i], &mbs);
		if (len != (size_t)-1) {
			warnx("c16rtomb surrogate %x returned %zd, expected "
			    "-1", bad[i], (ssize_t)len);
			ret = B_FALSE;
		}

		if (errno != EILSEQ) {
			warnx("expected errno set to %d was %d", EILSEQ, errno);
			ret = B_FALSE;
		}
	}

	return (ret);
}
/*
 * For each value in bad[], start from a fresh state and feed it to
 * c16rtomb() with no preceding first surrogate. Each must be rejected with
 * (size_t)-1 and errno set to EILSEQ.
 *
 * Returns B_TRUE when every check passed, B_FALSE otherwise.
 */
static boolean_t
c16rtomb_bad_second(void)
{
	char buf[MB_CUR_MAX];
	mbstate_t mbs;
	size_t len, i;
	char16_t bad[] = { 0xdc00, 0xdd34, 0xdfff };
	boolean_t ret = B_TRUE;

	for (i = 0; i < ARRAY_SIZE(bad); i++) {
		bzero(&mbs, sizeof (mbs));
		/*
		 * Clear errno on every iteration; otherwise the EILSEQ from
		 * the previous value (or an earlier test) would make the
		 * check below pass without this call having set it.
		 */
		errno = 0;
		len = c16rtomb(buf, bad[i], &mbs);
		if (len != (size_t)-1) {
			warnx("c16rtomb surrogate %x returned %zd, expected "
			    "-1", bad[i], (ssize_t)len);
			ret = B_FALSE;
		}

		if (errno != EILSEQ) {
			warnx("expected errno set to %d was %d", EILSEQ, errno);
			ret = B_FALSE;
		}
	}

	return (ret);
}
/*
 * Call c32rtomb() with a NULL output buffer and check that it returns 1.
 */
static boolean_t
c32rtomb_null(void)
{
	mbstate_t state;
	size_t nbytes;

	bzero(&state, sizeof (state));
	nbytes = c32rtomb(NULL, uchar_value, &state);
	if (nbytes == 1)
		return (B_TRUE);

	warnx("c32rtomb returned %zu, expected %d", nbytes, 1);
	return (B_FALSE);
}
/*
 * Call c16rtomb() with a NULL output buffer and check that it returns 1.
 */
static boolean_t
c16rtomb_null(void)
{
	mbstate_t state;
	size_t nbytes;

	bzero(&state, sizeof (state));
	nbytes = c16rtomb(NULL, uchar_value, &state);
	if (nbytes == 1)
		return (B_TRUE);

	warnx("c16rtomb returned %zu, expected %d", nbytes, 1);
	return (B_FALSE);
}
/* Every test case takes no arguments and reports pass/fail as a boolean_t. */
typedef boolean_t (*uchar_test_f)(void);

/*
 * One entry in the test table: the function to run, the description printed
 * with its result, and the LC_CTYPE locale to run it under. main() falls
 * back to en_US.UTF-8 when ut_locale is NULL.
 */
struct uchar_test {
	uchar_test_f	ut_func;
	const char	*ut_test;
	const char	*ut_locale;
};

typedef struct uchar_test uchar_test_t;
static const uchar_test_t uchar_tests[] = {
{ mbrtoc32_ascii_mbstate, "mbrtoc32: ascii conversion" },
{ mbrtoc32_ascii_internal, "mbrtoc32: ascii conversion (internal "
"mbstate_t)" },
{ mbrtoc32_badseq_utf8, "mbrtoc32: bad locale sequence (UTF-8)" },
{ mbrtoc32_roundtrip, "mbrtoc32: round trip conversion" },
{ mbrtoc32_partial, "mbrtoc32: correctly consume partial sequences" },
{ mbrtoc32_zero, "mbrtoc32: correctly handle L'\\0'" },
{ mbrtoc32_zero_len, "mbrtoc32: correctly handle length of zero" },
{ mbrtoc32_null, "mbrtoc32: correctly handle null string" },
{ mbrtoc16_ascii_mbstate, "mbrtoc16: ascii conversion" },
{ mbrtoc16_ascii_internal, "mbrtoc16: ascii conversion (internal "
"mbstate_t)" },
{ mbrtoc16_null, "mbrtoc16: correctly handle null string" },
{ mbrtoc16_zero, "mbrtoc16: correctly handle L'\\0'" },
{ mbrtoc16_zero_len, "mbrtoc16: correctly handle length of zero" },
{ mbrtoc16_roundtrip, "mbrtoc16: round trip conversion" },
{ mbrtoc16_partial, "mbrtoc16: correctly consume partial sequences" },
{ mbrtoc16_surrogate, "mbrtoc16: correctly generate surrogate pairs "
"and round trip conversion" },
{ c32rtomb_eilseq_iso8859, "c32rtomb: character outside of locale is "
"caught", "en_US.ISO8859-1" },
{ c16rtomb_eilseq_iso8859, "c16rtomb: character outside of locale is "
"caught", "en_US.ISO8859-1" },
{ c32rtomb_eilseq_utf8, "c32rtomb: character outside of locale is "
"caught" },
{ c16rtomb_bad_first, "c16rtomb: bad first surrogate pair" },
{ c16rtomb_bad_second, "c16rtomb: bad second surrogate pair" },
{ c32rtomb_null, "c32rtomb: correctly handle null buffer" },
{ c16rtomb_null, "c16rtomb: correctly handle null buffer" },
};
/*
 * Run every entry of uchar_tests in order. Before each test LC_CTYPE is
 * switched to the entry's ut_locale, or to en_US.UTF-8 when none is given.
 * A PASSED/FAILED line per test goes to stderr and a summary to stdout.
 *
 * Exits with EXIT_SUCCESS only if every test passed.
 */
int
main(void)
{
	uint_t i;
	uint_t passes = 0;
	uint_t ntests = ARRAY_SIZE(uchar_tests);

	for (i = 0; i < ntests; i++) {
		boolean_t r;

		/*
		 * Set the locale before every test, since a previous entry
		 * may have switched away from the default.
		 */
		if (uchar_tests[i].ut_locale != NULL) {
			update_locale(uchar_tests[i].ut_locale);
		} else {
			update_locale("en_US.UTF-8");
		}

		r = uchar_tests[i].ut_func();
		(void) fprintf(stderr, "TEST %s: %s\n", r ? "PASSED" : "FAILED",
		    uchar_tests[i].ut_test);
		if (r) {
			passes++;
		}
	}

	/* Counters are unsigned; only a count of exactly one is singular */
	(void) printf("%u/%u test%s passed\n", passes, ntests,
	    passes != 1 ? "s" : "");
	return (passes == ntests ? EXIT_SUCCESS : EXIT_FAILURE);
}