#include <sys/types.h>
#include <errno.h>
#include <string.h>
#include <wchar.h>
#include "citrus_ctype.h"
/*
 * Conversion state of the UTF-8 decoder.  The functions in this file
 * obtain it by casting the caller's mbstate_t pointer to this type.
 */
struct _utf8_state {
/* Bits of the partially decoded character accumulated so far. */
wchar_t ch;
/* Bytes still needed to finish the character; 0 is the initial state. */
int want;
/* Smallest value the finished character may have for its length. */
wchar_t lbound;
};
/*
 * Decode the next UTF-8 sequence, examining at most n bytes of s.
 *
 * pwc	receives the decoded character when it is not NULL.
 * s	input bytes; a NULL s is handled as the string "" with n = 1
 *	and pwc = NULL.
 * n	maximum number of bytes to look at.
 * ps	conversion state, interpreted as a struct _utf8_state.  It holds
 *	a partially decoded character between calls.
 *
 * Returns 0 when the decoded character is NUL, otherwise the number of
 * bytes consumed by this call.  Returns (size_t)-2 when the available
 * bytes end before the character is complete; the progress made is
 * saved in ps (n == 0 returns (size_t)-2 without touching ps).
 * Returns (size_t)-1 on error with errno set to EINVAL for an
 * out-of-range state, or to EILSEQ for input that is not valid UTF-8:
 * bad lead byte, bad continuation byte, overlong encoding, surrogate,
 * or a value above 0x10ffff.
 */
size_t
_citrus_utf8_ctype_mbrtowc(wchar_t * __restrict pwc,
    const char * __restrict s, size_t n, mbstate_t * __restrict ps)
{
	struct _utf8_state *us;
	int ch, i, want;
	wchar_t lbound, wch;

	us = (struct _utf8_state *)ps;

	if (us->want < 0 || us->want > _CITRUS_UTF8_MB_CUR_MAX) {
		errno = EINVAL;
		return (size_t)-1;
	}

	if (s == NULL) {
		s = "";
		n = 1;
		pwc = NULL;
	}

	if (n == 0)
		return (size_t)-2;

	if (us->want == 0) {
		/* Starting a new character: consume and classify the lead. */
		ch = (unsigned char)*s++;

		if ((ch & ~0x7f) == 0) {
			/* Fast path for plain ASCII characters. */
			if (pwc != NULL)
				*pwc = ch;
			return ch != '\0' ? 1 : 0;
		}

		/*
		 * The lead byte fixes the sequence length, contributes
		 * its low bits to the character, and determines the
		 * smallest value that is not an overlong encoding.
		 * One-byte sequences were dealt with above, so only the
		 * multibyte forms remain.
		 */
		if ((ch & 0xe0) == 0xc0) {
			wch = ch & 0x1f;
			want = 2;
			lbound = 0x80;
		} else if ((ch & 0xf0) == 0xe0) {
			wch = ch & 0x0f;
			want = 3;
			lbound = 0x800;
		} else if ((ch & 0xf8) == 0xf0) {
			wch = ch & 0x07;
			want = 4;
			lbound = 0x10000;
		} else {
			/* Not a valid UTF-8 lead byte. */
			errno = EILSEQ;
			return (size_t)-1;
		}
		i = 1;
	} else {
		/* Resuming a character left unfinished by an earlier call. */
		want = us->want;
		lbound = us->lbound;
		wch = us->ch;
		i = 0;
	}

	/* Fold in continuation bytes six bits at a time, high bits first. */
	for (; i < want && (size_t)i < n; i++) {
		if ((*s & 0xc0) != 0x80) {
			/* Not a continuation byte. */
			errno = EILSEQ;
			return (size_t)-1;
		}
		wch <<= 6;
		wch |= *s++ & 0x3f;
	}

	if (i < want) {
		/* Ran out of input: remember how far we got. */
		us->want = want - i;
		us->lbound = lbound;
		us->ch = wch;
		return (size_t)-2;
	}

	if (wch < lbound) {
		/* Overlong (redundant) encoding. */
		errno = EILSEQ;
		return (size_t)-1;
	}
	if (wch >= 0xd800 && wch <= 0xdfff) {
		/* Surrogate values are rejected. */
		errno = EILSEQ;
		return (size_t)-1;
	}
	if (wch > 0x10ffff) {
		/* Values above 0x10ffff are rejected. */
		errno = EILSEQ;
		return (size_t)-1;
	}

	if (pwc != NULL)
		*pwc = wch;
	us->want = 0;
	return wch == L'\0' ? 0 : want;
}
/*
 * Tell whether ps is in the initial conversion state, that is, whether
 * no bytes of a multibyte character are still outstanding.
 *
 * Returns 1 when the state's want field is 0, otherwise 0.
 */
int
_citrus_utf8_ctype_mbsinit(const mbstate_t * __restrict ps)
{
	const struct _utf8_state *state;

	state = (const struct _utf8_state *)ps;
	return state->want == 0;
}
/*
 * Convert at most nmc bytes of the UTF-8 string *src into wide
 * characters.
 *
 * dst	output array holding room for len wide characters, or NULL to
 *	only count the characters; in that case len is ignored and *src
 *	is not modified.
 * src	address of the input pointer.  With a non-NULL dst it is set to
 *	NULL once a terminating NUL has been converted, left pointing at
 *	the offending sequence on a conversion error, advanced by nmc
 *	when the input ends inside a character, and otherwise advanced
 *	past the bytes consumed.
 * ps	conversion state, interpreted as a struct _utf8_state.
 *
 * Returns the number of wide characters produced, not counting a
 * terminating NUL, or (size_t)-1 with errno set on failure.
 */
size_t
_citrus_utf8_ctype_mbsnrtowcs(wchar_t * __restrict dst,
    const char ** __restrict src, size_t nmc, size_t len,
    mbstate_t * __restrict ps)
{
	struct _utf8_state *state;
	size_t in, out, used;
	unsigned char byte;

	state = (struct _utf8_state *)ps;
	in = 0;
	out = 0;

	if (dst == NULL) {
		/*
		 * The ASCII shortcut in the loop never looks at the
		 * conversion state, so an ASCII byte arriving while a
		 * character is still pending has to be rejected here.
		 */
		if (nmc > 0 && state->want > 0 &&
		    (unsigned char)(*src)[0] < 0x80) {
			errno = EILSEQ;
			return (size_t)-1;
		}

		while (in < nmc) {
			byte = (unsigned char)(*src)[in];
			if (byte < 0x80) {
				/* ASCII shortcut; NUL ends the count. */
				if (byte == '\0')
					break;
				used = 1;
			} else {
				used = _citrus_utf8_ctype_mbrtowc(NULL,
				    *src + in, nmc - in, ps);
				if (used == (size_t)-1)
					return used;
				/* Truncated input or NUL: stop counting. */
				if (used == (size_t)-2 || used == 0)
					break;
			}
			in += used;
			out++;
		}
		return out;
	}

	/* Same pending-character check as above, for the storing path. */
	if (len > 0 && nmc > 0 && state->want > 0 &&
	    (unsigned char)(*src)[0] < 0x80) {
		errno = EILSEQ;
		return (size_t)-1;
	}

	while (in < nmc && out < len) {
		byte = (unsigned char)(*src)[in];
		if (byte < 0x80) {
			/* ASCII shortcut; the NUL itself is stored too. */
			dst[out] = (wchar_t)byte;
			if (byte == '\0') {
				*src = NULL;
				return out;
			}
			used = 1;
		} else {
			used = _citrus_utf8_ctype_mbrtowc(dst + out,
			    *src + in, nmc - in, ps);
			if (used == (size_t)-1) {
				/* Leave *src at the bad sequence. */
				*src += in;
				return used;
			}
			if (used == (size_t)-2) {
				/* Input ended mid-character. */
				*src += nmc;
				return out;
			}
			if (used == 0) {
				*src = NULL;
				return out;
			}
		}
		in += used;
		out++;
	}

	*src += in;
	return out;
}
/*
 * Encode the wide character wc as UTF-8 into s.
 *
 * s	output buffer; when NULL nothing is written and 1 is returned.
 * wc	character to encode.
 * ps	conversion state, interpreted as a struct _utf8_state; its want
 *	field must be 0.
 *
 * Returns the number of bytes stored (1 to 4), or (size_t)-1 with
 * errno set to EINVAL when the state's want field is not 0, or to
 * EILSEQ when wc is negative, in the range 0xd800-0xdfff, or above
 * 0x10ffff.
 */
size_t
_citrus_utf8_ctype_wcrtomb(char * __restrict s, wchar_t wc,
    mbstate_t * __restrict ps)
{
	struct _utf8_state *state = (struct _utf8_state *)ps;
	unsigned char prefix;
	int nbytes, pos;

	if (state->want != 0) {
		errno = EINVAL;
		return (size_t)-1;
	}

	if (s == NULL)
		return 1;

	if (wc < 0 || (wc > 0xd7ff && wc < 0xe000) || wc > 0x10ffff) {
		errno = EILSEQ;
		return (size_t)-1;
	}

	/* Plain ASCII is stored as is. */
	if (wc <= 0x7f) {
		s[0] = (char)wc;
		return 1;
	}

	/*
	 * Choose the shortest encoding.  The high bits of the first
	 * byte announce the length of the sequence.
	 */
	if (wc <= 0x7ff) {
		prefix = 0xc0;
		nbytes = 2;
	} else if (wc <= 0xffff) {
		prefix = 0xe0;
		nbytes = 3;
	} else {
		prefix = 0xf0;
		nbytes = 4;
	}

	/* Fill continuation bytes back to front, six bits apiece. */
	pos = nbytes - 1;
	while (pos > 0) {
		s[pos] = (wc & 0x3f) | 0x80;
		wc >>= 6;
		pos--;
	}

	/* What is left of wc goes into the first byte with the prefix. */
	s[0] = (wc & 0xff) | prefix;

	return nbytes;
}
/*
 * Convert at most nwc wide characters from *src into UTF-8.
 *
 * dst	output buffer of len bytes, or NULL to only compute the number
 *	of bytes needed; in that case len is ignored and *src is not
 *	modified.
 * src	address of the input pointer.  With a non-NULL dst it is set to
 *	NULL once a terminating NUL has been converted, left pointing at
 *	the offending character on a conversion error, and otherwise
 *	advanced past the characters converted.
 * ps	conversion state, interpreted as a struct _utf8_state; its want
 *	field must be 0.
 *
 * Returns the number of bytes produced, not counting a terminating
 * NUL, or (size_t)-1 with errno set (EINVAL when the state's want
 * field is not 0, otherwise whatever the single-character encoder
 * reported).  A character is never split: when its encoding does not
 * fit in the space left in dst, conversion stops before it.
 */
size_t
_citrus_utf8_ctype_wcsnrtombs(char * __restrict dst,
    const wchar_t ** __restrict src, size_t nwc, size_t len,
    mbstate_t * __restrict ps)
{
	struct _utf8_state *us;
	char buf[_CITRUS_UTF8_MB_CUR_MAX];
	size_t i, o, r;

	us = (struct _utf8_state *)ps;

	if (us->want != 0) {
		errno = EINVAL;
		return (size_t)-1;
	}

	if (dst == NULL) {
		/* Counting only: encode into scratch space and add up. */
		for (i = o = 0; i < nwc; i++, o += r) {
			wchar_t wc = (*src)[i];

			if (wc >= 0 && wc < 0x80) {
				/* Fast path for plain ASCII characters. */
				if (wc == 0)
					return o;
				r = 1;
			} else {
				r = _citrus_utf8_ctype_wcrtomb(buf, wc, ps);
				if (r == (size_t)-1)
					return r;
			}
		}
		return o;
	}

	for (i = o = 0; i < nwc && o < len; i++, o += r) {
		wchar_t wc = (*src)[i];

		if (wc >= 0 && wc < 0x80) {
			/*
			 * Fast path for plain ASCII characters.  The
			 * destination is a byte buffer, so narrow to char.
			 */
			dst[o] = (char)wc;
			if (wc == 0) {
				*src = NULL;
				return o;
			}
			r = 1;
		} else if (len - o >= _CITRUS_UTF8_MB_CUR_MAX) {
			/* Room for the longest encoding: write in place. */
			r = _citrus_utf8_ctype_wcrtomb(dst + o, wc, ps);
			if (r == (size_t)-1) {
				*src += i;
				return r;
			}
		} else {
			/*
			 * Space is tight: encode into the scratch buffer
			 * and copy only if the result fits.
			 */
			r = _citrus_utf8_ctype_wcrtomb(buf, wc, ps);
			if (r == (size_t)-1) {
				*src += i;
				return r;
			}
			if (r > len - o)
				break;
			memcpy(dst + o, buf, r);
		}
	}

	*src += i;
	return o;
}