#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
#include <CoreFoundation/CoreFoundation.h>
#endif
#endif
#include "compat.h"
#include "attrib.h"
#include "types.h"
#include "unistr.h"
#include "debug.h"
#include "logging.h"
#include "misc.h"
/*
 * ALLOW_BROKEN_UNICODE - when non-zero, the UTF-16 <-> UTF-8 converters in
 * this file tolerate unpaired surrogates (they are encoded/decoded as
 * three-byte sequences) instead of failing with EILSEQ. Defaults to 1 unless
 * the build already defined it.
 */
#ifndef ALLOW_BROKEN_UNICODE
#define ALLOW_BROKEN_UNICODE 1
#endif
/*
 * Non-zero when ntfs_ucstombs()/ntfs_mbstoucs() use the built-in UTF-8
 * converters rather than the locale's multibyte functions. Updated by
 * ntfs_set_char_encoding().
 */
static int use_utf8 = 1;
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
/*
 * Non-zero when UTF-8 strings are passed through
 * ntfs_macosx_normalize_utf8() during conversion. Updated by
 * ntfs_macosx_normalize_filenames().
 */
static int nfconvert_utf8 = 1;
#endif
#endif
/* The table below is compiled out and not referenced by any code here. */
#if 0
static const u8 legal_ansi_char_array[0x40] = {
0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
};
#endif
/**
 * ntfs_names_are_equal - test two Unicode names for equality
 * @s1:		first name (little-endian Unicode)
 * @s1_len:	number of characters in @s1
 * @s2:		second name (little-endian Unicode)
 * @s2_len:	number of characters in @s2
 * @ic:		CASE_SENSITIVE for an exact comparison, anything else compares
 *		through the upcase table
 * @upcase:	upcase table, only used for the case-insensitive comparison
 * @upcase_size: number of entries in @upcase
 *
 * Returns TRUE when both names have the same length and compare equal,
 * FALSE otherwise. Two empty names are equal.
 */
BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,
		const ntfschar *s2, size_t s2_len,
		const IGNORE_CASE_BOOL ic,
		const ntfschar *upcase, const u32 upcase_size)
{
	int diff;

	/* Names of different lengths can never match. */
	if (s1_len != s2_len)
		return FALSE;
	/* Nothing to compare: two empty names match. */
	if (s1_len == 0)
		return TRUE;

	if (ic == CASE_SENSITIVE)
		diff = ntfs_ucsncmp(s1, s2, s1_len);
	else
		diff = ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size);

	return diff ? FALSE : TRUE;
}
/**
 * ntfs_names_full_collate - collate two Unicode names
 * @name1:	first name (little-endian Unicode)
 * @name1_len:	number of characters in @name1
 * @name2:	second name (little-endian Unicode)
 * @name2_len:	number of characters in @name2
 * @ic:		CASE_SENSITIVE enables the raw-character tie-break, any other
 *		value compares through the upcase table only
 * @upcase:	upcase table (little-endian entries)
 * @upcase_len:	number of entries in @upcase
 *
 * Returns -1 if @name1 collates before @name2, 1 if it collates after, and
 * 0 if the names are considered equal.
 *
 * In both modes the order is decided first by the upcased characters and
 * then by the name lengths. In CASE_SENSITIVE mode a remaining tie is broken
 * by the raw (non-upcased) characters saved at the position where the
 * initial raw scan stopped.
 *
 * When built with DEBUG, a NULL name/upcase pointer or a zero @upcase_len
 * terminates the process.
 */
int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len,
const ntfschar *name2, const u32 name2_len,
const IGNORE_CASE_BOOL ic, const ntfschar *upcase,
const u32 upcase_len)
{
u32 cnt;
/* c1/c2: raw characters, u1/u2: their upcased counterparts */
u16 c1, c2;
u16 u1, u2;
#ifdef DEBUG
if (!name1 || !name2 || !upcase || !upcase_len) {
ntfs_log_debug("ntfs_names_collate received NULL pointer!\n");
exit(1);
}
#endif
/* Only the common prefix length can be compared character-wise. */
cnt = min(name1_len, name2_len);
if (cnt > 0) {
if (ic == CASE_SENSITIVE) {
/*
 * Skip identical raw characters; the loop stops on the first
 * difference or on the last character of the common length.
 */
while (--cnt && (*name1 == *name2)) {
name1++;
name2++;
}
/* Remember the raw characters for the final tie-break. */
u1 = c1 = le16_to_cpu(*name1);
u2 = c2 = le16_to_cpu(*name2);
if (u1 < upcase_len)
u1 = le16_to_cpu(upcase[u1]);
if (u2 < upcase_len)
u2 = le16_to_cpu(upcase[u2]);
/*
 * The raw characters differ only by case (or are equal) and
 * characters remain: go on comparing upcased values.
 */
if ((u1 == u2) && cnt)
do {
name1++;
u1 = le16_to_cpu(*name1);
name2++;
u2 = le16_to_cpu(*name2);
if (u1 < upcase_len)
u1 = le16_to_cpu(upcase[u1]);
if (u2 < upcase_len)
u2 = le16_to_cpu(upcase[u2]);
} while ((u1 == u2) && --cnt);
/* 1st criterion: upcased characters */
if (u1 < u2)
return -1;
if (u1 > u2)
return 1;
/* 2nd criterion: the shorter name sorts first */
if (name1_len < name2_len)
return -1;
if (name1_len > name2_len)
return 1;
/* 3rd criterion: raw characters saved above */
if (c1 < c2)
return -1;
if (c1 > c2)
return 1;
} else {
/* Compare upcased characters until a difference or the end. */
do {
u1 = le16_to_cpu(*name1);
name1++;
u2 = le16_to_cpu(*name2);
name2++;
if (u1 < upcase_len)
u1 = le16_to_cpu(upcase[u1]);
if (u2 < upcase_len)
u2 = le16_to_cpu(upcase[u2]);
} while ((u1 == u2) && --cnt);
if (u1 < u2)
return -1;
if (u1 > u2)
return 1;
if (name1_len < name2_len)
return -1;
if (name1_len > name2_len)
return 1;
}
} else {
/* At least one name is empty: only the lengths matter. */
if (name1_len < name2_len)
return -1;
if (name1_len > name2_len)
return 1;
}
return 0;
}
/**
 * ntfs_ucsncmp - compare two little-endian Unicode strings
 * @s1:	first string
 * @s2:	second string
 * @n:	maximum number of characters to compare
 *
 * Compares at most @n characters, stopping early when both strings reach a
 * NUL character at the same position.
 *
 * Returns -1 if @s1 sorts before @s2, 1 if it sorts after, 0 if the compared
 * parts are identical. When built with DEBUG, a NULL argument terminates the
 * process.
 */
int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
{
	size_t pos = 0;

#ifdef DEBUG
	if (!s1 || !s2) {
		ntfs_log_debug("ntfs_wcsncmp() received NULL pointer!\n");
		exit(1);
	}
#endif
	while (pos < n) {
		const u16 left = le16_to_cpu(s1[pos]);
		const u16 right = le16_to_cpu(s2[pos]);

		if (left != right)
			return (left < right) ? -1 : 1;
		/* Both strings ended here. */
		if (left == 0)
			return 0;
		pos++;
	}
	return 0;
}
/**
 * ntfs_ucsncasecmp - compare two little-endian Unicode strings, ignoring case
 * @s1:		first string
 * @s2:		second string
 * @n:		maximum number of characters to compare
 * @upcase:	upcase table (little-endian entries)
 * @upcase_size: number of entries in @upcase
 *
 * Each character below @upcase_size is mapped through @upcase before being
 * compared; other characters are compared as they are. Comparison stops
 * after @n characters or when both (mapped) characters are NUL.
 *
 * Returns -1 if @s1 sorts before @s2, 1 if it sorts after, 0 if the compared
 * parts match. When built with DEBUG, a NULL argument terminates the process.
 */
int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
		const ntfschar *upcase, const u32 upcase_size)
{
	size_t pos;

#ifdef DEBUG
	if (!s1 || !s2 || !upcase) {
		ntfs_log_debug("ntfs_wcsncasecmp() received NULL pointer!\n");
		exit(1);
	}
#endif
	for (pos = 0; pos < n; pos++) {
		u16 left = le16_to_cpu(s1[pos]);
		u16 right = le16_to_cpu(s2[pos]);

		if (left < upcase_size)
			left = le16_to_cpu(upcase[left]);
		if (right < upcase_size)
			right = le16_to_cpu(upcase[right]);

		if (left != right)
			return (left < right) ? -1 : 1;
		/* Both strings ended here. */
		if (left == 0)
			break;
	}
	return 0;
}
/**
 * ntfs_ucsnlen - bounded length of a little-endian Unicode string
 * @s:		string to measure
 * @maxlen:	maximum number of characters to examine
 *
 * Returns the number of characters before the first NUL character, or
 * @maxlen if no NUL is found within the first @maxlen characters.
 */
u32 ntfs_ucsnlen(const ntfschar *s, u32 maxlen)
{
	u32 len = 0;

	while ((len < maxlen) && le16_to_cpu(s[len]))
		len++;
	return len;
}
/**
 * ntfs_ucsndup - duplicate a little-endian Unicode string
 * @s:		string to duplicate
 * @maxlen:	maximum number of characters to copy
 *
 * Copies at most @maxlen characters of @s (fewer if a NUL is met first) into
 * a buffer obtained from ntfs_malloc() and always NUL-terminates the copy.
 *
 * Returns the new string, or NULL if the allocation failed.
 */
ntfschar *ntfs_ucsndup(const ntfschar *s, u32 maxlen)
{
	const u32 len = ntfs_ucsnlen(s, maxlen);
	ntfschar *copy = ntfs_malloc((len + 1) * sizeof(ntfschar));

	if (!copy)
		return NULL;

	memcpy(copy, s, len * sizeof(ntfschar));
	copy[len] = const_cpu_to_le16(L'\0');
	return copy;
}
/**
 * ntfs_name_upcase - map a Unicode name through an upcase table, in place
 * @name:	name to convert (little-endian Unicode)
 * @name_len:	number of characters in @name
 * @upcase:	upcase table (little-endian entries)
 * @upcase_len:	number of entries in @upcase
 *
 * Characters whose value is not below @upcase_len are left unchanged.
 */
void ntfs_name_upcase(ntfschar *name, u32 name_len, const ntfschar *upcase,
		const u32 upcase_len)
{
	u32 pos = 0;

	while (pos < name_len) {
		const u16 code = le16_to_cpu(name[pos]);

		/* Table entries are already little-endian: store them as is. */
		if (code < upcase_len)
			name[pos] = upcase[code];
		pos++;
	}
}
/**
 * ntfs_name_locase - map a Unicode name through a locase table, in place
 * @name:	name to convert (little-endian Unicode)
 * @name_len:	number of characters in @name
 * @locase:	locase table (little-endian entries), may be NULL
 * @locase_len:	number of entries in @locase
 *
 * Does nothing when @locase is NULL. Characters whose value is not below
 * @locase_len are left unchanged.
 */
void ntfs_name_locase(ntfschar *name, u32 name_len, const ntfschar *locase,
		const u32 locase_len)
{
	u32 pos;

	if (!locase)
		return;

	for (pos = 0; pos < name_len; pos++) {
		const u16 code = le16_to_cpu(name[pos]);

		/* Table entries are already little-endian: store them as is. */
		if (code < locase_len)
			name[pos] = locase[code];
	}
}
/**
 * ntfs_file_value_upcase - upcase the name held in a file name attribute
 * @file_name_attr:	attribute whose file_name field is converted in place
 * @upcase:		upcase table (little-endian entries)
 * @upcase_len:		number of entries in @upcase
 *
 * Applies ntfs_name_upcase() to the file_name_length characters stored in
 * the attribute.
 */
void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr,
		const ntfschar *upcase, const u32 upcase_len)
{
	ntfschar *chars = (ntfschar*)&file_name_attr->file_name;
	const u32 count = file_name_attr->file_name_length;

	ntfs_name_upcase(chars, count, upcase, upcase_len);
}
/*
 * Compute how many UTF-8 bytes are needed to encode a UTF-16LE string.
 *
 * ins      - UTF-16LE input; conversion stops at ins_len characters or at
 *            the first NUL character, whichever comes first
 * ins_len  - maximum number of input characters to examine
 * outs_len - maximum number of output bytes allowed (terminator excluded)
 *
 * Returns the number of bytes needed, not counting a terminating NUL.
 * Returns -1 with errno set to ENAMETOOLONG when more than outs_len bytes
 * would be needed, or to EILSEQ when the input is rejected.
 *
 * With ALLOW_BROKEN_UNICODE set, unpaired surrogates and every value from
 * 0xe000 upwards are each counted as a three-byte sequence instead of being
 * rejected.
 */
static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_len)
{
int i, ret = -1;
int count = 0;
/* TRUE while a leading surrogate is waiting for its trailing half */
BOOL surrog;
surrog = FALSE;
/* Stop scanning as soon as the byte budget has been exceeded. */
for (i = 0; i < ins_len && ins[i] && count <= outs_len; i++) {
unsigned short c = le16_to_cpu(ins[i]);
if (surrog) {
if ((c >= 0xdc00) && (c < 0xe000)) {
/* A complete surrogate pair takes four bytes. */
surrog = FALSE;
count += 4;
} else {
#if ALLOW_BROKEN_UNICODE
/*
 * Lone leading surrogate: count it as three bytes and
 * examine the current character again on its own.
 */
surrog = FALSE;
count += 3;
--i;
continue;
#else
goto fail;
#endif
}
} else
if (c < 0x80)
count++;
else if (c < 0x800)
count += 2;
else if (c < 0xd800)
count += 3;
else if (c < 0xdc00)
surrog = TRUE;
#if ALLOW_BROKEN_UNICODE
/* lone trailing surrogate */
else if (c < 0xe000)
count += 3;
else if (c >= 0xe000)
#else
else if ((c >= 0xe000) && (c < 0xfffe))
#endif
count += 3;
else
goto fail;
}
/* The input ended on a leading surrogate. */
if (surrog && count <= outs_len) {
#if ALLOW_BROKEN_UNICODE
count += 3;
#else
goto fail;
#endif
}
if (count > outs_len) {
errno = ENAMETOOLONG;
goto out;
}
ret = count;
out:
return ret;
fail:
errno = EILSEQ;
goto out;
}
/*
 * Convert a UTF-16LE string to a NUL-terminated UTF-8 string.
 *
 * ins      - UTF-16LE input; conversion stops at ins_len characters or at
 *            the first NUL character
 * ins_len  - maximum number of input characters to convert
 * outs     - in/out: when *outs is NULL a buffer is allocated with
 *            ntfs_malloc() and returned there, otherwise the caller's
 *            buffer is filled
 * outs_len - size in bytes of the caller's buffer (ignored when *outs is
 *            NULL, in which case the result is limited to PATH_MAX bytes
 *            including the terminator)
 *
 * Returns the length in bytes of the string stored in *outs (terminator
 * excluded), or -1 with errno set (ENAMETOOLONG, EILSEQ, or whatever the
 * allocation left) on failure.
 *
 * With ALLOW_BROKEN_UNICODE set, unpaired surrogates are emitted as
 * three-byte sequences instead of failing.
 *
 * On Apple builds with ENABLE_NFCONV, the result is additionally passed to
 * ntfs_macosx_normalize_utf8() (decomposed form) when normalization is
 * enabled; a normalization failure is logged and the unnormalized string is
 * kept.
 */
static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
		char **outs, int outs_len)
{
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
	/* Remember the caller's buffer to tell it from our own allocation. */
	char *original_outs_value = *outs;
	int original_outs_len = outs_len;
#endif
#endif
	char *t;
	int i, size, ret = -1;
	int halfpair;

	/* Non-zero while a leading surrogate waits for its trailing half. */
	halfpair = 0;
	if (!*outs) {
		outs_len = PATH_MAX;
	}
	/* Validate the input and make sure it fits, leaving room for NUL. */
	size = utf16_to_utf8_size(ins, ins_len, outs_len - 1);
	if (size < 0)
		goto out;
	if (!*outs) {
		outs_len = size + 1;
		*outs = ntfs_malloc(outs_len);
		if (!*outs)
			goto out;
	}
	t = *outs;
	for (i = 0; i < ins_len && ins[i]; i++) {
		unsigned short c = le16_to_cpu(ins[i]);

		if (halfpair) {
			if ((c >= 0xdc00) && (c < 0xe000)) {
				/* Complete surrogate pair: four bytes. */
				*t++ = 0xf0 + (((halfpair + 64) >> 8) & 7);
				*t++ = 0x80 + (((halfpair + 64) >> 2) & 63);
				*t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4);
				*t++ = 0x80 + (c & 63);
				halfpair = 0;
			} else {
#if ALLOW_BROKEN_UNICODE
				/*
				 * Lone leading surrogate: emit it as three
				 * bytes, then process the current character
				 * again on its own.
				 */
				*t++ = 0xe0 | (halfpair >> 12);
				*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
				*t++ = 0x80 | (halfpair & 0x3f);
				halfpair = 0;
				--i;
				continue;
#else
				goto fail;
#endif
			}
		} else if (c < 0x80) {
			*t++ = c;
		} else {
			if (c < 0x800) {
				*t++ = (0xc0 | ((c >> 6) & 0x3f));
				*t++ = 0x80 | (c & 0x3f);
			} else if (c < 0xd800) {
				*t++ = 0xe0 | (c >> 12);
				*t++ = 0x80 | ((c >> 6) & 0x3f);
				*t++ = 0x80 | (c & 0x3f);
			} else if (c < 0xdc00)
				halfpair = c;
#if ALLOW_BROKEN_UNICODE
			else if (c < 0xe000) {
				/* lone trailing surrogate */
				*t++ = 0xe0 | (c >> 12);
				*t++ = 0x80 | ((c >> 6) & 0x3f);
				*t++ = 0x80 | (c & 0x3f);
			}
#endif
			else if (c >= 0xe000) {
				*t++ = 0xe0 | (c >> 12);
				*t++ = 0x80 | ((c >> 6) & 0x3f);
				*t++ = 0x80 | (c & 0x3f);
			} else
				goto fail;
		}
	}
#if ALLOW_BROKEN_UNICODE
	/* The input ended on a leading surrogate. */
	if (halfpair) {
		*t++ = 0xe0 | (halfpair >> 12);
		*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
		*t++ = 0x80 | (halfpair & 0x3f);
	}
#endif
	*t = '\0';
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
	if(nfconvert_utf8 && (t - *outs) > 0) {
		char *new_outs = NULL;
		int new_outs_len = ntfs_macosx_normalize_utf8(*outs, &new_outs, 0);
		if(new_outs_len >= 0 && new_outs != NULL) {
			if(original_outs_value != *outs) {
				/* We allocated *outs: hand over the new buffer. */
				free(*outs);
				*outs = new_outs;
				t = *outs + new_outs_len;
			}
			else {
				/*
				 * Caller's buffer: copy what fits, keeping a
				 * terminating NUL, and report the length of the
				 * string actually stored (not the buffer size).
				 */
				memset(*outs, 0, original_outs_len);
				strncpy(*outs, new_outs, original_outs_len-1);
				t = *outs + strlen(*outs);
				free(new_outs);
			}
		}
		else {
			ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFD: %s\n", *outs);
			ntfs_log_error(" new_outs=0x%p\n", new_outs);
			ntfs_log_error(" new_outs_len=%d\n", new_outs_len);
		}
	}
#endif
#endif
	ret = t - *outs;
out:
	return ret;
fail:
	errno = EILSEQ;
	goto out;
}
/*
 * Estimate how many UTF-16 units a NUL-terminated UTF-8 string converts to.
 *
 * Only lead bytes are inspected: continuation bytes are skipped without
 * being validated, and a sequence cut short by the terminating NUL simply
 * ends the scan. A four-byte sequence counts for two units (surrogate pair).
 *
 * Returns the number of units (terminator excluded), or -1 with errno set
 * to EILSEQ for a lead byte of 0xF5 or above, or to ENAMETOOLONG when the
 * count reaches PATH_MAX.
 */
static int utf8_to_utf16_size(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;
	size_t units = 0;
	unsigned int lead;
	int extra;

	for (;;) {
		lead = *p++;
		if (lead == 0)
			break;
		units++;
		if (units >= PATH_MAX) {
			errno = ENAMETOOLONG;
			return -1;
		}
		/* ASCII and stray continuation bytes count for one unit. */
		if (lead < 0xc0)
			continue;
		if (lead >= 0xF5) {
			errno = EILSEQ;
			return -1;
		}
		/* Number of bytes expected to follow this lead byte. */
		extra = 1 + (lead >= 0xE0 ? 1 : 0) + (lead >= 0xF0 ? 1 : 0);
		while ((extra > 0) && *p) {
			p++;
			extra--;
		}
		/* The string ended in the middle of the sequence. */
		if (extra > 0)
			break;
		if (lead >= 0xF0) {
			/* Second half of the surrogate pair. */
			units++;
			if (units >= PATH_MAX) {
				errno = ENAMETOOLONG;
				return -1;
			}
		}
	}
	return (int)units;
}
/*
 * Decode the UTF-8 sequence at the start of a NUL-terminated string.
 *
 * wc - receives the decoded code point
 * s  - input string
 *
 * Returns the number of bytes consumed (1 to 4), 0 when the string starts
 * with the terminating NUL (*wc is then 0), or -1 with errno set to EILSEQ
 * for an invalid sequence. Overlong encodings and values above 0x10ffff are
 * rejected. Surrogates and 0xfffe/0xffff encoded on three bytes are only
 * accepted when ALLOW_BROKEN_UNICODE is set.
 */
static int utf8_to_unicode(u32 *wc, const char *s)
{
	const unsigned int lead = *((const unsigned char *)s);

	if (lead < 0x80) {
		/* Terminating NUL (nothing consumed) or plain ASCII. */
		*wc = (u32) lead;
		return lead ? 1 : 0;
	}
	/* Continuation bytes and overlong two-byte leads (0xc0, 0xc1). */
	if (lead < 0xc2)
		goto bad;

	if (lead < 0xE0) {
		/* Two-byte sequence. */
		if ((s[1] & 0xC0) != 0x80)
			goto bad;
		*wc = ((u32)(lead & 0x1F) << 6)
			| ((u32)(s[1] & 0x3F));
		return 2;
	}

	if (lead < 0xF0) {
		/* Three-byte sequence. */
		if (((s[1] & 0xC0) != 0x80) || ((s[2] & 0xC0) != 0x80))
			goto bad;
		*wc = ((u32)(lead & 0x0F) << 12)
			| ((u32)(s[1] & 0x3F) << 6)
			| ((u32)(s[2] & 0x3F));
#if ALLOW_BROKEN_UNICODE
		/* Anything not overlong is accepted, surrogates included. */
		if ((*wc >= 0x800) && (*wc <= 0xFFFF))
			return 3;
#else
		if (((*wc >= 0x800) && (*wc <= 0xD7FF))
		    || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
			return 3;
#endif
		goto bad;
	}

	if (lead < 0xF5) {
		/* Four-byte sequence. */
		if (((s[1] & 0xC0) != 0x80) || ((s[2] & 0xC0) != 0x80)
		    || ((s[3] & 0xC0) != 0x80))
			goto bad;
		*wc = ((u32)(lead & 0x07) << 18)
			| ((u32)(s[1] & 0x3F) << 12)
			| ((u32)(s[2] & 0x3F) << 6)
			| ((u32)(s[3] & 0x3F));
		if ((*wc >= 0x10000) && (*wc <= 0x10ffff))
			return 4;
	}
bad:
	errno = EILSEQ;
	return -1;
}
/*
 * Convert a NUL-terminated UTF-8 string to a NUL-terminated UTF-16LE string.
 *
 * ins  - UTF-8 input
 * outs - in/out: when *outs is NULL a buffer is allocated with
 *        ntfs_malloc() and returned there; otherwise the caller's buffer is
 *        filled. No buffer size is passed in, so a caller-supplied buffer
 *        must already be large enough.
 *
 * Returns the number of UTF-16 units stored, terminator excluded, or -1
 * with errno set on failure. When the conversion fails after this function
 * allocated the buffer, the buffer is freed and *outs is reset to NULL.
 *
 * On Apple builds with ENABLE_NFCONV, the input is first normalized to the
 * composed form when normalization is enabled; if that fails the original
 * input is converted.
 */
static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
{
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
/* Normalized copy of the input, released before returning. */
char *new_ins = NULL;
if(nfconvert_utf8) {
int new_ins_len;
new_ins_len = ntfs_macosx_normalize_utf8(ins, &new_ins, 1);
if(new_ins_len >= 0)
ins = new_ins;
else
ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFC: %s\n", ins);
}
#endif
#endif
const char *t = ins;
u32 wc;
BOOL allocated;
ntfschar *outpos;
int shorts, ret = -1;
/* Number of UTF-16 units needed, also rejects overlong input. */
shorts = utf8_to_utf16_size(ins);
if (shorts < 0)
goto fail;
allocated = FALSE;
if (!*outs) {
*outs = ntfs_malloc((shorts + 1) * sizeof(ntfschar));
if (!*outs)
goto fail;
allocated = TRUE;
}
outpos = *outs;
while(1) {
/* m is the number of input bytes consumed, 0 at the end, <0 on error */
int m = utf8_to_unicode(&wc, t);
if (m <= 0) {
if (m < 0) {
/* Invalid input: only release what was allocated here. */
if (allocated) {
free(*outs);
*outs = (ntfschar*)NULL;
}
goto fail;
}
*outpos++ = const_cpu_to_le16(0);
break;
}
if (wc < 0x10000)
*outpos++ = cpu_to_le16(wc);
else {
/* Code points above 0xffff are stored as a surrogate pair. */
wc -= 0x10000;
*outpos++ = cpu_to_le16((wc >> 10) + 0xd800);
*outpos++ = cpu_to_le16((wc & 0x3ff) + 0xdc00);
}
t += m;
}
/* Do not count the terminator in the returned length. */
ret = --outpos - *outs;
fail:
#if defined(__APPLE__) || defined(__DARWIN__)
#ifdef ENABLE_NFCONV
if(new_ins != NULL)
free(new_ins);
#endif
#endif
return ret;
}
/**
 * ntfs_ucstombs - convert a little-endian Unicode string to a multibyte string
 * @ins:	input Unicode string
 * @ins_len:	maximum number of characters of @ins to convert (conversion
 *		also stops at the first NUL character)
 * @outs:	in/out: when *@outs is NULL a buffer is allocated with
 *		ntfs_malloc() and returned there, otherwise the caller's
 *		buffer is filled
 * @outs_len:	size in bytes of the caller's buffer
 *
 * In UTF-8 mode the work is delegated to ntfs_utf16_to_utf8(). Otherwise
 * the characters are converted one at a time with the locale's
 * wcrtomb()/wctomb().
 *
 * Returns the length in bytes of the NUL-terminated result (terminator
 * excluded), or -1 with errno set: EINVAL for NULL arguments, ENAMETOOLONG
 * when a caller-supplied buffer is too small, EILSEQ when no conversion is
 * available or the shift state is not initial at the end, or the error left
 * by the allocation or conversion function.
 */
int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs,
		int outs_len)
{
	char *mbs;
	int mbs_len;
#ifdef MB_CUR_MAX
	wchar_t wc;
	int i, o;
	int cnt = 0;
#ifdef HAVE_MBSINIT
	mbstate_t mbstate;
#endif
#endif

	if (!ins || !outs) {
		errno = EINVAL;
		return -1;
	}
	mbs = *outs;
	mbs_len = outs_len;
	/* A caller buffer without any room cannot even hold the NUL. */
	if (mbs && !mbs_len) {
		errno = ENAMETOOLONG;
		return -1;
	}
	if (use_utf8)
		return ntfs_utf16_to_utf8(ins, ins_len, outs, outs_len);
#ifdef MB_CUR_MAX
	if (!mbs) {
		/* Worst case: every character needs MB_CUR_MAX bytes. */
		mbs_len = (ins_len + 1) * MB_CUR_MAX;
		mbs = ntfs_malloc(mbs_len);
		if (!mbs)
			return -1;
	}
#ifdef HAVE_MBSINIT
	memset(&mbstate, 0, sizeof(mbstate));
#else
#ifndef __HAIKU__
	/* Reset the internal shift state. */
	wctomb(NULL, 0);
#endif
#endif
	for (i = o = 0; i < ins_len; i++) {
		/* Make sure the next character fits, whatever its size. */
		if ((int)(o + MB_CUR_MAX) > mbs_len) {
			char *tc;

			/* A caller-supplied buffer cannot be grown. */
			if (mbs == *outs) {
				errno = ENAMETOOLONG;
				return -1;
			}
			tc = ntfs_malloc((mbs_len + 64) & ~63);
			if (!tc)
				goto err_out;
			memcpy(tc, mbs, mbs_len);
			mbs_len = (mbs_len + 64) & ~63;
			free(mbs);
			mbs = tc;
		}
		wc = (wchar_t)le16_to_cpu(ins[i]);
		if (!wc)
			break;
#ifdef HAVE_MBSINIT
		cnt = wcrtomb(mbs + o, wc, &mbstate);
#elif defined(__HAIKU__)
		cnt = -1;
#else
		cnt = wctomb(mbs + o, wc);
#endif
		if (cnt == -1)
			goto err_out;
		if (cnt <= 0) {
			ntfs_log_debug("Eeek. cnt <= 0, cnt = %i\n", cnt);
			errno = EINVAL;
			goto err_out;
		}
		o += cnt;
	}
	/*
	 * The loop only guarantees room for the characters themselves: when
	 * the last one filled the buffer exactly there is no byte left for
	 * the terminator, so fail instead of writing past the end.
	 */
	if (o >= mbs_len) {
		errno = ENAMETOOLONG;
		goto err_out;
	}
#ifdef HAVE_MBSINIT
	if (!mbsinit(&mbstate)) {
		ntfs_log_debug("Eeek. mbstate not in initial state!\n");
		errno = EILSEQ;
		goto err_out;
	}
#endif
	mbs[o] = '\0';
	if (*outs != mbs)
		*outs = mbs;
	return o;
err_out:
	/* Only release a buffer allocated here, preserving errno. */
	if (mbs != *outs) {
		int eo = errno;
		free(mbs);
		errno = eo;
	}
#else
	errno = EILSEQ;
#endif
	return -1;
}
/**
 * ntfs_mbstoucs - convert a multibyte string to a little-endian Unicode string
 * @ins:	NUL-terminated multibyte input string
 * @outs:	receives the converted, NUL-terminated Unicode string
 *
 * In UTF-8 mode the work is delegated to ntfs_utf8_to_utf16(), which fills
 * *@outs if it is not NULL and allocates a buffer otherwise. In locale mode
 * a buffer is always allocated here and stored in *@outs, whatever its
 * previous value.
 *
 * Returns the number of Unicode characters stored (terminator excluded), or
 * -1 with errno set: EINVAL for NULL arguments, EILSEQ for input which
 * cannot be converted or does not fit in 16 bits (also when no conversion
 * function is available), or the error left by the allocation or conversion
 * function.
 */
int ntfs_mbstoucs(const char *ins, ntfschar **outs)
{
#ifdef MB_CUR_MAX
ntfschar *ucs;
const char *s;
wchar_t wc;
int i, o, cnt, ins_len, ucs_len, ins_size;
#ifdef HAVE_MBSINIT
mbstate_t mbstate;
#endif
#endif
if (!ins || !outs) {
errno = EINVAL;
return -1;
}
if (use_utf8)
return ntfs_utf8_to_utf16(ins, outs);
#ifdef MB_CUR_MAX
/* ins_size is the input size in bytes, ins_len in wide characters */
ins_size = strlen(ins);
s = ins;
#if defined(HAVE_MBSINIT)
memset(&mbstate, 0, sizeof(mbstate));
/* With a NULL destination only the resulting length is computed. */
ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);
#ifdef __CYGWIN32__
/* Fall back to the byte count when zero is reported for non-empty input. */
if (!ins_len && *ins) {
ins_len = strlen(ins);
}
#endif
#elif !defined(DJGPP) && !defined(__HAIKU__)
ins_len = mbstowcs(NULL, s, 0);
#else
ins_len = strlen(ins);
#endif
if (ins_len == -1)
return ins_len;
#ifdef HAVE_MBSINIT
if ((s != ins) || !mbsinit(&mbstate)) {
#else
if (s != ins) {
#endif
errno = EILSEQ;
return -1;
}
/* Add room for the terminator. */
ins_len++;
ucs_len = ins_len;
ucs = ntfs_malloc(ucs_len * sizeof(ntfschar));
if (!ucs)
return -1;
#ifdef HAVE_MBSINIT
memset(&mbstate, 0, sizeof(mbstate));
#else
#ifndef __HAIKU__
/* Reset the internal shift state. */
mbtowc(NULL, NULL, 0);
#endif
#endif
/* i indexes input bytes, o indexes output characters */
for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {
/* Grow the buffer if the initial estimate proved too small. */
if (o >= ucs_len) {
ntfschar *tc;
/* ucs_len temporarily holds a byte count, rounded up to 64 */
ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;
tc = realloc(ucs, ucs_len);
if (!tc)
goto err_out;
ucs = tc;
ucs_len /= sizeof(ntfschar);
}
#ifdef HAVE_MBSINIT
cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);
#elif defined(__HAIKU__)
cnt = -1;
#else
cnt = mbtowc(&wc, ins + i, ins_size - i);
#endif
/* A zero count means the NUL character was converted. */
if (!cnt)
break;
if (cnt == -1)
goto err_out;
if (cnt < -1) {
ntfs_log_trace("Eeek. cnt = %i\n", cnt);
errno = EINVAL;
goto err_out;
}
/* Reject characters which do not fit in an ntfschar. */
if ((unsigned long)wc >= (unsigned long)(1 <<
(8 * sizeof(ntfschar)))) {
errno = EILSEQ;
goto err_out;
}
ucs[o] = cpu_to_le16(wc);
}
#ifdef HAVE_MBSINIT
if (!mbsinit(&mbstate)) {
ntfs_log_trace("Eeek. mbstate not in initial state!\n");
errno = EILSEQ;
goto err_out;
}
#endif
ucs[o] = const_cpu_to_le16(L'\0');
*outs = ucs;
return o;
err_out:
free(ucs);
#else
errno = EILSEQ;
#endif
return -1;
}
/**
 * ntfs_uppercase_mbs - build an upper-cased copy of a UTF-8 string
 * @low:	NUL-terminated UTF-8 input string
 * @upcase:	upcase table (little-endian entries)
 * @upcase_size: number of entries in @upcase
 *
 * Each code point below @upcase_size is replaced by its entry in @upcase,
 * the others are copied unchanged, and the result is encoded as UTF-8 again.
 *
 * Returns a NUL-terminated string allocated with ntfs_malloc() which the
 * caller must free, or NULL if the allocation failed or if @low is not
 * valid UTF-8 (errno is then EILSEQ).
 */
char *ntfs_uppercase_mbs(const char *low,
		const ntfschar *upcase, u32 upcase_size)
{
	int size;
	char *upp;
	u32 wc;
	int n;
	const char *s;
	char *t;

	size = strlen(low);
	/*
	 * A one-byte input character may become a three-byte one once
	 * mapped through the table, hence three bytes per input byte.
	 */
	upp = (char*)ntfs_malloc(3*size + 1);
	if (upp) {
		s = low;
		t = upp;
		do {
			/* n: bytes consumed, 0 at the end, < 0 on bad input */
			n = utf8_to_unicode(&wc, s);
			if (n > 0) {
				if (wc < upcase_size)
					wc = le16_to_cpu(upcase[wc]);
				if (wc < 0x80)
					*t++ = wc;
				else if (wc < 0x800) {
					*t++ = (0xc0 | ((wc >> 6) & 0x3f));
					*t++ = 0x80 | (wc & 0x3f);
				} else if (wc < 0x10000) {
					*t++ = 0xe0 | (wc >> 12);
					*t++ = 0x80 | ((wc >> 6) & 0x3f);
					*t++ = 0x80 | (wc & 0x3f);
				} else {
					*t++ = 0xf0 | ((wc >> 18) & 7);
					*t++ = 0x80 | ((wc >> 12) & 63);
					*t++ = 0x80 | ((wc >> 6) & 0x3f);
					*t++ = 0x80 | (wc & 0x3f);
				}
				s += n;
			}
		} while (n > 0);
		if (n < 0) {
			free(upp);
			upp = (char*)NULL;
			errno = EILSEQ;
		} else {
			/* Only terminate a buffer which is still allocated. */
			*t = 0;
		}
	}
	return (upp);
}
/**
 * ntfs_upcase_table_build - fill in an upcase table
 * @uc:		buffer receiving the table (little-endian entries)
 * @uc_len:	size of @uc in bytes
 *
 * The buffer is zeroed, then at most 65536 entries are set to the identity
 * mapping and patched from the built-in tables. Entries from the
 * newuppercase list are only applied when their (osmajor, osminor) pair is
 * not greater than (UPCASE_MAJOR, UPCASE_MINOR).
 *
 * Every write is bounded by the number of entries which fit in @uc_len
 * bytes, so a buffer shorter than 65536 entries receives a truncated table
 * instead of being overrun.
 */
void ntfs_upcase_table_build(ntfschar *uc, u32 uc_len)
{
	struct NEWUPPERCASE {
		unsigned short first;	/* first character of the range */
		unsigned short last;	/* last character of the range */
		short diff;		/* offset added to get the upcased value */
		unsigned char step;	/* distance between mapped characters */
		unsigned char osmajor;	/* compared against UPCASE_MAJOR */
		unsigned char osminor;	/* compared against UPCASE_MINOR */
	} ;
	/* { first, end (exclusive), offset } for contiguous ranges */
	static int uc_run_table[][3] = {
	{0x0061, 0x007B, -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72, 74},
	{0x00E0, 0x00F7, -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76, 86},
	{0x00F8, 0x00FF, -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
	{0x0256, 0x0258, -205}, {0x1F00, 0x1F08, 8}, {0x1F78, 0x1F7A, 128},
	{0x028A, 0x028C, -217}, {0x1F10, 0x1F16, 8}, {0x1F7A, 0x1F7C, 112},
	{0x03AC, 0x03AD, -38}, {0x1F20, 0x1F28, 8}, {0x1F7C, 0x1F7E, 126},
	{0x03AD, 0x03B0, -37}, {0x1F30, 0x1F38, 8}, {0x1FB0, 0x1FB2, 8},
	{0x03B1, 0x03C2, -32}, {0x1F40, 0x1F46, 8}, {0x1FD0, 0x1FD2, 8},
	{0x03C2, 0x03C3, -31}, {0x1F51, 0x1F52, 8}, {0x1FE0, 0x1FE2, 8},
	{0x03C3, 0x03CC, -32}, {0x1F53, 0x1F54, 8}, {0x1FE5, 0x1FE6, 7},
	{0x03CC, 0x03CD, -64}, {0x1F55, 0x1F56, 8}, {0x2170, 0x2180, -16},
	{0x03CD, 0x03CF, -63}, {0x1F57, 0x1F58, 8}, {0x24D0, 0x24EA, -26},
	{0x0430, 0x0450, -32}, {0x1F60, 0x1F68, 8}, {0xFF41, 0xFF5B, -32},
	{0}
	};
	/* { first, end }: within each range, i + 1 is mapped to i, step 2 */
	static int uc_dup_table[][2] = {
	{0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
	{0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
	{0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
	{0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
	{0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
	{0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
	{0}
	};
	/* { character, upcased value } for isolated mappings */
	static int uc_byte_table[][2] = {
	{0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
	{0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
	{0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
	{0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
	{0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
	{0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
	{0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
	{0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
	{0}
	};
	static const struct NEWUPPERCASE newuppercase[] = {
	{ 0x37b, 0x37d, 0x82, 1, 6, 0 },
	{ 0x1f80, 0x1f87, 0x8, 1, 6, 0 },
	{ 0x1f90, 0x1f97, 0x8, 1, 6, 0 },
	{ 0x1fa0, 0x1fa7, 0x8, 1, 6, 0 },
	{ 0x2c30, 0x2c5e, -0x30, 1, 6, 0 },
	{ 0x2d00, 0x2d25, -0x1c60, 1, 6, 0 },
	{ 0x2c68, 0x2c6c, -0x1, 2, 6, 0 },
	{ 0x219, 0x21f, -0x1, 2, 6, 0 },
	{ 0x223, 0x233, -0x1, 2, 6, 0 },
	{ 0x247, 0x24f, -0x1, 2, 6, 0 },
	{ 0x3d9, 0x3e1, -0x1, 2, 6, 0 },
	{ 0x48b, 0x48f, -0x1, 2, 6, 0 },
	{ 0x4fb, 0x513, -0x1, 2, 6, 0 },
	{ 0x2c81, 0x2ce3, -0x1, 2, 6, 0 },
	{ 0x3f8, 0x3fb, -0x1, 3, 6, 0 },
	{ 0x4c6, 0x4ce, -0x1, 4, 6, 0 },
	{ 0x23c, 0x242, -0x1, 6, 6, 0 },
	{ 0x4ed, 0x4f7, -0x1, 10, 6, 0 },
	{ 0x450, 0x45d, -0x50, 13, 6, 0 },
	{ 0x2c61, 0x2c76, -0x1, 21, 6, 0 },
	{ 0x1fcc, 0x1ffc, -0x9, 48, 6, 0 },
	{ 0x180, 0x180, 0xc3, 1, 6, 0 },
	{ 0x195, 0x195, 0x61, 1, 6, 0 },
	{ 0x19a, 0x19a, 0xa3, 1, 6, 0 },
	{ 0x19e, 0x19e, 0x82, 1, 6, 0 },
	{ 0x1bf, 0x1bf, 0x38, 1, 6, 0 },
	{ 0x1f9, 0x1f9, -0x1, 1, 6, 0 },
	{ 0x23a, 0x23a, 0x2a2b, 1, 6, 0 },
	{ 0x23e, 0x23e, 0x2a28, 1, 6, 0 },
	{ 0x26b, 0x26b, 0x29f7, 1, 6, 0 },
	{ 0x27d, 0x27d, 0x29e7, 1, 6, 0 },
	{ 0x280, 0x280, -0xda, 1, 6, 0 },
	{ 0x289, 0x289, -0x45, 1, 6, 0 },
	{ 0x28c, 0x28c, -0x47, 1, 6, 0 },
	{ 0x3f2, 0x3f2, 0x7, 1, 6, 0 },
	{ 0x4cf, 0x4cf, -0xf, 1, 6, 0 },
	{ 0x1d7d, 0x1d7d, 0xee6, 1, 6, 0 },
	{ 0x1fb3, 0x1fb3, 0x9, 1, 6, 0 },
	{ 0x214e, 0x214e, -0x1c, 1, 6, 0 },
	{ 0x2184, 0x2184, -0x1, 1, 6, 0 },
	{ 0x23a, 0x23e, 0x0, 4, 6, 1 },
	{ 0x250, 0x250, 0x2a1f, 2, 6, 1 },
	{ 0x251, 0x251, 0x2a1c, 2, 6, 1 },
	{ 0x271, 0x271, 0x29fd, 2, 6, 1 },
	{ 0x371, 0x373, -0x1, 2, 6, 1 },
	{ 0x377, 0x377, -0x1, 2, 6, 1 },
	{ 0x3c2, 0x3c2, 0x0, 2, 6, 1 },
	{ 0x3d7, 0x3d7, -0x8, 2, 6, 1 },
	{ 0x515, 0x523, -0x1, 2, 6, 1 },
	{ 0x1d79, 0x1d79, -0x75fc, 2, 6, 1 },
	{ 0x1efb, 0x1eff, -0x1, 2, 6, 1 },
	{ 0x1fc3, 0x1ff3, 0x9, 48, 6, 1 },
	{ 0x1fcc, 0x1ffc, 0x0, 48, 6, 1 },
	{ 0x2c65, 0x2c65, -0x2a2b, 2, 6, 1 },
	{ 0x2c66, 0x2c66, -0x2a28, 2, 6, 1 },
	{ 0x2c73, 0x2c73, -0x1, 2, 6, 1 },
	{ 0xa641, 0xa65f, -0x1, 2, 6, 1 },
	{ 0xa663, 0xa66d, -0x1, 2, 6, 1 },
	{ 0xa681, 0xa697, -0x1, 2, 6, 1 },
	{ 0xa723, 0xa72f, -0x1, 2, 6, 1 },
	{ 0xa733, 0xa76f, -0x1, 2, 6, 1 },
	{ 0xa77a, 0xa77c, -0x1, 2, 6, 1 },
	{ 0xa77f, 0xa787, -0x1, 2, 6, 1 },
	{ 0xa78c, 0xa78c, -0x1, 2, 6, 1 },
	{ 0 }
	} ;
	int i, r;
	int k, off;
	const struct NEWUPPERCASE *puc;

	/* uc_len is a byte count up to here, an entry count afterwards. */
	memset((char*)uc, 0, uc_len);
	uc_len >>= 1;
	if (uc_len > 65536)
		uc_len = 65536;
	/* Start from the identity mapping. */
	for (i = 0; (u32)i < uc_len; i++)
		uc[i] = cpu_to_le16(i);
	/* Contiguous ranges sharing a single offset. */
	for (r = 0; uc_run_table[r][0]; r++) {
		off = uc_run_table[r][2];
		for (i = uc_run_table[r][0];
				(i < uc_run_table[r][1]) && ((u32)i < uc_len);
				i++)
			uc[i] = cpu_to_le16(i + off);
	}
	/* Pairs where the second character maps to the first one. */
	for (r = 0; uc_dup_table[r][0]; r++)
		for (i = uc_dup_table[r][0];
				(i < uc_dup_table[r][1])
				    && ((u32)i + 1 < uc_len);
				i += 2)
			uc[i + 1] = cpu_to_le16(i);
	/* Isolated mappings. */
	for (r = 0; uc_byte_table[r][0]; r++) {
		i = uc_byte_table[r][0];
		k = uc_byte_table[r][1];
		if ((u32)i < uc_len)
			uc[i] = cpu_to_le16(k);
	}
	/* Version-dependent additions. */
	for (r=0; newuppercase[r].first; r++) {
		puc = &newuppercase[r];
		if ((puc->osmajor < UPCASE_MAJOR)
		    || ((puc->osmajor == UPCASE_MAJOR)
		        && (puc->osminor <= UPCASE_MINOR))) {
			off = puc->diff;
			for (i = puc->first;
					(i <= puc->last) && ((u32)i < uc_len);
					i += puc->step)
				uc[i] = cpu_to_le16(i + off);
		}
	}
}
/* Number of characters in the default upcase table */
#define UPCASE_LEN 65536
/**
 * ntfs_upcase_build_default - allocate and build a default upcase table
 * @upcase:	receives the allocated table, or NULL if the allocation failed
 *
 * Returns the number of characters in the table (UPCASE_LEN), or 0 if the
 * table could not be allocated.
 */
u32 ntfs_upcase_build_default(ntfschar **upcase)
{
	ntfschar *table = (ntfschar*)ntfs_malloc(UPCASE_LEN*2);

	*upcase = table;
	if (!table)
		return 0;

	ntfs_upcase_table_build(table, UPCASE_LEN*2);
	return UPCASE_LEN;
}
/**
 * ntfs_locase_table_build - build a locase table from an upcase table
 * @uc:		upcase table (little-endian entries)
 * @uc_cnt:	number of entries in @uc
 *
 * The result starts as the identity mapping; then, for every character
 * whose upcased value differs from itself and lies within the table, the
 * upcased value is mapped back to that character. When several characters
 * share an upcased value, the highest one wins.
 *
 * Returns a table of @uc_cnt entries allocated with ntfs_malloc(), or NULL
 * (after logging an error) if the allocation failed.
 */
ntfschar *ntfs_locase_table_build(const ntfschar *uc, u32 uc_cnt)
{
	ntfschar *lc = (ntfschar*)ntfs_malloc(uc_cnt*sizeof(ntfschar));
	u32 code;

	if (!lc) {
		ntfs_log_error("Could not build the locase table\n");
		return lc;
	}

	/* Identity mapping first ... */
	for (code = 0; code < uc_cnt; code++)
		lc[code] = cpu_to_le16(code);

	/* ... then invert every non-trivial upcase entry. */
	for (code = 0; code < uc_cnt; code++) {
		const u32 upper = le16_to_cpu(uc[code]);

		if ((upper != code) && (upper < uc_cnt))
			lc[upper] = cpu_to_le16(code);
	}
	return lc;
}
/**
 * ntfs_str2ucs - convert a multibyte string to a Unicode name
 * @s:		NUL-terminated multibyte string, may be NULL
 * @len:	receives the number of Unicode characters in the result
 *
 * A NULL or empty @s yields AT_UNNAMED with *@len set to 0. Otherwise the
 * string is converted by ntfs_mbstoucs() into a newly allocated buffer.
 * The result should be released with ntfs_ucsfree(), which copes with
 * AT_UNNAMED.
 *
 * Returns the Unicode string, or NULL with errno set if the conversion
 * failed (*@len is then -1) or if the result is longer than
 * NTFS_MAX_NAME_LEN characters (errno is then ENAMETOOLONG).
 */
ntfschar *ntfs_str2ucs(const char *s, int *len)
{
	ntfschar *ucs = NULL;

	/* Never read *len as passed in: it may not be initialized. */
	*len = 0;
	if (s && ((*len = ntfs_mbstoucs(s, &ucs)) == -1)) {
		ntfs_log_perror("Couldn't convert '%s' to Unicode", s);
		return NULL;
	}
	if (*len > NTFS_MAX_NAME_LEN) {
		free(ucs);
		errno = ENAMETOOLONG;
		return NULL;
	}
	if (!ucs || !*len) {
		/* An empty conversion result is not returned: release it. */
		free(ucs);
		ucs = AT_UNNAMED;
		*len = 0;
	}
	return ucs;
}
/**
 * ntfs_ucsfree - release a Unicode string
 * @ucs:	string to free, may be NULL or AT_UNNAMED
 *
 * Frees @ucs unless it is NULL or the AT_UNNAMED constant.
 */
void ntfs_ucsfree(ntfschar *ucs)
{
	if (!ucs || (ucs == AT_UNNAMED))
		return;
	free(ucs);
}
/**
 * ntfs_forbidden_chars - check a name for forbidden characters
 * @name:	name to check (little-endian Unicode)
 * @len:	number of characters in @name
 * @strict:	when set, a name ending with a space or a dot is also rejected
 *
 * A name is rejected when it is empty or contains a character below 0x20
 * or one of  " * / : < > ? \ |
 *
 * Returns TRUE (with errno set to EINVAL) if the name is rejected, FALSE
 * otherwise.
 */
BOOL ntfs_forbidden_chars(const ntfschar *name, int len, BOOL strict)
{
	BOOL forbidden;
	int ch;
	int i;
	/*
	 * Bitmap of the forbidden characters in the range 0x20..0x3f.
	 * Unsigned constants are required: '?' uses bit 31, which a signed
	 * 32-bit long cannot be shifted into.
	 */
	static const u32 mainset = (1UL << ('\"' - 0x20))
			| (1UL << ('*' - 0x20))
			| (1UL << ('/' - 0x20))
			| (1UL << (':' - 0x20))
			| (1UL << ('<' - 0x20))
			| (1UL << ('>' - 0x20))
			| (1UL << ('?' - 0x20));

	forbidden = (len == 0) ||
		(strict && (name[len-1] == const_cpu_to_le16(' ') ||
			name[len-1] == const_cpu_to_le16('.')));
	for (i=0; i<len; i++) {
		ch = le16_to_cpu(name[i]);
		/* The bitmap is only consulted for 0x20 <= ch < 0x40. */
		if ((ch < 0x20)
		    || ((ch < 0x40)
			&& ((1UL << (ch - 0x20)) & mainset))
		    || (ch == '\\')
		    || (ch == '|'))
			forbidden = TRUE;
	}
	if (forbidden)
		errno = EINVAL;
	return (forbidden);
}
/**
 * ntfs_forbidden_names - check a name against forbidden characters and names
 * @vol:	volume whose upcase table is used for the comparisons
 * @name:	name to check (little-endian Unicode)
 * @len:	number of characters in @name
 * @strict:	passed on to ntfs_forbidden_chars()
 *
 * Besides the names rejected by ntfs_forbidden_chars(), the following are
 * rejected whatever their case, alone or followed by a dot and anything:
 * con, prn, aux, nul, and com or lpt followed by a digit from 1 to 9.
 *
 * Returns TRUE (with errno set to EINVAL) if the name is rejected, FALSE
 * otherwise.
 */
BOOL ntfs_forbidden_names(ntfs_volume *vol, const ntfschar *name, int len,
		BOOL strict)
{
	static const ntfschar dot = const_cpu_to_le16('.');
	static const ntfschar con[] = { const_cpu_to_le16('c'),
			const_cpu_to_le16('o'), const_cpu_to_le16('n') };
	static const ntfschar prn[] = { const_cpu_to_le16('p'),
			const_cpu_to_le16('r'), const_cpu_to_le16('n') };
	static const ntfschar aux[] = { const_cpu_to_le16('a'),
			const_cpu_to_le16('u'), const_cpu_to_le16('x') };
	static const ntfschar nul[] = { const_cpu_to_le16('n'),
			const_cpu_to_le16('u'), const_cpu_to_le16('l') };
	static const ntfschar com[] = { const_cpu_to_le16('c'),
			const_cpu_to_le16('o'), const_cpu_to_le16('m') };
	static const ntfschar lpt[] = { const_cpu_to_le16('l'),
			const_cpu_to_le16('p'), const_cpu_to_le16('t') };
	/* Reserved stems selected by the third character of the name */
	const ntfschar *stem = NULL;
	const ntfschar *alt_stem = NULL;
	/* TRUE when the stem must be followed by a digit from 1 to 9 */
	BOOL numbered = FALSE;
	BOOL suffix_ok;
	BOOL forbidden;
	int hash;

	forbidden = ntfs_forbidden_chars(name, len, strict);
	if (!forbidden && (len >= 3)) {
		/*
		 * Cheap filter on the first two characters, so that most
		 * names skip the comparisons below.
		 */
		hash = ((le16_to_cpu(name[0]) & 31)*48)
				^ ((le16_to_cpu(name[1]) & 31)*165);
		if ((hash % 23) == 17) {
			/* Dispatch on the third character, case folded. */
			switch (le16_to_cpu(name[2]) & ~0x20) {
			case 'N' :
				stem = con;
				alt_stem = prn;
				break;
			case 'X' :
				stem = aux;
				break;
			case 'L' :
				stem = nul;
				break;
			case 'M' :
				stem = com;
				numbered = TRUE;
				break;
			case 'T' :
				stem = lpt;
				numbered = TRUE;
				break;
			default :
				break;
			}
		}
	}
	if (stem) {
		/* What follows the stem must be nothing or a dot. */
		if (numbered)
			suffix_ok = (len > 3)
				&& (le16_to_cpu(name[3]) >= '1')
				&& (le16_to_cpu(name[3]) <= '9')
				&& ((len == 4) || (name[4] == dot));
		else
			suffix_ok = (len == 3) || (name[3] == dot);
		if (suffix_ok
		    && (!ntfs_ucsncasecmp(name, stem, 3,
				vol->upcase, vol->upcase_len)
			|| (alt_stem
			    && !ntfs_ucsncasecmp(name, alt_stem, 3,
				vol->upcase, vol->upcase_len))))
			forbidden = TRUE;
	}
	if (forbidden)
		errno = EINVAL;
	return (forbidden);
}
/**
 * ntfs_collapsible_chars - check whether two names only differ by case
 * @vol:	volume whose upcase table is used
 * @shortname:	first name (little-endian Unicode)
 * @shortlen:	number of characters in @shortname
 * @longname:	second name (little-endian Unicode)
 * @longlen:	number of characters in @longname
 *
 * Returns TRUE when both names have the same length and, at every position,
 * the characters are either identical or both within the upcase table with
 * the same upcased value. Returns FALSE otherwise.
 */
BOOL ntfs_collapsible_chars(ntfs_volume *vol,
		const ntfschar *shortname, int shortlen,
		const ntfschar *longname, int longlen)
{
	int pos;

	if (shortlen != longlen)
		return FALSE;

	for (pos = 0; pos < shortlen; pos++) {
		const unsigned int lch = le16_to_cpu(longname[pos]);
		const unsigned int sch = le16_to_cpu(shortname[pos]);

		if (sch == lch)
			continue;
		/* Different characters must share their upcased value. */
		if ((lch >= vol->upcase_len)
		    || (sch >= vol->upcase_len)
		    || (vol->upcase[sch] != vol->upcase[lch]))
			return FALSE;
	}
	return TRUE;
}
/**
 * ntfs_set_char_encoding - select the multibyte encoding
 * @locale:	locale name, may be NULL
 *
 * UTF-8 is selected when @locale is NULL or contains "utf8", "UTF8",
 * "utf-8" or "UTF-8". Otherwise the locale is installed with setlocale()
 * and the locale's multibyte functions are used; if that fails (or on
 * Haiku, where setlocale() is not called) an error is logged and UTF-8 is
 * selected.
 *
 * Always returns 0.
 */
int ntfs_set_char_encoding(const char *locale)
{
	use_utf8 = 0;
	if (!locale || strstr(locale,"utf8") || strstr(locale,"UTF8")
	    || strstr(locale,"utf-8") || strstr(locale,"UTF-8")) {
		use_utf8 = 1;
		return 0;
	}
#ifndef __HAIKU__
	if (setlocale(LC_ALL, locale)) {
		/* The locale's own conversion functions will be used. */
		use_utf8 = 0;
		return 0;
	}
#endif
	ntfs_log_error("Invalid locale, encoding to UTF-8\n");
	use_utf8 = 1;
	return 0;
}
#if defined(__APPLE__) || defined(__DARWIN__)
/**
 * ntfs_macosx_normalize_filenames - enable or disable UTF-8 normalization
 * @normalize:	1 to enable normalization, 0 to disable it
 *
 * Returns 0 on success, -1 if @normalize is neither 0 nor 1 or if the
 * build does not define ENABLE_NFCONV.
 */
int ntfs_macosx_normalize_filenames(int normalize) {
#ifdef ENABLE_NFCONV
	if ((normalize != 0) && (normalize != 1))
		return -1;
	nfconvert_utf8 = normalize;
	return 0;
#else
	return -1;
#endif
}
/**
 * ntfs_macosx_normalize_utf8 - normalize a UTF-8 string
 * @utf8_string: NUL-terminated UTF-8 string to normalize
 * @target:	receives the normalized, NUL-terminated string, which is
 *		allocated with ntfs_calloc() (callers in this file release
 *		it with free()); left untouched on failure
 * @composed:	non-zero to request kCFStringNormalizationFormC, zero to
 *		request kCFStringNormalizationFormD
 *
 * Returns the length in bytes of the normalized string (terminator
 * excluded), -2 if the CFString could not be created, -3 if its mutable
 * copy could not be created, and -1 for any other failure or when the
 * build does not define ENABLE_NFCONV.
 */
int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target,
int composed)
{
#ifdef ENABLE_NFCONV
CFStringRef cfSourceString;
CFMutableStringRef cfMutableString;
CFRange rangeToProcess;
CFIndex requiredBufferLength;
char *result = NULL;
int resultLength = -1;
cfSourceString = CFStringCreateWithCString(kCFAllocatorDefault,
utf8_string, kCFStringEncodingUTF8);
if (cfSourceString == NULL) {
ntfs_log_error("CFStringCreateWithCString failed!\n");
return -2;
}
/* Normalization is done in place, so a mutable copy is needed. */
cfMutableString = CFStringCreateMutableCopy(kCFAllocatorDefault, 0,
cfSourceString);
CFRelease(cfSourceString);
if (cfMutableString == NULL) {
ntfs_log_error("CFStringCreateMutableCopy failed!\n");
return -3;
}
CFStringNormalize(cfMutableString, (composed != 0 ?
kCFStringNormalizationFormC : kCFStringNormalizationFormD));
rangeToProcess = CFRangeMake(0, CFStringGetLength(cfMutableString));
/* First pass with a NULL buffer: only get the required size. */
if (CFStringGetBytes(cfMutableString, rangeToProcess,
kCFStringEncodingUTF8, 0, false, NULL, 0,
&requiredBufferLength) > 0)
{
/* One more byte for the terminator, zeroed by ntfs_calloc(). */
resultLength = sizeof(char) * (requiredBufferLength + 1);
result = ntfs_calloc(resultLength);
if (result != NULL) {
/* Second pass: actually produce the UTF-8 bytes. */
if (CFStringGetBytes(cfMutableString, rangeToProcess,
kCFStringEncodingUTF8, 0, false,
(UInt8*) result, resultLength - 1,
&requiredBufferLength) <= 0)
{
ntfs_log_error("Could not perform UTF-8 "
"conversion of normalized "
"CFMutableString.\n");
free(result);
result = NULL;
}
}
else {
ntfs_log_error("Could not perform a ntfs_calloc of %d "
"bytes for char *result.\n", resultLength);
}
}
else {
ntfs_log_error("Could not perform check for required length of "
"UTF-8 conversion of normalized CFMutableString.\n");
}
CFRelease(cfMutableString);
if (result != NULL) {
*target = result;
return resultLength - 1;
}
else {
return -1;
}
#else
return -1;
#endif
}
#endif