#ifndef _UNICODE_CHAR_H_
#define _UNICODE_CHAR_H_
#include <SupportDefs.h>
// Category codes for a single code point. The enumerator names follow the
// Unicode "general category" vocabulary (presumably mirroring ICU's
// UCharCategory numbering -- confirm against the implementation).
// Values are contiguous from 0 to B_UNICODE_CATEGORY_COUNT - 1.
enum unicode_char_category {
	// Value 0 is reachable under two names.
	B_UNICODE_UNASSIGNED = 0,
	B_UNICODE_GENERAL_OTHER_TYPES = B_UNICODE_UNASSIGNED,

	// Letters
	B_UNICODE_UPPERCASE_LETTER = 1,
	B_UNICODE_LOWERCASE_LETTER = 2,
	B_UNICODE_TITLECASE_LETTER = 3,
	B_UNICODE_MODIFIER_LETTER = 4,
	B_UNICODE_OTHER_LETTER = 5,

	// Marks
	B_UNICODE_NON_SPACING_MARK = 6,
	B_UNICODE_ENCLOSING_MARK = 7,
	B_UNICODE_COMBINING_SPACING_MARK = 8,

	// Numbers
	B_UNICODE_DECIMAL_DIGIT_NUMBER = 9,
	B_UNICODE_LETTER_NUMBER = 10,
	B_UNICODE_OTHER_NUMBER = 11,

	// Separators
	B_UNICODE_SPACE_SEPARATOR = 12,
	B_UNICODE_LINE_SEPARATOR = 13,
	B_UNICODE_PARAGRAPH_SEPARATOR = 14,

	// Control, format, private use, surrogate
	B_UNICODE_CONTROL_CHAR = 15,
	B_UNICODE_FORMAT_CHAR = 16,
	B_UNICODE_PRIVATE_USE_CHAR = 17,
	B_UNICODE_SURROGATE = 18,

	// Punctuation
	B_UNICODE_DASH_PUNCTUATION = 19,
	B_UNICODE_START_PUNCTUATION = 20,
	B_UNICODE_END_PUNCTUATION = 21,
	B_UNICODE_CONNECTOR_PUNCTUATION = 22,
	B_UNICODE_OTHER_PUNCTUATION = 23,

	// Symbols
	B_UNICODE_MATH_SYMBOL = 24,
	B_UNICODE_CURRENCY_SYMBOL = 25,
	B_UNICODE_MODIFIER_SYMBOL = 26,
	B_UNICODE_OTHER_SYMBOL = 27,

	// Quotation-style punctuation
	B_UNICODE_INITIAL_PUNCTUATION = 28,
	B_UNICODE_FINAL_PUNCTUATION = 29,

	// Implicitly 30: one past the highest category value. Must stay last.
	B_UNICODE_CATEGORY_COUNT
};
// Directional class codes for a single code point. The enumerator names
// follow the Unicode bidirectional class vocabulary (presumably mirroring
// ICU's UCharDirection numbering -- confirm against the implementation).
// Values are contiguous from 0 to B_UNICODE_DIRECTION_COUNT - 1.
enum unicode_char_direction
{
	// Strong left-to-right / right-to-left
	B_UNICODE_LEFT_TO_RIGHT = 0,
	B_UNICODE_RIGHT_TO_LEFT = 1,

	// Number-related classes
	B_UNICODE_EUROPEAN_NUMBER = 2,
	B_UNICODE_EUROPEAN_NUMBER_SEPARATOR = 3,
	B_UNICODE_EUROPEAN_NUMBER_TERMINATOR = 4,
	B_UNICODE_ARABIC_NUMBER = 5,
	B_UNICODE_COMMON_NUMBER_SEPARATOR = 6,

	// Separators and neutrals
	B_UNICODE_BLOCK_SEPARATOR = 7,
	B_UNICODE_SEGMENT_SEPARATOR = 8,
	B_UNICODE_WHITE_SPACE_NEUTRAL = 9,
	B_UNICODE_OTHER_NEUTRAL = 10,

	// Embeddings, overrides and related classes
	B_UNICODE_LEFT_TO_RIGHT_EMBEDDING = 11,
	B_UNICODE_LEFT_TO_RIGHT_OVERRIDE = 12,
	B_UNICODE_RIGHT_TO_LEFT_ARABIC = 13,
	B_UNICODE_RIGHT_TO_LEFT_EMBEDDING = 14,
	B_UNICODE_RIGHT_TO_LEFT_OVERRIDE = 15,
	B_UNICODE_POP_DIRECTIONAL_FORMAT = 16,

	B_UNICODE_DIR_NON_SPACING_MARK = 17,
	B_UNICODE_BOUNDARY_NEUTRAL = 18,

	// Implicitly 19: one past the highest direction value. Must stay last.
	B_UNICODE_DIRECTION_COUNT
};
// Identifiers for ranges of the Unicode code space.
// NOTE(review): despite the enum's name, the enumerators (e.g.
// B_UNICODE_NO_BLOCK, B_UNICODE_BASIC_LATIN) read like Unicode *block*
// names rather than scripts -- presumably this mirrors ICU's UBlockCode;
// confirm against the implementation.
//
// Regular entries are numbered explicitly from 0 to 209. Two entries are
// aliases of an earlier value, and three trailing entries are sentinels.
enum unicode_char_script
{
	B_UNICODE_NO_BLOCK = 0,

	B_UNICODE_BASIC_LATIN = 1,
	B_UNICODE_LATIN_1_SUPPLEMENT = 2,
	B_UNICODE_LATIN_EXTENDED_A = 3,
	B_UNICODE_LATIN_EXTENDED_B = 4,
	B_UNICODE_IPA_EXTENSIONS = 5,
	B_UNICODE_SPACING_MODIFIER_LETTERS = 6,
	B_UNICODE_COMBINING_DIACRITICAL_MARKS = 7,
	B_UNICODE_GREEK = 8,
	B_UNICODE_CYRILLIC = 9,
	B_UNICODE_ARMENIAN = 10,
	B_UNICODE_HEBREW = 11,
	B_UNICODE_ARABIC = 12,
	B_UNICODE_SYRIAC = 13,
	B_UNICODE_THAANA = 14,
	B_UNICODE_DEVANAGARI = 15,
	B_UNICODE_BENGALI = 16,
	B_UNICODE_GURMUKHI = 17,
	B_UNICODE_GUJARATI = 18,
	B_UNICODE_ORIYA = 19,
	B_UNICODE_TAMIL = 20,
	B_UNICODE_TELUGU = 21,
	B_UNICODE_KANNADA = 22,
	B_UNICODE_MALAYALAM = 23,
	B_UNICODE_SINHALA = 24,
	B_UNICODE_THAI = 25,
	B_UNICODE_LAO = 26,
	B_UNICODE_TIBETAN = 27,
	B_UNICODE_MYANMAR = 28,
	B_UNICODE_GEORGIAN = 29,
	B_UNICODE_HANGUL_JAMO = 30,
	B_UNICODE_ETHIOPIC = 31,
	B_UNICODE_CHEROKEE = 32,
	B_UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33,
	B_UNICODE_OGHAM = 34,
	B_UNICODE_RUNIC = 35,
	B_UNICODE_KHMER = 36,
	B_UNICODE_MONGOLIAN = 37,
	B_UNICODE_LATIN_EXTENDED_ADDITIONAL = 38,
	B_UNICODE_GREEK_EXTENDED = 39,
	B_UNICODE_GENERAL_PUNCTUATION = 40,
	B_UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS = 41,
	B_UNICODE_CURRENCY_SYMBOLS = 42,
	B_UNICODE_COMBINING_MARKS_FOR_SYMBOLS = 43,
	B_UNICODE_LETTERLIKE_SYMBOLS = 44,
	B_UNICODE_NUMBER_FORMS = 45,
	B_UNICODE_ARROWS = 46,
	B_UNICODE_MATHEMATICAL_OPERATORS = 47,
	B_UNICODE_MISCELLANEOUS_TECHNICAL = 48,
	B_UNICODE_CONTROL_PICTURES = 49,
	B_UNICODE_OPTICAL_CHARACTER_RECOGNITION = 50,
	B_UNICODE_ENCLOSED_ALPHANUMERICS = 51,
	B_UNICODE_BOX_DRAWING = 52,
	B_UNICODE_BLOCK_ELEMENTS = 53,
	B_UNICODE_GEOMETRIC_SHAPES = 54,
	B_UNICODE_MISCELLANEOUS_SYMBOLS = 55,
	B_UNICODE_DINGBATS = 56,
	B_UNICODE_BRAILLE_PATTERNS = 57,
	B_UNICODE_CJK_RADICALS_SUPPLEMENT = 58,
	B_UNICODE_KANGXI_RADICALS = 59,
	B_UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
	B_UNICODE_CJK_SYMBOLS_AND_PUNCTUATION = 61,
	B_UNICODE_HIRAGANA = 62,
	B_UNICODE_KATAKANA = 63,
	B_UNICODE_BOPOMOFO = 64,
	B_UNICODE_HANGUL_COMPATIBILITY_JAMO = 65,
	B_UNICODE_KANBUN = 66,
	B_UNICODE_BOPOMOFO_EXTENDED = 67,
	B_UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
	B_UNICODE_CJK_COMPATIBILITY = 69,
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS = 71,
	B_UNICODE_YI_SYLLABLES = 72,
	B_UNICODE_YI_RADICALS = 73,
	B_UNICODE_HANGUL_SYLLABLES = 74,
	B_UNICODE_HIGH_SURROGATES = 75,
	B_UNICODE_HIGH_PRIVATE_USE_SURROGATES = 76,
	B_UNICODE_LOW_SURROGATES = 77,

	B_UNICODE_PRIVATE_USE = 78,
	// Second name for the same value (78).
	B_UNICODE_PRIVATE_USE_AREA = B_UNICODE_PRIVATE_USE,

	B_UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS = 79,
	B_UNICODE_ALPHABETIC_PRESENTATION_FORMS = 80,
	B_UNICODE_ARABIC_PRESENTATION_FORMS_A = 81,
	B_UNICODE_COMBINING_HALF_MARKS = 82,
	B_UNICODE_CJK_COMPATIBILITY_FORMS = 83,
	B_UNICODE_SMALL_FORM_VARIANTS = 84,
	B_UNICODE_ARABIC_PRESENTATION_FORMS_B = 85,
	B_UNICODE_SPECIALS = 86,
	B_UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS = 87,
	B_UNICODE_OLD_ITALIC = 88,
	B_UNICODE_GOTHIC = 89,
	B_UNICODE_DESERET = 90,
	B_UNICODE_BYZANTINE_MUSICAL_SYMBOLS = 91,
	B_UNICODE_MUSICAL_SYMBOLS = 92,
	B_UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93,
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94,
	B_UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95,
	B_UNICODE_TAGS = 96,

	B_UNICODE_CYRILLIC_SUPPLEMENTARY = 97,
	// Second name for the same value (97).
	B_UNICODE_CYRILLIC_SUPPLEMENT = B_UNICODE_CYRILLIC_SUPPLEMENTARY,

	B_UNICODE_TAGALOG = 98,
	B_UNICODE_HANUNOO = 99,
	B_UNICODE_BUHID = 100,
	B_UNICODE_TAGBANWA = 101,
	B_UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102,
	B_UNICODE_SUPPLEMENTAL_ARROWS_A = 103,
	B_UNICODE_SUPPLEMENTAL_ARROWS_B = 104,
	B_UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105,
	B_UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106,
	B_UNICODE_KATAKANA_PHONETIC_EXTENSIONS = 107,
	B_UNICODE_VARIATION_SELECTORS = 108,
	B_UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109,
	B_UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110,
	B_UNICODE_LIMBU = 111,
	B_UNICODE_TAI_LE = 112,
	B_UNICODE_KHMER_SYMBOLS = 113,
	B_UNICODE_PHONETIC_EXTENSIONS = 114,
	B_UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115,
	B_UNICODE_YIJING_HEXAGRAM_SYMBOLS = 116,
	B_UNICODE_LINEAR_B_SYLLABARY = 117,
	B_UNICODE_LINEAR_B_IDEOGRAMS = 118,
	B_UNICODE_AEGEAN_NUMBERS = 119,
	B_UNICODE_UGARITIC = 120,
	B_UNICODE_SHAVIAN = 121,
	B_UNICODE_OSMANYA = 122,
	B_UNICODE_CYPRIOT_SYLLABARY = 123,
	B_UNICODE_TAI_XUAN_JING_SYMBOLS = 124,
	B_UNICODE_VARIATION_SELECTORS_SUPPLEMENT = 125,
	B_UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION = 126,
	B_UNICODE_ANCIENT_GREEK_NUMBERS = 127,
	B_UNICODE_ARABIC_SUPPLEMENT = 128,
	B_UNICODE_BUGINESE = 129,
	B_UNICODE_CJK_STROKES = 130,
	B_UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131,
	B_UNICODE_COPTIC = 132,
	B_UNICODE_ETHIOPIC_EXTENDED = 133,
	B_UNICODE_ETHIOPIC_SUPPLEMENT = 134,
	B_UNICODE_GEORGIAN_SUPPLEMENT = 135,
	B_UNICODE_GLAGOLITIC = 136,
	B_UNICODE_KHAROSHTHI = 137,
	B_UNICODE_MODIFIER_TONE_LETTERS = 138,
	B_UNICODE_NEW_TAI_LUE = 139,
	B_UNICODE_OLD_PERSIAN = 140,
	B_UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT = 141,
	B_UNICODE_SUPPLEMENTAL_PUNCTUATION = 142,
	B_UNICODE_SYLOTI_NAGRI = 143,
	B_UNICODE_TIFINAGH = 144,
	B_UNICODE_VERTICAL_FORMS = 145,
	B_UNICODE_NKO = 146,
	B_UNICODE_BALINESE = 147,
	B_UNICODE_LATIN_EXTENDED_C = 148,
	B_UNICODE_LATIN_EXTENDED_D = 149,
	B_UNICODE_PHAGS_PA = 150,
	B_UNICODE_PHOENICIAN = 151,
	B_UNICODE_CUNEIFORM = 152,
	B_UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153,
	B_UNICODE_COUNTING_ROD_NUMERALS = 154,
	B_UNICODE_SUNDANESE = 155,
	B_UNICODE_LEPCHA = 156,
	B_UNICODE_OL_CHIKI = 157,
	B_UNICODE_CYRILLIC_EXTENDED_A = 158,
	B_UNICODE_VAI = 159,
	B_UNICODE_CYRILLIC_EXTENDED_B = 160,
	B_UNICODE_SAURASHTRA = 161,
	B_UNICODE_KAYAH_LI = 162,
	B_UNICODE_REJANG = 163,
	B_UNICODE_CHAM = 164,
	B_UNICODE_ANCIENT_SYMBOLS = 165,
	B_UNICODE_PHAISTOS_DISC = 166,
	B_UNICODE_LYCIAN = 167,
	B_UNICODE_CARIAN = 168,
	B_UNICODE_LYDIAN = 169,
	B_UNICODE_MAHJONG_TILES = 170,
	B_UNICODE_DOMINO_TILES = 171,
	B_UNICODE_SAMARITAN = 172,
	B_UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173,
	B_UNICODE_TAI_THAM = 174,
	B_UNICODE_VEDIC_EXTENSIONS = 175,
	B_UNICODE_LISU = 176,
	B_UNICODE_BAMUM = 177,
	B_UNICODE_COMMON_INDIC_NUMBER_FORMS = 178,
	B_UNICODE_DEVANAGARI_EXTENDED = 179,
	B_UNICODE_HANGUL_JAMO_EXTENDED_A = 180,
	B_UNICODE_JAVANESE = 181,
	B_UNICODE_MYANMAR_EXTENDED_A = 182,
	B_UNICODE_TAI_VIET = 183,
	B_UNICODE_MEETEI_MAYEK = 184,
	B_UNICODE_HANGUL_JAMO_EXTENDED_B = 185,
	B_UNICODE_IMPERIAL_ARAMAIC = 186,
	B_UNICODE_OLD_SOUTH_ARABIAN = 187,
	B_UNICODE_AVESTAN = 188,
	B_UNICODE_INSCRIPTIONAL_PARTHIAN = 189,
	B_UNICODE_INSCRIPTIONAL_PAHLAVI = 190,
	B_UNICODE_OLD_TURKIC = 191,
	B_UNICODE_RUMI_NUMERAL_SYMBOLS = 192,
	B_UNICODE_KAITHI = 193,
	B_UNICODE_EGYPTIAN_HIEROGLYPHS = 194,
	B_UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195,
	B_UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196,
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197,
	B_UNICODE_MANDAIC = 198,
	B_UNICODE_BATAK = 199,
	B_UNICODE_ETHIOPIC_EXTENDED_A = 200,
	B_UNICODE_BRAHMI = 201,
	B_UNICODE_BAMUM_SUPPLEMENT = 202,
	B_UNICODE_KANA_SUPPLEMENT = 203,
	B_UNICODE_PLAYING_CARDS = 204,
	B_UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205,
	B_UNICODE_EMOTICONS = 206,
	B_UNICODE_TRANSPORT_AND_MAP_SYMBOLS = 207,
	B_UNICODE_ALCHEMICAL_SYMBOLS = 208,
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209,

	// One past the highest regular entry above.
	B_UNICODE_SCRIPT_COUNT = 210,

	// Shares its value with B_UNICODE_SCRIPT_COUNT (210).
	B_UNICODE_NO_SCRIPT = B_UNICODE_SCRIPT_COUNT,

	// The only negative enumerator; it forces a signed underlying type.
	B_UNICODE_INVALID_CODE = -1
};
// Width classes returned by BUnicodeChar::EastAsianWidth(). The names
// follow the Unicode East Asian Width property (presumably mirroring ICU's
// UEastAsianWidth numbering -- confirm against the implementation).
enum unicode_east_asian_width {
	B_UNICODE_EA_NEUTRAL = 0,
	B_UNICODE_EA_AMBIGUOUS = 1,
	B_UNICODE_EA_HALFWIDTH = 2,
	B_UNICODE_EA_FULLWIDTH = 3,
	B_UNICODE_EA_NARROW = 4,
	B_UNICODE_EA_WIDE = 5,

	// Number of width classes above. Must stay last.
	B_UNICODE_EA_COUNT = 6
};
// Collection of static helpers that operate on a single code point passed
// as a uint32, plus a few UTF-8 helpers. Every member function is static and
// the only constructor is private, so the class is used purely as a scope
// for these functions. The implementations are not part of this header; the
// per-function notes below are inferred from the names and signatures and
// should be checked against the implementation.
class BUnicodeChar
{
public:
	// Classification predicates.
	static	bool				IsAlpha(uint32 c);
	static	bool				IsAlNum(uint32 c);
	static	bool				IsDigit(uint32 c);
	static	bool				IsHexDigit(uint32 c);
	static	bool				IsUpper(uint32 c);
	static	bool				IsLower(uint32 c);
	static	bool				IsSpace(uint32 c);
	static	bool				IsWhitespace(uint32 c);
	static	bool				IsControl(uint32 c);
	static	bool				IsPunctuation(uint32 c);
	static	bool				IsPrintable(uint32 c);
	static	bool				IsTitle(uint32 c);
	static	bool				IsDefined(uint32 c);
	static	bool				IsBase(uint32 c);

	// Presumably yields a unicode_char_category value as an int8 --
	// TODO confirm against the implementation.
	static	int8				Type(uint32 c);

	// Case mapping.
	static	uint32				ToLower(uint32 c);
	static	uint32				ToUpper(uint32 c);
	static	uint32				ToTitle(uint32 c);

	// Numeric value lookup; the result for non-digits is not specified
	// in this header.
	static	int32				DigitValue(uint32 c);

	static	unicode_east_asian_width
								EastAsianWidth(uint32 c);

	// UTF-8 conversion. The pointer-to-pointer parameters are
	// presumably advanced past the bytes written/read -- verify.
	static	void				ToUTF8(uint32 c, char** out);
	static	uint32				FromUTF8(const char** in);
	// Defined inline below this class; forwards to the overload above.
	static	uint32				FromUTF8(const char* in);

	static	size_t				UTF8StringLength(const char* string);
	static	size_t				UTF8StringLength(const char* string,
									size_t maxLength);

private:
	// Private and only declared here: keeps outside code from creating
	// instances.
								BUnicodeChar();
};
// Convenience overload of FromUTF8() for callers that hold a plain
// `const char*`. It forwards to the `const char**` overload, passing the
// address of its own by-value parameter, so whatever that overload does to
// the pointer it receives cannot affect the caller's pointer.
inline uint32
BUnicodeChar::FromUTF8(const char* in)
{
	// `in` is already a private copy, so its address can be handed over
	// directly; `&in` has type `const char**` and selects the other overload.
	return FromUTF8(&in);
}
#endif