src/kits/locale/TextEncoding.cpp

root/src/kits/locale/TextEncoding.cpp
/*
 * Copyright 2016, Haiku, inc.
 * Distributed under terms of the MIT license.
 */


#include "TextEncoding.h"

#include <unicode/ucnv.h>
#include <unicode/ucsdet.h>

#include <algorithm>


namespace BPrivate {


BTextEncoding::BTextEncoding(BString name)
        :
        fName(name),
        fUtf8Converter(NULL),
        fConverter(NULL)
{
}


BTextEncoding::BTextEncoding(const char* data, size_t length)
        :
        fUtf8Converter(NULL),
        fConverter(NULL)
{
        UErrorCode error = U_ZERO_ERROR;

        UCharsetDetector* detector = ucsdet_open(&error);
        ucsdet_setText(detector, data, length, &error);
        const UCharsetMatch* encoding = ucsdet_detect(detector, &error);

        fName = ucsdet_getName(encoding, &error);
        ucsdet_close(detector);
}


BTextEncoding::~BTextEncoding()
{
        if (fUtf8Converter != NULL)
                ucnv_close(fUtf8Converter);

        if (fConverter != NULL)
                ucnv_close(fConverter);
}


status_t
BTextEncoding::InitCheck()
{
        if (fName.IsEmpty())
                return B_NO_INIT;
        else
                return B_OK;
}


status_t
BTextEncoding::Decode(const char* input, size_t& inputLength, char* output,
        size_t& outputLength)
{
        const char* base = input;
        char* target = output;

        // Optimize the easy case.
        // Note: we don't check the input to be valid UTF-8 when doing that.
        if (fName == "UTF-8") {
                outputLength = std::min(inputLength, outputLength);
                inputLength = outputLength;
                memcpy(output, input, inputLength);
                return B_OK;
        }

        UErrorCode error = U_ZERO_ERROR;

        if (fUtf8Converter == NULL)
                fUtf8Converter = ucnv_open("UTF-8", &error);

        if (fConverter == NULL)
                fConverter = ucnv_open(fName.String(), &error);

        ucnv_convertEx(fUtf8Converter, fConverter, &target, output + outputLength,
                &base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE,
                &error);

        // inputLength is set to the number of bytes consumed. We may not use all of
        // the input data (for example if it is cut in the middle of an utf-8 char).
        inputLength = base - input;
        outputLength = target - output;

        if (!U_SUCCESS(error))
                return B_ERROR;

        return B_OK;
}


status_t
BTextEncoding::Encode(const char* input, size_t& inputLength, char* output,
        size_t& outputLength)
{
        const char* base = input;
        char* target = output;

        // Optimize the easy case.
        // Note: we don't check the input to be valid UTF-8 when doing that.
        if (fName == "UTF-8") {
                outputLength = std::min(inputLength, outputLength);
                inputLength = outputLength;
                memcpy(output, input, inputLength);
                return B_OK;
        }

        UErrorCode error = U_ZERO_ERROR;

        if (fUtf8Converter == NULL)
                fUtf8Converter = ucnv_open("UTF-8", &error);

        if (fConverter == NULL)
                fConverter = ucnv_open(fName.String(), &error);

        ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength,
                &base, input + inputLength, NULL, NULL, NULL, NULL, FALSE, TRUE,
                &error);

        // inputLength is set to the number of bytes consumed. We may not use all of
        // the input data (for example if it is cut in the middle of an utf-8 char).
        inputLength = base - input;
        outputLength = target - output;

        if (!U_SUCCESS(error))
                return B_ERROR;

        return B_OK;
}


status_t
BTextEncoding::Flush(char* output, size_t& outputLength)
{
        char* target = output;

        if (fName == "UTF-8")
                return B_OK;

        if (fUtf8Converter == NULL || fConverter == NULL)
                return B_NO_INIT;

        UErrorCode error = U_ZERO_ERROR;

        ucnv_convertEx(fConverter, fUtf8Converter, &target, output + outputLength,
                NULL, NULL, NULL, NULL, NULL, NULL, FALSE, TRUE,
                &error);

        if (!U_SUCCESS(error))
                return B_ERROR;

        return B_OK;
}


BString
BTextEncoding::GetName()
{
        return fName;
}


};