root/lib/libiconv_modules/iconv_std/citrus_iconv_std.c
/*      $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $        */

/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c)2003 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/endian.h>
#include <sys/queue.h>

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "citrus_namespace.h"
#include "citrus_types.h"
#include "citrus_module.h"
#include "citrus_region.h"
#include "citrus_mmap.h"
#include "citrus_hash.h"
#include "citrus_iconv.h"
#include "citrus_stdenc.h"
#include "citrus_mapper.h"
#include "citrus_csmapper.h"
#include "citrus_memstream.h"
#include "citrus_iconv_std.h"
#include "citrus_esdb.h"

/* ---------------------------------------------------------------------- */

_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);


/* ---------------------------------------------------------------------- */

/*
 * Copy this module's iconv operations table into the caller-supplied
 * structure `ops'.  Always returns 0.
 */
int
_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
{
        const size_t opssz = sizeof(_citrus_iconv_std_iconv_ops);

        memcpy(ops, &_citrus_iconv_std_iconv_ops, opssz);
        return (0);
}

/* ---------------------------------------------------------------------- */

/*
 * convenience routines for stdenc.
 */
/*
 * Snapshot the live state of `se' into its backup buffer so that it can be
 * rolled back later.  Nothing happens when `se' has no state buffer.
 */
static __inline void
save_encoding_state(struct _citrus_iconv_std_encoding *se)
{
        size_t pssz;

        if (se->se_ps == NULL)
                return;

        pssz = _stdenc_get_state_size(se->se_handle);
        memcpy(se->se_pssaved, se->se_ps, pssz);
}

/*
 * Roll the live state of `se' back to the copy held in its backup buffer.
 * Nothing happens when `se' has no state buffer.
 */
static __inline void
restore_encoding_state(struct _citrus_iconv_std_encoding *se)
{
        size_t pssz;

        if (se->se_ps == NULL)
                return;

        pssz = _stdenc_get_state_size(se->se_handle);
        memcpy(se->se_ps, se->se_pssaved, pssz);
}

static __inline void
init_encoding_state(struct _citrus_iconv_std_encoding *se)
{

        if (se->se_ps)
                _stdenc_init_state(se->se_handle, se->se_ps);
}

/*
 * Call _stdenc_mbtocs() with the handle and live state kept in `se'; all
 * other arguments are passed through and its result is returned as is.
 */
static __inline int
mbtocsx(struct _citrus_iconv_std_encoding *se,
    _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
    struct iconv_hooks *hooks)
{
        int error;

        error = _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
            nresult, hooks);

        return (error);
}

/*
 * Call _stdenc_cstomb() with the handle and live state kept in `se'; all
 * other arguments are passed through and its result is returned as is.
 */
static __inline int
cstombx(struct _citrus_iconv_std_encoding *se,
    char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
    struct iconv_hooks *hooks)
{
        int error;

        error = _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
            nresult, hooks);

        return (error);
}

/*
 * Call _stdenc_wctomb() with the handle and live state kept in `se'; all
 * other arguments are passed through and its result is returned as is.
 */
static __inline int
wctombx(struct _citrus_iconv_std_encoding *se,
    char *s, size_t n, _wc_t wc, size_t *nresult,
    struct iconv_hooks *hooks)
{
        int error;

        error = _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
            hooks);

        return (error);
}

static __inline int
put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
    size_t *nresult)
{

        return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
}

static __inline int
get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
{
        struct _stdenc_state_desc ssd;
        int ret;

        ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
            _STDENC_SDID_GENERIC, &ssd);
        if (!ret)
                *rstate = ssd.u.generic.state;

        return (ret);
}

/*
 * Initialize an encoding context.
 *
 * Binds the encoding handle `cs' and the two state buffers to `se', then
 * initializes every buffer that is present through _stdenc_init_state().
 * `ps1' becomes the live state and `ps2' the saved copy used by
 * save_encoding_state()/restore_encoding_state().
 *
 * Returns 0 on success, or the value returned by the first failing
 * _stdenc_init_state() call.  An encoding without state buffers (NULL
 * `ps1'/`ps2') has nothing to initialize and therefore succeeds; it used to
 * be reported as -1, which is neither a success nor an errno value.
 */
static int
init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
    void *ps1, void *ps2)
{
        int ret = 0;

        se->se_handle = cs;
        se->se_ps = ps1;
        se->se_pssaved = ps2;

        if (se->se_ps != NULL)
                ret = _stdenc_init_state(cs, se->se_ps);
        /* Do not touch the saved copy if the live state failed to init. */
        if (ret == 0 && se->se_pssaved != NULL)
                ret = _stdenc_init_state(cs, se->se_pssaved);

        return (ret);
}

/*
 * Open the charset mapper from `src' to `dst' and hand it back via `*rcm'.
 *
 * Only mappers whose source and destination maxima are both 1 and whose
 * state size is 0 are accepted; any other mapper is closed again and EINVAL
 * is returned.  `*rcm' is written only on success.  `rnorm' is passed
 * through to _csmapper_open().
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
        struct _csmapper *mapper;
        int error;

        error = _csmapper_open(&mapper, src, dst, 0, rnorm);
        if (error != 0)
                return (error);

        if (_csmapper_get_src_max(mapper) == 1 &&
            _csmapper_get_dst_max(mapper) == 1 &&
            _csmapper_get_state_size(mapper) == 0) {
                *rcm = mapper;
                return (0);
        }

        /* Not a stateless one-to-one mapper: refuse it. */
        _csmapper_close(mapper);
        return (EINVAL);
}

/*
 * Empty the destination list `dl': every entry is unlinked, its mapper is
 * closed and the entry itself is freed.
 */
static void
close_dsts(struct _citrus_iconv_std_dst_list *dl)
{
        struct _citrus_iconv_std_dst *head;

        for (;;) {
                head = TAILQ_FIRST(dl);
                if (head == NULL)
                        break;
                TAILQ_REMOVE(dl, head, sd_entry);
                _csmapper_close(head->sd_mapper);
                free(head);
        }
}

static int
open_dsts(struct _citrus_iconv_std_dst_list *dl,
    const struct _esdb_charset *ec, const struct _esdb *dbdst)
{
        struct _citrus_iconv_std_dst *sd, *sdtmp;
        unsigned long norm;
        int i, ret;

        sd = malloc(sizeof(*sd));
        if (sd == NULL)
                return (errno);

        for (i = 0; i < dbdst->db_num_charsets; i++) {
                ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
                    dbdst->db_charsets[i].ec_csname, &norm);
                if (ret == 0) {
                        sd->sd_csid = dbdst->db_charsets[i].ec_csid;
                        sd->sd_norm = norm;
                        /* insert this mapper by sorted order. */
                        TAILQ_FOREACH(sdtmp, dl, sd_entry) {
                                if (sdtmp->sd_norm > norm) {
                                        TAILQ_INSERT_BEFORE(sdtmp, sd,
                                            sd_entry);
                                        sd = NULL;
                                        break;
                                }
                        }
                        if (sd)
                                TAILQ_INSERT_TAIL(dl, sd, sd_entry);
                        sd = malloc(sizeof(*sd));
                        if (sd == NULL) {
                                ret = errno;
                                close_dsts(dl);
                                return (ret);
                        }
                } else if (ret != ENOENT) {
                        close_dsts(dl);
                        free(sd);
                        return (ret);
                }
        }
        free(sd);
        return (0);
}

/*
 * Empty the source list `sl': every entry is unlinked, its destination list
 * is torn down with close_dsts() and the entry itself is freed.
 */
static void
close_srcs(struct _citrus_iconv_std_src_list *sl)
{
        struct _citrus_iconv_std_src *head;

        for (;;) {
                head = TAILQ_FIRST(sl);
                if (head == NULL)
                        break;
                TAILQ_REMOVE(sl, head, ss_entry);
                close_dsts(&head->ss_dsts);
                free(head);
        }
}

static int
open_srcs(struct _citrus_iconv_std_src_list *sl,
    const struct _esdb *dbsrc, const struct _esdb *dbdst)
{
        struct _citrus_iconv_std_src *ss;
        int count = 0, i, ret;

        ss = malloc(sizeof(*ss));
        if (ss == NULL)
                return (errno);

        TAILQ_INIT(&ss->ss_dsts);

        for (i = 0; i < dbsrc->db_num_charsets; i++) {
                ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
                if (ret)
                        goto err;
                if (!TAILQ_EMPTY(&ss->ss_dsts)) {
                        ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
                        TAILQ_INSERT_TAIL(sl, ss, ss_entry);
                        ss = malloc(sizeof(*ss));
                        if (ss == NULL) {
                                ret = errno;
                                goto err;
                        }
                        count++;
                        TAILQ_INIT(&ss->ss_dsts);
                }
        }
        free(ss);

        return (count ? 0 : ENOENT);

err:
        free(ss);
        close_srcs(sl);
        return (ret);
}

/*
 * Convert a single character, given as a (csid, index) pair, from the
 * source charset to a destination charset.
 *
 * The source entry whose csid matches `*csid' is looked up and its mappers
 * are tried in list order.  On the first _MAPPER_CONVERT_SUCCESS, `*csid'
 * and `*idx' are overwritten with the destination values and 0 is returned.
 * _MAPPER_CONVERT_NONIDENTICAL moves on to the next mapper.
 * SRC_MORE, DST_MORE and ILSEQ yield EILSEQ and FATAL yields EINVAL.
 * E_NO_CORRESPONDING_CHAR is returned when no source entry matches or no
 * mapper succeeds.
 */
#define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
static int
/*ARGSUSED*/
do_conv(const struct _citrus_iconv_std_shared *is,
        _csid_t *csid, _index_t *idx)
{
        struct _citrus_iconv_std_dst *dst;
        struct _citrus_iconv_std_src *src;
        _index_t mapped;

        /* Locate the source charset entry; only the first match counts. */
        TAILQ_FOREACH(src, &is->is_srcs, ss_entry) {
                if (ss_csid_matches:
                    src->ss_csid == *csid)
                        break;
        }
        if (src == NULL)
                return (E_NO_CORRESPONDING_CHAR);

        TAILQ_FOREACH(dst, &src->ss_dsts, sd_entry) {
                switch (_csmapper_convert(dst->sd_mapper, &mapped, *idx,
                    NULL)) {
                case _MAPPER_CONVERT_SUCCESS:
                        *csid = dst->sd_csid;
                        *idx = mapped;
                        return (0);
                case _MAPPER_CONVERT_NONIDENTICAL:
                        /* Try the next mapper. */
                        break;
                case _MAPPER_CONVERT_SRC_MORE:
                        /*FALLTHROUGH*/
                case _MAPPER_CONVERT_DST_MORE:
                        /*FALLTHROUGH*/
                case _MAPPER_CONVERT_ILSEQ:
                        return (EILSEQ);
                case _MAPPER_CONVERT_FATAL:
                        return (EINVAL);
                }
        }

        return (E_NO_CORRESPONDING_CHAR);
}
/* ---------------------------------------------------------------------- */

static int
/*ARGSUSED*/
_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
    const char * __restrict src, const char * __restrict dst)
{
        struct _citrus_esdb esdbdst, esdbsrc;
        struct _citrus_iconv_std_shared *is;
        int ret;

        is = malloc(sizeof(*is));
        if (is == NULL) {
                ret = errno;
                goto err0;
        }
        ret = _citrus_esdb_open(&esdbsrc, src);
        if (ret)
                goto err1;
        ret = _citrus_esdb_open(&esdbdst, dst);
        if (ret)
                goto err2;
        ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
            esdbsrc.db_variable, esdbsrc.db_len_variable);
        if (ret)
                goto err3;
        ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
            esdbdst.db_variable, esdbdst.db_len_variable);
        if (ret)
                goto err4;
        is->is_use_invalid = esdbdst.db_use_invalid;
        is->is_invalid = esdbdst.db_invalid;

        TAILQ_INIT(&is->is_srcs);
        ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
        if (ret)
                goto err5;

        _esdb_close(&esdbsrc);
        _esdb_close(&esdbdst);
        ci->ci_closure = is;

        return (0);

err5:
        _stdenc_close(is->is_dst_encoding);
err4:
        _stdenc_close(is->is_src_encoding);
err3:
        _esdb_close(&esdbdst);
err2:
        _esdb_close(&esdbsrc);
err1:
        free(is);
err0:
        return (ret);
}

/*
 * Release the shared state hanging off ci->ci_closure: both encodings are
 * closed, the mapper lists are torn down and the structure is freed.  A NULL
 * closure is ignored.
 */
static void
_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
{
        struct _citrus_iconv_std_shared *shared;

        shared = ci->ci_closure;
        if (shared != NULL) {
                _stdenc_close(shared->is_src_encoding);
                _stdenc_close(shared->is_dst_encoding);
                close_srcs(&shared->is_srcs);
                free(shared);
        }
}

/*
 * Allocate and set up the conversion context of `cv' and store it in
 * cv->cv_closure.
 *
 * The context structure and four state buffers are carved out of a single
 * allocation: live and saved state for the source encoding, followed by
 * live and saved state for the destination encoding.  An encoding whose
 * state size is 0 is given NULL buffers instead.  The results of
 * init_encoding() are not checked.  Returns 0, or errno if the allocation
 * fails.
 */
static int
_citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
{
        const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
        struct _citrus_iconv_std_context *sc;
        char *dstps, *srcps;
        size_t dstsz, srcsz;

        srcsz = _stdenc_get_state_size(is->is_src_encoding);
        dstsz = _stdenc_get_state_size(is->is_dst_encoding);

        sc = malloc(sizeof(struct _citrus_iconv_std_context) +
            (srcsz + dstsz) * 2);
        if (sc == NULL)
                return (errno);

        /* The state buffers start right behind the context structure. */
        srcps = (char *)&sc[1];
        dstps = srcps + srcsz * 2;

        if (srcsz == 0)
                init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
                    NULL, NULL);
        else
                init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
                    srcps, srcps + srcsz);

        if (dstsz == 0)
                init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
                    NULL, NULL);
        else
                init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
                    dstps, dstps + dstsz);

        cv->cv_closure = (void *)sc;

        return (0);
}

/*
 * Free the conversion context stored in cv->cv_closure.  The pointer in
 * `cv' is not cleared.
 */
static void
_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
{
        void *closure;

        closure = cv->cv_closure;
        free(closure);
}

/*
 * Convert input bytes to the destination encoding.
 *
 * cv        conversion handle; supplies the shared tables and the context.
 * in        address of the input cursor; advanced past consumed bytes.
 * inbytes   remaining input bytes; decreased accordingly.
 * out       address of the output cursor; advanced past produced bytes.
 * outbytes  remaining output space; decreased accordingly.
 * flags     _CITRUS_ICONV_F_HIDE_INVALID suppresses writing the replacement
 *           character for unmappable input.
 * invalids  receives the number of characters that had no counterpart in
 *           the destination (always written, also on error).
 *
 * When `in' or `*in' is NULL no input is converted.  If an output buffer is
 * given, the sequence produced by put_state_resetx() is stored there;
 * otherwise the destination state is simply re-initialized.  The source
 * state is re-initialized in both cases.
 *
 * Returns 0 on success.  On error the encoding states are rolled back to
 * what they were before the failing character, the cursors stay at that
 * character and the error is returned: EINVAL for an incomplete character
 * or an output buffer too small for the reset sequence, EILSEQ for an
 * invalid or (when ci_ilseq_invalid is set) unmappable character, or
 * whatever the encoding/mapper helpers returned.
 */
static int
_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
    char * __restrict * __restrict in, size_t * __restrict inbytes,
    char * __restrict * __restrict out, size_t * __restrict outbytes,
    uint32_t flags, size_t * __restrict invalids)
{
        const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
        struct _citrus_iconv_std_context *sc = cv->cv_closure;
        _csid_t csid;
        _index_t idx;
        char *tmpin;
        size_t inval, in_mb_cur_min, skip, szrin, szrout;
        int ret, state = 0;

        inval = 0;
        if (in == NULL || *in == NULL) {
                /* special cases */
                if (out != NULL && *out != NULL) {
                        /* init output state and store the shift sequence */
                        save_encoding_state(&sc->sc_src_encoding);
                        save_encoding_state(&sc->sc_dst_encoding);
                        szrout = 0;

                        ret = put_state_resetx(&sc->sc_dst_encoding,
                            *out, *outbytes, &szrout);
                        if (ret)
                                goto err;

                        if (szrout == (size_t)-2) {
                                /* too small to store the character */
                                ret = EINVAL;
                                goto err;
                        }
                        *out += szrout;
                        *outbytes -= szrout;
                } else {
                        /* otherwise, discard the shift sequence */
                        init_encoding_state(&sc->sc_dst_encoding);
                }
                /* The source state is reset in both cases. */
                init_encoding_state(&sc->sc_src_encoding);
                *invalids = 0;
                return (0);
        }

        in_mb_cur_min = _stdenc_get_mb_cur_min(is->is_src_encoding);

        /* normal case */
        for (;;) {
                if (*inbytes == 0) {
                        /*
                         * Input exhausted: stop once the source state is
                         * initial or stable.  If the query fails, `state'
                         * keeps its previous value.
                         */
                        ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
                        if (state == _STDENC_SDGEN_INITIAL ||
                            state == _STDENC_SDGEN_STABLE)
                                break;
                }

                /* save the encoding states for the error recovery */
                save_encoding_state(&sc->sc_src_encoding);
                save_encoding_state(&sc->sc_dst_encoding);

                /* mb -> csid/index */
                tmpin = *in;
                szrin = szrout = 0;
                ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
                    *inbytes, &szrin, cv->cv_shared->ci_hooks);
                if (ret != 0 && (ret != EILSEQ ||
                    !cv->cv_shared->ci_discard_ilseq)) {
                        goto err;
                } else if (ret == EILSEQ) {
                        /*
                         * If //IGNORE was specified, we'll just keep crunching
                         * through invalid characters.
                         *
                         * Never skip more bytes than are left: `*inbytes' is
                         * unsigned and would otherwise wrap around, sending
                         * the loop past the end of the input.  If nothing can
                         * be skipped no progress is possible, so report the
                         * invalid sequence instead of spinning.
                         */
                        skip = in_mb_cur_min;
                        if (skip > *inbytes)
                                skip = *inbytes;
                        if (skip == 0)
                                goto err;
                        *in += skip;
                        *inbytes -= skip;
                        restore_encoding_state(&sc->sc_src_encoding);
                        restore_encoding_state(&sc->sc_dst_encoding);
                        continue;
                }

                if (szrin == (size_t)-2) {
                        /* incomplete character */
                        ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
                        if (ret) {
                                ret = EINVAL;
                                goto err;
                        }
                        switch (state) {
                        case _STDENC_SDGEN_INITIAL:
                        case _STDENC_SDGEN_STABLE:
                                /* fetch shift sequences only. */
                                goto next;
                        }
                        ret = EINVAL;
                        goto err;
                }
                /* convert the character */
                ret = do_conv(is, &csid, &idx);
                if (ret) {
                        if (ret == E_NO_CORRESPONDING_CHAR) {
                                /*
                                 * GNU iconv returns EILSEQ when no
                                 * corresponding character in the output.
                                 * Some software depends on this behavior
                                 * though this is against POSIX specification.
                                 */
                                if (cv->cv_shared->ci_ilseq_invalid != 0) {
                                        ret = EILSEQ;
                                        goto err;
                                }
                                inval++;
                                szrout = 0;
                                /*
                                 * Emit the destination's replacement
                                 * character unless the caller hides invalid
                                 * characters, //IGNORE is in effect or the
                                 * destination defines none.
                                 */
                                if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
                                    !cv->cv_shared->ci_discard_ilseq) &&
                                    is->is_use_invalid) {
                                        ret = wctombx(&sc->sc_dst_encoding,
                                            *out, *outbytes, is->is_invalid,
                                            &szrout, cv->cv_shared->ci_hooks);
                                        if (ret)
                                                goto err;
                                }
                                goto next;
                        } else
                                goto err;
                }
                /* csid/index -> mb */
                ret = cstombx(&sc->sc_dst_encoding,
                    *out, *outbytes, csid, idx, &szrout,
                    cv->cv_shared->ci_hooks);
                if (ret)
                        goto err;
next:
                /* Commit the progress made on this character. */
                *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
                *in = tmpin;
                *outbytes -= szrout;
                *out += szrout;
        }
        *invalids = inval;

        return (0);

err:
        /* Undo any state change made by the failing character. */
        restore_encoding_state(&sc->sc_src_encoding);
        restore_encoding_state(&sc->sc_dst_encoding);
        *invalids = inval;

        return (ret);
}