root/usr/src/cmd/iconv/iconv_main.c
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */

/*
 * iconv(1) command.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <iconv.h>
#include <libintl.h>
#include <langinfo.h>
#include <locale.h>
#include "charmap.h"

#include <assert.h>

/* Program name used in usage messages; set from getprogname() in main(). */
const char *progname;

/*
 * Source and target codeset names.  Taken from the -f / -t options;
 * main() falls back to nl_langinfo(CODESET) for any that is not given.
 * A name containing a '/' selects the charmap-based converter.
 */
char *from_cs;
char *to_cs;
int debug;      /* -d: dump charmaps, report incomplete input sequences */
int cflag;      /* -c: skip invalid characters (no '?' substituted) */
int sflag;      /* -s: silent, suppress illegal-sequence diagnostics */
int lflag;      /* -l: list conversions and exit */

/* Convert one input stream to stdout; the name is used in diagnostics. */
void iconv_file(FILE *, const char *);
/* Defined elsewhere; its return value is used as the exit status for -l. */
extern int list_codesets(void);

iconv_t ich;    /* iconv(3c) lib handle, opened in main() */
/*
 * Conversion routine selected in main(): lib_iconv when using iconv(3c),
 * or cm_iconv when using charmaps.  Called with iconv(3c)-style arguments.
 */
size_t (*pconv)(const char **iptr, size_t *ileft,
                char **optr, size_t *oleft);

/*
 * Adapter that gives iconv(3c) the same signature as the charmap
 * converter, so either one can be installed in pconv.  The conversion
 * handle comes from the global ich; the result (and errno) of the
 * library call is passed straight back to the caller.
 */
size_t
lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
{
        size_t result;

        result = iconv(ich, iptr, ileft, optr, oleft);
        return (result);
}

/*
 * Print both command synopses (conversion form and -l listing form)
 * to stderr and terminate with exit status 1.  Does not return.
 */
void
usage(void)
{
        const char *convert_synopsis;
        const char *list_synopsis;

        convert_synopsis = gettext("usage: %s [-cs] [-f from-codeset] "
            "[-t to-codeset] [file ...]\n");
        (void) fprintf(stderr, convert_synopsis, progname);

        list_synopsis = gettext("\t%s -l\n");
        (void) fprintf(stderr, list_synopsis, progname);

        exit(1);
}

/*
 * iconv(1) entry point.
 *
 * Parses the options, selects the conversion back end (charmap based
 * when either codeset name contains a '/', otherwise iconv(3c)), and
 * then converts each file operand in order to stdout.  With no file
 * operands standard input is converted.  A file operand of "-" means
 * standard input wherever it appears in the operand list.
 *
 * Exits with status 1 on usage errors, when the conversion cannot be
 * set up, when an input file cannot be opened, or when the global
 * `warnings' count is non-zero after conversion; otherwise exits 0.
 * With -l the exit status is whatever list_codesets() returns.
 */
int
main(int argc, char **argv)
{
        FILE *fp;
        char *fslash, *tslash;
        int c;

        yydebug = 0;
        progname = getprogname();

        (void) setlocale(LC_ALL, "");

#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN     "SYS_TEST"
#endif
        (void) textdomain(TEXT_DOMAIN);

        while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) {
                switch (c) {
                case 'c':
                        cflag++;
                        break;
                case 'd':
                        debug++;
                        break;
                case 'l':
                        lflag++;
                        break;
                case 's':
                        sflag++;
                        break;
                case 'f':
                        from_cs = optarg;
                        break;
                case 't':
                        to_cs = optarg;
                        break;
                case '?':
                        /* usage() exits, so no break is needed. */
                        usage();
                }
        }

        /* -l is a stand-alone mode: no codesets and no operands allowed. */
        if (lflag) {
                if (from_cs != NULL || to_cs != NULL || optind != argc)
                        usage();
                exit(list_codesets());
        }

        /* Any codeset not given defaults to that of the current locale. */
        if (from_cs == NULL)
                from_cs = nl_langinfo(CODESET);
        if (to_cs == NULL)
                to_cs = nl_langinfo(CODESET);

        /*
         * If either "from" or "to" contains a slash,
         * then we're using charmaps.
         */
        fslash = strchr(from_cs, '/');
        tslash = strchr(to_cs, '/');
        if (fslash != NULL || tslash != NULL) {
                charmap_init(to_cs, from_cs);
                pconv = cm_iconv;
                if (debug)
                        charmap_dump();
        } else {
                ich = iconv_open(to_cs, from_cs);
                if (ich == ((iconv_t)-1)) {
                        switch (errno) {
                        case EINVAL:
                                (void) fprintf(stderr,
                                    _("Not supported %s to %s\n"),
                                    from_cs, to_cs);
                                break;
                        default:
                                (void) fprintf(stderr,
                                    _("iconv_open failed: %s\n"),
                                    strerror(errno));
                                break;
                        }
                        exit(1);
                }
                pconv = lib_iconv;
        }

        /* No file operands: convert standard input. */
        if (optind == argc) {
                iconv_file(stdin, "stdin");
                exit(warnings ? 1 : 0);
        }

        for (; optind < argc; optind++) {
                /*
                 * A "-" operand names standard input, whether it is the
                 * only operand or appears among other file names.
                 */
                if (strcmp(argv[optind], "-") == 0) {
                        iconv_file(stdin, "stdin");
                        continue;
                }
                fp = fopen(argv[optind], "r");
                if (fp == NULL) {
                        perror(argv[optind]);
                        exit(1);
                }
                iconv_file(fp, argv[optind]);
                (void) fclose(fp);
        }
        exit(warnings ? 1 : 0);
}

/*
 * Conversion buffer sizes:
 *
 * The input buffer has room to prepend one mbs character if needed for
 * handling a left-over at the end of a previous conversion buffer.
 *
 * Conversions may grow or shrink data, so using a larger output buffer
 * to reduce the likelihood of leftover input buffer data in each pass.
 */
#define IBUFSIZ (MB_LEN_MAX + BUFSIZ)
#define OBUFSIZ (2 * BUFSIZ)

void
iconv_file(FILE *fp, const char *fname)
{
        static char ibuf[IBUFSIZ];
        static char obuf[OBUFSIZ];
        const char *iptr;
        char *optr;
        off64_t offset;
        size_t ileft, oleft, ocnt;
        int iconv_errno;
        int nr, nw, rc;

        offset = 0;
        ileft = 0;
        iptr = ibuf + MB_LEN_MAX;

        while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) {

                assert(iptr <= ibuf+MB_LEN_MAX);
                assert(ileft <= MB_LEN_MAX);
                ileft += nr;
                offset += nr;

                optr = obuf;
                oleft = OBUFSIZ;

                /*
                 * Note: the *pconv function is either iconv(3c) or our
                 * private equivalent when using charmaps. Both update
                 * ileft, oleft etc. even when conversion stops due to
                 * an illegal sequence or whatever, so we need to copy
                 * the partially converted buffer even on error.
                 */
        iconv_again:
                rc = (*pconv)(&iptr, &ileft, &optr, &oleft);
                iconv_errno = errno;

                ocnt = OBUFSIZ - oleft;
                if (ocnt > 0) {
                        nw = fwrite(obuf, 1, ocnt, stdout);
                        if (nw != ocnt) {
                                perror("fwrite");
                                exit(1);
                        }
                }
                optr = obuf;
                oleft = OBUFSIZ;

                if (rc == (size_t)-1) {
                        switch (iconv_errno) {

                        case E2BIG:     /* no room in output buffer */
                                goto iconv_again;

                        case EINVAL:    /* incomplete sequence on input */
                                if (debug) {
                                        (void) fprintf(stderr,
                        _("Incomplete sequence in %s at offset %lld\n"),
                                            fname, offset - ileft);
                                }
                                /*
                                 * Copy the remainder to the space reserved
                                 * at the start of the input buffer.
                                 */
                                assert(ileft > 0);
                                if (ileft <= MB_LEN_MAX) {
                                        char *p = ibuf+MB_LEN_MAX-ileft;
                                        (void) memmove(p, iptr, ileft);
                                        iptr = p;
                                        continue; /* read again */
                                }
                                /*
                                 * Should not see ileft > MB_LEN_MAX,
                                 * but if we do, handle as EILSEQ.
                                 */
                                /* FALLTHROUGH */

                        case EILSEQ:    /* invalid sequence on input */
                                if (!sflag) {
                                        (void) fprintf(stderr,
                        _("Illegal sequence in %s at offset %lld\n"),
                                            fname, offset - ileft);
                                        (void) fprintf(stderr,
                        _("bad seq: \\x%02x\\x%02x\\x%02x\n"),
                                            iptr[0] & 0xff,
                                            iptr[1] & 0xff,
                                            iptr[2] & 0xff);
                                }
                                assert(ileft > 0);
                                /* skip one */
                                iptr++;
                                ileft--;
                                assert(oleft > 0);
                                if (!cflag) {
                                        *optr++ = '?';
                                        oleft--;
                                }
                                goto iconv_again;

                        default:
                                (void) fprintf(stderr,
                        _("iconv error (%s) in file $s at offset %lld\n"),
                                    strerror(iconv_errno), fname,
                                    offset - ileft);
                                break;
                        }
                }

                /* normal iconv return */
                ileft = 0;
                iptr = ibuf + MB_LEN_MAX;
        }

        /*
         * End of file
         * Flush any shift encodings.
         */
        iptr = NULL;
        ileft = 0;
        optr = obuf;
        oleft = OBUFSIZ;
        (*pconv)(&iptr, &ileft, &optr, &oleft);
        ocnt = OBUFSIZ - oleft;
        if (ocnt > 0) {
                nw = fwrite(obuf, 1, ocnt, stdout);
                if (nw != ocnt) {
                        perror("fwrite");
                        exit(1);
                }
        }
}