root/usr/src/cmd/sort/utility.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "utility.h"

#include "initialize.h"
#include "statistics.h"
#include "streams_common.h"
#include "streams.h"

/*
 * utility
 *
 * Overview
 *   utility.c contains the general purpose routines used in various locations
 *   throughout sort.  It provides a number of interfaces that maintain local
 *   state relevant to this instance of sort.  We discuss the more significant
 *   of these interfaces below.
 *
 * Output guard
 *   sort is one of the few Unix utilities that is capable of working "in
 *   place"; that is, sort can manipulate an input file and place its output in
 *   a file of the same name safely.  This is handled in this implementation by
 *   the output guard facility.  In the case of an interrupt or other fatal
 *   signal, sort essays to restore the original input file.
 *
 * Temporary file cleanup
 *   Similar to the output guard facility, sort cleans up its temporary files in
 *   the case of interruption (or normal exit, for that matter); this is handled
 *   by registering a list of file pointers for later use by the atexit handler.
 *
 * Temporary filename security
 *   sort protects against "open-through-link" security attacks by verifying
 *   that the selected temporary file name is unused.  If the file name is in
 *   use, the pattern is readjusted until an available name pattern is
 *   discovered.
 *
 * Buffered I/O
 *   sort has a simple buffered I/O facility of its own, to facilitate writing
 *   data in large quantities (particularly for multibyte locales).  cxwrite()
 *   is the base routine, while wxwrite(), which handles multibyte buffers, is
 *   built on top of cxwrite().
 */

#define XBUFFER_SIZE    (32 * KILOBYTE)

#define EXIT_OK         0
#define EXIT_FAILURE    1
#define EXIT_ERROR      2
#define EXIT_INTERNAL   3

static int held_fd = -1;

static stream_t **cleanup_chain = NULL;

static char *output_guard_tempname = NULL;
static ssize_t output_guard_size = 0;
static char *output_guard_filename = NULL;
static int output_guard_copy_complete = 0;

static const char *default_tmpdir = "/var/tmp";
static const char *default_template = "/stmAAAXXXXXX";
static const char *default_template_count = ".00000000";
static char *current_tmpdir;
static char *current_template;

static const char PNAME_FMT[] = "%s: ";
static const char ERRNO_FMT[] = ": %s\n";
static const char *pname = "sort";

void
swap(void **a, void **b)
{
        void *t;

        t = *a;
        *a = *b;
        *b = t;

        __S(stats_incr_swaps());
}

/*
 * Temporary file name template handling.
 */
static void
reset_file_template()
{
        struct stat s;

        do {
                (void) strcpy(current_template, current_tmpdir);
                (void) strcat(current_template, default_template);
                (void) mktemp(current_template);
                (void) strcat(current_template, default_template_count);
        } while (lstat(current_template, &s) != -1);
}

int
bump_file_template()
{
        struct stat s;
        int n = strlen(current_template);
        int i;

        for (i = n - 1; isdigit((uchar_t)current_template[i]); i--) {
                current_template[i]++;
                if (current_template[i] > '9')
                        current_template[i] = '0';
                else
                        break;
        }

        if (!isdigit((uchar_t)current_template[i])) {
                /*
                 * Template has been exhausted, so reset.
                 */
                reset_file_template();
        }

        if (lstat(current_template, &s) == 0) {
                /*
                 * Our newly bumped template has been anticipated; reset to
                 * avoid possible "link-through" attack.
                 */
                reset_file_template();
        }

        return (0);
}

void
set_file_template(char **T)
{
        struct stat s;
        int check_tmpdir = 0;

        if (*T != NULL) {
                current_tmpdir = strdup(*T);
                check_tmpdir = 1;
        } else if ((current_tmpdir = getenv("TMPDIR")) != NULL) {
                check_tmpdir = 1;
        } else {
                current_tmpdir = (char *)default_tmpdir;
        }

        /*
         * Check that the temporary directory given exists, and is a directory.
         */
        if (check_tmpdir) {
                if (stat(current_tmpdir, &s) != 0) {
                        warn(gettext("cannot stat temporary directory %s"),
                            current_tmpdir);

                        current_tmpdir = (char *)default_tmpdir;
                } else if (!S_ISDIR(s.st_mode)) {
                        warn(gettext("%s is not a directory; "
                            "using default temporary directory"),
                            current_tmpdir);

                        current_tmpdir = (char *)default_tmpdir;
                }
        }

        ASSERT(current_tmpdir != NULL);

        current_template = safe_realloc(NULL, strlen(current_tmpdir)
            + strlen(default_template) + strlen(default_template_count) + 1);

        reset_file_template();
}

char *
get_file_template()
{
        return (current_template);
}

/*
 * Output guard routines.
 */
void
establish_output_guard(sort_t *S)
{
        struct stat output_stat;

        if (S->m_output_to_stdout)
                return;

        if (stat(S->m_output_filename, &output_stat) == 0) {
                stream_t *strp = S->m_input_streams;

                while (strp != NULL) {
                        /*
                         * We needn't protect an empty file.
                         */
                        if (!(strp->s_status & STREAM_NOTFILE) &&
                            strp->s_dev == output_stat.st_dev &&
                            strp->s_ino == output_stat.st_ino &&
                            strp->s_filesize > 0) {
                                output_guard_filename = S->m_output_filename;
                                output_guard_size = strp->s_filesize;

                                ASSERT(output_guard_filename != NULL);

                                if (bump_file_template() < 0)
                                        die(EMSG_TEMPORARY);

                                if ((strp->s_filename = output_guard_tempname =
                                    strdup(get_file_template())) == NULL)
                                        die(EMSG_ALLOC);

                                xcp(output_guard_tempname,
                                    output_guard_filename, output_guard_size);

                                output_guard_copy_complete = 1;

                                return;
                        }
                        strp = strp->s_next;
                }
        }
}

void
remove_output_guard()
{
        if (output_guard_tempname && unlink(output_guard_tempname) == -1)
                warn(gettext("unable to unlink %s"), output_guard_tempname);

        output_guard_tempname = NULL;
}

void
set_cleanup_chain(stream_t **strp)
{
        ASSERT(strp != NULL);

        cleanup_chain = strp;
}

/*
 * atexit_handler() cleans up any temporary files outstanding after a fatal
 * signal, a call to die() or at exit().  To preserve the input file under low
 * storage conditions (and both the output file and the temporary files are
 * directed at the same filesystem), we remove all temporary files but the
 * output guard first, and then restore the original file.  Of course, this is
 * not foolproof, as another writer may have exhausted storage.
 */
void
atexit_handler()
{
        stream_t *strp;

        if (cleanup_chain && *cleanup_chain)
                for (strp = *cleanup_chain; strp != NULL; strp = strp->s_next)
                        stream_unlink_temporary(strp);

        if (output_guard_tempname) {
                if (output_guard_copy_complete)
                        xcp(output_guard_filename, output_guard_tempname,
                            output_guard_size);

                remove_output_guard();
        }

        __S(stats_display());
}

size_t
strtomem(char *S)
{
        const char *format_str = "%lf%c";
        double val = 0.0;
        size_t retval;
        char units = 'k';
        size_t phys_total = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);

        if (sscanf(S, format_str, &val, &units) < 1 || val < 0)
                return (0);

        if (units == '%') {
                if (val < 0 || val > 100)
                        return (0);
                val *= phys_total / 100;
        } else
                switch (units) {
                        case 't' : /* terabytes */
                        case 'T' :
                                val *= 1024;
                                /*FALLTHROUGH*/
                        case 'g' : /* gigabytes */
                        case 'G' :
                                val *= 1024;
                                /*FALLTHROUGH*/
                        case 'm' : /* megabytes */
                        case 'M' :
                                val *= 1024;
                                /*FALLTHROUGH*/
                        case 'k' : /* kilobytes */
                        case 'K' :
                                val *= 1024;
                                /*FALLTHROUGH*/
                        case 'b' : /* bytes */
                        case 'B' :
                                break;
                        default :
                                /*
                                 * default is kilobytes
                                 */
                                val *= 1024;
                                break;
                }

        if (val > SIZE_MAX)
                return (0);

        retval = (size_t)val;

        return (retval);
}

size_t
available_memory(size_t mem_limit)
{
        size_t phys_avail = sysconf(_SC_AVPHYS_PAGES) * sysconf(_SC_PAGESIZE);
        size_t avail;

        if (mem_limit != 0) {
#ifdef DEBUG
                /*
                 * In the debug case, we want to test the temporary files
                 * handling, so no lower bound on the memory limit is imposed.
                 */
                avail = mem_limit;
#else
                avail = MAX(64 * KILOBYTE, mem_limit);
#endif /* DEBUG */
        } else {
                avail = MAX(64 * KILOBYTE, MIN(AV_MEM_MULTIPLIER * phys_avail /
                    AV_MEM_DIVISOR, 16 * MEGABYTE));
        }

        __S(stats_set_available_memory(avail));

        return (avail);
}

void
set_memory_ratio(sort_t *S, int *numerator, int *denominator)
{
        if (S->m_c_locale) {
                *numerator = CHAR_AVG_LINE;
                *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
                    CHAR_AVG_LINE + CHAR_AVG_LINE;
                return;
        }

        if (S->m_single_byte_locale) {
                *numerator = CHAR_AVG_LINE;
                *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
                    CHAR_AVG_LINE + XFRM_MULTIPLIER * CHAR_AVG_LINE;
                return;
        }

        *numerator = WCHAR_AVG_LINE;
        *denominator = sizeof (line_rec_t) + sizeof (line_rec_t *) +
            WCHAR_AVG_LINE + WCHAR_AVG_LINE;
}

void *
safe_realloc(void *ptr, size_t sz)
{
        /*
         * safe_realloc() is not meant as an alternative free() mechanism--we
         * disallow reallocations to size zero.
         */
        ASSERT(sz != 0);

        if ((ptr = realloc(ptr, sz)) != NULL)
                return (ptr);

        die(gettext("unable to reallocate buffer"));
        /*NOTREACHED*/
        return (NULL);  /* keep gcc happy */
}

void
safe_free(void *ptr)
{
        if (ptr)
                free(ptr);
}

void *
xzmap(void *addr, size_t len, int prot, int flags, off_t off)
{
        void *pa;

        pa = mmap(addr, len, prot, flags | MAP_ANON, -1, off);
        if (pa == MAP_FAILED)
                die(gettext("can't mmap anonymous memory"));

        return (pa);
}

void
usage()
{
        (void) fprintf(stderr,
            gettext("usage: %s [-cmu] [-o output] [-T directory] [-S mem]"
            " [-z recsz]\n\t[-dfiMnr] [-b] [-t char] [-k keydef]"
            " [+pos1 [-pos2]] files...\n"), CMDNAME);
        exit(E_USAGE);
}

/*
 * hold_file_descriptor() and release_file_descriptor() reserve a single file
 * descriptor entry for later use.  We issue the hold prior to any loop that has
 * an exit condition based on the receipt of EMFILE from an open() call; once we
 * have exited, we can release, typically prior to opening a file for output.
 */
void
hold_file_descriptor()
{
        ASSERT(held_fd == -1);

        if ((held_fd = open("/dev/null", O_RDONLY)) == -1)
                die(gettext("insufficient available file descriptors\n"));
}

void
release_file_descriptor()
{
        ASSERT(held_fd != -1);

        (void) close(held_fd);
        held_fd = -1;
}

void
copy_line_rec(const line_rec_t *a, line_rec_t *b)
{
        (void) memcpy(b, a, sizeof (line_rec_t));
}

void
trip_eof(FILE *f)
{
        if (feof(f))
                return;

        (void) ungetc(fgetc(f), f);
}

/*
 * int cxwrite(int, char *, size_t)
 *
 * Overview
 *   cxwrite() implements a buffered version of fwrite(ptr, nbytes, 1, .) on
 *   file descriptors.  It returns -1 in the case that the write() fails to
 *   write the current buffer contents.  cxwrite() must be flushed before being
 *   applied to a new file descriptor.
 *
 * Return values
 *   0 on success, -1 on error.
 */
int
cxwrite(int fd, char *ptr, size_t nbytes)
{
        static char buffer[XBUFFER_SIZE];
        static size_t offset = 0;
        size_t mbytes;

        if (ptr == NULL) {
                errno = 0;
                while (offset -= write(fd, buffer, offset)) {
                        if (errno)
                                break;
                }

                if (offset)
                        return (-1);

                return (0);
        }

        while (nbytes != 0) {
                if (offset + nbytes > XBUFFER_SIZE)
                        mbytes = XBUFFER_SIZE - offset;
                else
                        mbytes = nbytes;

                (void) memcpy(buffer + offset, ptr, mbytes);
                nbytes -= mbytes;
                offset += mbytes;
                ptr += mbytes;

                if (nbytes) {
                        errno = 0;
                        while (offset -= write(fd, buffer, offset)) {
                                if (errno)
                                        break;
                        }

                        if (offset)
                                return (-1);
                }
        }

        return (0);
}

/*
 * int wxwrite(int, wchar_t *)
 *
 * Overview
 *   wxwrite() implements a buffered write() function for null-terminated wide
 *   character buffers with similar calling semantics to cxwrite().  It returns
 *   -1 in the case that it fails to write the current buffer contents.
 *   wxwrite() must be flushed before being applied to a new file descriptor.
 *
 * Return values
 *   0 on success, -1 on error.
 */
int
wxwrite(int fd, wchar_t *ptr)
{
        static char *convert_buffer;
        static size_t convert_bufsize = 1024;
        size_t req_bufsize;

        if (ptr == NULL)
                return (cxwrite(fd, NULL, 0));

        if (convert_buffer == NULL)
                convert_buffer = safe_realloc(NULL, convert_bufsize);
        /*
         * We use wcstombs(NULL, ., .) to verify that we have an adequate
         * buffer size for the conversion.  Since this buffer was converted into
         * wide character format earlier, we can safely assume that the buffer
         * can be converted back to the external multibyte form.
         */
        req_bufsize = wcstombs(NULL, ptr, convert_bufsize);
        if (req_bufsize > convert_bufsize) {
                convert_bufsize = req_bufsize + 1;
                convert_buffer = safe_realloc(convert_buffer, convert_bufsize);
        }

        (void) wcstombs(convert_buffer, ptr, convert_bufsize);

        return (cxwrite(fd, convert_buffer, req_bufsize));
}

int
xstreql(const char *a, const char *b)
{
        return (strcmp(a, b) == 0);
}

int
xstrneql(const char *a, const char *b, const size_t l)
{
        return (strncmp(a, b, l) == 0);
}

char *
xstrnchr(const char *S, const int c, const size_t n)
{
        const char      *eS = S + n;

        do {
                if (*S == (char)c)
                        return ((char *)S);
        } while (++S < eS);

        return (NULL);
}

void
xstrninv(char *s, ssize_t start, ssize_t length)
{
        ssize_t i;

        for (i = start; i < start + length; i++)
                s[i] = UCHAR_MAX - s[i];
}

int
xwcsneql(const wchar_t *a, const wchar_t *b, const size_t length)
{
        return (wcsncmp(a, b, length) == 0);
}

wchar_t *
xwsnchr(const wchar_t *ws, const wint_t wc, const size_t n)
{
        const wchar_t   *ews = ws + n;

        do {
                if (*ws == (wchar_t)wc)
                        return ((wchar_t *)ws);
        } while (++ws < ews);

        return (NULL);
}

void
xwcsninv(wchar_t *s, ssize_t start, ssize_t length)
{
        ssize_t i;

        for (i = start; i < start + length; i++)
                s[i] = WCHAR_MAX - s[i];
}

#ifdef _LITTLE_ENDIAN
void
xwcsntomsb(wchar_t *s, ssize_t length)
{
        ssize_t i;

        ASSERT(sizeof (wchar_t) == sizeof (uint32_t));

        for (i = 0; i < length; i++, s++) {
                char *t = (char *)s;
                char u;

                u = *t;
                *t = *(t + 3);
                *(t + 3) = u;

                u = *(t + 1);
                *(t + 1) = *(t + 2);
                *(t + 2) = u;
        }
}
#endif /* _LITTLE_ENDIAN */

wchar_t *
xmemwchar(wchar_t *s, wchar_t w, ssize_t length)
{
        ssize_t i = length;

        while (--i > 0) {
                if (*s == w)
                        return (s);
                s++;
        }

        return (NULL);
}

void
xcp(char *dst, char *src, off_t size)
{
        int fd_in, fd_out;
        void *mm_in;
        size_t chunksize = 2 * MEGABYTE;
        int i;
        ssize_t nchunks = size / chunksize;
        ssize_t lastchunk = size % chunksize;

        if (dst == NULL || src == NULL)
                return;

        if ((fd_in = open(src, O_RDONLY)) < 0)
                die(EMSG_OPEN, src);
        if ((fd_out = open(dst, O_RDWR | O_CREAT | O_TRUNC, OUTPUT_MODE)) < 0)
                die(EMSG_OPEN, dst);

        for (i = 0; i < nchunks; i++) {
                if ((mm_in = mmap(0, chunksize, PROT_READ, MAP_SHARED, fd_in,
                    i * chunksize)) == MAP_FAILED)
                        die(EMSG_MMAP, src);

                if (write(fd_out, mm_in, chunksize) != chunksize)
                        die(EMSG_WRITE, dst);

                (void) munmap(mm_in, chunksize);
        }

        if (lastchunk) {
                if ((mm_in = mmap(0, lastchunk, PROT_READ, MAP_SHARED, fd_in,
                    nchunks * chunksize)) == MAP_FAILED)
                        die(EMSG_MMAP, src);

                if (write(fd_out, mm_in, lastchunk) != lastchunk)
                        die(EMSG_WRITE, dst);

                (void) munmap(mm_in, lastchunk);
        }

        (void) close(fd_in);

        if (close(fd_out) == -1)
                die(EMSG_CLOSE, dst);
}

/*PRINTFLIKE1*/
void
warn(const char *format, ...)
{
        int err = errno;
        va_list alist;

        if (pname != NULL)
                (void) fprintf(stderr, gettext(PNAME_FMT), pname);

        va_start(alist, format);
        (void) vfprintf(stderr, format, alist);
        va_end(alist);

        if (strrchr(format, '\n') == NULL)
                (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err));
}

/*PRINTFLIKE1*/
void
die(const char *format, ...)
{
        int err = errno;
        va_list alist;

        if (pname != NULL)
                (void) fprintf(stderr, gettext(PNAME_FMT), pname);

        va_start(alist, format);
        (void) vfprintf(stderr, format, alist);
        va_end(alist);

        if (strrchr(format, '\n') == NULL)
                (void) fprintf(stderr, gettext(ERRNO_FMT), strerror(err));

        exit(E_ERROR);
}

#ifdef DEBUG
/*
 * pprintc() is called only by xdump().
 */
#define BYTES_PER_LINE  16
static void
pprintc(FILE *fp, char c)
{
        if (isspace((uchar_t)c))
                (void) fprintf(fp, " ");
        else if (isprint((uchar_t)c))
                (void) fprintf(fp, "%c", c);
        else
                (void) fprintf(fp, ".");
}

static void
pprintwc(FILE *fp, wchar_t c)
{
        if (iswspace(c))
                (void) fprintf(fp, " ");
        else if (iswprint(c))
                (void) fprintf(fp, "%wc", c);
        else
                (void) fprintf(fp, ".");
}

/*
 * xdump() is used only for debugging purposes.
 */
void
xdump(FILE *fp, uchar_t *buf, size_t bufsize, int wide)
{
        int i;
        size_t nc = 0;
        uchar_t d[BYTES_PER_LINE];

        for (; nc < bufsize; buf++) {
                d[nc % BYTES_PER_LINE] = *buf;
                if (nc % BYTES_PER_LINE == 0) {
                        (void) fprintf(fp, "%08x:", nc);
                }
                (void) fprintf(fp, " %02x", *buf);
                nc++;
                if (nc % BYTES_PER_LINE == 0) {
                        (void) fprintf(fp, "  ");
                        if (wide) {
                                for (i = 0; i < BYTES_PER_LINE;
                                    i += sizeof (wchar_t))
                                        pprintwc(fp, *(wchar_t *)(d + i));
                        } else {
                                for (i = 0; i < BYTES_PER_LINE; i++)
                                        pprintc(fp, d[i]);
                        }
                        (void) fprintf(fp, "\n");
                }
        }

        for (i = nc % BYTES_PER_LINE; i < BYTES_PER_LINE; i++)
                (void) fprintf(fp, "   ");

        (void) fprintf(fp, "  ");

        if (wide) {
                for (i = 0; i < nc % BYTES_PER_LINE; i += sizeof (wchar_t))
                        pprintwc(fp, *(wchar_t *)(d + i));
        } else {
                for (i = 0; i < nc % BYTES_PER_LINE; i++)
                        pprintc(fp, d[i]);
        }

        (void) fprintf(fp, "\n");
}
#endif /* DEBUG */