root/lib/libc/time/strptime.c
/*      $OpenBSD: strptime.c,v 1.34 2025/11/20 10:59:56 tb Exp $ */
/*      $NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $    */
/*-
 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code was contributed to The NetBSD Foundation by Klaus Klein.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "localedef.h"
#include "private.h"
#include "tzfile.h"

#define _ctloc(x)               (_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 */
#define _ALT_E                  0x01
#define _ALT_O                  0x02
#define _LEGAL_ALT(x)           { if (alt_format & ~(x)) return (0); }

/*
 * We keep track of some of the fields we set in order to compute missing ones.
 */
#define FIELD_TM_MON    (1 << 0)
#define FIELD_TM_MDAY   (1 << 1)
#define FIELD_TM_WDAY   (1 << 2)
#define FIELD_TM_YDAY   (1 << 3)
#define FIELD_TM_YEAR   (1 << 4)

static char const gmt[] = { "GMT" };
static char const utc[] = { "UTC" };
/* RFC-822/RFC-2822 */
static const char * const nast[5] = {
       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
};
static const char * const nadt[5] = {
       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
};

static const int mon_lengths[2][MONSPERYEAR] = {
        { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
        { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
};

static  int _conv_num(const unsigned char **, int *, int, int);
static  int epoch_to_tm(const unsigned char **, struct tm *);
static  int leaps_thru_end_of(const int y);
static  char *_strptime(const char *, const char *, struct tm *, int);
static  const u_char *_find_string(const u_char *, int *, const char * const *,
            const char * const *, int);


char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
        return(_strptime(buf, fmt, tm, 1));
}
DEF_WEAK(strptime);

static char *
_strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
{
        unsigned char c;
        const unsigned char *bp, *ep;
        size_t len;
        int alt_format, i, offs;
        int neg = 0;
        static int century, relyear, fields;

        if (initialize) {
                century = TM_YEAR_BASE;
                relyear = -1;
                fields = 0;
        }

        bp = (const unsigned char *)buf;
        while ((c = *fmt) != '\0') {
                /* Clear `alternate' modifier prior to new conversion. */
                alt_format = 0;

                /* Eat up white-space. */
                if (isspace(c)) {
                        while (isspace(*bp))
                                bp++;

                        fmt++;
                        continue;
                }

                if ((c = *fmt++) != '%')
                        goto literal;


again:          switch (c = *fmt++) {
                case '%':       /* "%%" is converted to "%". */
literal:
                if (c != *bp++)
                        return (NULL);

                break;

                /*
                 * "Alternative" modifiers. Just set the appropriate flag
                 * and start over again.
                 */
                case 'E':       /* "%E?" alternative conversion modifier. */
                        _LEGAL_ALT(0);
                        alt_format |= _ALT_E;
                        goto again;

                case 'O':       /* "%O?" alternative conversion modifier. */
                        _LEGAL_ALT(0);
                        alt_format |= _ALT_O;
                        goto again;

                /*
                 * "Complex" conversion rules, implemented through recursion.
                 */
                case 'c':       /* Date and time, using the locale's format. */
                        _LEGAL_ALT(_ALT_E);
                        if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
                                return (NULL);
                        break;

                case 'D':       /* The date as "%m/%d/%y". */
                        _LEGAL_ALT(0);
                        if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
                                return (NULL);
                        break;

                case 'F':       /* The date as "%Y-%m-%d". */
                        _LEGAL_ALT(0);
                        if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
                                return (NULL);
                        continue;

                case 'R':       /* The time as "%H:%M". */
                        _LEGAL_ALT(0);
                        if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
                                return (NULL);
                        break;

                case 'r':       /* The time as "%I:%M:%S %p". */
                        _LEGAL_ALT(0);
                        if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
                                return (NULL);
                        break;

                case 'T':       /* The time as "%H:%M:%S". */
                        _LEGAL_ALT(0);
                        if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
                                return (NULL);
                        break;

                case 'v':       /* The date as "%e-%b-%Y". */
                        _LEGAL_ALT(0);
                        if (!(bp = _strptime(bp, "%e-%b-%Y", tm, 0)))
                                return (NULL);
                        break;

                case 'X':       /* The time, using the locale's format. */
                        _LEGAL_ALT(_ALT_E);
                        if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
                                return (NULL);
                        break;

                case 'x':       /* The date, using the locale's format. */
                        _LEGAL_ALT(_ALT_E);
                        if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
                                return (NULL);
                        break;

                /*
                 * "Elementary" conversion rules.
                 */
                case 'A':       /* The day of week, using the locale's form. */
                case 'a':
                        _LEGAL_ALT(0);
                        for (i = 0; i < 7; i++) {
                                /* Full name. */
                                len = strlen(_ctloc(day[i]));
                                if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
                                        break;

                                /* Abbreviated name. */
                                len = strlen(_ctloc(abday[i]));
                                if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
                                        break;
                        }

                        /* Nothing matched. */
                        if (i == 7)
                                return (NULL);

                        tm->tm_wday = i;
                        bp += len;
                        fields |= FIELD_TM_WDAY;
                        break;

                case 'B':       /* The month, using the locale's form. */
                case 'b':
                case 'h':
                        _LEGAL_ALT(0);
                        for (i = 0; i < 12; i++) {
                                /* Full name. */
                                len = strlen(_ctloc(mon[i]));
                                if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
                                        break;

                                /* Abbreviated name. */
                                len = strlen(_ctloc(abmon[i]));
                                if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
                                        break;
                        }

                        /* Nothing matched. */
                        if (i == 12)
                                return (NULL);

                        tm->tm_mon = i;
                        bp += len;
                        fields |= FIELD_TM_MON;
                        break;

                case 'C':       /* The century number. */
                        _LEGAL_ALT(_ALT_E);
                        if (!(_conv_num(&bp, &i, 0, 99)))
                                return (NULL);

                        century = i * 100;
                        break;

                case 'e':       /* The day of month. */
                        if (isspace(*bp))
                                bp++;
                        /* FALLTHROUGH */
                case 'd':
                        _LEGAL_ALT(_ALT_O);
                        if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
                                return (NULL);
                        fields |= FIELD_TM_MDAY;
                        break;

                case 'k':       /* The hour (24-hour clock representation). */
                        _LEGAL_ALT(0);
                        /* FALLTHROUGH */
                case 'H':
                        _LEGAL_ALT(_ALT_O);
                        if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
                                return (NULL);
                        break;

                case 'l':       /* The hour (12-hour clock representation). */
                        _LEGAL_ALT(0);
                        /* FALLTHROUGH */
                case 'I':
                        _LEGAL_ALT(_ALT_O);
                        if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
                                return (NULL);
                        break;

                case 'j':       /* The day of year. */
                        _LEGAL_ALT(0);
                        if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
                                return (NULL);
                        tm->tm_yday--;
                        fields |= FIELD_TM_YDAY;
                        break;

                case 'M':       /* The minute. */
                        _LEGAL_ALT(_ALT_O);
                        if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
                                return (NULL);
                        break;

                case 'm':       /* The month. */
                        _LEGAL_ALT(_ALT_O);
                        if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
                                return (NULL);
                        tm->tm_mon--;
                        fields |= FIELD_TM_MON;
                        break;

                case 'p':       /* The locale's equivalent of AM/PM. */
                        _LEGAL_ALT(0);
                        /* AM? */
                        len = strlen(_ctloc(am_pm[0]));
                        if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
                                if (tm->tm_hour > 12)   /* i.e., 13:00 AM ?! */
                                        return (NULL);
                                else if (tm->tm_hour == 12)
                                        tm->tm_hour = 0;

                                bp += len;
                                break;
                        }
                        /* PM? */
                        len = strlen(_ctloc(am_pm[1]));
                        if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
                                if (tm->tm_hour > 12)   /* i.e., 13:00 PM ?! */
                                        return (NULL);
                                else if (tm->tm_hour < 12)
                                        tm->tm_hour += 12;

                                bp += len;
                                break;
                        }

                        /* Nothing matched. */
                        return (NULL);

                case 'S':       /* The seconds. */
                        _LEGAL_ALT(_ALT_O);
                        if (!(_conv_num(&bp, &tm->tm_sec, 0, 60)))
                                return (NULL);
                        break;
                case 's':       /* Seconds since epoch. */
                        if (!(epoch_to_tm(&bp, tm)))
                                return (NULL);
                        fields = 0xffff;         /* everything */
                        break;
                case 'U':       /* The week of year, beginning on sunday. */
                case 'W':       /* The week of year, beginning on monday. */
                        _LEGAL_ALT(_ALT_O);
                        /*
                         * XXX This is bogus, as we can not assume any valid
                         * information present in the tm structure at this
                         * point to calculate a real value, so just check the
                         * range for now.
                         */
                         if (!(_conv_num(&bp, &i, 0, 53)))
                                return (NULL);
                         break;

                case 'w':       /* The day of week, beginning on sunday. */
                        _LEGAL_ALT(_ALT_O);
                        if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
                                return (NULL);
                        fields |= FIELD_TM_WDAY;
                        break;

                case 'u':       /* The day of week, monday = 1. */
                        _LEGAL_ALT(_ALT_O);
                        if (!(_conv_num(&bp, &i, 1, 7)))
                                return (NULL);
                        tm->tm_wday = i % 7;
                        fields |= FIELD_TM_WDAY;
                        continue;

                case 'g':       /* The year corresponding to the ISO week
                                 * number but without the century.
                                 */
                        if (!(_conv_num(&bp, &i, 0, 99)))
                                return (NULL);
                        continue;

                case 'G':       /* The year corresponding to the ISO week
                                 * number with century.
                                 */
                        do
                                bp++;
                        while (isdigit(*bp));
                        continue;

                case 'V':       /* The ISO 8601:1988 week number as decimal */
                        if (!(_conv_num(&bp, &i, 0, 53)))
                                return (NULL);
                        continue;

                case 'Y':       /* The year. */
                        _LEGAL_ALT(_ALT_E);
                        if (!(_conv_num(&bp, &i, 0, 9999)))
                                return (NULL);

                        relyear = -1;
                        tm->tm_year = i - TM_YEAR_BASE;
                        fields |= FIELD_TM_YEAR;
                        break;

                case 'y':       /* The year within the century (2 digits). */
                        _LEGAL_ALT(_ALT_E | _ALT_O);
                        if (!(_conv_num(&bp, &relyear, 0, 99)))
                                return (NULL);
                        break;

                case 'Z':
                        tzset();
                        if (strncmp((const char *)bp, gmt, 3) == 0) {
                                tm->tm_isdst = 0;
                                tm->tm_gmtoff = 0;
                                tm->tm_zone = gmt;
                                bp += 3;
                        } else if (strncmp((const char *)bp, utc, 3) == 0) {
                                tm->tm_isdst = 0;
                                tm->tm_gmtoff = 0;
                                tm->tm_zone = utc;
                                bp += 3;
                        } else {
                                ep = _find_string(bp, &i,
                                                 (const char * const *)tzname,
                                                  NULL, 2);
                                if (ep == NULL)
                                        return (NULL);

                                tm->tm_isdst = i;
                                tm->tm_gmtoff = -(timezone);
                                tm->tm_zone = tzname[i];
                                bp = ep;
                        }
                        continue;

                case 'z':
                        /*
                         * We recognize all ISO 8601 formats:
                         * Z    = Zulu time/UTC
                         * [+-]hhmm
                         * [+-]hh:mm
                         * [+-]hh
                         * We recognize all RFC-822/RFC-2822 formats:
                         * UT|GMT
                         *          North American : UTC offsets
                         * E[DS]T = Eastern : -4 | -5
                         * C[DS]T = Central : -5 | -6
                         * M[DS]T = Mountain: -6 | -7
                         * P[DS]T = Pacific : -7 | -8
                         */
                        while (isspace(*bp))
                                bp++;

                        switch (*bp++) {
                        case 'G':
                                if (*bp++ != 'M')
                                        return NULL;
                                /*FALLTHROUGH*/
                        case 'U':
                                if (*bp++ != 'T')
                                        return NULL;
                                /*FALLTHROUGH*/
                        case 'Z':
                                tm->tm_isdst = 0;
                                tm->tm_gmtoff = 0;
                                tm->tm_zone = utc;
                                continue;
                        case '+':
                                neg = 0;
                                break;
                        case '-':
                                neg = 1;
                                break;
                        default:
                                --bp;
                                ep = _find_string(bp, &i, nast, NULL, 4);
                                if (ep != NULL) {
                                        tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
                                        tm->tm_zone = nast[i];
                                        bp = ep;
                                        continue;
                                }
                                ep = _find_string(bp, &i, nadt, NULL, 4);
                                if (ep != NULL) {
                                        tm->tm_isdst = 1;
                                        tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
                                        tm->tm_zone = nadt[i];
                                        bp = ep;
                                        continue;
                                }
                                return NULL;
                        }
                        if (!isdigit(bp[0]) || !isdigit(bp[1]))
                                return NULL;
                        offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
                        bp += 2;
                        if (*bp == ':')
                                bp++;
                        if (isdigit(*bp)) {
                                offs += (*bp++ - '0') * 10 * SECSPERMIN;
                                if (!isdigit(*bp))
                                        return NULL;
                                offs += (*bp++ - '0') * SECSPERMIN;
                        }
                        if (neg)
                                offs = -offs;
                        tm->tm_isdst = 0;       /* XXX */
                        tm->tm_gmtoff = offs;
                        tm->tm_zone = NULL;     /* XXX */
                        continue;

                /*
                 * Miscellaneous conversions.
                 */
                case 'n':       /* Any kind of white-space. */
                case 't':
                        _LEGAL_ALT(0);
                        while (isspace(*bp))
                                bp++;
                        break;


                default:        /* Unknown/unsupported conversion. */
                        return (NULL);
                }


        }

        /*
         * We need to evaluate the two digit year spec (%y)
         * last as we can get a century spec (%C) at any time.
         */
        if (relyear != -1) {
                if (century == TM_YEAR_BASE) {
                        if (relyear <= 68)
                                tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
                        else
                                tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
                } else {
                        tm->tm_year = relyear + century - TM_YEAR_BASE;
                }
                fields |= FIELD_TM_YEAR;
        }

        /* Compute some missing values when possible. */
        if (fields & FIELD_TM_YEAR) {
                const int year = tm->tm_year + TM_YEAR_BASE;
                const int *mon_lens = mon_lengths[isleap(year)];
                if (!(fields & FIELD_TM_YDAY) &&
                    (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
                        tm->tm_yday = tm->tm_mday - 1;
                        for (i = 0; i < tm->tm_mon; i++)
                                tm->tm_yday += mon_lens[i];
                        fields |= FIELD_TM_YDAY;
                }
                if (fields & FIELD_TM_YDAY) {
                        int days = tm->tm_yday;
                        if (!(fields & FIELD_TM_WDAY)) {
                                tm->tm_wday = EPOCH_WDAY +
                                    ((year - EPOCH_YEAR) % DAYSPERWEEK) *
                                    (DAYSPERNYEAR % DAYSPERWEEK) +
                                    leaps_thru_end_of(year - 1) -
                                    leaps_thru_end_of(EPOCH_YEAR - 1) +
                                    tm->tm_yday;
                                tm->tm_wday %= DAYSPERWEEK;
                                if (tm->tm_wday < 0)
                                        tm->tm_wday += DAYSPERWEEK;
                        }
                        if (!(fields & FIELD_TM_MON)) {
                                tm->tm_mon = 0;
                                while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
                                        days -= mon_lens[tm->tm_mon++];
                        }
                        if (!(fields & FIELD_TM_MDAY))
                                tm->tm_mday = days + 1;
                }
        }

        return ((char *)bp);
}


static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
        int result = 0;
        int rulim = ulim;

        if (**buf < '0' || **buf > '9')
                return (0);

        /* we use rulim to break out of the loop when we run out of digits */
        do {
                result *= 10;
                result += *(*buf)++ - '0';
                rulim /= 10;
        } while ((result * 10 <= ulim) && rulim && **buf >= '0' && **buf <= '9');

        if (result < llim || result > ulim)
                return (0);

        *dest = result;
        return (1);
}

static int
epoch_to_tm(const unsigned char **buf, struct tm *tm)
{
        int saved_errno = errno;
        int ret = 0;
        time_t secs;
        char *ep;

        errno = 0;
        secs = strtoll(*buf, &ep, 10);
        if (*buf == (unsigned char *)ep)
                goto done;
        if (secs < 0 ||
            secs == LLONG_MAX && errno == ERANGE)
                goto done;
        if (localtime_r(&secs, tm) == NULL)
                goto done;
        ret = 1;
done:
        *buf = ep;
        errno = saved_errno;
        return (ret);
}

static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
                const char * const *n2, int c)
{
        int i;
        unsigned int len;

        /* check full name - then abbreviated ones */
        for (; n1 != NULL; n1 = n2, n2 = NULL) {
                for (i = 0; i < c; i++, n1++) {
                        len = strlen(*n1);
                        if (strncasecmp(*n1, (const char *)bp, len) == 0) {
                                *tgt = i;
                                return bp + len;
                        }
                }
        }

        /* Nothing matched */
        return NULL;
}

static int
leaps_thru_end_of(const int y)
{
        return (y >= 0) ? (y / 4 - y / 100 + y / 400) :
                -(leaps_thru_end_of(-(y + 1)) + 1);
}