#include <sys/param.h>
#include <sys/stat.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libutil.h>
#include <limits.h>
#include <locale.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <regex.h>
#include <sysexits.h>
/* Lines per output file when no split mode is selected on the command line. */
#define DEFLINE 1000
/* Bytes per output file: set by -b, or derived from -n in split3(); 0 = unset. */
static off_t bytecnt;
/* Number of output files requested with -n; 0 = unset. */
static long chunks;
/* Cleared by -c; newfile() then opens output files with O_EXCL. */
static bool clobber = true;
/* Lines per output file: set by -l or the -NUMBER form; 0 = unset. */
static long numlines;
/* Set to 1 by newfile(); cleared by split1() when a piece ends exactly. */
static int file_open;
/* Input and output file descriptors; -1 means not opened yet. */
static int ifd = -1, ofd = -1;
/* Output file name: the prefix followed by the suffix newfile() generates. */
static char fname[MAXPATHLEN];
/* Compiled -p pattern; only meaningful when pflag is set. */
static regex_t rgx;
/* Nonzero when -p was given. */
static int pflag;
/* Set by -d: newfile() builds suffixes from '0'..'9' instead of 'a'..'z'. */
static bool dflag;
/* Current suffix length; 2 by default, overridden by -a. */
static long sufflen = 2;
/* When true, newfile() may lengthen the suffix; cleared by a nonzero -a. */
static bool autosfx = true;
/* Close the current output file (if any) and open the next one. */
static void newfile(void);
/* Split by byte count. */
static void split1(void);
/* Split by line count or by pattern. */
static void split2(void);
/* Split into a fixed number of chunks. */
static void split3(void);
/* Print the synopsis to stderr and exit with EX_USAGE. */
static void usage(void) __dead2;
/*
 * Entry point: parse the command line, open the input, validate the
 * combination of options and dispatch to one of the split routines.
 *
 * At most one split mode may be selected: by line count (-l or the legacy
 * -NUMBER form), by byte count (-b), by chunk count (-n) or by pattern (-p).
 * When none is given the input is split every DEFLINE lines.
 *
 * Operands are an optional input file ("-" means standard input) followed
 * by an optional output file name prefix.
 */
int
main(int argc, char **argv)
{
	char errbuf[64];
	const char *p, *errstr;
	int ch, error;

	setlocale(LC_ALL, "");

	dflag = false;
	while ((ch = getopt(argc, argv, "0::1::2::3::4::5::6::7::8::9::a:b:cdl:n:p:")) != -1)
		switch (ch) {
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/*
			 * Legacy "-NUMBER" line count: the option character
			 * is the first digit and optarg holds the remainder.
			 */
			if (numlines != 0)
				usage();
			numlines = ch - '0';
			p = optarg ? optarg : "";
			while (*p >= '0' && *p <= '9') {
				int digit = *p - '0';

				/*
				 * Check the range before multiplying: signed
				 * overflow is undefined, so it cannot be
				 * detected after the fact.
				 */
				if (numlines > (LONG_MAX - digit) / 10) {
					numlines = -1;
					break;
				}
				numlines = numlines * 10 + digit;
				p++;
			}
			if (numlines <= 0 || *p != '\0')
				errx(EX_USAGE, "%c%s: line count is invalid",
				    ch, optarg ? optarg : "");
			break;
		case 'a':		/* Suffix length. */
			sufflen = strtonum(optarg, 0, INT_MAX, &errstr);
			if (errstr != NULL) {
				errx(EX_USAGE, "%s: suffix length is %s",
				    optarg, errstr);
			}
			if (sufflen == 0) {
				/* "-a 0" restores the default, growable suffix. */
				sufflen = 2;
				autosfx = true;
			} else {
				autosfx = false;
			}
			break;
		case 'b':		/* Byte count. */
			/*
			 * A negative count would reach split1() and be used
			 * as a write length, so reject it here.
			 */
			if (expand_number(optarg, &bytecnt) != 0 ||
			    bytecnt < 0) {
				errx(EX_USAGE, "%s: byte count is invalid",
				    optarg);
			}
			break;
		case 'c':		/* Continue: do not overwrite files. */
			clobber = false;
			break;
		case 'd':		/* Decimal suffixes. */
			dflag = true;
			break;
		case 'l':		/* Line count. */
			if (numlines != 0)
				usage();
			numlines = strtonum(optarg, 1, LONG_MAX, &errstr);
			if (errstr != NULL) {
				errx(EX_USAGE, "%s: line count is %s",
				    optarg, errstr);
			}
			break;
		case 'n':		/* Chunk count. */
			chunks = strtonum(optarg, 1, LONG_MAX, &errstr);
			if (errstr != NULL) {
				errx(EX_USAGE, "%s: number of chunks is %s",
				    optarg, errstr);
			}
			break;
		case 'p':		/* Pattern. */
			error = regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB);
			if (error != 0) {
				regerror(error, &rgx, errbuf, sizeof(errbuf));
				errx(EX_USAGE, "%s: regex is invalid: %s",
				    optarg, errbuf);
			}
			pflag = 1;
			break;
		default:
			usage();
		}
	argv += optind;
	argc -= optind;

	if (argc > 0) {			/* Input file. */
		if (strcmp(*argv, "-") == 0)
			ifd = STDIN_FILENO;
		else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
			err(EX_NOINPUT, "%s", *argv);
		++argv;
		--argc;
	}

	if (argc > 0) {			/* File name prefix. */
		if (strlcpy(fname, *argv, sizeof(fname)) >= sizeof(fname)) {
			errx(EX_USAGE, "%s: file name prefix is too long",
			    *argv);
		}
		++argv;
		--argc;
	}

	if (argc > 0)
		usage();

	/* The prefix plus the suffix must fit in fname, NUL included. */
	if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
		errx(EX_USAGE, "suffix is too long");

	/* The split modes are mutually exclusive. */
	if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
		usage();

	if (numlines == 0)
		numlines = DEFLINE;
	else if (bytecnt != 0 || chunks != 0)
		usage();

	if (bytecnt != 0 && chunks != 0)
		usage();

	if (ifd == -1)			/* Standard input by default. */
		ifd = 0;

	if (bytecnt != 0) {
		split1();
		exit(0);
	} else if (chunks != 0) {
		split3();
		exit(0);
	}
	split2();
	if (pflag)
		regfree(&rgx);
	exit(0);
}
static void
split1(void)
{
static char bfr[MAXBSIZE];
off_t bcnt;
char *C;
ssize_t dist, len;
int nfiles;
nfiles = 0;
for (bcnt = 0;;)
switch ((len = read(ifd, bfr, sizeof(bfr)))) {
case 0:
exit(0);
case -1:
err(EX_IOERR, "read");
default:
if (!file_open) {
if (chunks == 0 || nfiles < chunks) {
newfile();
nfiles++;
}
}
if (bcnt + len >= bytecnt) {
dist = bytecnt - bcnt;
if (write(ofd, bfr, dist) != dist)
err(EX_IOERR, "write");
len -= dist;
for (C = bfr + dist; len >= bytecnt;
len -= bytecnt, C += bytecnt) {
if (chunks == 0 || nfiles < chunks) {
newfile();
nfiles++;
}
if (write(ofd, C, bytecnt) != bytecnt)
err(EX_IOERR, "write");
}
if (len != 0) {
if (chunks == 0 || nfiles < chunks) {
newfile();
nfiles++;
}
if (write(ofd, C, len) != len)
err(EX_IOERR, "write");
} else {
file_open = 0;
}
bcnt = len;
} else {
bcnt += len;
if (write(ofd, bfr, len) != len)
err(EX_IOERR, "write");
}
}
}
/*
 * Split the input by lines.
 *
 * With -p a new output file is started at every line matching the
 * compiled pattern; otherwise a new file is started after every numlines
 * lines.  Lines are read with getline(), so they may be arbitrarily long.
 * The function does not return: it exits with status 0 at end of input
 * and through err() on a read or write failure.
 */
static void
split2(void)
{
	char *buf;
	size_t bufsize;
	ssize_t len;
	long lcnt = 0;
	FILE *infp;

	buf = NULL;
	bufsize = 0;

	/* Wrap the descriptor in a stream so getline() can be used. */
	if ((infp = fdopen(ifd, "r")) == NULL)
		err(EX_NOINPUT, "fdopen");

	/* errno is cleared first so EOF can be told apart from an error. */
	while ((errno = 0, len = getline(&buf, &bufsize, infp)) > 0) {
		if (pflag) {
			regmatch_t pmatch;

			/*
			 * Match against the line without its newline.  The
			 * final line of the input may lack one, in which
			 * case every byte takes part in the match.
			 */
			pmatch.rm_so = 0;
			pmatch.rm_eo = len;
			if (buf[len - 1] == '\n')
				pmatch.rm_eo--;
			if (regexec(&rgx, buf, 0, &pmatch, REG_STARTEND) == 0)
				newfile();
		} else if (lcnt++ == numlines) {
			newfile();
			lcnt = 1;
		}

		/* Open the first file before anything is written. */
		if (!file_open)
			newfile();

		if (write(ofd, buf, len) != len)
			err(EX_IOERR, "write");
	}

	if ((len == -1 && errno != 0) || ferror(infp))
		err(EX_IOERR, "read");
	else
		exit(0);
}
/*
 * Split the input into chunks pieces.
 *
 * The piece size is the input size reported by fstat() divided by chunks;
 * the actual copying is delegated to split1(), which caps the number of
 * output files at chunks.  Fails when more pieces are requested than the
 * input has bytes.  The caller only invokes this with chunks != 0.
 */
static void
split3(void)
{
	struct stat sb;

	if (fstat(ifd, &sb) == -1) {
		err(1, "stat");
	}

	if (chunks > sb.st_size) {
		/* st_size is an off_t: print it at full width, not as int. */
		errx(1, "can't split into more than %jd files",
		    (intmax_t)sb.st_size);
	}

	bytecnt = sb.st_size / chunks;
	split1();
}
static void
newfile(void)
{
long i, maxfiles, tfnum;
static long fnum;
static char *fpnt;
char beg, end;
int pattlen;
int flags = O_WRONLY | O_CREAT | O_TRUNC;
if (!clobber)
flags |= O_EXCL;
if (ofd == -1) {
if (fname[0] == '\0') {
fname[0] = 'x';
fpnt = fname + 1;
} else {
fpnt = fname + strlen(fname);
}
} else if (close(ofd) != 0)
err(1, "%s", fname);
again:
if (dflag) {
beg = '0';
end = '9';
}
else {
beg = 'a';
end = 'z';
}
pattlen = end - beg + 1;
if (!dflag && autosfx && (fpnt[0] == 'y') &&
strspn(fpnt+1, "z") == strlen(fpnt+1)) {
if (strlen(fname) + 2 >= sizeof(fname))
errx(EX_USAGE, "combined filenames would be too long");
fpnt = fname + strlen(fname) - sufflen;
fpnt[sufflen + 2] = '\0';
fpnt[0] = end;
fpnt[1] = beg;
fpnt++;
sufflen++;
fnum = 0;
}
for (maxfiles = 1, i = 0; i < sufflen; i++)
if (LONG_MAX / pattlen < maxfiles)
errx(EX_USAGE, "suffix is too long (max %ld)", i);
else
maxfiles *= pattlen;
if (fnum == maxfiles)
errx(EX_DATAERR, "too many files");
tfnum = fnum;
i = sufflen - 1;
do {
fpnt[i] = tfnum % pattlen + beg;
tfnum /= pattlen;
} while (i-- > 0);
fpnt[sufflen] = '\0';
++fnum;
if ((ofd = open(fname, flags, DEFFILEMODE)) < 0) {
if (!clobber && errno == EEXIST)
goto again;
err(EX_IOERR, "%s", fname);
}
file_open = 1;
}
/*
 * Print the command synopsis to standard error and terminate the
 * program with EX_USAGE.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: split [-cd] [-l line_count] [-a suffix_length] [file [prefix]]\n"
"       split [-cd] -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
"       split [-cd] -n chunk_count [-a suffix_length] [file [prefix]]\n"
"       split [-cd] -p pattern [-a suffix_length] [file [prefix]]\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}