#include <sys/cdefs.h>
#include "opt_pmap.h"
#include "opt_watchdog.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>
#include <sys/sysctl.h>
#include <sys/watchdog.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_dumpset.h>
#include <vm/pmap.h>
#include <machine/atomic.h>
#include <machine/elf.h>
#include <machine/md_var.h>
#include <machine/minidump.h>
#include <machine/vmparam.h>
/* Build-time check that the kernel dump header is exactly 512 bytes. */
CTASSERT(sizeof(struct kerneldumpheader) == 512);
/* Dump header handed to dump_init_header(), dump_start() and dump_finish(). */
static struct kerneldumpheader kdh;
/* Bytes of physical pages accumulated by blk_write() and not yet written by blk_flush(). */
static size_t fragsz;
/* Address returned by pmap_kenter_temporary(); blk_flush() writes fragsz bytes from it. */
static void *dump_va;
/*
 * progress:  set to dumpsize at the start of a dump and decremented by
 *            blk_write() for every chunk it handles.
 * dumpsize:  total dump size computed by cpu_minidumpsys().
 * wdog_next: value of progress at or below which blk_write() next calls
 *            wdog_kern_pat().
 */
static size_t progress, dumpsize, wdog_next;
/*
 * Limit on the number of dump attempts when a dump fails with ENOSPC;
 * exposed as the machdep.dump_retry_count sysctl/tunable.
 */
static int dump_retry_count = 5;
SYSCTL_INT(_machdep, OID_AUTO, dump_retry_count, CTLFLAG_RWTUN,
&dump_retry_count, 0, "Number of times dump has to retry before bailing out");
/*
 * Write out the run of physical pages that blk_write() has accumulated at
 * dump_va, if there is one.
 *
 * Returns 0 when nothing is pending, otherwise the result of dump_append().
 * The pending byte count is reset whether or not the write succeeded.
 */
static int
blk_flush(struct dumperinfo *di)
{
	int rc = 0;

	if (fragsz != 0) {
		rc = dump_append(di, dump_va, fragsz);
		fragsz = 0;
	}
	return (rc);
}
/*
 * Number of bytes by which blk_write() lowers wdog_next after each call to
 * wdog_kern_pat(), i.e. the amount of dump progress between watchdog pats.
 */
#define WDOG_DUMP_INTERVAL (128 * 1024 * 1024)
/*
 * Append a region whose size is a multiple of PAGE_SIZE to the dump.
 *
 * The region is named either by a kernel virtual address (ptr != NULL with
 * pa == 0) or by a physical address (ptr == NULL); supplying both is
 * rejected.  Virtual regions are handed straight to dump_append() after any
 * pending physical pages have been flushed.  Physical pages are mapped with
 * pmap_kenter_temporary() and accumulated in the pending run described by
 * dump_va/fragsz, which blk_flush() writes once it reaches the per-I/O limit.
 *
 * Each chunk also advances the progress display, pats the watchdog when
 * progress crosses wdog_next, and polls the console for CTRL-C.
 *
 * Returns 0 on success; EINVAL for a misaligned size or physical address, or
 * when both ptr and pa are given; ECANCELED when CTRL-C is read from the
 * console; otherwise the error returned by dump_append() or blk_flush().
 */
static int
blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
{
	size_t len;
	int error, i, c;
	u_int maxdumpsz;

	/* Largest single I/O; fall back to one page if the dumper reports 0. */
	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
	if (maxdumpsz == 0)
		maxdumpsz = PAGE_SIZE;
	error = 0;

	if ((sz % PAGE_SIZE) != 0) {
		printf("size not page aligned\n");
		return (EINVAL);
	}
	if (ptr != NULL && pa != 0) {
		printf("can't have both va and pa!\n");
		return (EINVAL);
	}
	if ((((uintptr_t)pa) % PAGE_SIZE) != 0) {
		/*
		 * A misaligned pa is non-zero, so ptr is necessarily NULL at
		 * this point; report the offending physical address instead.
		 */
		printf("address not page aligned %#jx\n", (uintmax_t)pa);
		return (EINVAL);
	}
	if (ptr != NULL) {
		/* Write out pending physical pages before a virtual region. */
		error = blk_flush(di);
		if (error)
			return (error);
	}

	while (sz) {
		/* Take as much as still fits in the current I/O. */
		len = maxdumpsz - fragsz;
		if (len > sz)
			len = sz;
		progress -= len;

		dumpsys_pb_progress(len);
		if (progress <= wdog_next) {
			wdog_kern_pat(WD_LASTVAL);
			if (wdog_next > WDOG_DUMP_INTERVAL)
				wdog_next -= WDOG_DUMP_INTERVAL;
			else
				wdog_next = 0;
		}

		if (ptr) {
			error = dump_append(di, ptr, len);
			if (error)
				return (error);
			ptr += len;
			sz -= len;
		} else {
			/*
			 * Map each page at the slot following the pages
			 * already pending; dump_va keeps the address returned
			 * by the last pmap_kenter_temporary() call.
			 */
			for (i = 0; i < len; i += PAGE_SIZE)
				dump_va = pmap_kenter_temporary(pa + i,
				    (i + fragsz) >> PAGE_SHIFT);
			fragsz += len;
			pa += len;
			sz -= len;
			if (fragsz == maxdumpsz) {
				error = blk_flush(di);
				if (error)
					return (error);
			}
		}

		/* Poll the console: 0x03 (CTRL-C) aborts the dump. */
		c = cncheckc();
		if (c == 0x03)
			return (ECANCELED);
		if (c != -1)
			printf(" (CTRL-C to abort) ");
	}

	return (0);
}
/*
 * Scratch array of NPDEPG entries.  cpu_minidumpsys() stages the minidump
 * header, dump_avail, and zeroed or synthesized page directories here and
 * writes PAGE_SIZE bytes from it with blk_write().
 */
static pd_entry_t fakepd[NPDEPG];
/*
 * Write a minidump of the kernel through the dumper described by di.
 *
 * The function first walks the kernel page tables covering
 * [kva_layout.km_low, kva_end) and marks the physical pages it finds in
 * state->dump_bitset.  It then computes the total dump size and writes, in
 * order: a header page, the message buffer, dump_avail, the dump bitmap, one
 * page-directory page per NBPDP of kernel VA, and every page set in the
 * bitmap.
 *
 * Returns 0 on success or a positive error number on failure.  A failure
 * with ENOSPC restarts the whole procedure while the number of attempts made
 * is below dump_retry_count.
 */
int
cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
{
uint32_t pmapsize;
vm_offset_t va, kva_end;
int error;
uint64_t *pml4, *pdp, *pd, *pt, pa;
uint64_t pdpe, pde, pte;
int ii, j, k, n;
int retry_count;
struct minidumphdr mdhdr;
struct msgbuf *mbp;
retry_count = 0;
retry:
retry_count++;
/*
 * Compute the upper bound of the kernel VA range once per attempt; both
 * page-table walks below use this same value.
 */
kva_end = MAX(KERNBASE + nkpt * NBPDR, kernel_vm_end);
/*
 * First walk: record mapped physical pages in the dump bitmap.  PDP, PD and
 * PT entries are each read once with atomic_load_64().
 */
pmapsize = 0;
for (va = kva_layout.km_low; va < kva_end; ) {
/*
 * Account one page of page-directory data per iteration; the second walk
 * below writes one page for every NBPDP step over the same range.
 */
pmapsize += PAGE_SIZE;
ii = pmap_pml4e_index(va);
pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii;
pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
pdpe = atomic_load_64(&pdp[pmap_pdpe_index(va)]);
/* PDP entry not valid: nothing mapped here, skip NBPDP of VA. */
if ((pdpe & PG_V) == 0) {
va += NBPDP;
continue;
}
/*
 * PDP entry with PG_PS set: a large mapping.  Add each of its
 * NPDEPG * NPTEPG constituent pages that is dumpable.
 */
if ((pdpe & PG_PS) != 0) {
va += NBPDP;
pa = pdpe & PG_PS_FRAME;
for (n = 0; n < NPDEPG * NPTEPG; n++) {
if (vm_phys_is_dumpable(pa))
vm_page_dump_add(state->dump_bitset,
pa);
pa += PAGE_SIZE;
}
continue;
}
/* Otherwise the PDP entry points at a page directory; walk its entries. */
pd = (uint64_t *)PHYS_TO_DMAP(pdpe & PG_FRAME);
for (n = 0; n < NPDEPG; n++, va += NBPDR) {
pde = atomic_load_64(&pd[pmap_pde_index(va)]);
if ((pde & PG_V) == 0)
continue;
/* PD entry with PG_PS set: add each of its NPTEPG dumpable pages. */
if ((pde & PG_PS) != 0) {
pa = pde & PG_PS_FRAME;
for (k = 0; k < NPTEPG; k++) {
if (vm_phys_is_dumpable(pa))
vm_page_dump_add(
state->dump_bitset, pa);
pa += PAGE_SIZE;
}
continue;
}
/* PD entry points at a page-table page: add that page itself... */
pa = pde & PG_FRAME;
if (vm_phys_is_dumpable(pa))
vm_page_dump_add(state->dump_bitset, pa);
/*
 * ...then add the target of every valid PTE in it, provided the address is
 * covered by the direct map and is dumpable.
 */
pt = (uint64_t *)PHYS_TO_DMAP(pde & PG_FRAME);
for (k = 0; k < NPTEPG; k++) {
pte = atomic_load_64(&pt[k]);
if ((pte & PG_V) == 0)
continue;
pa = pte & PG_FRAME;
if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
vm_page_dump_add(state->dump_bitset,
pa);
}
}
}
/*
 * Compute the dump size: page-directory pages, message buffer, dump_avail and
 * bitmap, each rounded up to whole pages.
 */
mbp = state->msgbufp;
dumpsize = pmapsize;
dumpsize += round_page(mbp->msg_size);
dumpsize += round_page(sizeof(dump_avail));
dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages));
/*
 * Count every page in the bitmap, dropping those that are outside the direct
 * map or not dumpable.
 */
VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) {
dumpsize += PAGE_SIZE;
} else {
vm_page_dump_drop(state->dump_bitset, pa);
}
}
/* One more page, matching the header page written first below. */
dumpsize += PAGE_SIZE;
/* blk_write() counts progress down from the total toward zero. */
wdog_next = progress = dumpsize;
dumpsys_pb_init(dumpsize);
/* Fill in the minidump header. */
bzero(&mdhdr, sizeof(mdhdr));
strcpy(mdhdr.magic, MINIDUMP_MAGIC);
mdhdr.version = MINIDUMP_VERSION;
mdhdr.msgbufsize = mbp->msg_size;
mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages));
mdhdr.pmapsize = pmapsize;
mdhdr.kernbase = kva_layout.km_low;
mdhdr.dmapbase = kva_layout.dmap_low;
mdhdr.dmapend = kva_layout.dmap_high;
mdhdr.dumpavailsize = round_page(sizeof(dump_avail));
dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION,
dumpsize);
error = dump_start(di, &kdh);
if (error != 0)
goto fail;
printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20,
ptoa((uintmax_t)physmem) / 1048576);
/* Write the header, staged in the zeroed fakepd so PAGE_SIZE bytes go out. */
bzero(&fakepd, sizeof(fakepd));
bcopy(&mdhdr, &fakepd, sizeof(mdhdr));
error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
if (error)
goto fail;
/* Write the message buffer. */
error = blk_write(di, mbp->msg_ptr, 0, round_page(mbp->msg_size));
if (error)
goto fail;
/*
 * Write dump_avail, staged in fakepd; the static assertion guarantees it
 * fits.  PAGE_SIZE bytes are written.
 */
_Static_assert(sizeof(dump_avail) <= sizeof(fakepd),
"Large dump_avail not handled");
bzero(&fakepd, sizeof(fakepd));
memcpy(fakepd, dump_avail, sizeof(dump_avail));
error = blk_write(di, (char *)fakepd, 0, PAGE_SIZE);
if (error)
goto fail;
/* Write the dump bitmap. */
error = blk_write(di, (char *)state->dump_bitset, 0,
round_page(BITSET_SIZE(vm_page_dump_pages)));
if (error)
goto fail;
/*
 * Second walk: write one page-directory page for every NBPDP of kernel VA.
 * fakepd is kept zeroed between iterations so it can stand in for a missing
 * page directory.
 */
bzero(fakepd, sizeof(fakepd));
for (va = kva_layout.km_low; va < kva_end; va += NBPDP) {
ii = pmap_pml4e_index(va);
pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii;
pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
pdpe = atomic_load_64(&pdp[pmap_pdpe_index(va)]);
/* PDP entry not valid: a page is still written, namely the zeroed fakepd. */
if ((pdpe & PG_V) == 0) {
error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
if (error)
goto fail;
/*
 * NOTE(review): blk_write() with a virtual address flushes pending pages
 * before appending and does not add to the pending run, so this flush
 * appears to have nothing to write -- confirm before changing.
 */
error = blk_flush(di);
if (error)
goto fail;
continue;
}
/*
 * PDP entry with PG_PS set: synthesize a page directory in fakepd.  Entry 0
 * is the PDP entry itself and each following entry is the previous one plus
 * NBPDR.  fakepd is zeroed again afterwards.
 */
if ((pdpe & PG_PS) != 0) {
fakepd[0] = pdpe;
for (j = 1; j < NPDEPG; j++)
fakepd[j] = fakepd[j - 1] + NBPDR;
error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
if (error)
goto fail;
error = blk_flush(di);
if (error)
goto fail;
bzero(fakepd, sizeof(fakepd));
continue;
}
/*
 * Regular page directory: write it through the direct map when its address
 * is in the direct map and dumpable; otherwise write the zeroed fakepd.
 */
pa = pdpe & PG_FRAME;
if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) {
pd = (uint64_t *)PHYS_TO_DMAP(pa);
error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
} else {
error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
}
if (error)
goto fail;
error = blk_flush(di);
if (error)
goto fail;
}
/* Write every page set in the bitmap, addressed by physical address. */
VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
error = blk_write(di, 0, pa, PAGE_SIZE);
if (error)
goto fail;
}
/* Write out whatever physical pages are still pending. */
error = blk_flush(di);
if (error)
goto fail;
error = dump_finish(di, &kdh);
if (error != 0)
goto fail;
printf("\nDump complete\n");
return (0);
fail:
/* Normalize to a positive error number before classifying it. */
if (error < 0)
error = -error;
printf("\n");
/* ENOSPC restarts from the retry label while attempts remain. */
if (error == ENOSPC) {
printf("Dump map grown while dumping. ");
if (retry_count < dump_retry_count) {
printf("Retrying...\n");
goto retry;
}
printf("Dump failed.\n");
}
else if (error == ECANCELED)
printf("Dump aborted\n");
else if (error == E2BIG) {
printf("Dump failed. Partition too small (about %lluMB were "
"needed this time).\n", (long long)dumpsize >> 20);
} else
printf("** DUMP FAILED (ERROR %d) **\n", error);
return (error);
}