#include <sys/types.h>
#include <sys/systm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/ddi.h>
#define SUNDDI_IMPL
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/devctl.h>
#include <sys/time.h>
#include <sys/kmem.h>
#include <nfs/lm.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/obpdefs.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/callb.h>
#include <sys/clock.h>
#include <sys/x_call.h>
#include <sys/cpuvar.h>
#include <sys/epm.h>
#include <sys/vfs.h>
#include <sys/promif.h>
#include <sys/conf.h>
#include <sys/cyclic.h>
#include <sys/dr.h>
#include <sys/dr_util.h>
extern void e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
extern void e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
extern int is_pseudo_device(dev_info_t *dip);
extern kmutex_t cpu_lock;
extern dr_unsafe_devs_t dr_unsafe_devs;
static int dr_is_real_device(dev_info_t *dip);
static int dr_is_unsafe_major(major_t major);
static int dr_bypass_device(char *dname);
static int dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
static int dr_resolve_devname(dev_info_t *dip, char *buffer,
char *alias);
static sbd_error_t *drerr_int(int e_code, uint64_t *arr, int idx,
int majors);
static int dr_add_int(uint64_t *arr, int idx, int len,
uint64_t val);
int dr_pt_test_suspend(dr_handle_t *hp);
typedef enum dr_suspend_state {
DR_SRSTATE_BEGIN = 0,
DR_SRSTATE_USER,
DR_SRSTATE_DRIVER,
DR_SRSTATE_FULL
} suspend_state_t;
struct dr_sr_handle {
dr_handle_t *sr_dr_handlep;
dev_info_t *sr_failed_dip;
suspend_state_t sr_suspend_state;
uint_t sr_flags;
uint64_t sr_err_ints[DR_MAX_ERR_INT];
int sr_err_idx;
};
#define SR_FLAG_WATCHDOG 0x1
static char *dr_bypass_list[] = {
""
};
#define SKIP_SYNC
static int dr_skip_user_threads = 0;
static int dr_check_user_stop_result = 1;
static int dr_allow_blocked_threads = 1;
#define DR_CPU_LOOP_MSEC 1000
static void
dr_stop_intr(void)
{
ASSERT(MUTEX_HELD(&cpu_lock));
kpreempt_disable();
cyclic_suspend();
}
static void
dr_enable_intr(void)
{
ASSERT(MUTEX_HELD(&cpu_lock));
cyclic_resume();
kpreempt_enable();
}
dr_sr_handle_t *
dr_get_sr_handle(dr_handle_t *hp)
{
dr_sr_handle_t *srh;
srh = GETSTRUCT(dr_sr_handle_t, 1);
srh->sr_dr_handlep = hp;
return (srh);
}
void
dr_release_sr_handle(dr_sr_handle_t *srh)
{
ASSERT(srh->sr_failed_dip == NULL);
FREESTRUCT(srh, dr_sr_handle_t, 1);
}
static int
dr_is_real_device(dev_info_t *dip)
{
struct regspec *regbuf = NULL;
int length = 0;
int rc;
if (ddi_get_driver(dip) == NULL)
return (0);
if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
return (1);
if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
return (0);
rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
(caddr_t)®buf, &length);
ASSERT(rc != DDI_PROP_NO_MEMORY);
if (rc != DDI_PROP_SUCCESS) {
return (0);
} else {
if ((length > 0) && (regbuf != NULL))
kmem_free(regbuf, length);
return (1);
}
}
static int
dr_is_unsafe_major(major_t major)
{
char *dname, **cpp;
int i, ndevs;
if ((dname = ddi_major_to_name(major)) == NULL) {
PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
return (0);
}
ndevs = dr_unsafe_devs.ndevs;
for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
if (strcmp(dname, *cpp++) == 0)
return (1);
}
return (0);
}
static int
dr_bypass_device(char *dname)
{
int i;
char **lname;
if (dname == NULL)
return (0);
for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
if (strcmp(dname, dr_bypass_list[i++]) == 0)
return (1);
}
return (0);
}
static int
dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
{
major_t devmajor;
char *aka, *name;
*buffer = *alias = 0;
if (dip == NULL)
return (-1);
if ((name = ddi_get_name(dip)) == NULL)
name = "<null name>";
aka = name;
if ((devmajor = ddi_name_to_major(aka)) != DDI_MAJOR_T_NONE)
aka = ddi_major_to_name(devmajor);
(void) strcpy(buffer, name);
if (strcmp(name, aka))
(void) strcpy(alias, aka);
else
*alias = 0;
return (0);
}
struct dr_ref {
int *refcount;
int *refcount_non_gldv3;
uint64_t *arr;
int *idx;
int len;
};
static int
dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
{
major_t major;
char *dname;
struct dr_ref *rp = (struct dr_ref *)arg;
if (dip == NULL)
return (DDI_WALK_CONTINUE);
if (!dr_is_real_device(dip))
return (DDI_WALK_CONTINUE);
dname = ddi_binding_name(dip);
if (dr_bypass_device(dname))
return (DDI_WALK_CONTINUE);
if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
if (ref && rp->refcount) {
*rp->refcount += ref;
PR_QR("\n %s (major# %d) is referenced(%u)\n", dname,
major, ref);
}
if (ref && rp->refcount_non_gldv3) {
if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
*rp->refcount_non_gldv3 += ref;
}
if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
PR_QR("\n %s (major# %d) not hotpluggable\n", dname,
major);
if (rp->arr != NULL && rp->idx != NULL)
*rp->idx = dr_add_int(rp->arr, *rp->idx,
rp->len, (uint64_t)major);
}
}
return (DDI_WALK_CONTINUE);
}
static int
dr_check_unsafe_major(dev_info_t *dip, void *arg)
{
return (dr_check_dip(dip, arg, 0));
}
void
dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
{
struct dr_ref bref = {0};
if (dip == NULL)
return;
bref.refcount = refcount;
bref.refcount_non_gldv3 = refcount_non_gldv3;
bref.arr = arr;
bref.idx = idx;
bref.len = len;
ASSERT(e_ddi_branch_held(dip));
(void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
}
static int
dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
{
dr_handle_t *handle;
major_t major;
char *dname;
for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
char d_name[40], d_alias[40], *d_info;
ndi_devi_enter(dip);
if (dr_suspend_devices(ddi_get_child(dip), srh)) {
ndi_devi_exit(dip);
return (ENXIO);
}
ndi_devi_exit(dip);
if (!dr_is_real_device(dip))
continue;
major = (major_t)-1;
if ((dname = ddi_binding_name(dip)) != NULL)
major = ddi_name_to_major(dname);
if (dr_bypass_device(dname)) {
PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
major);
continue;
}
if (drmach_verify_sr(dip, 1)) {
PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
major);
continue;
}
if ((d_info = ddi_get_name_addr(dip)) == NULL)
d_info = "<null>";
d_name[0] = 0;
if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
if (d_alias[0] != 0) {
prom_printf("\tsuspending %s@%s (aka %s)\n",
d_name, d_info, d_alias);
} else {
prom_printf("\tsuspending %s@%s\n", d_name,
d_info);
}
} else {
prom_printf("\tsuspending %s@%s\n", dname, d_info);
}
if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
prom_printf("\tFAILED to suspend %s@%s\n",
d_name[0] ? d_name : dname, d_info);
srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
ndi_hold_devi(dip);
srh->sr_failed_dip = dip;
handle = srh->sr_dr_handlep;
dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
d_name[0] ? d_name : dname, d_info);
return (DDI_FAILURE);
}
}
return (DDI_SUCCESS);
}
static void
dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
{
dr_handle_t *handle;
dev_info_t *dip, *next, *last = NULL;
major_t major;
char *bn;
major = (major_t)-1;
while (last != start) {
dip = start;
next = ddi_get_next_sibling(dip);
while (next != last && dip != srh->sr_failed_dip) {
dip = next;
next = ddi_get_next_sibling(dip);
}
if (dip == srh->sr_failed_dip) {
srh->sr_failed_dip = NULL;
ndi_rele_devi(dip);
} else if (dr_is_real_device(dip) &&
srh->sr_failed_dip == NULL) {
if ((bn = ddi_binding_name(dip)) != NULL) {
major = ddi_name_to_major(bn);
} else {
bn = "<null>";
}
if (!dr_bypass_device(bn) &&
!drmach_verify_sr(dip, 0)) {
char d_name[40], d_alias[40], *d_info;
d_name[0] = 0;
d_info = ddi_get_name_addr(dip);
if (d_info == NULL)
d_info = "<null>";
if (!dr_resolve_devname(dip, d_name, d_alias)) {
if (d_alias[0] != 0) {
prom_printf("\tresuming "
"%s@%s (aka %s)\n", d_name,
d_info, d_alias);
} else {
prom_printf("\tresuming "
"%s@%s\n", d_name, d_info);
}
} else {
prom_printf("\tresuming %s@%s\n", bn,
d_info);
}
if (devi_attach(dip, DDI_RESUME) !=
DDI_SUCCESS) {
prom_printf("\tFAILED to resume %s@%s",
d_name[0] ? d_name : bn, d_info);
srh->sr_err_idx =
dr_add_int(srh->sr_err_ints,
srh->sr_err_idx, DR_MAX_ERR_INT,
(uint64_t)major);
handle = srh->sr_dr_handlep;
dr_op_err(CE_IGNORE, handle,
ESBD_RESUME, "%s@%s",
d_name[0] ? d_name : bn, d_info);
}
}
}
ndi_devi_enter(dip);
dr_resume_devices(ddi_get_child(dip), srh);
ndi_devi_exit(dip);
last = dip;
}
}
#define DR_VSTOPPED(t) \
((t)->t_state == TS_SLEEP && \
(t)->t_wchan != NULL && \
(t)->t_astflag && \
((t)->t_proc_flag & TP_CHKPT))
static int
dr_stop_user_threads(dr_sr_handle_t *srh)
{
int count;
int bailout;
dr_handle_t *handle = srh->sr_dr_handlep;
static fn_t f = "dr_stop_user_threads";
kthread_id_t tp;
extern void add_one_utstop();
extern void utstop_timedwait(clock_t);
extern void utstop_init(void);
#define DR_UTSTOP_RETRY 4
#define DR_UTSTOP_WAIT hz
if (dr_skip_user_threads)
return (DDI_SUCCESS);
utstop_init();
srh->sr_err_idx = 0;
for (count = 0; count < DR_UTSTOP_RETRY; count++) {
mutex_enter(&pidlock);
for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
proc_t *p = ttoproc(tp);
if (p->p_as == &kas || p->p_stat == SZOMB)
continue;
mutex_enter(&p->p_lock);
thread_lock(tp);
if (tp->t_state == TS_STOPPED) {
tp->t_schedflag &= ~TS_RESUME;
} else {
tp->t_proc_flag |= TP_CHKPT;
thread_unlock(tp);
mutex_exit(&p->p_lock);
add_one_utstop();
mutex_enter(&p->p_lock);
thread_lock(tp);
aston(tp);
if (ISWAKEABLE(tp) || ISWAITING(tp)) {
setrun_locked(tp);
}
}
if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
poke_cpu(tp->t_cpu->cpu_id);
thread_unlock(tp);
mutex_exit(&p->p_lock);
}
mutex_exit(&pidlock);
utstop_timedwait(count * count * DR_UTSTOP_WAIT);
mutex_enter(&pidlock);
for (tp = curthread->t_next, bailout = 0;
tp != curthread; tp = tp->t_next) {
proc_t *p = ttoproc(tp);
if (p->p_as == &kas || p->p_stat == SZOMB)
continue;
thread_lock(tp);
if (!CPR_ISTOPPED(tp) &&
!(dr_allow_blocked_threads &&
DR_VSTOPPED(tp))) {
bailout = 1;
if (count == DR_UTSTOP_RETRY - 1) {
srh->sr_err_idx =
dr_add_int(srh->sr_err_ints,
srh->sr_err_idx, DR_MAX_ERR_INT,
(uint64_t)p->p_pid);
cmn_err(CE_WARN, "%s: "
"failed to stop thread: "
"process=%s, pid=%d",
f, p->p_user.u_psargs, p->p_pid);
PR_QR("%s: failed to stop thread: "
"process=%s, pid=%d, t_id=0x%p, "
"t_state=0x%x, t_proc_flag=0x%x, "
"t_schedflag=0x%x\n",
f, p->p_user.u_psargs, p->p_pid,
(void *)tp, tp->t_state,
tp->t_proc_flag, tp->t_schedflag);
}
}
thread_unlock(tp);
}
mutex_exit(&pidlock);
if (!bailout)
break;
}
if (bailout) {
handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
srh->sr_err_idx, 0);
return (ESRCH);
}
return (DDI_SUCCESS);
}
static void
dr_start_user_threads(void)
{
kthread_id_t tp;
mutex_enter(&pidlock);
for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
proc_t *p = ttoproc(tp);
if (ttoproc(tp)->p_as == &kas)
continue;
mutex_enter(&p->p_lock);
tp->t_proc_flag &= ~TP_CHKPT;
mutex_exit(&p->p_lock);
thread_lock(tp);
if (CPR_ISTOPPED(tp)) {
tp->t_schedflag |= TS_RESUME;
setrun_locked(tp);
}
thread_unlock(tp);
}
mutex_exit(&pidlock);
}
static void
dr_signal_user(int sig)
{
struct proc *p;
mutex_enter(&pidlock);
for (p = practive; p != NULL; p = p->p_next) {
if (p->p_exec == NULL || p->p_stat == SZOMB ||
p == proc_init || p == ttoproc(curthread))
continue;
mutex_enter(&p->p_lock);
sigtoproc(p, NULL, sig);
mutex_exit(&p->p_lock);
}
mutex_exit(&pidlock);
delay(hz);
}
void
dr_resume(dr_sr_handle_t *srh)
{
switch (srh->sr_suspend_state) {
case DR_SRSTATE_FULL:
ASSERT(MUTEX_HELD(&cpu_lock));
mutex_enter(&tod_lock);
tod_status_set(TOD_DR_RESUME_DONE);
mutex_exit(&tod_lock);
dr_enable_intr();
start_cpus();
mutex_exit(&cpu_lock);
drmach_resume_first();
case DR_SRSTATE_DRIVER:
srh->sr_err_idx = 0;
dr_resume_devices(ddi_root_node(), srh);
if (srh->sr_err_idx && srh->sr_dr_handlep) {
(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
srh->sr_err_ints, srh->sr_err_idx, 1);
}
lm_cprresume();
case DR_SRSTATE_USER:
if (!dr_skip_user_threads) {
prom_printf("DR: resuming user threads...\n");
dr_start_user_threads();
}
case DR_SRSTATE_BEGIN:
default:
PR_QR("sending SIGTHAW...\n");
dr_signal_user(SIGTHAW);
break;
}
prom_printf("DR: resume COMPLETED\n");
}
int
dr_suspend(dr_sr_handle_t *srh)
{
dr_handle_t *handle;
int force;
int dev_errs_idx;
uint64_t dev_errs[DR_MAX_ERR_INT];
int rc = DDI_SUCCESS;
handle = srh->sr_dr_handlep;
force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
prom_printf("\nDR: suspending user threads...\n");
srh->sr_suspend_state = DR_SRSTATE_USER;
if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
dr_check_user_stop_result) {
dr_resume(srh);
return (rc);
}
if (!force) {
struct dr_ref drc = {0};
prom_printf("\nDR: checking devices...\n");
dev_errs_idx = 0;
drc.arr = dev_errs;
drc.idx = &dev_errs_idx;
drc.len = DR_MAX_ERR_INT;
ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
if (dev_errs_idx) {
handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
dev_errs_idx, 1);
dr_resume(srh);
return (DDI_FAILURE);
}
PR_QR("done\n");
} else {
prom_printf("\nDR: dr_suspend invoked with force flag\n");
}
#ifndef SKIP_SYNC
vfs_sync(SYNC_ALL);
#endif
lm_cprsuspend();
#ifndef SKIP_SYNC
sync();
#endif
prom_printf("DR: suspending drivers...\n");
srh->sr_suspend_state = DR_SRSTATE_DRIVER;
srh->sr_err_idx = 0;
if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
if (srh->sr_err_idx && srh->sr_dr_handlep) {
(srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
srh->sr_err_ints, srh->sr_err_idx, 1);
}
dr_resume(srh);
return (rc);
}
drmach_suspend_last();
srh->sr_suspend_state = DR_SRSTATE_FULL;
mutex_enter(&cpu_lock);
pause_cpus(NULL, NULL);
dr_stop_intr();
return (rc);
}
int
dr_pt_test_suspend(dr_handle_t *hp)
{
dr_sr_handle_t *srh;
int err;
uint_t psmerr;
static fn_t f = "dr_pt_test_suspend";
PR_QR("%s...\n", f);
srh = dr_get_sr_handle(hp);
if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
dr_resume(srh);
if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
PR_QR("%s: error on dr_resume()", f);
switch (psmerr) {
case ESBD_RESUME:
PR_QR("Couldn't resume devices: %s\n",
DR_GET_E_RSC(hp->h_err));
break;
case ESBD_KTHREAD:
PR_ALL("psmerr is ESBD_KTHREAD\n");
break;
default:
PR_ALL("Resume error unknown = %d\n", psmerr);
break;
}
}
} else {
PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
switch (psmerr) {
case ESBD_UNSAFE:
PR_ALL("Unsafe devices (major #): %s\n",
DR_GET_E_RSC(hp->h_err));
break;
case ESBD_RTTHREAD:
PR_ALL("RT threads (PIDs): %s\n",
DR_GET_E_RSC(hp->h_err));
break;
case ESBD_UTHREAD:
PR_ALL("User threads (PIDs): %s\n",
DR_GET_E_RSC(hp->h_err));
break;
case ESBD_SUSPEND:
PR_ALL("Non-suspendable devices (major #): %s\n",
DR_GET_E_RSC(hp->h_err));
break;
case ESBD_RESUME:
PR_ALL("Could not resume devices (major #): %s\n",
DR_GET_E_RSC(hp->h_err));
break;
case ESBD_KTHREAD:
PR_ALL("psmerr is ESBD_KTHREAD\n");
break;
case ESBD_NOERROR:
PR_ALL("sbd_error_t error code not set\n");
break;
default:
PR_ALL("Unknown error psmerr = %d\n", psmerr);
break;
}
}
dr_release_sr_handle(srh);
return (0);
}
static int
dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
{
int i;
if (arr == NULL)
return (0);
if (idx >= len)
return (idx);
for (i = 0; i < idx; i++) {
if (arr[i] == val)
return (idx);
}
arr[idx++] = val;
return (idx);
}
static sbd_error_t *
drerr_int(int e_code, uint64_t *arr, int idx, int majors)
{
int i, n, buf_len, buf_idx, buf_avail;
char *dname;
char *buf;
sbd_error_t *new_sbd_err;
static char s_ellipsis[] = "...";
if (arr == NULL || idx <= 0)
return (NULL);
buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
for (buf_idx = 0, i = 0; i < idx; i++) {
buf_avail = buf_len - buf_idx;
if (majors) {
dname = ddi_major_to_name(arr[i]);
if (dname) {
n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
dname);
} else {
n = snprintf(&buf[buf_idx], buf_avail,
"major %" PRIu64 ", ", arr[i]);
}
} else {
n = snprintf(&buf[buf_idx], buf_avail, "%" PRIu64 ", ",
arr[i]);
}
if (n >= buf_avail) {
(void) strcpy(&buf[buf_idx], s_ellipsis);
break;
}
buf_idx += n;
}
if (n < buf_avail) {
buf[--buf_idx] = '\0';
buf[--buf_idx] = '\0';
}
new_sbd_err = drerr_new(1, e_code, buf);
kmem_free(buf, MAXPATHLEN);
return (new_sbd_err);
}