--- /home/dsmith/crash-suspend.c 2008-07-30 15:36:46.000000000 -0500 +++ crash-suspend2.c 2008-07-30 16:01:58.000000000 -0500 @@ -18,6 +18,14 @@ module_param(verbose, bool, 0); UTRACE_EVENT(SIGNAL_CORE) | UTRACE_EVENT(JCTL) | \ UTRACE_EVENT(QUIESCE)) +#define SHUTDOWN_EVENTS (UTRACE_EVENT(QUIESCE) | UTRACE_EVENT(DEATH)) +#define CS_STARTING 0 +#define CS_STOPPING 1 +atomic_t state = ATOMIC_INIT(CS_STARTING); +atomic_t attach_count = ATOMIC_INIT (0); + +static DECLARE_WAIT_QUEUE_HEAD(crash_suspend_wq); + /* * This is the interesting hook. */ @@ -61,6 +69,13 @@ crash_suspend_quiesce(u32 action, struct */ if (!event) engine->data = NULL; + + if (atomic_read(&state) == CS_STOPPING) { + if (atomic_dec_return(&attach_count) <= 0) { + wake_up(&crash_suspend_wq); + } + return UTRACE_DETACH; + } return UTRACE_RESUME; } @@ -85,7 +100,7 @@ crash_suspend_jctl(enum utrace_resume_ac * proper weirdo status, stop the rest of the * process group too in normal job control fashion. */ - (void) kill_pgrp(find_pid(-pgid), SIGTTOU, 1); + (void) kill_pgrp(find_vpid(-pgid), SIGTTOU, 1); } else if (engine->data) { /* * We've been resumed after a crash. @@ -117,6 +132,7 @@ crash_suspend_clone(enum utrace_resume_a } else { int err = utrace_set_events(child, child_engine, MY_EVENTS); WARN_ON(err); + atomic_inc(&attach_count); } return UTRACE_RESUME; @@ -131,13 +147,21 @@ crash_suspend_death(struct utrace_attach struct task_struct *tsk, bool group_dead, int signal) { + if (atomic_read(&state) == CS_STOPPING) { + if (atomic_dec_return(&attach_count) <= 0) { + wake_up(&crash_suspend_wq); + } + } + else { + atomic_dec(&attach_count); + } return UTRACE_DETACH; } - static const struct utrace_engine_ops crash_suspend_ops = { .report_clone = crash_suspend_clone, + .report_quiesce = crash_suspend_quiesce, .report_death = crash_suspend_death, .report_signal = crash_suspend_signal, .report_jctl = crash_suspend_jctl, @@ -151,7 +175,7 @@ static int __init init_crash_suspend(voi int ret; rcu_read_lock(); - target = find_task_by_pid(target_pid); + target = find_task_by_vpid(target_pid); if (target) get_task_struct(target); rcu_read_unlock(); @@ -173,8 +197,10 @@ static int __init init_crash_suspend(voi ret = utrace_set_events(target, engine, MY_EVENTS); if (ret == -ESRCH) printk("pid %d died during setup\n", target->pid); - else + else { + atomic_inc(&attach_count); WARN_ON(ret); + } WARN_ON(atomic_dec_and_test(&target->usage)); return 0; @@ -186,6 +212,7 @@ static void __exit exit_crash_suspend(vo struct utrace_attached_engine *engine; int n = 0; + atomic_set(&state, CS_STOPPING); rcu_read_lock(); for_each_process(t) { engine = utrace_attach(t, UTRACE_ATTACH_MATCH_OPS, @@ -197,14 +224,33 @@ static void __exit exit_crash_suspend(vo error, t->pid); } else { - int ret = utrace_control(t, engine, UTRACE_DETACH); + int ret = utrace_set_events(t, engine, + SHUTDOWN_EVENTS); WARN_ON(ret); + + ret = utrace_control(t, engine, UTRACE_STOP); + if (ret == -EINPROGRESS) { + ret = utrace_control(t, engine, + UTRACE_INTERRUPT); + WARN_ON(ret); + } + else { + WARN_ON(ret); + } ++n; } } rcu_read_unlock(); - printk("detached from %d threads, unloading\n", n); + printk(KERN_INFO "requested %d threads to stop\n", n); + + if (wait_event_timeout(crash_suspend_wq, + (atomic_read(&attach_count) <= 0), + 60 * HZ) <= 0) { + printk(KERN_ERR "gave up waiting.\n"); + } + printk(KERN_INFO "unloading, %d threads left to stop\n", + atomic_read(&attach_count)); } module_init(init_crash_suspend);