[PATCH 2/3] Quiesce all threads of a process

Ananth N Mavinakayanahalli ananth at in.ibm.com
Thu Jul 30 09:56:56 UTC 2009


o Attach utrace engines to all threads of the process
o Stub in handlers of interest
o Quiesce all threads of the process
o Currently this patch does nothing interesting beyond the point. Once
  the threads are quiesced, it just calls complete(), which causes the
  'echo' command to return.

The current implementation uses two top level structures:
- core_proc: One per process. It tracks the number of threads in the
  process and the count of quiesced threads. This also lives in the
  *data of the struct utrace_engine.
- core_task: One per task, also chained to the core_proc.

Currently, we rendezvous threads via UTRACE_INTERRUPT. The intention is
to take a non-disruptive core dump, triggered by the last thread of the
process that quiesced, from its quiesce callback. The ideal way of doing
it would've been via UTRACE_STOP. However, for the most trivial of cases
(say bash waiting at the command line), the task won't quiesce until
some activity on the shell; similarly for any thread blocked in the
kernel.

There are a number of points that need clarification/fixing in the patch:
- Surely, there are races that I've ignored.
- The mechanism to attach engines to all threads of the process is less
  than optimal. The 'correct' way to do it is to follow how
  uprobes does it.
  (https://www.redhat.com/archives/utrace-devel/2009-June/msg00022.html).
  A better possibility is to have a utrace callback that implements it
  in the utrace layer.
- Do we need the exec callback? Is it implemented correctly here?
- There possibly are more gotchas that I am currently unaware of.


Signed-off-by: Ananth N Mavinakayanahalli <ananth at in.ibm.com>
---
 fs/proc/proc_gencore.c |  354 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 353 insertions(+), 1 deletion(-)

Index: utrace-13jul/fs/proc/proc_gencore.c
===================================================================
--- utrace-13jul.orig/fs/proc/proc_gencore.c
+++ utrace-13jul/fs/proc/proc_gencore.c
@@ -24,10 +24,350 @@
 
 #include <asm/uaccess.h>
 
+static const struct utrace_engine_ops core_ops;
+
+/* All threads of a process having the same utrace ops */
+struct core_proc {
+	/* list of struct core_task */
+	struct list_head list;
+
+	/* task the dump request was issued against (thread-group leader) */
+	struct task_struct *tgid_task;
+	/* threads we have attached an engine to */
+	atomic_t num_threads;
+	/* threads that have quiesced so far (see set_quiesced()) */
+	atomic_t num_quiesced;
+	/* Protects list and num_threads above */
+	spinlock_t lock;
+
+	/* signalled when all threads quiesced, or on error/exec/exit */
+	struct completion complete;
+};
+
+/* Per-thread bookkeeping, chained on core_proc->list */
+struct core_task {
+	struct list_head list;
+	struct task_struct *task;
+	/* engine attached to ->task; engine->data points at the core_proc */
+	struct utrace_engine *engine;
+	/* true once this thread has been counted in num_quiesced */
+	bool quiesced;
+};
+
+/*
+ * Tear down a core_proc: detach every remaining engine, free each
+ * core_task, and finally free the core_proc itself.
+ *
+ * NOTE(review): utrace_control(..., UTRACE_DETACH) is invoked with
+ * core_proc->lock (a spinlock) held -- confirm detach can never sleep
+ * in this context.  Also 'ret' is assigned but never consulted, as the
+ * inline question below acknowledges.
+ */
+static void cleanup_core_proc(struct core_proc *core_proc)
+{
+	struct core_task *core_task, *temp;
+	int ret;
+
+	if (core_proc == NULL)
+		return;
+
+	spin_lock(&core_proc->lock);
+	if (list_empty(&core_proc->list))
+		goto out;
+
+	/* walk core_proc->list and free core_task */
+	list_for_each_entry_safe(core_task, temp, &core_proc->list, list) {
+		if (core_task->engine)
+			/* do we really care about the retval here? */
+			ret = utrace_control(core_task->task,
+					core_task->engine, UTRACE_DETACH);
+		list_del(&core_task->list);
+		kfree(core_task);
+	}
+out:
+	spin_unlock(&core_proc->lock);
+	kfree(core_proc);
+	return;
+}
+
+/*
+ * Attach an exclusive utrace engine to thread @t and track it on
+ * @core_proc->list.
+ *
+ * Returns 0 on success, -EALREADY if an engine with our ops is already
+ * attached (dump in progress, or we raced with core_clone()),
+ * -EINPROGRESS if events were set but a callback is already due, or a
+ * negative error otherwise.
+ */
+static int setup_core_task(struct task_struct *t, struct core_proc *core_proc)
+{
+	struct core_task *core_task;
+	int ret = 0;
+
+	if (!t || !core_proc)
+		return -EINVAL;
+
+	core_task = kzalloc(sizeof(*core_task), GFP_KERNEL);
+	if (!core_task)
+		return -ENOMEM;
+
+	/*
+	 * Record the task up front: the error path below detaches via
+	 * core_task->task, which previously was still NULL (only set
+	 * after utrace_set_events()), so a failed event setup tried to
+	 * detach from a NULL task.
+	 */
+	INIT_LIST_HEAD(&core_task->list);
+	core_task->task = t;
+
+	/*
+	 * The engine->data will point to core_proc.
+	 *
+	 * If a core dump is already in progress, a new request will fail
+	 * with -EALREADY since the core dump engine is EXCLUSIVE.
+	 */
+	core_task->engine = utrace_attach_task(t, (UTRACE_ATTACH_CREATE |
+			UTRACE_ATTACH_MATCH_OPS | UTRACE_ATTACH_EXCLUSIVE),
+			&core_ops, core_proc);
+	if (core_task->engine == ERR_PTR(-EEXIST)) {
+		ret = -EALREADY;
+		goto err_out;
+	}
+	if (IS_ERR(core_task->engine)) {
+		ret = PTR_ERR(core_task->engine);
+		goto err_out;
+	}
+
+	/*
+	 * We don't need a utrace_barrier() here since we are just setting
+	 * events the first time and there is no risk of a prior requested
+	 * event callback being in progress.
+	 */
+	ret = utrace_set_events(t, core_task->engine, UTRACE_EVENT(QUIESCE) |
+			UTRACE_EVENT(CLONE) | UTRACE_EVENT(EXIT));
+	if (ret) {
+		/* -EINPROGRESS indicates a callback is due soon */
+		if (ret != -EINPROGRESS) {
+			/*
+			 * Preserve the utrace_set_events() error; the
+			 * old code overwrote ret with the detach result,
+			 * so a successful detach returned 0 to the
+			 * caller after core_task had been freed.
+			 */
+			utrace_control(core_task->task, core_task->engine,
+					UTRACE_DETACH);
+			goto err_out;
+		}
+	}
+
+	spin_lock(&core_proc->lock);
+	list_add_tail(&core_task->list, &core_proc->list);
+	atomic_inc(&core_proc->num_threads);
+	spin_unlock(&core_proc->lock);
+
+	return ret;
+err_out:
+	kfree(core_task);
+	return ret;
+}
+
+/*
+ * Look up the core_task tracking @task on @core_proc->list, or NULL if
+ * no such entry exists.
+ *
+ * NOTE(review): the pointer is returned after core_proc->lock has been
+ * dropped -- confirm callers serialize against core_exit()/cleanup
+ * freeing the entry underneath them.
+ */
+static struct core_task *get_core_task(struct task_struct *task,
+		struct core_proc *core_proc)
+{
+	struct core_task *core_task;
+
+	if (!task || !core_proc)
+		return NULL;
+
+	spin_lock(&core_proc->lock);
+	/* Nothing is deleted during this walk; no _safe variant needed. */
+	list_for_each_entry(core_task, &core_proc->list, list) {
+		if (core_task->task == task) {
+			spin_unlock(&core_proc->lock);
+			return core_task;
+		}
+	}
+	spin_unlock(&core_proc->lock);
+	return NULL;
+}
+
+/* Mark @core_task quiesced exactly once and bump the process-wide count. */
+static inline void set_quiesced(struct core_task *core_task,
+		struct core_proc *core_proc)
+{
+	if (core_task->quiesced)
+		return;
+
+	core_task->quiesced = true;
+	atomic_inc(&core_proc->num_quiesced);
+}
+
+/* XXX Do quiesce_*thread routines below need to return a value? */
+static void quiesce_thread(struct core_task *core_task,
+		struct core_proc *core_proc)
+{
+	int err;
+
+	if (!core_task || !core_proc)
+		return;
+
+	/*
+	 * A task that is still starting up (PF_STARTING) will quiesce on
+	 * its own before it ever reaches userspace, so only established
+	 * tasks get the UTRACE_INTERRUPT nudge.
+	 */
+	if (core_task->task->flags & PF_STARTING)
+		return;
+
+	err = utrace_control(core_task->task, core_task->engine,
+			UTRACE_INTERRUPT);
+	if (!err)
+		set_quiesced(core_task, core_proc);
+}
+
+/*
+ * Walk every tracked thread and either push it towards quiescence
+ * (@quiesce == true) or detach its engine (@quiesce == false).
+ *
+ * NOTE(review): the list is walked without taking core_proc->lock,
+ * unlike the other list users -- confirm callers serialize against
+ * core_clone()/core_exit() mutating the list concurrently.  'ret' is
+ * also assigned but never consulted.
+ */
+static void quiesce_all_threads(struct core_proc *core_proc, bool quiesce)
+{
+	struct core_task *core_task, *temp;
+	int ret = 0;
+
+	if (!core_proc)
+		return;
+
+	list_for_each_entry_safe(core_task, temp, &core_proc->list, list) {
+		if (quiesce)
+			quiesce_thread(core_task, core_proc);
+		else
+			ret = utrace_control(core_task->task,
+					core_task->engine, UTRACE_DETACH);
+	}
+	return;
+}
+
+/*
+ * QUIESCE callback: record that @task has quiesced.  The last thread
+ * to quiesce resumes everybody and wakes the waiter; every other
+ * thread stays stopped until then.
+ */
+static u32 core_quiesce(enum utrace_resume_action action,
+		struct utrace_engine *engine,
+		struct task_struct *task, unsigned long event)
+{
+	struct core_proc *core_proc = (struct core_proc *)engine->data;
+	struct core_task *core_task = get_core_task(task, core_proc);
+
+	/*
+	 * get_core_task() can return NULL (e.g. we raced with exit
+	 * bookkeeping); the old code dereferenced it unconditionally.
+	 * With no record of this thread there is nothing to account,
+	 * so just drop the engine.
+	 */
+	if (!core_task)
+		return UTRACE_DETACH;
+
+	set_quiesced(core_task, core_proc);
+	if (atomic_read(&core_proc->num_quiesced) ==
+			atomic_read(&core_proc->num_threads)) {
+
+		/* All threads quiesced, do your thing :-) */
+
+		/* Let everyone run, indicate we are done! */
+		quiesce_all_threads(core_proc, false);
+		complete(&core_proc->complete);
+	} else {
+		/* Keep thread quiescent */
+		return UTRACE_STOP;
+	}
+	/* All done, detach */
+	return UTRACE_DETACH;
+}
+
+/*
+ * CLONE callback: a new thread joined the process; attach an engine to
+ * it and ask it to quiesce too.
+ */
+static u32 core_clone(enum utrace_resume_action action,
+		struct utrace_engine *engine,
+		struct task_struct *parent, unsigned long clone_flags,
+		struct task_struct *child)
+{
+	struct core_proc *core_proc = (struct core_proc *)engine->data;
+	struct core_task *core_task;
+	int ret;
+
+	if (clone_flags & CLONE_THREAD) {
+		ret = setup_core_task(child, core_proc);
+		/*
+		 * -EINPROGRESS => thread on the way to quiesce
+		 * -EALREADY => we may be racing with attach_utrace_engines
+		 *  and it may already have attached an engine for us.
+		 *
+		 * Note: this must be '&&'; the previous '||' of two
+		 * inequalities was true for every nonzero ret, so the
+		 * benign -EINPROGRESS/-EALREADY cases aborted the dump.
+		 */
+		if (ret && (ret != -EINPROGRESS) && (ret != -EALREADY)) {
+			/* Error! Bail out */
+			complete(&core_proc->complete);
+			return UTRACE_RESUME;
+		}
+		core_task = get_core_task(child, core_proc);
+		if (core_task)
+			quiesce_thread(core_task, core_proc);
+	}
+
+	/* We'll quiesce during clone_complete check soon */
+	return UTRACE_RESUME;
+}
+
+/*
+ * EXEC callback: the process image is being replaced, so abandon the
+ * dump attempt -- wake the waiter and drop our engine.
+ *
+ * XXX:
+ * a. Is this necessary?
+ * b. Are we safe here wrt the core dump being in progress?
+ */
+static u32 core_exec(enum utrace_resume_action action,
+		struct utrace_engine *engine, struct task_struct *task,
+		const struct linux_binfmt *fmt, const struct linux_binprm *bprm,
+		struct pt_regs *regs)
+{
+	struct core_proc *core_proc = engine->data;
+
+	if (core_proc != NULL)
+		complete(&core_proc->complete);
+
+	return UTRACE_DETACH;
+}
+
+/*
+ * XXX Are there subtle races when one thread of a multi-threaded process
+ * is exiting while the non-disruptive core dump is in progress?
+ *
+ * Of course, we still need to honor SIGKILL during core dumping.
+ */
+/*
+ * EXIT callback: drop the exiting thread's bookkeeping.  The last
+ * thread to leave wakes the waiter so cleanup can run.
+ *
+ * NOTE(review): num_quiesced is not decremented here even if this
+ * thread had already been counted -- confirm the quiesced/thread
+ * counts cannot drift apart for core_quiesce()'s comparison.
+ */
+static u32 core_exit(enum utrace_resume_action action,
+		struct utrace_engine *engine, struct task_struct *task,
+		long orig_code, long *code)
+{
+	struct core_task *core_task;
+	struct core_proc *core_proc = (struct core_proc *)engine->data;
+
+	core_task = get_core_task(task, core_proc);
+	if (core_task) {
+		spin_lock(&core_proc->lock);
+		atomic_dec(&core_proc->num_threads);
+		list_del(&core_task->list);
+		kfree(core_task);
+
+		/* If we are the last task, ask for cleanup! */
+		if (unlikely(list_empty(&core_proc->list)))
+			complete(&core_proc->complete);
+		spin_unlock(&core_proc->lock);
+	}
+	return UTRACE_DETACH;
+}
+
+/* utrace callbacks shared by every engine this module attaches */
+static const struct utrace_engine_ops core_ops =
+{
+	.report_quiesce = core_quiesce,	/* thread reached a safe point */
+	.report_clone = core_clone,	/* new thread joined the process */
+	.report_exec = core_exec,	/* image replaced; abort the dump */
+	.report_exit = core_exit,	/* thread exit */
+};
+
+/*
+ * Attach a core-dump engine to every thread of @task's process, then
+ * explicitly ask each thread to quiesce.
+ *
+ * Returns the new core_proc on success, or an ERR_PTR() on failure (in
+ * which case cleanup_core_proc() detaches any engines attached so far).
+ */
+static struct core_proc *attach_utrace_engines(struct task_struct *task)
+{
+	int ret = 0;
+	struct task_struct *t = task;
+	struct core_proc *core_proc;
+
+	if (!task)
+		return ERR_PTR(-EINVAL);
+
+	core_proc = kzalloc(sizeof(*core_proc), GFP_KERNEL);
+	if (!core_proc)
+		return ERR_PTR(-ENOMEM);
+	core_proc->tgid_task = task;
+	INIT_LIST_HEAD(&core_proc->list);
+	atomic_set(&core_proc->num_threads, 0);
+	atomic_set(&core_proc->num_quiesced, 0);
+	spin_lock_init(&core_proc->lock);
+	init_completion(&core_proc->complete);
+
+	rcu_read_lock();
+	do {
+		/* Pin 't' across the attach, which may sleep. */
+		get_task_struct(t);
+		rcu_read_unlock();
+		ret = setup_core_task(t, core_proc);
+		/*
+		 * -EINPROGRESS => we are on our way to quiesce.
+		 * -EALREADY => we may be racing with core_clone and it
+		 *  has already attached an engine for us (setup_core_task
+		 *  maps utrace's -EEXIST to -EALREADY).
+		 *
+		 * Note: the old test compared against positive EINPROGRESS
+		 * and combined the inequalities with '||' (always true),
+		 * so any nonzero ret bailed out.
+		 */
+		if (ret && (ret != -EINPROGRESS) && (ret != -EALREADY))
+			goto err_core_task;
+		rcu_read_lock();
+		put_task_struct(t);
+		ret = 0;
+		t = next_thread(t);
+	} while (t != task);
+
+	rcu_read_unlock();
+	/*
+	 * Now that we have attached engines to all threads, explicitly
+	 * ask for each thread to be quiesced.
+	 */
+	quiesce_all_threads(core_proc, true);
+	return core_proc;
+
+err_core_task:
+	put_task_struct(t);
+	cleanup_core_proc(core_proc);
+	return ERR_PTR(ret);
+}
+
 static ssize_t gen_core_write(struct file *file, const char __user *buf,
 		size_t count, loff_t *ppos)
 {
 	struct task_struct *task;
+	struct core_proc *core_proc;
 	char buffer[PROC_NUMBUF], *end;
 	unsigned int val;
 	int ret;
@@ -53,8 +393,20 @@ static ssize_t gen_core_write(struct fil
 
 	ret = end - buffer;
 
-	/* TODO: call core dumper */
+	/* Attach an engine to each thread */
+	core_proc = attach_utrace_engines(task);
+	if (IS_ERR(core_proc)) {
+		ret = (int)PTR_ERR(core_proc);
+		goto out_no_engines;
+	}
+
+	/* Wait for the dump to complete or error out */
+	wait_for_completion(&core_proc->complete);
+
+	/* Cleanup after */
+	cleanup_core_proc(core_proc);
 
+out_no_engines:
 	put_task_struct(task);
 out_no_task:
 	return ret;




More information about the utrace-devel mailing list