utrace-indirect branch

Roland McGrath roland at redhat.com
Thu Oct 29 01:55:32 UTC 2009


Please take a look at the patch below and tell me what you think.  This
is the new(ish) utrace-indirect branch (not to be confused with what's
now old/utrace-indirect).  I first made it a while ago, but I don't
recall if I ever mentioned it.  This compiles and looks right, but I
have not done any testing at all.

Unlike the old code that freaked upstream reviewers all the way out,
this has very simple lifetime rules for struct utrace.  Once allocated,
it lives until task_struct is freed.  The utrace_task_alloc() logic
covers the only race (at first attach), and that seems pretty easy
to understand and be confident in.

For those of us who hack on utrace guts, this is much nicer.  The struct
is private in utrace.c where it naturally belongs, and we don't have to
wait for the whole kernel to be recompiled every time we want to tweak
that struct.

For those of us who have to maintain long-term stable production kernels,
this is crucial.  We need the freedom to change utrace implementation
details (and backport new utrace APIs) into our kernels in the future,
without perturbing the all-important task_struct layout.  This is not a
concern that upstream ever cares about, but it is unavoidable in real life.

I am a bit concerned about letting the ugly utrace_struct.h way ever get
merged in, so that to fix it later we have to convince people all over
again about "changing what works".  But of course I am really not at all
sure whether even this innocuous version will upset the reviewers.

What do you think?


Thanks,
Roland

---
 include/linux/init_task.h     |    1 -
 include/linux/sched.h         |    3 +-
 include/linux/tracehook.h     |   12 ++++++
 include/linux/utrace.h        |    9 ++---
 include/linux/utrace_struct.h |   59 ------------------------------
 kernel/fork.c                 |    1 +
 kernel/utrace.c               |   80 ++++++++++++++++++++++++++++++++++++-----
 7 files changed, 89 insertions(+), 76 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a598a6b..21a6f5d 100644  
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -177,7 +177,6 @@ extern struct cred init_cred;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
-	INIT_UTRACE(tsk)						\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 94ca5c5..8eeb46a 100644  
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -61,7 +61,6 @@ struct sched_param {
 #include <linux/errno.h>
 #include <linux/nodemask.h>
 #include <linux/mm_types.h>
-#include <linux/utrace_struct.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
@@ -1395,7 +1394,7 @@ struct task_struct {
 	seccomp_t seccomp;
 
 #ifdef CONFIG_UTRACE
-	struct utrace utrace;
+	struct utrace *utrace;
 	unsigned long utrace_flags;
 #endif
 
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 3caaf30..a01f8a9 100644  
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -609,6 +609,18 @@ static inline void tracehook_report_deat
 		utrace_report_death(task, death_cookie, group_dead, signal);
 }
 
+/**
+ * tracehook_free_task - task_struct is being freed
+ * @task:		dead &struct task_struct being freed
+ *
+ * Called from free_task() when @task is no longer in use.
+ */
+static inline void tracehook_free_task(struct task_struct *task)
+{
+	if (task_utrace_struct(task))
+		utrace_free_task(task);
+}
+
 #ifdef TIF_NOTIFY_RESUME
 /**
  * set_notify_resume - cause tracehook_notify_resume() to be called
diff --git a/include/linux/utrace.h b/include/linux/utrace.h
index f968792..6e8129b 100644  
--- a/include/linux/utrace.h
+++ b/include/linux/utrace.h
@@ -92,6 +92,8 @@ enum utrace_events {
  * tracehook.h doesn't need to #ifndef CONFIG_UTRACE them to
  * avoid external references in case of unoptimized compilation.
  */
+void utrace_free_task(struct task_struct *)
+	__attribute__((weak));
 bool utrace_interrupt_pending(void)
 	__attribute__((weak));
 void utrace_resume(struct task_struct *, struct pt_regs *)
@@ -155,16 +157,13 @@ static inline unsigned long task_utrace_
 
 static inline struct utrace *task_utrace_struct(struct task_struct *task)
 {
-	return &task->utrace;
+	return task->utrace;
 }
 
 static inline void utrace_init_task(struct task_struct *task)
 {
 	task->utrace_flags = 0;
-	memset(&task->utrace, 0, sizeof(task->utrace));
-	INIT_LIST_HEAD(&task->utrace.attached);
-	INIT_LIST_HEAD(&task->utrace.attaching);
-	spin_lock_init(&task->utrace.lock);
+	task->utrace = NULL;
 }
 
 void utrace_release_task(struct task_struct *);
diff --git a/include/linux/utrace_struct.h b/include/linux/utrace_struct.h
deleted file mode 100644
index ee7489f... .  
--- a/include/linux/utrace_struct.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * 'struct utrace' data structure for kernel/utrace.c private use.
- *
- * Copyright (C) 2006-2009 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- */
-
-#ifndef _LINUX_UTRACE_STRUCT_H
-#define _LINUX_UTRACE_STRUCT_H	1
-
-#ifdef CONFIG_UTRACE
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-/*
- * Per-thread structure private to utrace implementation.  This properly
- * belongs in kernel/utrace.c and its use is entirely private to the code
- * there.  It is only defined in a header file so that it can be embedded
- * in the struct task_struct layout.  It is here rather than in utrace.h
- * to avoid header nesting order issues getting too complex.
- *
- */
-struct utrace {
-	struct task_struct *cloning;
-
-	struct list_head attached, attaching;
-	spinlock_t lock;
-
-	struct utrace_engine *reporting;
-
-	unsigned int stopped:1;
-	unsigned int report:1;
-	unsigned int interrupt:1;
-	unsigned int signal_handler:1;
-	unsigned int vfork_stop:1; /* need utrace_stop() before vfork wait */
-	unsigned int death:1;	/* in utrace_report_death() now */
-	unsigned int reap:1;	/* release_task() has run */
-	unsigned int pending_attach:1; /* need splice_attaching() */
-};
-
-# define INIT_UTRACE(tsk)						      \
-	.utrace_flags = 0,						      \
-	.utrace = {							      \
-		.lock = __SPIN_LOCK_UNLOCKED(tsk.utrace.lock),		      \
-		.attached = LIST_HEAD_INIT(tsk.utrace.attached),	      \
-		.attaching = LIST_HEAD_INIT(tsk.utrace.attaching),	      \
-	},
-
-#else
-
-# define INIT_UTRACE(tsk)	/* Nothing. */
-
-#endif	/* CONFIG_UTRACE */
-
-#endif	/* linux/utrace_struct.h */
diff --git a/kernel/fork.c b/kernel/fork.c
index 4c20fff..77a3b25 100644  
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -147,6 +147,7 @@ static void account_kernel_stack(struct 
 
 void free_task(struct task_struct *tsk)
 {
+	tracehook_free_task(tsk);
 	prop_local_destroy_single(&tsk->dirties);
 	account_kernel_stack(tsk->stack, -1);
 	free_thread_info(tsk->stack);
diff --git a/kernel/utrace.c b/kernel/utrace.c
index 25c0908..eadc87d 100644  
--- a/kernel/utrace.c
+++ b/kernel/utrace.c
@@ -25,8 +25,10 @@
 
 
 /*
- * Rules for 'struct utrace', defined in <linux/utrace_struct.h>
- * but used entirely privately in this file.
+ * Per-thread structure private to utrace implementation.
+ * If task_struct.utrace_flags is nonzero, task_struct.utrace
+ * has always been allocated first.  Once allocated, it is
+ * never freed until free_task().
  *
  * The common event reporting loops are done by the task making the
  * report without ever taking any locks.  To facilitate this, the two
@@ -49,18 +51,71 @@
  * engines attached asynchronously go on the stable @attached list
  * in time to have their callbacks seen.
  */
+struct utrace {
+	struct task_struct *cloning;
 
+	struct list_head attached, attaching;
+	spinlock_t lock;
+
+	struct utrace_engine *reporting;
+
+	unsigned int stopped:1;
+	unsigned int report:1;
+	unsigned int interrupt:1;
+	unsigned int signal_handler:1;
+	unsigned int vfork_stop:1; /* need utrace_stop() before vfork wait */
+	unsigned int death:1;	/* in utrace_report_death() now */
+	unsigned int reap:1;	/* release_task() has run */
+	unsigned int pending_attach:1; /* need splice_attaching() */
+};
+
+static struct kmem_cache *utrace_cachep;
 static struct kmem_cache *utrace_engine_cachep;
 static const struct utrace_engine_ops utrace_detached_ops; /* forward decl */
 
 static int __init utrace_init(void)
 {
+	utrace_cachep = KMEM_CACHE(utrace, SLAB_PANIC);
 	utrace_engine_cachep = KMEM_CACHE(utrace_engine, SLAB_PANIC);
 	return 0;
 }
 module_init(utrace_init);
 
 /*
+ * Set up @task.utrace for the first time.  We can have races
+ * between two utrace_attach_task() calls here.  The task_lock()
+ * governs installing the new pointer.  If another one got in first,
+ * we just punt the new one we allocated.
+ *
+ * This returns false only in case of a memory allocation failure.
+ */
+static bool utrace_task_alloc(struct task_struct *task)
+{
+	struct utrace *utrace = kmem_cache_zalloc(utrace_cachep, GFP_KERNEL);
+	if (unlikely(!utrace))
+		return false;
+	INIT_LIST_HEAD(&utrace->attached);
+	INIT_LIST_HEAD(&utrace->attaching);
+	spin_lock_init(&utrace->lock);
+	task_lock(task);
+	if (likely(!task->utrace))
+		task->utrace = utrace;
+	task_unlock(task);
+	if (unlikely(task->utrace != utrace))
+		kmem_cache_free(utrace_cachep, utrace);
+	return true;
+}
+
+/*
+ * This is called via tracehook_free_task() from free_task()
+ * when @task is being deallocated.
+ */
+void utrace_free_task(struct task_struct *task)
+{
+	kmem_cache_free(utrace_cachep, task->utrace);
+}
+
+/*
  * This is called with @utrace->lock held when the task is safely
  * quiescent, i.e. it won't consult utrace->attached without the lock.
  * Move any engines attached asynchronously from @utrace->attaching
@@ -119,7 +174,8 @@ static struct utrace_engine *matching_en
 static inline int utrace_attach_delay(struct task_struct *target)
 {
 	if ((target->flags & PF_STARTING) &&
-	    current->utrace.cloning != target)
+	    task_utrace_struct(current) &&
+	    task_utrace_struct(current)->cloning != target)
 		do {
 			schedule_timeout_interruptible(1);
 			if (signal_pending(current))
@@ -225,6 +281,8 @@ struct utrace_engine *utrace_attach_task
 	int ret;
 
 	if (!(flags & UTRACE_ATTACH_CREATE)) {
+		if (unlikely(!utrace))
+			return ERR_PTR(-ENOENT);
 		spin_lock(&utrace->lock);
 		engine = matching_engine(utrace, flags, ops, data);
 		if (engine)
@@ -242,6 +300,12 @@ struct utrace_engine *utrace_attach_task
 		 */
 		return ERR_PTR(-EPERM);
 
+	if (!utrace) {
+		if (unlikely(!utrace_task_alloc(target)))
+			return ERR_PTR(-ENOMEM);
+		utrace = target->utrace;
+	}
+
 	engine = kmem_cache_alloc(utrace_engine_cachep, GFP_KERNEL);
 	if (unlikely(!engine))
 		return ERR_PTR(-ENOMEM);
@@ -370,7 +434,7 @@ static struct utrace *get_utrace_lock(st
 		return attached ? ERR_PTR(-ESRCH) : ERR_PTR(-ERESTARTSYS);
 	}
 
-	utrace = &target->utrace;
+	utrace = task_utrace_struct(target);
 	spin_lock(&utrace->lock);
 	if (unlikely(!engine->ops) ||
 	    unlikely(engine->ops == &utrace_detached_ops)) {
@@ -440,9 +504,7 @@ static void utrace_reap(struct task_stru
  */
 void utrace_release_task(struct task_struct *target)
 {
-	struct utrace *utrace;
-
-	utrace = &target->utrace;
+	struct utrace *utrace = task_utrace_struct(target);
 
 	spin_lock(&utrace->lock);
 
@@ -1896,7 +1958,7 @@ int utrace_get_signal(struct task_struct
 	u32 ret;
 	int signr;
 
-	utrace = &task->utrace;
+	utrace = task_utrace_struct(task);
 	if (utrace->interrupt || utrace->report || utrace->signal_handler) {
 		/*
 		 * We've been asked for an explicit report before we
@@ -2335,7 +2397,7 @@ EXPORT_SYMBOL_GPL(task_user_regset_view)
  */
 void task_utrace_proc_status(struct seq_file *m, struct task_struct *p)
 {
-	struct utrace *utrace = &p->utrace;
+	struct utrace *utrace = task_utrace_struct(p);
 	seq_printf(m, "Utrace:\t%lx%s%s%s\n",
 		   p->utrace_flags,
 		   utrace->stopped ? " (stopped)" : "",




More information about the utrace-devel mailing list