commit 03b9fdf4ce26b4e39a3e755fc717fe4e5ab773dd
Author: David Smith
Date:   Thu Apr 28 10:20:47 2016 -0500

    Fix PR19954 by avoiding "suspicious RCU usage" message.

    * runtime/transport/symbols.c (_stp_module_update_self): Properly
      handle RCU locking when retrieving the 'kallsyms' member of the
      module structure.

diff --git a/runtime/transport/symbols.c b/runtime/transport/symbols.c
index cb7964f..98b0239 100644
--- a/runtime/transport/symbols.c
+++ b/runtime/transport/symbols.c
@@ -249,7 +249,12 @@ static int _stp_module_update_self (void)
 		}
 		else if (!strcmp(".symtab", attr->name)) {
 #ifdef STAPCONF_MOD_KALLSYMS
-			struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
+			struct mod_kallsyms *kallsyms;
+
+			rcu_read_lock_sched();
+			kallsyms = rcu_dereference_sched(mod->kallsyms);
+			rcu_read_unlock_sched();
+
 			if (attr->address == (unsigned long) kallsyms->symtab)
 				_stp_module_self.sections[0].size =
 					kallsyms->num_symtab * sizeof(kallsyms->symtab[0]);
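
The "suspicious RCU usage" warning in PR19954 comes from calling rcu_dereference_sched()
outside an RCU-sched read-side critical section. A minimal sketch of the pattern the fix
adopts follows; 'my_data', 'global_ptr' and 'read_value' are hypothetical names for
illustration only, not part of the systemtap runtime.

#include <linux/rcupdate.h>

struct my_data {
	unsigned long value;
};

/* RCU-protected pointer, updated elsewhere with rcu_assign_pointer(). */
static struct my_data __rcu *global_ptr;

static unsigned long read_value(void)
{
	struct my_data *p;
	unsigned long v = 0;

	rcu_read_lock_sched();			/* also disables preemption */
	p = rcu_dereference_sched(global_ptr);	/* legal only inside the section */
	if (p)
		v = p->value;			/* use the pointer while still inside */
	rcu_read_unlock_sched();
	return v;
}

Because rcu_read_lock_sched() disables preemption, the critical section has to stay short
and must not sleep.
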
commit c8db32343c9b8012fa348cf4f8f104617960f793
Author: David Smith
Date:   Thu Apr 28 10:59:50 2016 -0500

    Improved fake utrace locking.

    * runtime/stp_utrace.c: Fixed potential locking issues by changing the
      'task_work_added' and 'report_work_added' members of 'struct utrace'
      to be atomic variables. In the process, I also renamed
      'task_work_added' to 'resume_work_added'. As atomic variables, they
      can be modified without locking the utrace struct. Also renamed the
      'work' member of 'struct utrace' to 'resume_work' (to match up with
      'resume_work_added').

diff --git a/runtime/stp_utrace.c b/runtime/stp_utrace.c
index a8afc0d..bb2d663 100644
--- a/runtime/stp_utrace.c
+++ b/runtime/stp_utrace.c
@@ -66,17 +66,23 @@ struct utrace {
 	unsigned int vfork_stop:1;	/* need utrace_stop() before vfork wait */
 	unsigned int death:1;		/* in utrace_report_death() now */
 	unsigned int reap:1;		/* release_task() has run */
-	unsigned int pending_attach:1;	/* need splice_attaching() */
-	unsigned int task_work_added:1;	/* called task_work_add() on 'work' */
-	unsigned int report_work_added:1; /* called task_work_add()
-					   * on 'report_work' */
+	unsigned int pending_attach:1;	/* need splice_attaching() */
+
+	/* We need the '*_work_added' variables to be atomic so they
+	 * can be modified without locking the utrace struct. This is
+	 * typically done in atomic context where we can't grab the
+	 * lock. */
+	atomic_t resume_work_added;	/* called task_work_add() on
+					 * 'resume_work' */
+	atomic_t report_work_added;	/* called task_work_add() on
+					 * 'report_work' */
 
 	unsigned long utrace_flags;
 
 	struct hlist_node hlist;	/* task_utrace_table linkage */
 	struct task_struct *task;
 
-	struct task_work work;
+	struct task_work resume_work;
 	struct task_work report_work;
 };
@@ -349,17 +355,20 @@ static int utrace_exit(void)
 static void stp_task_notify_resume(struct task_struct *target,
 				   struct utrace *utrace)
 {
-	if (! utrace->task_work_added) {
-		int rc = stp_task_work_add(target, &utrace->work);
-		if (rc == 0) {
-			utrace->task_work_added = 1;
-		}
-		/* stp_task_work_add() returns -ESRCH if the task has
-		 * already passed exit_task_work(). Just ignore this
-		 * error. */
-		else if (rc != -ESRCH) {
-			printk(KERN_ERR "%s:%d - task_work_add() returned %d\n",
-			       __FUNCTION__, __LINE__, rc);
+	if (atomic_add_unless(&utrace->resume_work_added, 1, 1)) {
+		int rc = stp_task_work_add(target, &utrace->resume_work);
+		if (rc != 0) {
+			atomic_set(&utrace->report_work_added, 0);
+
+			/* stp_task_work_add() returns -ESRCH if the
+			 * task has already passed
+			 * exit_task_work(). Just ignore this
+			 * error. */
+			if (rc != -ESRCH) {
+				printk(KERN_ERR
+				       "%s:%d - task_work_add() returned %d\n",
+				       __FUNCTION__, __LINE__, rc);
+			}
 		}
 	}
 }
@@ -394,8 +403,9 @@ static void utrace_cleanup(struct utrace *utrace)
 		list_del(&engine->entry);
 		kmem_cache_free(utrace_engine_cachep, engine);
 	}
+	stp_spin_unlock(&utrace->lock);
 
-	if (utrace->task_work_added) {
+	if (atomic_add_unless(&utrace->resume_work_added, -1, 0)) {
 #ifdef STP_TF_DEBUG
 		if (stp_task_work_cancel(utrace->task, &utrace_resume) == NULL)
 			printk(KERN_ERR "%s:%d - task_work_cancel() failed? task %p, %d, %s\n",
@@ -406,9 +416,8 @@ static void utrace_cleanup(struct utrace *utrace)
 #else
 		stp_task_work_cancel(utrace->task, &utrace_resume);
 #endif
-		utrace->task_work_added = 0;
 	}
-	if (utrace->report_work_added) {
+	if (atomic_add_unless(&utrace->report_work_added, -1, 0)) {
 #ifdef STP_TF_DEBUG
 		if (stp_task_work_cancel(utrace->task, &utrace_report_work) == NULL)
 			printk(KERN_ERR "%s:%d - task_work_cancel() failed? task %p, %d, %s\n",
@@ -419,9 +428,7 @@ static void utrace_cleanup(struct utrace *utrace)
 #else
 		stp_task_work_cancel(utrace->task, &utrace_report_work);
 #endif
-		utrace->report_work_added = 0;
 	}
-	stp_spin_unlock(&utrace->lock);
 
 	/* Free the struct utrace itself. */
 	kmem_cache_free(utrace_cachep, utrace);
@@ -522,8 +529,10 @@ static bool utrace_task_alloc(struct task_struct *task)
 	INIT_LIST_HEAD(&utrace->attaching);
 	utrace->resume = UTRACE_RESUME;
 	utrace->task = task;
-	stp_init_task_work(&utrace->work, &utrace_resume);
+	stp_init_task_work(&utrace->resume_work, &utrace_resume);
 	stp_init_task_work(&utrace->report_work, &utrace_report_work);
+	atomic_set(&utrace->resume_work_added, 0);
+	atomic_set(&utrace->report_work_added, 0);
 
 	stp_spin_lock(&task_utrace_lock);
 	u = __task_utrace_struct(task);
@@ -558,8 +567,8 @@ static void utrace_free(struct utrace *utrace)
 	stp_spin_unlock(&task_utrace_lock);
 
 	/* Free the utrace struct. */
-	stp_spin_lock(&utrace->lock);
 #ifdef STP_TF_DEBUG
+	stp_spin_lock(&utrace->lock);
 	if (unlikely(utrace->reporting)
 	    || unlikely(!list_empty(&utrace->attached))
 	    || unlikely(!list_empty(&utrace->attaching)))
@@ -567,27 +576,31 @@ static void utrace_free(struct utrace *utrace)
 			       __FUNCTION__, __LINE__, utrace->reporting,
 			       list_empty(&utrace->attached),
 			       list_empty(&utrace->attaching));
+	stp_spin_unlock(&utrace->lock);
 #endif
 
-	if (utrace->task_work_added) {
-		if (stp_task_work_cancel(utrace->task, &utrace_resume) == NULL)
-			printk(KERN_ERR "%s:%d - task_work_cancel() failed? task %p, %d, %s\n",
+	if (atomic_add_unless(&utrace->resume_work_added, -1, 0)) {
+		if ((stp_task_work_cancel(utrace->task, &utrace_resume)
+		     == NULL)
+		    && (utrace->task->flags & ~PF_EXITING)
+		    && (utrace->task->exit_state == 0))
+			printk(KERN_ERR "%s:%d * task_work_cancel() failed? task %p, %d, %s, 0x%lx 0x%x\n",
 			       __FUNCTION__, __LINE__, utrace->task,
 			       utrace->task->tgid,
 			       (utrace->task->comm ? utrace->task->comm
-				: "UNKNOWN"));
-		utrace->task_work_added = 0;
-	}
-	if (utrace->report_work_added) {
-		if (stp_task_work_cancel(utrace->task, &utrace_report_work) == NULL)
-			printk(KERN_ERR "%s:%d - task_work_cancel() failed? task %p, %d, %s\n",
+				: "UNKNOWN"), utrace->task->state, utrace->task->exit_state);
+	}
+	if (atomic_add_unless(&utrace->report_work_added, -1, 0)) {
+		if ((stp_task_work_cancel(utrace->task, &utrace_report_work)
+		     == NULL)
+		    && (utrace->task->flags & ~PF_EXITING)
+		    && (utrace->task->exit_state == 0))
+			printk(KERN_ERR "%s:%d ** task_work_cancel() failed? task %p, %d, %s, 0x%lx, 0x%x\n",
 			       __FUNCTION__, __LINE__, utrace->task,
 			       utrace->task->tgid,
 			       (utrace->task->comm ? utrace->task->comm
-				: "UNKNOWN"));
-		utrace->report_work_added = 0;
+				: "UNKNOWN"), utrace->task->state, utrace->task->exit_state);
 	}
-	stp_spin_unlock(&utrace->lock);
 
 	kmem_cache_free(utrace_cachep, utrace);
 }
@@ -2257,7 +2270,7 @@ static void utrace_report_death(void *cb_data __attribute__ ((unused)),
 	 * of detach bookkeeping. */
 	if (in_atomic() || irqs_disabled()) {
-		if (! utrace->report_work_added) {
+		if (atomic_add_unless(&utrace->report_work_added, 1, 1)) {
 			int rc;
 #ifdef STP_TF_DEBUG
 			printk(KERN_ERR "%s:%d - adding task_work\n",
@@ -2265,17 +2278,17 @@ static void utrace_report_death(void *cb_data __attribute__ ((unused)),
 #endif
 			rc = stp_task_work_add(task, &utrace->report_work);
-			if (rc == 0) {
-				utrace->report_work_added = 1;
-			}
-			/* stp_task_work_add() returns -ESRCH if the
-			 * task has already passed
-			 * exit_task_work(). Just ignore this
-			 * error. */
-			else if (rc != -ESRCH) {
-				printk(KERN_ERR
-				       "%s:%d - task_work_add() returned %d\n",
-				       __FUNCTION__, __LINE__, rc);
+			if (rc != 0) {
+				atomic_set(&utrace->report_work_added, 0);
+				/* stp_task_work_add() returns -ESRCH
+				 * if the task has already passed
+				 * exit_task_work(). Just ignore this
+				 * error. */
+				if (rc != -ESRCH) {
+					printk(KERN_ERR
+					       "%s:%d - task_work_add() returned %d\n",
+					       __FUNCTION__, __LINE__, rc);
+				}
 			}
 		}
 	}
@@ -2337,13 +2350,13 @@ static void utrace_resume(struct task_work *work)
 	 * instantaneous (where 'task_utrace_struct()' has to do a
 	 * hash lookup). */
-	struct utrace *utrace = container_of(work, struct utrace, work);
+	struct utrace *utrace = container_of(work, struct utrace, resume_work);
 	struct task_struct *task = current;
 	INIT_REPORT(report);
 	struct utrace_engine *engine;
 
 	might_sleep();
-	utrace->task_work_added = 0;
+	atomic_set(&utrace->resume_work_added, 0);
 
 	/* Make sure the task isn't exiting. */
 	if (task->flags & PF_EXITING) {
@@ -2436,8 +2449,8 @@ static void utrace_report_work(struct task_work *work)
 		       __FUNCTION__, __LINE__, in_atomic(), irqs_disabled());
 #endif
 	might_sleep();
-	utrace->report_work_added = 0;
 
+	atomic_set(&utrace->report_work_added, 0);
 	stp_spin_lock(&utrace->lock);
 	BUG_ON(utrace->death);
 	utrace->death = 1;
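
The patch replaces the old bit-field flags with lock-free 0/1 flags built on
atomic_add_unless(): atomic_add_unless(&flag, 1, 1) succeeds only for the caller that
flips the flag from 0 to 1, and atomic_add_unless(&flag, -1, 0) only for the caller
that flips it back to 0. A minimal sketch of the idiom follows; 'queue_the_work' and
'cancel_the_work' are hypothetical placeholders standing in for
stp_task_work_add()/stp_task_work_cancel(), not real functions.

#include <linux/atomic.h>

/* Hypothetical helpers; in the real code these queue and cancel a task_work. */
static int queue_the_work(void) { return 0; }
static void cancel_the_work(void) { }

/* The flag only ever holds 0 or 1. */
static atomic_t work_added = ATOMIC_INIT(0);

static void maybe_queue_work(void)
{
	/* 0 -> 1 transition: returns nonzero only for the caller that
	 * actually flipped the flag, so the work is queued at most once. */
	if (atomic_add_unless(&work_added, 1, 1)) {
		if (queue_the_work() != 0)
			atomic_set(&work_added, 0);	/* roll back on failure */
	}
}

static void maybe_cancel_work(void)
{
	/* 1 -> 0 transition: only the caller that consumes the flag cancels. */
	if (atomic_add_unless(&work_added, -1, 0))
		cancel_the_work();
}

Because both transitions are single atomic operations, they can safely run in atomic
context where the utrace spinlock cannot be taken, which is the point of the commit.
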
commit 0859b50e3ac4782e53c4c10b82c6e24174378bd4
Author: Mateusz Guzik
Date:   Mon May 2 12:28:55 2016 -0500

    Plug preempt leak in _stp_runtime_entryfn_put/get_context.

    If _stp_runtime_entryfn_get_context returns a context, the preemption
    counter is always incremented. On the other hand,
    _stp_runtime_entryfn_put_context only decrements the counter if the
    passed context matches the one currently set on the cpu.

    The context can be set to NULL by _stp_runtime_contexts_free, making
    the comparison false and in effect leading to a leak, e.g.:

    timer: _stp_ctl_work_callback+0x0/0x1e0[stap_af8544c7eb51251ef8c377abff659b05_25070] preempt leak: 00000101 -> 00000102

diff --git a/runtime/linux/runtime_context.h b/runtime/linux/runtime_context.h
index c9ffe18..9d325da 100644
--- a/runtime/linux/runtime_context.h
+++ b/runtime/linux/runtime_context.h
@@ -80,11 +80,12 @@ static struct context * _stp_runtime_entryfn_get_context(void)
 
 static inline void _stp_runtime_entryfn_put_context(struct context *c)
 {
-	if (c && c == _stp_runtime_get_context()) {
-		atomic_dec(&c->busy);
+	if (c) {
+		if (c == _stp_runtime_get_context())
+			atomic_dec(&c->busy);
+		/* else, warn about bad state? */
 		preempt_enable_no_resched();
 	}
-	/* else, warn about bad state? */
 	return;
 }
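
The leak happens because the preempt_disable() taken in the get path was only paired
with preempt_enable_no_resched() when the per-cpu context pointer still matched. Below
is a simplified, hypothetical sketch of the invariant the fix restores; 'ctx_slot',
'entryfn_get_context' and 'entryfn_put_context' are illustrative names, not the
runtime's actual code.

#include <linux/atomic.h>
#include <linux/percpu.h>
#include <linux/preempt.h>

struct context {
	atomic_t busy;
};

static DEFINE_PER_CPU(struct context *, ctx_slot);

static struct context *entryfn_get_context(void)
{
	struct context *c;

	preempt_disable();
	c = __this_cpu_read(ctx_slot);
	if (c && atomic_add_unless(&c->busy, 1, 1))
		return c;		/* success: preemption stays disabled */
	preempt_enable_no_resched();	/* claimed nothing: balance here */
	return NULL;
}

static void entryfn_put_context(struct context *c)
{
	if (!c)
		return;
	if (c == __this_cpu_read(ctx_slot))
		atomic_dec(&c->busy);
	/* Always balance the preempt_disable() from the get path, even if
	 * the per-cpu slot has since been cleared (e.g. by context teardown). */
	preempt_enable_no_resched();
}

Every successful get is matched by exactly one enable in put, regardless of what happened
to the per-cpu slot in between, so the preempt count can no longer drift upward.
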
commit 0beafcec9a2971d466419d430d13fdb2c4f50d94
Author: David Smith
Date:   Tue May 3 13:23:58 2016 -0500

    Fix PR20040 by keeping the task_exe_file function from sleeping.

    * tapset/linux/task.stp: No longer call get_task_mm()/mmput(), so that
      the function won't sleep and cause kernel bugs.

diff --git a/tapset/linux/task.stp b/tapset/linux/task.stp
index 9b204f5..774cf58 100644
--- a/tapset/linux/task.stp
+++ b/tapset/linux/task.stp
@@ -795,13 +795,14 @@ function task_exe_file:long(task:long)
 		STAP_ERROR ("invalid task struct pointer");
 	}
 
+	// We'd like to call get_task_mm()/mmput() here, but they can
+	// sleep. So, let's hope incrementing the task's usage (by
+	// calling get_task_struct) is enough to keep the mm around.
 	get_task_struct(task);
-	mm = get_task_mm(task);
-	put_task_struct(task);
-	if (mm) {
+	mm = task->mm;
+	if (mm)
 		exe_file = stap_find_exe_file(mm);
-		mmput(mm);
-	}
+	put_task_struct(task);
 
 	if (exe_file) {
 		STAP_RETURN((unsigned long)exe_file);
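
The commit trades get_task_mm()/mmput(), which may sleep (mmput() can drop the last mm
reference and tear the mm down), for pinning the task and reading task->mm directly, as
the new comment in the hunk above admits. A rough sketch of that non-sleeping pattern in
plain kernel C follows; 'inspect_mm' and 'peek_at_task_mm' are hypothetical names, not
the embedded-C body of the tapset function itself.

#include <linux/sched.h>	/* newer kernels also need <linux/sched/task.h>
				 * for get_task_struct()/put_task_struct() */
#include <linux/mm_types.h>

/* Hypothetical, non-sleeping consumer of the mm pointer. */
static void inspect_mm(struct mm_struct *mm) { }

static void peek_at_task_mm(struct task_struct *task)
{
	struct mm_struct *mm;

	get_task_struct(task);	/* pin the task_struct itself */
	mm = task->mm;		/* NULL for kernel threads */
	if (mm)
		inspect_mm(mm);	/* no mm reference was taken, so no mmput() */
	put_task_struct(task);
}

The trade-off, which the commit states openly, is that holding a task reference does not
by itself guarantee the mm stays valid; it merely avoids the sleeping calls that were
triggering kernel bugs in probe context.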