]> Pileus Git - ~andy/linux/blobdiff - kernel/trace/ftrace.c
ftrace: Use manual free after synchronize_sched() not call_rcu_sched()
[~andy/linux] / kernel / trace / ftrace.c
index 41473b4ad7a47c4ef41672da3ecf797bb722a89b..25770824598f8fc5b498d2134715680a404b915c 100644 (file)
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
 #endif
 
+/*
+ * Traverse the ftrace_global_list, invoking all entries.  The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism.  The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
+#define do_for_each_ftrace_op(op, list)                        \
+       op = rcu_dereference_raw(list);                 \
+       do
+
+/*
+ * Optimized for just a single item in the list (as that is the normal case).
+ */
+#define while_for_each_ftrace_op(op)                           \
+       while (likely(op = rcu_dereference_raw((op)->next)) &&  \
+              unlikely((op) != &ftrace_list_end))
+
 /**
  * ftrace_nr_registered_ops - return number of ops registered
  *
@@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void)
        return cnt;
 }
 
-/*
- * Traverse the ftrace_global_list, invoking all entries.  The reason that we
- * can use rcu_dereference_raw() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism.  The rcu_dereference_raw() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
 static void
 ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
                        struct ftrace_ops *op, struct pt_regs *regs)
 {
-       if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
+       int bit;
+
+       bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
+       if (bit < 0)
                return;
 
-       trace_recursion_set(TRACE_GLOBAL_BIT);
-       op = rcu_dereference_raw(ftrace_global_list); /*see above*/
-       while (op != &ftrace_list_end) {
+       do_for_each_ftrace_op(op, ftrace_global_list) {
                op->func(ip, parent_ip, op, regs);
-               op = rcu_dereference_raw(op->next); /*see above*/
-       };
-       trace_recursion_clear(TRACE_GLOBAL_BIT);
+       } while_for_each_ftrace_op(op);
+
+       trace_clear_recursion(bit);
 }
 
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
@@ -221,10 +233,24 @@ static void update_global_ops(void)
         * registered callers.
         */
        if (ftrace_global_list == &ftrace_list_end ||
-           ftrace_global_list->next == &ftrace_list_end)
+           ftrace_global_list->next == &ftrace_list_end) {
                func = ftrace_global_list->func;
-       else
+               /*
+                * As we are calling the function directly,
+                * if it does not have recursion protection,
+                * the function_trace_op needs to be updated
+                * accordingly.
+                */
+               if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
+                       global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
+               else
+                       global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
+       } else {
                func = ftrace_global_list_func;
+               /* The list has its own recursion protection. */
+               global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
+       }
+
 
        /* If we filter on pids, update to use the pid function */
        if (!list_empty(&ftrace_pids)) {
@@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
        if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
                return -EINVAL;
 
-#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
        /*
         * If the ftrace_ops specifies SAVE_REGS, then it only can be used
         * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
@@ -1042,7 +1068,7 @@ struct ftrace_func_probe {
        unsigned long           flags;
        unsigned long           ip;
        void                    *data;
-       struct rcu_head         rcu;
+       struct list_head        free_list;
 };
 
 struct ftrace_func_entry {
@@ -2952,28 +2978,27 @@ static void __disable_ftrace_function_probe(void)
 }
 
 
-static void ftrace_free_entry_rcu(struct rcu_head *rhp)
+static void ftrace_free_entry(struct ftrace_func_probe *entry)
 {
-       struct ftrace_func_probe *entry =
-               container_of(rhp, struct ftrace_func_probe, rcu);
-
        if (entry->ops->free)
-               entry->ops->free(&entry->data);
+               entry->ops->free(entry->ops, entry->ip, &entry->data);
        kfree(entry);
 }
 
-
 int
 register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                              void *data)
 {
        struct ftrace_func_probe *entry;
+       struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
+       struct ftrace_hash *hash;
        struct ftrace_page *pg;
        struct dyn_ftrace *rec;
        int type, len, not;
        unsigned long key;
        int count = 0;
        char *search;
+       int ret;
 
        type = filter_parse_regex(glob, strlen(glob), &search, &not);
        len = strlen(search);
@@ -2984,8 +3009,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 
        mutex_lock(&ftrace_lock);
 
-       if (unlikely(ftrace_disabled))
+       hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
+       if (!hash) {
+               count = -ENOMEM;
+               goto out_unlock;
+       }
+
+       if (unlikely(ftrace_disabled)) {
+               count = -ENODEV;
                goto out_unlock;
+       }
 
        do_for_each_ftrace_rec(pg, rec) {
 
@@ -3009,14 +3042,21 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                 * for each function we find. We call the callback
                 * to give the caller an opportunity to do so.
                 */
-               if (ops->callback) {
-                       if (ops->callback(rec->ip, &entry->data) < 0) {
+               if (ops->init) {
+                       if (ops->init(ops, rec->ip, &entry->data) < 0) {
                                /* caller does not like this func */
                                kfree(entry);
                                continue;
                        }
                }
 
+               ret = enter_record(hash, rec, 0);
+               if (ret < 0) {
+                       kfree(entry);
+                       count = ret;
+                       goto out_unlock;
+               }
+
                entry->ops = ops;
                entry->ip = rec->ip;
 
@@ -3024,10 +3064,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
 
        } while_for_each_ftrace_rec();
+
+       ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+       if (ret < 0)
+               count = ret;
+
        __enable_ftrace_function_probe();
 
  out_unlock:
        mutex_unlock(&ftrace_lock);
+       free_ftrace_hash(hash);
 
        return count;
 }
@@ -3041,7 +3087,12 @@ static void
 __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                                  void *data, int flags)
 {
+       struct ftrace_func_entry *rec_entry;
        struct ftrace_func_probe *entry;
+       struct ftrace_func_probe *p;
+       struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
+       struct list_head free_list;
+       struct ftrace_hash *hash;
        struct hlist_node *n, *tmp;
        char str[KSYM_SYMBOL_LEN];
        int type = MATCH_FULL;
@@ -3062,6 +3113,14 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
        }
 
        mutex_lock(&ftrace_lock);
+
+       hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
+       if (!hash)
+               /* Hmm, should report this somehow */
+               goto out_unlock;
+
+       INIT_LIST_HEAD(&free_list);
+
        for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
                struct hlist_head *hhd = &ftrace_func_hash[i];
 
@@ -3082,12 +3141,30 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                                        continue;
                        }
 
-                       hlist_del(&entry->node);
-                       call_rcu(&entry->rcu, ftrace_free_entry_rcu);
+                       rec_entry = ftrace_lookup_ip(hash, entry->ip);
+                       /* It is possible more than one entry had this ip */
+                       if (rec_entry)
+                               free_hash_entry(hash, rec_entry);
+
+                       hlist_del_rcu(&entry->node);
+                       list_add(&entry->free_list, &free_list);
                }
        }
        __disable_ftrace_function_probe();
+       /*
+        * Remove after the disable is called. Otherwise, if the last
+        * probe is removed, a null hash means *all enabled*.
+        */
+       ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+       synchronize_sched();
+       list_for_each_entry_safe(entry, p, &free_list, free_list) {
+               list_del(&entry->free_list);
+               ftrace_free_entry(entry);
+       }
+
+ out_unlock:
        mutex_unlock(&ftrace_lock);
+       free_ftrace_hash(hash);
 }
 
 void
@@ -3970,37 +4047,51 @@ static void ftrace_init_module(struct module *mod,
        ftrace_process_locs(mod, start, end);
 }
 
-static int ftrace_module_notify(struct notifier_block *self,
-                               unsigned long val, void *data)
+static int ftrace_module_notify_enter(struct notifier_block *self,
+                                     unsigned long val, void *data)
 {
        struct module *mod = data;
 
-       switch (val) {
-       case MODULE_STATE_COMING:
+       if (val == MODULE_STATE_COMING)
                ftrace_init_module(mod, mod->ftrace_callsites,
                                   mod->ftrace_callsites +
                                   mod->num_ftrace_callsites);
-               break;
-       case MODULE_STATE_GOING:
+       return 0;
+}
+
+static int ftrace_module_notify_exit(struct notifier_block *self,
+                                    unsigned long val, void *data)
+{
+       struct module *mod = data;
+
+       if (val == MODULE_STATE_GOING)
                ftrace_release_mod(mod);
-               break;
-       }
 
        return 0;
 }
 #else
-static int ftrace_module_notify(struct notifier_block *self,
-                               unsigned long val, void *data)
+static int ftrace_module_notify_enter(struct notifier_block *self,
+                                     unsigned long val, void *data)
+{
+       return 0;
+}
+static int ftrace_module_notify_exit(struct notifier_block *self,
+                                    unsigned long val, void *data)
 {
        return 0;
 }
 #endif /* CONFIG_MODULES */
 
-struct notifier_block ftrace_module_nb = {
-       .notifier_call = ftrace_module_notify,
+struct notifier_block ftrace_module_enter_nb = {
+       .notifier_call = ftrace_module_notify_enter,
        .priority = INT_MAX,    /* Run before anything that can use kprobes */
 };
 
+struct notifier_block ftrace_module_exit_nb = {
+       .notifier_call = ftrace_module_notify_exit,
+       .priority = INT_MIN,    /* Run after anything that can remove kprobes */
+};
+
 extern unsigned long __start_mcount_loc[];
 extern unsigned long __stop_mcount_loc[];
 
@@ -4032,9 +4123,13 @@ void __init ftrace_init(void)
                                  __start_mcount_loc,
                                  __stop_mcount_loc);
 
-       ret = register_module_notifier(&ftrace_module_nb);
+       ret = register_module_notifier(&ftrace_module_enter_nb);
        if (ret)
-               pr_warning("Failed to register trace ftrace module notifier\n");
+               pr_warning("Failed to register trace ftrace module enter notifier\n");
+
+       ret = register_module_notifier(&ftrace_module_exit_nb);
+       if (ret)
+               pr_warning("Failed to register trace ftrace module exit notifier\n");
 
        set_ftrace_early_filters();
 
@@ -4090,14 +4185,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
         */
        preempt_disable_notrace();
        trace_recursion_set(TRACE_CONTROL_BIT);
-       op = rcu_dereference_raw(ftrace_control_list);
-       while (op != &ftrace_list_end) {
+       do_for_each_ftrace_op(op, ftrace_control_list) {
                if (!ftrace_function_local_disabled(op) &&
                    ftrace_ops_test(op, ip))
                        op->func(ip, parent_ip, op, regs);
-
-               op = rcu_dereference_raw(op->next);
-       };
+       } while_for_each_ftrace_op(op);
        trace_recursion_clear(TRACE_CONTROL_BIT);
        preempt_enable_notrace();
 }
@@ -4112,27 +4204,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
                       struct ftrace_ops *ignored, struct pt_regs *regs)
 {
        struct ftrace_ops *op;
+       int bit;
 
        if (function_trace_stop)
                return;
 
-       if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
+       bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+       if (bit < 0)
                return;
 
-       trace_recursion_set(TRACE_INTERNAL_BIT);
        /*
         * Some of the ops may be dynamically allocated,
         * they must be freed after a synchronize_sched().
         */
        preempt_disable_notrace();
-       op = rcu_dereference_raw(ftrace_ops_list);
-       while (op != &ftrace_list_end) {
+       do_for_each_ftrace_op(op, ftrace_ops_list) {
                if (ftrace_ops_test(op, ip))
                        op->func(ip, parent_ip, op, regs);
-               op = rcu_dereference_raw(op->next);
-       };
+       } while_for_each_ftrace_op(op);
        preempt_enable_notrace();
-       trace_recursion_clear(TRACE_INTERNAL_BIT);
+       trace_clear_recursion(bit);
 }
 
 /*
@@ -4143,8 +4234,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
  * Archs are to support both the regs and ftrace_ops at the same time.
  * If they support ftrace_ops, it is assumed they support regs.
  * If call backs want to use regs, they must either check for regs
- * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
- * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
+ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
+ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
  * An architecture can pass partial regs with ftrace_ops and still
  * set the ARCH_SUPPORTS_FTRACE_OPS.
  */