]> Pileus Git - ~andy/linux/blobdiff - kernel/trace/ftrace.c
Merge branch 'for-next-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/nab...
[~andy/linux] / kernel / trace / ftrace.c
index 900b409543db10cfc46b9da703f463a1cee9b78e..683d559a0eefef4cc9495c1b97fcbee0836065e1 100644 (file)
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
+#include <linux/bsearch.h>
 #include <linux/module.h>
 #include <linux/ftrace.h>
 #include <linux/sysctl.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/sort.h>
 #include <linux/list.h>
 #include <linux/hash.h>
 #include <linux/rcupdate.h>
@@ -152,7 +154,6 @@ void clear_ftrace_function(void)
        ftrace_pid_function = ftrace_stub;
 }
 
-#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 /*
  * For those archs that do not test ftrace_trace_stop in their
@@ -948,13 +949,6 @@ struct ftrace_func_probe {
        struct rcu_head         rcu;
 };
 
-enum {
-       FTRACE_ENABLE_CALLS             = (1 << 0),
-       FTRACE_DISABLE_CALLS            = (1 << 1),
-       FTRACE_UPDATE_TRACE_FUNC        = (1 << 2),
-       FTRACE_START_FUNC_RET           = (1 << 3),
-       FTRACE_STOP_FUNC_RET            = (1 << 4),
-};
 struct ftrace_func_entry {
        struct hlist_node hlist;
        unsigned long ip;
@@ -985,18 +979,19 @@ static struct ftrace_ops global_ops = {
        .filter_hash            = EMPTY_HASH,
 };
 
-static struct dyn_ftrace *ftrace_new_addrs;
-
 static DEFINE_MUTEX(ftrace_regex_lock);
 
+/*
+ * One chunk of dyn_ftrace records; chunks are chained through ->next.
+ *
+ * records: start of this chunk's array of dyn_ftrace entries
+ * index:   number of entries of ->records currently in use
+ * size:    number of entries ->records has room for
+ */
 struct ftrace_page {
        struct ftrace_page      *next;
+       struct dyn_ftrace       *records;
        int                     index;
-       struct dyn_ftrace       records[];
+       int                     size;
 };
 
-#define ENTRIES_PER_PAGE \
-  ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
+static struct ftrace_page *ftrace_new_pgs;
+
+#define ENTRY_SIZE sizeof(struct dyn_ftrace)
+#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
 
 /* estimate from running different kernels */
 #define NR_TO_INIT             10000
@@ -1004,7 +999,10 @@ struct ftrace_page {
 static struct ftrace_page      *ftrace_pages_start;
 static struct ftrace_page      *ftrace_pages;
 
-static struct dyn_ftrace *ftrace_free_records;
+/* Report whether @hash is missing (NULL) or holds no entries. */
+static bool ftrace_hash_empty(struct ftrace_hash *hash)
+{
+       if (!hash)
+               return true;
+
+       return hash->count == 0;
+}
 
 static struct ftrace_func_entry *
 ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
@@ -1014,7 +1012,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
        struct hlist_head *hhd;
        struct hlist_node *n;
 
-       if (!hash->count)
+       if (ftrace_hash_empty(hash))
                return NULL;
 
        if (hash->size_bits > 0)
@@ -1158,7 +1156,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
                return NULL;
 
        /* Empty hash? */
-       if (!hash || !hash->count)
+       if (ftrace_hash_empty(hash))
                return new_hash;
 
        size = 1 << hash->size_bits;
@@ -1212,7 +1210,9 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
        if (!src->count) {
                free_ftrace_hash_rcu(*dst);
                rcu_assign_pointer(*dst, EMPTY_HASH);
-               return 0;
+               /* still need to update the function records */
+               ret = 0;
+               goto out;
        }
 
        /*
@@ -1281,9 +1281,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
        filter_hash = rcu_dereference_raw(ops->filter_hash);
        notrace_hash = rcu_dereference_raw(ops->notrace_hash);
 
-       if ((!filter_hash || !filter_hash->count ||
+       if ((ftrace_hash_empty(filter_hash) ||
             ftrace_lookup_ip(filter_hash, ip)) &&
-           (!notrace_hash || !notrace_hash->count ||
+           (ftrace_hash_empty(notrace_hash) ||
             !ftrace_lookup_ip(notrace_hash, ip)))
                ret = 1;
        else
@@ -1306,6 +1306,47 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
                }                               \
        }
 
+
+/*
+ * Comparison callback ordering two dyn_ftrace records by their ip:
+ * returns 1 when @a has the larger ip, -1 when it has the smaller
+ * one, and 0 when both are equal.
+ */
+static int ftrace_cmp_recs(const void *a, const void *b)
+{
+       const struct dyn_ftrace *lhs = a;
+       const struct dyn_ftrace *rhs = b;
+
+       if (lhs->ip == rhs->ip)
+               return 0;
+
+       return lhs->ip > rhs->ip ? 1 : -1;
+}
+
+/**
+ * ftrace_location - return true if the ip given is a traced location
+ * @ip: the instruction pointer to check
+ *
+ * Returns 1 if @ip is a pointer to a ftrace location, that is, the
+ * instruction that is either a NOP or a call to the function tracer.
+ * Every page on the ftrace_pages_start list is searched for a record
+ * whose ip equals @ip. Returns 0 if none is found.
+ */
+int ftrace_location(unsigned long ip)
+{
+       struct ftrace_page *page = ftrace_pages_start;
+       struct dyn_ftrace needle;
+
+       needle.ip = ip;
+
+       while (page) {
+               /*
+                * NOTE(review): bsearch() needs the in-use records of
+                * each page ordered per ftrace_cmp_recs(); the sorting
+                * is not visible from here - confirm.
+                */
+               if (bsearch(&needle, page->records, page->index,
+                           sizeof(needle), ftrace_cmp_recs))
+                       return 1;
+               page = page->next;
+       }
+
+       return 0;
+}
+
 static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
                                     int filter_hash,
                                     bool inc)
@@ -1335,7 +1376,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
        if (filter_hash) {
                hash = ops->filter_hash;
                other_hash = ops->notrace_hash;
-               if (!hash || !hash->count)
+               if (ftrace_hash_empty(hash))
                        all = 1;
        } else {
                inc = !inc;
@@ -1345,7 +1386,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
                 * If the notrace hash has no items,
                 * then there's nothing to do.
                 */
-               if (hash && !hash->count)
+               if (ftrace_hash_empty(hash))
                        return;
        }
 
@@ -1362,8 +1403,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
                        if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
                                match = 1;
                } else {
-                       in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip);
-                       in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip);
+                       in_hash = !!ftrace_lookup_ip(hash, rec->ip);
+                       in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
 
                        /*
                         *
@@ -1371,7 +1412,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
                        if (filter_hash && in_hash && !in_other_hash)
                                match = 1;
                        else if (!filter_hash && in_hash &&
-                                (in_other_hash || !other_hash->count))
+                                (in_other_hash || ftrace_hash_empty(other_hash)))
                                match = 1;
                }
                if (!match)
@@ -1405,40 +1446,12 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
        __ftrace_hash_rec_update(ops, filter_hash, 1);
 }
 
-static void ftrace_free_rec(struct dyn_ftrace *rec)
-{
-       rec->freelist = ftrace_free_records;
-       ftrace_free_records = rec;
-       rec->flags |= FTRACE_FL_FREE;
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
-       struct dyn_ftrace *rec;
-
-       /* First check for freed records */
-       if (ftrace_free_records) {
-               rec = ftrace_free_records;
-
-               if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
-                       FTRACE_WARN_ON_ONCE(1);
-                       ftrace_free_records = NULL;
+       if (ftrace_pages->index == ftrace_pages->size) {
+               /* We should have allocated enough */
+               if (WARN_ON(!ftrace_pages->next))
                        return NULL;
-               }
-
-               ftrace_free_records = rec->freelist;
-               memset(rec, 0, sizeof(*rec));
-               return rec;
-       }
-
-       if (ftrace_pages->index == ENTRIES_PER_PAGE) {
-               if (!ftrace_pages->next) {
-                       /* allocate another page */
-                       ftrace_pages->next =
-                               (void *)get_zeroed_page(GFP_KERNEL);
-                       if (!ftrace_pages->next)
-                               return NULL;
-               }
                ftrace_pages = ftrace_pages->next;
        }
 
@@ -1458,8 +1471,6 @@ ftrace_record_ip(unsigned long ip)
                return NULL;
 
        rec->ip = ip;
-       rec->newlist = ftrace_new_addrs;
-       ftrace_new_addrs = rec;
 
        return rec;
 }
@@ -1474,7 +1485,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
                printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
 }
 
-static void ftrace_bug(int failed, unsigned long ip)
+/**
+ * ftrace_bug - report and shutdown function tracer
+ * @failed: The failed type (EFAULT, EINVAL, EPERM)
+ * @ip: The address that failed
+ *
+ * The arch code that enables or disables the function tracing
+ * can call ftrace_bug() when it has detected a problem in
+ * modifying the code. @failed should be one of either:
+ * EFAULT - if the problem happens on reading the @ip address
+ * EINVAL - if what is read at @ip is not what was expected
+ * EPERM - if the problem happens on writing to the @ip address
+ */
+void ftrace_bug(int failed, unsigned long ip)
 {
        switch (failed) {
        case -EFAULT:
@@ -1516,24 +1539,19 @@ int ftrace_text_reserved(void *start, void *end)
        return 0;
 }
 
-
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 {
-       unsigned long ftrace_addr;
        unsigned long flag = 0UL;
 
-       ftrace_addr = (unsigned long)FTRACE_ADDR;
-
        /*
-        * If we are enabling tracing:
+        * If we are updating calls:
         *
         *   If the record has a ref count, then we need to enable it
         *   because someone is using it.
         *
         *   Otherwise we make sure its disabled.
         *
-        * If we are disabling tracing, then disable all records that
+        * If we are disabling calls, then disable all records that
         * are enabled.
         */
        if (enable && (rec->flags & ~FTRACE_FL_MASK))
@@ -1541,18 +1559,72 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 
        /* If the state of this record hasn't changed, then do nothing */
        if ((rec->flags & FTRACE_FL_ENABLED) == flag)
-               return 0;
+               return FTRACE_UPDATE_IGNORE;
 
        if (flag) {
-               rec->flags |= FTRACE_FL_ENABLED;
+               if (update)
+                       rec->flags |= FTRACE_FL_ENABLED;
+               return FTRACE_UPDATE_MAKE_CALL;
+       }
+
+       if (update)
+               rec->flags &= ~FTRACE_FL_ENABLED;
+
+       return FTRACE_UPDATE_MAKE_NOP;
+}
+
+/**
+ * ftrace_update_record - set a record that now is tracing or not
+ * @rec: the record to update
+ * @enable: set to 1 if the record is tracing, zero to force disable
+ *
+ * The records that represent all functions that can be traced need
+ * to be updated when tracing has been enabled. This calls
+ * ftrace_check_record() with updating turned on and hands back its
+ * FTRACE_UPDATE_* result.
+ */
+int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+{
+       const int update = 1;
+
+       return ftrace_check_record(rec, enable, update);
+}
+
+/**
+ * ftrace_test_record - check if the record has been enabled or not
+ * @rec: the record to test
+ * @enable: set to 1 to check if enabled, 0 if it is disabled
+ *
+ * The arch code may need to test if a record is already set to
+ * tracing to determine how to modify the function code that it
+ * represents. This calls ftrace_check_record() with updating turned
+ * off and hands back its FTRACE_UPDATE_* result.
+ */
+int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+{
+       const int update = 0;
+
+       return ftrace_check_record(rec, enable, update);
+}
+
+/*
+ * Update @rec through ftrace_update_record() and then patch the code
+ * according to the answer: nothing for FTRACE_UPDATE_IGNORE (returns 0),
+ * ftrace_make_call() for FTRACE_UPDATE_MAKE_CALL, ftrace_make_nop() for
+ * FTRACE_UPDATE_MAKE_NOP. The result of the make_call/make_nop helper is
+ * returned as is; any other answer yields -1.
+ */
+static int
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+       unsigned long ftrace_addr;
+       int ret;
+
+       ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+       ret = ftrace_update_record(rec, enable);
+
+       switch (ret) {
+       case FTRACE_UPDATE_IGNORE:
+               return 0;
+
+       case FTRACE_UPDATE_MAKE_CALL:
                return ftrace_make_call(rec, ftrace_addr);
+
+       case FTRACE_UPDATE_MAKE_NOP:
+               return ftrace_make_nop(NULL, rec, ftrace_addr);
        }
 
-       rec->flags &= ~FTRACE_FL_ENABLED;
-       return ftrace_make_nop(NULL, rec, ftrace_addr);
+       return -1; /* unknown ftrace bug */
 }
 
-static void ftrace_replace_code(int enable)
+static void ftrace_replace_code(int update)
 {
        struct dyn_ftrace *rec;
        struct ftrace_page *pg;
@@ -1562,11 +1634,7 @@ static void ftrace_replace_code(int enable)
                return;
 
        do_for_each_ftrace_rec(pg, rec) {
-               /* Skip over free records */
-               if (rec->flags & FTRACE_FL_FREE)
-                       continue;
-
-               failed = __ftrace_replace_code(rec, enable);
+               failed = __ftrace_replace_code(rec, update);
                if (failed) {
                        ftrace_bug(failed, rec->ip);
                        /* Stop processing */
@@ -1575,6 +1643,78 @@ static void ftrace_replace_code(int enable)
        } while_for_each_ftrace_rec();
 }
 
+/*
+ * Cursor over the ftrace records: ->pg is the page being walked and
+ * ->index the position inside that page's records array.
+ */
+struct ftrace_rec_iter {
+       struct ftrace_page      *pg;
+       int                     index;
+};
+
+/**
+ * ftrace_rec_iter_start - start up iterating over traced functions
+ *
+ * Positions the iterator on the first page of the ftrace_pages_start
+ * list that holds at least one record and returns a handle used to
+ * iterate over all the records that represent address locations where
+ * functions are traced.
+ *
+ * May return NULL if no records are available.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_start(void)
+{
+       /*
+        * A single, statically allocated iterator serves every caller.
+        * Protected by the ftrace_lock mutex.
+        */
+       static struct ftrace_rec_iter ftrace_rec_iter;
+       struct ftrace_page *page;
+
+       /* Pages may be empty; skip ahead to the first one with records */
+       for (page = ftrace_pages_start; page; page = page->next) {
+               if (page->index)
+                       break;
+       }
+
+       ftrace_rec_iter.pg = page;
+       ftrace_rec_iter.index = 0;
+
+       return page ? &ftrace_rec_iter : NULL;
+}
+
+/**
+ * ftrace_rec_iter_next - get the next record to process
+ * @iter: The handle to the iterator.
+ *
+ * Advances @iter by one record, moving on to the next page that holds
+ * records once the current page is exhausted.
+ *
+ * Returns @iter, or NULL when there are no more records.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
+{
+       struct ftrace_page *page;
+
+       /* Still inside the current page? */
+       if (++iter->index < iter->pg->index)
+               return iter;
+
+       /* Page exhausted: restart at the top of the next non-empty page */
+       iter->index = 0;
+       page = iter->pg->next;
+       while (page && !page->index)
+               page = page->next;
+       iter->pg = page;
+
+       return page ? iter : NULL;
+}
+
+/**
+ * ftrace_rec_iter_record - get the record at the iterator location
+ * @iter: The current iterator location
+ *
+ * Returns a pointer to the entry of the current page's records array
+ * that @iter is positioned on.
+ */
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
+{
+       struct dyn_ftrace *recs = iter->pg->records;
+
+       return recs + iter->index;
+}
+
 static int
 ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
 {
@@ -1616,13 +1756,7 @@ static int __ftrace_modify_code(void *data)
 {
        int *command = data;
 
-       /*
-        * Do not call function tracer while we update the code.
-        * We are in stop machine, no worrying about races.
-        */
-       function_trace_stop++;
-
-       if (*command & FTRACE_ENABLE_CALLS)
+       if (*command & FTRACE_UPDATE_CALLS)
                ftrace_replace_code(1);
        else if (*command & FTRACE_DISABLE_CALLS)
                ftrace_replace_code(0);
@@ -1635,21 +1769,33 @@ static int __ftrace_modify_code(void *data)
        else if (*command & FTRACE_STOP_FUNC_RET)
                ftrace_disable_ftrace_graph_caller();
 
-#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
-       /*
-        * For archs that call ftrace_test_stop_func(), we must
-        * wait till after we update all the function callers
-        * before we update the callback. This keeps different
-        * ops that record different functions from corrupting
-        * each other.
-        */
-       __ftrace_trace_function = __ftrace_trace_function_delay;
-#endif
-       function_trace_stop--;
-
        return 0;
 }
 
+/**
+ * ftrace_run_stop_machine - go back to the stop machine method
+ * @command: The command to tell ftrace what to do
+ *
+ * If an arch needs to fall back to the stop machine method, then
+ * it can call this function. It runs __ftrace_modify_code() through
+ * stop_machine(), passing it a pointer to @command.
+ */
+void ftrace_run_stop_machine(int command)
+{
+       stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
+/**
+ * arch_ftrace_update_code - modify the code to trace or not trace
+ * @command: The command that needs to be done
+ *
+ * Archs can override this function if they do not need to
+ * run stop_machine() to modify code. This default (weak) version
+ * simply calls ftrace_run_stop_machine().
+ */
+void __weak arch_ftrace_update_code(int command)
+{
+       ftrace_run_stop_machine(command);
+}
+
 static void ftrace_run_update_code(int command)
 {
        int ret;
@@ -1658,8 +1804,31 @@ static void ftrace_run_update_code(int command)
        FTRACE_WARN_ON(ret);
        if (ret)
                return;
+       /*
+        * Do not call function tracer while we update the code.
+        * We are in stop machine.
+        */
+       function_trace_stop++;
 
-       stop_machine(__ftrace_modify_code, &command, NULL);
+       /*
+        * By default we use stop_machine() to modify the code.
+        * But archs can do what ever they want as long as it
+        * is safe. The stop_machine() is the safest, but also
+        * produces the most overhead.
+        */
+       arch_ftrace_update_code(command);
+
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+       /*
+        * For archs that call ftrace_test_stop_func(), we must
+        * wait till after we update all the function callers
+        * before we update the callback. This keeps different
+        * ops that record different functions from corrupting
+        * each other.
+        */
+       __ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+       function_trace_stop--;
 
        ret = ftrace_arch_code_modify_post_process();
        FTRACE_WARN_ON(ret);
@@ -1690,7 +1859,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
                return -ENODEV;
 
        ftrace_start_up++;
-       command |= FTRACE_ENABLE_CALLS;
+       command |= FTRACE_UPDATE_CALLS;
 
        /* ops marked global share the filter hashes */
        if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
@@ -1742,8 +1911,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
        if (ops != &global_ops || !global_start_up)
                ops->flags &= ~FTRACE_OPS_FL_ENABLED;
 
-       if (!ftrace_start_up)
-               command |= FTRACE_DISABLE_CALLS;
+       command |= FTRACE_UPDATE_CALLS;
 
        if (saved_ftrace_func != ftrace_trace_function) {
                saved_ftrace_func = ftrace_trace_function;
@@ -1765,7 +1933,7 @@ static void ftrace_startup_sysctl(void)
        saved_ftrace_func = NULL;
        /* ftrace_start_up is true if we want ftrace running */
        if (ftrace_start_up)
-               ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+               ftrace_run_update_code(FTRACE_UPDATE_CALLS);
 }
 
 static void ftrace_shutdown_sysctl(void)
@@ -1787,14 +1955,16 @@ static int ops_traces_mod(struct ftrace_ops *ops)
        struct ftrace_hash *hash;
 
        hash = ops->filter_hash;
-       return !!(!hash || !hash->count);
+       return ftrace_hash_empty(hash);
 }
 
 static int ftrace_update_code(struct module *mod)
 {
+       struct ftrace_page *pg;
        struct dyn_ftrace *p;
        cycle_t start, stop;
        unsigned long ref = 0;
+       int i;
 
        /*
         * When adding a module, we need to check if tracers are
@@ -1816,46 +1986,44 @@ static int ftrace_update_code(struct module *mod)
        start = ftrace_now(raw_smp_processor_id());
        ftrace_update_cnt = 0;
 
-       while (ftrace_new_addrs) {
+       for (pg = ftrace_new_pgs; pg; pg = pg->next) {
 
-               /* If something went wrong, bail without enabling anything */
-               if (unlikely(ftrace_disabled))
-                       return -1;
+               for (i = 0; i < pg->index; i++) {
+                       /* If something went wrong, bail without enabling anything */
+                       if (unlikely(ftrace_disabled))
+                               return -1;
 
-               p = ftrace_new_addrs;
-               ftrace_new_addrs = p->newlist;
-               p->flags = ref;
+                       p = &pg->records[i];
+                       p->flags = ref;
 
-               /*
-                * Do the initial record conversion from mcount jump
-                * to the NOP instructions.
-                */
-               if (!ftrace_code_disable(mod, p)) {
-                       ftrace_free_rec(p);
-                       /* Game over */
-                       break;
-               }
+                       /*
+                        * Do the initial record conversion from mcount jump
+                        * to the NOP instructions.
+                        */
+                       if (!ftrace_code_disable(mod, p))
+                               break;
 
-               ftrace_update_cnt++;
+                       ftrace_update_cnt++;
 
-               /*
-                * If the tracing is enabled, go ahead and enable the record.
-                *
-                * The reason not to enable the record immediatelly is the
-                * inherent check of ftrace_make_nop/ftrace_make_call for
-                * correct previous instructions.  Making first the NOP
-                * conversion puts the module to the correct state, thus
-                * passing the ftrace_make_call check.
-                */
-               if (ftrace_start_up && ref) {
-                       int failed = __ftrace_replace_code(p, 1);
-                       if (failed) {
-                               ftrace_bug(failed, p->ip);
-                               ftrace_free_rec(p);
+                       /*
+                        * If the tracing is enabled, go ahead and enable the record.
+                        *
+                        * The reason not to enable the record immediately is the
+                        * inherent check of ftrace_make_nop/ftrace_make_call for
+                        * correct previous instructions.  Making first the NOP
+                        * conversion puts the module to the correct state, thus
+                        * passing the ftrace_make_call check.
+                        */
+                       if (ftrace_start_up && ref) {
+                               int failed = __ftrace_replace_code(p, 1);
+                               if (failed)
+                                       ftrace_bug(failed, p->ip);
                        }
                }
        }
 
+       ftrace_new_pgs = NULL;
+
        stop = ftrace_now(raw_smp_processor_id());
        ftrace_update_time = stop - start;
        ftrace_update_tot_cnt += ftrace_update_cnt;
@@ -1863,57 +2031,108 @@ static int ftrace_update_code(struct module *mod)
        return 0;
 }
 
-static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
+/*
+ * Allocate the records array of @pg.
+ *
+ * @pg:    page descriptor whose ->records and ->size get filled in
+ * @count: number of records that still need space; must not be zero
+ *
+ * Starts from the page order that covers @count entries and lowers it
+ * while a whole page worth of entries would stay unused. If the
+ * allocation fails, it is retried with the order halved; only a failed
+ * order-0 allocation gives up.
+ *
+ * Returns how many of the @count records fit into this page (never more
+ * than @count), -EINVAL if @count is zero, or -ENOMEM if no memory could
+ * be allocated.
+ */
+static int ftrace_allocate_records(struct ftrace_page *pg, int count)
 {
-       struct ftrace_page *pg;
+       int order;
        int cnt;
-       int i;
 
-       /* allocate a few pages */
-       ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
-       if (!ftrace_pages_start)
-               return -1;
+       if (WARN_ON(!count))
+               return -EINVAL;
+
+       order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
 
        /*
-        * Allocate a few more pages.
-        *
-        * TODO: have some parser search vmlinux before
-        *   final linking to find all calls to ftrace.
-        *   Then we can:
-        *    a) know how many pages to allocate.
-        *     and/or
-        *    b) set up the table then.
-        *
-        *  The dynamic code is still necessary for
-        *  modules.
+        * We want to fill as much as possible. No more than a page
+        * may be empty.
         */
+       while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
+               order--;
 
-       pg = ftrace_pages = ftrace_pages_start;
+ again:
+       pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
 
-       cnt = num_to_init / ENTRIES_PER_PAGE;
-       pr_info("ftrace: allocating %ld entries in %d pages\n",
-               num_to_init, cnt + 1);
+       if (!pg->records) {
+               /* if we can't allocate this size, try something smaller */
+               if (!order)
+                       return -ENOMEM;
+               order >>= 1;
+               goto again;
+       }
 
-       for (i = 0; i < cnt; i++) {
-               pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+       /* capacity, in entries, of what was actually allocated */
+       cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
+       pg->size = cnt;
 
-               /* If we fail, we'll try later anyway */
-               if (!pg->next)
+       if (cnt > count)
+               cnt = count;
+
+       return cnt;
+}
+
+/*
+ * Build a chain of ftrace_page descriptors whose records arrays have
+ * room for @num_to_init records in total.
+ *
+ * Returns the first descriptor of the chain, or NULL if @num_to_init
+ * is zero or an allocation fails. On failure everything allocated so
+ * far is released again.
+ */
+static struct ftrace_page *
+ftrace_allocate_pages(unsigned long num_to_init)
+{
+       struct ftrace_page *start_pg;
+       struct ftrace_page *pg;
+       int order;
+       int cnt;
+
+       if (!num_to_init)
+               return NULL;
+
+       start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
+       if (!pg)
+               return NULL;
+
+       /*
+        * Try to allocate as much as possible in one contiguous
+        * location that fills in all of the space. We want to
+        * waste as little space as possible.
+        */
+       for (;;) {
+               cnt = ftrace_allocate_records(pg, num_to_init);
+               if (cnt < 0)
+                       goto free_pages;
+
+               num_to_init -= cnt;
+               if (!num_to_init)
                        break;
 
+               pg->next = kzalloc(sizeof(*pg), GFP_KERNEL);
+               if (!pg->next)
+                       goto free_pages;
+
                pg = pg->next;
        }
 
-       return 0;
+       return start_pg;
+
+ free_pages:
+       /*
+        * pg points at the descriptor that was being filled when the
+        * failure hit, so rewind to the head of the chain. Walking
+        * from pg would leak every earlier descriptor and its records.
+        */
+       pg = start_pg;
+       while (pg) {
+               /* the descriptor that failed may not have any records */
+               if (pg->records) {
+                       order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+                       free_pages((unsigned long)pg->records, order);
+               }
+               start_pg = pg->next;
+               kfree(pg);
+               pg = start_pg;
+       }
+       pr_info("ftrace: FAILED to allocate memory for functions\n");
+       return NULL;
 }
 
-enum {
-       FTRACE_ITER_FILTER      = (1 << 0),
-       FTRACE_ITER_NOTRACE     = (1 << 1),
-       FTRACE_ITER_PRINTALL    = (1 << 2),
-       FTRACE_ITER_HASH        = (1 << 3),
-       FTRACE_ITER_ENABLED     = (1 << 4),
-};
+/*
+ * Report how many function records are about to be set up.
+ *
+ * @num_to_init: number of records expected
+ *
+ * Only logs; no memory is allocated here. The page count printed is
+ * @num_to_init / ENTRIES_PER_PAGE, plus one.
+ *
+ * Returns 0, or -1 (after logging a message) when @num_to_init is zero.
+ */
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
+{
+       int cnt;
+
+       if (!num_to_init) {
+               pr_info("ftrace: No functions to be traced?\n");
+               return -1;
+       }
+
+       cnt = num_to_init / ENTRIES_PER_PAGE;
+       /* num_to_init is unsigned long, so print it with %lu */
+       pr_info("ftrace: allocating %lu entries in %d pages\n",
+               num_to_init, cnt + 1);
+
+       return 0;
+}
 
 #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
 
@@ -1979,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
        void *p = NULL;
        loff_t l;
 
+       if (!(iter->flags & FTRACE_ITER_DO_HASH))
+               return NULL;
+
        if (iter->func_pos > *pos)
                return NULL;
 
@@ -2022,7 +2244,7 @@ static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
        struct ftrace_iterator *iter = m->private;
-       struct ftrace_ops *ops = &global_ops;
+       struct ftrace_ops *ops = iter->ops;
        struct dyn_ftrace *rec = NULL;
 
        if (unlikely(ftrace_disabled))
@@ -2046,9 +2268,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
                }
        } else {
                rec = &iter->pg->records[iter->idx++];
-               if ((rec->flags & FTRACE_FL_FREE) ||
-
-                   ((iter->flags & FTRACE_ITER_FILTER) &&
+               if (((iter->flags & FTRACE_ITER_FILTER) &&
                     !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
 
                    ((iter->flags & FTRACE_ITER_NOTRACE) &&
@@ -2080,7 +2300,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
        struct ftrace_iterator *iter = m->private;
-       struct ftrace_ops *ops = &global_ops;
+       struct ftrace_ops *ops = iter->ops;
        void *p = NULL;
        loff_t l;
 
@@ -2100,7 +2320,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
         * off, we can short cut and just print out that all
         * functions are enabled.
         */
-       if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) {
+       if (iter->flags & FTRACE_ITER_FILTER &&
+           ftrace_hash_empty(ops->filter_hash)) {
                if (*pos > 0)
                        return t_hash_start(m, pos);
                iter->flags |= FTRACE_ITER_PRINTALL;
@@ -2125,12 +2346,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
                        break;
        }
 
-       if (!p) {
-               if (iter->flags & FTRACE_ITER_FILTER)
-                       return t_hash_start(m, pos);
-
-               return NULL;
-       }
+       if (!p)
+               return t_hash_start(m, pos);
 
        return iter;
 }
@@ -2188,6 +2405,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
                return -ENOMEM;
 
        iter->pg = ftrace_pages_start;
+       iter->ops = &global_ops;
 
        ret = seq_open(file, &show_ftrace_seq_ops);
        if (!ret) {
@@ -2216,6 +2434,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
 
        iter->pg = ftrace_pages_start;
        iter->flags = FTRACE_ITER_ENABLED;
+       iter->ops = &global_ops;
 
        ret = seq_open(file, &show_ftrace_seq_ops);
        if (!ret) {
@@ -2236,7 +2455,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
        mutex_unlock(&ftrace_lock);
 }
 
-static int
+/**
+ * ftrace_regex_open - initialize function tracer filter files
+ * @ops: The ftrace_ops that hold the hash filters
+ * @flag: The type of filter to process
+ * @inode: The inode, usually passed in to your open routine
+ * @file: The file, usually passed in to your open routine
+ *
+ * ftrace_regex_open() initializes the filter files for the
+ * @ops. Depending on @flag it may process the filter hash or
+ * the notrace hash of @ops. With this called from the open
+ * routine, you can use ftrace_filter_write() for the write
+ * routine if @flag has FTRACE_ITER_FILTER set, or
+ * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
+ * ftrace_regex_lseek() should be used as the lseek routine, and
+ * release must call ftrace_regex_release().
+ */
+int
 ftrace_regex_open(struct ftrace_ops *ops, int flag,
                  struct inode *inode, struct file *file)
 {
@@ -2305,8 +2540,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
+/*
+ * Open routine that hands global_ops to ftrace_regex_open() with
+ * FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH. FTRACE_ITER_DO_HASH is the
+ * flag t_hash_start() checks before it returns anything but NULL.
+ */
 static int
 ftrace_filter_open(struct inode *inode, struct file *file)
 {
-       return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
-                                inode, file);
+       return ftrace_regex_open(&global_ops,
+                       FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
+                       inode, file);
 }
 
 static int
@@ -2316,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
                                 inode, file);
 }
 
-static loff_t
+loff_t
 ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
 {
        loff_t ret;
@@ -2425,7 +2661,6 @@ match_records(struct ftrace_hash *hash, char *buff,
                goto out_unlock;
 
        do_for_each_ftrace_rec(pg, rec) {
-
                if (ftrace_match_record(rec, mod, search, search_len, type)) {
                        ret = enter_record(hash, rec, not);
                        if (ret < 0) {
@@ -2870,14 +3105,14 @@ out_unlock:
        return ret;
 }
 
-static ssize_t
+ssize_t
 ftrace_filter_write(struct file *file, const char __user *ubuf,
                    size_t cnt, loff_t *ppos)
 {
        return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
 }
 
-static ssize_t
+ssize_t
 ftrace_notrace_write(struct file *file, const char __user *ubuf,
                     size_t cnt, loff_t *ppos)
 {
@@ -2918,7 +3153,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
        ret = ftrace_hash_move(ops, enable, orig_hash, hash);
        if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
            && ftrace_enabled)
-               ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+               ftrace_run_update_code(FTRACE_UPDATE_CALLS);
 
        mutex_unlock(&ftrace_lock);
 
@@ -3044,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf)
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-static void __init
-set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 {
        char *func;
 
@@ -3058,17 +3293,16 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 static void __init set_ftrace_early_filters(void)
 {
        if (ftrace_filter_buf[0])
-               set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
+               ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
        if (ftrace_notrace_buf[0])
-               set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
+               ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        if (ftrace_graph_buf[0])
                set_ftrace_early_graph(ftrace_graph_buf);
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 }
 
-static int
-ftrace_regex_release(struct inode *inode, struct file *file)
+int ftrace_regex_release(struct inode *inode, struct file *file)
 {
        struct seq_file *m = (struct seq_file *)file->private_data;
        struct ftrace_iterator *iter;
@@ -3106,7 +3340,7 @@ ftrace_regex_release(struct inode *inode, struct file *file)
                                       orig_hash, iter->hash);
                if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
                    && ftrace_enabled)
-                       ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+                       ftrace_run_update_code(FTRACE_UPDATE_CALLS);
 
                mutex_unlock(&ftrace_lock);
        }
@@ -3269,9 +3503,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 
        do_for_each_ftrace_rec(pg, rec) {
 
-               if (rec->flags & FTRACE_FL_FREE)
-                       continue;
-
                if (ftrace_match_record(rec, NULL, search, search_len, type)) {
                        /* if it is in the array */
                        exists = false;
@@ -3380,15 +3611,62 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
        return 0;
 }
 
+static void ftrace_swap_recs(void *a, void *b, int size)
+{
+       struct dyn_ftrace *reca = a;
+       struct dyn_ftrace *recb = b;
+       struct dyn_ftrace t;
+
+       t = *reca;
+       *reca = *recb;
+       *recb = t;
+}
+
 static int ftrace_process_locs(struct module *mod,
                               unsigned long *start,
                               unsigned long *end)
 {
+       struct ftrace_page *pg;
+       unsigned long count;
        unsigned long *p;
        unsigned long addr;
        unsigned long flags = 0; /* Shut up gcc */
+       int ret = -ENOMEM;
+
+       count = end - start;
+
+       if (!count)
+               return 0;
+
+       pg = ftrace_allocate_pages(count);
+       if (!pg)
+               return -ENOMEM;
 
        mutex_lock(&ftrace_lock);
+
+       /*
+        * Core and each module need their own pages, as
+        * modules will free them when they are removed.
+        * Force a new page to be allocated for modules.
+        */
+       if (!mod) {
+               WARN_ON(ftrace_pages || ftrace_pages_start);
+               /* First initialization */
+               ftrace_pages = ftrace_pages_start = pg;
+       } else {
+               if (!ftrace_pages)
+                       goto out;
+
+               if (WARN_ON(ftrace_pages->next)) {
+                       /* Hmm, we have free pages? */
+                       while (ftrace_pages->next)
+                               ftrace_pages = ftrace_pages->next;
+               }
+
+               ftrace_pages->next = pg;
+               ftrace_pages = pg;
+       }
+
        p = start;
        while (p < end) {
                addr = ftrace_call_adjust(*p++);
@@ -3400,9 +3678,18 @@ static int ftrace_process_locs(struct module *mod,
                 */
                if (!addr)
                        continue;
-               ftrace_record_ip(addr);
+               if (!ftrace_record_ip(addr))
+                       break;
        }
 
+       /* These new locations need to be initialized */
+       ftrace_new_pgs = pg;
+
+       /* Make each individual set of pages sorted by ips */
+       for (; pg; pg = pg->next)
+               sort(pg->records, pg->index, sizeof(struct dyn_ftrace),
+                    ftrace_cmp_recs, ftrace_swap_recs);
+
        /*
         * We only need to disable interrupts on start up
         * because we are modifying code that an interrupt
@@ -3416,32 +3703,55 @@ static int ftrace_process_locs(struct module *mod,
        ftrace_update_code(mod);
        if (!mod)
                local_irq_restore(flags);
+       ret = 0;
+ out:
        mutex_unlock(&ftrace_lock);
 
-       return 0;
+       return ret;
 }
 
 #ifdef CONFIG_MODULES
+
+#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
+
 void ftrace_release_mod(struct module *mod)
 {
        struct dyn_ftrace *rec;
+       struct ftrace_page **last_pg;
        struct ftrace_page *pg;
+       int order;
 
        mutex_lock(&ftrace_lock);
 
        if (ftrace_disabled)
                goto out_unlock;
 
-       do_for_each_ftrace_rec(pg, rec) {
+       /*
+        * Each module has its own ftrace_pages; remove
+        * them from the list.
+        */
+       last_pg = &ftrace_pages_start;
+       for (pg = ftrace_pages_start; pg; pg = *last_pg) {
+               rec = &pg->records[0];
                if (within_module_core(rec->ip, mod)) {
                        /*
-                        * rec->ip is changed in ftrace_free_rec()
-                        * It should not between s and e if record was freed.
+                        * As core pages are first, the first
+                        * page should never be a module page.
                         */
-                       FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
-                       ftrace_free_rec(rec);
-               }
-       } while_for_each_ftrace_rec();
+                       if (WARN_ON(pg == ftrace_pages_start))
+                               goto out_unlock;
+
+                       /* Check if we are deleting the last page */
+                       if (pg == ftrace_pages)
+                               ftrace_pages = next_to_ftrace_page(last_pg);
+
+                       *last_pg = pg->next;
+                       order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+                       free_pages((unsigned long)pg->records, order);
+                       kfree(pg);
+               } else
+                       last_pg = &pg->next;
+       }
  out_unlock:
        mutex_unlock(&ftrace_lock);
 }